source: nikanabo/current/xdelta/diy/xdelta3.h@ 981

Last change on this file since 981 was 185, checked in by geyser, 18 years ago
File size: 39.8 KB
RevLine 
[185]1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/* Welcome to Xdelta. If you want to know more about Xdelta, start by reading xdelta3.c.
20 * If you are ready to use the API, continue reading here. There are two interfaces --
21 * xd3_encode_input and xd3_decode_input -- plus a dozen or so related calls. This
22 * interface is styled after Zlib. */
23
24#ifndef _XDELTA3_H_
25#define _XDELTA3_H_
26
27#include <stdlib.h>
28#include <string.h>
29#include <sys/types.h>
30
31/**********************************************************************/
32
33/* Default configured value of stream->winsize. If the program supplies
34 * xd3_encode_input() with data smaller than winsize the stream will
35 * automatically buffer the input, otherwise the input buffer is used directly.
36 */
37#ifndef XD3_DEFAULT_WINSIZE
38#define XD3_DEFAULT_WINSIZE (1U << 23)
39#endif
40
41/* Default total size of the source window used in xdelta3-main.h */
42#ifndef XD3_DEFAULT_SRCWINSZ
43#define XD3_DEFAULT_SRCWINSZ (1U << 26)
44#endif
45
46/* When Xdelta requests a memory allocation for certain buffers, it rounds up to units of
47 * at least this size. The code assumes (and asserts) that this is a power-of-two. */
48#ifndef XD3_ALLOCSIZE
49#define XD3_ALLOCSIZE (1U<<14)
50#endif
51
52/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect decoders against
53 * malicious files. The decoder will never decode a window larger than this. If the file
54 * specifies VCD_TARGET the decoder may require two buffers of this size.
55 *
56 * 8-16MB is reasonable, probably don't need to go larger. */
57#ifndef XD3_HARDMAXWINSIZE
58#define XD3_HARDMAXWINSIZE (1U<<24)
59#endif
60
61/* The XD3_NODECOMPRESSSIZE parameter tells the xdelta main routine not to try to
62 * externally-decompress source inputs that are too large. Since these files must be
63 * seekable, they are decompressed to a temporary file location and the user may not wish
64 * for this. */
65#ifndef XD3_NODECOMPRESSSIZE
66#define XD3_NODECOMPRESSSIZE (1U<<28)
67#endif
68
69/* The IOPT_SIZE value sets the size of a buffer used to batch overlapping copy
70 * instructions before they are optimized by picking the best non-overlapping ranges. The
71 * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off.
72 * Setting this value to 0 causes an unlimited buffer to be used. */
73#ifndef XD3_DEFAULT_IOPT_SIZE
74#define XD3_DEFAULT_IOPT_SIZE (1U<<15)
75#endif
76
77/* The maximum distance backward to search for small matches */
78#ifndef XD3_DEFAULT_SPREVSZ
79#define XD3_DEFAULT_SPREVSZ (1U<<18)
80#endif
81
82/* The default compression level
83 */
84#ifndef XD3_DEFAULT_LEVEL
85#define XD3_DEFAULT_LEVEL 1
86#endif
87
88/* Sizes and addresses within VCDIFF windows are represented as usize_t
89 *
90 * For source-file offsets and total file sizes, total input and output counts, the xoff_t
91 * type is used. The decoder and encoder generally check for overflow of the xoff_t size,
92 * and this is tested at the 32bit boundary [xdelta3-test.h].
93 */
94#ifndef _WIN32
95typedef unsigned int usize_t;
96typedef u_int8_t uint8_t;
97typedef u_int16_t uint16_t;
98
99#ifndef __uint32_t_defined /* Note: Cygwin compat */
100typedef u_int32_t uint32_t;
101#endif
102
103typedef u_int64_t uint64_t;
104#else
105#define WIN32_LEAN_AND_MEAN
106#include <windows.h>
107#define inline
108typedef unsigned int uint;
109typedef signed int ssize_t;
110typedef unsigned int usize_t;
111typedef unsigned char uint8_t;
112typedef unsigned short uint16_t;
113typedef unsigned long uint32_t;
114typedef ULONGLONG uint64_t;
115#endif
116
117#define SIZEOF_USIZE_T 4
118
119#ifndef XD3_USE_LARGEFILE64
120#define XD3_USE_LARGEFILE64 1
121#endif
122
123#if XD3_USE_LARGEFILE64
124#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
125typedef uint64_t xoff_t;
126#define SIZEOF_XOFF_T 8
127#else
128typedef uint32_t xoff_t;
129#define SIZEOF_XOFF_T 4
130#endif
131
132#define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
133#define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8 || REGRESSION_TEST)
134
135/**********************************************************************/
136
137#ifndef INLINE
138#define INLINE inline
139#endif
140
141/* Whether to build the encoder, otherwise only build the decoder. */
142#ifndef XD3_ENCODER
143#define XD3_ENCODER 1
144#endif
145
146/* The code returned when main() fails, also defined in system includes. */
147#ifndef EXIT_FAILURE
148#define EXIT_FAILURE 1
149#endif
150
151/* REGRESSION TEST enables the "xdelta3 test" command, which runs a series of self-tests. */
152#ifndef REGRESSION_TEST
153#define REGRESSION_TEST 0
154#endif
155
156/* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 enable some
157 * additional output only useful during development and debugging. */
158#ifndef XD3_DEBUG
159#define XD3_DEBUG 0
160#endif
161
162#ifndef PYTHON_MODULE
163#define PYTHON_MODULE 0
164#endif
165
166#ifndef SWIG_MODULE
167#define SWIG_MODULE 0
168#endif
169
170/* There are three string matching functions supplied: one fast, one slow (default), and
171 * one soft-configurable. To disable any of these, use the following definitions. */
172#ifndef XD3_BUILD_SLOW
173#define XD3_BUILD_SLOW 1
174#endif
175#ifndef XD3_BUILD_FAST
176#define XD3_BUILD_FAST 1
177#endif
178#ifndef XD3_BUILD_FASTEST
179#define XD3_BUILD_FASTEST 1
180#endif
181#ifndef XD3_BUILD_SOFT
182#define XD3_BUILD_SOFT 1
183#endif
184#ifndef XD3_BUILD_DEFAULT
185#define XD3_BUILD_DEFAULT 1
186#endif
187
188#if XD3_DEBUG
189#include <stdio.h>
190#endif
191
192/* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr. I have used an
193 * irregular style to abbreviate [fprintf(stderr, "] as [DP(RINT "]. */
194#define DP fprintf
195#define RINT stderr,
196
197typedef struct _xd3_stream xd3_stream;
198typedef struct _xd3_source xd3_source;
199typedef struct _xd3_hash_cfg xd3_hash_cfg;
200typedef struct _xd3_smatcher xd3_smatcher;
201typedef struct _xd3_rinst xd3_rinst;
202typedef struct _xd3_dinst xd3_dinst;
203typedef struct _xd3_hinst xd3_hinst;
204typedef struct _xd3_rpage xd3_rpage;
205typedef struct _xd3_addr_cache xd3_addr_cache;
206typedef struct _xd3_output xd3_output;
207typedef struct _xd3_desect xd3_desect;
208typedef struct _xd3_iopt_buflist xd3_iopt_buflist;
209typedef struct _xd3_rlist xd3_rlist;
210typedef struct _xd3_sec_type xd3_sec_type;
211typedef struct _xd3_sec_cfg xd3_sec_cfg;
212typedef struct _xd3_sec_stream xd3_sec_stream;
213typedef struct _xd3_config xd3_config;
214typedef struct _xd3_code_table_desc xd3_code_table_desc;
215typedef struct _xd3_code_table_sizes xd3_code_table_sizes;
216typedef struct _xd3_slist xd3_slist;
217
218/* The stream configuration has three callbacks functions, all of which may be supplied
219 * with NULL values. If config->getblk is provided as NULL, the stream returns
220 * XD3_GETSRCBLK. */
221
222typedef void* (xd3_alloc_func) (void *opaque,
223 usize_t items,
224 usize_t size);
225typedef void (xd3_free_func) (void *opaque,
226 void *address);
227
228typedef int (xd3_getblk_func) (xd3_stream *stream,
229 xd3_source *source,
230 xoff_t blkno);
231
232/* These are internal functions to delay construction of encoding tables and support
233 * alternate code tables. See the comments & code enabled by GENERIC_ENCODE_TABLES. */
234
235typedef const xd3_dinst* (xd3_code_table_func) (void);
236typedef int (xd3_comp_table_func) (xd3_stream *stream,
237 const uint8_t **data,
238 usize_t *size);
239
240
241/* Some junk. */
242
243#ifndef XD3_ASSERT
244#if XD3_DEBUG
245#define XD3_ASSERT(x) \
246 do { if (! (x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
247 abort (); } } while (0)
248#else
249#define XD3_ASSERT(x) (void)0
250#endif
251#endif
252
253#ifdef __GNUC__
254/* As seen on linux-kernel. */
255#ifndef max
256#define max(x,y) ({ \
257 const typeof(x) _x = (x); \
258 const typeof(y) _y = (y); \
259 (void) (&_x == &_y); \
260 _x > _y ? _x : _y; })
261#endif
262
263#ifndef min
264#define min(x,y) ({ \
265 const typeof(x) _x = (x); \
266 const typeof(y) _y = (y); \
267 (void) (&_x == &_y); \
268 _x < _y ? _x : _y; })
269#endif
270#else
271#ifndef max
272#define max(x,y) ((x) < (y) ? (y) : (x))
273#endif
274#ifndef min
275#define min(x,y) ((x) < (y) ? (x) : (y))
276#endif
277#endif
278
279/******************************************************************************************
280 PUBLIC ENUMS
281 ******************************************************************************************/
282
283/* These are the five ordinary status codes returned by the xd3_encode_input() and
284 * xd3_decode_input() state machines. */
285typedef enum {
286
287 /* An application must be prepared to handle these five return values from either
288 * xd3_encode_input or xd3_decode_input, except in the case of no-source compression, in
289 * which case XD3_GETSRCBLK is never returned. More detailed comments for these are
290 * given in xd3_encode_input and xd3_decode_input comments, below. */
291 XD3_INPUT = -17703, /* need input */
292 XD3_OUTPUT = -17704, /* have output */
293 XD3_GETSRCBLK = -17705, /* need a block of source input (with no xd3_getblk function),
294 * a chance to do non-blocking read. */
295 XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & first window header */
296 XD3_WINSTART = -17707, /* notification: returned before a window is processed, giving a
297 * chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. */
298 XD3_WINFINISH = -17708, /* notification: returned after encode/decode & output for a window */
299 XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by getblk() if the block is too old */
300 XD3_INTERNAL = -17710, /* internal error */
301 XD3_INVALID = -17711, /* invalid config */
302 XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */
303
304} xd3_rvalues;
305
306/* special values in config->flags */
307typedef enum
308{
309 XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see xdelta3-main.h. */
310 XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see xdelta3-main.h. */
311 XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see xdelta3-main.h. */
312 XD3_FLUSH = (1 << 4), /* flush the stream buffer to prepare for xd3_stream_close(). */
313
314 XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */
315 XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */
316 XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK),
317
318 XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of the data section. */
319 XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of the inst section. */
320 XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of the addr section. */
321
322 XD3_SEC_OTHER = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
323
324 XD3_ADLER32 = (1 << 10), /* enable checksum computation in the encoder. */
325 XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in the decoder. */
326
327 XD3_ALT_CODE_TABLE = (1 << 12), /* for testing the alternate code table encoding. */
328
329 XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data compression feature,
330 * only search the source, not the target. */
331 XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass algorithm", instead use
332 * greedy matching. Greedy is off by default. */
333
334 /* 4 bits to set the compression level the same as the command-line
335 * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag,
336 * and is independent of compression level). This is for
337 * convenience, especially with xd3_encode_memory(). */
338
339 XD3_COMPLEVEL_SHIFT = 20, /* 20 - 24 */
340 XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT),
341 XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT),
342 XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT),
343 XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT),
344 XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT),
345
346} xd3_flags;
347
348/* The values of this enumeration are set in xd3_config using the
349 * smatch_cfg variable. It can be set to default, slow, fast, etc.,
350 * and soft. */
351typedef enum
352{
353 XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, else default. */
354 XD3_SMATCH_SLOW = 1,
355 XD3_SMATCH_FAST = 2,
356 XD3_SMATCH_FASTEST = 3,
357 XD3_SMATCH_SOFT = 4,
358} xd3_smatch_cfg;
359
360/******************************************************************************************
361 PRIVATE ENUMS
362 ******************************************************************************************/
363
364/* stream->match_state is part of the xd3_encode_input state machine for source matching:
365 *
366 * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
367 * 2. this state spans encoder windows: a match and end-of-window will continue in the next
368 * 3. the initial target byte and source byte are a presumed match, to avoid some computation
369 * in case the inputs are identical.
370 */
371typedef enum {
372
373 MATCH_TARGET = 0, /* in this state, attempt to match the start of the target with the
374 * previously set source address (initially 0). */
375 MATCH_BACKWARD = 1, /* currently expanding a match backward in the source/target. */
376 MATCH_FORWARD = 2, /* currently expanding a match forward in the source/target. */
377 MATCH_SEARCHING = 3, /* currently searching for a match. */
378
379} xd3_match_state;
380
381/* The xd3_encode_input state machine steps through these states in the following order.
382 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After
383 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
384 */
385typedef enum {
386
387 ENC_INIT = 0, /* xd3_encode_input has never been called. */
388 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */
389 ENC_SEARCH = 2, /* currently searching for matches. */
390 ENC_FLUSH = 3, /* currently emitting output. */
391 ENC_POSTOUT = 4, /* after an output section. */
392 ENC_POSTWIN = 5, /* after all output sections. */
393 ENC_ABORTED = 6, /* abort. */
394} xd3_encode_state;
395
396/* The xd3_decode_input state machine steps through these states in the following order.
397 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After
398 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
399 *
400 * 0-8: the VCDIFF header
401 * 9-18: the VCDIFF window header
402 * 19-21: the three primary sections: data (which I think should have gone last), inst, addr
403 * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
404 * 23: return XD3_WINFINISH, set state=9 to decode more input
405 */
406typedef enum {
407
408 DEC_VCHEAD = 0, /* VCDIFF header */
409 DEC_HDRIND = 1, /* header indicator */
410
411 DEC_SECONDID = 2, /* secondary compressor ID */
412
413 DEC_TABLEN = 3, /* code table length */
414 DEC_NEAR = 4, /* code table near */
415 DEC_SAME = 5, /* code table same */
416 DEC_TABDAT = 6, /* code table data */
417
418 DEC_APPLEN = 7, /* application data length */
419 DEC_APPDAT = 8, /* application data */
420
421 DEC_WININD = 9, /* window indicator */
422
423 DEC_CPYLEN = 10, /* copy window length */
424 DEC_CPYOFF = 11, /* copy window offset */
425
426 DEC_ENCLEN = 12, /* length of delta encoding */
427 DEC_TGTLEN = 13, /* length of target window */
428 DEC_DELIND = 14, /* delta indicator */
429
430 DEC_DATALEN = 15, /* length of ADD+RUN data */
431 DEC_INSTLEN = 16, /* length of instruction data */
432 DEC_ADDRLEN = 17, /* length of address data */
433
434 DEC_CKSUM = 18, /* window checksum */
435
436 DEC_DATA = 19, /* data section */
437 DEC_INST = 20, /* instruction section */
438 DEC_ADDR = 21, /* address section */
439
440 DEC_EMIT = 22, /* producing data */
441
442 DEC_FINISH = 23, /* window finished */
443
444 DEC_ABORTED = 24, /* xd3_abort_stream */
445} xd3_decode_state;
446
447/* An application never sees these internal codes: */
448typedef enum {
449 XD3_NOSECOND = -17708, /* when secondary compression finds no improvement. */
450} xd3_pvalues;
451
452/******************************************************************************************
453 internal types
454 ******************************************************************************************/
455
456/* instruction lists used in the IOPT buffer */
457struct _xd3_rlist
458{
459 xd3_rlist *next;
460 xd3_rlist *prev;
461};
462
463/* the raw encoding of an instruction used in the IOPT buffer */
464struct _xd3_rinst
465{
466 uint8_t type;
467 uint8_t xtra;
468 uint8_t code1;
469 uint8_t code2;
470 usize_t pos;
471 usize_t size;
472 xoff_t addr;
473 xd3_rlist link;
474};
475
476/* the code-table form of an single- or double-instruction */
477struct _xd3_dinst
478{
479 uint8_t type1;
480 uint8_t size1;
481 uint8_t type2;
482 uint8_t size2;
483};
484
485/* the decoded form of a single (half) instruction. */
486struct _xd3_hinst
487{
488 uint8_t type;
489 uint32_t size;
490 uint32_t addr;
491};
492
493/* used by the encoder to buffer output in sections. list of blocks. */
494struct _xd3_output
495{
496 uint8_t *base;
497 usize_t next;
498 usize_t avail;
499 xd3_output *next_page;
500};
501
502/* the VCDIFF address cache, see the RFC */
503struct _xd3_addr_cache
504{
505 uint s_near;
506 uint s_same;
507 usize_t next_slot; /* the circular index for near */
508 usize_t *near_array; /* array of size s_near */
509 usize_t *same_array; /* array of size s_same*256 */
510};
511
512/* the IOPT buffer list is just a list of buffers, which may be allocated
513 * during encode when using an unlimited buffer. */
514struct _xd3_iopt_buflist
515{
516 xd3_rinst *buffer;
517 xd3_iopt_buflist *next;
518};
519
520/* This is the record of a pre-compiled configuration, a subset of xd3_config. */
521struct _xd3_smatcher
522{
523 const char *name;
524 int (*string_match) (xd3_stream *stream);
525 uint large_look;
526 uint large_step;
527 uint small_look;
528 uint small_chain;
529 uint small_lchain;
530 uint max_lazy;
531 uint long_enough;
532};
533
534/* hash table size & power-of-two hash function. */
535struct _xd3_hash_cfg
536{
537 usize_t size;
538 usize_t shift;
539 usize_t mask;
540};
541
542/* the sprev list */
543struct _xd3_slist
544{
545 usize_t last_pos;
546};
547
548/* a decoder section (data, inst, or addr). there is an optimization to avoid copying
549 * these sections if all the input is available, related to the copied field below.
550 * secondation compression uses the copied2 field. */
551struct _xd3_desect
552{
553 const uint8_t *buf;
554 const uint8_t *buf_max;
555 uint32_t size;
556 usize_t pos;
557 uint8_t *copied1;
558 usize_t alloc1;
559 uint8_t *copied2;
560 usize_t alloc2;
561};
562
563/******************************************************************************************
564 public types
565 ******************************************************************************************/
566
567/* Settings for the secondary compressor. */
568struct _xd3_sec_cfg
569{
570 int data_type; /* Which section. (set automatically) */
571 int ngroups; /* Number of DJW Huffman groups. */
572 int sector_size; /* Sector size. */
573 int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
574};
575
576/* This is the user-visible stream configuration. */
577struct _xd3_config
578{
579 usize_t winsize; /* The encoder window size. */
580 usize_t sprevsz; /* How far back small string matching goes */
581 usize_t iopt_size; /* entries in the instruction-optimizing buffer */
582 usize_t srcwin_maxsz; /* srcwin_size grows by a factor of 2 when no matches are found */
583
584 xd3_getblk_func *getblk; /* The three callbacks. */
585 xd3_alloc_func *alloc;
586 xd3_free_func *freef;
587 void *opaque; /* Not used. */
588 int flags; /* stream->flags are initialized from xd3_config &
589 * never modified by the library. Use xd3_set_flags
590 * to modify flags settings mid-stream. */
591
592 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
593 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
594 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
595
596 xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for soft config */
597 xd3_smatcher smatcher_soft;
598};
599
600/* The primary source file object. You create one of these objects and initialize the
601 * first four fields. This library maintains the next 5 fields. The configured getblk
602 * implementation is responsible for setting the final 3 fields when called (and/or when
603 * XD3_GETSRCBLK is returned).
604 */
605struct _xd3_source
606{
607 /* you set */
608 xoff_t size; /* size of this source */
609 usize_t blksize; /* block size */
610 const char *name; /* its name, for debug/print purposes */
611 void *ioh; /* opaque handle */
612
613 /* getblk sets */
614 xoff_t curblkno; /* current block number: client sets after getblk request */
615 usize_t onblk; /* number of bytes on current block: client sets, xd3 verifies */
616 const uint8_t *curblk; /* current block array: client sets after getblk request */
617
618 /* xd3 sets */
619 usize_t srclen; /* length of this source window */
620 xoff_t srcbase; /* offset of this source window in the source itself */
621 xoff_t blocks; /* the total number of blocks in this source */
622 usize_t cpyoff_blocks; /* offset of copy window in blocks */
623 usize_t cpyoff_blkoff; /* offset of copy window in blocks, remainder */
624 xoff_t getblkno; /* request block number: xd3 sets current getblk request */
625};
626
627/* The primary xd3_stream object, used for encoding and decoding. You may access only two
628 * fields: avail_out, next_out. Use the methods above to operate on xd3_stream. */
629struct _xd3_stream
630{
631 /* input state */
632 const uint8_t *next_in; /* next input byte */
633 usize_t avail_in; /* number of bytes available at next_in */
634 xoff_t total_in; /* how many bytes in */
635
636 /* output state */
637 uint8_t *next_out; /* next output byte */
638 usize_t avail_out; /* number of bytes available at next_out */
639 usize_t space_out; /* total out space */
640 xoff_t current_window; /* number of windows encoded/decoded */
641 xoff_t total_out; /* how many bytes out */
642
643 /* to indicate an error, xd3 sets */
644 const char *msg; /* last error message, NULL if no error */
645
646 /* source configuration */
647 xd3_source *src; /* source array */
648
649 /* encoder memory configuration */
650 usize_t winsize; /* suggested window size */
651 usize_t sprevsz; /* small string, previous window size (power of 2) */
652 usize_t sprevmask; /* small string, previous window size mask */
653 uint iopt_size;
654 uint iopt_unlimited;
655 uint srcwin_maxsz;
656
657 /* general configuration */
658 xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */
659 xd3_alloc_func *alloc; /* malloc function */
660 xd3_free_func *free; /* free function */
661 void* opaque; /* private data object passed to alloc, free, and getblk */
662 int flags; /* various options */
663
664 /* secondary compressor configuration */
665 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
666 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
667 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
668
669 xd3_smatcher smatcher;
670
671 usize_t *large_table; /* table of large checksums */
672 xd3_hash_cfg large_hash; /* large hash config */
673
674 usize_t *small_table; /* table of small checksums */
675 xd3_slist *small_prev; /* table of previous offsets, circular linked list */
676 int small_reset; /* true if small table should be reset */
677
678 xd3_hash_cfg small_hash; /* small hash config */
679 xd3_addr_cache acache; /* the vcdiff address cache */
680 xd3_encode_state enc_state; /* state of the encoder */
681
682 usize_t taroff; /* base offset of the target input */
683 usize_t input_position; /* current input position */
684 usize_t min_match; /* current minimum match length, avoids redundent matches */
685 usize_t unencoded_offset; /* current input, first unencoded offset. this value is <= the first
686 * instruction's position in the iopt buffer, if there is at least one
687 * match in the buffer. */
688
689 // SRCWIN
690 // these variables plus srcwin_maxsz above (set by config)
691 int srcwin_decided; /* boolean: true if the srclen,srcbase have been decided. */
692 xoff_t srcwin_cksum_pos; /* Source checksum position */
693
694 // MATCH
695 xd3_match_state match_state; /* encoder match state */
696 xoff_t match_srcpos; /* current match source position relative to srcbase */
697 xoff_t match_minaddr; /* smallest matching address to set window params
698 * (reset each window xd3_encode_reset) */
699 xoff_t match_maxaddr; /* largest matching address to set window params
700 * (reset each window xd3_encode_reset) */
701 usize_t match_back; /* match extends back so far */
702 usize_t match_maxback; /* match extends back maximum */
703 usize_t match_fwd; /* match extends forward so far */
704 usize_t match_maxfwd; /* match extends forward maximum */
705
706 xoff_t maxsrcaddr; /* address of the last source match (across windows) */
707
708 uint8_t *buf_in; /* for saving buffered input */
709 usize_t buf_avail; /* amount of saved input */
710 const uint8_t *buf_leftover; /* leftover content of next_in (i.e., user's buffer) */
711 usize_t buf_leftavail; /* amount of leftover content */
712
713 xd3_output *enc_current; /* current output buffer */
714 xd3_output *enc_free; /* free output buffers */
715 xd3_output *enc_heads[4]; /* array of encoded outputs: head of chain */
716 xd3_output *enc_tails[4]; /* array of encoded outputs: tail of chain */
717
718 xd3_rlist iopt_used; /* instruction optimizing buffer */
719 xd3_rlist iopt_free;
720 xd3_rinst *iout; /* next single instruction */
721 xd3_iopt_buflist *iopt_alloc;
722
723 const uint8_t *enc_appheader; /* application header to encode */
724 usize_t enc_appheadsz; /* application header size */
725
726 /* decoder stuff */
727 xd3_decode_state dec_state; /* current DEC_XXX value */
728 uint dec_hdr_ind; /* VCDIFF header indicator */
729 uint dec_win_ind; /* VCDIFF window indicator */
730 uint dec_del_ind; /* VCDIFF delta indicator */
731
732 uint8_t dec_magic[4]; /* First four bytes */
733 usize_t dec_magicbytes; /* Magic position. */
734
735 uint dec_secondid; /* Optional secondary compressor ID. */
736
737 uint32_t dec_codetblsz; /* Optional code table: length. */
738 uint8_t *dec_codetbl; /* Optional code table: storage. */
739 usize_t dec_codetblbytes; /* Optional code table: position. */
740
741 uint32_t dec_appheadsz; /* Optional application header: size. */
742 uint8_t *dec_appheader; /* Optional application header: storage */
743 usize_t dec_appheadbytes; /* Optional application header: position. */
744
745 usize_t dec_cksumbytes; /* Optional checksum: position. */
746 uint8_t dec_cksum[4]; /* Optional checksum: storage. */
747 uint32_t dec_adler32; /* Optional checksum: value. */
748
749 uint32_t dec_cpylen; /* length of copy window (VCD_SOURCE or VCD_TARGET) */
750 xoff_t dec_cpyoff; /* offset of copy window (VCD_SOURCE or VCD_TARGET) */
751 uint32_t dec_enclen; /* length of delta encoding */
752 uint32_t dec_tgtlen; /* length of target window */
753
754#if USE_UINT64
755 uint64_t dec_64part; /* part of a decoded uint64_t */
756#endif
757#if USE_UINT32
758 uint32_t dec_32part; /* part of a decoded uint32_t */
759#endif
760
761 xoff_t dec_winstart; /* offset of the start of current target window */
762 xoff_t dec_window_count; /* == current_window + 1 in DEC_FINISH */
763 usize_t dec_winbytes; /* bytes of the three sections so far consumed */
764 usize_t dec_hdrsize; /* VCDIFF + app header size */
765
766 const uint8_t *dec_tgtaddrbase; /* Base of decoded target addresses (addr >= dec_cpylen). */
767 const uint8_t *dec_cpyaddrbase; /* Base of decoded copy addresses (addr < dec_cpylen). */
768
769 usize_t dec_position; /* current decoder position counting the cpylen offset */
770 usize_t dec_maxpos; /* maximum decoder position counting the cpylen offset */
771 xd3_hinst dec_current1; /* current instruction */
772 xd3_hinst dec_current2; /* current instruction */
773
774 uint8_t *dec_buffer; /* Decode buffer */
775 uint8_t *dec_lastwin; /* In case of VCD_TARGET, the last target window. */
776 usize_t dec_lastlen; /* length of the last target window */
777 xoff_t dec_laststart; /* offset of the start of last target window */
778 usize_t dec_lastspace; /* allocated space of last target window, for reuse */
779
780 xd3_desect inst_sect; /* staging area for decoding window sections */
781 xd3_desect addr_sect;
782 xd3_desect data_sect;
783
784 xd3_code_table_func *code_table_func;
785 xd3_comp_table_func *comp_table_func;
786 const xd3_dinst *code_table;
787 const xd3_code_table_desc *code_table_desc;
788 xd3_dinst *code_table_alloc;
789
790 /* secondary compression */
791 const xd3_sec_type *sec_type;
792 xd3_sec_stream *sec_stream_d;
793 xd3_sec_stream *sec_stream_i;
794 xd3_sec_stream *sec_stream_a;
795
796 /* statistics */
797 xoff_t n_scpy;
798 xoff_t n_tcpy;
799 xoff_t n_add;
800 xoff_t n_run;
801
802 xoff_t l_scpy;
803 xoff_t l_tcpy;
804 xoff_t l_add;
805 xoff_t l_run;
806
807 usize_t i_slots_used;
808
809#if XD3_DEBUG
810 usize_t large_ckcnt;
811
812 /* memory usage */
813 usize_t alloc_cnt;
814 usize_t free_cnt;
815
816 xoff_t n_emit;
817#endif
818};
819
820/******************************************************************************************
821 PUBLIC FUNCTIONS
822 ******************************************************************************************/
823
824/* This function configures an xd3_stream using the provided in-memory input buffer,
825 * source buffer, output buffer, and flags. The output array must be large enough or else
826 * ENOSPC will be returned. This is the simplest in-memory encoding interface. */
827int xd3_encode_memory (const uint8_t *input,
828 usize_t input_size,
829 const uint8_t *source,
830 usize_t source_size,
831 uint8_t *output_buffer,
832 usize_t *output_size,
833 usize_t avail_output,
834 int flags);
835
836/* The reverse of xd3_encode_memory. */
837int xd3_decode_memory (const uint8_t *input,
838 usize_t input_size,
839 const uint8_t *source,
840 usize_t source_size,
841 uint8_t *output_buf,
842 usize_t *output_size,
843 usize_t avail_output,
844 int flags);
845
846/* This function encodes an in-memory input. Everything else about the xd3_stream is
847 * configurable. The output array must be large enough to hold the output or else ENOSPC
848 * is returned. The source (if any) should be set using xd3_set_source() with a
849 * single-block xd3_source. This calls the underlying non-blocking interface,
850 * xd3_encode_input(), handling the necessary input/output states. This method be
851 * considered a reference for any application using xd3_encode_input() directly.
852 *
853 * xd3_stream stream;
854 * xd3_config config;
855 * xd3_source src;
856 *
857 * memset (& src, 0, sizeof (src));
858 * memset (& stream, 0, sizeof (stream));
859 * memset (& config, 0, sizeof (config));
860 *
861 * if (source != NULL)
862 * {
863 * src.size = source_size;
864 * src.blksize = source_size;
865 * src.curblkno = 0;
866 * src.onblk = source_size;
867 * src.curblk = source;
868 * xd3_set_source(&stream, &src);
869 * }
870 *
871 * config.flags = flags;
872 * config.srcwin_maxsz = source_size;
873 * config.winsize = input_size;
874 *
875 * ... set smatcher, appheader, encoding-table, compression-level, etc.
876 *
877 * xd3_config_stream(&stream, &config);
878 * xd3_encode_stream(&stream, ...);
879 * xd3_free_stream(&stream);
880 *
881 * DO NOT USE except for testing. These methods are allocate bad buffer sizes.
882 */
883int xd3_encode_stream (xd3_stream *stream,
884 const uint8_t *input,
885 usize_t input_size,
886 uint8_t *output,
887 usize_t *output_size,
888 usize_t avail_output);
889
890/* The reverse of xd3_encode_stream. */
891int xd3_decode_stream (xd3_stream *stream,
892 const uint8_t *input,
893 usize_t input_size,
894 uint8_t *output,
895 usize_t *output_size,
896 usize_t avail_size);
897
898/* This is the non-blocking interface.
899 *
900 * Handling input and output states is the same for encoding or decoding using the
901 * xd3_avail_input() and xd3_consume_output() routines, inlined below.
902 *
903 * Return values:
904 *
905 * XD3_INPUT: the process requires more input: call xd3_avail_input() then repeat
906 * XD3_OUTPUT: the process has more output: read stream->next_out, stream->avail_out,
907 * then call xd3_consume_output(), then repeat
908 * XD3_GOTHEADER: (decoder-only) notification returned following the VCDIFF header and
909 * first window header. the decoder may use the header to configure itself.
910 * XD3_WINSTART: a general notification returned once for each window except the 0-th
911 * window, which is implied by XD3_GOTHEADER. It is recommended to
912 * use a switch-stmt such as:
913 * ...
914 * again:
915 * switch ((ret = xd3_decode_input (stream))) {
916 * case XD3_GOTHEADER: {
917 * assert(stream->current_window == 0);
918 * stuff;
919 * }
920 * // fallthrough
921 * case XD3_WINSTART: {
922 * something(stream->current_window);
923 * goto again;
924 * }
925 * ...
926 * XD3_WINFINISH: a general notification, following the complete input & output of a
927 * window. at this point, stream->total_in and stream->total_out are
928 * consistent for either encoding or decoding.
929 * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value is returned to
930 * initiate a non-blocking source read.
931 */
932int xd3_decode_input (xd3_stream *stream);
933int xd3_encode_input (xd3_stream *stream);
934
935/* The xd3_config structure is used to initialize a stream - all data is copied into
936 * stream so config may be a temporary variable. See the [documentation] or comments on
937 * the xd3_config structure. */
938int xd3_config_stream (xd3_stream *stream,
939 xd3_config *config);
940
941/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an error check that the
942 * stream is in a proper state to be closed: this means the encoder is flushed and the
943 * decoder is at a window boundary. The application is responsible for freeing any of the
944 * resources it supplied. */
945int xd3_close_stream (xd3_stream *stream);
946
947/* This unconditionally closes/frees the stream, future close() will succeed. */
948void xd3_abort_stream (xd3_stream *stream);
949
950/* xd3_free_stream frees all memory allocated for the stream. The application is
951 * responsible for freeing any of the resources it supplied. */
952void xd3_free_stream (xd3_stream *stream);
953
954/* This function informs the encoder or decoder that source matching (i.e.,
955 * delta-compression) is possible. For encoding, this should be called before the first
956 * xd3_encode_input. A NULL source is ignored. For decoding, this should be called
957 * before the first window is decoded, but the appheader may be read first
958 * (XD3_GOTHEADER). At this point, consult xd3_decoder_needs_source(), inlined below, to
959 * determine if a source is expected by the decoder. */
960int xd3_set_source (xd3_stream *stream,
961 xd3_source *source);
962
963/* This should be called before the first call to xd3_encode_input() to include
964 * application-specific data in the VCDIFF header. */
965void xd3_set_appheader (xd3_stream *stream,
966 const uint8_t *data,
967 usize_t size);
968
969/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. For convenience,
970 * the decoder always adds a single byte padding to the end of the application header,
971 * which is set to zero in case the application header is a string. */
972int xd3_get_appheader (xd3_stream *stream,
973 uint8_t **data,
974 usize_t *size);
975
976/* After receiving XD3_GOTHEADER, the decoder should check this function which returns 1
977 * if the decoder will require source data. */
978int xd3_decoder_needs_source (xd3_stream *stream);
979
980/* Gives an error string for xdelta3-speficic errors, returns NULL for system errors */
981const char* xd3_strerror (int ret);
982
983/* For convenience, zero & initialize the xd3_config structure with specified flags. */
984static inline
985void xd3_init_config (xd3_config *config,
986 int flags)
987{
988 memset (config, 0, sizeof (*config));
989 config->flags = flags;
990}
991
992/* This supplies some input to the stream. */
993static inline
994void xd3_avail_input (xd3_stream *stream,
995 const uint8_t *idata,
996 usize_t isize)
997{
998 /* Even if isize is zero, the code expects a non-NULL idata. Why? It uses this value
999 * to determine whether xd3_avail_input has ever been called. If xd3_encode_input is
1000 * called before xd3_avail_input it will return XD3_INPUT right away without allocating
1001 * a stream->winsize buffer. This is to avoid an unwanted allocation. */
1002 XD3_ASSERT (idata != NULL);
1003
1004 stream->next_in = idata;
1005 stream->avail_in = isize;
1006}
1007
1008/* This acknowledges receipt of output data, must be called after any XD3_OUTPUT
1009 * return. */
1010static inline
1011void xd3_consume_output (xd3_stream *stream)
1012{
1013 stream->avail_out = 0;
1014}
1015
1016/* These are set for each XD3_WINFINISH return. */
1017static inline
1018int xd3_encoder_used_source (xd3_stream *stream) { return stream->src != NULL && stream->src->srclen > 0; }
1019static inline
1020xoff_t xd3_encoder_srcbase (xd3_stream *stream) { return stream->src->srcbase; }
1021static inline
1022usize_t xd3_encoder_srclen (xd3_stream *stream) { return stream->src->srclen; }
1023
1024/* Checks for legal flag changes. */
1025static inline
1026void xd3_set_flags (xd3_stream *stream, int flags)
1027{
1028 /* The bitwise difference should contain only XD3_FLUSH or XD3_SKIP_WINDOW */
1029 XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
1030 stream->flags = flags;
1031}
1032
1033/* Gives some extra information about the latest library error, if any is known. */
1034static inline
1035const char* xd3_errstring (xd3_stream *stream)
1036{
1037 return stream->msg ? stream->msg : "";
1038}
1039
1040/* This function tells the number of bytes expected to be set in source->onblk after a
1041 * getblk request. This is for convenience of handling a partial last block. */
1042static inline
1043usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno)
1044{
1045 XD3_ASSERT (blkno < source->blocks);
1046
1047 if (blkno != source->blocks - 1)
1048 {
1049 return source->blksize;
1050 }
1051
1052 return (usize_t)((source->size - 1) % source->blksize) + 1;
1053}
1054
1055#endif /* _XDELTA3_H_ */
Note: See TracBrowser for help on using the repository browser.