source: nikanabo/current/xdelta/diy/xdelta3.c @ 185

Last change on this file since 185 was 185, checked in by geyser, 14 years ago
File size: 149.4 KB
Line 
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006.  Joshua P. MacDonald
3 *
4 *  This program is free software; you can redistribute it and/or modify
5 *  it under the terms of the GNU General Public License as published by
6 *  the Free Software Foundation; either version 2 of the License, or
7 *  (at your option) any later version.
8 *
9 *  This program is distributed in the hope that it will be useful,
10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 *  GNU General Public License for more details.
13 *
14 *  You should have received a copy of the GNU General Public License
15 *  along with this program; if not, write to the Free Software
16 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18   -------------------------------------------------------------------
19
20                               Xdelta 3
21
22   The goal of this library is to to implement both the (stand-alone)
23   data-compression and delta-compression aspects of VCDIFF encoding, and
24   to support a programming interface that works like Zlib
25   (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic
26   Differencing and Compression Data Format.
27
28   VCDIFF is a unified encoding that combines data-compression and
29   delta-encoding ("differencing").
30
31   VCDIFF has a detailed byte-code instruction set with many features.
32   The instruction format supports an immediate size operand for small
33   COPYs and ADDs (e.g., under 18 bytes).  There are also instruction
34   "modes", which are used to compress COPY addresses by using two
35   address caches.  An instruction mode refers to slots in the NEAR
36   and SAME caches for recent addresses.  NEAR remembers the
37   previous 4 (by default) COPY addresses, and SAME catches
38   frequent re-uses of the same address using a 3-way (by default)
39   256-entry associative cache of [ADDR mod 256], the encoded byte.
40   A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
41
42   VCDIFF has a default instruction table, but an alternate
43   instruction tables may themselves be be delta-compressed and
44   included in the encoding header.  This allows even more freedom.
45   There are 9 instruction modes in the default code table, 4 near, 3
46   same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the
47   current position).
48
49   ----------------------------------------------------------------------
50
51                              Algorithms
52
53   Aside from the details of encoding and decoding, there are a bunch
54   of algorithms needed.
55
56   1. STRING-MATCH.  A two-level fingerprinting approach is used.  A
57   single loop computes the two checksums -- small and large -- at
58   successive offsets in the TARGET file.  The large checksum is more
59   accurate and is used to discover SOURCE matches, which are
60   potentially very long.  The small checksum is used to discover
61   copies within the TARGET.  Small matching, which is more expensive,
62   usually dominates the large STRING-MATCH costs in this code - the
63   more exhaustive the search, the better the results.  Either of the
64   two string-matching mechanisms may be disabled.
65
66   2. INSTRUCTION SELECTION.  The IOPT buffer here represents a queue
67   used to store overlapping copy instructions.  There are two possible
68   optimizations that go beyond a greedy search.  Both of these fall
69   into the category of "non-greedy matching" optimizations.
70
71   The first optimization stems from backward SOURCE-COPY matching.
72   When a new SOURCE-COPY instruction covers a previous instruction in
73   the target completely, it is erased from the queue.  Randal Burns
74   originally analyzed these algorithms and did a lot of related work
75   (\cite the 1.5-pass algorithm).
76
77   The second optimization comes by the encoding of common very-small
78   COPY and ADD instructions, for which there are special DOUBLE-code
79   instructions, which code two instructions in a single byte.
80
81   The cost of bad instruction-selection overhead is relatively high
82   for data-compression, relative to delta-compression, so this second
83   optimization is fairly important.  With "lazy" matching (the name
84   used in Zlib for a similar optimization), the string-match
85   algorithm searches after a match for potential overlapping copy
86   instructions.  In Xdelta and by default, VCDIFF, the minimum match
87   size is 4 bytes, whereas Zlib searches with a 3-byte minimum.  This
88   feature, combined with double instructions, provides a nice
89   challenge.  Search in this file for "black magic", a heuristic.
90
91   3. STREAM ALIGNMENT.  Stream alignment is needed to compress large
92   inputs in constant space.  See xd3_srcwin_move_point().
93
94   4. WINDOW SELECTION.  When the IOPT buffer flushes, in the first call
95   to xd3_iopt_finish_encoding containing any kind of copy instruction,
96   the parameters of the source window must be decided: the offset into
97   the source and the length of the window.  Since the IOPT buffer is
98   finite, the program may be forced to fix these values before knowing
99   the best offset/length.
100
101   5. SECONDARY COMPRESSION.  VCDIFF supports a secondary encoding to
102   be applied to the individual sections of the data format, which are
103   ADDRess, INSTruction, and DATA.  Several secondary compressor
104   variations are implemented here, although none is standardized yet.
105
106   One is an adaptive huffman algorithm -- the FGK algorithm (Faller,
107   Gallager, and Knuth, 1985).  This compressor is extremely slow.
108
109   The other is a simple static Huffman routine, which is the base
110   case of a semi-adaptive scheme published by D.J. Wheeler and first
111   widely used in bzip2 (by Julian Seward).  This is a very
112   interesting algorithm, originally published in nearly cryptic form
113   by D.J. Wheeler. !!!NOTE!!! Because these are not standardized,
114   secondary compression remains off by default.
115   ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps}
116   --------------------------------------------------------------------
117
118                            Other Features
119
120   1. USER CONVENIENCE
121
122   For user convenience, it is essential to recognize Gzip-compressed
123   files and automatically Gzip-decompress them prior to
124   delta-compression (or else no delta-compression will be achieved
125   unless the user manually decompresses the inputs).  The compressed
126   represention competes with Xdelta, and this must be hidden from the
127   command-line user interface.  The Xdelta-1.x encoding was simple, not
128   compressed itself, so Xdelta-1.x uses Zlib internally to compress the
129   representation.
130
131   This implementation supports external compression, which implements
132   the necessary fork() and pipe() mechanics.  There is a tricky step
133   involved to support automatic detection of a compressed input in a
134   non-seekable input.  First you read a bit of the input to detect
135   magic headers.  When a compressed format is recognized, exec() the
136   external compression program and create a second child process to
137   copy the original input stream. [Footnote: There is a difficulty
138   related to using Gzip externally. It is not possible to decompress
139   and recompress a Gzip file transparently.  If FILE.GZ had a
140   cryptographic signature, then, after: (1) Gzip-decompression, (2)
141   Xdelta-encoding, (3) Gzip-compression the signature could be
142   broken.  The only way to solve this problem is to guess at Gzip's
143   compression level or control it by other means.  I recommend that
144   specific implementations of any compression scheme store
145   information needed to exactly re-compress the input, that way
146   external compression is transparent - however, this won't happen
147   here until it has stabilized.]
148
149   2. APPLICATION-HEADER
150
151   This feature was introduced in RFC3284.  It allows any application
152   to include a header within the VCDIFF file format.  This allows
153   general inter-application data exchange with support for
154   application-specific extensions to communicate metadata.
155
156   3. VCDIFF CHECKSUM
157
158   An optional checksum value is included with each window, which can
159   be used to validate the final result.  This verifies the correct source
160   file was used for decompression as well as the obvious advantage:
161   checking the implementation (and underlying) correctness.
162
163   4. LIGHT WEIGHT
164
165   The code makes efforts to avoid copying data more than necessary.
166   The code delays many initialization tasks until the first use, it
167   optimizes for identical (perfectly matching) inputs.  It does not
168   compute any checksums until the first lookup misses.  Memory usage
169   is reduced.  String-matching is templatized (by slightly gross use
170   of CPP) to hard-code alternative compile-time defaults.  The code
171   has few outside dependencies.
172   ----------------------------------------------------------------------
173
174                The default rfc3284 instruction table:
175                    (see RFC for the explanation)
176
177           TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
178   --------------------------------------------------------------------
179       1.  Run         0        0     Noop       0        0        0
180       2.  Add    0, [1,17]     0     Noop       0        0      [1,18]
181       3.  Copy   0, [4,18]     0     Noop       0        0     [19,34]
182       4.  Copy   0, [4,18]     1     Noop       0        0     [35,50]
183       5.  Copy   0, [4,18]     2     Noop       0        0     [51,66]
184       6.  Copy   0, [4,18]     3     Noop       0        0     [67,82]
185       7.  Copy   0, [4,18]     4     Noop       0        0     [83,98]
186       8.  Copy   0, [4,18]     5     Noop       0        0     [99,114]
187       9.  Copy   0, [4,18]     6     Noop       0        0    [115,130]
188      10.  Copy   0, [4,18]     7     Noop       0        0    [131,146]
189      11.  Copy   0, [4,18]     8     Noop       0        0    [147,162]
190      12.  Add       [1,4]      0     Copy     [4,6]      0    [163,174]
191      13.  Add       [1,4]      0     Copy     [4,6]      1    [175,186]
192      14.  Add       [1,4]      0     Copy     [4,6]      2    [187,198]
193      15.  Add       [1,4]      0     Copy     [4,6]      3    [199,210]
194      16.  Add       [1,4]      0     Copy     [4,6]      4    [211,222]
195      17.  Add       [1,4]      0     Copy     [4,6]      5    [223,234]
196      18.  Add       [1,4]      0     Copy       4        6    [235,238]
197      19.  Add       [1,4]      0     Copy       4        7    [239,242]
198      20.  Add       [1,4]      0     Copy       4        8    [243,246]
199      21.  Copy        4      [0,8]   Add        1        0    [247,255]
200   --------------------------------------------------------------------
201
202                     Reading the source: Overview
203
204   This file includes itself in several passes to macro-expand certain
205   sections with variable forms.  Just read ahead, there's only a
206   little confusion.  I know this sounds ugly, but hard-coding some of
207   the string-matching parameters results in a 10-15% increase in
208   string-match performance.  The only time this hurts is when you have
209   unbalanced #if/endifs.
210
211   A single compilation unit tames the Makefile.  In short, this is to
212   allow the above-described hack without an explodingMakefile.  The
213   single compilation unit includes the core library features,
214   configurable string-match templates, optional main() command-line
215   tool, misc optional features, and a regression test.  Features are
216   controled with CPP #defines, see Makefile.am.
217
218   The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and
219   TEMPLATE sections follow.  Easy stuff first, hard stuff last.
220
221   Optional features include:
222
223     xdelta3-main.h     The command-line interface, external compression
224                        support, POSIX-specific, info & VCDIFF-debug tools.
225     xdelta3-second.h   The common secondary compression routines.
226     xdelta3-decoder.h  All decoding routines.
227     xdelta3-djw.h      The semi-adaptive huffman secondary encoder.
228     xdelta3-fgk.h      The adaptive huffman secondary encoder.
229     xdelta3-test.h     The unit test covers major algorithms,
230                        encoding and decoding.  There are single-bit
231                        error decoding tests.  There are 32/64-bit file size
232                        boundary tests.  There are command-line tests.
233                        There are compression tests.  There are external
234                        compression tests.  There are string-matching tests.
235                        There should be more tests...
236
237   Additional headers include:
238
239     xdelta3.h          The public header file.
240     xdelta3-cfgs.h     The default settings for default, built-in
241                        encoders.  These are hard-coded at
242                        compile-time.  There is also a single
243                        soft-coded string matcher for experimenting
244                        with arbitrary values.
245     xdelta3-list.h     A cyclic list template
246
247   Misc little debug utilities:
248
249     badcopy.c          Randomly modifies an input file based on two
250                        parameters: (1) the probability that a byte in
251                        the file is replaced with a pseudo-random value,
252                        and (2) the mean change size.  Changes are
253                        generated using an expoential distribution
254                        which approximates the expected error_prob
255                        distribution.
256   --------------------------------------------------------------------
257
258   This file itself is unusually large.  I hope to defend this layout
259   with lots of comments.  Everything in this file is related to
260   encoding and decoding.  I like it all together - the template stuff
261   is just a hack. */
262
263#ifndef __XDELTA3_C_HEADER_PASS__
264#define __XDELTA3_C_HEADER_PASS__
265
266#include <errno.h>
267#include <string.h>
268
269#include "xdelta3.h"
270
271/******************************************************************************************
272 STATIC CONFIGURATION
273 ******************************************************************************************/
274
275#ifndef XD3_MAIN                  /* the main application */
276#define XD3_MAIN 0
277#endif
278
279#ifndef VCDIFF_TOOLS
280#define VCDIFF_TOOLS XD3_MAIN
281#endif
282
283#ifndef SECONDARY_FGK             /* one from the algorithm preservation department: */
284#define SECONDARY_FGK 0           /* adaptive Huffman routines */
285#endif
286
287#ifndef SECONDARY_DJW             /* semi-adaptive/static Huffman for the eventual */
288#define SECONDARY_DJW 0           /* standardization, off by default until such time. */
289#endif
290
291#ifndef GENERIC_ENCODE_TABLES    /* These three are the RFC-spec'd app-specific */
292#define GENERIC_ENCODE_TABLES 0  /* code features.  This is tested but not recommended */
293#endif                           /* unless there's a real application. */
294#ifndef GENERIC_ENCODE_TABLES_COMPUTE
295#define GENERIC_ENCODE_TABLES_COMPUTE 0
296#endif
297#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT
298#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0
299#endif
300
301#if XD3_USE_LARGEFILE64          /* How does everyone else do this? */
302#ifndef WIN32
303#define Q "q"
304#else
305#define Q "I64"
306#endif
307#else
308#define Q
309#endif
310
311#if XD3_ENCODER
312#define IF_ENCODER(x) x
313#else
314#define IF_ENCODER(x)
315#endif
316
317/******************************************************************************************/
318
319typedef enum {
320
321  /* header indicator bits */
322  VCD_SECONDARY  = (1 << 0),  /* uses secondary compressor */
323  VCD_CODETABLE  = (1 << 1),  /* supplies code table data */
324  VCD_APPHEADER  = (1 << 2),  /* supplies application data */
325  VCD_INVHDR     = ~7U,
326
327  /* window indicator bits */
328  VCD_SOURCE     = (1 << 0),  /* copy window in source file */
329  VCD_TARGET     = (1 << 1),  /* copy window in target file */
330  VCD_ADLER32    = (1 << 2),  /* has adler32 checksum */
331  VCD_INVWIN     = ~7U,
332
333  VCD_SRCORTGT   = VCD_SOURCE | VCD_TARGET,
334
335  /* delta indicator bits */
336  VCD_DATACOMP   = (1 << 0),
337  VCD_INSTCOMP   = (1 << 1),
338  VCD_ADDRCOMP   = (1 << 2),
339  VCD_INVDEL     = ~0x7U,
340
341} xd3_indicator;
342
343typedef enum {
344  VCD_DJW_ID    = 1,
345  VCD_FGK_ID    = 16, /* !!!Note: these are not a standard IANA-allocated ID!!! */
346} xd3_secondary_ids;
347
348typedef enum {
349  SEC_NOFLAGS     = 0,
350  SEC_COUNT_FREQS = (1 << 0), /* OPT: Not implemented: Could eliminate first pass of Huffman... */
351} xd3_secondary_flags;
352
353typedef enum {
354  DATA_SECTION, /* These indicate which section to the secondary compressor. */
355  INST_SECTION, /* The header section is not compressed, therefore not listed here. */
356  ADDR_SECTION,
357} xd3_section_type;
358
359typedef enum
360{
361  XD3_NOOP = 0,
362  XD3_ADD  = 1,
363  XD3_RUN  = 2,
364  XD3_CPY  = 3, /* XD3_CPY rtypes are represented as (XD3_CPY + copy-mode value) */
365} xd3_rtype;
366
367/******************************************************************************************/
368
369#include "xdelta3-list.h"
370
371XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
372
373/******************************************************************************************/
374
375#ifndef unlikely              /* The unlikely macro - any good? */
376#if defined(__GNUC__) && __GNUC__ >= 3
377#define unlikely(x) __builtin_expect((x),0)
378#define likely(x)   __builtin_expect((x),1)
379#else
380#define unlikely(x) (x)
381#define likely(x)   (x)
382#endif
383#endif
384
385#define SECONDARY_MIN_SAVINGS 2  /* Secondary compression has to save at least this many bytes. */
386#define SECONDARY_MIN_INPUT   10 /* Secondary compression needs at least this many bytes. */
387
388#define VCDIFF_MAGIC1  0xd6  /* 1st file byte */
389#define VCDIFF_MAGIC2  0xc3  /* 2nd file byte */
390#define VCDIFF_MAGIC3  0xc4  /* 3rd file byte */
391#define VCDIFF_VERSION 0x00  /* 4th file byte */
392
393#define VCD_SELF       0     /* 1st address mode */
394#define VCD_HERE       1     /* 2nd address mode */
395
396#define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */
397#define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code table string */
398
399#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) /* True if any secondary compressor is used. */
400
401#define ALPHABET_SIZE      256  /* Used in test code--size of the secondary compressor alphabet. */
402
403#define HASH_PRIME         0    /* Old hashing experiments */
404#define HASH_PERMUTE       1
405#define ARITH_SMALL_CKSUM  1
406
407#define HASH_CKOFFSET      1U   /* Table entries distinguish "no-entry" from offset 0 using this offset. */
408
409#define MIN_SMALL_LOOK    2U    /* Match-optimization stuff. */
410#define MIN_LARGE_LOOK    2U
411#define MIN_MATCH_OFFSET  1U
412#define MAX_MATCH_SPLIT   18U   /* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes */
413
414#define LEAST_MATCH_INCR  0   /* The least number of bytes an overlapping match must beat
415                               * the preceding match by.  This is a bias for the lazy
416                               * match optimization.  A non-zero value means that an
417                               * adjacent match has to be better by more than the step
418                               * between them.  0. */
419
420#define MIN_MATCH         4U  /* VCDIFF code table: MIN_MATCH=4 */
421#define MIN_ADD           1U  /* 1 */
422#define MIN_RUN           8U  /* The shortest run, if it is shorter than this an immediate
423                               * add/copy will be just as good.  ADD1/COPY6 = 1I+1D+1A bytes,
424                               * RUN18 = 1I+1D+1A. */
425
426#define MAX_MODES         9  /* Maximum number of nodes used for compression--does not limit decompression. */
427
428#define ENC_SECTS         4  /* Number of separate output sections. */
429
430#define HDR_TAIL(s)  (stream->enc_tails[0])
431#define DATA_TAIL(s) (stream->enc_tails[1])
432#define INST_TAIL(s) (stream->enc_tails[2])
433#define ADDR_TAIL(s) (stream->enc_tails[3])
434
435#define HDR_HEAD(s)  (stream->enc_heads[0])
436#define DATA_HEAD(s) (stream->enc_heads[1])
437#define INST_HEAD(s) (stream->enc_heads[2])
438#define ADDR_HEAD(s) (stream->enc_heads[3])
439
440#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
441
442#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near)
443
444/* Template instances. */
445#if XD3_BUILD_SLOW
446#define IF_BUILD_SLOW(x) x
447#else
448#define IF_BUILD_SLOW(x)
449#endif
450#if XD3_BUILD_FAST
451#define IF_BUILD_FAST(x) x
452#else
453#define IF_BUILD_FAST(x)
454#endif
455#if XD3_BUILD_FASTEST
456#define IF_BUILD_FASTEST(x) x
457#else
458#define IF_BUILD_FASTEST(x)
459#endif
460#if XD3_BUILD_SOFT
461#define IF_BUILD_SOFT(x) x
462#else
463#define IF_BUILD_SOFT(x)
464#endif
465#if XD3_BUILD_DEFAULT
466#define IF_BUILD_DEFAULT(x) x
467#else
468#define IF_BUILD_DEFAULT(x)
469#endif
470
471IF_BUILD_SOFT(static const xd3_smatcher    __smatcher_soft;)
472IF_BUILD_FAST(static const xd3_smatcher    __smatcher_fast;)
473IF_BUILD_SLOW(static const xd3_smatcher    __smatcher_slow;)
474IF_BUILD_FASTEST(static const xd3_smatcher    __smatcher_fastest;)
475IF_BUILD_DEFAULT(static const xd3_smatcher    __smatcher_default;)
476
477#if XD3_DEBUG
478#define SMALL_HASH_DEBUG1(s,inp)                                  \
479  usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash,       \
480         xd3_scksum ((inp), (s)->smatcher.small_look))
481#define SMALL_HASH_DEBUG2(s,inp)                                  \
482  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
483         xd3_scksum ((inp), (s)->smatcher.small_look)))
484#else
485#define SMALL_HASH_DEBUG1(s,inp)
486#define SMALL_HASH_DEBUG2(s,inp)
487#endif /* XD3_DEBUG */
488
489/* Update the run-length state */
490#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } else { run_c = (c); run_l = 1; } } while (0)
491
492/* Update the checksum state. */
493#define LARGE_CKSUM_UPDATE(cksum,base,look)                              \
494  do {                                                                   \
495    uint32_t old_c = PERMUTE((base)[0]);                                    \
496    uint32_t new_c = PERMUTE((base)[(look)]);                               \
497    uint32_t low   = (((cksum) & 0xffff) - old_c + new_c) & 0xffff;         \
498    uint32_t high  = (((cksum) >> 16) - (old_c * (look)) + low) & 0xffff;   \
499    (cksum) = (high << 16) | low;                                        \
500  } while (0)
501
502/* Multiply and add hash function */
503#if ARITH_SMALL_CKSUM
504#define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)(base+1)) * 71143)
505#else
506#define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE
507#endif
508
509/* Consume N bytes of input, only used by the decoder. */
510#define DECODE_INPUT(n)             \
511  do {                              \
512  stream->total_in += (xoff_t) (n); \
513  stream->avail_in -= (n);          \
514  stream->next_in  += (n);          \
515  } while (0)
516
517/* This CPP-conditional stuff can be cleaned up... */
518#if XD3_DEBUG
519#define IF_DEBUG(x) x
520#define DEBUG_ARG(x) , x
521#else
522#define IF_DEBUG(x)
523#define DEBUG_ARG(x)
524#endif
525#if XD3_DEBUG > 1
526#define IF_DEBUG1(x) x
527#else
528#define IF_DEBUG1(x)
529#endif
530#if XD3_DEBUG > 2
531#define IF_DEBUG2(x) x
532#else
533#define IF_DEBUG2(x)
534#endif
535#if REGRESSION_TEST
536#define IF_REGRESSION(x) x
537#else
538#define IF_REGRESSION(x)
539#endif
540
541/******************************************************************************************/
542
543#if XD3_ENCODER
544static void*       xd3_alloc0 (xd3_stream *stream,
545                               usize_t      elts,
546                               usize_t      size);
547
548
549static xd3_output* xd3_alloc_output (xd3_stream *stream,
550                                     xd3_output *old_output);
551
552static int         xd3_alloc_iopt (xd3_stream *stream, int elts);
553
554static void        xd3_free_output (xd3_stream *stream,
555                                    xd3_output *output);
556
557static int         xd3_emit_byte (xd3_stream  *stream,
558                                  xd3_output **outputp,
559                                  uint8_t      code);
560
561static int         xd3_emit_bytes (xd3_stream     *stream,
562                                   xd3_output    **outputp,
563                                   const uint8_t  *base,
564                                   usize_t          size);
565
566static int         xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code);
567static int         xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code);
568
569static usize_t      xd3_sizeof_output (xd3_output *output);
570
571static int         xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos);
572static int         xd3_source_extend_match (xd3_stream *stream);
573static int         xd3_srcwin_setup (xd3_stream *stream);
574static usize_t     xd3_iopt_last_matched (xd3_stream *stream);
575static int         xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num);
576
577#endif /* XD3_ENCODER */
578
579static int         xd3_decode_allocate (xd3_stream  *stream, usize_t       size,
580                                        uint8_t    **copied1, usize_t      *alloc1,
581                                        uint8_t    **copied2, usize_t      *alloc2);
582
583static void        xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str);
584static void*       xd3_alloc (xd3_stream *stream, usize_t      elts, usize_t      size);
585static void        xd3_free  (xd3_stream *stream, void       *ptr);
586
587static int         xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
588                                      const uint8_t *max, uint32_t *valp);
589
590#if REGRESSION_TEST
591static int         xd3_selftest      (void);
592#endif
593
594/******************************************************************************************/
595
596#define UINT32_OFLOW_MASK 0xfe000000U
597#define UINT64_OFLOW_MASK 0xfe00000000000000ULL
598
599#ifndef UINT32_MAX
600#define UINT32_MAX 4294967295U
601#endif
602
603#ifndef UINT64_MAX
604#define UINT64_MAX 18446744073709551615ULL
605#endif
606
607#if SIZEOF_USIZE_T == 4
608#define USIZE_T_MAX        UINT32_MAX
609#define xd3_decode_size   xd3_decode_uint32_t
610#define xd3_emit_size     xd3_emit_uint32_t
611#define xd3_sizeof_size   xd3_sizeof_uint32_t
612#define xd3_read_size     xd3_read_uint32_t
613#elif SIZEOF_USIZE_T == 8
614#define USIZE_T_MAX        UINT64_MAX
615#define xd3_decode_size   xd3_decode_uint64_t
616#define xd3_emit_size     xd3_emit_uint64_t
617#define xd3_sizeof_size   xd3_sizeof_uint64_t
618#define xd3_read_size     xd3_read_uint64_t
619#endif
620
621#if SIZEOF_XOFF_T == 4
622#define XOFF_T_MAX        UINT32_MAX
623#define xd3_decode_offset xd3_decode_uint32_t
624#define xd3_emit_offset   xd3_emit_uint32_t
625#elif SIZEOF_XOFF_T == 8
626#define XOFF_T_MAX        UINT64_MAX
627#define xd3_decode_offset xd3_decode_uint64_t
628#define xd3_emit_offset   xd3_emit_uint64_t
629#endif
630
631#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
632#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
633
634const char* xd3_strerror (int ret)
635{
636  switch (ret)
637    {
638    case XD3_INPUT: return "XD3_INPUT";
639    case XD3_OUTPUT: return "XD3_OUTPUT";
640    case XD3_GETSRCBLK: return "XD3_GETSRCBLK";
641    case XD3_GOTHEADER: return "XD3_GOTHEADER";
642    case XD3_WINSTART: return "XD3_WINSTART";
643    case XD3_WINFINISH: return "XD3_WINFINISH";
644    case XD3_TOOFARBACK: return "XD3_TOOFARBACK";
645    case XD3_INTERNAL: return "XD3_INTERNAL";
646    }
647  return NULL;
648}
649
650/******************************************************************************************/
651
652#if SECONDARY_ANY == 0
653#define IF_SEC(x)
654#define IF_NSEC(x) x
655#else /* yuck */
656#define IF_SEC(x) x
657#define IF_NSEC(x)
658#include "xdelta3-second.h"
659#endif /* SECONDARY_ANY */
660
661#if SECONDARY_FGK
662#include "xdelta3-fgk.h"
663
664static const xd3_sec_type fgk_sec_type =
665{
666  VCD_FGK_ID,
667  "FGK Adaptive Huffman",
668  SEC_NOFLAGS,
669  (xd3_sec_stream* (*)())  fgk_alloc,
670  (void (*)())             fgk_destroy,
671  (void (*)())             fgk_init,
672  (int (*)())              xd3_decode_fgk,
673  IF_ENCODER((int (*)())   xd3_encode_fgk)
674};
675
676#define IF_FGK(x) x
677#define FGK_CASE(s) \
678  s->sec_type = & fgk_sec_type; \
679  break;
680#else
681#define IF_FGK(x)
682#define FGK_CASE(s) \
683  s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
684  return XD3_INTERNAL;
685#endif
686
687#if SECONDARY_DJW
688#include "xdelta3-djw.h"
689
690static const xd3_sec_type djw_sec_type =
691{
692  VCD_DJW_ID,
693  "Static Huffman",
694  SEC_COUNT_FREQS,
695  (xd3_sec_stream* (*)())  djw_alloc,
696  (void (*)())             djw_destroy,
697  (void (*)())             djw_init,
698  (int (*)())              xd3_decode_huff,
699  IF_ENCODER((int (*)())   xd3_encode_huff)
700};
701
702#define IF_DJW(x) x
703#define DJW_CASE(s) \
704  s->sec_type = & djw_sec_type; \
705  break;
706#else
707#define IF_DJW(x)
708#define DJW_CASE(s) \
709  s->msg = "unavailable secondary compressor: DJW Static Huffman"; \
710  return XD3_INTERNAL;
711#endif
712
713/******************************************************************************************/
714
715/* Process the inline pass. */
716#define __XDELTA3_C_INLINE_PASS__
717#include "xdelta3.c"
718#undef __XDELTA3_C_INLINE_PASS__
719
720/* Process template passes - this includes xdelta3.c several times. */
721#define __XDELTA3_C_TEMPLATE_PASS__
722#include "xdelta3-cfgs.h"
723#undef __XDELTA3_C_TEMPLATE_PASS__
724
725#if XD3_MAIN || PYTHON_MODULE || SWIG_MODULE
726#include "xdelta3-main.h"
727#endif
728
729#if REGRESSION_TEST
730#include "xdelta3-test.h"
731#endif
732
733#if PYTHON_MODULE
734#include "xdelta3-python.h"
735#endif
736
737#endif /* __XDELTA3_C_HEADER_PASS__ */
738#ifdef __XDELTA3_C_INLINE_PASS__
739
740/******************************************************************************************
741 Instruction tables
742 ******************************************************************************************/
743
744/* The following code implements a parametrized description of the
745 * code table given above for a few reasons.  It is not necessary for
746 * implementing the standard, to support compression with variable
747 * tables, so an implementation is only required to know the default
748 * code table to begin decompression.  (If the encoder uses an
749 * alternate table, the table is included in compressed form inside
750 * the VCDIFF file.)
751 *
752 * Before adding variable-table support there were two functions which
753 * were hard-coded to the default table above.
754 * xd3_compute_default_table() would create the default table by
755 * filling a 256-elt array of xd3_dinst values.  The corresponding
756 * function, xd3_choose_instruction(), would choose an instruction
757 * based on the hard-coded parameters of the default code table.
758 *
759 * Notes: The parametrized code table description here only generates
760 * tables of a certain regularity similar to the default table by
761 * allowing to vary the distribution of single- and
762 * double-instructions and change the number of near and same copy
763 * modes.  More exotic tables are only possible by extending this
764 * code.
765 *
766 * For performance reasons, both the parametrized and non-parametrized
767 * versions of xd3_choose_instruction remain.  The parametrized
768 * version is only needed for testing multi-table decoding support.
769 * If ever multi-table encoding is required, this can be optimized by
770 * compiling static functions for each table.
771 */
772
773/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the
774 * table description when GENERIC_ENCODE_TABLES are in use.  The
775 * IF_GENCODETBL macro enables generic-code-table specific code. */
776#if GENERIC_ENCODE_TABLES
777#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst)
778#define IF_GENCODETBL(x) x
779#else
780#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst)
781#define IF_GENCODETBL(x)
782#endif
783
784/* This structure maintains information needed by
785 * xd3_choose_instruction to compute the code for a double instruction
786 * by first indexing an array of code_table_sizes by copy mode, then
787 * using (offset + (muliplier * X)) */
788struct _xd3_code_table_sizes {
789  uint8_t cpy_max;
790  uint8_t offset;
791  uint8_t mult;
792};
793
794/* This contains a complete description of a code table. */
795struct _xd3_code_table_desc
796{
797  /* Assumes a single RUN instruction */
798  /* Assumes that MIN_MATCH is 4 */
799
800  uint8_t add_sizes;            /* Number of immediate-size single adds (default 17) */
801  uint8_t near_modes;           /* Number of near copy modes (default 4) */
802  uint8_t same_modes;           /* Number of same copy modes (default 3) */
803  uint8_t cpy_sizes;            /* Number of immediate-size single copies (default 15) */
804
805  uint8_t addcopy_add_max;      /* Maximum add size for an add-copy double instruction, all modes (default 4) */
806  uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */
807  uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */
808
809  uint8_t copyadd_add_max;      /* Maximum add size for a copy-add double instruction, all modes (default 1) */
810  uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */
811  uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */
812
813  xd3_code_table_sizes addcopy_max_sizes[MAX_MODES];
814  xd3_code_table_sizes copyadd_max_sizes[MAX_MODES];
815};
816
817/* The rfc3284 code table is represented: */
818static const xd3_code_table_desc __rfc3284_code_table_desc = {
819  17, /* add sizes */
820  4,  /* near modes */
821  3,  /* same modes */
822  15, /* copy sizes */
823
824  4,  /* add-copy max add */
825  6,  /* add-copy max cpy, near */
826  4,  /* add-copy max cpy, same */
827
828  1,  /* copy-add max add */
829  4,  /* copy-add max cpy, near */
830  4,  /* copy-add max cpy, same */
831
832  /* addcopy */
833  { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} },
834  /* copyadd */
835  { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} },
836};
837
838#if GENERIC_ENCODE_TABLES
839/* An alternate code table for testing (5 near, 0 same):
840 *
841 *         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
842 *        ---------------------------------------------------------------
843 *     1.  Run         0        0     Noop       0        0        0
844 *     2.  Add    0, [1,23]     0     Noop       0        0      [1,24]
845 *     3.  Copy   0, [4,20]     0     Noop       0        0     [25,42]
846 *     4.  Copy   0, [4,20]     1     Noop       0        0     [43,60]
847 *     5.  Copy   0, [4,20]     2     Noop       0        0     [61,78]
848 *     6.  Copy   0, [4,20]     3     Noop       0        0     [79,96]
849 *     7.  Copy   0, [4,20]     4     Noop       0        0     [97,114]
850 *     8.  Copy   0, [4,20]     5     Noop       0        0    [115,132]
851 *     9.  Copy   0, [4,20]     6     Noop       0        0    [133,150]
852 *    10.  Add       [1,4]      0     Copy     [4,6]      0    [151,162]
853 *    11.  Add       [1,4]      0     Copy     [4,6]      1    [163,174]
854 *    12.  Add       [1,4]      0     Copy     [4,6]      2    [175,186]
855 *    13.  Add       [1,4]      0     Copy     [4,6]      3    [187,198]
856 *    14.  Add       [1,4]      0     Copy     [4,6]      4    [199,210]
857 *    15.  Add       [1,4]      0     Copy     [4,6]      5    [211,222]
858 *    16.  Add       [1,4]      0     Copy     [4,6]      6    [223,234]
859 *    17.  Copy        4      [0,6]   Add      [1,3]      0    [235,255]
860 *        --------------------------------------------------------------- */
861static const xd3_code_table_desc __alternate_code_table_desc = {
862  23, /* add sizes */
863  5,  /* near modes */
864  0,  /* same modes */
865  17, /* copy sizes */
866
867  4,  /* add-copy max add */
868  6,  /* add-copy max cpy, near */
869  0,  /* add-copy max cpy, same */
870
871  3,  /* copy-add max add */
872  4,  /* copy-add max cpy, near */
873  0,  /* copy-add max cpy, same */
874
875  /* addcopy */
876  { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} },
877  /* copyadd */
878  { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} },
879};
880#endif
881
882/* Computes code table entries of TBL using the specified description. */
883static void
884xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
885{
886  int size1, size2, mode;
887  int cpy_modes = 2 + desc->near_modes + desc->same_modes;
888  xd3_dinst *d = tbl;
889
890  (d++)->type1 = XD3_RUN;
891  (d++)->type1 = XD3_ADD;
892
893  for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1)
894    {
895      d->type1 = XD3_ADD;
896      d->size1 = size1;
897    }
898
899  for (mode = 0; mode < cpy_modes; mode += 1)
900    {
901      (d++)->type1 = XD3_CPY + mode;
902
903      for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1)
904        {
905          d->type1 = XD3_CPY + mode;
906          d->size1 = size1;
907        }
908    }
909
910  for (mode = 0; mode < cpy_modes; mode += 1)
911    {
912      for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1)
913        {
914          int max = (mode < 2 + desc->near_modes) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max;
915
916          for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1)
917            {
918              d->type1 = XD3_ADD;
919              d->size1 = size1;
920              d->type2 = XD3_CPY + mode;
921              d->size2 = size2;
922            }
923        }
924    }
925
926  for (mode = 0; mode < cpy_modes; mode += 1)
927    {
928      int max = (mode < 2 + desc->near_modes) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max;
929
930      for (size1 = MIN_MATCH; size1 <= max; size1 += 1)
931        {
932          for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1)
933            {
934              d->type1 = XD3_CPY + mode;
935              d->size1 = size1;
936              d->type2 = XD3_ADD;
937              d->size2 = size2;
938            }
939        }
940    }
941
942  XD3_ASSERT (d - tbl == 256);
943}
944
945/* This function generates the static default code table. */
946static const xd3_dinst*
947xd3_rfc3284_code_table (void)
948{
949  static xd3_dinst __rfc3284_code_table[256];
950
951  if (__rfc3284_code_table[0].type1 != XD3_RUN)
952    {
953      xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table);
954    }
955
956  return __rfc3284_code_table;
957}
958
959#if XD3_ENCODER
960#if GENERIC_ENCODE_TABLES
961/* This function generates the alternate code table. */
962static const xd3_dinst*
963xd3_alternate_code_table (void)
964{
965  static xd3_dinst __alternate_code_table[256];
966
967  if (__alternate_code_table[0].type1 != XD3_RUN)
968    {
969      xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table);
970    }
971
972  return __alternate_code_table;
973}
974
975/* This function computes the ideal second instruction INST based on preceding instruction
976 * PREV.  If it is possible to issue a double instruction based on this pair it sets
977 * PREV->code2, otherwise it sets INST->code1. */
978static void
979xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst)
980{
981  switch (inst->type)
982    {
983    case XD3_RUN:
984      /* The 0th instruction is RUN */
985      inst->code1 = 0;
986      break;
987
988    case XD3_ADD:
989
990      if (inst->size > desc->add_sizes)
991        {
992          /* The first instruction is non-immediate ADD */
993          inst->code1 = 1;
994        }
995      else
996        {
997          /* The following ADD_SIZES instructions are immediate ADDs */
998          inst->code1 = 1 + inst->size;
999
1000          /* Now check for a possible COPY-ADD double instruction */
1001          if (prev != NULL)
1002            {
1003              int prev_mode = prev->type - XD3_CPY;
1004
1005              /* If previous is a copy.  Note: as long as the previous is not a RUN
1006               * instruction, it should be a copy because it cannot be an add.  This check
1007               * is more clear. */
1008              if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max)
1009                {
1010                  const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode];
1011
1012                  /* This check and the inst->size-<= above are == in the default table. */
1013                  if (prev->size <= sizes->cpy_max)
1014                    {
1015                      /* The second and third exprs are 0 in the default table. */
1016                      prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD);
1017                    }
1018                }
1019            }
1020        }
1021      break;
1022
1023    default:
1024      {
1025        int mode = inst->type - XD3_CPY;
1026
1027        /* The large copy instruction is offset by the run, large add, and immediate adds,
1028         * then multipled by the number of immediate copies plus one (the large copy)
1029         * (i.e., if there are 15 immediate copy instructions then there are 16 copy
1030         * instructions per mode). */
1031        inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode;
1032
1033        /* Now if the copy is short enough for an immediate instruction. */
1034        if (inst->size < MIN_MATCH + desc->cpy_sizes)
1035          {
1036            inst->code1 += inst->size + 1 - MIN_MATCH;
1037
1038            /* Now check for a possible ADD-COPY double instruction. */
1039            if ( (prev != NULL) &&
1040                 (prev->type == XD3_ADD) &&
1041                 (prev->size <= desc->addcopy_add_max) )
1042              {
1043                const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode];
1044
1045                if (inst->size <= sizes->cpy_max)
1046                  {
1047                    prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH);
1048                  }
1049              }
1050          }
1051      }
1052    }
1053}
1054#else /* GENERIC_ENCODE_TABLES */
1055
1056/* This version of xd3_choose_instruction is hard-coded for the default table. */
1057static void
1058xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst)
1059{
1060  switch (inst->type)
1061    {
1062    case XD3_RUN:
1063      inst->code1 = 0;
1064      break;
1065
1066    case XD3_ADD:
1067      inst->code1 = 1;
1068
1069      if (inst->size <= 17)
1070        {
1071          inst->code1 += inst->size;
1072
1073          if ( (inst->size == 1) &&
1074               (prev != NULL) &&
1075               (prev->size == 4) &&
1076               (prev->type >= XD3_CPY) )
1077            {
1078              prev->code2 = 247 + (prev->type - XD3_CPY);
1079            }
1080        }
1081
1082      break;
1083
1084    default:
1085      {
1086        int mode = inst->type - XD3_CPY;
1087
1088        XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
1089
1090        inst->code1 = 19 + 16 * mode;
1091
1092        if (inst->size <= 18)
1093          {
1094            inst->code1 += inst->size - 3;
1095
1096            if ( (prev != NULL) &&
1097                 (prev->type == XD3_ADD) &&
1098                 (prev->size <= 4) )
1099              {
1100                if ( (inst->size <= 6) &&
1101                     (mode       <= 5) )
1102                  {
1103                    prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4);
1104
1105                    XD3_ASSERT (prev->code2 <= 234);
1106                  }
1107                else if ( (inst->size == 4) &&
1108                          (mode       >= 6) )
1109                  {
1110                    prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1);
1111
1112                    XD3_ASSERT (prev->code2 <= 246);
1113                  }
1114              }
1115          }
1116
1117        XD3_ASSERT (inst->code1 <= 162);
1118      }
1119      break;
1120    }
1121}
1122#endif /* GENERIC_ENCODE_TABLES */
1123
1124/******************************************************************************************
1125 Instruction table encoder/decoder
1126 ******************************************************************************************/
1127
1128#if GENERIC_ENCODE_TABLES
1129#if GENERIC_ENCODE_TABLES_COMPUTE == 0
1130
1131/* In this case, we hard-code the result of compute_code_table_encoding for each alternate
1132 * code table, presuming that saves time/space.  This has been 131 bytes, but secondary
1133 * compression was turned off. */
1134static const uint8_t __alternate_code_table_compressed[178] =
1135{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03,
11360x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
11370x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04,
11380x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05,
11390x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54,
11400x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15,
11410x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00,
11420x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,
11430x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,};
1144
1145static int
1146xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1147{
1148  (*data) = __alternate_code_table_compressed;
1149  (*size) = sizeof (__alternate_code_table_compressed);
1150  return 0;
1151}
1152
1153#else
1154
1155/* The alternate code table will be computed and stored here. */
1156static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE];
1157static usize_t  __alternate_code_table_compressed_size;
1158
1159/* This function generates a delta describing the code table for encoding within a VCDIFF
1160 * file.  This function is NOT thread safe because it is only intended that this function
1161 * is used to generate statically-compiled strings. */
1162int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table,
1163                                     uint8_t *comp_string, usize_t *comp_string_size)
1164{
1165  /* TODO: use xd3_encode_memory() */
1166  uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1167  uint8_t code_string[CODE_TABLE_STRING_SIZE];
1168  xd3_stream stream;
1169  xd3_source source;
1170  xd3_config config;
1171  int ret;
1172
1173  memset (& source, 0, sizeof (source));
1174
1175  xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1176  xd3_compute_code_table_string (code_table, code_string);
1177
1178  /* Use DJW secondary compression if it is on by default.  This saves about 20 bytes. */
1179  xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0));
1180
1181  /* Be exhaustive. */
1182  config.sprevsz = 1<<11;
1183  config.srcwin_maxsz = CODE_TABLE_STRING_SIZE;
1184
1185  config.smatch_cfg = XD3_SMATCH_SOFT;
1186  config.smatcher_soft.large_look    = 4;
1187  config.smatcher_soft.large_step    = 1;
1188  config.smatcher_soft.small_look    = 4;
1189  config.smatcher_soft.small_chain   = CODE_TABLE_STRING_SIZE;
1190  config.smatcher_soft.small_lchain  = CODE_TABLE_STRING_SIZE;
1191  config.smatcher_soft.max_lazy      = CODE_TABLE_STRING_SIZE;
1192  config.smatcher_soft.long_enough   = CODE_TABLE_STRING_SIZE;
1193
1194  if ((ret = xd3_config_stream (& stream, & config))) { goto fail; }
1195
1196  source.size     = CODE_TABLE_STRING_SIZE;
1197  source.blksize  = CODE_TABLE_STRING_SIZE;
1198  source.onblk    = CODE_TABLE_STRING_SIZE;
1199  source.name     = "";
1200  source.curblk   = dflt_string;
1201  source.curblkno = 0;
1202
1203  if ((ret = xd3_set_source (& stream, & source))) { goto fail; }
1204
1205  if ((ret = xd3_encode_stream (& stream, code_string, CODE_TABLE_STRING_SIZE,
1206                                comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE))) { goto fail; }
1207
1208 fail:
1209
1210  in_stream->msg = stream.msg;
1211  xd3_free_stream (& stream);
1212  return ret;
1213}
1214
1215/* Compute a delta between alternate and rfc3284 tables.  As soon as another alternate
1216 * table is added, this code should become generic.  For now there is only one alternate
1217 * table for testing. */
1218static int
1219xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1220{
1221  int ret;
1222
1223  if (__alternate_code_table_compressed[0] == 0)
1224    {
1225      if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (),
1226                                                  __alternate_code_table_compressed,
1227                                                  & __alternate_code_table_compressed_size)))
1228        {
1229          return ret;
1230        }
1231
1232      /* During development of a new code table, enable this variable to print the new
1233       * static contents and determine its size.  At run time the table will be filled in
1234       * appropriately, but at least it should have the proper size beforehand. */
1235#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT
1236      {
1237        int i;
1238
1239        DP(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n",
1240                 __alternate_code_table_compressed_size);
1241
1242        DP(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{",
1243                 __alternate_code_table_compressed_size);
1244
1245        for (i = 0; i < __alternate_code_table_compressed_size; i += 1)
1246          {
1247            DP(RINT, "0x%02x,", __alternate_code_table_compressed[i]);
1248            if ((i % 20) == 19) { DP(RINT, "\n"); }
1249          }
1250
1251        DP(RINT, "};\n");
1252      }
1253#endif
1254    }
1255
1256  (*data) = __alternate_code_table_compressed;
1257  (*size) = __alternate_code_table_compressed_size;
1258
1259  return 0;
1260}
1261#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */
1262#endif /* GENERIC_ENCODE_TABLES */
1263
1264#endif /* XD3_ENCODER */
1265
1266/* This function generates the 1536-byte string specified in sections 5.4 and 7 of
1267 * rfc3284, which is used to represent a code table within a VCDIFF file. */
1268void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str)
1269{
1270  int i, s;
1271
1272  XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256);
1273
1274  for (s = 0; s < 6; s += 1)
1275    {
1276      for (i = 0; i < 256; i += 1)
1277        {
1278          switch (s)
1279            {
1280            case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break;
1281            case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break;
1282            case 2: *str++ = (code_table[i].size1); break;
1283            case 3: *str++ = (code_table[i].size2); break;
1284            case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break;
1285            case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break;
1286            }
1287        }
1288    }
1289}
1290
1291/* This function translates the code table string into the internal representation.  The
1292 * stream's near and same-modes should already be set. */
1293static int
1294xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string)
1295{
1296  int i, s;
1297  int modes = TOTAL_MODES (stream);
1298  xd3_dinst *code_table;
1299
1300  if ((code_table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL)
1301    {
1302      return ENOMEM;
1303    }
1304
1305  for (s = 0; s < 6; s += 1)
1306    {
1307      for (i = 0; i < 256; i += 1)
1308        {
1309          switch (s)
1310            {
1311            case 0:
1312              if (*code_string > XD3_CPY)
1313                {
1314                  stream->msg = "invalid code-table opcode";
1315                  return XD3_INTERNAL;
1316                }
1317              code_table[i].type1 = *code_string++;
1318              break;
1319            case 1:
1320              if (*code_string > XD3_CPY)
1321                {
1322                  stream->msg = "invalid code-table opcode";
1323                  return XD3_INTERNAL;
1324                }
1325              code_table[i].type2 = *code_string++;
1326              break;
1327            case 2:
1328              if (*code_string != 0 && code_table[i].type1 == XD3_NOOP)
1329                {
1330                  stream->msg = "invalid code-table size";
1331                  return XD3_INTERNAL;
1332                }
1333              code_table[i].size1 = *code_string++;
1334              break;
1335            case 3:
1336              if (*code_string != 0 && code_table[i].type2 == XD3_NOOP)
1337                {
1338                  stream->msg = "invalid code-table size";
1339                  return XD3_INTERNAL;
1340                }
1341              code_table[i].size2 = *code_string++;
1342              break;
1343            case 4:
1344              if (*code_string >= modes)
1345                {
1346                  stream->msg = "invalid code-table mode";
1347                  return XD3_INTERNAL;
1348                }
1349              if (*code_string != 0 && code_table[i].type1 != XD3_CPY)
1350                {
1351                  stream->msg = "invalid code-table mode";
1352                  return XD3_INTERNAL;
1353                }
1354              code_table[i].type1 += *code_string++;
1355              break;
1356            case 5:
1357              if (*code_string >= modes)
1358                {
1359                  stream->msg = "invalid code-table mode";
1360                  return XD3_INTERNAL;
1361                }
1362              if (*code_string != 0 && code_table[i].type2 != XD3_CPY)
1363                {
1364                  stream->msg = "invalid code-table mode";
1365                  return XD3_INTERNAL;
1366                }
1367              code_table[i].type2 += *code_string++;
1368              break;
1369            }
1370        }
1371    }
1372
1373  stream->code_table = code_table;
1374  return 0;
1375}
1376
1377/* This function applies a code table delta and returns an actual code table. */
1378static int
1379xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size)
1380{
1381  uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1382  uint8_t code_string[CODE_TABLE_STRING_SIZE];
1383  usize_t code_size;
1384  xd3_stream stream;
1385  xd3_source source;
1386  int ret;
1387
1388  /* The default code table string can be cached if alternate code tables ever become
1389   * popular. */
1390  xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1391
1392  source.size     = CODE_TABLE_STRING_SIZE;
1393  source.blksize  = CODE_TABLE_STRING_SIZE;
1394  source.onblk    = CODE_TABLE_STRING_SIZE;
1395  source.name     = "rfc3284 code table";
1396  source.curblk   = dflt_string;
1397  source.curblkno = 0;
1398
1399  if ((ret = xd3_config_stream (& stream, NULL)) ||
1400      (ret = xd3_set_source (& stream, & source)) ||
1401      (ret = xd3_decode_stream (& stream, data, size, code_string, & code_size, sizeof (code_string))))
1402    {
1403      in_stream->msg = stream.msg;
1404      goto fail;
1405    }
1406
1407  if (code_size != sizeof (code_string))
1408    {
1409      stream.msg = "corrupt code-table encoding";
1410      ret = XD3_INTERNAL;
1411      goto fail;
1412    }
1413
1414  if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; }
1415
1416 fail:
1417
1418  xd3_free_stream (& stream);
1419  return ret;
1420}
1421
1422/******************************************************************************************
1423 Permute stuff
1424 ******************************************************************************************/
1425
1426#if HASH_PERMUTE == 0
1427#define PERMUTE(x) (x)
1428#else
1429#define PERMUTE(x) (__single_hash[(uint)x])
1430
1431static const uint16_t __single_hash[256] =
1432{
1433  /* Random numbers generated using SLIB's pseudo-random number generator.  This hashes
1434   * the input alphabet. */
1435  0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
1436  0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
1437  0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
1438  0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
1439  0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
1440  0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
1441  0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
1442  0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
1443  0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
1444  0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
1445  0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
1446  0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
1447  0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
1448  0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
1449  0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
1450  0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
1451  0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
1452  0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
1453  0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
1454  0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
1455  0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
1456  0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
1457  0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
1458  0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
1459  0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
1460  0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
1461  0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
1462  0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
1463  0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
1464  0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
1465  0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
1466  0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
1467};
1468#endif
1469
1470/******************************************************************************************
1471 Ctable stuff
1472 ******************************************************************************************/
1473
1474#if HASH_PRIME
1475static const usize_t __primes[] =
1476{
1477  11, 19, 37, 73, 109,
1478  163, 251, 367, 557, 823,
1479  1237, 1861, 2777, 4177, 6247,
1480  9371, 14057, 21089, 31627, 47431,
1481  71143, 106721, 160073, 240101, 360163,
1482  540217, 810343, 1215497, 1823231, 2734867,
1483  4102283, 6153409, 9230113, 13845163, 20767711,
1484  31151543, 46727321, 70090921, 105136301, 157704401,
1485  236556601, 354834919, 532252367, 798378509, 1197567719,
1486  1796351503
1487};
1488
1489static const usize_t __nprimes = SIZEOF_ARRAY (__primes);
1490#endif
1491
1492static INLINE uint32_t
1493xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum)
1494{
1495#if HASH_PRIME
1496  /* If the table is prime compute the modulus. */
1497  return (cksum % cfg->size);
1498#else
1499  /* If the table is power-of-two compute the mask.*/
1500  return (cksum ^ (cksum >> cfg->shift)) & cfg->mask;
1501#endif
1502}
1503
1504/******************************************************************************************
1505 Create the hash table.
1506 ******************************************************************************************/
1507
1508static INLINE void
1509xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
1510{
1511  uint8_t *t = (*p1);
1512  (*p1) = (*p2);
1513  (*p2) = t;
1514}
1515
1516static INLINE void
1517xd3_swap_usize_t (usize_t* p1, usize_t* p2)
1518{
1519  usize_t t = (*p1);
1520  (*p1) = (*p2);
1521  (*p2) = t;
1522}
1523
1524/* It's not constant time, but it computes the log. */
1525static int
1526xd3_check_pow2 (usize_t value, usize_t *logof)
1527{
1528  usize_t x = 1;
1529  usize_t nolog;
1530  if (logof == NULL) {
1531    logof = &nolog;
1532  }
1533
1534  *logof = 0;
1535
1536  for (; x != 0; x <<= 1, *logof += 1)
1537    {
1538      if (x == value)
1539        {
1540          return 0;
1541        }
1542    }
1543
1544  return XD3_INTERNAL;
1545}
1546
1547static usize_t
1548xd3_round_blksize (usize_t sz, usize_t blksz)
1549{
1550  usize_t mod = sz & (blksz-1);
1551
1552  XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
1553
1554  return mod ? (sz + (blksz - mod)) : sz;
1555}
1556
1557#if XD3_ENCODER
1558#if !HASH_PRIME
1559static usize_t
1560xd3_size_log2 (usize_t slots)
1561{
1562  int bits = 28; /* This should not be an unreasonable limit. */
1563  int i;
1564
1565  for (i = 3; i <= bits; i += 1)
1566    {
1567      if (slots < (1 << i))
1568        {
1569          bits = i-1;
1570          break;
1571        }
1572    }
1573
1574  return bits;
1575}
1576#endif
1577
1578static void
1579xd3_size_hashtable (xd3_stream    *stream,
1580                    usize_t        slots,
1581                    xd3_hash_cfg  *cfg)
1582{
1583  /* initialize ctable: the number of hash buckets is computed from the table of primes or
1584   * the nearest power-of-two, in both cases rounding down in favor of using less
1585   * memory. */
1586
1587#if HASH_PRIME
1588  usize_t i;
1589
1590  cfg->size = __primes[__nprimes-1];
1591
1592  for (i = 1; i < __nprimes; i += 1)
1593    {
1594      if (slots < __primes[i])
1595        {
1596          cfg->size = __primes[i-1];
1597          break;
1598        }
1599    }
1600#else
1601  int bits = xd3_size_log2 (slots);
1602
1603  cfg->size  = (1 << bits);
1604  cfg->mask  = (cfg->size - 1);
1605  cfg->shift = min (32 - bits, 16);
1606#endif
1607}
1608#endif
1609
1610/******************************************************************************************
1611 Cksum function
1612 ******************************************************************************************/
1613
1614/* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */
1615static INLINE uint32_t
1616xd3_lcksum (const uint8_t *seg, const int ln)
1617{
1618  int   i    = 0;
1619  uint32_t low  = 0;
1620  uint32_t high = 0;
1621
1622  for (; i < ln; i += 1)
1623    {
1624      low  += PERMUTE(*seg++);
1625      high += low;
1626    }
1627
1628  return ((high & 0xffff) << 16) | (low & 0xffff);
1629}
1630
1631#if ARITH_SMALL_CKSUM
1632static INLINE usize_t
1633xd3_scksum (const uint8_t *seg, const int ln)
1634{
1635  usize_t c;
1636  /* The -1 is because UPDATE operates on seg[1..ln] */
1637  SMALL_CKSUM_UPDATE (c,(seg-1),ln);
1638  return c;
1639}
1640#else
1641#define xd3_scksum(seg,ln) xd3_lcksum(seg,ln)
1642#endif
1643
1644/******************************************************************************************
1645 Adler32 stream function: code copied from Zlib, defined in RFC1950
1646 ******************************************************************************************/
1647
1648#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
1649#define A32_NMAX 5552   /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
1650
1651#define A32_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
1652#define A32_DO2(buf,i)  A32_DO1(buf,i); A32_DO1(buf,i+1);
1653#define A32_DO4(buf,i)  A32_DO2(buf,i); A32_DO2(buf,i+2);
1654#define A32_DO8(buf,i)  A32_DO4(buf,i); A32_DO4(buf,i+4);
1655#define A32_DO16(buf)   A32_DO8(buf,0); A32_DO8(buf,8);
1656
1657static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len)
1658{
1659    unsigned long s1 = adler & 0xffff;
1660    unsigned long s2 = (adler >> 16) & 0xffff;
1661    int k;
1662
1663    while (len > 0)
1664      {
1665        k    = (len < A32_NMAX) ? len : A32_NMAX;
1666        len -= k;
1667
1668        while (k >= 16)
1669          {
1670            A32_DO16(buf);
1671            buf += 16;
1672            k -= 16;
1673          }
1674
1675        if (k != 0)
1676          {
1677            do
1678              {
1679                s1 += *buf++;
1680                s2 += s1;
1681              }
1682            while (--k);
1683          }
1684
1685        s1 %= A32_BASE;
1686        s2 %= A32_BASE;
1687    }
1688
1689    return (s2 << 16) | s1;
1690}
1691
1692/******************************************************************************************
1693 Run-length function
1694 ******************************************************************************************/
1695
1696static INLINE int
1697xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp)
1698{
1699  int i;
1700  int     run_l = 0;
1701  uint8_t run_c = 0;
1702
1703  for (i = 0; i < slook; i += 1)
1704    {
1705      NEXTRUN(seg[i]);
1706    }
1707
1708  (*run_cp) = run_c;
1709
1710  return run_l;
1711}
1712
1713/******************************************************************************************
1714 Basic encoder/decoder functions
1715 ******************************************************************************************/
1716
1717static int
1718xd3_decode_byte (xd3_stream *stream, uint *val)
1719{
1720  if (stream->avail_in == 0)
1721    {
1722      stream->msg = "further input required";
1723      return XD3_INPUT;
1724    }
1725
1726  (*val) = stream->next_in[0];
1727
1728  DECODE_INPUT (1);
1729  return 0;
1730}
1731
1732static int
1733xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size)
1734{
1735  usize_t want;
1736  usize_t take;
1737
1738  /* Note: The case where (*pos == size) happens when a zero-length appheader or code
1739   * table is transmitted, but there is nothing in the standard against that. */
1740
1741  while (*pos < size)
1742    {
1743      if (stream->avail_in == 0)
1744        {
1745          stream->msg = "further input required";
1746          return XD3_INPUT;
1747        }
1748
1749      want = size - *pos;
1750      take = min (want, stream->avail_in);
1751
1752      memcpy (buf + *pos, stream->next_in, take);
1753
1754      DECODE_INPUT (take);
1755      (*pos) += take;
1756    }
1757
1758  return 0;
1759}
1760
1761#if XD3_ENCODER
1762static int
1763xd3_emit_byte (xd3_stream  *stream,
1764               xd3_output **outputp,
1765               uint8_t      code)
1766{
1767  xd3_output *output = (*outputp);
1768
1769  if (output->next == output->avail)
1770    {
1771      xd3_output *aoutput;
1772
1773      if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1774        {
1775          return ENOMEM;
1776        }
1777
1778      output = (*outputp) = aoutput;
1779    }
1780
1781  output->base[output->next++] = code;
1782
1783  return 0;
1784}
1785
1786static int
1787xd3_emit_bytes (xd3_stream     *stream,
1788                xd3_output    **outputp,
1789                const uint8_t  *base,
1790                usize_t          size)
1791{
1792  xd3_output *output = (*outputp);
1793
1794  do
1795    {
1796      usize_t take;
1797
1798      if (output->next == output->avail)
1799        {
1800          xd3_output *aoutput;
1801
1802          if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1803            {
1804              return ENOMEM;
1805            }
1806
1807          output = (*outputp) = aoutput;
1808        }
1809
1810      take = min (output->avail - output->next, size);
1811
1812      memcpy (output->base + output->next, base, take);
1813
1814      output->next += take;
1815      size -= take;
1816      base += take;
1817    }
1818  while (size > 0);
1819
1820  return 0;
1821}
1822#endif /* XD3_ENCODER */
1823
1824/******************************************************************************************
1825 Integer encoder/decoder functions
1826 ******************************************************************************************/
1827
1828#define DECODE_INTEGER_TYPE(PART,OFLOW)                                \
1829  while (stream->avail_in != 0)                                        \
1830    {                                                                  \
1831      uint next = stream->next_in[0];                                  \
1832                                                                       \
1833      DECODE_INPUT(1);                                                 \
1834                                                                       \
1835      if (PART & OFLOW)                                                \
1836        {                                                              \
1837          stream->msg = "overflow in decode_integer";                  \
1838          return XD3_INVALID_INPUT;                                         \
1839        }                                                              \
1840                                                                       \
1841      PART = (PART << 7) | (next & 127);                               \
1842                                                                       \
1843      if ((next & 128) == 0)                                           \
1844        {                                                              \
1845          (*val) = PART;                                               \
1846          PART = 0;                                                    \
1847          return 0;                                                    \
1848        }                                                              \
1849    }                                                                  \
1850                                                                       \
1851  stream->msg = "further input required";                              \
1852  return XD3_INPUT
1853
1854#define READ_INTEGER_TYPE(TYPE, OFLOW)                                 \
1855  TYPE val = 0;                                                        \
1856  const uint8_t *inp = (*inpp);                                        \
1857  uint next;                                                           \
1858                                                                       \
1859  do                                                                   \
1860    {                                                                  \
1861      if (inp == max)                                                  \
1862        {                                                              \
1863          stream->msg = "end-of-input in read_integer";                \
1864          return XD3_INVALID_INPUT;                                         \
1865        }                                                              \
1866                                                                       \
1867      if (val & OFLOW)                                                 \
1868        {                                                              \
1869          stream->msg = "overflow in read_intger";                     \
1870          return XD3_INVALID_INPUT;                                         \
1871        }                                                              \
1872                                                                       \
1873      next = (*inp++);                                                 \
1874      val  = (val << 7) | (next & 127);                                \
1875    }                                                                  \
1876  while (next & 128);                                                  \
1877                                                                       \
1878  (*valp) = val;                                                       \
1879  (*inpp) = inp;                                                       \
1880                                                                       \
1881  return 0
1882
1883#define EMIT_INTEGER_TYPE()                                            \
1884  /* max 64-bit value in base-7 encoding is 9.1 bytes */               \
1885  uint8_t buf[10];                                                     \
1886  usize_t  bufi = 10;                                                  \
1887                                                                       \
1888  XD3_ASSERT (num >= 0);                                               \
1889                                                                       \
1890  /* This loop performs division and turns on all MSBs. */             \
1891  do                                                                   \
1892    {                                                                  \
1893      buf[--bufi] = (num & 127) | 128;                                 \
1894      num >>= 7;                                                       \
1895    }                                                                  \
1896  while (num != 0);                                                    \
1897                                                                       \
1898  /* Turn off MSB of the last byte. */                                 \
1899  buf[9] &= 127;                                                       \
1900                                                                       \
1901  XD3_ASSERT (bufi >= 0);                                              \
1902                                                                       \
1903  return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
1904
1905#define IF_SIZEOF32(x) if (num < (1U   << (7 * (x)))) return (x);
1906#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x);
1907
1908#if USE_UINT32
1909static uint
1910xd3_sizeof_uint32_t (uint32_t num)
1911{
1912  IF_SIZEOF32(1);
1913  IF_SIZEOF32(2);
1914  IF_SIZEOF32(3);
1915  IF_SIZEOF32(4);
1916  return 5;
1917}
1918
1919static int
1920xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val)
1921{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); }
1922
1923static int
1924xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
1925{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); }
1926
1927#if XD3_ENCODER
1928static int
1929xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
1930{ EMIT_INTEGER_TYPE (); }
1931#endif
1932#endif
1933
1934#if USE_UINT64
1935static int
1936xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
1937{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
1938
1939#if XD3_ENCODER
1940static int
1941xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
1942{ EMIT_INTEGER_TYPE (); }
1943#endif
1944
1945/* These are tested but not used */
1946#if REGRESSION_TEST
1947static int
1948xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp)
1949{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
1950
1951static uint
1952xd3_sizeof_uint64_t (uint64_t num)
1953{
1954  IF_SIZEOF64(1);
1955  IF_SIZEOF64(2);
1956  IF_SIZEOF64(3);
1957  IF_SIZEOF64(4);
1958  IF_SIZEOF64(5);
1959  IF_SIZEOF64(6);
1960  IF_SIZEOF64(7);
1961  IF_SIZEOF64(8);
1962  IF_SIZEOF64(9);
1963
1964  return 10;
1965}
1966#endif
1967
1968#endif
1969
1970/******************************************************************************************
1971 Address cache stuff
1972 ******************************************************************************************/
1973
1974static int
1975xd3_alloc_cache (xd3_stream *stream)
1976{
1977  if (((stream->acache.s_near > 0) &&
1978       (stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) == NULL) ||
1979      ((stream->acache.s_same > 0) &&
1980       (stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) == NULL))
1981    {
1982      return ENOMEM;
1983    }
1984
1985  return 0;
1986}
1987
1988static void
1989xd3_init_cache (xd3_addr_cache* acache)
1990{
1991  if (acache->s_near > 0)
1992    {
1993      memset (acache->near_array, 0, acache->s_near * sizeof (usize_t));
1994      acache->next_slot = 0;
1995    }
1996
1997  if (acache->s_same > 0)
1998    {
1999      memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t));
2000    }
2001}
2002
2003static void
2004xd3_update_cache (xd3_addr_cache* acache, usize_t addr)
2005{
2006  if (acache->s_near > 0)
2007    {
2008      acache->near_array[acache->next_slot] = addr;
2009      acache->next_slot = (acache->next_slot + 1) % acache->s_near;
2010    }
2011
2012  if (acache->s_same > 0)
2013    {
2014      acache->same_array[addr % (acache->s_same*256)] = addr;
2015    }
2016}
2017
2018#if XD3_ENCODER
2019/* OPT: this gets called a lot, can it be optimized? */
2020static int
2021xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode)
2022{
2023  usize_t d, bestd;
2024  int   i, bestm, ret;
2025  xd3_addr_cache* acache = & stream->acache;
2026
2027#define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0)
2028
2029  /* Attempt to find the address mode that yields the smallest integer value for "d", the
2030   * encoded address value, thereby minimizing the encoded size of the address. */
2031  bestd = addr;
2032  bestm = VCD_SELF;
2033
2034  XD3_ASSERT (addr < here);
2035
2036  SMALLEST_INT (bestd);
2037
2038  if ((d = here-addr) < bestd)
2039    {
2040      bestd = d;
2041      bestm = VCD_HERE;
2042
2043      SMALLEST_INT (bestd);
2044    }
2045
2046  for (i = 0; i < acache->s_near; i += 1)
2047    {
2048      d = addr - acache->near_array[i];
2049
2050      if (d >= 0 && d < bestd)
2051        {
2052          bestd = d;
2053          bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */
2054
2055          SMALLEST_INT (bestd);
2056        }
2057    }
2058
2059  if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr)
2060    {
2061      bestd = d%256;
2062      bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */
2063
2064      if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2065    }
2066  else
2067    {
2068    good:
2069
2070      if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2071    }
2072
2073  xd3_update_cache (acache, addr);
2074
2075  (*mode) += bestm;
2076
2077  return 0;
2078}
2079#endif
2080
2081static int
2082xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
2083{
2084  int ret;
2085  uint same_start = 2 + stream->acache.s_near;
2086
2087  if (mode < same_start)
2088    {
2089      if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; }
2090
2091      switch (mode)
2092        {
2093        case VCD_SELF:
2094          break;
2095        case VCD_HERE:
2096          (*valp) = here - (*valp);
2097          break;
2098        default:
2099          (*valp) += stream->acache.near_array[mode - 2];
2100          break;
2101        }
2102    }
2103  else
2104    {
2105      if (*inpp == max)
2106        {
2107          stream->msg = "address underflow";
2108          return XD3_INVALID_INPUT;
2109        }
2110
2111      mode -= same_start;
2112
2113      (*valp) = stream->acache.same_array[mode*256 + (**inpp)];
2114
2115      (*inpp) += 1;
2116    }
2117
2118  xd3_update_cache (& stream->acache, *valp);
2119
2120  return 0;
2121}
2122
2123/******************************************************************************************
2124 Alloc/free
2125 ******************************************************************************************/
2126
2127static void*
2128__xd3_alloc_func (void* opaque, usize_t items, usize_t size)
2129{
2130  return malloc (items * size);
2131}
2132
2133static void
2134__xd3_free_func (void* opaque, void* address)
2135{
2136  free (address);
2137}
2138
2139static void*
2140xd3_alloc (xd3_stream *stream,
2141           usize_t      elts,
2142           usize_t      size)
2143{
2144  void *a = stream->alloc (stream->opaque, elts, size);
2145
2146  if (a != NULL)
2147    {
2148      IF_DEBUG (stream->alloc_cnt += 1);
2149    }
2150  else
2151    {
2152      stream->msg = "out of memory";
2153    }
2154
2155  return a;
2156}
2157
2158static void
2159xd3_free (xd3_stream *stream,
2160          void       *ptr)
2161{
2162  if (ptr != NULL)
2163    {
2164      IF_DEBUG (stream->free_cnt += 1);
2165      XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt);
2166      stream->free (stream->opaque, ptr);
2167    }
2168}
2169
2170#if XD3_ENCODER
2171static void*
2172xd3_alloc0 (xd3_stream *stream,
2173            usize_t      elts,
2174            usize_t      size)
2175{
2176  void *a = xd3_alloc (stream, elts, size);
2177
2178  if (a != NULL)
2179    {
2180      memset (a, 0, elts * size);
2181    }
2182
2183  return a;
2184}
2185
2186static xd3_output*
2187xd3_alloc_output (xd3_stream *stream,
2188                  xd3_output *old_output)
2189{
2190  xd3_output *output;
2191  uint8_t    *base;
2192
2193  if (stream->enc_free != NULL)
2194    {
2195      output = stream->enc_free;
2196      stream->enc_free = output->next_page;
2197    }
2198  else
2199    {
2200      if ((output = xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL)
2201        {
2202          return NULL;
2203        }
2204
2205      if ((base = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL)
2206        {
2207          xd3_free (stream, output);
2208          return NULL;
2209        }
2210
2211      output->base  = base;
2212      output->avail = XD3_ALLOCSIZE;
2213    }
2214
2215  output->next = 0;
2216
2217  if (old_output)
2218    {
2219      old_output->next_page = output;
2220    }
2221
2222  output->next_page = NULL;
2223
2224  return output;
2225}
2226
2227static usize_t
2228xd3_sizeof_output (xd3_output *output)
2229{
2230  usize_t s = 0;
2231
2232  for (; output; output = output->next_page)
2233    {
2234      s += output->next;
2235    }
2236
2237  return s;
2238}
2239
2240static void
2241xd3_freelist_output (xd3_stream *stream,
2242                     xd3_output *output)
2243{
2244  xd3_output *tmp;
2245
2246  while (output)
2247    {
2248      tmp    = output;
2249      output = output->next_page;
2250
2251      tmp->next = 0;
2252      tmp->next_page = stream->enc_free;
2253      stream->enc_free = tmp;
2254    }
2255}
2256
2257static void
2258xd3_free_output (xd3_stream *stream,
2259                 xd3_output *output)
2260{
2261  xd3_output *next;
2262
2263 again:
2264  if (output == NULL)
2265    {
2266      return;
2267    }
2268
2269  next = output->next_page;
2270
2271  xd3_free (stream, output->base);
2272  xd3_free (stream, output);
2273
2274  output = next;
2275  goto again;
2276}
2277#endif /* XD3_ENCODER */
2278
2279void
2280xd3_free_stream (xd3_stream *stream)
2281{
2282  xd3_iopt_buflist *blist = stream->iopt_alloc;
2283
2284  while (blist != NULL)
2285    {
2286      xd3_iopt_buflist *tmp = blist;
2287      blist = blist->next;
2288      xd3_free (stream, tmp->buffer);
2289      xd3_free (stream, tmp);
2290    }
2291
2292  xd3_free (stream, stream->large_table);
2293  xd3_free (stream, stream->small_table);
2294  xd3_free (stream, stream->small_prev);
2295
2296#if XD3_ENCODER
2297  {
2298    int i;
2299    for (i = 0; i < ENC_SECTS; i += 1)
2300      {
2301        xd3_free_output (stream, stream->enc_heads[i]);
2302      }
2303    xd3_free_output (stream, stream->enc_free);
2304  }
2305#endif
2306
2307  xd3_free (stream, stream->acache.near_array);
2308  xd3_free (stream, stream->acache.same_array);
2309
2310  xd3_free (stream, stream->inst_sect.copied1);
2311  xd3_free (stream, stream->addr_sect.copied1);
2312  xd3_free (stream, stream->data_sect.copied1);
2313
2314  xd3_free (stream, stream->dec_buffer);
2315  xd3_free (stream, (uint8_t*) stream->dec_lastwin);
2316
2317  xd3_free (stream, stream->buf_in);
2318  xd3_free (stream, stream->dec_appheader);
2319  xd3_free (stream, stream->dec_codetbl);
2320  xd3_free (stream, stream->code_table_alloc);
2321
2322#if SECONDARY_ANY
2323  xd3_free (stream, stream->inst_sect.copied2);
2324  xd3_free (stream, stream->addr_sect.copied2);
2325  xd3_free (stream, stream->data_sect.copied2);
2326
2327  if (stream->sec_type != NULL)
2328    {
2329      stream->sec_type->destroy (stream, stream->sec_stream_d);
2330      stream->sec_type->destroy (stream, stream->sec_stream_i);
2331      stream->sec_type->destroy (stream, stream->sec_stream_a);
2332    }
2333#endif
2334
2335  XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);
2336
2337  memset (stream, 0, sizeof (xd3_stream));
2338}
2339
2340#if (XD3_DEBUG > 1 || VCDIFF_TOOLS)
2341static const char*
2342xd3_rtype_to_string (xd3_rtype type, int print_mode)
2343{
2344  switch (type)
2345    {
2346    case XD3_NOOP:
2347      return "NOOP ";
2348    case XD3_RUN:
2349      return "RUN  ";
2350    case XD3_ADD:
2351      return "ADD  ";
2352    default: break;
2353    }
2354  if (! print_mode)
2355    {
2356      return "CPY  ";
2357    }
2358  switch (type)
2359    {
2360    case XD3_CPY + 0: return "CPY_0";
2361    case XD3_CPY + 1: return "CPY_1";
2362    case XD3_CPY + 2: return "CPY_2";
2363    case XD3_CPY + 3: return "CPY_3";
2364    case XD3_CPY + 4: return "CPY_4";
2365    case XD3_CPY + 5: return "CPY_5";
2366    case XD3_CPY + 6: return "CPY_6";
2367    case XD3_CPY + 7: return "CPY_7";
2368    case XD3_CPY + 8: return "CPY_8";
2369    case XD3_CPY + 9: return "CPY_9";
2370    default:          return "CPY>9";
2371    }
2372}
2373#endif
2374
2375/******************************************************************************************
2376 Stream configuration
2377 ******************************************************************************************/
2378
2379int
2380xd3_config_stream(xd3_stream *stream,
2381                  xd3_config *config)
2382{
2383  int ret;
2384  xd3_config defcfg;
2385  xd3_smatcher *smatcher = &stream->smatcher;
2386
2387  if (config == NULL)
2388    {
2389      config = & defcfg;
2390      memset (config, 0, sizeof (*config));
2391    }
2392
2393  /* Initial setup: no error checks yet */
2394  memset (stream, 0, sizeof (*stream));
2395
2396  stream->winsize   = config->winsize   ? config->winsize : XD3_DEFAULT_WINSIZE;
2397  stream->sprevsz   = config->sprevsz   ? config->sprevsz : XD3_DEFAULT_SPREVSZ;
2398  stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_SRCWINSZ;
2399
2400  if (config->iopt_size == 0)
2401    {
2402      stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst);
2403      stream->iopt_unlimited = 1;
2404    }
2405  else
2406    {
2407      stream->iopt_size = config->iopt_size;
2408    }
2409
2410  stream->getblk    = config->getblk;
2411  stream->alloc     = config->alloc ? config->alloc : __xd3_alloc_func;
2412  stream->free      = config->freef ? config->freef : __xd3_free_func;
2413  stream->opaque    = config->opaque;
2414  stream->flags     = config->flags;
2415
2416  /* Secondary setup. */
2417  stream->sec_data  = config->sec_data;
2418  stream->sec_inst  = config->sec_inst;
2419  stream->sec_addr  = config->sec_addr;
2420
2421  stream->sec_data.data_type = DATA_SECTION;
2422  stream->sec_inst.data_type = INST_SECTION;
2423  stream->sec_addr.data_type = ADDR_SECTION;
2424
2425  /* Check static sizes. */
2426  if (sizeof (usize_t) != SIZEOF_USIZE_T ||
2427      sizeof (xoff_t) != SIZEOF_XOFF_T ||
2428      (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL)))
2429    {
2430      stream->msg = "incorrect compilation: wrong integer sizes";
2431      return XD3_INTERNAL;
2432    }
2433
2434  /* Check/set secondary compressor. */
2435  switch (stream->flags & XD3_SEC_TYPE)
2436    {
2437    case 0:
2438      if (stream->flags & XD3_SEC_OTHER)
2439        {
2440          stream->msg = "XD3_SEC flags require a secondary compressor type";
2441          return XD3_INTERNAL;
2442        }
2443      break;
2444    case XD3_SEC_FGK:
2445      FGK_CASE (stream);
2446    case XD3_SEC_DJW:
2447      DJW_CASE (stream);
2448    default:
2449      stream->msg = "too many secondary compressor types set";
2450      return XD3_INTERNAL;
2451    }
2452
2453  /* Check/set encoder code table. */
2454  switch (stream->flags & XD3_ALT_CODE_TABLE) {
2455  case 0:
2456    stream->code_table_desc = & __rfc3284_code_table_desc;
2457    stream->code_table_func = xd3_rfc3284_code_table;
2458    break;
2459#if GENERIC_ENCODE_TABLES
2460  case XD3_ALT_CODE_TABLE:
2461    stream->code_table_desc = & __alternate_code_table_desc;
2462    stream->code_table_func = xd3_alternate_code_table;
2463    stream->comp_table_func = xd3_compute_alternate_table_encoding;
2464    break;
2465#endif
2466  default:
2467    stream->msg = "alternate code table support was not compiled";
2468    return XD3_INTERNAL;
2469  }
2470
2471  /* Check sprevsz */
2472  if (smatcher->small_chain == 1)
2473    {
2474      stream->sprevsz = 0;
2475    }
2476  else
2477    {
2478      if ((ret = xd3_check_pow2 (stream->sprevsz, NULL)))
2479        {
2480          stream->msg = "sprevsz is required to be a power of two";
2481          return XD3_INTERNAL;
2482        }
2483
2484      stream->sprevmask = stream->sprevsz - 1;
2485    }
2486
2487  /* Default scanner settings. */
2488  switch (config->smatch_cfg)
2489    {
2490      IF_BUILD_SOFT(case XD3_SMATCH_SOFT:
2491      {
2492        *smatcher = config->smatcher_soft;
2493        smatcher->string_match = __smatcher_soft.string_match;
2494        smatcher->name = __smatcher_soft.name;
2495        if (smatcher->large_look  < MIN_MATCH ||
2496            smatcher->large_step  < 1         ||
2497            smatcher->small_look  < MIN_MATCH)
2498          {
2499            stream->msg = "invalid soft string-match config";
2500            return XD3_INVALID;
2501          }
2502        break;
2503      })
2504
2505      IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT:
2506                    *smatcher = __smatcher_default;
2507                    break;)
2508      IF_BUILD_SLOW(case XD3_SMATCH_SLOW:
2509                    *smatcher = __smatcher_slow;
2510                    break;)
2511      IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST:
2512                    *smatcher = __smatcher_fastest;
2513                    break;)
2514      IF_BUILD_FAST(case XD3_SMATCH_FAST:
2515                    *smatcher = __smatcher_fast;
2516                    break;)
2517    default:
2518      stream->msg = "invalid string match config type";
2519      return XD3_INTERNAL;
2520    }
2521
2522  if (config->smatch_cfg == XD3_SMATCH_DEFAULT &&
2523      (stream->flags & XD3_COMPLEVEL_MASK) != 0)
2524    {
2525      int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT;
2526
2527      switch (level)
2528        {
2529        case 1: case 2:
2530          IF_BUILD_FASTEST(*smatcher = __smatcher_fastest;
2531                           break;)
2532        case 3: case 4: case 5:
2533          IF_BUILD_FAST(*smatcher = __smatcher_fast;
2534                        break;)
2535        case 6:
2536          IF_BUILD_DEFAULT(*smatcher = __smatcher_default;
2537                           break;)
2538        default:
2539          IF_BUILD_SLOW(*smatcher = __smatcher_slow;
2540                        break;)
2541        }
2542    }
2543
2544  return 0;
2545}
2546
2547/******************************************************************************************
2548 Getblk interface
2549 ******************************************************************************************/
2550
2551/* This function interfaces with the client getblk function, checks its results, etc. */
2552static int
2553xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno)
2554{
2555  int ret;
2556  xd3_source *source = stream->src;
2557
2558  if (blkno >= source->blocks)
2559    {
2560      IF_DEBUG1 (DP(RINT "[getblk] block %"Q"u\n", blkno));
2561      stream->msg = "source file too short";
2562      return XD3_INTERNAL;
2563    }
2564
2565  if (blkno != source->curblkno || source->curblk == NULL)
2566    {
2567      XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno);
2568
2569      source->getblkno = blkno;
2570
2571      if (stream->getblk == NULL)
2572        {
2573          stream->msg = "getblk source input";
2574          return XD3_GETSRCBLK;
2575        }
2576      else if ((ret = stream->getblk (stream, source, blkno)) != 0)
2577        {
2578          stream->msg = "getblk failed";
2579          return ret;
2580        }
2581
2582      XD3_ASSERT (source->curblk != NULL);
2583    }
2584
2585  if (source->onblk != xd3_bytes_on_srcblk (source, blkno))
2586    {
2587      stream->msg = "getblk returned short block";
2588      return XD3_INTERNAL;
2589    }
2590
2591  return 0;
2592}
2593
2594/******************************************************************************************
2595 Stream open/close
2596 ******************************************************************************************/
2597
2598int
2599xd3_set_source (xd3_stream *stream,
2600                xd3_source *src)
2601{
2602  xoff_t blk_num;
2603  xoff_t tail_size;
2604
2605  IF_DEBUG1 (DP(RINT "[set source] size %"Q"u\n", src->size));
2606
2607  if (src == NULL || src->size < stream->smatcher.large_look) { return 0; }
2608
2609  stream->src  = src;
2610  blk_num      = src->size / src->blksize;
2611  tail_size    = src->size % src->blksize;
2612  src->blocks  = blk_num + (tail_size > 0);
2613  src->srclen  = 0;
2614  src->srcbase = 0;
2615
2616  return 0;
2617}
2618
2619void
2620xd3_abort_stream (xd3_stream *stream)
2621{
2622  stream->dec_state = DEC_ABORTED;
2623  stream->enc_state = ENC_ABORTED;
2624}
2625
2626int
2627xd3_close_stream (xd3_stream *stream)
2628{
2629  if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED)
2630    {
2631      /* If encoding, should be ready for more input but not actually have any. */
2632      if (stream->enc_state != ENC_INPUT || stream->avail_in != 0)
2633        {
2634          stream->msg = "encoding is incomplete";
2635          return XD3_INTERNAL;
2636        }
2637    }
2638  else
2639    {
2640      switch (stream->dec_state)
2641        {
2642        case DEC_VCHEAD:
2643        case DEC_WININD:
2644          /* TODO: Address the zero-byte ambiguity.  Does the encoder emit a window or
2645           * not?  If so, then catch an error here.  If not, need another routine to say
2646           * decode_at_least_one_if_empty. */
2647        case DEC_ABORTED:
2648          break;
2649        default:
2650          /* If decoding, should be ready for the next window. */
2651          stream->msg = "EOF in decode";
2652          return XD3_INTERNAL;
2653        }
2654    }
2655
2656  return 0;
2657}
2658
2659/******************************************************************************************
2660 Application header
2661 ******************************************************************************************/
2662
2663int
2664xd3_get_appheader (xd3_stream  *stream,
2665                   uint8_t    **data,
2666                   usize_t      *size)
2667{
2668  if (stream->dec_state < DEC_WININD)
2669    {
2670      stream->msg = "application header not available";
2671      return XD3_INTERNAL;
2672    }
2673
2674  (*data) = stream->dec_appheader;
2675  (*size) = stream->dec_appheadsz;
2676  return 0;
2677}
2678
2679/******************************************************************************************
2680 Decoder stuff
2681 ******************************************************************************************/
2682
2683#include "xdelta3-decode.h"
2684
2685/******************************************************************************************
2686 Encoder stuff
2687 ******************************************************************************************/
2688
2689#if XD3_ENCODER
2690void
2691xd3_set_appheader (xd3_stream    *stream,
2692                   const uint8_t *data,
2693                   usize_t         size)
2694{
2695  stream->enc_appheader = data;
2696  stream->enc_appheadsz = size;
2697}
2698
2699#if XD3_DEBUG
2700static int
2701xd3_iopt_check (xd3_stream *stream)
2702{
2703  int ul = xd3_rlist_length (& stream->iopt_used);
2704  int fl = xd3_rlist_length (& stream->iopt_free);
2705
2706  return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size;
2707}
2708#endif
2709
2710static xd3_rinst*
2711xd3_iopt_free (xd3_stream *stream, xd3_rinst *i)
2712{
2713  xd3_rinst *n = xd3_rlist_remove (i);
2714  xd3_rlist_push_back (& stream->iopt_free, i);
2715  return n;
2716}
2717
2718static void
2719xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
2720{
2721  if (i->type != XD3_ADD)
2722    {
2723      xd3_rlist_push_back (& stream->iopt_free, i);
2724    }
2725}
2726
2727/* When an instruction is ready to flush from the iopt buffer, this function is called to
2728 * produce an encoding.  It writes the instruction plus size, address, and data to the
2729 * various encoding sections. */
2730static int
2731xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2732{
2733  int ret;
2734
2735  /* Check for input overflow. */
2736  XD3_ASSERT (inst->pos + inst->size <= stream->avail_in);
2737
2738  switch (inst->type)
2739    {
2740    case XD3_CPY:
2741      {
2742        /* the address may have an offset if there is a source window. */
2743        usize_t addr;
2744        xd3_source *src = stream->src;
2745
2746        if (src != NULL)
2747          {
2748            /* If there is a source copy, the source must have its source window decided
2749             * before we can encode.  This can be bad -- we have to make this decision
2750             * even if no source matches have been found. */
2751            if (stream->srcwin_decided == 0)
2752              {
2753                if ((ret = xd3_srcwin_setup (stream))) { return ret; }
2754              }
2755
2756            /* xtra field indicates the copy is from the source */
2757            if (inst->xtra)
2758              {
2759                XD3_ASSERT (inst->addr >= src->srcbase);
2760                XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen);
2761                addr = (inst->addr - src->srcbase);
2762                stream->n_scpy += 1;
2763                stream->l_scpy += inst->size;
2764              }
2765            else
2766              {
2767                /* with source window: target copy address is offset by taroff. */
2768                addr = stream->taroff + (usize_t) inst->addr;
2769                stream->n_tcpy += 1;
2770                stream->l_tcpy += inst->size;
2771              }
2772          }
2773        else
2774          {
2775            addr = (usize_t) inst->addr;
2776            stream->n_tcpy += 1;
2777            stream->l_tcpy += inst->size;
2778          }
2779
2780        XD3_ASSERT (inst->size >= MIN_MATCH);
2781
2782        /* the "here" position is always offset by taroff */
2783        if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type)))
2784          {
2785            return ret;
2786          }
2787
2788        IF_DEBUG1 ({
2789          static int cnt;
2790          DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n",
2791                   cnt++,
2792                   stream->total_in + inst->pos,
2793                   stream->total_in + inst->pos + inst->size,
2794                   inst->addr, inst->addr + inst->size, inst->size);
2795        });
2796        break;
2797      }
2798    case XD3_RUN:
2799      {
2800        XD3_ASSERT (inst->size >= MIN_MATCH);
2801
2802        if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; }
2803
2804        stream->n_run += 1;
2805        stream->l_run += inst->size;
2806
2807        IF_DEBUG1 ({
2808          static int cnt;
2809          DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2810        });
2811        break;
2812      }
2813    case XD3_ADD:
2814      {
2815        if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream),
2816                                   stream->next_in + inst->pos, inst->size))) { return ret; }
2817
2818        stream->n_add += 1;
2819        stream->l_add += inst->size;
2820
2821        IF_DEBUG1 ({
2822          static int cnt;
2823          DP(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2824        });
2825
2826        break;
2827      }
2828    }
2829
2830  /* This is the only place stream->unencoded_offset is incremented. */
2831  XD3_ASSERT (stream->unencoded_offset == inst->pos);
2832  stream->unencoded_offset += inst->size;
2833
2834  IF_DEBUG (stream->n_emit += inst->size);
2835
2836  inst->code2 = 0;
2837
2838  XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst);
2839
2840  if (stream->iout != NULL)
2841    {
2842      if (stream->iout->code2 != 0)
2843        {
2844          if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; }
2845
2846          xd3_iopt_free_nonadd (stream, stream->iout);
2847          xd3_iopt_free_nonadd (stream, inst);
2848          stream->iout = NULL;
2849          return 0;
2850        }
2851      else
2852        {
2853          if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2854
2855          xd3_iopt_free_nonadd (stream, stream->iout);
2856        }
2857    }
2858
2859  stream->iout = inst;
2860
2861  return 0;
2862}
2863
2864/* This possibly encodes an add instruction, iadd, which must remain on the stack until
2865 * the following call to xd3_iopt_finish_encoding. */
2866static int
2867xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
2868{
2869  int ret;
2870  usize_t off = stream->unencoded_offset;
2871
2872  if (pos > off)
2873    {
2874      iadd->type = XD3_ADD;
2875      iadd->pos  = off;
2876      iadd->size = pos - off;
2877
2878      if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; }
2879    }
2880
2881  return 0;
2882}
2883
2884/* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it
2885 * may also produce an add instruction for an unmatched region. */
2886static int
2887xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
2888{
2889  int ret;
2890  xd3_rinst iadd;
2891
2892  if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; }
2893
2894  if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; }
2895
2896  return 0;
2897}
2898
2899/* Generates a final add instruction to encode the remaining input. */
2900static int
2901xd3_iopt_add_finalize (xd3_stream *stream)
2902{
2903  int ret;
2904  xd3_rinst iadd;
2905
2906  if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; }
2907
2908  if (stream->iout)
2909    {
2910      if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2911
2912      xd3_iopt_free_nonadd (stream, stream->iout);
2913      stream->iout = NULL;
2914    }
2915
2916  return 0;
2917}
2918
2919/* Compact the instruction buffer by choosing the best non-overlapping instructions when
2920 * lazy string-matching.  There are no ADDs in the iopt buffer because those are
2921 * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize. */
2922static int
2923xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2924{
2925  xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used);
2926  xd3_rinst *r2;
2927  xd3_rinst *r3;
2928  usize_t r1end;
2929  usize_t r2end;
2930  usize_t r2off;
2931  usize_t r2moff;
2932  usize_t gap;
2933  usize_t flushed;
2934  int ret;
2935
2936  XD3_ASSERT (xd3_iopt_check (stream));
2937
2938  /* Note: once tried to skip this step if it's possible to assert there are no
2939   * overlapping instructions.  Doesn't work because xd3_opt_erase leaves overlapping
2940   * instructions. */
2941  while (! xd3_rlist_end (& stream->iopt_used, r1) &&
2942         ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1)))
2943    {
2944      r1end = r1->pos + r1->size;
2945
2946      /* If the instructions do not overlap, continue. */
2947      if (r1end <= r2->pos)
2948        {
2949          r1 = r2;
2950          continue;
2951        }
2952
2953      r2end = r2->pos + r2->size;
2954
2955      /* The min_match adjustments prevent this. */
2956      XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR));
2957
2958      /* If r3 is available... */
2959      if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2)))
2960        {
2961          /* If r3 starts before r1 finishes or just about, r2 is irrelevant */
2962          if (r3->pos <= r1end + 1)
2963            {
2964              xd3_iopt_free (stream, r2);
2965              continue;
2966            }
2967        }
2968      else if (! force)
2969        {
2970          /* Unless force, end the loop when r3 is not available. */
2971          break;
2972        }
2973
2974      r2off  = r2->pos - r1->pos;
2975      r2moff = r2end - r1end;
2976      gap    = r2end - r1->pos;
2977
2978      /* If the two matches overlap almost entirely, choose the better match and discard
2979       * the other.  This heuristic is BLACK MAGIC.  Havesomething better? */
2980      if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
2981        {
2982          /* Only one match should be used, choose the longer one. */
2983          if (r1->size < r2->size)
2984            {
2985              xd3_iopt_free (stream, r1);
2986              r1 = r2;
2987            }
2988          else
2989            {
2990              /* We are guaranteed that r1 does not overlap now, so advance past r2 */
2991              r1 = xd3_iopt_free (stream, r2);
2992            }
2993          continue;
2994        }
2995      else
2996        {
2997          /* Shorten one of the instructions -- could be optimized based on the address
2998           * cache. */
2999          usize_t average;
3000          usize_t newsize;
3001          usize_t adjust1;
3002
3003          XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
3004
3005          /* Try to balance the length of both instructions, but avoid making both longer
3006           * than MAX_MATCH_SPLIT . */
3007          average = (gap) / 2;
3008          newsize = min (MAX_MATCH_SPLIT, gap - average);
3009
3010          /* Should be possible to simplify this code. */
3011          if (newsize > r1->size)
3012            {
3013              /* shorten r2 */
3014              adjust1 = r1end - r2->pos;
3015            }
3016          else if (newsize > r2->size)
3017            {
3018              /* shorten r1 */
3019              adjust1 = r1end - r2->pos;
3020
3021              XD3_ASSERT (r1->size > adjust1);
3022
3023              r1->size -= adjust1;
3024
3025              /* don't shorten r2 */
3026              adjust1 = 0;
3027            }
3028          else
3029            {
3030              /* shorten r1 */
3031              adjust1 = r1->size - newsize;
3032
3033              if (r2->pos > r1end - adjust1)
3034                {
3035                  adjust1 -= r2->pos - (r1end - adjust1);
3036                }
3037
3038              XD3_ASSERT (r1->size > adjust1);
3039
3040              r1->size -= adjust1;
3041
3042              /* shorten r2 */
3043              XD3_ASSERT (r1->pos + r1->size >= r2->pos);
3044
3045              adjust1 = r1->pos + r1->size - r2->pos;
3046            }
3047
3048          /* Fallthrough above if-else, shorten r2 */
3049          XD3_ASSERT (r2->size > adjust1);
3050
3051          r2->size -= adjust1;
3052          r2->pos  += adjust1;
3053          r2->addr += adjust1;
3054
3055          XD3_ASSERT (r1->size >= MIN_MATCH);
3056          XD3_ASSERT (r2->size >= MIN_MATCH);
3057
3058          r1 = r2;
3059        }
3060    }
3061
3062  XD3_ASSERT (xd3_iopt_check (stream));
3063
3064  /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of
3065   * the queue. */
3066  for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); )
3067    {
3068      xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used);
3069      if ((ret = xd3_iopt_add_encoding (stream, renc)))
3070        {
3071          return ret;
3072        }
3073
3074      if (! force)
3075        {
3076          if (++flushed > stream->iopt_size / 2)
3077            {
3078              break;
3079            }
3080
3081          /* If there are only two instructions remaining, break, because they were
3082           * not optimized.  This means there were more than 50% eliminated by the
3083           * loop above. */
3084          r1 = xd3_rlist_front (& stream->iopt_used);
3085          if (xd3_rlist_end(& stream->iopt_used, r1) ||
3086              xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) ||
3087              xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2)))
3088            {
3089              break;
3090            }
3091        }
3092    }
3093
3094  XD3_ASSERT (xd3_iopt_check (stream));
3095
3096  XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0);
3097
3098  return 0;
3099}
3100
3101static int
3102xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
3103{
3104  xd3_rinst *i;
3105  int ret;
3106
3107  if (xd3_rlist_empty (& stream->iopt_free))
3108    {
3109      if (stream->iopt_unlimited)
3110        {
3111          int elts = XD3_ALLOCSIZE / sizeof(xd3_rinst);
3112
3113          if ((ret = xd3_alloc_iopt (stream, elts)))
3114            {
3115              return ret;
3116            }
3117
3118          stream->iopt_size += elts;
3119        }
3120      else
3121        {
3122          if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; }
3123
3124          XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free));
3125        }
3126    }
3127
3128  i = xd3_rlist_pop_back (& stream->iopt_free);
3129
3130  xd3_rlist_push_back (& stream->iopt_used, i);
3131
3132  (*iptr) = i;
3133
3134  ++stream->i_slots_used;
3135
3136  return 0;
3137}
3138
3139/* A copy is about to be emitted that extends backwards to POS, therefore it may
3140 * completely cover some existing instructions in the buffer.  If an instruction is
3141 * completely covered by this new match, erase it.  If the new instruction is covered by
3142 * the previous one, return 1 to skip it. */
3143static void
3144xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
3145{
3146  while (! xd3_rlist_empty (& stream->iopt_used))
3147    {
3148      xd3_rinst *r = xd3_rlist_back (& stream->iopt_used);
3149
3150      /* Verify that greedy is working.  The previous instruction should end before the
3151       * new one begins. */
3152      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
3153      /* Verify that min_match is working.  The previous instruction should end before the
3154       * new one ends. */
3155      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
3156
3157      /* See if the last instruction starts before the new instruction.  If so, there is
3158       * nothing to erase. */
3159      if (r->pos < pos)
3160        {
3161          return;
3162        }
3163
3164      /* Otherwise, the new instruction covers the old one, delete it and repeat. */
3165      xd3_rlist_remove (r);
3166      xd3_rlist_push_back (& stream->iopt_free, r);
3167      --stream->i_slots_used;
3168    }
3169}
3170
3171/* This function tells the last matched input position. */
3172static usize_t
3173xd3_iopt_last_matched (xd3_stream *stream)
3174{
3175  xd3_rinst *r;
3176
3177  if (xd3_rlist_empty (& stream->iopt_used))
3178    {
3179      return 0;
3180    }
3181
3182  r = xd3_rlist_back (& stream->iopt_used);
3183
3184  return r->pos + r->size;
3185}
3186
3187/******************************************************************************************
3188 Emit routines
3189 ******************************************************************************************/
3190
3191static int
3192xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code)
3193{
3194  int has_size = stream->code_table[code].size1 == 0;
3195  int ret;
3196
3197  IF_DEBUG1 (DP(RINT "[emit1] %u %s (%u) code %u\n",
3198               single->pos,
3199               xd3_rtype_to_string (single->type, 0),
3200               single->size,
3201               code));
3202
3203  if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3204
3205  if (has_size)
3206    {
3207      if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) { return ret; }
3208    }
3209
3210  return 0;
3211}
3212
3213static int
3214xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code)
3215{
3216  int ret;
3217
3218  /* All double instructions use fixed sizes, so all we need to do is output the
3219   * instruction code, no sizes. */
3220  XD3_ASSERT (stream->code_table[code].size1 != 0 &&
3221              stream->code_table[code].size2 != 0);
3222
3223  if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3224
3225  IF_DEBUG1 (DP(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n",
3226               first->pos,
3227               xd3_rtype_to_string (first->type, 0),
3228               first->size,
3229               xd3_rtype_to_string (second->type, 0),
3230               second->size,
3231               code));
3232
3233  return 0;
3234}
3235
3236/* This enters a potential run instruction into the iopt buffer.  The position argument is
3237 * relative to the target window. */
3238static INLINE int
3239xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c)
3240{
3241  xd3_rinst* ri;
3242  int ret;
3243
3244  XD3_ASSERT (pos + size <= stream->avail_in);
3245
3246  if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3247
3248  ri->type = XD3_RUN;
3249  ri->xtra = run_c;
3250  ri->pos  = pos;
3251  ri->size = size;
3252
3253  return 0;
3254}
3255
3256/* This enters a potential copy instruction into the iopt buffer.  The position argument
3257 * is relative to the target window.. */
3258static INLINE int
3259xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source)
3260{
3261  xd3_rinst* ri;
3262  int ret;
3263
3264  XD3_ASSERT (pos + size <= stream->avail_in);
3265
3266  if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3267
3268  ri->type = XD3_CPY;
3269  ri->xtra = is_source;
3270  ri->pos  = pos;
3271  ri->size = size;
3272  ri->addr = addr;
3273
3274  return 0;
3275}
3276
3277static int
3278xd3_emit_hdr (xd3_stream *stream)
3279{
3280  int  ret;
3281  int  use_secondary = stream->sec_type != NULL;
3282  int  use_adler32   = stream->flags & XD3_ADLER32;
3283  int  vcd_source    = xd3_encoder_used_source (stream);
3284  uint win_ind = 0;
3285  uint del_ind = 0;
3286  usize_t enc_len;
3287  usize_t tgt_len;
3288  usize_t data_len;
3289  usize_t inst_len;
3290  usize_t addr_len;
3291
3292  XD3_ASSERT (stream->n_emit == stream->avail_in);
3293
3294  if (stream->current_window == 0)
3295    {
3296      uint hdr_ind = 0;
3297      int use_appheader  = stream->enc_appheader != NULL;
3298      int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc);
3299
3300      if (use_secondary)  { hdr_ind |= VCD_SECONDARY; }
3301      if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; }
3302      if (use_appheader)  { hdr_ind |= VCD_APPHEADER; }
3303
3304      if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 ||
3305          (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 ||
3306          (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 ||
3307          (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 ||
3308          (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0)
3309        {
3310          return ret;
3311        }
3312
3313      /* Secondary compressor ID */
3314#if SECONDARY_ANY
3315      if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; }
3316#endif
3317
3318      /* Compressed code table */
3319      if (use_gencodetbl)
3320        {
3321          usize_t code_table_size;
3322          const uint8_t *code_table_data;
3323
3324          if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; }
3325
3326          if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) ||
3327              (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) ||
3328              (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) ||
3329              (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; }
3330        }
3331
3332      /* Application header */
3333      if (use_appheader)
3334        {
3335          if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) ||
3336              (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz)))
3337            {
3338              return ret;
3339            }
3340        }
3341    }
3342
3343  /* try to compress this window */
3344#if SECONDARY_ANY
3345  if (use_secondary)
3346    {
3347      int data_sec = 0;
3348      int inst_sec = 0;
3349      int addr_sec = 0;
3350
3351#     define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \
3352             ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \
3353              (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \
3354                                        & xd3_sec_ ## LOWER (stream), \
3355                                        & stream->sec_ ## LOWER, & LOWER ## _sec)))
3356
3357      if (ENCODE_SECONDARY_SECTION (DATA, data) ||
3358          ENCODE_SECONDARY_SECTION (INST, inst) ||
3359          ENCODE_SECONDARY_SECTION (ADDR, addr))
3360        {
3361          return ret;
3362        }
3363
3364      del_ind |= (data_sec ? VCD_DATACOMP : 0);
3365      del_ind |= (inst_sec ? VCD_INSTCOMP : 0);
3366      del_ind |= (addr_sec ? VCD_ADDRCOMP : 0);
3367    }
3368#endif
3369
3370  /* if (vcd_target) { win_ind |= VCD_TARGET; } */
3371  if (vcd_source)  { win_ind |= VCD_SOURCE; }
3372  if (use_adler32) { win_ind |= VCD_ADLER32; }
3373
3374  /* window indicator */
3375  if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; }
3376
3377  /* source window */
3378  if (vcd_source)
3379    {
3380      /* or (vcd_target) { ... } */
3381      if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) ||
3382          (ret = xd3_emit_offset (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; }
3383    }
3384
3385  tgt_len  = stream->avail_in;
3386  data_len = xd3_sizeof_output (DATA_HEAD (stream));
3387  inst_len = xd3_sizeof_output (INST_HEAD (stream));
3388  addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
3389
3390  /* The enc_len field is redundent... doh! */
3391  enc_len = (1 + (xd3_sizeof_size (tgt_len) +
3392                  xd3_sizeof_size (data_len) +
3393                  xd3_sizeof_size (inst_len) +
3394                  xd3_sizeof_size (addr_len)) +
3395             data_len +
3396             inst_len +
3397             addr_len +
3398             (use_adler32 ? 4 : 0));
3399
3400  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) ||
3401      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) ||
3402      (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) ||
3403      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) ||
3404      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) ||
3405      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len)))
3406    {
3407      return ret;
3408    }
3409
3410  if (use_adler32)
3411    {
3412      uint8_t  send[4];
3413      uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in);
3414
3415      send[0] = (a32 >> 24);
3416      send[1] = (a32 >> 16);
3417      send[2] = (a32 >> 8);
3418      send[3] = (a32 & 0xff);
3419
3420      if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; }
3421    }
3422
3423  return 0;
3424}
3425
3426/******************************************************************************************
3427 Encode routines
3428 ******************************************************************************************/
3429
3430static int
3431xd3_encode_buffer_leftover (xd3_stream *stream)
3432{
3433  usize_t take;
3434  usize_t room;
3435
3436  /* Allocate the buffer. */
3437  if (stream->buf_in == NULL && (stream->buf_in = xd3_alloc (stream, stream->winsize, 1)) == NULL)
3438    {
3439      return ENOMEM;
3440    }
3441
3442  /* Take leftover input first. */
3443  if (stream->buf_leftover != NULL)
3444    {
3445      XD3_ASSERT (stream->buf_avail == 0);
3446      XD3_ASSERT (stream->buf_leftavail < stream->winsize);
3447
3448      IF_DEBUG1 (DP(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in));
3449
3450      memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail);
3451
3452      stream->buf_leftover = NULL;
3453      stream->buf_avail    = stream->buf_leftavail;
3454    }
3455
3456  /* Copy into the buffer. */
3457  room = stream->winsize - stream->buf_avail;
3458  take = min (room, stream->avail_in);
3459
3460  memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take);
3461
3462  stream->buf_avail += take;
3463
3464  if (take < stream->avail_in)
3465    {
3466      /* Buffer is full */
3467      stream->buf_leftover  = stream->next_in  + take;
3468      stream->buf_leftavail = stream->avail_in - take;
3469
3470      IF_DEBUG1 (DP(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail));
3471    }
3472  else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH))
3473    {
3474      /* Buffer has space */
3475      IF_DEBUG1 (DP(RINT "[leftover] %u emptied\n", take));
3476      return XD3_INPUT;
3477    }
3478
3479  /* Use the buffer: */
3480  stream->next_in   = stream->buf_in;
3481  stream->avail_in  = stream->buf_avail;
3482  stream->buf_avail = 0;
3483
3484  return 0;
3485}
3486
3487/* Allocates one block of xd3_rlist elements */
3488static int
3489xd3_alloc_iopt (xd3_stream *stream, int elts)
3490{
3491  int i;
3492  xd3_iopt_buflist* last = xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1);
3493
3494  if (last == NULL ||
3495      (last->buffer = xd3_alloc (stream, sizeof (xd3_rinst), elts)) == NULL)
3496    {
3497      return ENOMEM;
3498    }
3499
3500  last->next = stream->iopt_alloc;
3501  stream->iopt_alloc = last;
3502
3503  for (i = 0; i < elts; i += 1)
3504    {
3505      xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]);
3506    }
3507
3508  return 0;
3509}
3510
3511/* This function allocates all memory initially used by the encoder. */
3512static int
3513xd3_encode_init (xd3_stream *stream)
3514{
3515  int i;
3516  int large_comp = (stream->src != NULL);
3517  int small_comp = ! (stream->flags & XD3_NOCOMPRESS);
3518
3519  /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the
3520   * first call to string_match--that way identical or short inputs require no table
3521   * allocation. */
3522  if (large_comp)
3523    {
3524      usize_t hash_values = (stream->srcwin_maxsz / stream->smatcher.large_step);
3525
3526      xd3_size_hashtable (stream,
3527                          hash_values,
3528                          & stream->large_hash);
3529    }
3530
3531  if (small_comp)
3532    {
3533      usize_t hash_values = min(stream->winsize, stream->sprevsz);
3534
3535      xd3_size_hashtable (stream,
3536                          hash_values,
3537                          & stream->small_hash);
3538    }
3539
3540  for (i = 0; i < ENC_SECTS; i += 1)
3541    {
3542      if ((stream->enc_heads[i] =
3543           stream->enc_tails[i] =
3544           xd3_alloc_output (stream, NULL)) == NULL)
3545        {
3546          goto fail;
3547        }
3548    }
3549
3550  /* iopt buffer */
3551  xd3_rlist_init (& stream->iopt_used);
3552  xd3_rlist_init (& stream->iopt_free);
3553
3554  if (xd3_alloc_iopt (stream, stream->iopt_size) != 0) { goto fail; }
3555
3556  XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size);
3557  XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0);
3558
3559  /* address cache, code table */
3560  stream->acache.s_near = stream->code_table_desc->near_modes;
3561  stream->acache.s_same = stream->code_table_desc->same_modes;
3562  stream->code_table    = stream->code_table_func ();
3563
3564  return xd3_alloc_cache (stream);
3565
3566 fail:
3567
3568  return ENOMEM;
3569}
3570
3571/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate
3572 * lists and re-initializes some variables.  (The output lists were spliced together
3573 * during the ENC_FLUSH state.) */
3574static void
3575xd3_encode_reset (xd3_stream *stream)
3576{
3577  int i;
3578  xd3_output *olist;
3579
3580  IF_DEBUG (stream->n_emit = 0);
3581  stream->avail_in     = 0;
3582  stream->small_reset  = 1;
3583  stream->i_slots_used = 0;
3584
3585  if (stream->src != NULL)
3586    {
3587      stream->src->srcbase   = 0;
3588      stream->src->srclen    = 0;
3589      stream->srcwin_decided = 0;
3590      stream->match_minaddr  = 0;
3591      stream->match_maxaddr  = 0;
3592      stream->taroff         = 0;
3593    }
3594
3595  /* Reset output chains. */
3596  olist = stream->enc_heads[0];
3597
3598  for (i = 0; i < ENC_SECTS; i += 1)
3599    {
3600      XD3_ASSERT (olist != NULL);
3601
3602      stream->enc_heads[i] = olist;
3603      stream->enc_tails[i] = olist;
3604      olist = olist->next_page;
3605
3606      stream->enc_heads[i]->next = 0;
3607      stream->enc_heads[i]->next_page = NULL;
3608
3609      stream->enc_tails[i]->next_page = NULL;
3610      stream->enc_tails[i] = stream->enc_heads[i];
3611    }
3612
3613  xd3_freelist_output (stream, olist);
3614}
3615
3616/* The main encoding routine. */
3617int
3618xd3_encode_input (xd3_stream *stream)
3619{
3620  int ret, i;
3621
3622  if (stream->dec_state != 0)
3623    {
3624      stream->msg = "encoder/decoder transition";
3625      return XD3_INTERNAL;
3626    }
3627
3628  switch (stream->enc_state)
3629    {
3630    case ENC_INIT:
3631      /* Only reached on first time through: memory setup. */
3632      if ((ret = xd3_encode_init (stream))) { return ret; }
3633
3634      stream->enc_state = ENC_INPUT;
3635
3636    case ENC_INPUT:
3637
3638      /* If there is no input yet, just return.  This checks for next_in == NULL, not
3639       * avail_in == 0 since zero bytes is a valid input.  There is an assertion in
3640       * xd3_avail_input() that next_in != NULL for this reason.  By returning right away
3641       * we avoid creating an input buffer before the caller has supplied its first data.
3642       * It is possible for xd3_avail_input to be called both before and after the first
3643       * call to xd3_encode_input(). */
3644      if (stream->next_in == NULL)
3645        {
3646          return XD3_INPUT;
3647        }
3648
3649    enc_flush:
3650      /* See if we should buffer the input: either if there is already a leftover buffer,
3651       * or if the input is short of winsize without flush.  The label at this point is
3652       * reached by a goto below, when there is leftover input after postout. */
3653      if ((stream->buf_leftover != NULL) ||
3654          (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
3655        {
3656          if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; }
3657        }
3658
3659      /* Initalize the address cache before each window. */
3660      xd3_init_cache (& stream->acache);
3661
3662      stream->input_position    = 0;
3663      stream->min_match = MIN_MATCH;
3664      stream->unencoded_offset = 0;
3665
3666      stream->enc_state = ENC_SEARCH;
3667
3668      IF_DEBUG1 (DP(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n",
3669                   stream->current_window, stream->avail_in, stream->total_in));
3670
3671      return XD3_WINSTART;
3672
3673    case ENC_SEARCH:
3674
3675      /* Reentrant matching. */
3676      if (stream->src != NULL)
3677        {
3678          switch (stream->match_state)
3679            {
3680            case MATCH_TARGET:
3681              /* Try matching forward at the start of the target.  This is entered the
3682               * first time through, to check for a perfect match, and whenever there is a
3683               * source match that extends to the end of the previous window.  The
3684               * match_srcpos field is initially zero and later set during
3685               * xd3_source_extend_match. */
3686              if (stream->avail_in > 0)
3687                {
3688                  /* This call can't fail because the source window is unrestricted. */
3689                  ret = xd3_source_match_setup (stream, stream->match_srcpos);
3690                  XD3_ASSERT (ret == 0);
3691                  stream->match_state = MATCH_FORWARD;
3692                }
3693              else
3694                {
3695                  stream->match_state = MATCH_SEARCHING;
3696                }
3697              XD3_ASSERT (stream->match_fwd == 0);
3698
3699            case MATCH_FORWARD:
3700            case MATCH_BACKWARD:
3701              if (stream->avail_in != 0)
3702                {
3703                  if ((ret = xd3_source_extend_match (stream)) != 0)
3704                    {
3705                      return ret;
3706                    }
3707
3708                  stream->input_position += stream->match_fwd;
3709                }
3710
3711            case MATCH_SEARCHING:
3712              /* Continue string matching.  (It's possible that the initial match
3713               * continued through the entire input, in which case we're still in
3714               * MATCH_FORWARD and should remain so for the next input window.) */
3715              break;
3716            }
3717        }
3718
3719      /* String matching... */
3720      if (stream->avail_in != 0 &&
3721          (ret = stream->smatcher.string_match (stream)))
3722        {
3723          return ret;
3724        }
3725
3726      /* Flush the instrution buffer, then possibly add one more instruction, then emit
3727       * the header. */
3728      stream->enc_state = ENC_FLUSH;
3729      if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
3730          (ret = xd3_iopt_add_finalize (stream)) ||
3731          (ret = xd3_emit_hdr (stream)))
3732        {
3733          return ret;
3734        }
3735
3736      /* Begin output. */
3737      stream->enc_current = HDR_HEAD (stream);
3738
3739      /* Chain all the outputs together.  After doing this, it looks as if there is only
3740       * one section.  The other enc_heads are set to NULL to avoid freeing them more than
3741       * once. */
3742       for (i = 1; i < ENC_SECTS; i += 1)
3743        {
3744          stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
3745          stream->enc_heads[i] = NULL;
3746        }
3747
3748    enc_output:
3749
3750      stream->enc_state  = ENC_POSTOUT;
3751      stream->next_out   = stream->enc_current->base;
3752      stream->avail_out  = stream->enc_current->next;
3753      stream->total_out += (xoff_t) stream->avail_out;
3754
3755      /* If there is any output in this buffer, return it, otherwise fall through to
3756       * handle the next buffer or finish the window after all buffers have been
3757       * output. */
3758      if (stream->avail_out > 0)
3759        {
3760          /* This is the only place xd3_encode returns XD3_OUTPUT */
3761          return XD3_OUTPUT;
3762        }
3763
3764    case ENC_POSTOUT:
3765
3766      if (stream->avail_out != 0)
3767        {
3768          stream->msg = "missed call to consume output";
3769          return XD3_INTERNAL;
3770        }
3771
3772      /* Continue outputting one buffer at a time, until the next is NULL. */
3773      if ((stream->enc_current = stream->enc_current->next_page) != NULL)
3774        {
3775          goto enc_output;
3776        }
3777
3778      stream->total_in += (xoff_t) stream->avail_in;
3779      stream->enc_state = ENC_POSTWIN;
3780
3781      return XD3_WINFINISH;
3782
3783    case ENC_POSTWIN:
3784
3785      xd3_encode_reset (stream);
3786
3787      stream->current_window += 1;
3788      stream->enc_state = ENC_INPUT;
3789
3790      /* If there is leftover input to flush, repeat. */
3791      if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH))
3792        {
3793          goto enc_flush;
3794        }
3795
3796      /* Ready for more input. */
3797      return XD3_INPUT;
3798
3799    default:
3800      stream->msg = "invalid state";
3801      return XD3_INTERNAL;
3802    }
3803}
3804#endif /* XD3_ENCODER */
3805
3806/******************************************************************************************
3807 Client convenience functions
3808 ******************************************************************************************/
3809
3810static int
3811xd3_process_stream (int            is_encode,
3812                    xd3_stream    *stream,
3813                    int          (*func) (xd3_stream *),
3814                    int            close_stream,
3815                    const uint8_t *input,
3816                    usize_t        input_size,
3817                    uint8_t       *output,
3818                    usize_t       *output_size,
3819                    usize_t        output_size_max)
3820{
3821  usize_t ipos = 0;
3822  usize_t n = min(stream->winsize, input_size);
3823
3824  (*output_size) = 0;
3825
3826  stream->flags |= XD3_FLUSH; 
3827
3828  xd3_avail_input (stream, input + ipos, n);
3829  ipos += n;
3830
3831  for (;;)
3832    {
3833      int ret;
3834      switch((ret = func (stream)))
3835        {
3836        case XD3_OUTPUT: { /* memcpy below */ break; }
3837        case XD3_INPUT: {
3838          n = min(stream->winsize, input_size - ipos);
3839          if (n == 0) {
3840            goto done;
3841          }
3842          xd3_avail_input (stream, input + ipos, n);
3843          ipos += n;
3844          continue;
3845        }
3846        case XD3_GOTHEADER: { /* ignore */ continue; }
3847        case XD3_WINSTART: { /* ignore */ continue; }
3848        case XD3_WINFINISH: { /* ignore */ continue; }
3849        case XD3_GETSRCBLK:
3850          {
3851            stream->msg = "stream requires source input";
3852            return XD3_INTERNAL;
3853          }
3854        case 0:
3855          {
3856            /* xd3_encode_input/xd3_decode_input never return 0 */
3857            stream->msg = "invalid return: 0";
3858            return XD3_INTERNAL;
3859          }
3860        default:
3861          return ret;
3862        }
3863
3864      if (*output_size + stream->avail_out > output_size_max)
3865        {
3866          stream->msg = "insufficient output space";
3867          return ENOSPC;
3868        }
3869
3870      memcpy (output + *output_size, stream->next_out, stream->avail_out);
3871
3872      *output_size += stream->avail_out;
3873
3874      xd3_consume_output (stream);
3875    }
3876 done:
3877  return (close_stream == 0) ? 0 : xd3_close_stream (stream);
3878}
3879
3880static int
3881xd3_process_memory (int            is_encode,
3882                    int          (*func) (xd3_stream *),
3883                    int            close_stream,
3884                    const uint8_t *input,
3885                    usize_t        input_size,
3886                    const uint8_t *source,
3887                    usize_t        source_size,
3888                    uint8_t       *output,
3889                    usize_t       *output_size,
3890                    usize_t        output_size_max,
3891                    int            flags) {
3892  xd3_stream stream;
3893  xd3_config config;
3894  xd3_source src;
3895  int ret;
3896
3897  if (input == NULL || output == NULL) {
3898    stream.msg = "invalid input/output buffer";
3899    return XD3_INTERNAL;
3900  }
3901
3902  memset (& stream, 0, sizeof (stream));
3903  memset (& config, 0, sizeof (config));
3904
3905  config.flags = flags;
3906
3907  if (is_encode)
3908    {
3909      config.srcwin_maxsz = source_size;
3910      config.winsize = min(input_size, (usize_t) XD3_DEFAULT_WINSIZE);
3911      config.iopt_size = min(input_size / 32, XD3_DEFAULT_IOPT_SIZE);
3912      config.iopt_size = max(config.iopt_size, 128U);
3913
3914      config.sprevsz = XD3_DEFAULT_SPREVSZ;
3915
3916      while (config.sprevsz / 2 > input_size)
3917        {
3918          config.sprevsz /=  2;
3919        }
3920    }
3921
3922  if ((ret = xd3_config_stream (&stream, &config)) != 0)
3923    {
3924      goto exit;
3925    }
3926
3927  if (source != NULL)
3928    {
3929      memset (& src, 0, sizeof (src));
3930      src.size = source_size;
3931      src.blksize = source_size;
3932      src.onblk = source_size;
3933      src.curblk = source;
3934      src.curblkno = 0;
3935
3936      if ((ret = xd3_set_source (&stream, &src)) != 0)
3937        {
3938          goto exit;
3939        }
3940    }
3941
3942  if ((ret = xd3_process_stream (is_encode,
3943                                 & stream,
3944                                 func, 1,
3945                                 input, input_size,
3946                                 output,
3947                                 output_size,
3948                                 output_size_max)) != 0)
3949    {
3950      goto exit;
3951    }
3952
3953 exit:
3954  xd3_free_stream(&stream);
3955  return ret;
3956}
3957
3958int
3959xd3_decode_stream (xd3_stream    *stream,
3960                   const uint8_t *input,
3961                   usize_t        input_size,
3962                   uint8_t       *output,
3963                   usize_t       *output_size,
3964                   usize_t        output_size_max)
3965{
3966  return xd3_process_stream (0, stream, & xd3_decode_input, 1,
3967                             input, input_size,
3968                             output, output_size, output_size_max);
3969}
3970
3971int
3972xd3_decode_memory (const uint8_t *input,
3973                   usize_t        input_size,
3974                   const uint8_t *source,
3975                   usize_t        source_size,
3976                   uint8_t       *output,
3977                   usize_t       *output_size,
3978                   usize_t        output_size_max,
3979                   int            flags) {
3980  return xd3_process_memory (0, & xd3_decode_input, 1,
3981                             input, input_size,
3982                             source, source_size,
3983                             output, output_size, output_size_max,
3984                             flags);
3985}
3986
3987
3988#if XD3_ENCODER
3989int
3990xd3_encode_stream (xd3_stream    *stream,
3991                   const uint8_t *input,
3992                   usize_t         input_size,
3993                   uint8_t       *output,
3994                   usize_t        *output_size,
3995                   usize_t         output_size_max)
3996{
3997  return xd3_process_stream (1, stream, & xd3_encode_input, 1,
3998                             input, input_size,
3999                             output, output_size, output_size_max);
4000}
4001
4002int
4003xd3_encode_memory (const uint8_t *input,
4004                   usize_t        input_size,
4005                   const uint8_t *source,
4006                   usize_t        source_size,
4007                   uint8_t       *output,
4008                   usize_t        *output_size,
4009                   usize_t        output_size_max,
4010                   int            flags) {
4011  return xd3_process_memory (1, & xd3_encode_input, 1,
4012                             input, input_size,
4013                             source, source_size,
4014                             output, output_size, output_size_max,
4015                             flags);
4016}
4017#endif
4018
4019
4020/******************************************************************************************
4021 String matching helpers
4022 ******************************************************************************************/
4023
4024#if XD3_ENCODER
4025/* Do the initial xd3_string_match() checksum table setup.  Allocations are delayed until
4026 * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */
4027static int
4028xd3_string_match_init (xd3_stream *stream)
4029{
4030  const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
4031  const int DO_LARGE = (stream->src != NULL);
4032
4033  if (DO_LARGE && stream->large_table == NULL)
4034    {
4035      if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
4036        {
4037          return ENOMEM;
4038        }
4039    }
4040
4041  if (DO_SMALL)
4042    {
4043      /* Subsequent calls can return immediately after checking reset. */
4044      if (stream->small_table != NULL)
4045        {
4046          /* The target hash table is reinitialized once per window. */
4047          /* TODO: This would not have to be reinitialized if absolute offsets
4048           * were being stored, as we would do for VCD_TARGET encoding. */
4049          if (stream->small_reset)
4050            {
4051              stream->small_reset = 0;
4052              memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size);
4053            }
4054
4055          return 0;
4056        }
4057
4058      if ((stream->small_table = xd3_alloc0 (stream,
4059                                             stream->small_hash.size,
4060                                             sizeof (usize_t))) == NULL)
4061        {
4062          return ENOMEM;
4063        }
4064
4065      /* If there is a previous table needed. */
4066      if (stream->smatcher.small_lchain > 1 ||
4067          stream->smatcher.small_chain > 1)
4068        {
4069          if ((stream->small_prev = xd3_alloc (stream,
4070                                               stream->sprevsz,
4071                                               sizeof (xd3_slist))) == NULL)
4072            {
4073              return ENOMEM;
4074            }
4075        }
4076    }
4077
4078  return 0;
4079}
4080
4081#if XD3_USE_LARGEFILE64
4082/* This function handles the 32/64bit ambiguity -- file positions are 64bit but the hash
4083 * table for source-offsets is 32bit. */
4084static xoff_t
4085xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
4086{
4087  xoff_t scp = stream->srcwin_cksum_pos;
4088  xoff_t s0 = scp >> 32;
4089
4090  usize_t sr = (usize_t) scp;
4091
4092  if (s0 == 0) {
4093    return low;
4094  }
4095
4096  /* This should not be >= because srcwin_cksum_pos is the next
4097   * position to index. */
4098  if (low > sr) {
4099    return (--s0 << 32) | low;
4100  }
4101
4102  return (s0 << 32) | low;
4103}
4104#else
4105static xoff_t
4106xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
4107{
4108  return (xoff_t) low;
4109}
4110#endif
4111
4112/* This function sets up the stream->src fields srcbase, srclen.  The call is delayed
4113 * until these values are needed to encode a copy address.  At this point the decision has
4114 * to be made. */
4115static int
4116xd3_srcwin_setup (xd3_stream *stream)
4117{
4118  xd3_source *src = stream->src;
4119  xoff_t length;
4120
4121  /* Check the undecided state. */
4122  XD3_ASSERT (src->srclen == 0 && src->srcbase == 0);
4123
4124  /* Avoid repeating this call. */
4125  stream->srcwin_decided = 1;
4126
4127  /* If the stream is flushing, then the iopt buffer was able to contain the complete
4128   * encoding.  If no copies were issued no source window is actually needed.  This
4129   * prevents the VCDIFF header from including source base/len.  xd3_emit_hdr checks
4130   * for srclen == 0. */
4131  if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0)
4132    {
4133      goto done;
4134    }
4135
4136  /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK
4137   * and related parameters are extreme - should use smaller windows. */
4138  length = stream->match_maxaddr - stream->match_minaddr;
4139
4140  if (length > (xoff_t) USIZE_T_MAX)
4141    {
4142      stream->msg = "source window length overflow (not 64bit)";
4143      return XD3_INTERNAL;
4144    }
4145
4146  /* If ENC_FLUSH, then we know the exact source window to use because no more copies can
4147   * be issued. */
4148  if (stream->enc_state == ENC_FLUSH)
4149    {
4150      src->srcbase = stream->match_minaddr;
4151      src->srclen  = (usize_t) length;
4152      XD3_ASSERT (src->srclen);
4153      goto done;
4154    }
4155
4156  /* Otherwise, we have to make a guess.  More copies may still be issued, but we have to
4157   * decide the source window base and length now.  */
4158  src->srcbase = stream->match_minaddr;
4159  src->srclen  = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2));
4160  if (src->size < src->srcbase + (xoff_t) src->srclen)
4161    {
4162      /* Could reduce srcbase, as well. */
4163      src->srclen = src->size - src->srcbase;
4164    }
4165
4166  XD3_ASSERT (src->srclen);
4167 done:
4168  /* Set the taroff.  This convenience variable is used even when stream->src == NULL. */
4169  stream->taroff = src->srclen;
4170  return 0;
4171}
4172
4173/* This function computes more source checksums to advance the window.
4174 * Called at every entrance to the string-match loop and each time
4175 * stream->input_position reaches the value returned as
4176 * *next_move_point.  NB: this is one of the most expensive functions
4177 * in this code and also the most critical for good compression.
4178 *
4179 * TODO: really would like a good test for this logic. how?
4180 * TODO: optimize the inner loop
4181 */
4182static int
4183xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4184{
4185  xoff_t logical_input_cksum_pos;
4186
4187  XD3_ASSERT(stream->srcwin_cksum_pos <= stream->src->size);
4188  if (stream->srcwin_cksum_pos == stream->src->size)
4189    {
4190      *next_move_point = USIZE_T_MAX;
4191      return 0;
4192    }
4193
4194  /* Begin by advancing at twice the input rate, up to half the
4195   * maximum window size. */
4196  logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2,
4197                                (stream->total_in + stream->input_position) +
4198                                  (stream->srcwin_maxsz / 2));
4199
4200  /* If srcwin_cksum_pos is already greater, wait until the difference
4201   * is met. */
4202  if (stream->srcwin_cksum_pos > logical_input_cksum_pos)
4203    {
4204      *next_move_point = stream->input_position +
4205        (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos);
4206      return 0;
4207    }
4208
4209  /* A long match may have extended past srcwin_cksum_pos.  Don't
4210   * start checksumming already-matched source data. */
4211  if (stream->maxsrcaddr > stream->srcwin_cksum_pos)
4212    {
4213      stream->srcwin_cksum_pos = stream->maxsrcaddr;
4214    }
4215
4216  if (logical_input_cksum_pos < stream->srcwin_cksum_pos)
4217    {
4218      logical_input_cksum_pos = stream->srcwin_cksum_pos;
4219    }
4220
4221  /* Advance at least one source block.  With the command-line
4222   * defaults this means:
4223   *
4224   * if (src->size <= srcwin_maxsz), index the entire source at once
4225   * using the position of the first non-match.  This is good for
4226   * small inputs, especially when the content may have moved anywhere
4227   * in the file (e.g., tar files).
4228   *
4229   * if (src->size > srcwin_maxsz), index at least one block (which
4230   * the command-line sets to 1/32 of srcwin_maxsz) ahead of the
4231   * logical position.  This is good for different reasons: when a
4232   * long match spanning several source blocks is encountered, this
4233   * avoids computing checksums for those blocks.  If the data can
4234   * move anywhere, this is bad.
4235   */
4236  logical_input_cksum_pos += stream->src->blksize;
4237
4238  IF_DEBUG1 (DP(RINT "[srcwin_move_point] T=%"Q"u S=%"Q"u/%"Q"u\n",
4239               stream->total_in + stream->input_position,
4240               stream->srcwin_cksum_pos,
4241               logical_input_cksum_pos));
4242
4243  while (stream->srcwin_cksum_pos < logical_input_cksum_pos &&
4244         stream->srcwin_cksum_pos < stream->src->size)
4245    {
4246      xoff_t  blkno = stream->srcwin_cksum_pos / stream->src->blksize;
4247      ssize_t oldpos = stream->srcwin_cksum_pos % stream->src->blksize;
4248      ssize_t blkpos = xd3_bytes_on_srcblk (stream->src, blkno);
4249      int ret;
4250
4251      if (oldpos + stream->smatcher.large_look > blkpos)
4252        {
4253          stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
4254          continue;
4255        }
4256
4257      if ((ret = xd3_getblk (stream, blkno)))
4258        {
4259          /* TOOFARBACK should never occur here, since we read forward. */
4260          if (ret == XD3_TOOFARBACK)
4261            {
4262              ret = XD3_INTERNAL;
4263            }
4264          return ret;
4265        }
4266
4267      /* This inserts checksums for the entire block, in reverse,
4268       * starting from the end of the block.  This logic does not test
4269       * stream->srcwin_cksum_pos because it always advances it to the
4270       * start of the next block.
4271       *
4272       * oldpos is the srcwin_cksum_pos within this block.  blkpos is
4273       * the number of bytes available.  Each iteration inspects
4274       * large_look bytes then steps back large_step bytes.  The
4275       * if-stmt above ensures at least one large_look of data. */
4276      blkpos -= stream->smatcher.large_look;
4277
4278      do
4279        {
4280          uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos,
4281                                       stream->smatcher.large_look);
4282          usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
4283
4284          stream->large_table[hval] =
4285            (usize_t) ((stream->src->blksize * blkno) +
4286                       (xoff_t)(blkpos + HASH_CKOFFSET));
4287
4288          IF_DEBUG (stream->large_ckcnt += 1);
4289
4290          blkpos -= stream->smatcher.large_step;
4291        }
4292      while (blkpos >= oldpos);
4293
4294      stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
4295    }
4296
4297  if (stream->srcwin_cksum_pos >= stream->src->size)
4298    {
4299      /* This invariant is needed for xd3_source_cksum_offset() */
4300      stream->srcwin_cksum_pos = stream->src->size;
4301      *next_move_point = USIZE_T_MAX;
4302      return 0;
4303    }
4304
4305  /* How long until this function should be called again. */
4306  XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos);
4307  *next_move_point = stream->input_position + 1 +
4308    (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos);
4309  return 0;
4310}
4311
4312/* Sets the bounding region for a newly discovered source match, prior to calling
4313 * xd3_source_extend_match().  This sets the match_maxfwd, match_maxback variables.  Note:
4314 * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables
4315 * are usize_t.  Returns 0 if the setup succeeds, or 1 if the source position lies outside
4316 * an already-decided srcbase/srclen window. */
4317static int
4318xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
4319{
4320  xd3_source *src = stream->src;
4321  usize_t greedy_or_not;
4322
4323  stream->match_maxback = 0;
4324  stream->match_maxfwd  = 0;
4325  stream->match_back    = 0;
4326  stream->match_fwd     = 0;
4327
4328  /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be
4329   * covered by a longer source match.  The greedy algorithm does not allow this. */
4330  if (stream->flags & XD3_BEGREEDY)
4331    {
4332      /* The greedy algorithm allows backward matching to the last matched position. */
4333      greedy_or_not = xd3_iopt_last_matched (stream);
4334    }
4335  else
4336    {
4337      /* The 1.5-pass algorithm allows backward matching to go back as far as the
4338       * unencoded offset, which is updated as instructions pass out of the iopt buffer.
4339       * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate
4340       * instructions when a covering source match is found. */
4341      greedy_or_not = stream->unencoded_offset;
4342    }
4343
4344
4345
4346  /* Backward target match limit. */
4347  XD3_ASSERT (stream->input_position >= greedy_or_not);
4348  stream->match_maxback = stream->input_position - greedy_or_not;
4349
4350  /* Forward target match limit. */
4351  XD3_ASSERT (stream->avail_in > stream->input_position);
4352  stream->match_maxfwd = stream->avail_in - stream->input_position;
4353
4354  /* Now we take the source position into account.  It depends whether the srclen/srcbase
4355   * have been decided yet. */
4356  if (stream->srcwin_decided == 0)
4357    {
4358      /* Unrestricted case: the match can cover the entire source, 0--src->size.  We
4359       * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values
4360       * and take the min. */
4361      xoff_t srcavail;
4362
4363      if (srcpos < (xoff_t) stream->match_maxback)
4364        {
4365          stream->match_maxback = srcpos;
4366        }
4367
4368      srcavail = src->size - srcpos;
4369      if (srcavail < (xoff_t) stream->match_maxfwd)
4370        {
4371          stream->match_maxfwd = srcavail;
4372        }
4373
4374      goto good;
4375    }
4376
4377  /* Decided some source window. */
4378  XD3_ASSERT (src->srclen > 0);
4379
4380  /* Restricted case: fail if the srcpos lies outside the source window */
4381  if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen)))
4382    {
4383      goto bad;
4384    }
4385  else
4386    {
4387      usize_t srcavail;
4388
4389      srcavail = (usize_t) (srcpos - src->srcbase);
4390      if (srcavail < stream->match_maxback)
4391        {
4392          stream->match_maxback = srcavail;
4393        }
4394
4395      srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos);
4396      if (srcavail < stream->match_maxfwd)      {
4397          stream->match_maxfwd = srcavail;
4398        }
4399
4400      goto good;
4401    }
4402
4403 good:
4404  stream->match_state  = MATCH_BACKWARD;
4405  stream->match_srcpos = srcpos;
4406  return 0;
4407
4408 bad:
4409  stream->match_state  = MATCH_SEARCHING;
4410  return 1;
4411}
4412
4413/* This function expands the source match backward and forward.  It is reentrant, since
4414 * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream.  There
4415 * are two callers of this function, the string_matching routine when a checksum match is
4416 * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected.
4417 * The two callers do different things with the input_position, thus this function leaves
4418 * that variable untouched.  If a match is taken the resulting stream->match_fwd is left
4419 * non-zero. */
4420static int
4421xd3_source_extend_match (xd3_stream *stream)
4422{
4423  int ret;
4424  xd3_source *src = stream->src;
4425  xoff_t matchoff;  /* matchoff is the current right/left-boundary of the source match being tested. */
4426  usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */
4427  xoff_t tryblk;    /* tryblk, tryoff are the block, offset position of matchoff */
4428  usize_t tryoff;
4429  usize_t tryrem;    /* tryrem is the number of matchable bytes on the source block */
4430
4431  XD3_ASSERT (src != NULL);
4432
4433  /* Does it make sense to compute backward match AFTER forward match? */
4434  if (stream->match_state == MATCH_BACKWARD)
4435    {
4436      /* Note: this code is practically duplicated below, substituting
4437       * match_fwd/match_back and direction.  Consolidate? */
4438      matchoff  = stream->match_srcpos - stream->match_back;
4439      streamoff = stream->input_position - stream->match_back;
4440      tryblk    = matchoff / src->blksize;
4441      tryoff    = matchoff % src->blksize;
4442
4443      /* this loops backward over source blocks */
4444      while (stream->match_back < stream->match_maxback)
4445        {
4446          /* see if we're backing across a source block boundary */
4447          if (tryoff == 0)
4448            {
4449              tryoff  = src->blksize;
4450              tryblk -= 1;
4451            }
4452
4453          if ((ret = xd3_getblk (stream, tryblk)))
4454            {
4455              /* if search went too far back, continue forward. */
4456              if (ret == XD3_TOOFARBACK)
4457                {
4458                  break;
4459                }
4460
4461              /* could be a XD3_GETSRCBLK failure. */
4462              return ret;
4463            }
4464
4465          /* OPT: This code can be optimized. */
4466          for (tryrem = min (tryoff, stream->match_maxback - stream->match_back);
4467               tryrem != 0;
4468               tryrem -= 1, stream->match_back += 1)
4469            {
4470              if (src->curblk[tryoff-1] != stream->next_in[streamoff-1])
4471                {
4472                  goto doneback;
4473                }
4474
4475              tryoff    -= 1;
4476              streamoff -= 1;
4477            }
4478        }
4479
4480    doneback:
4481      stream->match_state = MATCH_FORWARD;
4482    }
4483
4484  XD3_ASSERT (stream->match_state == MATCH_FORWARD);
4485
4486  matchoff  = stream->match_srcpos + stream->match_fwd;
4487  streamoff = stream->input_position + stream->match_fwd;
4488  tryblk    = matchoff / src->blksize;
4489  tryoff    = matchoff % src->blksize;
4490
4491  /* Note: practically the same code as backwards case above: same comments */
4492  while (stream->match_fwd < stream->match_maxfwd)
4493    {
4494      if ((ret = xd3_getblk (stream, tryblk)))
4495        {
4496          /* if search went too far back, continue forward. */
4497          if (ret == XD3_TOOFARBACK)
4498            {
4499              break;
4500            }
4501
4502          /* could be a XD3_GETSRCBLK failure. */
4503          return ret;
4504        }
4505
4506      /* There's a good speedup for doing word comparions: see zlib. */
4507      for (tryrem = min(stream->match_maxfwd - stream->match_fwd,
4508                        src->blksize - tryoff);
4509           tryrem != 0;
4510           tryrem -= 1, stream->match_fwd += 1)
4511        {
4512          if (src->curblk[tryoff] != stream->next_in[streamoff])
4513            {
4514              goto donefwd;
4515            }
4516
4517          tryoff    += 1;
4518          streamoff += 1;
4519        }
4520
4521      if (tryoff == src->blksize)
4522        {
4523          tryoff  = 0;
4524          tryblk += 1;
4525        }
4526    }
4527
4528 donefwd:
4529  stream->match_state = MATCH_SEARCHING;
4530
4531  /* Now decide whether to take the match.  There are several ways to answer this
4532   * question and this is likely the best answer.  There is currently an assertion
4533   * in xd3_iopt_erase that checks whether min_match works.  This variable maintains
4534   * that every match exceeds the end of the previous match.  However, it is
4535   * possible that match_back allows us to find a match that goes a long way back
4536   * but not enough forward.  We could try an alternate approach, which might help
4537   * or it might just be extra complexity: eliminate the next match_fwd >= min_match
4538   * test and call xd3_iopt_erase right away.  Erase instructions as far as it goes
4539   * back, then either remember what was deleted and re-insert it, or count on the
4540   * string-matching algorithm to find that match again.  I think it is more
4541   * worthwhile to implement large_hash duplicates. */
4542  if (stream->match_fwd < stream->min_match)
4543    {
4544      stream->match_fwd = 0;
4545    }
4546  else
4547    {
4548      usize_t total  = stream->match_fwd + stream->match_back;
4549
4550      /* Correct the variables to remove match_back from the equation. */
4551      usize_t target_position = stream->input_position - stream->match_back;
4552      usize_t match_length   = stream->match_back      + stream->match_fwd;
4553      xoff_t match_position  = stream->match_srcpos    - stream->match_back;
4554      xoff_t match_end       = stream->match_srcpos    + stream->match_fwd;
4555
4556      /* At this point we may have to erase any iopt-buffer instructions that are
4557       * fully covered by a backward-extending copy. */
4558      if (stream->match_back > 0)
4559        {
4560          xd3_iopt_erase (stream, target_position, total);
4561        }
4562
4563      stream->match_back = 0;
4564
4565      /* Update ranges.  The first source match occurs with both values set to 0. */
4566      if (stream->match_maxaddr == 0 ||
4567          match_position < stream->match_minaddr)
4568        {
4569          stream->match_minaddr = match_position;
4570        }
4571
4572      if (match_end > stream->match_maxaddr)
4573        {
4574          /* Note: per-window */
4575          stream->match_maxaddr = match_end;
4576        }
4577
4578      if (match_end > stream->maxsrcaddr)
4579        {
4580          /* Note: across windows */
4581          stream->maxsrcaddr = match_end;
4582        }
4583
4584      IF_DEBUG1 ({
4585        static int x = 0;
4586        DP(RINT "[source match:%d] <inp %"Q"u %"Q"u>  <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n",
4587           x++,
4588           stream->total_in + target_position,
4589           stream->total_in + target_position + match_length,
4590           match_position,
4591           match_position + match_length,
4592           (stream->total_in + target_position == match_position) ? "same" : "diff",
4593           match_length);
4594      });
4595
4596      if ((ret = xd3_found_match (stream,
4597                                  /* decoder position */ target_position,
4598                                  /* length */ match_length,
4599                                  /* address */ match_position,
4600                                  /* is_source */ 1)))
4601        {
4602          return ret;
4603        }
4604
4605      /* If the match ends with the available input: */
4606      if (target_position + match_length == stream->avail_in)
4607        {
4608          /* Setup continuing match for the next window. */
4609          stream->match_state  = MATCH_TARGET;
4610          stream->match_srcpos = match_end;
4611        }
4612    }
4613
4614  return 0;
4615}
4616
4617/* Update the small hash.  Values in the small_table are offset by
4618 * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. */
4619static void
4620xd3_scksum_insert (xd3_stream *stream,
4621                   usize_t inx,
4622                   usize_t scksum,
4623                   usize_t pos)
4624{
4625  /* If we are maintaining previous duplicates. */
4626  if (stream->small_prev)
4627    {
4628      usize_t    last_pos = stream->small_table[inx];
4629      xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask];
4630
4631      /* Note last_pos is offset by HASH_CKOFFSET. */
4632      pos_list->last_pos = last_pos;
4633    }
4634
4635  /* Enter the new position into the hash bucket. */
4636  stream->small_table[inx] = pos + HASH_CKOFFSET;
4637}
4638
4639#if XD3_DEBUG
4640static int
4641xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
4642                  const uint8_t *inp_max, usize_t cmp_len)
4643{
4644  int i;
4645
4646  for (i = 0; i < cmp_len; i += 1)
4647    {
4648      XD3_ASSERT (ref0[i] == inp0[i]);
4649    }
4650
4651  if (inp0 + cmp_len < inp_max)
4652    {
4653      XD3_ASSERT (inp0[i] != ref0[i]);
4654    }
4655
4656  return 1;
4657}
4658#endif /* XD3_DEBUG */
4659
4660/* When the hash table indicates a possible small string match, it
4661 * calls this routine to find the best match.  The first matching
4662 * position is taken from the small_table, HASH_CKOFFSET is subtracted
4663 * to get the actual position.  After checking that match, if previous
4664 * linked lists are in use (because stream->smatcher.small_chain > 1),
4665 * previous matches are tested searching for the longest match.  If
4666 * (stream->min_match > MIN_MATCH) then a lazy match is in effect.
4667 *
4668 * TODO: This is the second most-expensive function, after
4669 * xd3_srcwin_move_point().
4670 */
4671static usize_t
4672xd3_smatch (xd3_stream *stream,
4673            usize_t base,
4674            usize_t scksum,
4675            usize_t *match_offset)
4676{
4677  usize_t         cmp_len;
4678  usize_t         match_length = 0;
4679  usize_t         chain        = (stream->min_match == MIN_MATCH ?
4680                                  stream->smatcher.small_chain :
4681                                  stream->smatcher.small_lchain);
4682  const uint8_t *inp_max      = stream->next_in + stream->avail_in;
4683  const uint8_t *inp;
4684  const uint8_t *ref;
4685
4686  SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position);
4687
4688  XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in);
4689
4690  base -= HASH_CKOFFSET;
4691
4692 again:
4693
4694  /* For small matches, we can always go to the end-of-input because
4695   * the matching position must be less than the input position. */
4696  XD3_ASSERT (base < stream->input_position);
4697
4698  ref = stream->next_in + base;
4699  inp = stream->next_in + stream->input_position;
4700
4701  SMALL_HASH_DEBUG2 (stream, ref);
4702
4703  /* Expand potential match forward. */
4704  while (inp < inp_max && *inp == *ref)
4705    {
4706      ++inp;
4707      ++ref;
4708    }
4709
4710  cmp_len = inp - (stream->next_in + stream->input_position);
4711
4712  /* Verify correctness */
4713  XD3_ASSERT (xd3_check_smatch (stream->next_in + base,
4714                                stream->next_in + stream->input_position,
4715                                inp_max, cmp_len));
4716
4717  /* Update longest match */
4718  if (cmp_len > match_length)
4719    {
4720      ( match_length) = cmp_len;
4721      (*match_offset) = base;
4722
4723      /* Stop if we match the entire input or have a long_enough match. */
4724      if (inp == inp_max || cmp_len >= stream->smatcher.long_enough)
4725        {
4726          goto done;
4727        }
4728    }
4729
4730  /* If we have not reached the chain limit, see if there is another
4731     previous position. */
4732  while (--chain != 0)
4733    {
4734      /* Calculate the previous offset. */
4735      usize_t last_pos = stream->small_prev[base & stream->sprevmask].last_pos;
4736
4737      if (last_pos == 0)
4738        {
4739          break;
4740        }
4741
4742      last_pos -= HASH_CKOFFSET;
4743      base = last_pos;
4744
4745      /* Stop if the position is wrong (because the lists are not
4746       * re-initialized across input windows). */
4747      if (base < stream->input_position)
4748        {
4749          goto again;
4750        }
4751
4752      break;
4753    }
4754
4755 done:
4756  return match_length;
4757}
4758
4759#if XD3_DEBUG
4760static void
4761xd3_verify_small_state (xd3_stream    *stream,
4762                        const uint8_t *inp,
4763                        uint32_t          x_cksum)
4764{
4765  uint32_t cksum = xd3_scksum (inp, stream->smatcher.small_look);
4766
4767  XD3_ASSERT (cksum == x_cksum);
4768}
4769
4770static void
4771xd3_verify_large_state (xd3_stream    *stream,
4772                        const uint8_t *inp,
4773                        uint32_t          x_cksum)
4774{
4775  uint32_t cksum = xd3_lcksum (inp, stream->smatcher.large_look);
4776
4777  XD3_ASSERT (cksum == x_cksum);
4778}
4779
4780static void
4781xd3_verify_run_state (xd3_stream    *stream,
4782                      const uint8_t *inp,
4783                      int            x_run_l,
4784                      uint8_t        x_run_c)
4785{
4786  int     slook = stream->smatcher.small_look;
4787  uint8_t run_c;
4788  int     run_l = xd3_comprun (inp, slook, &run_c);
4789
4790  XD3_ASSERT (run_l == 0 || run_c == x_run_c);
4791  XD3_ASSERT (x_run_l > slook || run_l == x_run_l);
4792}
4793#endif /* XD3_DEBUG */
4794#endif /* XD3_ENCODER */
4795
4796/******************************************************************************************
4797 TEMPLATE pass
4798 ******************************************************************************************/
4799
4800#endif /* __XDELTA3_C_INLINE_PASS__ */
4801#ifdef __XDELTA3_C_TEMPLATE_PASS__
4802
4803#if XD3_ENCODER
4804
4805/******************************************************************************************
4806 Templates
4807 ******************************************************************************************/
4808
4809/* Template macros */
4810#define XD3_TEMPLATE(x)      XD3_TEMPLATE2(x,TEMPLATE)
4811#define XD3_TEMPLATE2(x,n)   XD3_TEMPLATE3(x,n)
4812#define XD3_TEMPLATE3(x,n)   x ## n
4813#define XD3_STRINGIFY(x)     XD3_STRINGIFY2(x)
4814#define XD3_STRINGIFY2(x)    #x
4815
4816static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream);
4817
4818static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
4819{
4820  XD3_STRINGIFY(TEMPLATE),
4821  XD3_TEMPLATE(xd3_string_match_),
4822#if SOFTCFG == 1
4823  0, 0, 0, 0, 0, 0, 0
4824#else
4825  LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH
4826#endif
4827};
4828
4829static int
4830XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
4831{
4832  /* TODO config: These next three variables should be statically compliled in various
4833   * scan_cfg configurations? */
4834  const int      DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
4835  const int      DO_LARGE = (stream->src != NULL);
4836  const int      DO_RUN   = (1);
4837
4838  const uint8_t *inp;
4839  uint32_t       scksum = 0;
4840  uint32_t       lcksum = 0;
4841  usize_t         sinx;
4842  usize_t         linx;
4843  uint8_t        run_c;
4844  int            run_l;
4845  int            ret;
4846  usize_t         match_length;
4847  usize_t         match_offset;  /* "may be unused" warnings are bogus (due to min_match test) */
4848  usize_t         next_move_point;
4849
4850  /* If there will be no compression due to settings or short input, skip it entirely. */
4851  if (! (DO_SMALL || DO_LARGE || DO_RUN) ||
4852      stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
4853
4854  if ((ret = xd3_string_match_init (stream))) { return ret; }
4855
4856  /* The restartloop label is reached when the incremental loop state needs to be
4857   * reset. */
4858 restartloop:
4859
4860  /* If there is not enough input remaining for any kind of match, skip it. */
4861  if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
4862
4863  /* Now reset the incremental loop state: */
4864
4865  /* The min_match variable is updated to avoid matching the same lazy match over and over
4866   * again.  For example, if you find a (small) match of length 9 at one position, you
4867   * will likely find a match of length 8 at the next position. */
4868  stream->min_match = MIN_MATCH;
4869
4870  /* The current input byte. */
4871  inp = stream->next_in + stream->input_position;
4872
4873  /* Small match state. */
4874  if (DO_SMALL)
4875    {
4876      scksum = xd3_scksum (inp, SLOOK);
4877    }
4878
4879  /* Run state. */
4880  if (DO_RUN)
4881    {
4882      run_l = xd3_comprun (inp, SLOOK, & run_c);
4883    }
4884
4885  /* Large match state.  We continue the loop even after not enough bytes for LLOOK
4886   * remain, so always check stream->input_position in DO_LARGE code. */
4887  if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in))
4888    {
4889      /* Source window: next_move_point is the point that stream->input_position must reach before
4890       * computing more source checksum. */
4891      if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
4892        {
4893          return ret;
4894        }
4895
4896      lcksum = xd3_lcksum (inp, LLOOK);
4897    }
4898
4899  /* TRYLAZYLEN: True if a certain length match should be followed by lazy search.  This
4900   * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to
4901   * consider lazy matching.  "Enough" is set to 2 since the next match will start at the
4902   * next offset, it must match two extra characters. */
4903#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) && (POS) + (LEN) <= (MAX) - 2)
4904
4905  /* HANDLELAZY: This statement is called each time an instruciton is emitted (three
4906   * cases).  If the instruction is large enough, the loop is restarted, otherwise lazy
4907   * matching may ensue. */
4908#define HANDLELAZY(mlen) \
4909  if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \
4910    { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \
4911  else \
4912    { stream->input_position += (mlen); goto restartloop; }
4913
4914  /* Now loop over one input byte at a time until a match is found... */
4915  for (;; inp += 1, stream->input_position += 1)
4916    {
4917      /* Now we try three kinds of string match in order of expense:
4918       * run, large match, small match. */
4919
4920      /* Expand the start of a RUN.  The test for (run_l == SLOOK) avoids repeating this
4921       * check when we pass through a run area performing lazy matching.  The run is only
4922       * expanded once when the min_match is first reached.  If lazy matching is
4923       * performed, the run_l variable will remain inconsistent until the first
4924       * non-running input character is reached, at which time the run_l may then again
4925       * grow to SLOOK. */
4926      if (DO_RUN && run_l == SLOOK)
4927        {
4928          usize_t max_len = stream->avail_in - stream->input_position;
4929
4930          IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c));
4931
4932          while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; }
4933
4934          /* Output a RUN instruction. */
4935          if (run_l >= stream->min_match && run_l >= MIN_RUN)
4936            {
4937              if ((ret = xd3_emit_run (stream, stream->input_position, run_l, run_c))) { return ret; }
4938
4939              HANDLELAZY (run_l);
4940            }
4941        }
4942
4943      /* If there is enough input remaining. */
4944      if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in))
4945        {
4946          if ((stream->input_position >= next_move_point) &&
4947              (ret = xd3_srcwin_move_point (stream, & next_move_point)))
4948            {
4949              return ret;
4950            }
4951
4952          linx = xd3_checksum_hash (& stream->large_hash, lcksum);
4953
4954          IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum));
4955
4956          /* Note: To handle large checksum duplicates, this code should be rearranged to
4957           * resemble the small_match case more.  But how much of the code can be truly
4958           * shared?  The main difference is the need for xd3_source_extend_match to work
4959           * outside of xd3_string_match, in the case where inputs are identical. */
4960          if (unlikely (stream->large_table[linx] != 0))
4961            {
4962              /* the match_setup will fail if the source window has been decided and the
4963               * match lies outside it.  You could consider forcing a window at this point
4964               * to permit a new source window. */
4965              xoff_t adj_offset =
4966                xd3_source_cksum_offset(stream,
4967                                        stream->large_table[linx] - HASH_CKOFFSET);
4968              if (xd3_source_match_setup (stream, adj_offset) == 0)
4969                {
4970                  if ((ret = xd3_source_extend_match (stream)))
4971                    {
4972                      return ret;
4973                    }
4974
4975                  /* Update stream position.  match_fwd is zero if no match. */
4976                  if (stream->match_fwd > 0)
4977                    {
4978                      HANDLELAZY (stream->match_fwd);
4979                    }
4980                }
4981            }
4982        }
4983
4984      /* Small matches. */
4985      if (DO_SMALL)
4986        {
4987          sinx = xd3_checksum_hash (& stream->small_hash, scksum);
4988
4989          /* Verify incremental state in debugging mode. */
4990          IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
4991
4992          /* Search for the longest match */
4993          if (unlikely (stream->small_table[sinx] != 0))
4994            {
4995              match_length = xd3_smatch (stream,
4996                                         stream->small_table[sinx],
4997                                         scksum,
4998                                         & match_offset);
4999            }
5000          else
5001            {
5002              match_length = 0;
5003            }
5004
5005          /* Insert a hash for this string. */
5006          xd3_scksum_insert (stream, sinx, scksum, stream->input_position);
5007
5008          /* Maybe output a COPY instruction */
5009          if (unlikely (match_length >= stream->min_match))
5010            {
5011              IF_DEBUG1 ({
5012                static int x = 0;
5013                DP(RINT "[target match:%d] <inp %u %u>  <cpy %u %u> (-%d) [ %u bytes ]\n",
5014                   x++,
5015                   stream->input_position,
5016                   stream->input_position + match_length,
5017                   match_offset,
5018                   match_offset + match_length,
5019                   stream->input_position - match_offset,
5020                   match_length);
5021              });
5022
5023              if ((ret = xd3_found_match (stream,
5024                                        /* decoder position */ stream->input_position,
5025                                        /* length */ match_length,
5026                                        /* address */ match_offset,
5027                                        /* is_source */ 0))) { return ret; }
5028
5029              /* Copy instruction. */
5030              HANDLELAZY (match_length);
5031            }
5032        }
5033
5034      /* The logic above prevents excess work during lazy matching by increasing min_match
5035       * to avoid smaller matches.  Each time we advance stream->input_position by one, the minimum match
5036       * shortens as well.  */
5037      if (stream->min_match > MIN_MATCH)
5038        {
5039          stream->min_match -= 1;
5040        }
5041
5042    updateone:
5043
5044      /* See if there are no more incremental cksums to compute. */
5045      if (stream->input_position + SLOOK == stream->avail_in)
5046        {
5047          goto loopnomore;
5048        }
5049
5050      /* Compute next RUN, CKSUM */
5051      if (DO_RUN)   { NEXTRUN (inp[SLOOK]); }
5052      if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); }
5053      if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); }
5054    }
5055
5056 loopnomore:
5057  return 0;
5058}
5059#endif /* XD3_ENCODER */
5060#endif /* __XDELTA3_C_TEMPLATE_PASS__ */
Note: See TracBrowser for help on using the repository browser.