1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _XDELTA3_DJW_H_
20#define _XDELTA3_DJW_H_
21
22/* The following people deserve much credit for the algorithms and techniques contained in
23 * this file:
24
25 Julian Seward
26 Bzip2 sources, implementation of the multi-table Huffman technique.
27
28 Jean-loup Gailly and Mark Adler and L. Peter Deutsch
29 Zlib source code, RFC 1951
30
31 Daniel S. Hirschberg and Debra A. LeLewer
32 "Efficient Decoding of Prefix Codes"
33 Communications of the ACM, April 1990 33(4).
34
35 David J. Wheeler
36 Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
37 This contains the idea behind the multi-table Huffman and 1-2 coding techniques.
38 ftp://ftp.cl.cam.ac.uk/users/djw3/
39
40*/
41
42/* OPT: during the multi-table iteration, pick the worst-overall performing table and
43 * replace it with exactly the frequencies of the worst-overall performing sector or
44 * N-worst performing sectors. */
45
46/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding
47 * strategy. xdfs-0.256 contains the last of the other-format tests, including RFC1950
48 * and the RFC1950+MTF tests. */
49
50#define DJW_MAX_CODELEN 32 /* Maximum length of an alphabet code. */
51
52#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
53
54#define RUN_0 0 /* Symbols used in MTF+1/2 coding. */
55#define RUN_1 1
56
57#define DJW_BASIC_CODES 5 /* Number of code lengths always encoded (djw_encode_12basic array) */
58#define DJW_RUN_CODES 2 /* Number of run codes */
59#define DJW_EXTRA_12OFFSET 7 /* Offset of extra codes */
60#define DJW_EXTRA_CODES 15 /* Number of optionally encoded code lengths (djw_encode_12extra array) */
61#define DJW_EXTRA_CODE_BITS 4 /* Number of bits to code [0-DJW_EXTRA_CODES] */
62
63#define DJW_MAX_GROUPS 8 /* Max number of group coding tables */
64#define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */
65
66#define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */
67#define DJW_SECTORSZ_BITS 5 /* Number of bits to code the sector size */
68#define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
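/* Note (illustrative, derived from the encode/decode paths below): the sector size is
 * transmitted as (sector_size / DJW_SECTORSZ_MULT) - 1 in DJW_SECTORSZ_BITS bits and
 * recovered as (value + 1) * DJW_SECTORSZ_MULT, so valid sector sizes are the multiples
 * of 5 from 5 to 160; e.g. sector_size == 70 is sent as the 5-bit value 13. */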
69
70#define DJW_MAX_ITER 6 /* Maximum number of iterations to find group tables. */
71#define DJW_MIN_IMPROVEMENT 20 /* Minimum number of bits an iteration must reduce coding by. */
72
73#define DJW_MAX_CLCLEN 15 /* Maximum code length of a prefix code length */
74#define DJW_CLCLEN_BITS 4 /* Number of bits to code [0-DJW_MAX_CLCLEN] */
75
76#define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */
77#define DJW_GBCLEN_BITS 3 /* Number of bits to code [0-DJW_MAX_GBCLEN]
78 * @!@ Actually, should never have zero code lengths here, or
79 * else a group went unused. Write a test for this: if a group
80 * goes unused, eliminate it? */
81
82#define EFFICIENCY_BITS 16 /* It has to save at least this many bits... */
83
84typedef struct _djw_stream djw_stream;
85typedef struct _djw_heapen djw_heapen;
86typedef struct _djw_prefix djw_prefix;
87typedef uint32_t djw_weight;
88
89/* To enable Huffman tuning code... */
90#ifndef TUNE_HUFFMAN
91#define TUNE_HUFFMAN 0
92#endif
93
94#if TUNE_HUFFMAN == 0
95#define xd3_real_encode_huff xd3_encode_huff
96#define IF_TUNE(x)
97#define IF_NTUNE(x) x
98#else
99static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate);
100#define IF_TUNE(x) x
101#define IF_NTUNE(x)
102static djw_weight tune_freq[DJW_TOTAL_CODES];
103static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
104static usize_t tune_prefix_bits;
105static usize_t tune_select_bits;
106static usize_t tune_encode_bits;
107#endif
108struct _djw_heapen
109{
110 uint32_t depth;
111 uint32_t freq;
112 uint32_t parent;
113};
114
115struct _djw_prefix
116{
117 usize_t scount;
118 uint8_t *symbol;
119 usize_t mcount;
120 uint8_t *mtfsym;
121 uint8_t *repcnt;
122};
123
124struct _djw_stream
125{
126 int unused;
127};
128
129/* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves
130 * Huffman coded after eliminating repeats and move-to-front coding. The prefix consists
 131 * of all the CLEN codes in djw_encode_12basic plus a 4-bit value stating how many of the
 132 * djw_encode_12extra codes are actually coded (the rest are presumed zero, or unused CLEN
133 * codes).
134 *
 135 * The values of these two arrays were arrived at by studying the distribution of min
 136 * and max clen over a collection of DATA, INST, and ADDR inputs. The goal is to order
 137 * djw_encode_12extra[] so that it is most likely to minimize the number of extra codes
 138 * that must be encoded.
139 *
140 * Results: 158896 sections were counted by compressing files (window size 512K) listed
141 * with: `find / -type f ( -user jmacd -o -perm +444 )`
142 *
143 * The distribution of CLEN codes for each efficient invocation of the secondary
144 * compressor (taking the best number of groups/sector size) was recorded. Then we look at
145 * the distribution of min and max clen values, counting the number of times the value
146 * C_low is less than the min and C_high is greater than the max. Values >= C_high and <=
147 * C_low will not have their lengths coded. The results are sorted and the least likely
 148 * 15 are placed into the djw_encode_12extra[] array in order. These values are used as
149 * the initial MTF ordering.
150
151 clow[1] = 155119
152 clow[2] = 140325
153 clow[3] = 84072
154 ---
155 clow[4] = 7225
156 clow[5] = 1093
157 clow[6] = 215
158 ---
159 chigh[4] = 1
160 chigh[5] = 30
161 chigh[6] = 218
162 chigh[7] = 2060
163 chigh[8] = 13271
164 ---
165 chigh[9] = 39463
166 chigh[10] = 77360
167 chigh[11] = 118298
168 chigh[12] = 141360
169 chigh[13] = 154086
170 chigh[14] = 157967
171 chigh[15] = 158603
172 chigh[16] = 158864
173 chigh[17] = 158893
174 chigh[18] = 158895
175 chigh[19] = 158896
176 chigh[20] = 158896
177
178*/
179
180static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
181 {
182 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20
183 };
184
185static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
186 {
187 4, 5, 6, 7, 8,
188 };
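/* A sketch of the prefix header layout, as implemented by djw_encode_prefix and
 * djw_decode_clclen below: the header begins with a 4-bit count equal to
 * num_to_encode - DJW_EXTRA_12OFFSET, where num_to_encode is the number of
 * CLEN-alphabet code lengths actually transmitted (trailing zero lengths are dropped,
 * but never fewer than DJW_EXTRA_12OFFSET lengths are sent).  Each of the
 * num_to_encode lengths then follows in DJW_CLCLEN_BITS bits; the remaining lengths
 * are implicitly zero.  For example, if the last non-zero length has index 14, then
 * num_to_encode == 15 and the header stores 15 - 7 == 8 in 4 bits, followed by
 * fifteen 4-bit lengths. */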
189
190/*********************************************************************/
191/* DECLS */
192/*********************************************************************/
193
194static djw_stream* djw_alloc (xd3_stream *stream /*, int alphabet_size */);
195static void djw_init (djw_stream *h);
196static void djw_destroy (xd3_stream *stream,
197 djw_stream *h);
198
199#if XD3_ENCODER
200static int xd3_encode_huff (xd3_stream *stream,
201 djw_stream *sec_stream,
202 xd3_output *input,
203 xd3_output *output,
204 xd3_sec_cfg *cfg);
205#endif
206
207static int xd3_decode_huff (xd3_stream *stream,
208 djw_stream *sec_stream,
209 const uint8_t **input,
210 const uint8_t *const input_end,
211 uint8_t **output,
212 const uint8_t *const output_end);
213
214/*********************************************************************/
215/* HUFFMAN */
216/*********************************************************************/
217
218static djw_stream*
219djw_alloc (xd3_stream *stream)
220{
221 return xd3_alloc (stream, sizeof (djw_stream), 1);
222}
223
224static void
225djw_init (djw_stream *h)
226{
227 /* Fields are initialized prior to use. */
228}
229
230static void
231djw_destroy (xd3_stream *stream,
232 djw_stream *h)
233{
234 xd3_free (stream, h);
235}
236
237
238/*********************************************************************/
239/* HEAP */
240/*********************************************************************/
241
242static INLINE int
243heap_less (const djw_heapen *a, const djw_heapen *b)
244{
245 return a->freq < b->freq ||
246 (a->freq == b->freq &&
247 a->depth < b->depth);
248}
249
250static INLINE void
251heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e)
252{
253 /* Insert ents[e] into next slot heap[p] */
254 uint pp = p/2; /* P's parent */
255
256 while (heap_less (& ents[e], & ents[heap[pp]]))
257 {
258 heap[p] = heap[pp];
259 p = pp;
260 pp = p/2;
261 }
262
263 heap[p] = e;
264}
265
266static INLINE djw_heapen*
267heap_extract (uint *heap, const djw_heapen *ents, uint heap_last)
268{
269 uint smallest = heap[1];
270 uint p, pc, t;
271
272 /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
273 heap[1] = heap[heap_last+1];
274
275 /* Re-heapify */
276 for (p = 1; ; p = pc)
277 {
278 pc = p*2;
279
280 /* Reached bottom of heap */
281 if (pc > heap_last) { break; }
282
283 /* See if second child is smaller. */
284 if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; }
285
286 /* If pc is not smaller than p, heap property re-established. */
287 if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
288
289 t = heap[pc];
290 heap[pc] = heap[p];
291 heap[p] = t;
292 }
293
294 return (djw_heapen*) & ents[smallest];
295}
296
297#if XD3_DEBUG
298static void
299heap_check (uint *heap, djw_heapen *ents, uint heap_last)
300{
301 uint i;
302 for (i = 1; i <= heap_last; i += 1)
303 {
304 /* Heap property: child not less than parent */
305 XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
306 }
307}
308#endif
309
310/*********************************************************************/
311/* MTF, 1/2 */
312/*********************************************************************/
313
314static INLINE usize_t
315djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
316{
317 int k;
318 usize_t sym = mtf[mtf_i];
319
320 for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
321
322 mtf[0] = sym;
323 return sym;
324}
325
326static INLINE void
327djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq)
328{
329 int code;
330
331 do
332 {
333 /* Offset by 1, since any number of RUN_ symbols implies run>0... */
334 *mtf_run -= 1;
335
336 code = (*mtf_run & 1) ? RUN_1 : RUN_0;
337
338 mtfsym[(*mtf_i)++] = code;
339 freq[code] += 1;
340 *mtf_run >>= 1;
341 }
342 while (*mtf_run >= 1);
343
344 *mtf_run = 0;
345}
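#if 0
/* A minimal, self-contained sketch (not used by xdelta3) of the 1-2 run-length code
 * implemented by djw_update_1_2 above and djw_decode_1_2 below.  A run length n > 0
 * is written as a sequence of RUN_0/RUN_1 digits, least-significant first, where the
 * i'th digit contributes 1 << i (RUN_0) or 2 << i (RUN_1):
 *
 *   run 1 -> RUN_0            run 4 -> RUN_1 RUN_0
 *   run 2 -> RUN_1            run 5 -> RUN_0 RUN_1
 *   run 3 -> RUN_0 RUN_0      run 6 -> RUN_1 RUN_1
 */
static usize_t
example_encode_run (usize_t run, uint8_t *digits)
{
  usize_t n = 0;
  do
    {
      run -= 1;
      digits[n++] = (run & 1) ? RUN_1 : RUN_0;
      run >>= 1;
    }
  while (run >= 1);
  return n;
}

static usize_t
example_decode_run (const uint8_t *digits, usize_t n)
{
  usize_t run = 0, i;
  for (i = 0; i < n; i += 1) { run += (digits[i] + 1) << i; }
  return run;
}
#endif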
346
347static void
348djw_init_clen_mtf_1_2 (uint8_t *clmtf)
349{
350 int i, cl_i = 0;
351
352 clmtf[cl_i++] = 0;
353 for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; }
354 for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; }
355}
356
357/*********************************************************************/
358/* PREFIX CODES */
359/*********************************************************************/
360#if XD3_ENCODER
361static usize_t
362djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen)
363{
364 /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes,
365 * never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees
366 * during prefix construction). First ALPHABET_SIZE entries are the actual symbols,
367 * next ALPHABET_SIZE-1 are internal nodes. */
368 djw_heapen ents[ALPHABET_SIZE * 2];
369 uint heap[ALPHABET_SIZE + 1];
370
371 uint heap_last; /* Index of the last _valid_ heap entry. */
372 uint ents_size; /* Number of entries, including 0th fake entry */
373 int overflow; /* Number of code lengths that overflow */
374 uint32_t total_bits;
375 int i;
376
377 IF_DEBUG (uint32_t first_bits = 0);
378
 379 /* Insert real symbol frequencies. */
380 for (i = 0; i < asize; i += 1)
381 {
382 ents[i+1].freq = freq[i];
383 }
384
385 again:
386
387 /* The loop is re-entered each time an overflow occurs. Re-initialize... */
388 heap_last = 0;
389 ents_size = 1;
390 overflow = 0;
391 total_bits = 0;
392
 393 /* 0th entry terminates the while loop in heap_insert (it's the parent of the smallest
394 * element, always less-than) */
395 heap[0] = 0;
396 ents[0].depth = 0;
397 ents[0].freq = 0;
398
399 /* Initial heap. */
400 for (i = 0; i < asize; i += 1, ents_size += 1)
401 {
402 ents[ents_size].depth = 0;
403 ents[ents_size].parent = 0;
404
405 if (ents[ents_size].freq != 0)
406 {
407 heap_insert (heap, ents, ++heap_last, ents_size);
408 }
409 }
410
411 IF_DEBUG (heap_check (heap, ents, heap_last));
412
413 /* Must be at least one symbol, or else we can't get here. */
414 XD3_ASSERT (heap_last != 0);
415
416 /* If there is only one symbol, fake a second to prevent zero-length codes. */
417 if (unlikely (heap_last == 1))
418 {
419 /* Pick either the first or last symbol. */
420 int s = freq[0] ? asize-1 : 0;
421 ents[s+1].freq = 1;
422 goto again;
423 }
424
425 /* Build prefix tree. */
426 while (heap_last > 1)
427 {
428 djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
429 djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
430
431 ents[ents_size].freq = h1->freq + h2->freq;
432 ents[ents_size].depth = 1 + max (h1->depth, h2->depth);
433 ents[ents_size].parent = 0;
434
435 h1->parent = h2->parent = ents_size;
436
437 heap_insert (heap, ents, ++heap_last, ents_size++);
438
439 IF_DEBUG (heap_check (heap, ents, heap_last));
440 }
441
442 /* Now compute prefix code lengths, counting parents. */
443 for (i = 1; i < asize+1; i += 1)
444 {
445 int b = 0;
446
447 if (ents[i].freq != 0)
448 {
449 int p = i;
450
451 while ((p = ents[p].parent) != 0) { b += 1; }
452
453 if (b > maxlen) { overflow = 1; }
454
455 total_bits += b * freq[i-1];
456 }
457
458 /* clen is 0-origin, unlike ents. */
459 clen[i-1] = b;
460 }
461
462 IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
463
464 if (! overflow)
465 {
466 IF_DEBUG (if (first_bits != total_bits)
467 {
468 DP(RINT "code length overflow changed %u bits\n", (usize_t)(total_bits - first_bits));
469 });
470 return total_bits;
471 }
472
473 /* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier
474 * (for now), as done in bzip2. */
475 for (i = 1; i < asize+1; i += 1)
476 {
477 ents[i].freq = ents[i].freq / 2 + 1;
478 }
479
480 goto again;
481}
482
483static void
484djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max))
485{
486 int i, l;
487 int min_clen = DJW_MAX_CODELEN;
488 int max_clen = 0;
489 uint code = 0;
490
491 for (i = 0; i < asize; i += 1)
492 {
493 if (clen[i] > 0 && clen[i] < min_clen)
494 {
495 min_clen = clen[i];
496 }
497
498 max_clen = max (max_clen, (int) clen[i]);
499 }
500
501 XD3_ASSERT (max_clen <= abs_max);
502
503 for (l = min_clen; l <= max_clen; l += 1)
504 {
505 for (i = 0; i < asize; i += 1)
506 {
507 if (clen[i] == l) { codes[i] = code++; }
508 }
509
510 code <<= 1;
511 }
512}
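/* A worked example of the canonical code assignment above (illustration only): with
 * code lengths clen[] = { 2, 1, 3, 3 } for symbols 0..3, codes are handed out in order
 * of increasing length, within a length in symbol order, doubling the running counter
 * between lengths:
 *
 *   symbol 1 (len 1): 0
 *   symbol 0 (len 2): 10
 *   symbol 2 (len 3): 110
 *   symbol 3 (len 3): 111
 *
 * The code is completely determined by the lengths, which is why only clen[] needs to
 * be transmitted. */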
513
514/*********************************************************************/
515/* MOVE-TO-FRONT */
516/*********************************************************************/
517static void
518djw_compute_mtf_1_2 (djw_prefix *prefix,
519 uint8_t *mtf,
520 djw_weight *freq_out, /* freak out! */
521 usize_t nsym)
522{
523 int i, j, k;
524 usize_t sym;
525 usize_t size = prefix->scount;
526 usize_t mtf_i = 0;
527 int mtf_run = 0;
528
529 memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1));
530
531 for (i = 0; i < size; )
532 {
533 /* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before
534 * the MTF update. */
535 sym = prefix->symbol[i++];
536
537 for (j = 0; mtf[j] != sym; j += 1) { }
538
539 XD3_ASSERT (j < nsym);
540
541 for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
542
543 mtf[0] = sym;
544
545 if (j == 0)
546 {
547 mtf_run += 1;
548 continue;
549 }
550
551 if (mtf_run > 0)
552 {
553 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
554 }
555
556 /* Non-zero symbols are offset by RUN_1 */
557 prefix->mtfsym[mtf_i++] = j+RUN_1;
558 freq_out[j+RUN_1] += 1;
559 }
560
561 if (mtf_run > 0)
562 {
563 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
564 }
565
566 prefix->mcount = mtf_i;
567}
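/* A worked example of the MTF + 1/2 transform above (illustration only): suppose the
 * current MTF list begins [a, b, ...] and the input symbols are a a a b.  The three
 * leading a's hit position 0 and accumulate mtf_run = 3, which is flushed as the run
 * digits RUN_0 RUN_0 (see djw_update_1_2); then b is found at position 1 and emitted
 * as 1 + RUN_1 == 2, moving b to the front.  The MTF output is therefore 0 0 2. */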
568
569static usize_t
570djw_count_freqs (djw_weight *freq, xd3_output *input)
571{
572 xd3_output *in;
573 usize_t size = 0;
574
575 memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
576
 577 /* Frequency counting. OPT: can be accomplished beforehand. */
578 for (in = input; in; in = in->next_page)
579 {
580 const uint8_t *p = in->base;
581 const uint8_t *p_max = p + in->next;
582
583 size += in->next;
584
585 do { freq[*p++] += 1; } while (p < p_max);
586 }
587
588 IF_DEBUG1 ({int i;
589 DP(RINT "freqs: ");
590 for (i = 0; i < ALPHABET_SIZE; i += 1) { DP(RINT "%u ", freq[i]); }
591 DP(RINT "\n");});
592
593 return size;
594}
595
596static void
597djw_compute_multi_prefix (int groups,
598 uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
599 djw_prefix *prefix)
600{
601 int gp, i;
602
603 prefix->scount = ALPHABET_SIZE;
604 memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
605
606 for (gp = 1; gp < groups; gp += 1)
607 {
608 for (i = 0; i < ALPHABET_SIZE; i += 1)
609 {
610 if (clen[gp][i] == 0)
611 {
612 continue;
613 }
614
615 prefix->symbol[prefix->scount++] = clen[gp][i];
616 }
617 }
618}
619
620static void
621djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
622{
623 uint8_t clmtf[DJW_MAX_CODELEN+1];
624
625 djw_init_clen_mtf_1_2 (clmtf);
626
627 djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1);
628}
629
630static int
631djw_encode_prefix (xd3_stream *stream,
632 xd3_output **output,
633 bit_state *bstate,
634 djw_prefix *prefix)
635{
636 int ret, i;
637 uint num_to_encode;
638 djw_weight clfreq[DJW_TOTAL_CODES];
639 uint8_t clclen[DJW_TOTAL_CODES];
640 uint clcode[DJW_TOTAL_CODES];
641
642 IF_TUNE (memset (clfreq, 0, sizeof (clfreq)));
643
644 /* Move-to-front encode prefix symbols, count frequencies */
645 djw_compute_prefix_1_2 (prefix, clfreq);
646
647 /* Compute codes */
648 djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
649 djw_build_codes (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN));
650
651 /* Compute number of extra codes beyond basic ones for this template. */
652 num_to_encode = DJW_TOTAL_CODES;
653 while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; }
654 XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
655
656 /* Encode: # of extra codes */
657 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
658 num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; }
659
660 /* Encode: MTF code lengths */
661 for (i = 0; i < num_to_encode; i += 1)
662 {
663 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; }
664 }
665
666 /* Encode: CLEN code lengths */
667 for (i = 0; i < prefix->mcount; i += 1)
668 {
669 usize_t mtf_sym = prefix->mtfsym[i];
670 usize_t bits = clclen[mtf_sym];
671 usize_t code = clcode[mtf_sym];
672
673 if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; }
674 }
675
676 IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq)));
677
678 return 0;
679}
680
681static void
682djw_compute_selector_1_2 (djw_prefix *prefix,
683 usize_t groups,
684 djw_weight *gbest_freq)
685{
686 uint8_t grmtf[DJW_MAX_GROUPS];
687 usize_t i;
688
689 for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
690
691 djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
692}
693
694static int
695xd3_encode_howmany_groups (xd3_stream *stream,
696 xd3_sec_cfg *cfg,
697 usize_t input_size,
698 usize_t *ret_groups,
699 usize_t *ret_sector_size)
700{
701 usize_t cfg_groups = 0;
702 usize_t cfg_sector_size = 0;
703 usize_t sugg_groups = 0;
704 usize_t sugg_sector_size = 0;
705
706 if (cfg->ngroups != 0)
707 {
708 if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS)
709 {
710 stream->msg = "invalid secondary encoder group number";
711 return XD3_INTERNAL;
712 }
713
714 cfg_groups = cfg->ngroups;
715 }
716
717 if (cfg->sector_size != 0)
718 {
719 if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
720 {
721 stream->msg = "invalid secondary encoder sector size";
722 return XD3_INTERNAL;
723 }
724
725 cfg_sector_size = cfg->sector_size;
726 }
727
728 if (cfg_groups == 0 || cfg_sector_size == 0)
729 {
730 /* These values were found empirically using xdelta3-tune around version
731 * xdfs-0.256. */
732 switch (cfg->data_type)
733 {
734 case DATA_SECTION:
735 if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; }
736 else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; }
737 else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; }
738 else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; }
739 else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; }
740 else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; }
741 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
742 else { sugg_groups = 8; sugg_sector_size = 70; }
743 break;
744 case INST_SECTION:
745 if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; }
746 else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; }
747 else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; }
748 else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; }
749 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
750 else { sugg_groups = 8; sugg_sector_size = 40; }
751 break;
752 case ADDR_SECTION:
753 if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; }
754 else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; }
755 else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; }
756 else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
757 else { sugg_groups = 7; sugg_sector_size = 130; }
758 break;
759 }
760
761 if (cfg_groups == 0)
762 {
763 cfg_groups = sugg_groups;
764 }
765
766 if (cfg_sector_size == 0)
767 {
768 cfg_sector_size = sugg_sector_size;
769 }
770 }
771
772 if (cfg_groups != 1 && cfg_sector_size == 0)
773 {
774 switch (cfg->data_type)
775 {
776 case DATA_SECTION:
777 cfg_sector_size = 20;
778 break;
779 case INST_SECTION:
780 cfg_sector_size = 50;
781 break;
782 case ADDR_SECTION:
783 cfg_sector_size = 130;
784 break;
785 }
786 }
787
788 (*ret_groups) = cfg_groups;
789 (*ret_sector_size) = cfg_sector_size;
790
791 XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
792 XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX));
793
794 return 0;
795}
796
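/* Layout of an encoded section, as produced by xd3_real_encode_huff below
 * (descriptive summary):
 *
 *   [number of groups - 1               : DJW_GROUP_BITS]
 *   if groups > 1: [sector_size/5 - 1   : DJW_SECTORSZ_BITS]
 *   [prefix: code lengths for all groups, MTF + 1/2 coded (group 0 sends all
 *    ALPHABET_SIZE lengths, later groups only their non-zero lengths)]
 *   if groups > 1: [groups+1 selector code lengths : DJW_GBCLEN_BITS each]
 *                  [per-sector group selectors, MTF + 1/2 coded]
 *   [data: one Huffman-coded symbol per input byte, using the selected group's code]
 *
 * If the estimated output would not beat the input by at least EFFICIENCY_BITS bits,
 * the encoder returns XD3_NOSECOND instead (unless cfg->inefficient is set). */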
797static int
798xd3_real_encode_huff (xd3_stream *stream,
799 djw_stream *h,
800 xd3_output *input,
801 xd3_output *output,
802 xd3_sec_cfg *cfg)
803{
804 int ret;
805 usize_t groups, sector_size;
806 bit_state bstate = BIT_STATE_ENCODE_INIT;
807 xd3_output *in;
808 int encode_bits;
809 usize_t input_bits;
810 usize_t input_bytes;
811 usize_t initial_offset = output->next;
812 djw_weight real_freq[ALPHABET_SIZE];
813 uint8_t *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */
814 uint8_t *gbest_mtf = NULL;
815
816 input_bytes = djw_count_freqs (real_freq, input);
817 input_bits = input_bytes * 8;
818
819 XD3_ASSERT (input_bytes > 0);
820
821 if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size)))
822 {
823 return ret;
824 }
825
826 if (0)
827 {
828 regroup:
 829 /* Sometimes we dynamically decide there are too many groups; re-enter here to restart encoding with fewer. */
830 output->next = initial_offset;
831 xd3_bit_state_encode_init (& bstate);
832 }
833
834 /* Encode: # of groups (3 bits) */
835 if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; }
836
837 if (groups == 1)
838 {
839 /* Single Huffman group. */
840 uint code[ALPHABET_SIZE]; /* Codes */
841 IF_TUNE (uint8_t *clen = tune_clen[0];)
842 IF_NTUNE (uint8_t clen[ALPHABET_SIZE];)
843 uint8_t prefix_mtfsym[ALPHABET_SIZE];
844 djw_prefix prefix;
845
846 encode_bits =
847 djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
848 djw_build_codes (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
849
850 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
851
852 /* Encode: prefix */
853 prefix.mtfsym = prefix_mtfsym;
854 prefix.symbol = clen;
855 prefix.scount = ALPHABET_SIZE;
856
857 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
858
859 if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
860
861 IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
862 IF_TUNE (tune_select_bits = 0);
863 IF_TUNE (tune_encode_bits = encode_bits);
864
865 /* Encode: data */
866 for (in = input; in; in = in->next_page)
867 {
868 const uint8_t *p = in->base;
869 const uint8_t *p_max = p + in->next;
870
871 do
872 {
873 usize_t sym = *p++;
874 usize_t bits = clen[sym];
875
876 IF_DEBUG (encode_bits -= bits);
877
878 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; }
879 }
880 while (p < p_max);
881 }
882
883 XD3_ASSERT (encode_bits == 0);
884 }
885 else
886 {
887 /* DJW Huffman */
888 djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
889#if TUNE_HUFFMAN == 0
890 uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
891#else
892#define evolve_clen tune_clen
893#endif
894 djw_weight left = input_bytes;
895 int gp;
896 int niter = 0;
897 usize_t select_bits;
898 usize_t sym1 = 0, sym2 = 0, s;
899 usize_t gcost[DJW_MAX_GROUPS];
900 uint gbest_code[DJW_MAX_GROUPS+2];
901 uint8_t gbest_clen[DJW_MAX_GROUPS+2];
902 usize_t gbest_max = 1 + (input_bytes - 1) / sector_size;
903 int best_bits = 0;
904 usize_t gbest_no;
905 usize_t gpcnt;
906 const uint8_t *p;
907 IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);
908
909 /* Encode: sector size (5 bits) */
910 if ((ret = xd3_encode_bits (stream, & output, & bstate,
911 DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; }
912
913 /* Dynamic allocation. */
914 if (gbest == NULL)
915 {
916 if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; }
917 }
918
919 if (gbest_mtf == NULL)
920 {
921 if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; }
922 }
923
924 /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
925
926 /* Generate initial code length tables. */
927 for (gp = 0; gp < groups; gp += 1)
928 {
929 djw_weight sum = 0;
930 djw_weight goal = left / (groups - gp);
931
932 IF_DEBUG1 (usize_t nz = 0);
933
934 /* Due to the single-code granularity of this distribution, it may be that we
935 * can't generate a distribution for each group. In that case subtract one
936 * group and try again. If (inefficient), we're testing group behavior, so
937 * don't mess things up. */
938 if (goal == 0 && !cfg->inefficient)
939 {
940 IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n", groups));
941 groups -= 1;
942 goto regroup;
943 }
944
945 /* Sum == goal is possible when (cfg->inefficient)... */
946 while (sum < goal)
947 {
948 XD3_ASSERT (sym2 < ALPHABET_SIZE);
949 IF_DEBUG1 (nz += real_freq[sym2] != 0);
950 sum += real_freq[sym2++];
951 }
952
953 IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n",
954 gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes););
955
956 for (s = 0; s < ALPHABET_SIZE; s += 1)
957 {
958 evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
959 }
960
961 left -= sum;
962 sym1 = sym2+1;
963 }
964
965 repeat:
966
967 niter += 1;
968 gbest_no = 0;
969 memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
970 IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));
971
 972 /* For each input page (the loop is irregular to allow non-power-of-2 sector sizes). */
973 in = input;
974 p = in->base;
975
976 /* For each group-size sector. */
977 do
978 {
979 const uint8_t *p0 = p;
980 xd3_output *in0 = in;
981 usize_t best = 0;
982 usize_t winner = 0;
983
984 /* Select best group for each sector, update evolve_freq. */
985 memset (gcost, 0, sizeof (gcost[0]) * groups);
986
987 /* For each byte in sector. */
988 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
989 {
990 /* For each group. */
991 for (gp = 0; gp < groups; gp += 1)
992 {
993 gcost[gp] += evolve_clen[gp][*p];
994 }
995
996 /* Check end-of-input-page. */
997# define GP_PAGE() \
998 if (++p - in->base == in->next) \
999 { \
1000 in = in->next_page; \
1001 if (in == NULL) { break; } \
1002 p = in->base; \
1003 }
1004
1005 GP_PAGE ();
1006 }
1007
1008 /* Find min cost group for this sector */
1009 best = -1U;
1010 for (gp = 0; gp < groups; gp += 1)
1011 {
1012 if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }
1013 }
1014
1015 XD3_ASSERT(gbest_no < gbest_max);
1016 gbest[gbest_no++] = winner;
1017 IF_DEBUG1 (gcount[winner] += 1);
1018
1019 p = p0;
1020 in = in0;
1021
1022 /* Update group frequencies. */
1023 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1024 {
1025 evolve_freq[winner][*p] += 1;
1026
1027 GP_PAGE ();
1028 }
1029 }
1030 while (in != NULL);
1031
1032 XD3_ASSERT (gbest_no == gbest_max);
1033
1034 /* Recompute code lengths. */
1035 encode_bits = 0;
1036 for (gp = 0; gp < groups; gp += 1)
1037 {
1038 int i;
1039 uint8_t evolve_zero[ALPHABET_SIZE];
1040 int any_zeros = 0;
1041
1042 memset (evolve_zero, 0, sizeof (evolve_zero));
1043
1044 /* Cannot allow a zero clen when the real frequency is non-zero. Note: this
1045 * means we are going to encode a fairly long code for these unused entries. An
1046 * improvement would be to implement a NOTUSED code for when these are actually
1047 * zero, but this requires another data structure (evolve_zero) since we don't
1048 * know when evolve_freq[i] == 0... Briefly tested, looked worse. */
1049 for (i = 0; i < ALPHABET_SIZE; i += 1)
1050 {
1051 if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
1052 {
1053 evolve_freq[gp][i] = 1;
1054 evolve_zero[i] = 1;
1055 any_zeros = 1;
1056 }
1057 }
1058
1059 encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN);
1060
1061 /* The above faking of frequencies does not matter for the last iteration, but we
1062 * don't know yet which iteration is last. It does, however, skew the encode_bits
1063 * computation, so subtract the faked lengths here: necessary for accuracy, and for
1064 * the (encode_bits==0) assert after all bits are output. */
1065 if (any_zeros)
1066 {
1067 IF_DEBUG1 (usize_t save_total = encode_bits);
1068
1069 for (i = 0; i < ALPHABET_SIZE; i += 1)
1070 {
1071 if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; }
1072 }
1073
1074 IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp));
1075 }
1076 }
1077
1078 IF_DEBUG1(
1079 DP(RINT "pass %u total bits: %u group uses: ", niter, encode_bits);
1080 for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
1081 DP(RINT "\n"););
1082
1083 /* End iteration. (The following assertion proved invalid.) */
1084 /*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/
1085
1086 IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) {
1087 DP(RINT "iteration lost %u bits\n", encode_bits - best_bits); });
1088
1089 if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT))
1090 {
1091 best_bits = encode_bits;
1092 goto repeat;
1093 }
1094
1095 /* Efficiency check. */
1096 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1097
1098 IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0));
1099
1100 /* Encode: prefix */
1101 {
1102 uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
1103 uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
1104 uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
1105 djw_prefix prefix;
1106
1107 prefix.symbol = prefix_symbol;
1108 prefix.mtfsym = prefix_mtfsym;
1109 prefix.repcnt = prefix_repcnt;
1110
1111 djw_compute_multi_prefix (groups, evolve_clen, & prefix);
1112 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
1113 }
1114
1115 /* Encode: selector frequencies */
1116 {
1117 djw_weight gbest_freq[DJW_MAX_GROUPS+1];
1118 djw_prefix gbest_prefix;
1119 usize_t i;
1120
1121 gbest_prefix.scount = gbest_no;
1122 gbest_prefix.symbol = gbest;
1123 gbest_prefix.mtfsym = gbest_mtf;
1124
1125 djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
1126
1127 select_bits =
1128 djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
1129 djw_build_codes (gbest_code, gbest_clen, groups+1 DEBUG_ARG (DJW_MAX_GBCLEN));
1130
1131 IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
1132 IF_TUNE (tune_select_bits = select_bits);
1133 IF_TUNE (tune_encode_bits = encode_bits);
1134
1135 for (i = 0; i < groups+1; i += 1)
1136 {
1137 if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; }
1138 }
1139
1140 for (i = 0; i < gbest_prefix.mcount; i += 1)
1141 {
1142 usize_t gp_mtf = gbest_mtf[i];
1143 usize_t gp_sel_bits = gbest_clen[gp_mtf];
1144 usize_t gp_sel_code = gbest_code[gp_mtf];
1145
1146 XD3_ASSERT (gp_mtf < groups+1);
1147
1148 if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; }
1149
1150 IF_DEBUG (select_bits -= gp_sel_bits);
1151 }
1152
1153 XD3_ASSERT (select_bits == 0);
1154 }
1155
1156 /* Efficiency check. */
1157 if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1158
1159 /* Encode: data */
1160 {
1161 uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
1162 usize_t sector = 0;
1163
1164 /* Build code tables for each group. */
1165 for (gp = 0; gp < groups; gp += 1)
1166 {
1167 djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
1168 }
1169
1170 /* Now loop over the input. */
1171 in = input;
1172 p = in->base;
1173
1174 do
1175 {
1176 /* For each sector. */
1177 usize_t gp_best = gbest[sector];
1178 uint *gp_codes = evolve_code[gp_best];
1179 uint8_t *gp_clens = evolve_clen[gp_best];
1180
1181 XD3_ASSERT (sector < gbest_no);
1182
1183 sector += 1;
1184
1185 /* Encode the sector data. */
1186 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1187 {
1188 usize_t sym = *p;
1189 usize_t bits = gp_clens[sym];
1190 usize_t code = gp_codes[sym];
1191
1192 IF_DEBUG (encode_bits -= bits);
1193
1194 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; }
1195
1196 GP_PAGE ();
1197 }
1198 }
1199 while (in != NULL);
1200
1201 XD3_ASSERT (select_bits == 0);
1202 XD3_ASSERT (encode_bits == 0);
1203
1204#undef evolve_clen
1205 }
1206 }
1207
1208 ret = xd3_flush_bits (stream, & output, & bstate);
1209
1210 if (0)
1211 {
1212 nosecond:
1213 stream->msg = "secondary compression was inefficient";
1214 ret = XD3_NOSECOND;
1215 }
1216
1217 failure:
1218
1219 xd3_free (stream, gbest);
1220 xd3_free (stream, gbest_mtf);
1221 return ret;
1222}
1223#endif /* XD3_ENCODER */
1224
1225/*********************************************************************/
1226/* DECODE */
1227/*********************************************************************/
1228
1229static void
1230djw_build_decoder (xd3_stream *stream,
1231 usize_t asize,
1232 usize_t abs_max,
1233 const uint8_t *clen,
1234 uint8_t *inorder,
1235 uint *base,
1236 uint *limit,
1237 uint *min_clenp,
1238 uint *max_clenp)
1239{
1240 int i, l;
1241 const uint8_t *ci;
1242 uint nr_clen [DJW_MAX_CODELEN+2];
1243 uint tmp_base[DJW_MAX_CODELEN+2];
1244 int min_clen;
1245 int max_clen;
1246
1247 /* Assumption: the two temporary arrays are large enough to hold abs_max. */
1248 XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
1249
1250 /* This looks something like the start of zlib's inftrees.c */
1251 memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
1252
1253 /* Count number of each code length */
1254 i = asize;
1255 ci = clen;
1256 do
1257 {
1258 /* Caller _must_ check that values are in-range. Most of the time
1259 * the caller decodes a fixed number of bits, which implies the max value; otherwise
1260 * the caller decodes a Huffman value, which must be in-range. Therefore,
1261 * it's an assertion and this function cannot otherwise fail. */
1262 XD3_ASSERT (*ci <= abs_max);
1263
1264 nr_clen[*ci++]++;
1265 }
1266 while (--i != 0);
1267
1268 /* Compute min, max. */
1269 for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
1270 min_clen = i;
1271 for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
1272 max_clen = i;
1273
1274 /* Fill the BASE, LIMIT table. */
1275 tmp_base[min_clen] = 0;
1276 base[min_clen] = 0;
1277 limit[min_clen] = nr_clen[min_clen] - 1;
1278 for (i = min_clen + 1; i <= max_clen; i += 1)
1279 {
1280 uint last_limit = ((limit[i-1] + 1) << 1);
1281 tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
1282 limit[i] = last_limit + nr_clen[i] - 1;
1283 base[i] = last_limit - tmp_base[i];
1284 }
1285
1286 /* Fill the inorder array, canonically ordered codes. */
1287 ci = clen;
1288 for (i = 0; i < asize; i += 1)
1289 {
1290 if ((l = *ci++) != 0)
1291 {
1292 inorder[tmp_base[l]++] = i;
1293 }
1294 }
1295
1296 *min_clenp = min_clen;
1297 *max_clenp = max_clen;
1298}
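/* A worked example of the base/limit decoding tables (illustration only, continuing
 * the clen[] = { 2, 1, 3, 3 } example given for djw_build_codes): the canonical codes
 * are 0, 10, 110, 111, so
 *
 *   inorder[] = { 1, 0, 2, 3 }    (symbols in canonical code order)
 *   base[1] = 0  limit[1] = 0     (one 1-bit code:  0)
 *   base[2] = 1  limit[2] = 2     (one 2-bit code:  10)
 *   base[3] = 4  limit[3] = 7     (two 3-bit codes: 110, 111)
 *
 * djw_decode_symbol below accumulates bits MSB-first into `code'; as soon as
 * code <= limit[bits] the symbol is inorder[code - base[bits]].  Reading 1,1,0 gives
 * code 6 at 3 bits, and inorder[6 - 4] == 2. */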
1299
1300static INLINE int
1301djw_decode_symbol (xd3_stream *stream,
1302 bit_state *bstate,
1303 const uint8_t **input,
1304 const uint8_t *input_end,
1305 const uint8_t *inorder,
1306 const uint *base,
1307 const uint *limit,
1308 uint min_clen,
1309 uint max_clen,
1310 usize_t *sym,
1311 usize_t max_sym)
1312{
1313 usize_t code = 0;
1314 usize_t bits = 0;
1315
1316 /* OPT: Supposedly a small lookup table improves speed here... */
1317
1318 /* Code outline is similar to xd3_decode_bits... */
1319 if (bstate->cur_mask == 0x100) { goto next_byte; }
1320
1321 for (;;)
1322 {
1323 do
1324 {
1325 if (bits == max_clen) { goto corrupt; }
1326
1327 bits += 1;
1328 code = (code << 1);
1329
1330 if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
1331
1332 bstate->cur_mask <<= 1;
1333
1334 if (bits >= min_clen && code <= limit[bits]) { goto done; }
1335 }
1336 while (bstate->cur_mask != 0x100);
1337
1338 next_byte:
1339
1340 if (*input == input_end)
1341 {
1342 stream->msg = "secondary decoder end of input";
1343 return XD3_INTERNAL;
1344 }
1345
1346 bstate->cur_byte = *(*input)++;
1347 bstate->cur_mask = 1;
1348 }
1349
1350 done:
1351
1352 if (base[bits] <= code)
1353 {
1354 usize_t offset = code - base[bits];
1355
1356 if (offset <= max_sym)
1357 {
1358 IF_DEBUG2 (DP(RINT "(j) %u ", code));
1359 *sym = inorder[offset];
1360 return 0;
1361 }
1362 }
1363
1364 corrupt:
1365 stream->msg = "secondary decoder invalid code";
1366 return XD3_INTERNAL;
1367}
1368
1369static int
1370djw_decode_clclen (xd3_stream *stream,
1371 bit_state *bstate,
1372 const uint8_t **input,
1373 const uint8_t *input_end,
1374 uint8_t *cl_inorder,
1375 uint *cl_base,
1376 uint *cl_limit,
1377 uint *cl_minlen,
1378 uint *cl_maxlen,
1379 uint8_t *cl_mtf)
1380{
1381 int ret;
1382 uint8_t cl_clen[DJW_TOTAL_CODES];
1383 usize_t num_codes, value;
1384 int i;
1385
1386 /* How many extra code lengths to encode. */
1387 if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & num_codes))) { return ret; }
1388
1389 num_codes += DJW_EXTRA_12OFFSET;
1390
1391 /* Read num_codes. */
1392 for (i = 0; i < num_codes; i += 1)
1393 {
1394 if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & value))) { return ret; }
1395
1396 cl_clen[i] = value;
1397 }
1398
1399 /* Set the rest to zero. */
1400 for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
1401
1402 /* No need to check for in-range clen values, because: */
1403 XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
1404
1405 /* Build the code-length decoder. */
1406 djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
1407 cl_clen, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen);
1408
1409 /* Initialize the MTF state. */
1410 djw_init_clen_mtf_1_2 (cl_mtf);
1411
1412 return 0;
1413}
1414
1415static INLINE int
1416djw_decode_1_2 (xd3_stream *stream,
1417 bit_state *bstate,
1418 const uint8_t **input,
1419 const uint8_t *input_end,
1420 const uint8_t *inorder,
1421 const uint *base,
1422 const uint *limit,
1423 const uint *minlen,
1424 const uint *maxlen,
1425 uint8_t *mtfvals,
1426 usize_t elts,
1427 usize_t skip_offset,
1428 uint8_t *values)
1429{
1430 usize_t n = 0, rep = 0, mtf = 0, s = 0;
1431 int ret;
1432
1433 while (n < elts)
1434 {
1435 /* Special case inside generic code (CLEN decoding only): for groups after the first,
1436 * we already know which symbols have zero code length, so they are not transmitted. */
1437 if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
1438 {
1439 values[n++] = 0;
1440 continue;
1441 }
1442
1443 /* Repeat last symbol. */
1444 if (rep != 0)
1445 {
1446 values[n++] = mtfvals[0];
1447 rep -= 1;
1448 continue;
1449 }
1450
1451 /* Symbol following last repeat code. */
1452 if (mtf != 0)
1453 {
1454 usize_t sym = djw_update_mtf (mtfvals, mtf);
1455 values[n++] = sym;
1456 mtf = 0;
1457 continue;
1458 }
1459
1460 /* Decode next symbol/repeat code. */
1461 if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
1462 inorder, base, limit, *minlen, *maxlen,
1463 & mtf, DJW_TOTAL_CODES))) { return ret; }
1464
1465 if (mtf <= RUN_1)
1466 {
1467 /* Repetition. */
1468 rep = ((mtf + 1) << s);
1469 mtf = 0;
1470 s += 1;
1471 }
1472 else
1473 {
1474 /* Remove the RUN_1 MTF offset. */
1475 mtf -= 1;
1476 s = 0;
1477 }
1478 }
1479
1480 /* If (rep != 0) there were too many codes received. */
1481 if (rep != 0)
1482 {
1483 stream->msg = "secondary decoder invalid repeat code";
1484 return XD3_INTERNAL;
1485 }
1486
1487 return 0;
1488}
1489
1490static INLINE int
1491djw_decode_prefix (xd3_stream *stream,
1492 bit_state *bstate,
1493 const uint8_t **input,
1494 const uint8_t *input_end,
1495 const uint8_t *cl_inorder,
1496 const uint *cl_base,
1497 const uint *cl_limit,
1498 const uint *cl_minlen,
1499 const uint *cl_maxlen,
1500 uint8_t *cl_mtf,
1501 usize_t groups,
1502 uint8_t *clen)
1503{
1504 return djw_decode_1_2 (stream, bstate, input, input_end,
1505 cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen, cl_mtf,
1506 ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
1507}
1508
1509static int
1510xd3_decode_huff (xd3_stream *stream,
1511 djw_stream *h,
1512 const uint8_t **input_pos,
1513 const uint8_t *const input_end,
1514 uint8_t **output_pos,
1515 const uint8_t *const output_end)
1516{
1517 const uint8_t *input = *input_pos;
1518 uint8_t *output = *output_pos;
1519 bit_state bstate = BIT_STATE_DECODE_INIT;
1520 uint8_t *sel_group = NULL;
1521 usize_t groups, gp;
1522 usize_t output_bytes = (output_end - output);
1523 usize_t sector_size;
1524 usize_t sectors;
1525 int ret;
1526
1527 /* Invalid input. */
1528 if (output_bytes == 0)
1529 {
1530 stream->msg = "secondary decoder invalid input";
1531 return XD3_INTERNAL;
1532 }
1533
1534 /* Decode: number of groups */
1535 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; }
1536
1537 groups += 1;
1538
1539 if (groups > 1)
1540 {
1541 /* Decode: group size */
1542 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; }
1543
1544 sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
1545 }
1546 else
1547 {
1548 /* Default for groups == 1 */
1549 sector_size = output_bytes;
1550 }
1551
1552 sectors = 1 + (output_bytes - 1) / sector_size;
1553
1554 /* @!@ In the case of groups==1, lots of extra stack space gets used here. Could
1555 * dynamically allocate this memory, which would help with excess parameter passing,
1556 * too. Too many parameters are passed around in this file; simplify it! */
1557
1558 /* Outer scope: per-group symbol decoder tables. */
1559 {
1560 uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
1561 uint base [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2];
1562 uint limit [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2];
1563 uint minlen [DJW_MAX_GROUPS];
1564 uint maxlen [DJW_MAX_GROUPS];
1565
1566 /* Nested scope: code length decoder tables. */
1567 {
1568 uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE];
1569 uint8_t cl_inorder[DJW_TOTAL_CODES];
1570 uint cl_base [DJW_MAX_CLCLEN+2];
1571 uint cl_limit [DJW_MAX_CLCLEN+2];
1572 uint8_t cl_mtf [DJW_TOTAL_CODES];
1573 uint cl_minlen;
1574 uint cl_maxlen;
1575
1576 /* Compute the code length decoder. */
1577 if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
1578 cl_inorder, cl_base, cl_limit, & cl_minlen,
1579 & cl_maxlen, cl_mtf))) { goto fail; }
1580
1581 /* Now decode each group decoder. */
1582 if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
1583 cl_inorder, cl_base, cl_limit,
1584 & cl_minlen, & cl_maxlen, cl_mtf,
1585 groups, clen[0]))) { goto fail; }
1586
1587 /* Prepare the actual decoding tables. */
1588 for (gp = 0; gp < groups; gp += 1)
1589 {
1590 djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
1591 clen[gp], inorder[gp], base[gp], limit[gp],
1592 & minlen[gp], & maxlen[gp]);
1593 }
1594 }
1595
1596 /* Decode: selector clens. */
1597 {
1598 uint8_t sel_inorder[DJW_MAX_GROUPS+2];
1599 uint sel_base [DJW_MAX_GBCLEN+2];
1600 uint sel_limit [DJW_MAX_GBCLEN+2];
1601 uint8_t sel_mtf [DJW_MAX_GROUPS+2];
1602 uint sel_minlen;
1603 uint sel_maxlen;
1604
1605 /* Setup group selection. */
1606 if (groups > 1)
1607 {
1608 uint8_t sel_clen[DJW_MAX_GROUPS+1];
1609
1610 for (gp = 0; gp < groups+1; gp += 1)
1611 {
1612 usize_t value;
1613
1614 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; }
1615
1616 sel_clen[gp] = value;
1617 sel_mtf[gp] = gp;
1618 }
1619
1620 if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; }
1621
1622 djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
1623 sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen);
1624
1625 if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
1626 sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf,
1627 sectors, 0, sel_group))) { goto fail; }
1628 }
1629
1630 /* Now decode each sector. */
1631 {
1632 uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */
1633 uint *gp_base = base[0];
1634 uint *gp_limit = limit[0];
1635 uint gp_minlen = minlen[0];
1636 uint gp_maxlen = maxlen[0];
1637 usize_t c;
1638
1639 for (c = 0; c < sectors; c += 1)
1640 {
1641 usize_t n;
1642
1643 if (groups >= 2)
1644 {
1645 gp = sel_group[c];
1646
1647 XD3_ASSERT (gp < groups);
1648
1649 gp_inorder = inorder[gp];
1650 gp_base = base[gp];
1651 gp_limit = limit[gp];
1652 gp_minlen = minlen[gp];
1653 gp_maxlen = maxlen[gp];
1654 }
1655
1656 XD3_ASSERT (output_end - output > 0);
1657
1658 /* Decode next sector. */
1659 n = min (sector_size, (usize_t) (output_end - output));
1660
1661 do
1662 {
1663 usize_t sym;
1664
1665 if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end,
1666 gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen,
1667 & sym, ALPHABET_SIZE))) { goto fail; }
1668
1669 *output++ = sym;
1670 }
1671 while (--n);
1672 }
1673 }
1674 }
1675 }
1676
1677 IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; });
1678 XD3_ASSERT (ret == 0);
1679
1680 fail:
1681 xd3_free (stream, sel_group);
1682
1683 (*input_pos) = input;
1684 (*output_pos) = output;
1685 return ret;
1686}
1687
1688/*********************************************************************/
1689/* TUNING */
1690/*********************************************************************/
1691
1692#if TUNE_HUFFMAN && XD3_ENCODER
1693#include <stdio.h>
1694#include "xdelta3-fgk.h"
1695
1696static uint
1697xd3_bitsof_output (xd3_output *output, bit_state *bstate)
1698{
1699 uint x = 0;
1700 uint m = bstate->cur_mask;
1701
1702 while (m != 1)
1703 {
1704 x += 1;
1705 m >>= 1;
1706 }
1707
1708 return x + 8 * xd3_sizeof_output (output);
1709}
1710
1711static const char* xd3_sect_type (xd3_section_type type)
1712{
1713 switch (type)
1714 {
1715 case DATA_SECTION: return "DATA";
1716 case INST_SECTION: return "INST";
1717 case ADDR_SECTION: return "ADDR";
1718 }
1719 abort ();
1720}
1721
1722static int
1723xd3_encode_huff (xd3_stream *stream,
1724 djw_stream *h,
1725 xd3_output *input,
1726 xd3_output *unused_output,
1727 xd3_sec_cfg *cfg)
1728{
1729 int ret = 0;
1730 int input_size = xd3_sizeof_output (input);
1731 static int hdr = 0;
1732 const char *sect_type = xd3_sect_type (cfg->data_type);
1733 xd3_output *output;
1734 usize_t output_size;
1735
1736 if (hdr == 0) { hdr = 1; DP(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); }
1737
1738 DP(RINT "SECTION %s %u\n", sect_type, input_size);
1739
1740 {
1741 int gp, i;
1742 int best_size = 99999999;
1743 usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0;
1744 int best_gpno = -1;
1745 const char *t12 = "12";
1746 usize_t clen_count[DJW_MAX_CODELEN+1];
1747 djw_weight best_freq[DJW_TOTAL_CODES];
1748
1749 for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1)
1750 {
1751 for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10)
1752 {
1753 output = xd3_alloc_output (stream, NULL);
1754
1755 if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; }
1756
1757 output_size = xd3_sizeof_output (output);
1758
1759 if (output_size < best_size)
1760 {
1761 best_size = output_size;
1762 best_gpno = cfg->ngroups;
1763 best_prefix = tune_prefix_bits;
1764 best_select = tune_select_bits;
1765 best_encode = tune_encode_bits;
1766 best_sector_size = cfg->sector_size;
1767 memset (clen_count, 0, sizeof (clen_count));
1768
1769 for (gp = 0; gp < cfg->ngroups; gp += 1)
1770 {
1771 for (i = 0; i < ALPHABET_SIZE; i += 1)
1772 {
1773 clen_count[tune_clen[gp][i]] += 1;
1774 }
1775 }
1776
1777 memcpy (best_freq, tune_freq, sizeof (tune_freq));
1778
1779 XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq));
1780 }
1781
1782 if (1)
1783 {
1784 DP(RINT "COMP%s %u %u %u %u %u %u\n",
1785 t12, cfg->ngroups, cfg->sector_size,
1786 output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits);
1787 }
1788 else
1789 {
1790 fail:
1791 DP(RINT "COMP%s %u %u %u %u %u %u\n",
1792 t12, cfg->ngroups, cfg->sector_size,
1793 input_size, 0, 0, 0);
1794 }
1795
1796 xd3_free_output (stream, output);
1797
1798 XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1799
1800 if (cfg->ngroups == 1) { break; }
1801 }
1802 }
1803
1804 if (best_gpno > 0)
1805 {
1806 DP(RINT "BEST%s %u %u %u %u %u %u\n",
1807 t12, best_gpno, best_sector_size,
1808 best_size, best_prefix, best_select, best_encode);
1809
1810#if 0
1811 DP(RINT "CLEN%s ", t12);
1812 for (i = 1; i <= DJW_MAX_CODELEN; i += 1)
1813 {
1814 DP(RINT "%u ", clen_count[i]);
1815 }
1816 DP(RINT "\n");
1817
1818 DP(RINT "FREQ%s ", t12);
1819 for (i = 0; i < DJW_TOTAL_CODES; i += 1)
1820 {
1821 DP(RINT "%u ", tune_freq[i]);
1822 }
1823 DP(RINT "\n");
1824#endif
1825 }
1826 }
1827
1828 /* Compare to split single-table windows. */
1829 {
1830 int parts, i;
1831
1832 cfg->ngroups = 1;
1833
1834 for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1)
1835 {
1836 usize_t part_size = input_size / parts;
1837 xd3_output *inp = input, *partin, *partin_head;
1838 usize_t off = 0;
1839 usize_t part_total = 0;
1840
1841 if (part_size < 1000) { break; }
1842
1843 for (i = 0; i < parts; i += 1)
1844 {
1845 usize_t inc;
1846
1847 partin = partin_head = xd3_alloc_output (stream, NULL);
1848 output = xd3_alloc_output (stream, NULL);
1849
1850 for (inc = 0; ((i < parts-1) && inc < part_size) ||
1851 ((i == parts-1) && inp != NULL); )
1852 {
1853 usize_t take;
1854
1855 if (i < parts-1)
1856 {
1857 take = min (part_size - inc, inp->next - off);
1858 }
1859 else
1860 {
1861 take = inp->next - off;
1862 }
1863
1864 ret = xd3_emit_bytes (stream, & partin, inp->base + off, take);
1865
1866 off += take;
1867 inc += take;
1868
1869 if (off == inp->next)
1870 {
1871 inp = inp->next_page;
1872 off = 0;
1873 }
1874 }
1875
1876 ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg);
1877
1878 part_total += xd3_sizeof_output (output);
1879
1880 xd3_free_output (stream, partin_head);
1881 xd3_free_output (stream, output);
1882
1883 XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1884
1885 if (ret == XD3_NOSECOND)
1886 {
1887 break;
1888 }
1889 }
1890
1891 if (ret != XD3_NOSECOND)
1892 {
1893 DP(RINT "PART %u %u\n", parts, part_total);
1894 }
1895 }
1896 }
1897
1898 /* Compare to FGK */
1899 {
1900 fgk_stream *fgk = fgk_alloc (stream);
1901
1902 fgk_init (fgk);
1903
1904 output = xd3_alloc_output (stream, NULL);
1905
1906 ret = xd3_encode_fgk (stream, fgk, input, output, NULL);
1907
1908 output_size = xd3_sizeof_output (output);
1909 xd3_free_output (stream, output);
1910 fgk_destroy (stream, fgk);
1911
1912 XD3_ASSERT (ret == 0);
1913
1914 DP(RINT "FGK %u\n", output_size);
1915 }
1916
1917 DP(RINT "END_SECTION %s %u\n", sect_type, input_size);
1918
1919 return 0;
1920}
1921#endif
1922
1923#endif