source: Vago/zlib-1.2.8/contrib/inflate86/inffas86.c@ 1089

Last change on this file since 1089 was 1049, checked in by s10k, 8 years ago
File size: 39.7 KB
RevLine 
[1049]1/* inffas86.c is a hand tuned assembler version of
2 *
3 * inffast.c -- fast decoding
4 * Copyright (C) 1995-2003 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6 *
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
8 * Please use the copyright conditions above.
9 *
10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
14 * from http://fedora.linux.duke.edu/fc1_x86_64
15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
17 * when decompressing mozilla-source-1.3.tar.gz.
18 *
19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
21 * the moment. I have successfully compiled and tested this code with gcc2.96,
22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
24 * enabled. I will attempt to merge the MMX code into this version. Newer
25 * versions of this and inffast.S can be found at
26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
27 */
28
29#include "zutil.h"
30#include "inftrees.h"
31#include "inflate.h"
32#include "inffast.h"
33
34/* Mark Adler's comments from inffast.c: */
35
36/*
37 Decode literal, length, and distance codes and write out the resulting
38 literal and match bytes until either not enough input or output is
39 available, an end-of-block is encountered, or a data error is encountered.
40 When large enough input and output buffers are supplied to inflate(), for
41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
42 inflate execution time is spent in this routine.
43
44 Entry assumptions:
45
46 state->mode == LEN
47 strm->avail_in >= 6
48 strm->avail_out >= 258
49 start >= strm->avail_out
50 state->bits < 8
51
52 On return, state->mode is one of:
53
54 LEN -- ran out of enough output space or enough available input
55 TYPE -- reached end of block code, inflate() to interpret next block
56 BAD -- error in block data
57
58 Notes:
59
60 - The maximum input bits used by a length/distance pair is 15 bits for the
61 length code, 5 bits for the length extra, 15 bits for the distance code,
62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
64 checking for available input while decoding.
65
66 - The maximum bytes that a single length/distance pair can output is 258
67 bytes, which is the maximum length that can be coded. inflate_fast()
68 requires strm->avail_out >= 258 for each loop to avoid checking for
69 output space.
70 */
71void inflate_fast(strm, start)
72z_streamp strm;
73unsigned start; /* inflate()'s starting value for strm->avail_out */
74{
75 struct inflate_state FAR *state;
76 struct inffast_ar {
77/* 64 32 x86 x86_64 */
78/* ar offset register */
79/* 0 0 */ void *esp; /* esp save */
80/* 8 4 */ void *ebp; /* ebp save */
81/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
82/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
83/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
84/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
85/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
86/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
87/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
88/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
89/* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
90/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
91/* 92 48 */ unsigned wsize; /* window size */
92/* 96 52 */ unsigned write; /* window write index */
93/*100 56 */ unsigned lmask; /* r12 mask for lcode */
94/*104 60 */ unsigned dmask; /* r13 mask for dcode */
95/*108 64 */ unsigned len; /* r14 match length */
96/*112 68 */ unsigned dist; /* r15 match distance */
97/*116 72 */ unsigned status; /* set when state chng*/
98 } ar;
99
100#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
101#define PAD_AVAIL_IN 6
102#define PAD_AVAIL_OUT 258
103#else
104#define PAD_AVAIL_IN 5
105#define PAD_AVAIL_OUT 257
106#endif
107
108 /* copy state to local variables */
109 state = (struct inflate_state FAR *)strm->state;
110 ar.in = strm->next_in;
111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
112 ar.out = strm->next_out;
113 ar.beg = ar.out - (start - strm->avail_out);
114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
115 ar.wsize = state->wsize;
116 ar.write = state->wnext;
117 ar.window = state->window;
118 ar.hold = state->hold;
119 ar.bits = state->bits;
120 ar.lcode = state->lencode;
121 ar.dcode = state->distcode;
122 ar.lmask = (1U << state->lenbits) - 1;
123 ar.dmask = (1U << state->distbits) - 1;
124
125 /* decode literals and length/distances until end-of-block or not enough
126 input data or output space */
127
128 /* align in on 1/2 hold size boundary */
129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
131 ar.bits += 8;
132 }
133
134#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
135 __asm__ __volatile__ (
136" leaq %0, %%rax\n"
137" movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
138" movq %%rsp, (%%rax)\n"
139" movq %%rax, %%rsp\n" /* make rsp point to &ar */
140" movq 16(%%rsp), %%rsi\n" /* rsi = in */
141" movq 32(%%rsp), %%rdi\n" /* rdi = out */
142" movq 24(%%rsp), %%r9\n" /* r9 = last */
143" movq 48(%%rsp), %%r10\n" /* r10 = end */
144" movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
145" movq 72(%%rsp), %%r11\n" /* r11 = dcode */
146" movq 80(%%rsp), %%rdx\n" /* rdx = hold */
147" movl 88(%%rsp), %%ebx\n" /* ebx = bits */
148" movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
149" movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
150 /* r14d = len */
151 /* r15d = dist */
152" cld\n"
153" cmpq %%rdi, %%r10\n"
154" je .L_one_time\n" /* if only one decode left */
155" cmpq %%rsi, %%r9\n"
156" je .L_one_time\n"
157" jmp .L_do_loop\n"
158
159".L_one_time:\n"
160" movq %%r12, %%r8\n" /* r8 = lmask */
161" cmpb $32, %%bl\n"
162" ja .L_get_length_code_one_time\n"
163
164" lodsl\n" /* eax = *(uint *)in++ */
165" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
166" addb $32, %%bl\n" /* bits += 32 */
167" shlq %%cl, %%rax\n"
168" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
169" jmp .L_get_length_code_one_time\n"
170
171".align 32,0x90\n"
172".L_while_test:\n"
173" cmpq %%rdi, %%r10\n"
174" jbe .L_break_loop\n"
175" cmpq %%rsi, %%r9\n"
176" jbe .L_break_loop\n"
177
178".L_do_loop:\n"
179" movq %%r12, %%r8\n" /* r8 = lmask */
180" cmpb $32, %%bl\n"
181" ja .L_get_length_code\n" /* if (32 < bits) */
182
183" lodsl\n" /* eax = *(uint *)in++ */
184" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
185" addb $32, %%bl\n" /* bits += 32 */
186" shlq %%cl, %%rax\n"
187" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
188
189".L_get_length_code:\n"
190" andq %%rdx, %%r8\n" /* r8 &= hold */
191" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
192
193" movb %%ah, %%cl\n" /* cl = this.bits */
194" subb %%ah, %%bl\n" /* bits -= this.bits */
195" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
196
197" testb %%al, %%al\n"
198" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
199
200" movq %%r12, %%r8\n" /* r8 = lmask */
201" shrl $16, %%eax\n" /* output this.val char */
202" stosb\n"
203
204".L_get_length_code_one_time:\n"
205" andq %%rdx, %%r8\n" /* r8 &= hold */
206" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
207
208".L_dolen:\n"
209" movb %%ah, %%cl\n" /* cl = this.bits */
210" subb %%ah, %%bl\n" /* bits -= this.bits */
211" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
212
213" testb %%al, %%al\n"
214" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
215
216" shrl $16, %%eax\n" /* output this.val char */
217" stosb\n"
218" jmp .L_while_test\n"
219
220".align 32,0x90\n"
221".L_test_for_length_base:\n"
222" movl %%eax, %%r14d\n" /* len = this */
223" shrl $16, %%r14d\n" /* len = this.val */
224" movb %%al, %%cl\n"
225
226" testb $16, %%al\n"
227" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
228" andb $15, %%cl\n" /* op &= 15 */
229" jz .L_decode_distance\n" /* if (!op) */
230
231".L_add_bits_to_len:\n"
232" subb %%cl, %%bl\n"
233" xorl %%eax, %%eax\n"
234" incl %%eax\n"
235" shll %%cl, %%eax\n"
236" decl %%eax\n"
237" andl %%edx, %%eax\n" /* eax &= hold */
238" shrq %%cl, %%rdx\n"
239" addl %%eax, %%r14d\n" /* len += hold & mask[op] */
240
241".L_decode_distance:\n"
242" movq %%r13, %%r8\n" /* r8 = dmask */
243" cmpb $32, %%bl\n"
244" ja .L_get_distance_code\n" /* if (32 < bits) */
245
246" lodsl\n" /* eax = *(uint *)in++ */
247" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
248" addb $32, %%bl\n" /* bits += 32 */
249" shlq %%cl, %%rax\n"
250" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
251
252".L_get_distance_code:\n"
253" andq %%rdx, %%r8\n" /* r8 &= hold */
254" movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
255
256".L_dodist:\n"
257" movl %%eax, %%r15d\n" /* dist = this */
258" shrl $16, %%r15d\n" /* dist = this.val */
259" movb %%ah, %%cl\n"
260" subb %%ah, %%bl\n" /* bits -= this.bits */
261" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
262" movb %%al, %%cl\n" /* cl = this.op */
263
264" testb $16, %%al\n" /* if ((op & 16) == 0) */
265" jz .L_test_for_second_level_dist\n"
266" andb $15, %%cl\n" /* op &= 15 */
267" jz .L_check_dist_one\n"
268
269".L_add_bits_to_dist:\n"
270" subb %%cl, %%bl\n"
271" xorl %%eax, %%eax\n"
272" incl %%eax\n"
273" shll %%cl, %%eax\n"
274" decl %%eax\n" /* (1 << op) - 1 */
275" andl %%edx, %%eax\n" /* eax &= hold */
276" shrq %%cl, %%rdx\n"
277" addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
278
279".L_check_window:\n"
280" movq %%rsi, %%r8\n" /* save in so from can use it's reg */
281" movq %%rdi, %%rax\n"
282" subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
283
284" cmpl %%r15d, %%eax\n"
285" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
286
287" movl %%r14d, %%ecx\n" /* ecx = len */
288" movq %%rdi, %%rsi\n"
289" subq %%r15, %%rsi\n" /* from = out - dist */
290
291" sarl %%ecx\n"
292" jnc .L_copy_two\n" /* if len % 2 == 0 */
293
294" rep movsw\n"
295" movb (%%rsi), %%al\n"
296" movb %%al, (%%rdi)\n"
297" incq %%rdi\n"
298
299" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
300" jmp .L_while_test\n"
301
302".L_copy_two:\n"
303" rep movsw\n"
304" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
305" jmp .L_while_test\n"
306
307".align 32,0x90\n"
308".L_check_dist_one:\n"
309" cmpl $1, %%r15d\n" /* if dist 1, is a memset */
310" jne .L_check_window\n"
311" cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
312" je .L_check_window\n"
313
314" movl %%r14d, %%ecx\n" /* ecx = len */
315" movb -1(%%rdi), %%al\n"
316" movb %%al, %%ah\n"
317
318" sarl %%ecx\n"
319" jnc .L_set_two\n"
320" movb %%al, (%%rdi)\n"
321" incq %%rdi\n"
322
323".L_set_two:\n"
324" rep stosw\n"
325" jmp .L_while_test\n"
326
327".align 32,0x90\n"
328".L_test_for_second_level_length:\n"
329" testb $64, %%al\n"
330" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
331
332" xorl %%eax, %%eax\n"
333" incl %%eax\n"
334" shll %%cl, %%eax\n"
335" decl %%eax\n"
336" andl %%edx, %%eax\n" /* eax &= hold */
337" addl %%r14d, %%eax\n" /* eax += len */
338" movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
339" jmp .L_dolen\n"
340
341".align 32,0x90\n"
342".L_test_for_second_level_dist:\n"
343" testb $64, %%al\n"
344" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
345
346" xorl %%eax, %%eax\n"
347" incl %%eax\n"
348" shll %%cl, %%eax\n"
349" decl %%eax\n"
350" andl %%edx, %%eax\n" /* eax &= hold */
351" addl %%r15d, %%eax\n" /* eax += dist */
352" movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
353" jmp .L_dodist\n"
354
355".align 32,0x90\n"
356".L_clip_window:\n"
357" movl %%eax, %%ecx\n" /* ecx = nbytes */
358" movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
359" negl %%ecx\n" /* nbytes = -nbytes */
360
361" cmpl %%r15d, %%eax\n"
362" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
363
364" addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
365" cmpl $0, 96(%%rsp)\n"
366" jne .L_wrap_around_window\n" /* if (write != 0) */
367
368" movq 56(%%rsp), %%rsi\n" /* from = window */
369" subl %%ecx, %%eax\n" /* eax -= nbytes */
370" addq %%rax, %%rsi\n" /* from += wsize - nbytes */
371
372" movl %%r14d, %%eax\n" /* eax = len */
373" cmpl %%ecx, %%r14d\n"
374" jbe .L_do_copy\n" /* if (nbytes >= len) */
375
376" subl %%ecx, %%eax\n" /* eax -= nbytes */
377" rep movsb\n"
378" movq %%rdi, %%rsi\n"
379" subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
380" jmp .L_do_copy\n"
381
382".align 32,0x90\n"
383".L_wrap_around_window:\n"
384" movl 96(%%rsp), %%eax\n" /* eax = write */
385" cmpl %%eax, %%ecx\n"
386" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
387
388" movl 92(%%rsp), %%esi\n" /* from = wsize */
389" addq 56(%%rsp), %%rsi\n" /* from += window */
390" addq %%rax, %%rsi\n" /* from += write */
391" subq %%rcx, %%rsi\n" /* from -= nbytes */
392" subl %%eax, %%ecx\n" /* nbytes -= write */
393
394" movl %%r14d, %%eax\n" /* eax = len */
395" cmpl %%ecx, %%eax\n"
396" jbe .L_do_copy\n" /* if (nbytes >= len) */
397
398" subl %%ecx, %%eax\n" /* len -= nbytes */
399" rep movsb\n"
400" movq 56(%%rsp), %%rsi\n" /* from = window */
401" movl 96(%%rsp), %%ecx\n" /* nbytes = write */
402" cmpl %%ecx, %%eax\n"
403" jbe .L_do_copy\n" /* if (nbytes >= len) */
404
405" subl %%ecx, %%eax\n" /* len -= nbytes */
406" rep movsb\n"
407" movq %%rdi, %%rsi\n"
408" subq %%r15, %%rsi\n" /* from = out - dist */
409" jmp .L_do_copy\n"
410
411".align 32,0x90\n"
412".L_contiguous_in_window:\n"
413" movq 56(%%rsp), %%rsi\n" /* rsi = window */
414" addq %%rax, %%rsi\n"
415" subq %%rcx, %%rsi\n" /* from += write - nbytes */
416
417" movl %%r14d, %%eax\n" /* eax = len */
418" cmpl %%ecx, %%eax\n"
419" jbe .L_do_copy\n" /* if (nbytes >= len) */
420
421" subl %%ecx, %%eax\n" /* len -= nbytes */
422" rep movsb\n"
423" movq %%rdi, %%rsi\n"
424" subq %%r15, %%rsi\n" /* from = out - dist */
425" jmp .L_do_copy\n" /* if (nbytes >= len) */
426
427".align 32,0x90\n"
428".L_do_copy:\n"
429" movl %%eax, %%ecx\n" /* ecx = len */
430" rep movsb\n"
431
432" movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
433" jmp .L_while_test\n"
434
435".L_test_for_end_of_block:\n"
436" testb $32, %%al\n"
437" jz .L_invalid_literal_length_code\n"
438" movl $1, 116(%%rsp)\n"
439" jmp .L_break_loop_with_status\n"
440
441".L_invalid_literal_length_code:\n"
442" movl $2, 116(%%rsp)\n"
443" jmp .L_break_loop_with_status\n"
444
445".L_invalid_distance_code:\n"
446" movl $3, 116(%%rsp)\n"
447" jmp .L_break_loop_with_status\n"
448
449".L_invalid_distance_too_far:\n"
450" movl $4, 116(%%rsp)\n"
451" jmp .L_break_loop_with_status\n"
452
453".L_break_loop:\n"
454" movl $0, 116(%%rsp)\n"
455
456".L_break_loop_with_status:\n"
457/* put in, out, bits, and hold back into ar and pop esp */
458" movq %%rsi, 16(%%rsp)\n" /* in */
459" movq %%rdi, 32(%%rsp)\n" /* out */
460" movl %%ebx, 88(%%rsp)\n" /* bits */
461" movq %%rdx, 80(%%rsp)\n" /* hold */
462" movq (%%rsp), %%rax\n" /* restore rbp and rsp */
463" movq 8(%%rsp), %%rbp\n"
464" movq %%rax, %%rsp\n"
465 :
466 : "m" (ar)
467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
469 );
470#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
471 __asm__ __volatile__ (
472" leal %0, %%eax\n"
473" movl %%esp, (%%eax)\n" /* save esp, ebp */
474" movl %%ebp, 4(%%eax)\n"
475" movl %%eax, %%esp\n"
476" movl 8(%%esp), %%esi\n" /* esi = in */
477" movl 16(%%esp), %%edi\n" /* edi = out */
478" movl 40(%%esp), %%edx\n" /* edx = hold */
479" movl 44(%%esp), %%ebx\n" /* ebx = bits */
480" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
481
482" cld\n"
483" jmp .L_do_loop\n"
484
485".align 32,0x90\n"
486".L_while_test:\n"
487" cmpl %%edi, 24(%%esp)\n" /* out < end */
488" jbe .L_break_loop\n"
489" cmpl %%esi, 12(%%esp)\n" /* in < last */
490" jbe .L_break_loop\n"
491
492".L_do_loop:\n"
493" cmpb $15, %%bl\n"
494" ja .L_get_length_code\n" /* if (15 < bits) */
495
496" xorl %%eax, %%eax\n"
497" lodsw\n" /* al = *(ushort *)in++ */
498" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
499" addb $16, %%bl\n" /* bits += 16 */
500" shll %%cl, %%eax\n"
501" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
502
503".L_get_length_code:\n"
504" movl 56(%%esp), %%eax\n" /* eax = lmask */
505" andl %%edx, %%eax\n" /* eax &= hold */
506" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
507
508".L_dolen:\n"
509" movb %%ah, %%cl\n" /* cl = this.bits */
510" subb %%ah, %%bl\n" /* bits -= this.bits */
511" shrl %%cl, %%edx\n" /* hold >>= this.bits */
512
513" testb %%al, %%al\n"
514" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
515
516" shrl $16, %%eax\n" /* output this.val char */
517" stosb\n"
518" jmp .L_while_test\n"
519
520".align 32,0x90\n"
521".L_test_for_length_base:\n"
522" movl %%eax, %%ecx\n" /* len = this */
523" shrl $16, %%ecx\n" /* len = this.val */
524" movl %%ecx, 64(%%esp)\n" /* save len */
525" movb %%al, %%cl\n"
526
527" testb $16, %%al\n"
528" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
529" andb $15, %%cl\n" /* op &= 15 */
530" jz .L_decode_distance\n" /* if (!op) */
531" cmpb %%cl, %%bl\n"
532" jae .L_add_bits_to_len\n" /* if (op <= bits) */
533
534" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
535" xorl %%eax, %%eax\n"
536" lodsw\n" /* al = *(ushort *)in++ */
537" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
538" addb $16, %%bl\n" /* bits += 16 */
539" shll %%cl, %%eax\n"
540" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
541" movb %%ch, %%cl\n" /* move op back to ecx */
542
543".L_add_bits_to_len:\n"
544" subb %%cl, %%bl\n"
545" xorl %%eax, %%eax\n"
546" incl %%eax\n"
547" shll %%cl, %%eax\n"
548" decl %%eax\n"
549" andl %%edx, %%eax\n" /* eax &= hold */
550" shrl %%cl, %%edx\n"
551" addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
552
553".L_decode_distance:\n"
554" cmpb $15, %%bl\n"
555" ja .L_get_distance_code\n" /* if (15 < bits) */
556
557" xorl %%eax, %%eax\n"
558" lodsw\n" /* al = *(ushort *)in++ */
559" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
560" addb $16, %%bl\n" /* bits += 16 */
561" shll %%cl, %%eax\n"
562" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
563
564".L_get_distance_code:\n"
565" movl 60(%%esp), %%eax\n" /* eax = dmask */
566" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
567" andl %%edx, %%eax\n" /* eax &= hold */
568" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
569
570".L_dodist:\n"
571" movl %%eax, %%ebp\n" /* dist = this */
572" shrl $16, %%ebp\n" /* dist = this.val */
573" movb %%ah, %%cl\n"
574" subb %%ah, %%bl\n" /* bits -= this.bits */
575" shrl %%cl, %%edx\n" /* hold >>= this.bits */
576" movb %%al, %%cl\n" /* cl = this.op */
577
578" testb $16, %%al\n" /* if ((op & 16) == 0) */
579" jz .L_test_for_second_level_dist\n"
580" andb $15, %%cl\n" /* op &= 15 */
581" jz .L_check_dist_one\n"
582" cmpb %%cl, %%bl\n"
583" jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
584
585" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
586" xorl %%eax, %%eax\n"
587" lodsw\n" /* al = *(ushort *)in++ */
588" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
589" addb $16, %%bl\n" /* bits += 16 */
590" shll %%cl, %%eax\n"
591" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
592" movb %%ch, %%cl\n" /* move op back to ecx */
593
594".L_add_bits_to_dist:\n"
595" subb %%cl, %%bl\n"
596" xorl %%eax, %%eax\n"
597" incl %%eax\n"
598" shll %%cl, %%eax\n"
599" decl %%eax\n" /* (1 << op) - 1 */
600" andl %%edx, %%eax\n" /* eax &= hold */
601" shrl %%cl, %%edx\n"
602" addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
603
604".L_check_window:\n"
605" movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
606" movl %%edi, %%eax\n"
607" subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
608
609" cmpl %%ebp, %%eax\n"
610" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
611
612" movl 64(%%esp), %%ecx\n" /* ecx = len */
613" movl %%edi, %%esi\n"
614" subl %%ebp, %%esi\n" /* from = out - dist */
615
616" sarl %%ecx\n"
617" jnc .L_copy_two\n" /* if len % 2 == 0 */
618
619" rep movsw\n"
620" movb (%%esi), %%al\n"
621" movb %%al, (%%edi)\n"
622" incl %%edi\n"
623
624" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
625" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
626" jmp .L_while_test\n"
627
628".L_copy_two:\n"
629" rep movsw\n"
630" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
631" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
632" jmp .L_while_test\n"
633
634".align 32,0x90\n"
635".L_check_dist_one:\n"
636" cmpl $1, %%ebp\n" /* if dist 1, is a memset */
637" jne .L_check_window\n"
638" cmpl %%edi, 20(%%esp)\n"
639" je .L_check_window\n" /* out == beg, if outside window */
640
641" movl 64(%%esp), %%ecx\n" /* ecx = len */
642" movb -1(%%edi), %%al\n"
643" movb %%al, %%ah\n"
644
645" sarl %%ecx\n"
646" jnc .L_set_two\n"
647" movb %%al, (%%edi)\n"
648" incl %%edi\n"
649
650".L_set_two:\n"
651" rep stosw\n"
652" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
653" jmp .L_while_test\n"
654
655".align 32,0x90\n"
656".L_test_for_second_level_length:\n"
657" testb $64, %%al\n"
658" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
659
660" xorl %%eax, %%eax\n"
661" incl %%eax\n"
662" shll %%cl, %%eax\n"
663" decl %%eax\n"
664" andl %%edx, %%eax\n" /* eax &= hold */
665" addl 64(%%esp), %%eax\n" /* eax += len */
666" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
667" jmp .L_dolen\n"
668
669".align 32,0x90\n"
670".L_test_for_second_level_dist:\n"
671" testb $64, %%al\n"
672" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
673
674" xorl %%eax, %%eax\n"
675" incl %%eax\n"
676" shll %%cl, %%eax\n"
677" decl %%eax\n"
678" andl %%edx, %%eax\n" /* eax &= hold */
679" addl %%ebp, %%eax\n" /* eax += dist */
680" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
681" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
682" jmp .L_dodist\n"
683
684".align 32,0x90\n"
685".L_clip_window:\n"
686" movl %%eax, %%ecx\n"
687" movl 48(%%esp), %%eax\n" /* eax = wsize */
688" negl %%ecx\n" /* nbytes = -nbytes */
689" movl 28(%%esp), %%esi\n" /* from = window */
690
691" cmpl %%ebp, %%eax\n"
692" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
693
694" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
695" cmpl $0, 52(%%esp)\n"
696" jne .L_wrap_around_window\n" /* if (write != 0) */
697
698" subl %%ecx, %%eax\n"
699" addl %%eax, %%esi\n" /* from += wsize - nbytes */
700
701" movl 64(%%esp), %%eax\n" /* eax = len */
702" cmpl %%ecx, %%eax\n"
703" jbe .L_do_copy\n" /* if (nbytes >= len) */
704
705" subl %%ecx, %%eax\n" /* len -= nbytes */
706" rep movsb\n"
707" movl %%edi, %%esi\n"
708" subl %%ebp, %%esi\n" /* from = out - dist */
709" jmp .L_do_copy\n"
710
711".align 32,0x90\n"
712".L_wrap_around_window:\n"
713" movl 52(%%esp), %%eax\n" /* eax = write */
714" cmpl %%eax, %%ecx\n"
715" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
716
717" addl 48(%%esp), %%esi\n" /* from += wsize */
718" addl %%eax, %%esi\n" /* from += write */
719" subl %%ecx, %%esi\n" /* from -= nbytes */
720" subl %%eax, %%ecx\n" /* nbytes -= write */
721
722" movl 64(%%esp), %%eax\n" /* eax = len */
723" cmpl %%ecx, %%eax\n"
724" jbe .L_do_copy\n" /* if (nbytes >= len) */
725
726" subl %%ecx, %%eax\n" /* len -= nbytes */
727" rep movsb\n"
728" movl 28(%%esp), %%esi\n" /* from = window */
729" movl 52(%%esp), %%ecx\n" /* nbytes = write */
730" cmpl %%ecx, %%eax\n"
731" jbe .L_do_copy\n" /* if (nbytes >= len) */
732
733" subl %%ecx, %%eax\n" /* len -= nbytes */
734" rep movsb\n"
735" movl %%edi, %%esi\n"
736" subl %%ebp, %%esi\n" /* from = out - dist */
737" jmp .L_do_copy\n"
738
739".align 32,0x90\n"
740".L_contiguous_in_window:\n"
741" addl %%eax, %%esi\n"
742" subl %%ecx, %%esi\n" /* from += write - nbytes */
743
744" movl 64(%%esp), %%eax\n" /* eax = len */
745" cmpl %%ecx, %%eax\n"
746" jbe .L_do_copy\n" /* if (nbytes >= len) */
747
748" subl %%ecx, %%eax\n" /* len -= nbytes */
749" rep movsb\n"
750" movl %%edi, %%esi\n"
751" subl %%ebp, %%esi\n" /* from = out - dist */
752" jmp .L_do_copy\n" /* if (nbytes >= len) */
753
754".align 32,0x90\n"
755".L_do_copy:\n"
756" movl %%eax, %%ecx\n"
757" rep movsb\n"
758
759" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
760" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
761" jmp .L_while_test\n"
762
763".L_test_for_end_of_block:\n"
764" testb $32, %%al\n"
765" jz .L_invalid_literal_length_code\n"
766" movl $1, 72(%%esp)\n"
767" jmp .L_break_loop_with_status\n"
768
769".L_invalid_literal_length_code:\n"
770" movl $2, 72(%%esp)\n"
771" jmp .L_break_loop_with_status\n"
772
773".L_invalid_distance_code:\n"
774" movl $3, 72(%%esp)\n"
775" jmp .L_break_loop_with_status\n"
776
777".L_invalid_distance_too_far:\n"
778" movl 8(%%esp), %%esi\n"
779" movl $4, 72(%%esp)\n"
780" jmp .L_break_loop_with_status\n"
781
782".L_break_loop:\n"
783" movl $0, 72(%%esp)\n"
784
785".L_break_loop_with_status:\n"
786/* put in, out, bits, and hold back into ar and pop esp */
787" movl %%esi, 8(%%esp)\n" /* save in */
788" movl %%edi, 16(%%esp)\n" /* save out */
789" movl %%ebx, 44(%%esp)\n" /* save bits */
790" movl %%edx, 40(%%esp)\n" /* save hold */
791" movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
792" movl (%%esp), %%esp\n"
793 :
794 : "m" (ar)
795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
796 );
797#elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
798 __asm {
799 lea eax, ar
800 mov [eax], esp /* save esp, ebp */
801 mov [eax+4], ebp
802 mov esp, eax
803 mov esi, [esp+8] /* esi = in */
804 mov edi, [esp+16] /* edi = out */
805 mov edx, [esp+40] /* edx = hold */
806 mov ebx, [esp+44] /* ebx = bits */
807 mov ebp, [esp+32] /* ebp = lcode */
808
809 cld
810 jmp L_do_loop
811
812ALIGN 4
813L_while_test:
814 cmp [esp+24], edi
815 jbe L_break_loop
816 cmp [esp+12], esi
817 jbe L_break_loop
818
819L_do_loop:
820 cmp bl, 15
821 ja L_get_length_code /* if (15 < bits) */
822
823 xor eax, eax
824 lodsw /* al = *(ushort *)in++ */
825 mov cl, bl /* cl = bits, needs it for shifting */
826 add bl, 16 /* bits += 16 */
827 shl eax, cl
828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
829
830L_get_length_code:
831 mov eax, [esp+56] /* eax = lmask */
832 and eax, edx /* eax &= hold */
833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
834
835L_dolen:
836 mov cl, ah /* cl = this.bits */
837 sub bl, ah /* bits -= this.bits */
838 shr edx, cl /* hold >>= this.bits */
839
840 test al, al
841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
842
843 shr eax, 16 /* output this.val char */
844 stosb
845 jmp L_while_test
846
847ALIGN 4
848L_test_for_length_base:
849 mov ecx, eax /* len = this */
850 shr ecx, 16 /* len = this.val */
851 mov [esp+64], ecx /* save len */
852 mov cl, al
853
854 test al, 16
855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
856 and cl, 15 /* op &= 15 */
857 jz L_decode_distance /* if (!op) */
858 cmp bl, cl
859 jae L_add_bits_to_len /* if (op <= bits) */
860
861 mov ch, cl /* stash op in ch, freeing cl */
862 xor eax, eax
863 lodsw /* al = *(ushort *)in++ */
864 mov cl, bl /* cl = bits, needs it for shifting */
865 add bl, 16 /* bits += 16 */
866 shl eax, cl
867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
868 mov cl, ch /* move op back to ecx */
869
870L_add_bits_to_len:
871 sub bl, cl
872 xor eax, eax
873 inc eax
874 shl eax, cl
875 dec eax
876 and eax, edx /* eax &= hold */
877 shr edx, cl
878 add [esp+64], eax /* len += hold & mask[op] */
879
880L_decode_distance:
881 cmp bl, 15
882 ja L_get_distance_code /* if (15 < bits) */
883
884 xor eax, eax
885 lodsw /* al = *(ushort *)in++ */
886 mov cl, bl /* cl = bits, needs it for shifting */
887 add bl, 16 /* bits += 16 */
888 shl eax, cl
889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
890
891L_get_distance_code:
892 mov eax, [esp+60] /* eax = dmask */
893 mov ecx, [esp+36] /* ecx = dcode */
894 and eax, edx /* eax &= hold */
895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
896
897L_dodist:
898 mov ebp, eax /* dist = this */
899 shr ebp, 16 /* dist = this.val */
900 mov cl, ah
901 sub bl, ah /* bits -= this.bits */
902 shr edx, cl /* hold >>= this.bits */
903 mov cl, al /* cl = this.op */
904
905 test al, 16 /* if ((op & 16) == 0) */
906 jz L_test_for_second_level_dist
907 and cl, 15 /* op &= 15 */
908 jz L_check_dist_one
909 cmp bl, cl
910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
911
912 mov ch, cl /* stash op in ch, freeing cl */
913 xor eax, eax
914 lodsw /* al = *(ushort *)in++ */
915 mov cl, bl /* cl = bits, needs it for shifting */
916 add bl, 16 /* bits += 16 */
917 shl eax, cl
918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
919 mov cl, ch /* move op back to ecx */
920
921L_add_bits_to_dist:
922 sub bl, cl
923 xor eax, eax
924 inc eax
925 shl eax, cl
926 dec eax /* (1 << op) - 1 */
927 and eax, edx /* eax &= hold */
928 shr edx, cl
929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
930
931L_check_window:
932 mov [esp+8], esi /* save in so from can use it's reg */
933 mov eax, edi
934 sub eax, [esp+20] /* nbytes = out - beg */
935
936 cmp eax, ebp
937 jb L_clip_window /* if (dist > nbytes) 4.2% */
938
939 mov ecx, [esp+64] /* ecx = len */
940 mov esi, edi
941 sub esi, ebp /* from = out - dist */
942
943 sar ecx, 1
944 jnc L_copy_two
945
946 rep movsw
947 mov al, [esi]
948 mov [edi], al
949 inc edi
950
951 mov esi, [esp+8] /* move in back to %esi, toss from */
952 mov ebp, [esp+32] /* ebp = lcode */
953 jmp L_while_test
954
955L_copy_two:
956 rep movsw
957 mov esi, [esp+8] /* move in back to %esi, toss from */
958 mov ebp, [esp+32] /* ebp = lcode */
959 jmp L_while_test
960
961ALIGN 4
962L_check_dist_one:
963 cmp ebp, 1 /* if dist 1, is a memset */
964 jne L_check_window
965 cmp [esp+20], edi
966 je L_check_window /* out == beg, if outside window */
967
968 mov ecx, [esp+64] /* ecx = len */
969 mov al, [edi-1]
970 mov ah, al
971
972 sar ecx, 1
973 jnc L_set_two
974 mov [edi], al /* memset out with from[-1] */
975 inc edi
976
977L_set_two:
978 rep stosw
979 mov ebp, [esp+32] /* ebp = lcode */
980 jmp L_while_test
981
982ALIGN 4
983L_test_for_second_level_length:
984 test al, 64
985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
986
987 xor eax, eax
988 inc eax
989 shl eax, cl
990 dec eax
991 and eax, edx /* eax &= hold */
992 add eax, [esp+64] /* eax += len */
993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
994 jmp L_dolen
995
996ALIGN 4
997L_test_for_second_level_dist:
998 test al, 64
999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
1000
1001 xor eax, eax
1002 inc eax
1003 shl eax, cl
1004 dec eax
1005 and eax, edx /* eax &= hold */
1006 add eax, ebp /* eax += dist */
1007 mov ecx, [esp+36] /* ecx = dcode */
1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
1009 jmp L_dodist
1010
1011ALIGN 4
1012L_clip_window:
1013 mov ecx, eax
1014 mov eax, [esp+48] /* eax = wsize */
1015 neg ecx /* nbytes = -nbytes */
1016 mov esi, [esp+28] /* from = window */
1017
1018 cmp eax, ebp
1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
1020
1021 add ecx, ebp /* nbytes = dist - nbytes */
1022 cmp dword ptr [esp+52], 0
1023 jne L_wrap_around_window /* if (write != 0) */
1024
1025 sub eax, ecx
1026 add esi, eax /* from += wsize - nbytes */
1027
1028 mov eax, [esp+64] /* eax = len */
1029 cmp eax, ecx
1030 jbe L_do_copy /* if (nbytes >= len) */
1031
1032 sub eax, ecx /* len -= nbytes */
1033 rep movsb
1034 mov esi, edi
1035 sub esi, ebp /* from = out - dist */
1036 jmp L_do_copy
1037
1038ALIGN 4
1039L_wrap_around_window:
1040 mov eax, [esp+52] /* eax = write */
1041 cmp ecx, eax
1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
1043
1044 add esi, [esp+48] /* from += wsize */
1045 add esi, eax /* from += write */
1046 sub esi, ecx /* from -= nbytes */
1047 sub ecx, eax /* nbytes -= write */
1048
1049 mov eax, [esp+64] /* eax = len */
1050 cmp eax, ecx
1051 jbe L_do_copy /* if (nbytes >= len) */
1052
1053 sub eax, ecx /* len -= nbytes */
1054 rep movsb
1055 mov esi, [esp+28] /* from = window */
1056 mov ecx, [esp+52] /* nbytes = write */
1057 cmp eax, ecx
1058 jbe L_do_copy /* if (nbytes >= len) */
1059
1060 sub eax, ecx /* len -= nbytes */
1061 rep movsb
1062 mov esi, edi
1063 sub esi, ebp /* from = out - dist */
1064 jmp L_do_copy
1065
1066ALIGN 4
1067L_contiguous_in_window:
1068 add esi, eax
1069 sub esi, ecx /* from += write - nbytes */
1070
1071 mov eax, [esp+64] /* eax = len */
1072 cmp eax, ecx
1073 jbe L_do_copy /* if (nbytes >= len) */
1074
1075 sub eax, ecx /* len -= nbytes */
1076 rep movsb
1077 mov esi, edi
1078 sub esi, ebp /* from = out - dist */
1079 jmp L_do_copy
1080
1081ALIGN 4
1082L_do_copy:
1083 mov ecx, eax
1084 rep movsb
1085
1086 mov esi, [esp+8] /* move in back to %esi, toss from */
1087 mov ebp, [esp+32] /* ebp = lcode */
1088 jmp L_while_test
1089
1090L_test_for_end_of_block:
1091 test al, 32
1092 jz L_invalid_literal_length_code
1093 mov dword ptr [esp+72], 1
1094 jmp L_break_loop_with_status
1095
1096L_invalid_literal_length_code:
1097 mov dword ptr [esp+72], 2
1098 jmp L_break_loop_with_status
1099
1100L_invalid_distance_code:
1101 mov dword ptr [esp+72], 3
1102 jmp L_break_loop_with_status
1103
1104L_invalid_distance_too_far:
1105 mov esi, [esp+4]
1106 mov dword ptr [esp+72], 4
1107 jmp L_break_loop_with_status
1108
1109L_break_loop:
1110 mov dword ptr [esp+72], 0
1111
1112L_break_loop_with_status:
1113/* put in, out, bits, and hold back into ar and pop esp */
1114 mov [esp+8], esi /* save in */
1115 mov [esp+16], edi /* save out */
1116 mov [esp+44], ebx /* save bits */
1117 mov [esp+40], edx /* save hold */
1118 mov ebp, [esp+4] /* restore esp, ebp */
1119 mov esp, [esp]
1120 }
1121#else
1122#error "x86 architecture not defined"
1123#endif
1124
1125 if (ar.status > 1) {
1126 if (ar.status == 2)
1127 strm->msg = "invalid literal/length code";
1128 else if (ar.status == 3)
1129 strm->msg = "invalid distance code";
1130 else
1131 strm->msg = "invalid distance too far back";
1132 state->mode = BAD;
1133 }
1134 else if ( ar.status == 1 ) {
1135 state->mode = TYPE;
1136 }
1137
1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
1139 ar.len = ar.bits >> 3;
1140 ar.in -= ar.len;
1141 ar.bits -= ar.len << 3;
1142 ar.hold &= (1U << ar.bits) - 1;
1143
1144 /* update state and return */
1145 strm->next_in = ar.in;
1146 strm->next_out = ar.out;
1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
1148 PAD_AVAIL_IN + (ar.last - ar.in) :
1149 PAD_AVAIL_IN - (ar.in - ar.last));
1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
1152 PAD_AVAIL_OUT - (ar.out - ar.end));
1153 state->hold = ar.hold;
1154 state->bits = ar.bits;
1155 return;
1156}
1157
Note: See TracBrowser for help on using the repository browser.