source: Vago/zlib-1.2.8/contrib/masmx86/inffas32.asm@ 1049

Last change on this file since 1049 was 1049, checked in by s10k, 8 years ago
File size: 16.0 KB
Line 
1;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2; *
3; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4; *
5; * Copyright (C) 1995-2003 Mark Adler
6; * For conditions of distribution and use, see copyright notice in zlib.h
7; *
8; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9; * Please use the copyright conditions above.
10; *
11; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
13; * the moment. I have successfully compiled and tested this code with gcc2.96,
14; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
15; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16; * enabled. I will attempt to merge the MMX code into this version. Newer
17; * versions of this and inffast.S can be found at
18; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19; *
20; * 2005 : modification by Gilles Vollant
21; */
22; For Visual C++ 4.x and higher and ML 6.x and higher
23; ml.exe is in directory \MASM611C of Win95 DDK
24; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26;
27;
28; compile with command line option
29; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
30
31; if you define NO_GZIP (see inflate.h), compile with
32; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33
34
35; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36; zlib1222sup is 8 for zlib 1.2.2.2 and later (dmax and head were added
37; to inflate_state in inflate.h); it is added to the state offsets used by this file
38zlib1222sup equ 8
39
40
; INFLATE_MODE_TYPE / INFLATE_MODE_BAD are the values this file stores into
; state->mode on end-of-block and on invalid data respectively.
; The gzip-enabled numbering (11 / 26) is used unless NO_GUNZIP is defined
; without GUNZIP, in which case the shorter numbering (3 / 17) applies.
; NOTE(review): these must match the inflate_mode enum of the inflate.h
; actually being built -- confirm when changing zlib versions.
41IFDEF GUNZIP
42 INFLATE_MODE_TYPE equ 11
43 INFLATE_MODE_BAD equ 26
44ELSE
45 IFNDEF NO_GUNZIP
46 INFLATE_MODE_TYPE equ 11
47 INFLATE_MODE_BAD equ 26
48 ELSE
49 INFLATE_MODE_TYPE equ 3
50 INFLATE_MODE_BAD equ 17
51 ENDIF
52ENDIF
53
54
55; 75 "inffast.S"
56;FILE "inffast.S"
57
58;;;GLOBAL _inflate_fast
59
60;;;SECTION .text
61
62
63
; Target: Pentium-class instruction set (cpuid is used below) plus MMX,
; flat 32-bit memory model.
64 .586p
65 .mmx
66
67 name inflate_fast_x86
68 .MODEL FLAT
69
; inflate_fast_use_mmx: run-time dispatch flag, read and written by _inflate_fast.
;   2        -> use the MMX decoding loop
;   above 2  -> use the plain integer loop (3 is stored when MMX is rejected)
;   below 2  -> not decided yet; CPU detection runs and stores 2 or 3
; The initial value 1 therefore triggers detection on the first call.
70_DATA segment
71inflate_fast_use_mmx:
72 dd 1
73
74
; NOTE(review): _DATA is not closed with "_DATA ends" before _TEXT is opened.
75_TEXT segment
76
77
78
; Zero-terminated strings placed in the code segment.
; The first one is an unlabelled banner; nothing in this file references it.
79ALIGN 4
80 db 'Fast decoding Code from Chris Anderson'
81 db 0
82
; Error text whose address is stored at offset 24 of the stream structure
; when an invalid literal/length code is found.
83ALIGN 4
84invalid_literal_length_code_msg:
85 db 'invalid literal/length code'
86 db 0
87
; Error text stored when an invalid distance code is found.
88ALIGN 4
89invalid_distance_code_msg:
90 db 'invalid distance code'
91 db 0
92
; Error text stored when a match distance is larger than the window size.
93ALIGN 4
94invalid_distance_too_far_msg:
95 db 'invalid distance too far back'
96 db 0
97
98
; inflate_fast_mask: table of 33 dwords, entry n = (1 << n) - 1 for n = 0..32.
; The MMX paths index it with a bit count (inflate_fast_mask + n*4) to keep
; only the low n bits of the bit accumulator.
99ALIGN 4
100inflate_fast_mask:
101dd 0
102dd 1
103dd 3
104dd 7
105dd 15
106dd 31
107dd 63
108dd 127
109dd 255
110dd 511
111dd 1023
112dd 2047
113dd 4095
114dd 8191
115dd 16383
116dd 32767
117dd 65535
118dd 131071
119dd 262143
120dd 524287
121dd 1048575
122dd 2097151
123dd 4194303
124dd 8388607
125dd 16777215
126dd 33554431
127dd 67108863
128dd 134217727
129dd 268435455
130dd 536870911
131dd 1073741823
132dd 2147483647
133dd 4294967295
134
135
; Byte offsets of the inflate_state fields used by this file.
; Every offset except mode_state includes zlib1222sup, the size adjustment
; for the fields added to inflate_state in zlib 1.2.2.2.
; NOTE(review): these are hard-coded for a 32-bit struct layout and must be
; re-checked against inflate.h whenever inflate_state changes.
136mode_state equ 0 ;/* state->mode */
137wsize_state equ (32+zlib1222sup) ;/* state->wsize */
138write_state equ (36+4+zlib1222sup) ;/* state->write */
139window_state equ (40+4+zlib1222sup) ;/* state->window */
140hold_state equ (44+4+zlib1222sup) ;/* state->hold */
141bits_state equ (48+4+zlib1222sup) ;/* state->bits */
142lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
143distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
144lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
145distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
146
147
148;;SECTION .text
149; 205 "inffast.S"
150;GLOBAL inflate_fast_use_mmx
151
152;SECTION .data
153
154
155; GLOBAL inflate_fast_use_mmx:object
156;.size inflate_fast_use_mmx, 4
157; 226 "inffast.S"
158;SECTION .text
159
;-----------------------------------------------------------------------
; _inflate_fast -- fast inflate decoding loop (assembler version of inffast.c)
;
; Arguments are read from the stack. After the prologue below (five 4-byte
; pushes plus 64 bytes of locals) the return address is at [esp+84], so:
;   [esp+88] = first argument  : pointer to the stream structure
;   [esp+92] = second argument : "start" value used to compute the output base
; Offsets 0/4/12/16/24/28 of the stream structure are used as
; next_in/avail_in/next_out/avail_out/msg/state -- assumes zlib's 32-bit
; z_stream layout; TODO confirm against zlib.h.
;
; Local frame (64 bytes):
;   [esp+0]  length-code index mask   (1 << lenbits) - 1
;   [esp+4]  distance-code index mask (1 << distbits) - 1
;   [esp+8]  state->lencode           [esp+12] state->distcode
;   [esp+16] output limit   = next_out + avail_out - 257
;   [esp+20] input limit    = next_in + avail_in - 11
;   [esp+24] current match length (integer loop only)
;   [esp+28] 12-byte scratch buffer used when the input is short
;   [esp+40] output base    = next_out - (start - avail_out)
;   [esp+44] saved input pointer
;   [esp+48] state->write   [esp+52] state->wsize   [esp+56] state->window
;   [esp+60] next_out
;
; Saves and restores edi, esi, ebp, ebx and EFLAGS; eax, ecx, edx (and the
; MMX registers on the MMX path) are clobbered.
;-----------------------------------------------------------------------
160ALIGN 4
161_inflate_fast proc near
; NOTE(review): FPO debug record; the first field (16) presumably is the
; local size in dwords, matching "sub esp,64" -- confirm with the MASM docs.
162.FPO (16, 4, 0, 0, 1, 0)
163 push edi
164 push esi
165 push ebp
166 push ebx
167 pushfd ; EFLAGS (including the direction flag) restored by popfd at exit
168 sub esp,64 ; reserve the local frame described above
169 cld ; string instructions must move forward
170
171
172
173
174 mov esi, [esp+88] ; esi = stream pointer (first argument)
175 mov edi, [esi+28] ; edi = state pointer stored in the stream
176
177
178
179
180
181
182
183 mov edx, [esi+4] ; edx = available input bytes
184 mov eax, [esi+0] ; eax = input pointer
185
186 add edx,eax
187 sub edx,11 ; edx = input pointer + available - 11
188
189 mov [esp+44],eax ; save input pointer
190 mov [esp+20],edx ; save input limit
191
192 mov ebp, [esp+92] ; ebp = start (second argument)
193 mov ecx, [esi+16] ; ecx = available output bytes
194 mov ebx, [esi+12] ; ebx = output pointer
195
196 sub ebp,ecx
197 neg ebp
198 add ebp,ebx ; ebp = output pointer - (start - available output)
199
200 sub ecx,257
201 add ecx,ebx ; ecx = output pointer + available output - 257
202
203 mov [esp+60],ebx ; save output pointer
204 mov [esp+40],ebp ; save output base
205 mov [esp+16],ecx ; save output limit
206; 285 "inffast.S"
207 mov eax, [edi+lencode_state]
208 mov ecx, [edi+distcode_state]
209
210 mov [esp+8],eax ; length/literal code table
211 mov [esp+12],ecx ; distance code table
212
213 mov eax,1
214 mov ecx, [edi+lenbits_state]
215 shl eax,cl
216 dec eax
217 mov [esp+0],eax ; (1 << lenbits) - 1
218
219 mov eax,1
220 mov ecx, [edi+distbits_state]
221 shl eax,cl
222 dec eax
223 mov [esp+4],eax ; (1 << distbits) - 1
224
225 mov eax, [edi+wsize_state]
226 mov ecx, [edi+write_state]
227 mov edx, [edi+window_state]
228
229 mov [esp+52],eax ; window size
230 mov [esp+48],ecx ; window write position
231 mov [esp+56],edx ; window base address
232
233 mov ebp, [edi+hold_state] ; ebp = bit accumulator
234 mov ebx, [edi+bits_state] ; ebx = number of valid bits in the accumulator
235; 321 "inffast.S"
; Input staging. On entry ebp = bit accumulator, ebx = bit count,
; [esp+44] = input pointer, [esp+20] = input limit (input end - 11).
236 mov esi, [esp+44] ; esi = input pointer
237 mov ecx, [esp+20] ; ecx = input limit
238 cmp ecx,esi
239 ja L_align_long ; limit above the pointer: read the caller's buffer directly
240
; Short input: copy what is left into the 12-byte scratch buffer at
; [esp+28], zero-pad it to 12 bytes and decode from there instead.
241 add ecx,11
242 sub ecx,esi ; ecx = bytes of input remaining
243 mov eax,12
244 sub eax,ecx ; eax = padding bytes needed to reach 12
245 lea edi, [esp+28]
246 rep movsb ; copy the remaining input
247 mov ecx,eax
248 xor eax,eax
249 rep stosb ; zero-fill the rest of the scratch buffer
250 lea esi, [esp+28] ; decode from the scratch buffer
251 mov [esp+20],esi ; limit = scratch buffer start; also marks "buffer in use" for the exit code
252 jmp L_is_aligned
253
254
; Consume single bytes into the accumulator until esi is 4-byte aligned.
255L_align_long:
256 test esi,3
257 jz L_is_aligned
258 xor eax,eax
259 mov al, [esi]
260 inc esi
261 mov ecx,ebx ; shift count = current bit count
262 add ebx,8
263 shl eax,cl
264 or ebp,eax ; append the byte above the bits already held
265 jmp L_align_long
266
267L_is_aligned:
268 mov edi, [esp+60] ; edi = output pointer
269; 366 "inffast.S"
; Choose the decoding loop from inflate_fast_use_mmx:
;   2 -> MMX loop, above 2 -> integer loop, otherwise detect the CPU first.
270L_check_mmx:
271 cmp dword ptr [inflate_fast_use_mmx],2
272 je L_init_mmx
273 ja L_do_loop
274
; Detection: preserve the registers that cpuid overwrites.
275 push eax
276 push ebx
277 push ecx
278 push edx
; Try to toggle EFLAGS bit 21 (mask 0200000h, the ID flag); if the bit cannot
; be changed, cpuid is not available.
279 pushfd
280 mov eax, [esp] ; eax = original EFLAGS
281 xor dword ptr [esp],0200000h
282
283
284
285
286 popfd ; load the modified EFLAGS
287 pushfd
288 pop edx ; edx = EFLAGS as actually stored by the CPU
289 xor edx,eax ; zero when the bit did not toggle
290 jz L_dont_use_mmx
; cpuid leaf 0: the three constants are the little-endian ASCII of
; 'Genu', 'ntel' and 'ineI', i.e. the vendor string "GenuineIntel".
291 xor eax,eax
292 cpuid
293 cmp ebx,0756e6547h
294 jne L_dont_use_mmx
295 cmp ecx,06c65746eh
296 jne L_dont_use_mmx
297 cmp edx,049656e69h
298 jne L_dont_use_mmx
; cpuid leaf 1: bits 8..11 of eax must equal 6 (the family field) and
; edx bit 23 (mask 0800000h, the MMX feature flag) must be set.
299 mov eax,1
300 cpuid
301 shr eax,8
302 and eax,15
303 cmp eax,6
304 jne L_dont_use_mmx
305 test edx,0800000h
306 jnz L_use_mmx
307 jmp L_dont_use_mmx
308L_use_mmx:
309 mov dword ptr [inflate_fast_use_mmx],2 ; remember: use the MMX loop
310 jmp L_check_mmx_pop
311L_dont_use_mmx:
312 mov dword ptr [inflate_fast_use_mmx],3 ; remember: use the integer loop
313L_check_mmx_pop:
314 pop edx
315 pop ecx
316 pop ebx
317 pop eax
318 jmp L_check_mmx ; re-dispatch with the flag now decided
319; 426 "inffast.S"
; Integer decoding loop.
; Register roles: esi = input pointer, edi = output pointer,
;                 ebp = bit accumulator, bl = number of valid bits in it.
; Code table entries are 4 bytes: byte 0 = op, byte 1 = code length in bits,
; upper 16 bits = value.
320ALIGN 4
321L_do_loop:
322; 437 "inffast.S"
323 cmp bl,15
324 ja L_get_length_code ; more than 15 bits held: no refill needed
325
; Refill: append 16 input bits above the bits already held.
326 xor eax,eax
327 lodsw
328 mov cl,bl
329 add bl,16
330 shl eax,cl
331 or ebp,eax
332
333L_get_length_code:
334 mov edx, [esp+0] ; length-code index mask
335 mov ecx, [esp+8] ; length/literal code table
336 and edx,ebp ; index = low lenbits bits of the accumulator
337 mov eax, [ecx+edx*4] ; eax = table entry
338
; Entry point also used after a second-level table lookup (eax = entry).
339L_dolen:
340
341
342
343
344
345
346 mov cl,ah ; cl = code length
347 sub bl,ah ; consume the code bits
348 shr ebp,cl
349
350
351
352
353
354
355 test al,al
356 jnz L_test_for_length_base ; op != 0: not a literal
357
358 shr eax,16 ; al = literal byte (entry value)
359 stosb ; write it to the output
360
; Continue while the output pointer is below the output limit ([esp+16])
; and the input pointer is below the input limit ([esp+20]).
361L_while_test:
362
363
364 cmp [esp+16],edi
365 jbe L_break_loop
366
367 cmp [esp+20],esi
368 ja L_do_loop
369 jmp L_break_loop
370
; Length code. On entry eax = table entry with a non-zero op in al;
; ebp/bl = bit accumulator / bit count; esi = input pointer.
; On exit the match length is stored in [esp+24] and execution falls
; through to distance decoding.
371L_test_for_length_base:
372; 502 "inffast.S"
373 mov edx,eax
374 shr edx,16 ; edx = entry value (length base)
375 mov cl,al ; cl = op
376
377 test al,16
378 jz L_test_for_second_level_length ; op bit 16 clear: not a length base
379 and cl,15 ; cl = number of extra bits
380 jz L_save_len ; none: the base is the length
381 cmp bl,cl
382 jae L_add_bits_to_len ; enough bits already held
383
; Refill 16 bits; ch keeps the extra-bit count while cl is the shift count.
384 mov ch,cl
385 xor eax,eax
386 lodsw
387 mov cl,bl
388 add bl,16
389 shl eax,cl
390 or ebp,eax
391 mov cl,ch
392
393L_add_bits_to_len:
394 mov eax,1
395 shl eax,cl
396 dec eax ; eax = (1 << extra) - 1
397 sub bl,cl ; consume the extra bits
398 and eax,ebp ; eax = extra-bit value
399 shr ebp,cl
400 add edx,eax ; length = base + extra
401
402L_save_len:
403 mov [esp+24],edx ; remember the match length
404
405
; Distance code. Same register roles as the length decoding:
; esi = input pointer, ebp = bit accumulator, bl = bit count.
; On exit edx = match distance.
406L_decode_distance:
407; 549 "inffast.S"
408 cmp bl,15
409 ja L_get_distance_code ; more than 15 bits held: no refill needed
410
; Refill: append 16 input bits above the bits already held.
411 xor eax,eax
412 lodsw
413 mov cl,bl
414 add bl,16
415 shl eax,cl
416 or ebp,eax
417
418L_get_distance_code:
419 mov edx, [esp+4] ; distance-code index mask
420 mov ecx, [esp+12] ; distance code table
421 and edx,ebp
422 mov eax, [ecx+edx*4] ; eax = table entry
423
424
; Entry point also used after a second-level table lookup (eax = entry).
425L_dodist:
426 mov edx,eax
427 shr edx,16 ; edx = entry value (distance base)
428 mov cl,ah ; cl = code length
429 sub bl,ah ; consume the code bits
430 shr ebp,cl
431; 584 "inffast.S"
432 mov cl,al ; cl = op
433
434 test al,16
435 jz L_test_for_second_level_dist ; op bit 16 clear: not a distance base
436 and cl,15 ; cl = number of extra bits
437 jz L_check_dist_one ; no extra bits: try the distance-1 fast path
438 cmp bl,cl
439 jae L_add_bits_to_dist ; enough bits already held
440
; Refill 16 bits; ch keeps the extra-bit count while cl is the shift count.
441 mov ch,cl
442 xor eax,eax
443 lodsw
444 mov cl,bl
445 add bl,16
446 shl eax,cl
447 or ebp,eax
448 mov cl,ch
449
450L_add_bits_to_dist:
451 mov eax,1
452 shl eax,cl
453 dec eax ; eax = (1 << extra) - 1
454 sub bl,cl ; consume the extra bits
455 and eax,ebp ; eax = extra-bit value
456 shr ebp,cl
457 add edx,eax ; distance = base + extra
458 jmp L_check_window ; target is the label that immediately follows this block
459
; Copy a match whose source lies entirely in the output already produced.
; On entry edx = distance, [esp+24] = length, edi = output pointer,
; esi = input pointer, [esp+40] = output base.
460L_check_window:
461; 625 "inffast.S"
462 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
463 mov eax,edi
464 sub eax, [esp+40] ; eax = output pointer - output base
465
466 cmp eax,edx
467 jb L_clip_window ; distance reaches before the output base: use the window
468
469 mov ecx, [esp+24] ; ecx = length
470 mov esi,edi
471 sub esi,edx ; esi = output pointer - distance
472
; The first three bytes are copied by hand, the remaining length-3 with
; rep movsb. NOTE(review): assumes length >= 3 -- not checked here.
473 sub ecx,3
474 mov al, [esi]
475 mov [edi],al
476 mov al, [esi+1]
477 mov dl, [esi+2]
478 add esi,3
479 mov [edi+1],al
480 mov [edi+2],dl
481 add edi,3
482 rep movsb
483
484 mov esi, [esp+44] ; restore the input pointer
485 jmp L_while_test
486
; Fast path for distance 1: the match repeats the previous output byte.
; On entry edx = distance (no extra bits), [esp+24] = length,
; edi = output pointer. Any other distance, or an output pointer equal to the
; output base ([esp+40]), goes through the generic copy instead.
487ALIGN 4
488L_check_dist_one:
489 cmp edx,1
490 jne L_check_window
491 cmp [esp+40],edi
492 je L_check_window ; no previous output byte available here
493
494 dec edi
495 mov ecx, [esp+24] ; ecx = length
496 mov al, [edi] ; al = previous output byte
497 sub ecx,3
498
; Three copies are stored directly, the remaining length-3 with rep stosb.
; NOTE(review): assumes length >= 3 -- not checked here.
499 mov [edi+1],al
500 mov [edi+2],al
501 mov [edi+3],al
502 add edi,4
503 rep stosb
504
505 jmp L_while_test
506
; Second-level length table lookup.
; On entry al = cl = op (bit 16 clear), edx = entry value, ebp = bit accumulator.
; Op bit 64 set means end-of-block or an invalid code; otherwise the next
; entry is lencode[value + (accumulator & ((1 << op) - 1))] and decoding
; resumes at L_dolen with eax = that entry.
507ALIGN 4
508L_test_for_second_level_length:
509
510
511
512
513 test al,64
514 jnz L_test_for_end_of_block
515
516 mov eax,1
517 shl eax,cl
518 dec eax ; eax = (1 << op) - 1
519 and eax,ebp ; low op bits of the accumulator
520 add eax,edx ; plus the entry value
521 mov edx, [esp+8] ; length/literal code table
522 mov eax, [edx+eax*4]
523 jmp L_dolen
524
; Second-level distance table lookup, same scheme with the distance table.
; Op bit 64 set is reported as an invalid distance code.
525ALIGN 4
526L_test_for_second_level_dist:
527
528
529
530
531 test al,64
532 jnz L_invalid_distance_code
533
534 mov eax,1
535 shl eax,cl
536 dec eax ; eax = (1 << op) - 1
537 and eax,ebp ; low op bits of the accumulator
538 add eax,edx ; plus the entry value
539 mov edx, [esp+12] ; distance code table
540 mov eax, [edx+eax*4]
541 jmp L_dodist
542
; Match source starts before the output base, so part (or all) of it must be
; read from the sliding window.
; On entry: eax = output pointer - output base, edx = distance,
;           edi = output pointer, [esp+24] = length,
;           [esp+48] = window write position, [esp+52] = window size,
;           [esp+56] = window base. The input pointer is already saved in
;           [esp+44], so esi is free to be used as the copy source.
; This block handles the case write position == 0 (window data ends at
; window base + window size); other cases go to L_wrap_around_window.
; An unreachable duplicate of the final compare/copy sequence that used to
; follow the unconditional jump at the end of this block has been removed.
543ALIGN 4
544L_clip_window:
545; 721 "inffast.S"
546 mov ecx,eax
547 mov eax, [esp+52] ; eax = window size
548 neg ecx ; ecx = -(output pointer - output base)
549 mov esi, [esp+56] ; esi = window base
550
551 cmp eax,edx
552 jb L_invalid_distance_too_far ; distance larger than the window size
553
554 add ecx,edx ; ecx = bytes that must come from the window
555 cmp dword ptr [esp+48],0
556 jne L_wrap_around_window
557
558 sub eax,ecx
559 add esi,eax ; esi = window base + window size - ecx
560; 749 "inffast.S"
561 mov eax, [esp+24] ; eax = length
562 cmp eax,ecx
563 jbe L_do_copy1 ; whole match comes from the window
564
565 sub eax,ecx ; eax = bytes left after the window part
566 rep movsb ; copy the window part
567 mov esi,edi
568 sub esi,edx ; continue from output pointer - distance
569 jmp L_do_copy1
570
579
; Window copy when the window write position is non-zero.
; On entry: ecx = bytes that must come from the window, edx = distance,
;           esi = window base, edi = output pointer, [esp+24] = length.
; eax carries the number of bytes still to copy into L_do_copy1.
580L_wrap_around_window:
581; 793 "inffast.S"
582 mov eax, [esp+48] ; eax = window write position
583 cmp ecx,eax
584 jbe L_contiguous_in_window ; needed bytes all lie below the write position
585
; The needed bytes start near the end of the window and wrap to its start.
586 add esi, [esp+52]
587 add esi,eax
588 sub esi,ecx ; esi = window base + window size + write - ecx
589 sub ecx,eax ; ecx = bytes to take from the end of the window
590
591
592 mov eax, [esp+24] ; eax = length
593 cmp eax,ecx
594 jbe L_do_copy1 ; match ends inside the tail part
595
596 sub eax,ecx
597 rep movsb ; copy the tail part of the window
598 mov esi, [esp+56] ; continue at the window base
599 mov ecx, [esp+48] ; up to the write position
600 cmp eax,ecx
601 jbe L_do_copy1 ; match ends inside the head part
602
603 sub eax,ecx
604 rep movsb ; copy the head part of the window
605 mov esi,edi
606 sub esi,edx ; rest comes from output pointer - distance
607 jmp L_do_copy1
608
609L_contiguous_in_window:
610; 836 "inffast.S"
611 add esi,eax
612 sub esi,ecx ; esi = window base + write - ecx
613
614
615 mov eax, [esp+24] ; eax = length
616 cmp eax,ecx
617 jbe L_do_copy1 ; whole match comes from the window
618
619 sub eax,ecx
620 rep movsb ; copy the window part
621 mov esi,edi
622 sub esi,edx ; rest comes from output pointer - distance
623
; Final copy of eax bytes from esi, then resume decoding.
624L_do_copy1:
625; 862 "inffast.S"
626 mov ecx,eax
627 rep movsb
628
629 mov esi, [esp+44] ; restore the input pointer
630 jmp L_while_test
631; 878 "inffast.S"
; Set up the MMX decoding loop.
; On entry ebp = bit accumulator, ebx = bit count.
; On exit:  mm0 = bit accumulator, ebp = bit count,
;           mm3 and mm4 = length-code index mask ([esp+0]),
;           mm2 and mm5 = distance-code index mask ([esp+4]),
;           mm1 = 0 (pending shift count), ebx = length/literal code table.
632ALIGN 4
633L_init_mmx:
634 emms
635
636
637
638
639
640 movd mm0,ebp
641 mov ebp,ebx
642; 896 "inffast.S"
643 movd mm4,dword ptr [esp+0]
644 movq mm3,mm4
645 movd mm5,dword ptr [esp+4]
646 movq mm2,mm5
647 pxor mm1,mm1
648 mov ebx, [esp+8]
649 jmp L_do_loop_mmx
650
; MMX decoding loop.
; Register roles: esi = input pointer, edi = output pointer,
;                 mm0 = bit accumulator, ebp = number of valid bits,
;                 mm1 = shift count still to be applied to mm0,
;                 ebx = length/literal code table,
;                 mm3 = saved copy of the length index mask (working copy in mm4).
; Code table entries are 4 bytes: byte 0 = op, byte 1 = code length in bits,
; upper 16 bits = value.
651ALIGN 4
652L_do_loop_mmx:
653 psrlq mm0,mm1 ; drop the bits of the previously decoded code
654
655 cmp ebp,32
656 ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
657
; Refill: append 32 input bits above the bits already held.
658 movd mm6,ebp
659 movd mm7,dword ptr [esi]
660 add esi,4
661 psllq mm7,mm6
662 add ebp,32
663 por mm0,mm7
664
665L_get_length_code_mmx:
666 pand mm4,mm0 ; index = accumulator & length index mask
667 movd eax,mm4
668 movq mm4,mm3 ; restore the working mask
669 mov eax, [ebx+eax*4] ; eax = table entry
670
; Entry point also used after a second-level table lookup (eax = entry).
671L_dolen_mmx:
672 movzx ecx,ah ; ecx = code length
673 movd mm1,ecx ; shift is deferred until the next psrlq mm0,mm1
674 sub ebp,ecx ; consume the code bits
675
676 test al,al
677 jnz L_test_for_length_base_mmx ; op != 0: not a literal
678
679 shr eax,16 ; al = literal byte (entry value)
680 stosb ; write it to the output
681
; Continue while the output pointer is below the output limit ([esp+16])
; and the input pointer is below the input limit ([esp+20]).
682L_while_test_mmx:
683
684
685 cmp [esp+16],edi
686 jbe L_break_loop
687
688 cmp [esp+20],esi
689 ja L_do_loop_mmx
690 jmp L_break_loop
691
; Length and distance decoding, MMX variant.
; On entry eax = length table entry with non-zero op in al; mm0 = bit
; accumulator, mm1 = pending shift count, ebp = bit count.
; On exit (falling out of L_add_bits_to_dist_mmx): edx = match length,
; ebx = match distance.
692L_test_for_length_base_mmx:
693
694 mov edx,eax
695 shr edx,16 ; edx = entry value (length base)
696
697 test al,16
698 jz L_test_for_second_level_length_mmx ; op bit 16 clear: not a length base
699 and eax,15 ; eax = number of extra bits
700 jz L_decode_distance_mmx ; none: the base is the length
701
702 psrlq mm0,mm1 ; apply the pending shift
703 movd mm1,eax ; next pending shift = extra-bit count
704 movd ecx,mm0
705 sub ebp,eax ; consume the extra bits
706 and ecx, [inflate_fast_mask+eax*4] ; ecx = extra-bit value
707 add edx,ecx ; length = base + extra
708
709L_decode_distance_mmx:
710 psrlq mm0,mm1 ; apply the pending shift
711
712 cmp ebp,32
713 ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
714
; Refill: append 32 input bits above the bits already held.
715 movd mm6,ebp
716 movd mm7,dword ptr [esi]
717 add esi,4
718 psllq mm7,mm6
719 add ebp,32
720 por mm0,mm7
721
722L_get_dist_code_mmx:
723 mov ebx, [esp+12] ; ebx = distance code table
724 pand mm5,mm0 ; index = accumulator & distance index mask
725 movd eax,mm5
726 movq mm5,mm2 ; restore the working mask from its saved copy
727 mov eax, [ebx+eax*4] ; eax = table entry
728
; Entry point also used after a second-level table lookup (eax = entry).
729L_dodist_mmx:
730
731 movzx ecx,ah ; ecx = code length
732 mov ebx,eax
733 shr ebx,16 ; ebx = entry value (distance base)
734 sub ebp,ecx ; consume the code bits
735 movd mm1,ecx ; shift deferred to the next psrlq mm0,mm1
736
737 test al,16
738 jz L_test_for_second_level_dist_mmx ; op bit 16 clear: not a distance base
739 and eax,15 ; eax = number of extra bits
740 jz L_check_dist_one_mmx ; no extra bits: try the distance-1 fast path
741
742L_add_bits_to_dist_mmx:
743 psrlq mm0,mm1 ; apply the pending shift
744 movd mm1,eax ; next pending shift = extra-bit count
745 movd ecx,mm0
746 sub ebp,eax ; consume the extra bits
747 and ecx, [inflate_fast_mask+eax*4] ; ecx = extra-bit value
748 add ebx,ecx ; distance = base + extra
749
; Copy a match whose source lies entirely in the output already produced
; (MMX loop variant).
; On entry ebx = distance, edx = length, edi = output pointer,
; esi = input pointer, [esp+40] = output base.
; ebx is reloaded with the length/literal code table before looping.
750L_check_window_mmx:
751 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
752 mov eax,edi
753 sub eax, [esp+40] ; eax = output pointer - output base
754
755 cmp eax,ebx
756 jb L_clip_window_mmx ; distance reaches before the output base: use the window
757
758 mov ecx,edx ; ecx = length
759 mov esi,edi
760 sub esi,ebx ; esi = output pointer - distance
761
; The first three bytes are copied by hand, the remaining length-3 with
; rep movsb. NOTE(review): assumes length >= 3 -- not checked here.
762 sub ecx,3
763 mov al, [esi]
764 mov [edi],al
765 mov al, [esi+1]
766 mov dl, [esi+2]
767 add esi,3
768 mov [edi+1],al
769 mov [edi+2],dl
770 add edi,3
771 rep movsb
772
773 mov esi, [esp+44] ; restore the input pointer
774 mov ebx, [esp+8] ; restore the length/literal code table
775 jmp L_while_test_mmx
776
; Fast path for distance 1: repeat the previous output byte.
; Any other distance, or an output pointer equal to the output base,
; goes through the generic copy instead.
777ALIGN 4
778L_check_dist_one_mmx:
779 cmp ebx,1
780 jne L_check_window_mmx
781 cmp [esp+40],edi
782 je L_check_window_mmx ; no previous output byte available here
783
784 dec edi
785 mov ecx,edx ; ecx = length
786 mov al, [edi] ; al = previous output byte
787 sub ecx,3
788
; Three copies are stored directly, the remaining length-3 with rep stosb.
789 mov [edi+1],al
790 mov [edi+2],al
791 mov [edi+3],al
792 add edi,4
793 rep stosb
794
795 mov ebx, [esp+8] ; restore the length/literal code table
796 jmp L_while_test_mmx
797
; Second-level length table lookup (MMX loop).
; On entry al = op (bit 16 clear), edx = entry value, ebx = length/literal
; code table, mm0/mm1 = bit accumulator / pending shift.
; Op bit 64 set means end-of-block or an invalid code; otherwise the next
; entry is table[value + (accumulator & mask[op & 15])].
798ALIGN 4
799L_test_for_second_level_length_mmx:
800 test al,64
801 jnz L_test_for_end_of_block
802
803 and eax,15 ; eax = number of index bits
804 psrlq mm0,mm1 ; apply the pending shift
805 movd ecx,mm0
806 and ecx, [inflate_fast_mask+eax*4] ; low index bits of the accumulator
807 add ecx,edx ; plus the entry value
808 mov eax, [ebx+ecx*4]
809 jmp L_dolen_mmx
810
; Second-level distance table lookup (MMX loop), same scheme with the
; distance table from [esp+12]; here ebx holds the entry value.
; Op bit 64 set is reported as an invalid distance code.
811ALIGN 4
812L_test_for_second_level_dist_mmx:
813 test al,64
814 jnz L_invalid_distance_code
815
816 and eax,15 ; eax = number of index bits
817 psrlq mm0,mm1 ; apply the pending shift
818 movd ecx,mm0
819 and ecx, [inflate_fast_mask+eax*4] ; low index bits of the accumulator
820 mov eax, [esp+12] ; distance code table
821 add ecx,ebx ; plus the entry value
822 mov eax, [eax+ecx*4]
823 jmp L_dodist_mmx
824
; Match source starts before the output base, so part (or all) of it must be
; read from the sliding window (MMX loop variant).
; On entry: eax = output pointer - output base, ebx = distance,
;           edx = length, edi = output pointer,
;           [esp+48] = window write position, [esp+52] = window size,
;           [esp+56] = window base. The input pointer is already saved in
;           [esp+44], so esi is free to be used as the copy source.
; This block handles the case write position == 0 (window data ends at
; window base + window size); other cases go to L_wrap_around_window_mmx.
; An unreachable duplicate of the final compare/copy sequence that used to
; follow the unconditional jump at the end of this block has been removed.
825ALIGN 4
826L_clip_window_mmx:
827
828 mov ecx,eax
829 mov eax, [esp+52] ; eax = window size
830 neg ecx ; ecx = -(output pointer - output base)
831 mov esi, [esp+56] ; esi = window base
832
833 cmp eax,ebx
834 jb L_invalid_distance_too_far ; distance larger than the window size
835
836 add ecx,ebx ; ecx = bytes that must come from the window
837 cmp dword ptr [esp+48],0
838 jne L_wrap_around_window_mmx
839
840 sub eax,ecx
841 add esi,eax ; esi = window base + window size - ecx
842
843 cmp edx,ecx
844 jbe L_do_copy1_mmx ; whole match comes from the window
845
846 sub edx,ecx ; edx = bytes left after the window part
847 rep movsb ; copy the window part
848 mov esi,edi
849 sub esi,ebx ; continue from output pointer - distance
850 jmp L_do_copy1_mmx
851
860
; Window copy when the window write position is non-zero (MMX loop variant).
; On entry: ecx = bytes that must come from the window, ebx = distance,
;           edx = length, esi = window base, edi = output pointer.
; edx carries the number of bytes still to copy into L_do_copy1_mmx.
861L_wrap_around_window_mmx:
862
863 mov eax, [esp+48] ; eax = window write position
864 cmp ecx,eax
865 jbe L_contiguous_in_window_mmx ; needed bytes all lie below the write position
866
; The needed bytes start near the end of the window and wrap to its start.
867 add esi, [esp+52]
868 add esi,eax
869 sub esi,ecx ; esi = window base + window size + write - ecx
870 sub ecx,eax ; ecx = bytes to take from the end of the window
871
872
873 cmp edx,ecx
874 jbe L_do_copy1_mmx ; match ends inside the tail part
875
876 sub edx,ecx
877 rep movsb ; copy the tail part of the window
878 mov esi, [esp+56] ; continue at the window base
879 mov ecx, [esp+48] ; up to the write position
880 cmp edx,ecx
881 jbe L_do_copy1_mmx ; match ends inside the head part
882
883 sub edx,ecx
884 rep movsb ; copy the head part of the window
885 mov esi,edi
886 sub esi,ebx ; rest comes from output pointer - distance
887 jmp L_do_copy1_mmx
888
889L_contiguous_in_window_mmx:
890
891 add esi,eax
892 sub esi,ecx ; esi = window base + write - ecx
893
894
895 cmp edx,ecx
896 jbe L_do_copy1_mmx ; whole match comes from the window
897
898 sub edx,ecx
899 rep movsb ; copy the window part
900 mov esi,edi
901 sub esi,ebx ; rest comes from output pointer - distance
902
; Final copy of edx bytes from esi, then resume decoding.
903L_do_copy1_mmx:
904
905
906 mov ecx,edx
907 rep movsb
908
909 mov esi, [esp+44] ; restore the input pointer
910 mov ebx, [esp+8] ; restore the length/literal code table
911 jmp L_while_test_mmx
912; 1174 "inffast.S"
; Exit paths shared by the integer and MMX loops. Each one loads
;   ecx = address of an error message, or 0 for "no message"
;   edx = new value for state->mode
; and jumps to L_update_stream_state.
913L_invalid_distance_code:
914
915
916
917
918
919 mov ecx, invalid_distance_code_msg
920 mov edx,INFLATE_MODE_BAD
921 jmp L_update_stream_state
922
; Reached from the length decoding when op has bit 64 set:
; op bit 32 set means end of block, otherwise the code is invalid.
923L_test_for_end_of_block:
924
925
926
927
928
929 test al,32
930 jz L_invalid_literal_length_code
931
932 mov ecx,0 ; no error message
933 mov edx,INFLATE_MODE_TYPE
934 jmp L_update_stream_state
935
936L_invalid_literal_length_code:
937
938
939
940
941
942 mov ecx, invalid_literal_length_code_msg
943 mov edx,INFLATE_MODE_BAD
944 jmp L_update_stream_state
945
; Reached from the window-clipping code, where esi was loaded with the window
; base; the input pointer saved in [esp+44] is restored first.
946L_invalid_distance_too_far:
947
948
949
950 mov esi, [esp+44]
951 mov ecx, invalid_distance_too_far_msg
952 mov edx,INFLATE_MODE_BAD
953 jmp L_update_stream_state
954
955L_update_stream_state:
956
957 mov eax, [esp+88] ; eax = stream pointer (first argument)
958 test ecx,ecx
959 jz L_skip_msg
960 mov [eax+24],ecx ; store the message pointer at offset 24 of the stream
961L_skip_msg:
962 mov eax, [eax+28] ; eax = state pointer
963 mov [eax+mode_state],edx ; state->mode = edx
964 jmp L_break_loop
965
; Common exit: write the decoder position back to the stream and state
; structures, restore the saved registers and return.
; On entry esi = input pointer, edi = output pointer; the bit count is in
; ebx (integer loop) or ebp (MMX loop); the bit accumulator is in ebp
; (integer loop) or mm0 with a pending shift in mm1 (MMX loop).
966ALIGN 4
967L_break_loop:
968; 1243 "inffast.S"
969 cmp dword ptr [inflate_fast_use_mmx],2
970 jne L_update_next_in
971
972
973
974 mov ebx,ebp ; MMX loop keeps the bit count in ebp
975
976L_update_next_in:
977; 1266 "inffast.S"
978 mov eax, [esp+88] ; eax = stream pointer
979 mov ecx,ebx
980 mov edx, [eax+28] ; edx = state pointer
981 shr ecx,3 ; ecx = whole unused bytes held in the accumulator
982 sub esi,ecx ; give those bytes back to the input
983 shl ecx,3
984 sub ebx,ecx ; ebx = remaining bit count (less than 8)
985 mov [eax+12],edi ; store the output pointer at stream offset 12
986 mov [edx+bits_state],ebx ; state->bits
987 mov ecx,ebx
988
; If the input limit equals the scratch buffer address, decoding ran on the
; 12-byte scratch copy: translate esi back into the caller's buffer and
; recompute the limit from the stream's input pointer and byte count.
989 lea ebx, [esp+28]
990 cmp [esp+20],ebx
991 jne L_buf_not_used
992
993 sub esi,ebx ; esi = bytes consumed from the scratch buffer
994 mov ebx, [eax+0]
995 mov [esp+20],ebx
996 add esi,ebx ; esi = original input pointer + consumed
997 mov ebx, [eax+4]
998 sub ebx,11
999 add [esp+20],ebx ; limit = original input pointer + count - 11
1000
1001L_buf_not_used:
1002 mov [eax+0],esi ; store the input pointer at stream offset 0
1003
1004 mov ebx,1
1005 shl ebx,cl
1006 dec ebx ; ebx = (1 << remaining bits) - 1
1007
1008
1009
1010
1011
1012 cmp dword ptr [inflate_fast_use_mmx],2
1013 jne L_update_hold
1014
1015
1016
1017 psrlq mm0,mm1 ; apply the pending shift
1018 movd ebp,mm0 ; ebp = bit accumulator
1019
1020 emms ; leave MMX state
1021
1022L_update_hold:
1023
1024
1025
1026 and ebp,ebx ; keep only the remaining bits
1027 mov [edx+hold_state],ebp ; state->hold
1028
1029
1030
1031
; Remaining input count = limit - input pointer + 11; both branches compute
; the same value and store it at stream offset 4.
1032 mov ebx, [esp+20]
1033 cmp ebx,esi
1034 jbe L_last_is_smaller
1035
1036 sub ebx,esi
1037 add ebx,11
1038 mov [eax+4],ebx
1039 jmp L_fixup_out
1040L_last_is_smaller:
1041 sub esi,ebx
1042 neg esi
1043 add esi,11
1044 mov [eax+4],esi
1045
1046
1047
1048
; Remaining output count = limit - output pointer + 257, stored at
; stream offset 16.
1049L_fixup_out:
1050
1051 mov ebx, [esp+16]
1052 cmp ebx,edi
1053 jbe L_end_is_smaller
1054
1055 sub ebx,edi
1056 add ebx,257
1057 mov [eax+16],ebx
1058 jmp L_done
1059L_end_is_smaller:
1060 sub edi,ebx
1061 neg edi
1062 add edi,257
1063 mov [eax+16],edi
1064
1065
1066
1067
1068
; Epilogue: mirror image of the prologue (64 bytes of locals, EFLAGS,
; ebx, ebp, esi, edi).
1069L_done:
1070 add esp,64
1071 popfd
1072 pop ebx
1073 pop ebp
1074 pop esi
1075 pop edi
1076 ret
1077_inflate_fast endp
1078
1079_TEXT ends
1080end
Note: See TracBrowser for help on using the repository browser.