update from zlib 1.1.4 to 1.2.3
[reactos.git] / reactos / lib / 3rdparty / zlib / contrib / masmx86 / gvmat32.asm
1 ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
2 ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
3 ; File written by Gilles Vollant, by modifying the longest_match
4 ; from Jean-loup Gailly in deflate.c
5 ;
6 ; http://www.zlib.net
7 ; http://www.winimage.com/zLibDll
8 ; http://www.muppetlabs.com/~breadbox/software/assembly.html
9 ;
10 ; For Visual C++ 4.x and higher and ML 6.x and higher
11 ; ml.exe is in directory \MASM611C of Win95 DDK
12 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
13 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
14 ;
15 ; this file contains two implementations of longest_match
16 ;
17 ; longest_match_7fff : written 1996 by Gilles Vollant, optimized for
18 ; first Pentium. Assumes s->w_mask == 0x7fff
19 ; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro
20 ;
21 ; to use an assembly version of longest_match, you need to define ASMV in the project
22 ; There are two ways of using gvmat32.asm
23 ;
24 ; A) Suggested method
25 ; if you want include both longest_match_7fff and longest_match_686
26 ; compile the asm file running
27 ; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm
28 ; and include gvmat32c.c in your project
29 ; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff,
30 ; longest_match_7fff will be used
31 ; if you have a more modern CPU (Pentium Pro, II and higher)
32 ; longest_match_686 will be used
33 ; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used,
34 ; but this is not a situation you'll find often
35 ;
36 ; B) Alternative
37 ; if you are not interested in old cpu performance and want the smallest
38 ; binaries possible
39 ;
40 ; compile the asm file running
41 ; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm
42 ; and do not include gvmat32c.c in your project (or also define
43 ; NOOLDPENTIUMCODE)
44 ;
45 ; note : as far as I know, longest_match_686 is much faster than longest_match_7fff
46 ; on Pentium Pro/II/III, faster (but by less) on P4, but it seems
47 ; longest_match_7fff can be faster (by very, very little) on AMD Athlon64/K8
48 ;
49 ; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
50
51 ;uInt longest_match_7fff(s, cur_match)
52 ; deflate_state *s;
53 ; IPos cur_match; /* current match */
54
; Stack-frame layout for longest_match_7fff. Every name below is an
; esp-relative slot; the offsets are valid once the prologue has pushed
; ebp, edi, esi, ebx and executed "sub esp,NbStackAdd".
; NbStack is the offset of the highest slot (the cur_match argument).
55 NbStack equ 76
56 cur_match equ dword ptr[esp+NbStack-0]
57 str_s equ dword ptr[esp+NbStack-4]
58 ; 5 dwords on top, in push order (ret,ebp,edi,esi,ebx)
59 adrret equ dword ptr[esp+NbStack-8]
60 pushebp equ dword ptr[esp+NbStack-12]
61 pushedi equ dword ptr[esp+NbStack-16]
62 pushesi equ dword ptr[esp+NbStack-20]
63 pushebx equ dword ptr[esp+NbStack-24]
64
; local variables (13 dword slots, esp+48 down to esp+0)
65 chain_length equ dword ptr [esp+NbStack-28]
66 limit equ dword ptr [esp+NbStack-32]
67 best_len equ dword ptr [esp+NbStack-36]
68 window equ dword ptr [esp+NbStack-40]
69 prev equ dword ptr [esp+NbStack-44]
70 scan_start equ word ptr [esp+NbStack-48] ; only a word is stored/loaded here
71 wmask equ dword ptr [esp+NbStack-52]
72 match_start_ptr equ dword ptr [esp+NbStack-56]
73 nice_match equ dword ptr [esp+NbStack-60]
74 scan equ dword ptr [esp+NbStack-64]
75
76 windowlen equ dword ptr [esp+NbStack-68]
77 match_start equ dword ptr [esp+NbStack-72]
78 strend equ dword ptr [esp+NbStack-76]
; NbStackAdd = 52 = size of the local-variable area reserved by the prologue
79 NbStackAdd equ (NbStack-24)
80
; Assembler setup: 386 instruction set (privileged forms enabled), flat model.
81 .386p
82
83 name gvmatch
84 .MODEL FLAT
85
86
87
88 ; all the +zlib1222add offsets are due to the addition of fields
89 ; in zlib in the deflate_state structure since the asm code was first written
90 ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
91 ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
92 ; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
93
94 zlib1222add equ 8
95
96 ; Note : these values are good with an 8-byte boundary packed structure
; Byte offsets of deflate_state fields, used by longest_match_7fff as
; [ebp+dep_xxx]. Field names below follow the comments in that routine.
97 dep_chain_length equ 74h+zlib1222add ; s->max_chain_length
98 dep_window equ 30h+zlib1222add ; s->window
99 dep_strstart equ 64h+zlib1222add ; s->strstart
100 dep_prev_length equ 70h+zlib1222add ; s->prev_length
101 dep_nice_match equ 88h+zlib1222add ; nice_match
102 dep_w_size equ 24h+zlib1222add ; window size used to compute MAX_DIST(s)
103 dep_prev equ 38h+zlib1222add ; s->prev
104 dep_w_mask equ 2ch+zlib1222add ; s->w_mask (not read by longest_match_7fff, which masks with 7fffh)
105 dep_good_match equ 84h+zlib1222add ; good_match
106 dep_match_start equ 68h+zlib1222add ; s->match_start
107 dep_lookahead equ 6ch+zlib1222add ; s->lookahead
108
109
; Code segment; it is closed by "_TEXT ends" at the end of the file.
110 _TEXT segment
111
; Exported symbols.
;   NOUNDERLINE defined      -> names are exported without a leading underscore
;   NOOLDPENTIUMCODE defined -> only longest_match and match_init are exported
;   otherwise                -> longest_match_7fff, cpudetect32 and
;                               longest_match_686 are exported
112 IFDEF NOUNDERLINE
113 IFDEF NOOLDPENTIUMCODE
114 public longest_match
115 public match_init
116 ELSE
117 public longest_match_7fff
118 public cpudetect32
119 public longest_match_686
120 ENDIF
121 ELSE
122 IFDEF NOOLDPENTIUMCODE
123 public _longest_match
124 public _match_init
125 ELSE
126 public _longest_match_7fff
127 public _cpudetect32
128 public _longest_match_686
129 ENDIF
130 ENDIF
131
; Match-length limits; MIN_LOOKAHEAD is subtracted from the window size to
; obtain MAX_DIST(s) in the routines below.
132 MAX_MATCH equ 258
133 MIN_MATCH equ 3
134 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
135
136
137
138 IFNDEF NOOLDPENTIUMCODE
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
;
; In:    s at [esp+4] and cur_match in the next stack slot on entry
;        (arguments are left on the stack; the routine ends with a plain ret).
; Out:   eax = min(best_len, s->lookahead); s->match_start is written on exit.
; Saves: ebp, edi, esi, ebx are pushed on entry and popped before ret.
; Note:  the prev[] index is masked with the literal 7fffh instead of
;        s->w_mask, so the routine is only valid when w_mask == 7fffh.
139 IFDEF NOUNDERLINE
140 longest_match_7fff proc near
141 ELSE
142 _longest_match_7fff proc near
143 ENDIF
144
145 mov edx,[esp+4] ; edx = s, read before the pushes change esp
146
147
148
149 push ebp
150 push edi
151 push esi
152 push ebx
153
154 sub esp,NbStackAdd ; reserve the local-variable slots
155
156 ; initialize or check the variables used in match.asm.
157 mov ebp,edx ; ebp = s for the whole setup phase
158
159 ; chain_length = s->max_chain_length
160 ; if (prev_length>=good_match) chain_length >>= 2
161 mov edx,[ebp+dep_chain_length]
162 mov ebx,[ebp+dep_prev_length]
163 cmp [ebp+dep_good_match],ebx
164 ja noshr
165 shr edx,2
166 noshr:
167 ; we increment chain_length because in the asm, the --chain_length is in the beginning of the loop
168 inc edx
169 mov edi,[ebp+dep_nice_match]
170 mov chain_length,edx
171 mov eax,[ebp+dep_lookahead]
172 cmp eax,edi
173 ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
174 jae nolookaheadnicematch
175 mov edi,eax
176 nolookaheadnicematch:
177 ; best_len = s->prev_length
178 mov best_len,ebx
179
180 ; window = s->window
181 mov esi,[ebp+dep_window]
182 mov ecx,[ebp+dep_strstart]
183 mov window,esi
184
185 mov nice_match,edi
186 ; scan = window + strstart
187 add esi,ecx
188 mov scan,esi
189 ; dx = *(ushf*)scan
190 mov dx,word ptr [esi]
191 ; bx = *(ushf*)(scan+best_len-1)
192 mov bx,word ptr [esi+ebx-1]
193 add esi,MAX_MATCH-1
194 ; scan_start = *scan
195 mov scan_start,dx
196 ; strend = scan + MAX_MATCH-1
197 mov strend,esi
198 ; bx = scan_end = *(ushf*)(scan+best_len-1)
199
200 ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
201 ; s->strstart - (IPos)MAX_DIST(s) : NIL;
202
203 mov esi,[ebp+dep_w_size]
204 sub esi,MIN_LOOKAHEAD
205 ; here esi = MAX_DIST(s)
206 sub ecx,esi
207 ja nodist
208 xor ecx,ecx
209 nodist:
210 mov limit,ecx
211
212 ; prev = s->prev
213 mov edx,[ebp+dep_prev]
214 mov prev,edx
215
216 ; match_start = s->match_start; from here on ebp no longer holds s
; (bp is overwritten with scan_start; s is reloaded from str_s at exitloop)
217 mov edx,dword ptr [ebp+dep_match_start]
218 mov bp,scan_start
219 mov eax,cur_match
220 mov match_start,edx
221
222 mov edx,window
223 mov edi,edx
224 add edi,best_len
225 mov esi,prev
226 dec edi
227 ; windowlen = window + best_len -1
228 mov windowlen,edi
229
230 jmp beginloop2
231 align 4
232
233 ; here, in the loop
234 ; eax = ax = cur_match
235 ; ecx = limit
236 ; bx = scan_end
237 ; bp = scan_start
238 ; edi = windowlen (window + best_len -1)
239 ; esi = prev
; edx = window
240
241
242 ;// here; chain_length <=16
; beginloop2 / rcontloop15 subtracted 17 resp. 16 in advance; add 16 back
; to get the remaining count for the non-unrolled loop.
243 normalbeg0add16:
244 add chain_length,16
245 jz exitloop
246 normalbeg0:
247 cmp word ptr[edi+eax],bx
248 je normalbeg2noroll
249 rcontlabnoroll:
250 ; cur_match = prev[cur_match & wmask]
251 and eax,7fffh
252 mov ax,word ptr[esi+eax*2]
253 ; if cur_match <= limit, go to exitloop
254 cmp ecx,eax
255 jnb exitloop
256 ; if --chain_length != 0, loop again (normalbeg0); otherwise exit
257 dec chain_length
258 jnz normalbeg0
259 jmp exitloop
260
261 normalbeg2noroll:
262 ; if (scan_start==*(cur_match+window)) goto normalbeg2
263 cmp bp,word ptr[edx+eax]
264 jne rcontlabnoroll
265 jmp normalbeg2
266
; contloop3: re-entry point after a candidate was fully compared;
; reloads edi because the compare code used it as a pointer.
267 contloop3:
268 mov edi,windowlen
269
270 ; cur_match = prev[cur_match & wmask]
271 and eax,7fffh
272 mov ax,word ptr[esi+eax*2]
273 ; if cur_match <= limit, go to exitloop
274 cmp ecx,eax
275 jnbexitloopshort1:
276 jnb exitloop
277 ; otherwise fall through into beginloop2, which does the chain_length accounting
278
279
280 ; begin the main loop
281 beginloop2:
282 sub chain_length,16+1
283 ; if chain_length <=16, don't use the unrolled loop
284 jna normalbeg0add16
285
; do16: unrolled body, up to 16 chain steps per pass. Each step tests the
; word at match+best_len-1 against scan_end (bx) and branches to the
; matching normalbeg2dcN stub on equality.
286 do16:
287 cmp word ptr[edi+eax],bx
288 je normalbeg2dc0
289
; maccn: one chain step -- follow prev[], leave if cur_match <= limit,
; then test the scan_end word and jump to "lab" when it matches.
290 maccn MACRO lab
291 and eax,7fffh
292 mov ax,word ptr[esi+eax*2]
293 cmp ecx,eax
294 jnb exitloop
295 cmp word ptr[edi+eax],bx
296 je lab
297 ENDM
298
299 rcontloop0:
300 maccn normalbeg2dc1
301
302 rcontloop1:
303 maccn normalbeg2dc2
304
305 rcontloop2:
306 maccn normalbeg2dc3
307
308 rcontloop3:
309 maccn normalbeg2dc4
310
311 rcontloop4:
312 maccn normalbeg2dc5
313
314 rcontloop5:
315 maccn normalbeg2dc6
316
317 rcontloop6:
318 maccn normalbeg2dc7
319
320 rcontloop7:
321 maccn normalbeg2dc8
322
323 rcontloop8:
324 maccn normalbeg2dc9
325
326 rcontloop9:
327 maccn normalbeg2dc10
328
329 rcontloop10:
330 maccn short normalbeg2dc11
331
332 rcontloop11:
333 maccn short normalbeg2dc12
334
335 rcontloop12:
336 maccn short normalbeg2dc13
337
338 rcontloop13:
339 maccn short normalbeg2dc14
340
341 rcontloop14:
342 maccn short normalbeg2dc15
343
; last step of the unrolled pass: no scan_end test here, the next pass
; (do16) or the non-unrolled loop performs it.
344 rcontloop15:
345 and eax,7fffh
346 mov ax,word ptr[esi+eax*2]
347 cmp ecx,eax
348 jnb exitloop
349
350 sub chain_length,16
351 ja do16
352 jmp normalbeg0add16
353
354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
355
; normbeg: stub reached from unrolled step number "valsub" when the
; scan_end word matched. Checks the first word; on mismatch resumes the
; unrolled loop at rcontlab, otherwise corrects chain_length for the steps
; of this pass that were not consumed and goes to the full compare.
356 normbeg MACRO rcontlab,valsub
357 ; if we are here, we know that *(match+best_len-1) == scan_end
358 cmp bp,word ptr[edx+eax]
359 ; if (match != scan_start) goto rcontlab
360 jne rcontlab
361 ; calculate the good chain_length, and we'll compare scan and match string
362 add chain_length,16-valsub
363 jmp iseq
364 ENDM
365
366
367 normalbeg2dc11:
368 normbeg rcontloop11,11
369
370 normalbeg2dc12:
371 normbeg short rcontloop12,12
372
373 normalbeg2dc13:
374 normbeg short rcontloop13,13
375
376 normalbeg2dc14:
377 normbeg short rcontloop14,14
378
379 normalbeg2dc15:
380 normbeg short rcontloop15,15
381
382 normalbeg2dc10:
383 normbeg rcontloop10,10
384
385 normalbeg2dc9:
386 normbeg rcontloop9,9
387
388 normalbeg2dc8:
389 normbeg rcontloop8,8
390
391 normalbeg2dc7:
392 normbeg rcontloop7,7
393
394 normalbeg2dc6:
395 normbeg rcontloop6,6
396
397 normalbeg2dc5:
398 normbeg rcontloop5,5
399
400 normalbeg2dc4:
401 normbeg rcontloop4,4
402
403 normalbeg2dc3:
404 normbeg rcontloop3,3
405
406 normalbeg2dc2:
407 normbeg rcontloop2,2
408
409 normalbeg2dc1:
410 normbeg rcontloop1,1
411
412 normalbeg2dc0:
413 normbeg rcontloop0,0
414
415
416 ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
417
418 normalbeg2:
419 mov edi,window
420
421 cmp bp,word ptr[edi+eax]
422 jne contloop3 ; if *(ushf*)match != scan_start, continue
423
424 iseq:
425 ; if we are here, we know that *(match+best_len-1) == scan_end
426 ; and (match == scan_start)
427
428 mov edi,edx
429 mov esi,scan ; esi = scan
430 add edi,eax ; edi = window + cur_match = match
431
432 mov edx,[esi+3] ; compare manually dword at match+3
433 xor edx,[edi+3] ; and scan +3
434
435 jz begincompare ; if equal, go to long compare
436
437 ; we will determine the unmatch byte and calculate len (in esi)
; edx holds the xor of bytes 3..6; the lowest non-zero byte is the first mismatch
438 or dl,dl
439 je eq1rr
440 mov esi,3
441 jmp trfinval
442 eq1rr:
443 or dx,dx
444 je eq1
445
446 mov esi,4
447 jmp trfinval
448 eq1:
449 and edx,0ffffffh
450 jz eq11
451 mov esi,5
452 jmp trfinval
453 eq11:
454 mov esi,6
455 jmp trfinval
456
457 begincompare:
458 ; here we know scan and match begin the same
459 add edi,6
460 add esi,6
461 mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes
462 repe cmpsd ; loop until mismatch
463
464 je trfin ; go to trfin if no mismatch was found
465 ; we determine the unmatch byte
; step back to the mismatching dword and locate its first differing byte
466 sub esi,4
467 mov edx,[edi-4]
468 xor edx,[esi]
469
470 or dl,dl
471 jnz trfin
472 inc esi
473
474 or dx,dx
475 jnz trfin
476 inc esi
477
478 and edx,0ffffffh
479 jnz trfin
480 inc esi
481
482 trfin:
483 sub esi,scan ; esi = len
484 trfinval:
485 ; here we have finished the compare, and esi contains the len of the equal string
486 cmp esi,best_len ; if len > best_len, go newbestlen
487 ja short newbestlen
488 ; now we restore edx, ecx and esi, for the big loop
489 mov esi,prev
490 mov ecx,limit
491 mov edx,window
492 jmp contloop3
493
494 newbestlen:
495 mov best_len,esi ; len become best_len
496
497 mov match_start,eax ; save new position as match_start
498 cmp esi,nice_match ; if best_len >= nice_match, exit
499 jae exitloop
500 mov ecx,scan
501 mov edx,window ; restore edx=window
502 add ecx,esi
503 add esi,edx
504
505 dec esi
506 mov windowlen,esi ; windowlen = window + best_len-1
507 mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
508
509 ; now we restore ecx and esi, for the big loop :
510 mov esi,prev
511 mov ecx,limit
512 jmp contloop3
513
514 exitloop:
515 ; exit : s->match_start=match_start
516 mov ebx,match_start
517 mov ebp,str_s ; reload s from the argument slot
518 mov ecx,best_len
519 mov dword ptr [ebp+dep_match_start],ebx
520 mov eax,dword ptr [ebp+dep_lookahead]
521 cmp ecx,eax
522 ja minexlo
523 mov eax,ecx
524 minexlo:
525 ; return min(best_len,s->lookahead)
526
527 ; restore stack and register ebx,esi,edi,ebp
528 add esp,NbStackAdd
529
530 pop ebx
531 pop esi
532 pop edi
533 pop ebp
534 ret
535 InfoAuthor:
536 ; please don't remove this string !
537 ; You are free to use gvmat32 in any free or commercial apps if you don't remove the string in the binary!
538 db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
539
540
541
542 IFDEF NOUNDERLINE
543 longest_match_7fff endp
544 ELSE
545 _longest_match_7fff endp
546 ENDIF
547
548
; cpudetect32 -- takes no arguments, returns a CPU identification in eax:
;   0300h  if the AC bit (40000h) of EFLAGS cannot be toggled
;   0400h  if AC toggles but the ID bit (200000h) cannot
;   otherwise the value left in eax by CPUID with eax=1
; ebx is saved and restored around the routine; ecx is used as scratch.
549 IFDEF NOUNDERLINE
550 cpudetect32 proc near
551 ELSE
552 _cpudetect32 proc near
553 ENDIF
554
555 push ebx
556
557 pushfd ; push original EFLAGS
558 pop eax ; get original EFLAGS
559 mov ecx, eax ; save original EFLAGS
560 xor eax, 40000h ; flip AC bit in EFLAGS
561 push eax ; save new EFLAGS value on stack
562 popfd ; replace current EFLAGS value
563 pushfd ; get new EFLAGS
564 pop eax ; store new EFLAGS in EAX
565 xor eax, ecx ; can't toggle AC bit, processor=80386
566 jz end_cpu_is_386 ; jump if 80386 processor
567 push ecx
568 popfd ; restore AC bit in EFLAGS first
569
570 pushfd ; this copy stays on the stack until the popfd below
571 pushfd
572 pop ecx
573
574 mov eax, ecx ; get original EFLAGS
575 xor eax, 200000h ; flip ID bit in EFLAGS
576 push eax ; save new EFLAGS value on stack
577 popfd ; replace current EFLAGS value
578 pushfd ; get new EFLAGS
579 pop eax ; store new EFLAGS in EAX
580 popfd ; restore original EFLAGS
581 xor eax, ecx ; can't toggle ID bit,
582 je is_old_486 ; processor=old
583
584 mov eax,1
585 db 0fh,0a2h ;CPUID (emitted as raw bytes)
586
587 exitcpudetect:
588 pop ebx
589 ret
590
591 end_cpu_is_386:
592 mov eax,0300h
593 jmp exitcpudetect
594
595 is_old_486:
596 mov eax,0400h
597 jmp exitcpudetect
598
599 IFDEF NOUNDERLINE
600 cpudetect32 endp
601 ELSE
602 _cpudetect32 endp
603 ENDIF
604 ENDIF
605
; Match-length limits for the 686 routine (same values as defined earlier
; in the file).
606 MAX_MATCH equ 258
607 MIN_MATCH equ 3
608 MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
; MAX_MATCH rounded up to a multiple of 8: (258 + 7) AND NOT 7 = 264 = 0108h.
; This is the value the compare loop hard-codes as 0108h / 0fffffef8h.
; (The previous mask 0FFF0h rounded to a multiple of 16 and yielded 256.)
609 MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
610
611
612 ;;; stack frame offsets
613
; esp-relative locals of longest_match / longest_match_686. The names are
; used in bracketed form (e.g. [window]) and are valid once the prologue has
; pushed four registers and executed "sub esp, LocalVarsSize".
; Note: "window" and "scan" replace the definitions of the same names made
; earlier in the file for longest_match_7fff.
614 chainlenwmask equ esp + 0 ; high word: current chain len
615 ; low word: s->wmask
616 window equ esp + 4 ; local copy of s->window
617 windowbestlen equ esp + 8 ; s->window + bestlen
618 scanstart equ esp + 16 ; first two bytes of string
619 scanend equ esp + 12 ; last two bytes of string
620 scanalign equ esp + 20 ; dword-misalignment of string
621 nicematch equ esp + 24 ; a good enough match size
622 bestlen equ esp + 28 ; size of best match so far
623 scan equ esp + 32 ; ptr to string wanting match
624
; total size of the nine dword locals above
625 LocalVarsSize equ 36
626 ; saved ebx byte esp + 36
627 ; saved edi byte esp + 40
628 ; saved esi byte esp + 44
629 ; saved ebp byte esp + 48
630 ; return address byte esp + 52
631 deflatestate equ esp + 56 ; the function arguments
632 curmatch equ esp + 60
633
634 ;;; Offsets for fields in the deflate_state structure. These numbers
635 ;;; are calculated from the definition of deflate_state, with the
636 ;;; assumption that the compiler will dword-align the fields. (Thus,
637 ;;; changing the definition of deflate_state could easily cause this
638 ;;; program to crash horribly, without so much as a warning at
639 ;;; compile time. Sigh.)
;;; Each offset is shifted by zlib1222add, like the dep_* offsets used by
;;; longest_match_7fff.
640
641 dsWSize equ 36+zlib1222add
642 dsWMask equ 44+zlib1222add
643 dsWindow equ 48+zlib1222add
644 dsPrev equ 56+zlib1222add
645 dsMatchLen equ 88+zlib1222add
646 dsPrevMatch equ 92+zlib1222add
647 dsStrStart equ 100+zlib1222add
648 dsMatchStart equ 104+zlib1222add
649 dsLookahead equ 108+zlib1222add
650 dsPrevLen equ 112+zlib1222add
651 dsMaxChainLen equ 116+zlib1222add
652 dsGoodMatch equ 132+zlib1222add
653 dsNiceMatch equ 136+zlib1222add
654
655
656 ;;; match.asm -- Pentium-Pro-optimized version of longest_match()
657 ;;; Written for zlib 1.1.2
658 ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
659 ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
660 ;;;
661 ;;; This is free software; you can redistribute it and/or modify it
662 ;;; under the terms of the GNU General Public License.
663
664 ;GLOBAL _longest_match, _match_init
665
666
667 ;SECTION .text
668
669 ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
670
671 ;_longest_match:
; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
; Assembled under the name longest_match when NOOLDPENTIUMCODE is defined,
; otherwise as longest_match_686 (with a leading underscore unless
; NOUNDERLINE is defined).
;
; In:    both arguments on the stack ([deflatestate], [curmatch]).
; Out:   eax = min(bestlen, s->lookahead); s->match_start is updated
;        whenever a longer match is found.
; Saves: ebp, edi, esi, ebx are pushed on entry and popped before ret.
672 IFDEF NOOLDPENTIUMCODE
673 IFDEF NOUNDERLINE
674 longest_match proc near
675 ELSE
676 _longest_match proc near
677 ENDIF
678 ELSE
679 IFDEF NOUNDERLINE
680 longest_match_686 proc near
681 ELSE
682 _longest_match_686 proc near
683 ENDIF
684 ENDIF
685
686 ;;; Save registers that the compiler may be using, and adjust esp to
687 ;;; make room for our stack frame.
688
689 push ebp
690 push edi
691 push esi
692 push ebx
693 sub esp, LocalVarsSize
694
695 ;;; Retrieve the function arguments. ecx will hold cur_match
696 ;;; throughout the entire function. edx will hold the pointer to the
697 ;;; deflate_state structure during the function's setup (before
698 ;;; entering the main loop).
699
700 mov edx, [deflatestate]
701 mov ecx, [curmatch]
702
703 ;;; uInt wmask = s->w_mask;
704 ;;; unsigned chain_length = s->max_chain_length;
705 ;;; if (s->prev_length >= s->good_match) {
706 ;;; chain_length >>= 2;
707 ;;; }
708
;;; (the two loads between cmp and jl are mov instructions, which leave
;;; the flags untouched)
709 mov eax, [edx + dsPrevLen]
710 mov ebx, [edx + dsGoodMatch]
711 cmp eax, ebx
712 mov eax, [edx + dsWMask]
713 mov ebx, [edx + dsMaxChainLen]
714 jl LastMatchGood
715 shr ebx, 2
716 LastMatchGood:
717
718 ;;; chainlen is decremented once beforehand so that the function can
719 ;;; use the sign flag instead of the zero flag for the exit test.
720 ;;; It is then shifted into the high word, to make room for the wmask
721 ;;; value, which it will always accompany.
722
723 dec ebx
724 shl ebx, 16
725 or ebx, eax
726 mov [chainlenwmask], ebx
727
728 ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
729
730 mov eax, [edx + dsNiceMatch]
731 mov ebx, [edx + dsLookahead]
732 cmp ebx, eax
733 jl LookaheadLess
734 mov ebx, eax
735 LookaheadLess: mov [nicematch], ebx
736
737 ;;; register Bytef *scan = s->window + s->strstart;
738
739 mov esi, [edx + dsWindow]
740 mov [window], esi
741 mov ebp, [edx + dsStrStart]
742 lea edi, [esi + ebp]
743 mov [scan], edi
744
745 ;;; Determine how many bytes the scan ptr is off from being
746 ;;; dword-aligned.
747
748 mov eax, edi
749 neg eax
750 and eax, 3
751 mov [scanalign], eax
752
753 ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
754 ;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
755
756 mov eax, [edx + dsWSize]
757 sub eax, MIN_LOOKAHEAD
758 sub ebp, eax
759 jg LimitPositive
760 xor ebp, ebp
761 LimitPositive:
762
763 ;;; int best_len = s->prev_length;
764
765 mov eax, [edx + dsPrevLen]
766 mov [bestlen], eax
767
768 ;;; Keep s->window + best_len in esi and also store it in the
;;; windowbestlen stack slot.
769
770 add esi, eax
771 mov [windowbestlen], esi
772
773 ;;; register ush scan_start = *(ushf*)scan;
774 ;;; register ush scan_end = *(ushf*)(scan+best_len-1);
775 ;;; Posf *prev = s->prev;
776
777 movzx ebx, word ptr [edi]
778 mov [scanstart], ebx
779 movzx ebx, word ptr [edi + eax - 1]
780 mov [scanend], ebx
781 mov edi, [edx + dsPrev]
782
783 ;;; Jump into the main loop.
784
785 mov edx, [chainlenwmask]
786 jmp short LoopEntry
787
788 align 4
789
790 ;;; do {
791 ;;; match = s->window + cur_match;
792 ;;; if (*(ushf*)(match+best_len-1) != scan_end ||
793 ;;; *(ushf*)match != scan_start) continue;
794 ;;; [...]
795 ;;; } while ((cur_match = prev[cur_match & wmask]) > limit
796 ;;; && --chain_length != 0);
797 ;;;
798 ;;; Here is the inner loop of the function. The function will spend the
799 ;;; majority of its time in this loop, and majority of that time will
800 ;;; be spent in the first ten instructions.
801 ;;;
802 ;;; Within this loop:
803 ;;; ebx = scanend
804 ;;; ecx = curmatch
805 ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
806 ;;; esi = windowbestlen - i.e., (window + bestlen)
807 ;;; edi = prev
808 ;;; ebp = limit
809
810 LookupLoop:
811 and ecx, edx
812 movzx ecx, word ptr [edi + ecx*2]
813 cmp ecx, ebp
814 jbe LeaveNow
815 sub edx, 00010000h
816 js LeaveNow
817 LoopEntry: movzx eax, word ptr [esi + ecx - 1]
818 cmp eax, ebx
819 jnz LookupLoop
820 mov eax, [window]
821 movzx eax, word ptr [eax + ecx]
822 cmp eax, [scanstart]
823 jnz LookupLoop
824
825 ;;; Store the current value of chainlen.
826
827 mov [chainlenwmask], edx
828
829 ;;; Point edi to the string under scrutiny, and esi to the string we
830 ;;; are hoping to match it up with. In actuality, esi and edi are
831 ;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
832 ;;; initialized to -MAX_MATCH_8, so [edi + edx] starts at scan + scanalign.
;;; MAX_MATCH_8 is written out as the literal 0108h (264) below.
833
834 mov esi, [window]
835 mov edi, [scan]
836 add esi, ecx
837 mov eax, [scanalign]
838 mov edx, 0fffffef8h; -(MAX_MATCH_8)
839 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
840 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
841
842 ;;; Test the strings for equality, 8 bytes at a time. At the end,
843 ;;; adjust edx so that it is offset to the exact byte that mismatched.
844 ;;;
845 ;;; We already know at this point that the first three bytes of the
846 ;;; strings match each other, and they can be safely passed over before
847 ;;; starting the compare loop. So what this code does is skip over 0-3
848 ;;; bytes, as much as necessary in order to dword-align the edi
849 ;;; pointer. (esi will still be misaligned three times out of four.)
850 ;;;
851 ;;; It should be confessed that this loop usually does not represent
852 ;;; much of the total running time. Replacing it with a more
853 ;;; straightforward "rep cmpsb" would not drastically degrade
854 ;;; performance.
855
856 LoopCmps:
857 mov eax, [esi + edx]
858 xor eax, [edi + edx]
859 jnz LeaveLoopCmps
860 mov eax, [esi + edx + 4]
861 xor eax, [edi + edx + 4]
862 jnz LeaveLoopCmps4
863 add edx, 8
864 jnz LoopCmps
865 jmp short LenMaximum
;;; eax = xor of the mismatching dwords; locate the first non-zero byte:
;;; skip two bytes if the low word is zero, then "sub al,1 / adc edx,0"
;;; adds one more when the low byte of what remains is zero.
866 LeaveLoopCmps4: add edx, 4
867 LeaveLoopCmps: test eax, 0000FFFFh
868 jnz LenLower
869 add edx, 2
870 shr eax, 16
871 LenLower: sub al, 1
872 adc edx, 0
873
874 ;;; Calculate the length of the match. If it is MAX_MATCH or longer,
875 ;;; then automatically accept it as the best possible match and leave.
876
877 lea eax, [edi + edx]
878 mov edi, [scan]
879 sub eax, edi
880 cmp eax, MAX_MATCH
881 jge LenMaximum
882
883 ;;; If the length of the match is not longer than the best match we
884 ;;; have so far, then forget it and return to the lookup loop.
885
886 mov edx, [deflatestate]
887 mov ebx, [bestlen]
888 cmp eax, ebx
889 jg LongerMatch
890 mov esi, [windowbestlen]
891 mov edi, [edx + dsPrev]
892 mov ebx, [scanend]
893 mov edx, [chainlenwmask]
894 jmp LookupLoop
895
896 ;;; s->match_start = cur_match;
897 ;;; best_len = len;
898 ;;; if (len >= nice_match) break;
899 ;;; scan_end = *(ushf*)(scan+best_len-1);
900
901 LongerMatch: mov ebx, [nicematch]
902 mov [bestlen], eax
903 mov [edx + dsMatchStart], ecx
904 cmp eax, ebx
905 jge LeaveNow
906 mov esi, [window]
907 add esi, eax
908 mov [windowbestlen], esi
909 movzx ebx, word ptr [edi + eax - 1]
910 mov edi, [edx + dsPrev]
911 mov [scanend], ebx
912 mov edx, [chainlenwmask]
913 jmp LookupLoop
914
915 ;;; Accept the current string, with the maximum possible length.
916
917 LenMaximum: mov edx, [deflatestate]
918 mov dword ptr [bestlen], MAX_MATCH
919 mov [edx + dsMatchStart], ecx
920
921 ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
922 ;;; return s->lookahead;
923
924 LeaveNow:
925 mov edx, [deflatestate]
926 mov ebx, [bestlen]
927 mov eax, [edx + dsLookahead]
928 cmp ebx, eax
929 jg LookaheadRet
930 mov eax, ebx
931 LookaheadRet:
932
933 ;;; Restore the stack and return from whence we came.
934
935 add esp, LocalVarsSize
936 pop ebx
937 pop esi
938 pop edi
939 pop ebp
940
941 ret
942 ; please don't remove this string !
943 ; You can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
944 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
945
946
947 IFDEF NOOLDPENTIUMCODE
948 IFDEF NOUNDERLINE
949 longest_match endp
950 ELSE
951 _longest_match endp
952 ENDIF
953
; match_init: empty routine (just ret), only assembled in the
; NOOLDPENTIUMCODE configuration where it is declared public.
954 IFDEF NOUNDERLINE
955 match_init proc near
956 ret
957 match_init endp
958 ELSE
959 _match_init proc near
960 ret
961 _match_init endp
962 ENDIF
963 ELSE
964 IFDEF NOUNDERLINE
965 longest_match_686 endp
966 ELSE
967 _longest_match_686 endp
968 ENDIF
969 ENDIF
970
971 _TEXT ends
972 end