Move zlib into right place
[reactos.git] / reactos / lib / 3rdparty / zlib / contrib / asm386 / gvmat32.asm
1 ;
2 ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
3 ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
4 ; File written by Gilles Vollant, by modifying the longest_match
5 ; from Jean-loup Gailly in deflate.c
6 ; It needs wmask == 0x7fff
7 ; (assembly code is faster with a fixed wmask)
8 ;
9 ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
10 ; I compile with : "ml /coff /Zi /c gvmat32.asm"
11 ;
12
13 ;uInt longest_match_7fff(s, cur_match)
14 ; deflate_state *s;
15 ; IPos cur_match; /* current match */
16
; Stack-frame layout of longest_match_7fff.
; Every name below is an esp-relative operand. The offsets are only valid
; after the prologue has pushed four registers (16 bytes) and executed
; "sub esp,NbStackAdd" (52 bytes), i.e. with esp 68 bytes below its value
; at procedure entry.
17 NbStack equ 76 ; esp+NbStack addresses the second stack argument
18 cur_match equ dword ptr[esp+NbStack-0] ; second stack argument
19 str_s equ dword ptr[esp+NbStack-4] ; first stack argument (s in the prototype comment)
20 ; 5 dwords on top: return address, then ebp, edi, esi, ebx in push order
21 adrret equ dword ptr[esp+NbStack-8]
22 pushebp equ dword ptr[esp+NbStack-12]
23 pushedi equ dword ptr[esp+NbStack-16]
24 pushesi equ dword ptr[esp+NbStack-20]
25 pushebx equ dword ptr[esp+NbStack-24]
26
; local variables (the 52 bytes reserved by "sub esp,NbStackAdd")
27 chain_length equ dword ptr [esp+NbStack-28]
28 limit equ dword ptr [esp+NbStack-32]
29 best_len equ dword ptr [esp+NbStack-36]
30 window equ dword ptr [esp+NbStack-40]
31 prev equ dword ptr [esp+NbStack-44]
32 scan_start equ word ptr [esp+NbStack-48] ; 16-bit access: first two bytes at scan
33 wmask equ dword ptr [esp+NbStack-52] ; not referenced by any instruction in this file (the mask is the literal 7fffh)
34 match_start_ptr equ dword ptr [esp+NbStack-56] ; not referenced by any instruction in this file
35 nice_match equ dword ptr [esp+NbStack-60]
36 scan equ dword ptr [esp+NbStack-64]
37
38 windowlen equ dword ptr [esp+NbStack-68] ; window + best_len - 1
39 match_start equ dword ptr [esp+NbStack-72]
40 strend equ dword ptr [esp+NbStack-76] ; lowest slot ([esp+0]); written once, never read in this file
41 NbStackAdd equ (NbStack-24) ; = 52, size of the local-variable area
42
43 .386p ; enable the 80386 instruction set (including privileged instructions)
44
45 name gvmatch
46 .MODEL FLAT
47
48
49
50 ; all the +4 offsets are due to the addition of pending_buf_size to the
51 ; deflate_state structure (in zlib) since the asm code was first written
52 ; (if you compile with zlib 1.0.4 or older, remove the +4).
53 ; Note : these values are only correct when the structure is packed on an 8-byte boundary
; Each dep_* constant is the byte offset of the like-named deflate_state field;
; the fields are reached as [ebp+dep_*] while ebp holds s.
54 dep_chain_length equ 70h+4 ; s->max_chain_length
55 dep_window equ 2ch+4 ; s->window
56 dep_strstart equ 60h+4 ; s->strstart
57 dep_prev_length equ 6ch+4 ; s->prev_length
58 dep_nice_match equ 84h+4 ; s->nice_match
59 dep_w_size equ 20h+4 ; s->w_size
60 dep_prev equ 34h+4 ; s->prev
61 dep_w_mask equ 28h+4 ; s->w_mask (not read by any instruction in this file)
62 dep_good_match equ 80h+4 ; s->good_match
63 dep_match_start equ 64h+4 ; s->match_start
64 dep_lookahead equ 68h+4 ; s->lookahead
65
66
67 _TEXT segment ; code segment; closed by "_TEXT ends" at the end of the file
68
; Export the match routine, with or without a leading underscore depending
; on whether NOUNDERLINE is defined at assembly time.
69 IFDEF NOUNDERLINE
70 public longest_match_7fff
71 ; public match_init
72 ELSE
73 public _longest_match_7fff
74 ; public _match_init
75 ENDIF
76
77 MAX_MATCH equ 258 ; upper bound on the match length compared by longest_match_7fff
78 MIN_MATCH equ 3
79 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) ; = 262; subtracted from w_size to form MAX_DIST(s)
80
81
82
; match_init is kept only as commented-out text; no such procedure is assembled.
83 IFDEF NOUNDERLINE
84 ;match_init proc near
85 ; ret
86 ;match_init endp
87 ELSE
88 ;_match_init proc near
89 ; ret
90 ;_match_init endp
91 ENDIF
92
93
;-----------------------------------------------------------------------
; longest_match_7fff -- walk the prev[] chain starting at cur_match and
; find the longest string in the window that matches the string at
; window+strstart.
;
; In:   [esp+4] = s (pointer to the state structure read through the
;                 dep_* offsets), [esp+8] = cur_match
; Out:  eax = min(best_len, s->lookahead); s->match_start is stored on
;       every exit path
; Regs: ebp, edi, esi and ebx are saved on entry and restored on exit;
;       ecx and edx are overwritten
; Note: chain indices are masked with the literal 7fffh, so the routine
;       only works for a window mask of 0x7fff.
;-----------------------------------------------------------------------
94 IFDEF NOUNDERLINE
95 longest_match_7fff proc near
96 ELSE
97 _longest_match_7fff proc near
98 ENDIF
99
100 mov edx,[esp+4] ; edx = s, read before esp is moved by the prologue
101
102
103
104 push ebp ; save ebp, edi, esi, ebx (popped in reverse order at exit)
105 push edi
106 push esi
107 push ebx
108
109 sub esp,NbStackAdd ; reserve the local-variable area; the esp-relative equates are valid from here
110
111 ; initialize or check the variables used in match.asm.
112 mov ebp,edx ; ebp = s for the dep_* field accesses below
113
114 ; chain_length = s->max_chain_length
115 ; if (prev_length>=good_match) chain_length >>= 2
116 mov edx,[ebp+dep_chain_length]
117 mov ebx,[ebp+dep_prev_length]
118 cmp [ebp+dep_good_match],ebx
119 ja noshr ; good_match > prev_length (unsigned): keep the full chain length
120 shr edx,2
121 noshr:
122 ; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
123 inc edx
124 mov edi,[ebp+dep_nice_match]
125 mov chain_length,edx
126 mov eax,[ebp+dep_lookahead]
127 cmp eax,edi
128 ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
129 jae nolookaheadnicematch
130 mov edi,eax
131 nolookaheadnicematch:
132 ; best_len = s->prev_length
133 mov best_len,ebx
134
135 ; window = s->window
136 mov esi,[ebp+dep_window]
137 mov ecx,[ebp+dep_strstart]
138 mov window,esi
139
140 mov nice_match,edi
141 ; scan = window + strstart
142 add esi,ecx
143 mov scan,esi
144 ; dx = *(ushf*)scan (esi holds scan here, not window)
145 mov dx,word ptr [esi]
146 ; bx = *(ushf*)(scan+best_len-1) (ebx still holds prev_length = best_len)
147 mov bx,word ptr [esi+ebx-1]
148 add esi,MAX_MATCH-1
149 ; scan_start = *scan
150 mov scan_start,dx
151 ; strend = scan + MAX_MATCH-1 (stored, but not read again in this procedure)
152 mov strend,esi
153 ; bx = scan_end = *(ushf*)(scan+best_len-1)
154
155 ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
156 ; s->strstart - (IPos)MAX_DIST(s) : NIL;
157
158 mov esi,[ebp+dep_w_size]
159 sub esi,MIN_LOOKAHEAD
160 ; here esi = MAX_DIST(s)
161 sub ecx,esi
162 ja nodist ; strstart > MAX_DIST (unsigned): keep the difference
163 xor ecx,ecx ; otherwise limit = 0
164 nodist:
165 mov limit,ecx
166
167 ; prev = s->prev
168 mov edx,[ebp+dep_prev]
169 mov prev,edx
170
171 ; seed the local match_start from s->match_start and load the loop registers
172 mov edx,dword ptr [ebp+dep_match_start]
173 mov bp,scan_start ; overwrites the low half of ebp: ebp no longer holds s (reloaded from str_s at exitloop)
174 mov eax,cur_match
175 mov match_start,edx
176
177 mov edx,window
178 mov edi,edx
179 add edi,best_len
180 mov esi,prev
181 dec edi
182 ; windowlen = window + best_len -1
183 mov windowlen,edi
184
185 jmp beginloop2
186 align 4
187
188 ; here, in the loop
189 ; eax = ax = cur_match
190 ; ecx = limit
191 ; bx = scan_end
192 ; bp = scan_start
193 ; edi = windowlen (window + best_len -1)
194 ; esi = prev
; edx = window
195
196
197 ;// here; chain_length <=16
; Non-unrolled path: undo the bias applied at beginloop2 / rcontloop15 and
; test one chain entry per iteration.
198 normalbeg0add16:
199 add chain_length,16
200 jz exitloop ; no chain steps left
201 normalbeg0:
202 cmp word ptr[edi+eax],bx ; *(ushf*)(match+best_len-1) == scan_end ?
203 je normalbeg2noroll
204 rcontlabnoroll:
205 ; cur_match = prev[cur_match & wmask]
206 and eax,7fffh
207 mov ax,word ptr[esi+eax*2] ; upper half of eax is zero after the and, so eax = the 16-bit entry
208 ; if cur_match <= limit (unsigned), go to exitloop
209 cmp ecx,eax
210 jnb exitloop
211 ; if --chain_length != 0, loop back to normalbeg0; otherwise exit
212 dec chain_length
213 jnz normalbeg0
214 jmp exitloop
215
216 normalbeg2noroll:
217 ; if (scan_start==*(cur_match+window)) goto normalbeg2
218 cmp bp,word ptr[edx+eax]
219 jne rcontlabnoroll
220 jmp normalbeg2
221
; Re-entry point after a candidate has been fully compared: edi was used as a
; string pointer, so reload it before following the chain.
222 contloop3:
223 mov edi,windowlen
224
225 ; cur_match = prev[cur_match & wmask]
226 and eax,7fffh
227 mov ax,word ptr[esi+eax*2]
228 ; if cur_match <= limit (unsigned), go to exitloop
229 cmp ecx,eax
230 jnbexitloopshort1: ; label is not referenced anywhere in this file
231 jnb exitloop
232 ; chain_length is not decremented here; control falls into beginloop2, which adjusts it
233
234
235 ; begin the main loop
236 beginloop2:
237 sub chain_length,16+1
238 ; if the result is <= 0 (unsigned borrow or zero), don't use the unrolled loop
239 jna normalbeg0add16
240
; Unrolled path: up to 16 chain entries are tested per pass.
241 do16:
242 cmp word ptr[edi+eax],bx
243 je normalbeg2dc0
244
; maccn: follow one chain link, leave through exitloop if the new cur_match
; is <= limit, then jump to <lab> when the word at match+best_len-1 equals
; scan_end. Falls through to the next step otherwise.
245 maccn MACRO lab
246 and eax,7fffh
247 mov ax,word ptr[esi+eax*2]
248 cmp ecx,eax
249 jnb exitloop
250 cmp word ptr[edi+eax],bx
251 je lab
252 ENDM
253
254 rcontloop0:
255 maccn normalbeg2dc1
256
257 rcontloop1:
258 maccn normalbeg2dc2
259
260 rcontloop2:
261 maccn normalbeg2dc3
262
263 rcontloop3:
264 maccn normalbeg2dc4
265
266 rcontloop4:
267 maccn normalbeg2dc5
268
269 rcontloop5:
270 maccn normalbeg2dc6
271
272 rcontloop6:
273 maccn normalbeg2dc7
274
275 rcontloop7:
276 maccn normalbeg2dc8
277
278 rcontloop8:
279 maccn normalbeg2dc9
280
281 rcontloop9:
282 maccn normalbeg2dc10
283
; From here on "short" is part of the macro argument, so the expansion is
; "je short normalbeg2dcNN".
284 rcontloop10:
285 maccn short normalbeg2dc11
286
287 rcontloop11:
288 maccn short normalbeg2dc12
289
290 rcontloop12:
291 maccn short normalbeg2dc13
292
293 rcontloop13:
294 maccn short normalbeg2dc14
295
296 rcontloop14:
297 maccn short normalbeg2dc15
298
299 rcontloop15:
300 and eax,7fffh
301 mov ax,word ptr[esi+eax*2]
302 cmp ecx,eax
303 jnb exitloop
304
305 sub chain_length,16 ; account for the 16 entries of this pass
306 ja do16 ; still above zero: run another unrolled pass
307 jmp normalbeg0add16 ; otherwise finish on the non-unrolled path
308
309 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
310
; normbeg: landing pad for a scan_end hit at unrolled step <valsub>.
; Checks the first word of the candidate against scan_start; on a mismatch
; it resumes the unrolled sequence at <rcontlab>, otherwise it adds
; 16-valsub back to chain_length and goes to the full comparison at iseq.
311 normbeg MACRO rcontlab,valsub
312 ; if we are here, we know that *(match+best_len-1) == scan_end
313 cmp bp,word ptr[edx+eax]
314 ; if (*match != scan_start) goto rcontlab
315 jne rcontlab
316 ; calculate the good chain_length, and we'll compare scan and match string
317 add chain_length,16-valsub
318 jmp iseq
319 ENDM
320
321
; NOTE(review): the pads for steps 11-15 come first, presumably so that the
; "short" jumps to and from rcontloop11..15 stay in range -- confirm before
; reordering anything here.
322 normalbeg2dc11:
323 normbeg rcontloop11,11
324
325 normalbeg2dc12:
326 normbeg short rcontloop12,12
327
328 normalbeg2dc13:
329 normbeg short rcontloop13,13
330
331 normalbeg2dc14:
332 normbeg short rcontloop14,14
333
334 normalbeg2dc15:
335 normbeg short rcontloop15,15
336
337 normalbeg2dc10:
338 normbeg rcontloop10,10
339
340 normalbeg2dc9:
341 normbeg rcontloop9,9
342
343 normalbeg2dc8:
344 normbeg rcontloop8,8
345
346 normalbeg2dc7:
347 normbeg rcontloop7,7
348
349 normalbeg2dc6:
350 normbeg rcontloop6,6
351
352 normalbeg2dc5:
353 normbeg rcontloop5,5
354
355 normalbeg2dc4:
356 normbeg rcontloop4,4
357
358 normalbeg2dc3:
359 normbeg rcontloop3,3
360
361 normalbeg2dc2:
362 normbeg rcontloop2,2
363
364 normalbeg2dc1:
365 normbeg rcontloop1,1
366
367 normalbeg2dc0:
368 normbeg rcontloop0,0
369
370
371 ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
372
; Reached only from normalbeg2noroll, which has already made the same
; scan_start comparison through edx.
373 normalbeg2:
374 mov edi,window
375
376 cmp bp,word ptr[edi+eax]
377 jne contloop3 ; if *(ushf*)match != scan_start, continue
378
379 iseq:
380 ; if we are here, we know that *(match+best_len-1) == scan_end
381 ; and (*(ushf*)match == scan_start)
382
383 mov edi,edx
384 mov esi,scan ; esi = scan
385 add edi,eax ; edi = window + cur_match = match
386
387 mov edx,[esi+3] ; edx = dword at scan+3
388 xor edx,[edi+3] ; xor with dword at match+3: zero iff bytes 3..6 are equal
; NOTE(review): byte 2 is not compared anywhere in this procedure; presumably
; the caller's hash lookup guarantees it -- confirm.
389
390 jz begincompare ; if equal, go to long compare
391
392 ; determine the first differing byte and set len (in esi) directly
393 or dl,dl
394 je eq1rr
395 mov esi,3 ; byte 3 differs
396 jmp trfinval
397 eq1rr:
398 or dx,dx
399 je eq1
400
401 mov esi,4 ; byte 4 differs
402 jmp trfinval
403 eq1:
404 and edx,0ffffffh
405 jz eq11
406 mov esi,5 ; byte 5 differs
407 jmp trfinval
408 eq11:
409 mov esi,6 ; only byte 6 differs
410 jmp trfinval
411
412 begincompare:
413 ; here the words at offset 0 and the dwords at offset 3 are known to be equal
414 add edi,6
415 add esi,6
416 mov ecx,(MAX_MATCH-(2+4))/4 ; 63 dwords from offset 6: compares up to offset MAX_MATCH; clobbers ecx (limit)
417 repe cmpsd ; loop until mismatch
418
419 je trfin ; all 63 dwords matched: esi = scan + MAX_MATCH
420 ; a dword differed: esi/edi are 4 bytes past it; find the first differing byte
421 sub esi,4
422 mov edx,[edi-4]
423 xor edx,[esi]
424
425 or dl,dl
426 jnz trfin
427 inc esi
428
429 or dx,dx
430 jnz trfin
431 inc esi
432
433 and edx,0ffffffh
434 jnz trfin
435 inc esi
436
437 trfin:
438 sub esi,scan ; esi = len
439 trfinval:
440 ; here we have finished the compare, and esi contains the length of the equal string
441 cmp esi,best_len ; if len > best_len, go newbestlen
442 ja short newbestlen
443 ; now we restore edx, ecx and esi, for the big loop
444 mov esi,prev
445 mov ecx,limit
446 mov edx,window
447 jmp contloop3
448
449 newbestlen:
450 mov best_len,esi ; len become best_len
451
452 mov match_start,eax ; save new position as match_start
453 cmp esi,nice_match ; if best_len >= nice_match, exit
454 jae exitloop
455 mov ecx,scan
456 mov edx,window ; restore edx=window
457 add ecx,esi
458 add esi,edx
459
460 dec esi
461 mov windowlen,esi ; windowlen = window + best_len-1
462 mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
463
464 ; now we restore ecx and esi, for the big loop :
465 mov esi,prev
466 mov ecx,limit
467 jmp contloop3
468
469 exitloop:
470 ; exit : s->match_start=match_start
471 mov ebx,match_start
472 mov ebp,str_s ; reload s (the low half of ebp was used for scan_start)
473 mov ecx,best_len
474 mov dword ptr [ebp+dep_match_start],ebx
475 mov eax,dword ptr [ebp+dep_lookahead]
476 cmp ecx,eax
477 ja minexlo ; best_len > lookahead: return lookahead
478 mov eax,ecx
479 minexlo:
480 ; return min(best_len,s->lookahead)
481
482 ; restore stack and register ebx,esi,edi,ebp
483 add esp,NbStackAdd
484
485 pop ebx
486 pop esi
487 pop edi
488 pop ebp
489 ret
; The bytes below sit after the ret and nothing in this file jumps to
; InfoAuthor, so they are data embedded in the code segment.
490 InfoAuthor:
491 ; please don't remove this string !
492 ; You are free to use gvmat32 in any free or commercial app if you don't remove the string from the binary!
493 db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
494
495
496
497 IFDEF NOUNDERLINE
498 longest_match_7fff endp
499 ELSE
500 _longest_match_7fff endp
501 ENDIF
502
503
;-----------------------------------------------------------------------
; cpudetect32 -- classify the processor by probing EFLAGS bits.
;
; In:   no stack arguments are read
; Out:  eax = 0300h  if the AC bit (40000h) of EFLAGS cannot be toggled
;       eax = 0400h  if AC toggles but the ID bit (200000h) cannot
;       eax = value returned in EAX by CPUID with EAX=1 otherwise
; Regs: ebx is preserved. ecx is always overwritten; edx is overwritten
;       on the CPUID path. EFLAGS is restored to its entry value on the
;       paths that managed to change it.
;-----------------------------------------------------------------------
504 IFDEF NOUNDERLINE
505 cpudetect32 proc near
506 ELSE
507 _cpudetect32 proc near
508 ENDIF
509
        push ebx ; CPUID overwrites ebx, which the C caller expects to be preserved
510
511 pushfd ; push original EFLAGS
512 pop eax ; get original EFLAGS
513 mov ecx, eax ; save original EFLAGS
514 xor eax, 40000h ; flip AC bit in EFLAGS
515 push eax ; save new EFLAGS value on stack
516 popfd ; replace current EFLAGS value
517 pushfd ; get new EFLAGS
518 pop eax ; store new EFLAGS in EAX
519 xor eax, ecx ; can't toggle AC bit, processor=80386
520 jz end_cpu_is_386 ; jump if 80386 processor
521 push ecx
522 popfd ; restore AC bit in EFLAGS first
523
524 pushfd ; keep one copy of the original EFLAGS on the stack for the popfd below
525 pushfd
526 pop ecx ; ecx = original EFLAGS
527
528 mov eax, ecx ; get original EFLAGS
529 xor eax, 200000h ; flip ID bit in EFLAGS
530 push eax ; save new EFLAGS value on stack
531 popfd ; replace current EFLAGS value
532 pushfd ; get new EFLAGS
533 pop eax ; store new EFLAGS in EAX
534 popfd ; restore original EFLAGS
535 xor eax, ecx ; can't toggle ID bit,
536 je is_old_486 ; processor=old
537
538 mov eax,1
539 db 0fh,0a2h ;CPUID (raw opcode bytes; presumably because the mnemonic is not accepted under .386p)
540
; Single exit point: every path (CPUID, 386, old 486) arrives here with only
; the saved ebx left on the stack.
541 exitcpudetect:
        pop ebx ; restore the caller's ebx
542 ret
543
544 end_cpu_is_386:
545 mov eax,0300h
546 jmp exitcpudetect
547
548 is_old_486:
549 mov eax,0400h
550 jmp exitcpudetect
551
552 IFDEF NOUNDERLINE
553 cpudetect32 endp
554 ELSE
555 _cpudetect32 endp
556 ENDIF
557
558 _TEXT ends ; close the _TEXT segment that holds both procedures
559 end ; end of the module