[TASKMGR] Process page: Allow using "Open File Location" functionality without runnin...
[reactos.git] / dll / opengl / mesa / asm-386.S
1 /* $Id: asm-386.S,v 1.8 1997/12/17 00:50:51 brianp Exp $ */
2
3 /*
4 * asm-386.S - special (hopefully faster) transformation functions for x86
5 *
6 * by Josh Vanderhoof
7 *
8 * This file is in the public domain.
9 */
10
11 /*
12 * $Log: asm-386.S,v $
13 * Revision 1.8 1997/12/17 00:50:51 brianp
14 * applied Josh's patch to fix texture coordinate transformation bugs
15 *
16 * Revision 1.7 1997/12/17 00:27:11 brianp
17 * applied Josh's patch to fix bfris
18 *
19 * Revision 1.6 1997/12/01 01:02:41 brianp
20 * added FreeBSD patches (Daniel J. O'Connor)
21 *
22 * Revision 1.5 1997/11/19 23:52:17 brianp
23 * added missing "cld" instruction in asm_transform_points4_identity()
24 *
25 * Revision 1.4 1997/11/11 02:22:41 brianp
26 * small change per Josh to ensure U/V pairing
27 *
28 * Revision 1.3 1997/11/07 03:37:24 brianp
29 * added missing line from Stephane Rehel
30 *
31 * Revision 1.2 1997/11/07 03:30:37 brianp
32 * added Josh's 11-5-97 patches
33 *
34 * Revision 1.1 1997/10/30 06:00:33 brianp
35 * Initial revision
36 */
37
38 #include <asm.inc>
39
/* S(x): dword number x of the current source point (esi = source cursor). */
40 #define S(x) dword ptr [esi + 4*x]
/* D(x): dword number x of the current destination point (edi = destination cursor). */
41 #define D(x) dword ptr [edi + 4*x]
/* M(x, y): matrix element m[4*x + y], addressed from edx. The arguments are
 * substituted without parentheses, so pass plain constants only. */
42 #define M(x, y) dword ptr [edx + 16*x + 4*y]
43
44 .code
45
46 /*
47 * void asm_transform_points3_general( GLuint n, GLfloat d[][4],
48 * GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n points, reads s[i][0..2] and writes all four
 * components of d[i]:
 *   d[i][c] = s[i][0]*m[c] + s[i][1]*m[4+c] + s[i][2]*m[8+c] + m[12+c]
 * for c = 0..3.  s[i][3] is never read.
 *
 * Arguments are taken from the stack.  esi and edi are saved and restored;
 * ecx and edx are overwritten.  Eight x87 registers are in use at the peak
 * of each iteration and all of them are popped again before the next one.
 * Returns immediately when n == 0.
49 */
50 .align 4 /* align the entry point itself, like the loop head below */
51 PUBLIC _asm_transform_points3_general
52 _asm_transform_points3_general:
53 push esi
54 push edi
55
56 mov ecx, [esp + 12] /* ecx = n */
57 mov edi, [esp + 16] /* edi = d */
58 mov edx, [esp + 20] /* edx = m */
59 mov esi, [esp + 24] /* esi = s */
60
61 test ecx, ecx
62 jz _asm_transform_points3_general_end
63
64 .align 4
65 _asm_transform_points3_general_loop:
66 fld S(0)
67 fmul M(0, 0)
68 fld S(0)
69 fmul M(0, 1)
70 fld S(0)
71 fmul M(0, 2)
72 fld S(0)
73 fmul M(0, 3)
74
75 fld S(1)
76 fmul M(1, 0)
77 fld S(1)
78 fmul M(1, 1)
79 fld S(1)
80 fmul M(1, 2)
81 fld S(1)
82 fmul M(1, 3)
83
84 /*
85 * The FPU stack should now look like this:
86 *
87 * st(7) = S(0) * M(0, 0)
88 * st(6) = S(0) * M(0, 1)
89 * st(5) = S(0) * M(0, 2)
90 * st(4) = S(0) * M(0, 3)
91 * st(3) = S(1) * M(1, 0)
92 * st(2) = S(1) * M(1, 1)
93 * st(1) = S(1) * M(1, 2)
94 * st(0) = S(1) * M(1, 3)
 *
 * The digit lists in the comments below name, from st(0) outwards,
 * which of the slots listed above currently sits at each position.
95 */
96
97 fxch st(3) /* 3 1 2 0 4 5 6 7 */
98 faddp st(7), st /* 1 2 0 4 5 6 7 */
99 fxch st(1) /* 2 1 0 4 5 6 7 */
100 faddp st(5), st /* 1 0 4 5 6 7 */
101 faddp st(3), st /* 0 4 5 6 7 */
102 faddp st(1), st /* 4 5 6 7 */
103
104 /*
105 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
106 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
107 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
108 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
109 */
110
111 fld S(2)
112 fmul M(2, 0)
113 fld S(2)
114 fmul M(2, 1)
115 fld S(2)
116 fmul M(2, 2)
117 fld S(2)
118 fmul M(2, 3)
119
120 /*
121 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
122 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
123 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
124 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
125 * st(3) = S(2) * M(2, 0)
126 * st(2) = S(2) * M(2, 1)
127 * st(1) = S(2) * M(2, 2)
128 * st(0) = S(2) * M(2, 3)
129 */
130
131 fxch st(3) /* 3 1 2 0 4 5 6 7 */
132 faddp st(7), st /* 1 2 0 4 5 6 7 */
133 fxch st(1) /* 2 1 0 4 5 6 7 */
134 faddp st(5), st /* 1 0 4 5 6 7 */
135 faddp st(3), st /* 0 4 5 6 7 */
136 faddp st(1), st /* 4 5 6 7 */
137
138 /*
139 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
140 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
141 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
142 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
143 */
144
 /* add the translation row m[12..15], rotating each sum to st(0) in turn */
145 fxch st(3) /* 3 1 2 0 */
146 fadd M(3, 0)
147 fxch st(2) /* 2 1 3 0 */
148 fadd M(3, 1)
149 fxch st(1) /* 1 2 3 0 */
150 fadd M(3, 2)
151 fxch st(3) /* 0 2 3 1 */
152 fadd M(3, 3)
153
154 /*
155 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + M(3, 2)
156 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + M(3, 0)
157 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + M(3, 1)
158 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + M(3, 3)
159 */
160
161 fxch st(3) /* 3 1 2 0 */
162 fstp D(2) /* 1 2 0 */
163 fxch st(1) /* 2 1 0 */
164 fstp D(0) /* 1 0 */
165 lea esi, S(4) /* advance to the next source point */
166 fstp D(1) /* 0 */
167 dec ecx /* sets ZF for the jnz; fstp and lea below leave the flags alone */
168 fstp D(3) /* */
169
170 lea edi, D(4) /* advance to the next destination point */
171
172 jnz _asm_transform_points3_general_loop
173
174 _asm_transform_points3_general_end:
175 pop edi
176 pop esi
177 ret
178
179
180 /*
181 * void asm_transform_points3_identity( GLuint n, GLfloat d[][4],
182 * GLfloat s[][4] );
 *
 * Copies s[i][0..2] to d[i][0..2] for each of the n points and stores
 * the bit pattern 0x3f800000 (1.0f in IEEE-754 single precision) in
 * d[i][3].  The copy uses integer moves only; no FPU instruction is
 * executed.
 *
 * esi, edi, ebx and ebp are saved and restored; eax, ecx and edx are
 * overwritten.  Returns without touching memory when n == 0.
183 */
184 .align 4 /* align the entry point itself, like the loop head below */
185 PUBLIC _asm_transform_points3_identity
186 _asm_transform_points3_identity:
187 push esi
188 push edi
189 mov ecx, [esp + 12] /* ecx = n */
190 mov edi, [esp + 16] /* edi = d */
191 mov esi, [esp + 20] /* esi = s */
192 push ebx /* pushed after the argument loads, so the offsets above stay valid */
193 push ebp
194
195 test ecx, ecx
196 jz _asm_transform_points3_identity_end
197
198 mov ebp, HEX(3f800000) /* ebp = 1.0f, stored as w of every point */
199
200 .align 4
201 _asm_transform_points3_identity_loop:
202 mov eax, S(0)
203 mov edx, S(1)
204 mov ebx, S(2)
205 lea esi, S(4) /* next source point */
206 mov D(0), eax
207 mov D(1), edx
208 mov D(2), ebx
209 mov D(3), ebp
210 dec ecx /* ZF survives the lea below */
211 lea edi, D(4) /* next destination point */
212 jnz _asm_transform_points3_identity_loop
213
214 _asm_transform_points3_identity_end:
215 pop ebp
216 pop ebx
217 pop edi
218 pop esi
219 ret
220
221
222 /*
223 * void asm_transform_points3_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
224 * GLfloat s[][4] );
 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0] + s[i][1]*m[4] + m[12]
 *   d[i][1] = s[i][0]*m[1] + s[i][1]*m[5] + m[13]
 *   d[i][2] = s[i][2]            (copied with an integer move)
 *   d[i][3] = 0x3f800000         (1.0f)
 *
 * If n is odd, one point is handled first; the main loop then handles
 * two points per iteration (S(4..6) / D(4..7) address the second point).
 *
 * esi, edi and ebp are saved and restored; eax, ecx and edx are
 * overwritten.  The main loop has eight x87 registers in use at its peak
 * and pops all of them again.
225 */
226 .align 4 /* align the entry point itself, like the loop head below */
227 PUBLIC _asm_transform_points3_2d
228 _asm_transform_points3_2d:
229 push esi
230 push edi
231 mov ecx, [esp + 12] /* ecx = n */
232 mov edi, [esp + 16] /* edi = d */
233 mov edx, [esp + 20] /* edx = m */
234 mov esi, [esp + 24] /* esi = s */
235 push ebp /* pushed after the argument loads, so the offsets above stay valid */
236
237 mov ebp, HEX(3f800000) /* ebp = 1.0f, stored as w of every point */
238
239 test cl, DEC(1) /* odd count? */
240 jz _asm_transform_points3_2d_step
241
242 dec ecx /* make the remaining count even */
243
244 fld S(0)
245 fmul M(0, 0)
246 fld S(0)
247 fmul M(0, 1)
248 fld S(1)
249 fmul M(1, 0)
250 fld S(1)
251 fmul M(1, 1)
252
253 /*
254 * st(3) = S(0) * M(0, 0)
255 * st(2) = S(0) * M(0, 1)
256 * st(1) = S(1) * M(1, 0)
257 * st(0) = S(1) * M(1, 1)
258 */
259
260 fxch st(1) /* 1 0 2 3 */
261 fadd M(3, 0)
262 fxch st(1) /* 0 1 2 3 */
263 fadd M(3, 1)
264 fxch st(1) /* 1 0 2 3 */
265 faddp st(3), st /* 0 2 3 */
266 faddp st(1), st /* 2 3 */
267 fstp D(1) /* 3 */
268 fstp D(0) /* */
269 mov eax, S(2)
270 lea esi, S(4)
271 mov D(3), ebp
272 mov D(2), eax
273 lea edi, D(4)
274
275 _asm_transform_points3_2d_step:
276 test ecx, ecx
277 jz _asm_transform_points3_2d_end
278
279 .align 4
280 _asm_transform_points3_2d_loop:
281 fld S(0)
282 fmul M(0, 0)
283 fld S(0)
284 fmul M(0, 1)
285 fld S(4)
286 fmul M(0, 0)
287 fld S(4)
288 fmul M(0, 1)
289 fld S(1)
290 fmul M(1, 0)
291 fld S(1)
292 fmul M(1, 1)
293 fld S(5)
294 fmul M(1, 0)
295 fld S(5)
296 fmul M(1, 1)
297
298 /*
299 * st(7) = S(0) * M(0, 0)
300 * st(6) = S(0) * M(0, 1)
301 * st(5) = S(4) * M(0, 0)
302 * st(4) = S(4) * M(0, 1)
303 * st(3) = S(1) * M(1, 0)
304 * st(2) = S(1) * M(1, 1)
305 * st(1) = S(5) * M(1, 0)
306 * st(0) = S(5) * M(1, 1)
307 */
308
309 fxch st(7) /* 7 1 2 3 4 5 6 0 */
310 fadd M(3, 0)
311 fxch st(6) /* 6 1 2 3 4 5 7 0 */
312 fadd M(3, 1)
313 fxch st(5) /* 5 1 2 3 4 6 7 0 */
314 fadd M(3, 0)
315 fxch st(4) /* 4 1 2 3 5 6 7 0 */
316 fadd M(3, 1)
317
 /* integer work interleaved with the FPU adds: copy z, set w = 1.0f for both points */
318 mov eax, S(2)
319 mov D(3), ebp
320 mov D(2), eax
321 mov eax, S(6)
322 mov D(7), ebp
323 mov D(6), eax
324 lea esi, S(8) /* skip two source points */
325 sub ecx, DEC(2) /* sets ZF for the jnz; the FPU ops and lea below leave the flags alone */
326
327 /*
328 * st(7) = S(5) * M(1, 1)
329 * st(6) = S(0) * M(0, 0) + M(3, 0)
330 * st(5) = S(0) * M(0, 1) + M(3, 1)
331 * st(4) = S(4) * M(0, 0) + M(3, 0)
332 * st(3) = S(1) * M(1, 0)
333 * st(2) = S(1) * M(1, 1)
334 * st(1) = S(5) * M(1, 0)
335 * st(0) = S(4) * M(0, 1) + M(3, 1)
336 */
337
338 faddp st(7), st /* 1 2 3 4 5 6 7 */
339 faddp st(3), st /* 2 3 4 5 6 7 */
340 faddp st(3), st /* 3 4 5 6 7 */
341 faddp st(3), st /* 4 5 6 7 */
342 fxch st(3) /* 7 5 6 4 */
343 fstp D(5) /* 5 6 4 */
344 fstp D(1) /* 6 4 */
345 fstp D(0) /* 4 */
346 fstp D(4) /* */
347
348 lea edi, D(8) /* skip two destination points */
349 jnz _asm_transform_points3_2d_loop
350
351 _asm_transform_points3_2d_end:
352 pop ebp
353 pop edi
354 pop esi
355 ret
356
357
358 /*
359 * void asm_transform_points3_2d_no_rot( GLuint n, GLfloat d[][4],
360 * GLfloat m[16], GLfloat s[][4] );
361 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0] + m[12]
 *   d[i][1] = s[i][1]*m[5] + m[13]
 *   d[i][2] = s[i][2]            (copied with an integer move)
 *   d[i][3] = 0x3f800000         (1.0f)
 *
 * esi, edi and ebp are saved and restored; eax, ecx and edx are
 * overwritten.  Returns without touching memory when n == 0.
362 */
363 .align 4 /* align the entry point itself, like the loop head below */
364 PUBLIC _asm_transform_points3_2d_no_rot
365 _asm_transform_points3_2d_no_rot:
366 push esi
367 push edi
368 mov ecx, [esp + 12] /* ecx = n */
369 mov edi, [esp + 16] /* edi = d */
370 mov edx, [esp + 20] /* edx = m */
371 mov esi, [esp + 24] /* esi = s */
372 push ebp /* pushed after the argument loads, so the offsets above stay valid */
373
374 test ecx, ecx
375 jz _asm_transform_points3_2d_no_rot_end
376
377 mov ebp, HEX(3f800000) /* ebp = 1.0f, stored as w of every point */
378
379 .align 4
380 _asm_transform_points3_2d_no_rot_loop:
381 fld S(0)
382 fmul M(0, 0)
383 fld S(1)
384 fmul M(1, 1)
385 fxch st(1) /* st(0) = x product */
386 fadd M(3, 0)
387 fxch st(1) /* st(0) = y product */
388 fadd M(3, 1)
389 fxch st(1) /* st(0) = x, st(1) = y */
390 fstp D(0)
391 fstp D(1)
392
393 mov eax, S(2) /* cycle 1: U pipe */
394 mov D(3), ebp /* V pipe */
395 mov D(2), eax /* cycle 2: U pipe */
396
397 dec ecx /* ZF survives the two lea instructions below */
398 lea esi, S(4)
399 lea edi, D(4)
400 jnz _asm_transform_points3_2d_no_rot_loop
401
402 _asm_transform_points3_2d_no_rot_end:
403 pop ebp
404 pop edi
405 pop esi
406 ret
407
408
409
410 /*
411 * void asm_transform_points3_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
412 * GLfloat s[][4] );
 *
 * For each of the n points and c = 0..2:
 *   d[i][c] = s[i][0]*m[c] + s[i][1]*m[4+c] + s[i][2]*m[8+c] + m[12+c]
 * and d[i][3] = 0x3f800000 (1.0f).  The fourth matrix column
 * (m[3], m[7], m[11], m[15]) is never read.
 *
 * esi and edi are saved and restored; eax, ecx and edx are overwritten.
 * Returns without touching memory when n == 0.
413 */
414 .align 4 /* align the entry point itself, like the loop head below */
415 PUBLIC _asm_transform_points3_3d
416 _asm_transform_points3_3d:
417 push esi
418 push edi
419 mov ecx, [esp + 12] /* ecx = n */
420 mov edi, [esp + 16] /* edi = d */
421 mov edx, [esp + 20] /* edx = m */
422 mov esi, [esp + 24] /* esi = s */
423
424 test ecx, ecx
425 jz _asm_transform_points3_3d_end
426
427 mov eax, HEX(3f800000) /* eax = 1.0f, stored as w of every point */
428
429 .align 4
430 _asm_transform_points3_3d_loop:
431 fld S(0)
432 fmul M(0, 0)
433 fld S(0)
434 fmul M(0, 1)
435 fld S(0)
436 fmul M(0, 2)
437
438 fld S(1)
439 fmul M(1, 0)
440 fld S(1)
441 fmul M(1, 1)
442 fld S(1)
443 fmul M(1, 2)
444
445 /*
446 * st(5) = S(0) * M(0, 0)
447 * st(4) = S(0) * M(0, 1)
448 * st(3) = S(0) * M(0, 2)
449 * st(2) = S(1) * M(1, 0)
450 * st(1) = S(1) * M(1, 1)
451 * st(0) = S(1) * M(1, 2)
452 */
453
454 fxch st(2) /* 2 1 0 3 4 5 */
455 faddp st(5), st /* 1 0 3 4 5 */
456 faddp st(3), st /* 0 3 4 5 */
457 faddp st(1), st /* 3 4 5 */
458
459 /*
460 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
461 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
462 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
463 */
464
465 fld S(2)
466 fmul M(2, 0)
467 fld S(2)
468 fmul M(2, 1)
469 fld S(2)
470 fmul M(2, 2)
471
472 /*
473 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
474 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
475 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
476 * st(2) = S(2) * M(2, 0)
477 * st(1) = S(2) * M(2, 1)
478 * st(0) = S(2) * M(2, 2)
479 */
480
481 fxch st(2) /* 2 1 0 3 4 5 */
482 faddp st(5), st /* 1 0 3 4 5 */
483 faddp st(3), st /* 0 3 4 5 */
484 faddp st(1), st /* 3 4 5 */
485
486 /*
487 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
488 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
489 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
490 */
491
 /* add the translation m[12..14], rotating each sum to st(0) in turn */
492 fxch st(2) /* 2 1 0 */
493 fadd M(3, 0)
494 fxch st(1) /* 1 2 0 */
495 fadd M(3, 1)
496 fxch st(2) /* 0 2 1 */
497 fadd M(3, 2)
498
499 fxch st(1) /* 2 0 1 */
500 fstp D(0) /* 0 1 */
501 fstp D(2) /* 1 */
502 fstp D(1) /* */
503 mov D(3), eax
504
505 lea esi, S(4)
506 dec ecx /* ZF survives the lea below */
507
508 lea edi, D(4)
509
510 jnz _asm_transform_points3_3d_loop
511
512 _asm_transform_points3_3d_end:
513 pop edi
514 pop esi
515 ret
516
517
518
519 /*
520 * void asm_transform_points4_general( GLuint n, GLfloat d[][4],
521 * GLfloat m[16], GLfloat s[][4] );
 *
 * Full 4x4 transform.  For each of the n points and c = 0..3:
 *   d[i][c] = s[i][0]*m[c] + s[i][1]*m[4+c] + s[i][2]*m[8+c]
 *           + s[i][3]*m[12+c]
 *
 * esi and edi are saved and restored; ecx and edx are overwritten.
 * Eight x87 registers are in use at the peak of each iteration and all of
 * them are popped again before the next one.
 * Returns without touching memory when n == 0.
522 */
523 .align 4 /* align the entry point itself, like the loop head below */
524 PUBLIC _asm_transform_points4_general
525 _asm_transform_points4_general:
526 push esi
527 push edi
528 mov ecx, [esp + 12] /* ecx = n */
529 mov edi, [esp + 16] /* edi = d */
530 mov edx, [esp + 20] /* edx = m */
531 mov esi, [esp + 24] /* esi = s */
532
533 test ecx, ecx
534 jz _asm_transform_points4_general_end
535
536 .align 4
537 _asm_transform_points4_general_loop:
538 fld S(0)
539 fmul M(0, 0)
540 fld S(0)
541 fmul M(0, 1)
542 fld S(0)
543 fmul M(0, 2)
544 fld S(0)
545 fmul M(0, 3)
546
547 fld S(1)
548 fmul M(1, 0)
549 fld S(1)
550 fmul M(1, 1)
551 fld S(1)
552 fmul M(1, 2)
553 fld S(1)
554 fmul M(1, 3)
555
556 /*
557 * st(7) = S(0) * M(0, 0)
558 * st(6) = S(0) * M(0, 1)
559 * st(5) = S(0) * M(0, 2)
560 * st(4) = S(0) * M(0, 3)
561 * st(3) = S(1) * M(1, 0)
562 * st(2) = S(1) * M(1, 1)
563 * st(1) = S(1) * M(1, 2)
564 * st(0) = S(1) * M(1, 3)
565 */
566
567 fxch st(3) /* 3 1 2 0 4 5 6 7 */
568 faddp st(7), st /* 1 2 0 4 5 6 7 */
569 fxch st(1) /* 2 1 0 4 5 6 7 */
570 faddp st(5), st /* 1 0 4 5 6 7 */
571 faddp st(3), st /* 0 4 5 6 7 */
572 faddp st(1), st /* 4 5 6 7 */
573
574 /*
575 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
576 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
577 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
578 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
579 */
580
581 fld S(2)
582 fmul M(2, 0)
583 fld S(2)
584 fmul M(2, 1)
585 fld S(2)
586 fmul M(2, 2)
587 fld S(2)
588 fmul M(2, 3)
589
590 /*
591 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
592 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
593 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
594 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
595 * st(3) = S(2) * M(2, 0)
596 * st(2) = S(2) * M(2, 1)
597 * st(1) = S(2) * M(2, 2)
598 * st(0) = S(2) * M(2, 3)
599 */
600
601 fxch st(3) /* 3 1 2 0 4 5 6 7 */
602 faddp st(7), st /* 1 2 0 4 5 6 7 */
603 fxch st(1) /* 2 1 0 4 5 6 7 */
604 faddp st(5), st /* 1 0 4 5 6 7 */
605 faddp st(3), st /* 0 4 5 6 7 */
606 faddp st(1), st /* 4 5 6 7 */
607
608 /*
609 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
610 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
611 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
612 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
613 */
614
615 fld S(3)
616 fmul M(3, 0)
617 fld S(3)
618 fmul M(3, 1)
619 fld S(3)
620 fmul M(3, 2)
621 fld S(3)
622 fmul M(3, 3)
623
624 /*
625 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
626 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
627 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
628 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
629 * st(3) = S(3) * M(3, 0)
630 * st(2) = S(3) * M(3, 1)
631 * st(1) = S(3) * M(3, 2)
632 * st(0) = S(3) * M(3, 3)
633 */
634
635 fxch st(3) /* 3 1 2 0 4 5 6 7 */
636 faddp st(7), st /* 1 2 0 4 5 6 7 */
637 fxch st(1) /* 2 1 0 4 5 6 7 */
638 faddp st(5), st /* 1 0 4 5 6 7 */
639 faddp st(3), st /* 0 4 5 6 7 */
640
641 lea esi, S(4) /* all source reads for this point are done */
642 dec ecx /* sets ZF for the jnz; the FPU ops and lea below leave the flags alone */
643
644 faddp st(1), st /* 4 5 6 7 */
645
646 /*
647 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
648 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
649 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
650 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + S(3) * M(3, 3)
651 */
652
653 fxch st(3) /* 3 1 2 0 */
654 fstp D(0) /* 1 2 0 */
655 fxch st(1) /* 2 1 0 */
656 fstp D(1) /* 1 0 */
657 fstp D(2) /* 0 */
658 fstp D(3) /* */
659
660 lea edi, D(4)
661
662 jnz _asm_transform_points4_general_loop
663
664 _asm_transform_points4_general_end:
665 pop edi
666 pop esi
667 ret
668
669
670
671 /*
672 * void asm_transform_points4_identity( GLuint n, GLfloat d[][4],
673 * GLfloat s[][4] );
 *
 * Copies n points of four dwords each (4*n dwords in total) from s to d
 * with a forward "rep movsd".  A count of zero copies nothing.
 *
 * esi and edi are saved and restored; ecx is overwritten and the
 * direction flag is left cleared.
674 */
675 .align 4 /* align the entry point itself */
676 PUBLIC _asm_transform_points4_identity
677 _asm_transform_points4_identity:
678 push esi
679 push edi
680 mov ecx, [esp + 12] /* ecx = n */
681 mov edi, [esp + 16] /* edi = d */
682 mov esi, [esp + 20] /* esi = s */
683
684 lea ecx, [ecx * 4] /* dword count = 4 per point */
685
686 cld /* make movsd copy upwards */
687 rep movsd
688
689 pop edi
690 pop esi
691 ret
692
693
694
695 /*
696 * void asm_transform_points4_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
697 * GLfloat s[][4] );
 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0] + s[i][1]*m[4] + s[i][3]*m[12]
 *   d[i][1] = s[i][0]*m[1] + s[i][1]*m[5] + s[i][3]*m[13]
 *   d[i][2] = s[i][2]            (copied with an integer move)
 *   d[i][3] = s[i][3]            (copied with an integer move)
 *
 * esi, edi and ebx are saved and restored; eax, ecx and edx are
 * overwritten.  Returns without touching memory when n == 0.
698 */
699 .align 4 /* align the entry point itself, like the loop head below */
700 PUBLIC _asm_transform_points4_2d
701 _asm_transform_points4_2d:
702 push esi
703 push edi
704 mov ecx, [esp + 12] /* ecx = n */
705 mov edi, [esp + 16] /* edi = d */
706 mov edx, [esp + 20] /* edx = m */
707 mov esi, [esp + 24] /* esi = s */
708
709 test ecx, ecx
710 jz _asm_transform_points4_2d_end
711
712 push ebx /* only pushed on the non-empty path; popped before the _end label */
713
714 .align 4
715 _asm_transform_points4_2d_loop:
716 fld S(0)
717 fmul M(0, 0)
718 fld S(0)
719 fmul M(0, 1)
720 fld S(1)
721 fmul M(1, 0)
722 fld S(1)
723 fmul M(1, 1)
724 fld S(3)
725 fmul M(3, 0)
726 fld S(3)
727 fmul M(3, 1)
728
729 /*
730 * st(5) = S(0) * M(0, 0)
731 * st(4) = S(0) * M(0, 1)
732 * st(3) = S(1) * M(1, 0)
733 * st(2) = S(1) * M(1, 1)
734 * st(1) = S(3) * M(3, 0)
735 * st(0) = S(3) * M(3, 1)
736 */
737
738 mov eax, S(2)
739 mov ebx, S(3)
740 lea esi, S(4) /* all source reads for this point are done */
741 dec ecx /* sets ZF for the jnz; mov, FPU ops and lea below leave the flags alone */
742 mov D(2), eax
743 mov D(3), ebx
744 faddp st(4), st /* y: S(0)*M(0,1) += S(3)*M(3,1) */
745 faddp st(4), st /* x: S(0)*M(0,0) += S(3)*M(3,0) */
746 faddp st(2), st /* y += S(1)*M(1,1) */
747 faddp st(2), st /* x += S(1)*M(1,0) */
748 fstp D(1)
749 fstp D(0)
750 lea edi, D(4)
751 jnz _asm_transform_points4_2d_loop
752
753 pop ebx
754
755 _asm_transform_points4_2d_end:
756 pop edi
757 pop esi
758 ret
759
760
761
762 /*
763 * void asm_transform_points4_2d_no_rot( GLuint n, GLfloat d[][4],
764 * GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0] + s[i][3]*m[12]
 *   d[i][1] = s[i][1]*m[5] + s[i][3]*m[13]
 *   d[i][2] = s[i][2]            (copied with an integer move)
 *   d[i][3] = s[i][3]            (copied with an integer move)
 *
 * esi, edi and ebx are saved and restored; eax, ecx and edx are
 * overwritten.  Returns without touching memory when n == 0.
765 */
766 .align 4 /* align the entry point itself, like the loop head below */
767 PUBLIC _asm_transform_points4_2d_no_rot
768 _asm_transform_points4_2d_no_rot:
769 push esi
770 push edi
771 mov ecx, [esp + 12] /* ecx = n */
772 mov edi, [esp + 16] /* edi = d */
773 mov edx, [esp + 20] /* edx = m */
774 mov esi, [esp + 24] /* esi = s */
775
776 test ecx, ecx
777 jz _asm_transform_points4_2d_no_rot_end
778 push ebx /* only pushed on the non-empty path; popped before the _end label */
779
780 .align 4
781 _asm_transform_points4_2d_no_rot_loop:
782 fld S(0)
783 fmul M(0, 0)
784 fld S(1)
785 fmul M(1, 1)
786 fld S(3)
787 fmul M(3, 0)
788 fld S(3)
789 fmul M(3, 1)
790 mov eax, S(2)
791 mov ebx, S(3)
792 lea esi, S(4) /* all source reads for this point are done */
793 dec ecx /* sets ZF for the jnz; mov, FPU ops and lea below leave the flags alone */
794 mov D(2), eax
795 mov D(3), ebx
796 faddp st(2), st /* y: S(1)*M(1,1) += S(3)*M(3,1) */
797 faddp st(2), st /* x: S(0)*M(0,0) += S(3)*M(3,0) */
798 fstp D(1)
799 fstp D(0)
800 lea edi, D(4)
801 jnz _asm_transform_points4_2d_no_rot_loop
802
803 pop ebx
804
805 _asm_transform_points4_2d_no_rot_end:
806 pop edi
807 pop esi
808 ret
809
810
811
812 /*
813 * void asm_transform_points4_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
814 * GLfloat s[][4] );
 *
 * For each of the n points and c = 0..2:
 *   d[i][c] = s[i][0]*m[c] + s[i][1]*m[4+c] + s[i][2]*m[8+c]
 *           + s[i][3]*m[12+c]
 * and d[i][3] = s[i][3].
 *
 * s[i][3] is loaded onto the x87 stack at the top of every iteration and
 * stays at the bottom until the final fstp D(3); the st(n) tables in the
 * comments below list only the values above it.
 *
 * esi and edi are saved and restored; ecx and edx are overwritten.
 * Returns without touching memory when n == 0.
815 */
816 .align 4 /* align the entry point itself, like the loop head below */
817 PUBLIC _asm_transform_points4_3d
818 _asm_transform_points4_3d:
819 push esi
820 push edi
821 mov ecx, [esp + 12] /* ecx = n */
822 mov edi, [esp + 16] /* edi = d */
823 mov edx, [esp + 20] /* edx = m */
824 mov esi, [esp + 24] /* esi = s */
825
826 test ecx, ecx
827 jz _asm_transform_points4_3d_end
828
829 .align 4
830 _asm_transform_points4_3d_loop:
831 fld S(3) /* w, carried through unchanged and stored last */
832
833 fld S(0)
834 fmul M(0, 0)
835 fld S(0)
836 fmul M(0, 1)
837 fld S(0)
838 fmul M(0, 2)
839
840 fld S(1)
841 fmul M(1, 0)
842 fld S(1)
843 fmul M(1, 1)
844 fld S(1)
845 fmul M(1, 2)
846
847 /*
848 * st(5) = S(0) * M(0, 0)
849 * st(4) = S(0) * M(0, 1)
850 * st(3) = S(0) * M(0, 2)
851 * st(2) = S(1) * M(1, 0)
852 * st(1) = S(1) * M(1, 1)
853 * st(0) = S(1) * M(1, 2)
854 */
855
856 fxch st(2) /* 2 1 0 3 4 5 */
857 faddp st(5), st /* 1 0 3 4 5 */
858 faddp st(3), st /* 0 3 4 5 */
859 faddp st(1), st /* 3 4 5 */
860
861 /*
862 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
863 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
864 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
865 */
866
867 fld S(2)
868 fmul M(2, 0)
869 fld S(2)
870 fmul M(2, 1)
871 fld S(2)
872 fmul M(2, 2)
873
874 /*
875 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
876 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
877 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
878 * st(2) = S(2) * M(2, 0)
879 * st(1) = S(2) * M(2, 1)
880 * st(0) = S(2) * M(2, 2)
881 */
882
883 fxch st(2) /* 2 1 0 3 4 5 */
884 faddp st(5), st /* 1 0 3 4 5 */
885 faddp st(3), st /* 0 3 4 5 */
886 faddp st(1), st /* 3 4 5 */
887
888 /*
889 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
890 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
891 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
892 */
893
894 fld S(3)
895 fmul M(3, 0)
896 fld S(3)
897 fmul M(3, 1)
898 fld S(3)
899 fmul M(3, 2)
900
901 /*
902 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
903 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
904 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
905 * st(2) = S(3) * M(3, 0)
906 * st(1) = S(3) * M(3, 1)
907 * st(0) = S(3) * M(3, 2)
908 */
909
910 fxch st(2) /* 2 1 0 3 4 5 */
911 faddp st(5), st /* 1 0 3 4 5 */
912 faddp st(3), st /* 0 3 4 5 */
913
914 lea esi, S(4) /* all source reads for this point are done */
915 dec ecx /* sets ZF for the jnz; the FPU ops and lea below leave the flags alone */
916
917 faddp st(1), st /* 3 4 5 */
918
919 /*
920 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
921 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
922 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
923 */
924
925 fxch st(2) /* 2 1 0 */
926 fstp D(0) /* 1 0 */
927 fstp D(1) /* 0 */
928 fstp D(2) /* */
929 fstp D(3) /* the S(3) value loaded at the top of the loop */
930
931 lea edi, D(4)
932
933 jnz _asm_transform_points4_3d_loop
934
935 _asm_transform_points4_3d_end:
936 pop edi
937 pop esi
938 ret
939
940 /*
941 * void asm_transform_points4_ortho( GLuint n, GLfloat d[][4],
942 * GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0]  + s[i][3]*m[12]
 *   d[i][1] = s[i][1]*m[5]  + s[i][3]*m[13]
 *   d[i][2] = s[i][2]*m[10] + s[i][3]*m[14]
 *   d[i][3] = s[i][3]            (copied with an integer move)
 *
 * esi and edi are saved and restored; eax, ecx and edx are overwritten.
 * Returns without touching memory when n == 0.
943 */
944 .align 4 /* align the entry point itself, like the loop head below */
945 PUBLIC _asm_transform_points4_ortho
946 _asm_transform_points4_ortho:
947 push esi
948 push edi
949 mov ecx, [esp + 12] /* ecx = n */
950 mov edi, [esp + 16] /* edi = d */
951 mov edx, [esp + 20] /* edx = m */
952 mov esi, [esp + 24] /* esi = s */
953
954 test ecx, ecx
955 jz _asm_transform_points4_ortho_end
956
957 .align 4
958 _asm_transform_points4_ortho_loop:
959 fld S(0)
960 fmul M(0, 0)
961 fld S(1)
962 fmul M(1, 1)
963 fld S(2)
964 fmul M(2, 2)
965
966 fld S(3)
967 fmul M(3, 0)
968 fld S(3)
969 fmul M(3, 1)
970 fld S(3)
971 fmul M(3, 2)
972
973 mov eax, S(3)
974 lea esi, S(4) /* all source reads for this point are done */
975 dec ecx /* sets ZF for the jnz; mov, FPU ops and lea below leave the flags alone */
976 mov D(3), eax
977
978 faddp st(3), st /* z: S(2)*M(2,2) += S(3)*M(3,2) */
979 faddp st(3), st /* y: S(1)*M(1,1) += S(3)*M(3,1) */
980 faddp st(3), st /* x: S(0)*M(0,0) += S(3)*M(3,0) */
981
982 fstp D(2)
983 fstp D(1)
984 fstp D(0)
985
986 lea edi, D(4)
987 jnz _asm_transform_points4_ortho_loop
988
989 _asm_transform_points4_ortho_end:
990 pop edi
991 pop esi
992 ret
993
994 /*
995 * void asm_transform_points4_perspective( GLuint n, GLfloat d[][4],
996 * GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n points:
 *   d[i][0] = s[i][0]*m[0]  + s[i][2]*m[8]
 *   d[i][1] = s[i][1]*m[5]  + s[i][2]*m[9]
 *   d[i][2] = s[i][2]*m[10] + s[i][3]*m[14]
 *   d[i][3] = s[i][2] with its sign bit inverted (i.e. -s[i][2]),
 *             produced with an integer xor rather than the FPU
 *
 * esi and edi are saved and restored; eax, ecx and edx are overwritten.
 * Returns without touching memory when n == 0.
997 */
998 .align 4 /* align the entry point itself, like the loop head below */
999 PUBLIC _asm_transform_points4_perspective
1000 _asm_transform_points4_perspective:
1001 push esi
1002 push edi
1003 mov ecx, [esp + 12] /* ecx = n */
1004 mov edi, [esp + 16] /* edi = d */
1005 mov edx, [esp + 20] /* edx = m */
1006 mov esi, [esp + 24] /* esi = s */
1007
1008 test ecx, ecx
1009 jz _asm_transform_points4_perspective_end
1010
1011 .align 4
1012 _asm_transform_points4_perspective_loop:
1013 fld S(0)
1014 fmul M(0, 0)
1015 fld S(1)
1016 fmul M(1, 1)
1017 fld S(2)
1018 fmul M(2, 2)
1019
1020 fld S(2)
1021 fmul M(2, 0)
1022 fld S(2)
1023 fmul M(2, 1)
1024 fld S(3)
1025 fmul M(3, 2)
1026
1027 mov eax, S(2)
1028 lea esi, S(4) /* all source reads for this point are done */
1029 xor eax, HEX(80000000) /* flip the sign bit: eax = -S(2) */
1030 dec ecx /* must follow the xor, which rewrites ZF; nothing up to the jnz changes it */
1031
1032 faddp st(3), st /* z: S(2)*M(2,2) += S(3)*M(3,2) */
1033 faddp st(3), st /* y: S(1)*M(1,1) += S(2)*M(2,1) */
1034 faddp st(3), st /* x: S(0)*M(0,0) += S(2)*M(2,0) */
1035
1036 fstp D(2)
1037 fstp D(1)
1038 fstp D(0)
1039
1040 mov D(3), eax
1041 lea edi, D(4)
1042 jnz _asm_transform_points4_perspective_loop
1043
1044 _asm_transform_points4_perspective_end:
1045 pop edi
1046 pop esi
1047 ret
1048
1049
1050
1051 /*
1052 * Table for clip test.
1053 *
 * 128 one-byte entries, indexed by the 7-bit value that cliptest builds
 * in edx from one point (S(0..3) are the point's four dwords):
 *
1054 * bit6 = S(3) < 0
1055 * bit5 = S(2) < 0
1056 * bit4 = abs(S(2)) > abs(S(3))
1057 * bit3 = S(1) < 0
1058 * bit2 = abs(S(1)) > abs(S(3))
1059 * bit1 = S(0) < 0
1060 * bit0 = abs(S(0)) > abs(S(3))
 *
 * Each entry is an OR of the CLIP_*_BIT flags listed below.
1061 */
1062
1063 /* Vertex buffer clipping flags (from vb.h) */
/*
 * The "#if 0" branch is never assembled.  It keeps the macro form of the
 * table: CLIP_VALUE(i) is the entry for index i, and the SIGN_x / MAGN_x
 * helpers expand to all-ones when the corresponding index bit is set and
 * to zero otherwise.
 */
1064 #if 0
1065
1066 #define CLIP_RIGHT_BIT 0x01
1067 #define CLIP_LEFT_BIT 0x02
1068 #define CLIP_TOP_BIT 0x04
1069 #define CLIP_BOTTOM_BIT 0x08
1070 #define CLIP_NEAR_BIT 0x10
1071 #define CLIP_FAR_BIT 0x20
1072 #define CLIP_USER_BIT 0x40
1073 #define CLIP_ALL_BITS 0x3f
1074
1075 #define MAGN_X(i) (~(((i) & 1) - 1))
1076 #define SIGN_X(i) (~((((i) >> 1) & 1) - 1))
1077 #define MAGN_Y(i) (~((((i) >> 2) & 1) - 1))
1078 #define SIGN_Y(i) (~((((i) >> 3) & 1) - 1))
1079 #define MAGN_Z(i) (~((((i) >> 4) & 1) - 1))
1080 #define SIGN_Z(i) (~((((i) >> 5) & 1) - 1))
1081 #define SIGN_W(i) (~((((i) >> 6) & 1) - 1))
1082
1083 #define CLIP_VALUE(i) \
1084 (CLIP_RIGHT_BIT \
1085 & ((~SIGN_X(i) & SIGN_W(i)) \
1086 | (~SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)) \
1087 | (SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)))) \
1088 | (CLIP_LEFT_BIT \
1089 & ((SIGN_X(i) & SIGN_W(i)) \
1090 | (~SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)) \
1091 | (SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)))) \
1092 | (CLIP_TOP_BIT \
1093 & ((~SIGN_Y(i) & SIGN_W(i)) \
1094 | (~SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)) \
1095 | (SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)))) \
1096 | (CLIP_BOTTOM_BIT \
1097 & ((SIGN_Y(i) & SIGN_W(i)) \
1098 | (~SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)) \
1099 | (SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)))) \
1100 | (CLIP_FAR_BIT \
1101 & ((~SIGN_Z(i) & SIGN_W(i)) \
1102 | (~SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i)) \
1103 | (SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)))) \
1104 | (CLIP_NEAR_BIT \
1105 & ((SIGN_Z(i) & SIGN_W(i)) \
1106 | (~SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)) \
1107 | (SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i))))
1108
1109 #define CLIP_VALUE8(i) \
1110 CLIP_VALUE(i + 0), CLIP_VALUE(i + 1), CLIP_VALUE(i + 2), CLIP_VALUE(i + 3), \
1111 CLIP_VALUE(i + 4), CLIP_VALUE(i + 5), CLIP_VALUE(i + 6), CLIP_VALUE(i + 7)
1112
1113 .rodata
1114
1115 clip_table:
1116 .byte CLIP_VALUE8(0x00)
1117 .byte CLIP_VALUE8(0x08)
1118 .byte CLIP_VALUE8(0x10)
1119 .byte CLIP_VALUE8(0x18)
1120 .byte CLIP_VALUE8(0x20)
1121 .byte CLIP_VALUE8(0x28)
1122 .byte CLIP_VALUE8(0x30)
1123 .byte CLIP_VALUE8(0x38)
1124 .byte CLIP_VALUE8(0x40)
1125 .byte CLIP_VALUE8(0x48)
1126 .byte CLIP_VALUE8(0x50)
1127 .byte CLIP_VALUE8(0x58)
1128 .byte CLIP_VALUE8(0x60)
1129 .byte CLIP_VALUE8(0x68)
1130 .byte CLIP_VALUE8(0x70)
1131 .byte CLIP_VALUE8(0x78)
1132 #else
1133
/*
 * Literal table that is actually assembled: 16 rows of 8 bytes, row r
 * holding the entries for indices 8*r .. 8*r + 7.
 * NOTE(review): spot checks (indices 0-4, 16, 48, 64, 65) agree with
 * CLIP_VALUE above; the full table was not re-derived - confirm before
 * editing either form.
 */
1134 .const
1135 ASSUME NOTHING
1136
1137 clip_table:
1138 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
1139 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
1140 .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(24), HEX(25), HEX(24), HEX(26)
1141 .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(28), HEX(29), HEX(28), HEX(2a)
1142 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
1143 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
1144 .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(14), HEX(15), HEX(14), HEX(16)
1145 .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(18), HEX(19), HEX(18), HEX(1a)
1146 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
1147 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
1148 .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(27), HEX(25), HEX(27), HEX(26)
1149 .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(2b), HEX(29), HEX(2b), HEX(2a)
1150 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
1151 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
1152 .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(17), HEX(15), HEX(17), HEX(16)
1153 .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(1b), HEX(19), HEX(1b), HEX(1a)
1154
1155 #endif
1156
1157 .code
1158
1159 /*
1160 * cliptest -
1161 *
 * File-local helper with a register calling convention (not exported).
 * For every point it builds a 7-bit index in edx out of the sign bits of
 * S(0..3) and unsigned comparisons of their magnitudes against S(3),
 * looks the index up in clip_table, ORs the result into the point's
 * clipmask byte and into al, and ANDs it into ah.
 *
 * The floats are inspected as raw integers: "add r, r" moves the sign
 * bit into the carry flag and leaves the remaining 31 bits shifted left
 * by one, so a plain unsigned cmp then orders the magnitudes.
 *
1162 * inputs:
1163 * ecx = # points
1164 * esi = points
1165 * edi = clipmask[]
1166 *
1167 * inputs/outputs:
1168 * al = ormask
1169 * ah = andmask
 *
 * clobbers: ecx, edx, esi, edi and the flags.  ebx and ebp are saved and
 * restored.  Returns immediately, changing nothing, when ecx == 0.
1170 */
1171
1172 cliptest:
1173 test ecx, ecx
1174 jz cliptest_end
1175
1176 push ebp
1177 push ebx
1178
1179 .align 4
1180 cliptest_loop:
1181 mov ebp, S(3)
1182 mov ebx, S(2)
1183
1184 xor edx, edx /* edx accumulates the table index, one bit per adc */
1185 add ebp, ebp /* ebp = magnitude bits of S(3), shifted left by 1 ; carry = sign of S(3) */
1186
1187 adc edx, edx /* shift the carry in as the new low bit */
1188 add ebx, ebx /* ebx = magnitude bits of S(2), shifted left by 1 ; carry = sign of S(2) */
1189
1190 adc edx, edx
1191 cmp ebp, ebx /* carry = abs(S(2)) > abs(S(3)) */
1192
1193 adc edx, edx
1194 mov ebx, S(1)
1195
1196 add ebx, ebx /* ebx = magnitude bits of S(1), shifted left by 1 ; carry = sign of S(1) */
1197
1198 adc edx, edx
1199 cmp ebp, ebx /* carry = abs(S(1)) > abs(S(3)) */
1200
1201 adc edx, edx
1202 mov ebx, S(0)
1203
1204 add ebx, ebx /* ebx = magnitude bits of S(0), shifted left by 1 ; carry = sign of S(0) */
1205
1206 adc edx, edx
1207 cmp ebp, ebx /* carry = abs(S(0)) > abs(S(3)) */
1208
1209 adc edx, edx /* edx is now the complete 7-bit index (0..127) */
1210
1211 lea esi, S(4) /* next point */
1212
1213 mov bl, byte ptr [edi] /* current clipmask byte */
1214 mov dl, byte ptr [clip_table + edx] /* dl = clip flags for this point */
1215
1216 or bl, dl
1217 or al, dl /* ormask accumulates every flag seen */
1218
1219 and ah, dl /* andmask keeps only flags common to all points */
1220 mov [edi], bl
1221
1222 inc edi
1223 dec ecx
1224
1225 jnz cliptest_loop
1226
1227 pop ebx
1228 pop ebp
1229 cliptest_end:
1230 ret
1231
1232 /*
1233 * void asm_project_and_cliptest_general( GLuint n, GLfloat d[][4], GLfloat m[16],
1234 * GLfloat s[][4], GLubyte clipmask[],
1235 * GLubyte *ormask, GLubyte *andmask );
 *
 * Calls _asm_transform_points4_general( n, d, m, s ), then runs cliptest
 * over the n transformed points in d.  *ormask and *andmask are loaded
 * into al / ah before cliptest and written back from al / ah afterwards;
 * clipmask[] is updated in place by cliptest.
 *
 * esi and edi are saved and restored.  All [esp + k] offsets below are
 * relative to the stack after those two pushes.
1236 */
1237 .align 4 /* align the entry point itself */
1238 PUBLIC _asm_project_and_cliptest_general
1239 _asm_project_and_cliptest_general:
1240 push esi
1241 push edi
1242 mov ecx, [esp + 12] /* ecx = n */
1243 mov edi, [esp + 16] /* edi = d */
1244 mov edx, [esp + 20] /* edx = m */
1245 mov esi, [esp + 24] /* esi = s */
1246
 /* push (n, d, m, s) right to left for the transform call */
1247 push esi
1248 push edx
1249 push edi
1250 push ecx
1251 call _asm_transform_points4_general
1252 add esp, DEC(16) /* drop the four pushed arguments */
1253
1254 mov edi, [esp + 32] /* ormask */
1255 mov esi, [esp + 36] /* andmask */
1256 mov al, [edi]
1257 mov ah, [esi]
1258
 /* the transform overwrote ecx, so the cliptest inputs are reloaded */
1259 mov ecx, [esp + 12] /* ecx = n */
1260 mov edi, [esp + 28] /* edi = clipmask */
1261 mov esi, [esp + 16] /* esi = d */
1262
1263 call cliptest
1264
1265 mov edi, [esp + 32] /* ormask */
1266 mov esi, [esp + 36] /* andmask */
1267 mov [edi], al
1268 mov [esi], ah
1269
1270 pop edi
1271 pop esi
1272 ret
1273
1274
1275 /*
1276 * void asm_project_and_cliptest_identity( GLuint n, GLfloat d[][4],
1277 * GLfloat s[][4], GLubyte clipmask[],
1278 * GLubyte *ormask, GLubyte *andmask );
 *
 * Calls _asm_transform_points4_identity( n, d, s ), then runs cliptest
 * over the n copied points in d.  *ormask and *andmask are loaded into
 * al / ah before cliptest and written back from al / ah afterwards;
 * clipmask[] is updated in place by cliptest.
 *
 * This variant takes no matrix argument, so clipmask, ormask and andmask
 * sit 4 bytes lower on the stack than in the other project_and_cliptest
 * entry points.  esi and edi are saved and restored.
1279 */
1280 .align 4 /* align the entry point itself */
1281 PUBLIC _asm_project_and_cliptest_identity
1282 _asm_project_and_cliptest_identity:
1283 push esi
1284 push edi
1285 mov ecx, [esp + 12] /* ecx = n */
1286 mov edi, [esp + 16] /* edi = d */
1287 mov esi, [esp + 20] /* esi = s */
1288
 /* push (n, d, s) right to left for the copy call */
1289 push esi
1290 push edi
1291 push ecx
1292
1293 call _asm_transform_points4_identity
1294
1295 add esp, DEC(12) /* drop the three pushed arguments */
1296
1297 mov edi, [esp + 28] /* ormask */
1298 mov esi, [esp + 32] /* andmask */
1299 mov al, [edi]
1300 mov ah, [esi]
1301
 /* the copy overwrote ecx, so the cliptest inputs are reloaded */
1302 mov ecx, [esp + 12] /* ecx = n */
1303 mov edi, [esp + 24] /* edi = clipmask */
1304 mov esi, [esp + 16] /* esi = d */
1305
1306 call cliptest
1307
1308 mov edi, [esp + 28] /* ormask */
1309 mov esi, [esp + 32] /* andmask */
1310 mov [edi], al
1311 mov [esi], ah
1312
1313 pop edi
1314 pop esi
1315 ret
1316
1317 /*
1318 * void asm_project_and_cliptest_ortho( GLuint n, GLfloat d[][4], GLfloat m[16],
1319 * GLfloat s[][4], GLubyte clipmask[],
1320 * GLubyte *ormask, GLubyte *andmask );
 *
 * Calls _asm_transform_points4_ortho( n, d, m, s ), then runs cliptest
 * over the n transformed points in d.  *ormask and *andmask are loaded
 * into al / ah before cliptest and written back from al / ah afterwards;
 * clipmask[] is updated in place by cliptest.
 *
 * esi and edi are saved and restored.  All [esp + k] offsets below are
 * relative to the stack after those two pushes.
1321 */
1322 .align 4 /* align the entry point itself */
1323 PUBLIC _asm_project_and_cliptest_ortho
1324 _asm_project_and_cliptest_ortho:
1325 push esi
1326 push edi
1327 mov ecx, [esp + 12] /* ecx = n */
1328 mov edi, [esp + 16] /* edi = d */
1329 mov edx, [esp + 20] /* edx = m */
1330 mov esi, [esp + 24] /* esi = s */
1331
 /* push (n, d, m, s) right to left for the transform call */
1332 push esi
1333 push edx
1334 push edi
1335 push ecx
1336
1337 call _asm_transform_points4_ortho
1338
1339 add esp, DEC(16) /* drop the four pushed arguments */
1340
1341 mov edi, [esp + 32] /* ormask */
1342 mov esi, [esp + 36] /* andmask */
1343 mov al, [edi]
1344 mov ah, [esi]
1345
 /* the transform overwrote ecx, so the cliptest inputs are reloaded */
1346 mov ecx, [esp + 12] /* ecx = n */
1347 mov edi, [esp + 28] /* edi = clipmask */
1348 mov esi, [esp + 16] /* esi = d */
1349
1350 call cliptest
1351
1352 mov edi, [esp + 32] /* ormask */
1353 mov esi, [esp + 36] /* andmask */
1354 mov [edi], al
1355 mov [esi], ah
1356
1357 pop edi
1358 pop esi
1359 ret
1360
1361 /*
1362 * void asm_project_and_cliptest_perspective( GLuint n, GLfloat d[][4], GLfloat m[16],
1363 * GLfloat s[][4], GLubyte clipmask[],
1364 * GLubyte *ormask, GLubyte *andmask );
 *
 * Calls _asm_transform_points4_perspective( n, d, m, s ), then runs
 * cliptest over the n transformed points in d.  *ormask and *andmask are
 * loaded into al / ah before cliptest and written back from al / ah
 * afterwards; clipmask[] is updated in place by cliptest.
 *
 * esi and edi are saved and restored.  All [esp + k] offsets below are
 * relative to the stack after those two pushes.
1365 */
1366 .align 4 /* align the entry point itself */
1367 PUBLIC _asm_project_and_cliptest_perspective
1368 _asm_project_and_cliptest_perspective:
1369 push esi
1370 push edi
1371 mov ecx, [esp + 12] /* ecx = n */
1372 mov edi, [esp + 16] /* edi = d */
1373 mov edx, [esp + 20] /* edx = m */
1374 mov esi, [esp + 24] /* esi = s */
1375
 /* push (n, d, m, s) right to left for the transform call */
1376 push esi
1377 push edx
1378 push edi
1379 push ecx
1380
1381 call _asm_transform_points4_perspective
1382
1383 add esp, DEC(16) /* drop the four pushed arguments */
1384
1385 mov edi, [esp + 32] /* ormask */
1386 mov esi, [esp + 36] /* andmask */
1387 mov al, [edi]
1388 mov ah, [esi]
1389
 /* the transform overwrote ecx, so the cliptest inputs are reloaded */
1390 mov ecx, [esp + 12] /* ecx = n */
1391 mov edi, [esp + 28] /* edi = clipmask */
1392 mov esi, [esp + 16] /* esi = d */
1393
1394 call cliptest
1395
1396 mov edi, [esp + 32] /* ormask */
1397 mov esi, [esp + 36] /* andmask */
1398 mov byte ptr [edi], al
1399 mov byte ptr [esi], ah
1400
1401 pop edi
1402 pop esi
1403 ret
1404
1405
/*
 * unsigned int inverse_nofp( float f );
 *
 * Reciprocal of a single-precision float computed with integer
 * instructions only (no FPU).  The argument is read as raw bits from
 * [esp + 4] and the bit pattern of the result is handed back in eax, so
 * a C caller must declare the return type as an integer and reinterpret
 * it as a float (through a pointer or a union).
 *
 * There is no special-casing of zero, denormal, infinite or NaN inputs.
 * Clobbers ecx, edx and the flags.
 */
.align 4
inverse_nofp:

    /* ecx = 24-bit significand: 23 stored bits plus the implicit leading 1 */
    mov ecx, [esp + 4]
    and ecx, HEX(7fffff)
    or ecx, HEX(800000)

    /* edx:eax = 0x10000:0x00000000, divided by the significand */
    mov edx, HEX(10000)
    xor eax, eax
    div ecx

    /* drop the extra low quotient bit, rounding on the bit shifted out */
    shr eax, DEC(1)
    adc eax, DEC(0)

    /* ecx = exponent field of the argument, still in place (bits 23..30) */
    mov ecx, [esp + 4]
    and ecx, HEX(7f800000)

    /* edx = exponent field of the result: negated, then decremented by one */
    mov edx, HEX(7E800000)
    sub edx, ecx

    /* quotient reached bit 24: halve it and bump the exponent instead */
    test eax, HEX(1000000)
    jz inverse_nofp_pack

    shr eax, DEC(1)
    add edx, HEX(800000)

    /* assemble fraction | exponent | sign of the argument */
inverse_nofp_pack:
    and eax, HEX(7fffff)
    mov ecx, [esp + 4]
    and ecx, HEX(80000000)
    or eax, edx
    or eax, ecx

    ret
1459
1460
/*
 * void gl_xform_normals_3fv( GLuint n, GLfloat d[][3], GLfloat m[16],
 *                            GLfloat s[][3], GLboolean normalize );
 *
 * For each of the n source normals s[i], stores into d[i]:
 *
 *   d[i][0] = s[i][0] * M(0, 0) + s[i][1] * M(0, 1) + s[i][2] * M(0, 2)
 *   d[i][1] = s[i][0] * M(1, 0) + s[i][1] * M(1, 1) + s[i][2] * M(1, 2)
 *   d[i][2] = s[i][0] * M(2, 0) + s[i][1] * M(2, 1) + s[i][2] * M(2, 2)
 *
 * where M(x, y) is the float at byte offset 16*x + 4*y from m.  Both
 * arrays are walked with a stride of three floats (12 bytes) per normal.
 *
 * If normalize is non-zero, a second pass scales every d[i] by
 * 1 / sqrt(d[i][0]^2 + d[i][1]^2 + d[i][2]^2); the reciprocal comes from
 * inverse_nofp (integer arithmetic), the square root from fsqrt.
 *
 * Argument offsets once esi and edi have been saved:
 *   [esp + 12] n   [esp + 16] d   [esp + 20] m   [esp + 24] s
 *   [esp + 28] normalize
 *
 * Nothing is done when n is zero.
 */
/*
 * Align the entry point itself: padding emitted after the label would be
 * executed on every call and would leave the label unaligned.
 */
.align 4
PUBLIC _gl_xform_normals_3fv
_gl_xform_normals_3fv:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _gl_xform_normals_3fv_end

.align 4
_gl_xform_normals_3fv_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(1, 0)
    fld S(0)
    fmul M(2, 0)

    fld S(1)
    fmul M(0, 1)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(2, 1)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(0) * M(1, 0)
     * st(3) = S(0) * M(2, 0)
     * st(2) = S(1) * M(0, 1)
     * st(1) = S(1) * M(1, 1)
     * st(0) = S(1) * M(2, 1)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1)
     * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1)
     * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1)
     */

    fld S(2)
    fmul M(0, 2)
    fld S(2)
    fmul M(1, 2)
    fld S(2)
    fmul M(2, 2)

    /*
     * st(5) = S(0) * M(0, 0) + S(1) * M(0, 1)
     * st(4) = S(0) * M(1, 0) + S(1) * M(1, 1)
     * st(3) = S(0) * M(2, 0) + S(1) * M(2, 1)
     * st(2) = S(2) * M(0, 2)
     * st(1) = S(2) * M(1, 2)
     * st(0) = S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1) + S(2) * M(0, 2)
     * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1) + S(2) * M(1, 2)
     * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1) + S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 */
    fstp D(0)                       /* 1 0 */
    fstp D(1)                       /* 0 */
    fstp D(2)                       /* */

    lea esi, S(3)                   /* next source normal (3 floats on) */

    dec ecx
    lea edi, D(3)                   /* lea leaves the flags from dec intact */

    jnz _gl_xform_normals_3fv_loop

    /*
     * Skip normalize if it isn't needed.  GLboolean is a one-byte type,
     * so only the low byte of its stack slot is tested; the upper three
     * bytes of the slot are not relied upon.
     */
    cmp byte ptr [esp + 28], DEC(0)
    jz _gl_xform_normals_3fv_end

    /* Normalize required */

    mov esi, [esp + 12]             /* esi = n */
    mov edi, [esp + 16]             /* edi = d */

    sub esp, DEC(4)                 /* temp var for length, then 1.0 / len */

    /*
     * [esp] = length of first normal
     */
    fld D(0)
    fmul D(0)
    fld D(1)
    fmul D(1)
    fld D(2)
    fmul D(2)
    fxch st(2)
    faddp st(1), st
    faddp st(1), st
    fsqrt
    fstp dword ptr [esp]

    jmp _gl_xform_normals_3fv_loop2_end

    /*
     * Loop invariant on entry: edi points at the current normal and
     * [esp] holds its length.  Each pass computes the length of the
     * following normal while scaling the current one.
     */
.align 4
_gl_xform_normals_3fv_loop2:
    /* st(0) = length of next normal */
    fld D(3)
    fmul D(3)
    fld D(4)
    fmul D(4)
    fld D(5)
    fmul D(5)
    fxch st(2)
    faddp st(1), st
    faddp st(1), st
    fsqrt

    /*
     * Invert the length of the current normal, which is already at
     * [esp] (it becomes [esp + 4], the argument slot, once the call has
     * pushed its return address).  This should overlap the previous
     * fsqrt nicely.
     */
    call inverse_nofp
    mov [esp], eax

    /* multiply normal by 1/len */
    fld D(0)
    fmul dword ptr [esp]
    fld D(1)
    fmul dword ptr [esp]
    fld D(2)
    fmul dword ptr [esp]
    fxch st(3)                      /* st(0) = next length, st(3) = scaled D(2) */
    fstp dword ptr [esp]            /* store length of next normal */
    fstp D(1)
    fstp D(0)
    fstp D(2)
    lea edi, D(3)

_gl_xform_normals_3fv_loop2_end:
    dec esi
    jnz _gl_xform_normals_3fv_loop2

    /* finish up the last normal; its length is at [esp] */
    call inverse_nofp
    mov [esp], eax
    fld D(0)
    fmul dword ptr [esp]
    fld D(1)
    fmul dword ptr [esp]
    fld D(2)
    fmul dword ptr [esp]
    fxch st(2)
    fstp D(0)
    fstp D(1)
    fstp D(2)

    add esp, DEC(4)                 /* release the temp var */

_gl_xform_normals_3fv_end:
    pop edi
    pop esi
    ret
1643
1644 END