1 /* $Id: asm-386.S,v 1.8 1997/12/17 00:50:51 brianp Exp $ */
4 * asm-386.S - special (hopefully faster) transformation functions for x86
8 * This file is in the public domain.
13 * Revision 1.8 1997/12/17 00:50:51 brianp
14 * applied Josh's patch to fix texture coordinate transformation bugs
16 * Revision 1.7 1997/12/17 00:27:11 brianp
17 * applied Josh's patch to fix bfris
19 * Revision 1.6 1997/12/01 01:02:41 brianp
20 * added FreeBSD patches (Daniel J. O'Connor)
22 * Revision 1.5 1997/11/19 23:52:17 brianp
23 * added missing "cld" instruction in asm_transform_points4_identity()
25 * Revision 1.4 1997/11/11 02:22:41 brianp
26 * small change per Josh to ensure U/V pairing
28 * Revision 1.3 1997/11/07 03:37:24 brianp
29 * added missing line from Stephane Rehel
31 * Revision 1.2 1997/11/07 03:30:37 brianp
32 * added Josh's 11-5-97 patches
34 * Revision 1.1 1997/10/30 06:00:33 brianp
/*
 * Memory-operand shorthands used by the routines below, which load
 * esi = s (source), edi = d (destination) and edx = m (matrix):
 *
 *   S(x)    - dword number x counted from esi
 *   D(x)    - dword number x counted from edi
 *   M(x, y) - dword at edx + 16*x + 4*y, i.e. entry y of the x-th
 *             group of four dwords of m
 *
 * The arguments are pasted into the address expression without
 * parentheses, so pass plain constants (an argument such as "a + 1"
 * would be scaled incorrectly).
 */
40 #define S(x) dword ptr [esi + 4*x]
41 #define D(x) dword ptr [edi + 4*x]
42 #define M(x, y) dword ptr [edx + 16*x + 4*y]
47 * void asm_transform_points3_general( GLuint n, GLfloat d[][4],
48 * GLfloat m[16], GLfloat s[][4] );
50 PUBLIC _asm_transform_points3_general
51 _asm_transform_points3_general:
56 mov ecx, [esp + 12] /* ecx = n */
57 mov edi, [esp + 16] /* edi = d */
58 mov edx, [esp + 20] /* edx = m */
59 mov esi, [esp + 24] /* esi = s */
62 jz _asm_transform_points3_general_end
65 _asm_transform_points3_general_loop:
85 * The FPU stack should now look like this:
87 * st(7) = S(0) * M(0, 0)
88 * st(6) = S(0) * M(0, 1)
89 * st(5) = S(0) * M(0, 2)
90 * st(4) = S(0) * M(0, 3)
91 * st(3) = S(1) * M(1, 0)
92 * st(2) = S(1) * M(1, 1)
93 * st(1) = S(1) * M(1, 2)
94 * st(0) = S(1) * M(1, 3)
97 fxch st(3) /* 3 1 2 0 4 5 6 7 */
98 faddp st(7), st /* 1 2 0 4 5 6 7 */
99 fxch st(1) /* 2 1 0 4 5 6 7 */
100 faddp st(5), st /* 1 0 4 5 6 7 */
101 faddp st(3), st /* 0 4 5 6 7 */
102 faddp st(1), st /* 4 5 6 7 */
105 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
106 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
107 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
108 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
121 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
122 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
123 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
124 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
125 * st(3) = S(2) * M(2, 0)
126 * st(2) = S(2) * M(2, 1)
127 * st(1) = S(2) * M(2, 2)
128 * st(0) = S(2) * M(2, 3)
131 fxch st(3) /* 3 1 2 0 4 5 6 7 */
132 faddp st(7), st /* 1 2 0 4 5 6 7 */
133 fxch st(1) /* 2 1 0 4 5 6 7 */
134 faddp st(5), st /* 1 0 4 5 6 7 */
135 faddp st(3), st /* 0 4 5 6 7 */
136 faddp st(1), st /* 4 5 6 7 */
139 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
140 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
141 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
142 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
145 fxch st(3) /* 3 1 2 0 */
147 fxch st(2) /* 2 1 3 0 */
149 fxch st(1) /* 1 2 3 0 */
151 fxch st(3) /* 0 2 3 1 */
155 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + M(3, 2)
156 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + M(3, 0)
157 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + M(3, 1)
158 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + M(3, 3)
161 fxch st(3) /* 3 1 2 0 */
162 fstp D(2) /* 1 2 0 */
163 fxch st(1) /* 2 1 0 */
172 jnz _asm_transform_points3_general_loop
174 _asm_transform_points3_general_end:
181 * void asm_transform_points3_identity( GLuint n, GLfloat d[][4],
184 PUBLIC _asm_transform_points3_identity
185 _asm_transform_points3_identity:
189 mov ecx, [esp + 12] /* ecx = n */
190 mov edi, [esp + 16] /* edi = d */
191 mov esi, [esp + 20] /* esi = s */
196 jz _asm_transform_points3_identity_end
198 mov ebp, HEX(3f800000)
201 _asm_transform_points3_identity_loop:
212 jnz _asm_transform_points3_identity_loop
214 _asm_transform_points3_identity_end:
223 * void asm_transform_points3_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
226 PUBLIC _asm_transform_points3_2d
227 _asm_transform_points3_2d:
231 mov ecx, [esp + 12] /* ecx = n */
232 mov edi, [esp + 16] /* edi = d */
233 mov edx, [esp + 20] /* edx = m */
234 mov esi, [esp + 24] /* esi = s */
237 mov ebp, HEX(3f800000)
240 jz _asm_transform_points3_2d_step
254 * st(3) = S(0) * M(0, 0)
255 * st(2) = S(0) * M(0, 1)
256 * st(1) = S(1) * M(1, 0)
257 * st(0) = S(1) * M(1, 1)
260 fxch st(1) /* 1 0 2 3 */
262 fxch st(1) /* 0 1 2 3 */
264 fxch st(1) /* 1 0 2 3 */
265 faddp st(3), st /* 0 2 3 */
266 faddp st(1), st /* 2 3 */
275 _asm_transform_points3_2d_step:
277 jz _asm_transform_points3_2d_end
280 _asm_transform_points3_2d_loop:
299 * st(7) = S(0) * M(0, 0)
300 * st(6) = S(0) * M(0, 1)
301 * st(5) = S(4) * M(0, 0)
302 * st(4) = S(4) * M(0, 1)
303 * st(3) = S(1) * M(1, 0)
304 * st(2) = S(1) * M(1, 1)
305 * st(1) = S(5) * M(1, 0)
306 * st(0) = S(5) * M(1, 1)
309 fxch st(7) /* 7 1 2 3 4 5 6 0 */
311 fxch st(6) /* 6 1 2 3 4 5 7 0 */
313 fxch st(5) /* 5 1 2 3 4 6 7 0 */
315 fxch st(4) /* 4 1 2 3 5 6 7 0 */
328 * st(7) = S(5) * M(1, 1)
329 * st(6) = S(0) * M(0, 0) + M(3, 0)
330 * st(5) = S(0) * M(0, 1) + M(3, 1)
331 * st(4) = S(4) * M(0, 0) + M(3, 0)
332 * st(3) = S(1) * M(1, 0)
333 * st(2) = S(1) * M(1, 1)
334 * st(1) = S(5) * M(1, 0)
335 * st(0) = S(4) * M(0, 1) + M(3, 1)
338 faddp st(7), st /* 1 2 3 4 5 6 7 */
339 faddp st(3), st /* 2 3 4 5 6 7 */
340 faddp st(3), st /* 3 4 5 6 7 */
341 faddp st(3), st /* 4 5 6 7 */
342 fxch st(3) /* 7 5 6 4 */
343 fstp D(5) /* 5 6 4 */
349 jnz _asm_transform_points3_2d_loop
351 _asm_transform_points3_2d_end:
359 * void asm_transform_points3_2d_no_rot( GLuint n, GLfloat d[][4],
360 * GLfloat m[16], GLfloat s[][4] );
363 PUBLIC _asm_transform_points3_2d_no_rot
364 _asm_transform_points3_2d_no_rot:
368 mov ecx, [esp + 12] /* ecx = n */
369 mov edi, [esp + 16] /* edi = d */
370 mov edx, [esp + 20] /* edx = m */
371 mov esi, [esp + 24] /* esi = s */
375 jz _asm_transform_points3_2d_no_rot_end
377 mov ebp, HEX(3f800000)
380 _asm_transform_points3_2d_no_rot_loop:
393 mov eax, S(2) /* cycle 1: U pipe */
394 mov D(3), ebp /* V pipe */
395 mov D(2), eax /* cycle 2: U pipe */
400 jnz _asm_transform_points3_2d_no_rot_loop
402 _asm_transform_points3_2d_no_rot_end:
411 * void asm_transform_points3_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
414 PUBLIC _asm_transform_points3_3d
415 _asm_transform_points3_3d:
419 mov ecx, [esp + 12] /* ecx = n */
420 mov edi, [esp + 16] /* edi = d */
421 mov edx, [esp + 20] /* edx = m */
422 mov esi, [esp + 24] /* esi = s */
425 jz _asm_transform_points3_3d_end
427 mov eax, HEX(3f800000)
430 _asm_transform_points3_3d_loop:
446 * st(5) = S(0) * M(0, 0)
447 * st(4) = S(0) * M(0, 1)
448 * st(3) = S(0) * M(0, 2)
449 * st(2) = S(1) * M(1, 0)
450 * st(1) = S(1) * M(1, 1)
451 * st(0) = S(1) * M(1, 2)
454 fxch st(2) /* 2 1 0 3 4 5 */
455 faddp st(5), st /* 1 0 3 4 5 */
456 faddp st(3), st /* 0 3 4 5 */
457 faddp st(1), st /* 3 4 5 */
460 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
461 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
462 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
473 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
474 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
475 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
476 * st(2) = S(2) * M(2, 0)
477 * st(1) = S(2) * M(2, 1)
478 * st(0) = S(2) * M(2, 2)
481 fxch st(2) /* 2 1 0 3 4 5 */
482 faddp st(5), st /* 1 0 3 4 5 */
483 faddp st(3), st /* 0 3 4 5 */
484 faddp st(1), st /* 3 4 5 */
487 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
488 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
489 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
492 fxch st(2) /* 2 1 0 */
494 fxch st(1) /* 1 2 0 */
496 fxch st(2) /* 0 2 1 */
499 fxch st(1) /* 2 0 1 */
510 jnz _asm_transform_points3_3d_loop
512 _asm_transform_points3_3d_end:
520 * void asm_transform_points4_general( GLuint n, GLfloat d[][4],
521 * GLfloat m[16], GLfloat s[][4] );
523 PUBLIC _asm_transform_points4_general
524 _asm_transform_points4_general:
528 mov ecx, [esp + 12] /* ecx = n */
529 mov edi, [esp + 16] /* edi = d */
530 mov edx, [esp + 20] /* edx = m */
531 mov esi, [esp + 24] /* esi = s */
534 jz _asm_transform_points4_general_end
537 _asm_transform_points4_general_loop:
557 * st(7) = S(0) * M(0, 0)
558 * st(6) = S(0) * M(0, 1)
559 * st(5) = S(0) * M(0, 2)
560 * st(4) = S(0) * M(0, 3)
561 * st(3) = S(1) * M(1, 0)
562 * st(2) = S(1) * M(1, 1)
563 * st(1) = S(1) * M(1, 2)
564 * st(0) = S(1) * M(1, 3)
567 fxch st(3) /* 3 1 2 0 4 5 6 7 */
568 faddp st(7), st /* 1 2 0 4 5 6 7 */
569 fxch st(1) /* 2 1 0 4 5 6 7 */
570 faddp st(5), st /* 1 0 4 5 6 7 */
571 faddp st(3), st /* 0 4 5 6 7 */
572 faddp st(1), st /* 4 5 6 7 */
575 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
576 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
577 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
578 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
591 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
592 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
593 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
594 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
595 * st(3) = S(2) * M(2, 0)
596 * st(2) = S(2) * M(2, 1)
597 * st(1) = S(2) * M(2, 2)
598 * st(0) = S(2) * M(2, 3)
601 fxch st(3) /* 3 1 2 0 4 5 6 7 */
602 faddp st(7), st /* 1 2 0 4 5 6 7 */
603 fxch st(1) /* 2 1 0 4 5 6 7 */
604 faddp st(5), st /* 1 0 4 5 6 7 */
605 faddp st(3), st /* 0 4 5 6 7 */
606 faddp st(1), st /* 4 5 6 7 */
609 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
610 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
611 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
612 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
625 * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
626 * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
627 * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
628 * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
629 * st(3) = S(3) * M(3, 0)
630 * st(2) = S(3) * M(3, 1)
631 * st(1) = S(3) * M(3, 2)
632 * st(0) = S(3) * M(3, 3)
635 fxch st(3) /* 3 1 2 0 4 5 6 7 */
636 faddp st(7), st /* 1 2 0 4 5 6 7 */
637 fxch st(1) /* 2 1 0 4 5 6 7 */
638 faddp st(5), st /* 1 0 4 5 6 7 */
639 faddp st(3), st /* 0 4 5 6 7 */
644 faddp st(1), st /* 4 5 6 7 */
647 * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
648 * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
649 * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
650 * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + S(3) * M(3, 3)
653 fxch st(3) /* 3 1 2 0 */
654 fstp D(0) /* 1 2 0 */
655 fxch st(1) /* 2 1 0 */
662 jnz _asm_transform_points4_general_loop
664 _asm_transform_points4_general_end:
672 * void asm_transform_points4_identity( GLuint n, GLfloat d[][4],
675 PUBLIC _asm_transform_points4_identity
676 _asm_transform_points4_identity:
680 mov ecx, [esp + 12] /* ecx = n */
681 mov edi, [esp + 16] /* edi = d */
682 mov esi, [esp + 20] /* esi = s */
696 * void asm_transform_points4_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
699 PUBLIC _asm_transform_points4_2d
700 _asm_transform_points4_2d:
704 mov ecx, [esp + 12] /* ecx = n */
705 mov edi, [esp + 16] /* edi = d */
706 mov edx, [esp + 20] /* edx = m */
707 mov esi, [esp + 24] /* esi = s */
710 jz _asm_transform_points4_2d_end
715 _asm_transform_points4_2d_loop:
730 * st(5) = S(0) * M(0, 0)
731 * st(4) = S(0) * M(0, 1)
732 * st(3) = S(1) * M(1, 0)
733 * st(2) = S(1) * M(1, 1)
734 * st(1) = S(3) * M(3, 0)
735 * st(0) = S(3) * M(3, 1)
751 jnz _asm_transform_points4_2d_loop
755 _asm_transform_points4_2d_end:
763 * void asm_transform_points4_2d_no_rot( GLuint n, GLfloat d[][4],
764 * GLfloat m[16], GLfloat s[][4] );
766 PUBLIC _asm_transform_points4_2d_no_rot
767 _asm_transform_points4_2d_no_rot:
771 mov ecx, [esp + 12] /* ecx = n */
772 mov edi, [esp + 16] /* edi = d */
773 mov edx, [esp + 20] /* edx = m */
774 mov esi, [esp + 24] /* esi = s */
777 jz _asm_transform_points4_2d_no_rot_end
781 _asm_transform_points4_2d_no_rot_loop:
801 jnz _asm_transform_points4_2d_no_rot_loop
805 _asm_transform_points4_2d_no_rot_end:
813 * void asm_transform_points4_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
816 PUBLIC _asm_transform_points4_3d
817 _asm_transform_points4_3d:
821 mov ecx, [esp + 12] /* ecx = n */
822 mov edi, [esp + 16] /* edi = d */
823 mov edx, [esp + 20] /* edx = m */
824 mov esi, [esp + 24] /* esi = s */
827 jz _asm_transform_points4_3d_end
830 _asm_transform_points4_3d_loop:
848 * st(5) = S(0) * M(0, 0)
849 * st(4) = S(0) * M(0, 1)
850 * st(3) = S(0) * M(0, 2)
851 * st(2) = S(1) * M(1, 0)
852 * st(1) = S(1) * M(1, 1)
853 * st(0) = S(1) * M(1, 2)
856 fxch st(2) /* 2 1 0 3 4 5 */
857 faddp st(5), st /* 1 0 3 4 5 */
858 faddp st(3), st /* 0 3 4 5 */
859 faddp st(1), st /* 3 4 5 */
862 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
863 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
864 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
875 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
876 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
877 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
878 * st(2) = S(2) * M(2, 0)
879 * st(1) = S(2) * M(2, 1)
880 * st(0) = S(2) * M(2, 2)
883 fxch st(2) /* 2 1 0 3 4 5 */
884 faddp st(5), st /* 1 0 3 4 5 */
885 faddp st(3), st /* 0 3 4 5 */
886 faddp st(1), st /* 3 4 5 */
889 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
890 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
891 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
902 * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
903 * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
904 * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
905 * st(2) = S(3) * M(3, 0)
906 * st(1) = S(3) * M(3, 1)
907 * st(0) = S(3) * M(3, 2)
910 fxch st(2) /* 2 1 0 3 4 5 */
911 faddp st(5), st /* 1 0 3 4 5 */
912 faddp st(3), st /* 0 3 4 5 */
917 faddp st(1), st /* 3 4 5 */
920 * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
921 * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
922 * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
925 fxch st(2) /* 2 1 0 */
933 jnz _asm_transform_points4_3d_loop
935 _asm_transform_points4_3d_end:
941 * void asm_transform_points4_ortho( GLuint n, GLfloat d[][4],
942 * GLfloat m[16], GLfloat s[][4] );
944 PUBLIC _asm_transform_points4_ortho
945 _asm_transform_points4_ortho:
949 mov ecx, [esp + 12] /* ecx = n */
950 mov edi, [esp + 16] /* edi = d */
951 mov edx, [esp + 20] /* edx = m */
952 mov esi, [esp + 24] /* esi = s */
955 jz _asm_transform_points4_ortho_end
958 _asm_transform_points4_ortho_loop:
987 jnz _asm_transform_points4_ortho_loop
989 _asm_transform_points4_ortho_end:
995 * void asm_transform_points4_perspective( GLuint n, GLfloat d[][4],
996 * GLfloat m[16], GLfloat s[][4] );
998 PUBLIC _asm_transform_points4_perspective
999 _asm_transform_points4_perspective:
1003 mov ecx, [esp + 12] /* ecx = n */
1004 mov edi, [esp + 16] /* edi = d */
1005 mov edx, [esp + 20] /* edx = m */
1006 mov esi, [esp + 24] /* esi = s */
1009 jz _asm_transform_points4_perspective_end
1012 _asm_transform_points4_perspective_loop:
1029 xor eax, HEX(80000000)
1042 jnz _asm_transform_points4_perspective_loop
1044 _asm_transform_points4_perspective_end:
1052 * Table for clip test.
1056 * bit4 = abs(S(2)) > abs(S(3))
1058 * bit2 = abs(S(1)) > abs(S(3))
1060 * bit0 = abs(S(0)) > abs(S(3))
1063 /* Vertex buffer clipping flags (from vb.h) */
/*
 * One bit per clip condition.  The six directional bits
 * (RIGHT, LEFT, TOP, BOTTOM, NEAR, FAR) occupy 0x01..0x20 and OR
 * together to CLIP_ALL_BITS (0x3f); CLIP_USER_BIT (0x40) is not part
 * of CLIP_ALL_BITS.
 */
1066 #define CLIP_RIGHT_BIT 0x01
1067 #define CLIP_LEFT_BIT 0x02
1068 #define CLIP_TOP_BIT 0x04
1069 #define CLIP_BOTTOM_BIT 0x08
1070 #define CLIP_NEAR_BIT 0x10
1071 #define CLIP_FAR_BIT 0x20
1072 #define CLIP_USER_BIT 0x40
1073 #define CLIP_ALL_BITS 0x3f
/*
 * Bit-to-mask helpers for building the clip table.  Each macro looks at
 * one bit of the table index i and evaluates to an all-ones value (~0)
 * when that bit is set and to 0 when it is clear:
 *
 *   bit set:   ~(1 - 1) = ~0
 *   bit clear: ~(0 - 1) =  0
 *
 * Because the result is either all ones or zero, the masks can be
 * combined with & | ~ and then ANDed with a CLIP_*_BIT constant.
 *
 * Index bit used by each macro:
 *   MAGN_X bit 0, SIGN_X bit 1, MAGN_Y bit 2, SIGN_Y bit 3,
 *   MAGN_Z bit 4, SIGN_Z bit 5, SIGN_W bit 6.
 * Going by the names and the table description, the MAGN_* bits mean
 * abs(coordinate) > abs(S(3)) and the SIGN_* bits are the sign bits of
 * the respective components.
 */
1075 #define MAGN_X(i) (~(((i) & 1) - 1))
1076 #define SIGN_X(i) (~((((i) >> 1) & 1) - 1))
1077 #define MAGN_Y(i) (~((((i) >> 2) & 1) - 1))
1078 #define SIGN_Y(i) (~((((i) >> 3) & 1) - 1))
1079 #define MAGN_Z(i) (~((((i) >> 4) & 1) - 1))
1080 #define SIGN_Z(i) (~((((i) >> 5) & 1) - 1))
1081 #define SIGN_W(i) (~((((i) >> 6) & 1) - 1))
1083 #define CLIP_VALUE(i) \
1085 & ((~SIGN_X(i) & SIGN_W(i)) \
1086 | (~SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)) \
1087 | (SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)))) \
1089 & ((SIGN_X(i) & SIGN_W(i)) \
1090 | (~SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)) \
1091 | (SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)))) \
1093 & ((~SIGN_Y(i) & SIGN_W(i)) \
1094 | (~SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)) \
1095 | (SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)))) \
1096 | (CLIP_BOTTOM_BIT \
1097 & ((SIGN_Y(i) & SIGN_W(i)) \
1098 | (~SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)) \
1099 | (SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)))) \
1101 & ((~SIGN_Z(i) & SIGN_W(i)) \
1102 | (~SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i)) \
1103 | (SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)))) \
1105 & ((SIGN_Z(i) & SIGN_W(i)) \
1106 | (~SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)) \
1107 | (SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i))))
/*
 * Expands to eight comma-separated CLIP_VALUE() entries for the
 * consecutive table indices i .. i + 7, so that one ".byte" line emits
 * eight table bytes.
 */
1109 #define CLIP_VALUE8(i) \
1110 CLIP_VALUE(i + 0), CLIP_VALUE(i + 1), CLIP_VALUE(i + 2), CLIP_VALUE(i + 3), \
1111 CLIP_VALUE(i + 4), CLIP_VALUE(i + 5), CLIP_VALUE(i + 6), CLIP_VALUE(i + 7)
1116 .byte CLIP_VALUE8(0x00)
1117 .byte CLIP_VALUE8(0x08)
1118 .byte CLIP_VALUE8(0x10)
1119 .byte CLIP_VALUE8(0x18)
1120 .byte CLIP_VALUE8(0x20)
1121 .byte CLIP_VALUE8(0x28)
1122 .byte CLIP_VALUE8(0x30)
1123 .byte CLIP_VALUE8(0x38)
1124 .byte CLIP_VALUE8(0x40)
1125 .byte CLIP_VALUE8(0x48)
1126 .byte CLIP_VALUE8(0x50)
1127 .byte CLIP_VALUE8(0x58)
1128 .byte CLIP_VALUE8(0x60)
1129 .byte CLIP_VALUE8(0x68)
1130 .byte CLIP_VALUE8(0x70)
1131 .byte CLIP_VALUE8(0x78)
1138 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
1139 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
1140 .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(24), HEX(25), HEX(24), HEX(26)
1141 .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(28), HEX(29), HEX(28), HEX(2a)
1142 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
1143 .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
1144 .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(14), HEX(15), HEX(14), HEX(16)
1145 .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(18), HEX(19), HEX(18), HEX(1a)
1146 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
1147 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
1148 .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(27), HEX(25), HEX(27), HEX(26)
1149 .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(2b), HEX(29), HEX(2b), HEX(2a)
1150 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
1151 .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
1152 .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(17), HEX(15), HEX(17), HEX(16)
1153 .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(1b), HEX(19), HEX(1b), HEX(1a)
1185 add ebp, ebp /* %ebp = abs(S(3))*2 ; carry = sign of S(3) */
1188 add ebx, ebx /* %ebx = abs(S(2))*2 ; carry = sign of S(2) */
1191 cmp ebp, ebx /* carry = abs(S(2))*2 > abs(S(3))*2 */
1196 add ebx, ebx /* %ebx = abs(S(1))*2 ; carry = sign of S(1) */
1199 cmp ebp, ebx /* carry = abs(S(1))*2 > abs(S(3))*2 */
1204 add ebx, ebx /* %ebx = abs(S(0))*2 ; carry = sign of S(0) */
1207 cmp ebp, ebx /* carry = abs(S(0))*2 > abs(S(3))*2 */
1213 mov bl, byte ptr [edi]
1214 mov dl, byte ptr [clip_table + edx]
1233 * void asm_project_and_cliptest_general( GLuint n, GLfloat d[][4], GLfloat m[16],
1234 * GLfloat s[][4], GLubyte clipmask[],
1235 * GLubyte *ormask, GLubyte *andmask );
1237 PUBLIC _asm_project_and_cliptest_general
1238 _asm_project_and_cliptest_general:
1242 mov ecx, [esp + 12] /* ecx = n */
1243 mov edi, [esp + 16] /* edi = d */
1244 mov edx, [esp + 20] /* edx = m */
1245 mov esi, [esp + 24] /* esi = s */
1251 call _asm_transform_points4_general
1254 mov edi, [esp + 32] /* ormask */
1255 mov esi, [esp + 36] /* andmask */
1259 mov ecx, [esp + 12] /* ecx = n */
1260 mov edi, [esp + 28] /* edi = clipmask */
1261 mov esi, [esp + 16] /* esi = d */
1265 mov edi, [esp + 32] /* ormask */
1266 mov esi, [esp + 36] /* andmask */
1276 * void asm_project_and_cliptest_identity( GLuint n, GLfloat d[][4],
1277 * GLfloat s[][4], GLubyte clipmask[],
1278 * GLubyte *ormask, GLubyte *andmask );
1280 PUBLIC _asm_project_and_cliptest_identity
1281 _asm_project_and_cliptest_identity:
1285 mov ecx, [esp + 12] /* ecx = n */
1286 mov edi, [esp + 16] /* edi = d */
1287 mov esi, [esp + 20] /* esi = s */
1293 call _asm_transform_points4_identity
1297 mov edi, [esp + 28] /* ormask */
1298 mov esi, [esp + 32] /* andmask */
1302 mov ecx, [esp + 12] /* ecx = n */
1303 mov edi, [esp + 24] /* edi = clipmask */
1304 mov esi, [esp + 16] /* esi = d */
1308 mov edi, [esp + 28] /* ormask */
1309 mov esi, [esp + 32] /* andmask */
1318 * void asm_project_and_cliptest_ortho( GLuint n, GLfloat d[][4], GLfloat m[16],
1319 * GLfloat s[][4], GLubyte clipmask[],
1320 * GLubyte *ormask, GLubyte *andmask );
1322 PUBLIC _asm_project_and_cliptest_ortho
1323 _asm_project_and_cliptest_ortho:
1327 mov ecx, [esp + 12] /* ecx = n */
1328 mov edi, [esp + 16] /* edi = d */
1329 mov edx, [esp + 20] /* edx = m */
1330 mov esi, [esp + 24] /* esi = s */
1337 call _asm_transform_points4_ortho
1341 mov edi, [esp + 32] /* ormask */
1342 mov esi, [esp + 36] /* andmask */
1346 mov ecx, [esp + 12] /* ecx = n */
1347 mov edi, [esp + 28] /* edi = clipmask */
1348 mov esi, [esp + 16] /* esi = d */
1352 mov edi, [esp + 32] /* ormask */
1353 mov esi, [esp + 36] /* andmask */
1362 * void asm_project_and_cliptest_perspective( GLuint n, GLfloat d[][4], GLfloat m[16],
1363 * GLfloat s[][4], GLubyte clipmask[],
1364 * GLubyte *ormask, GLubyte *andmask );
1366 PUBLIC _asm_project_and_cliptest_perspective
1367 _asm_project_and_cliptest_perspective:
1371 mov ecx, [esp + 12] /* ecx = n */
1372 mov edi, [esp + 16] /* edi = d */
1373 mov edx, [esp + 20] /* edx = m */
1374 mov esi, [esp + 24] /* esi = s */
1381 call _asm_transform_points4_perspective
1385 mov edi, [esp + 32] /* ormask */
1386 mov esi, [esp + 36] /* andmask */
1390 mov ecx, [esp + 12] /* ecx = n */
1391 mov edi, [esp + 28] /* edi = clipmask */
1392 mov esi, [esp + 16] /* esi = d */
1396 mov edi, [esp + 32] /* ormask */
1397 mov esi, [esp + 36] /* andmask */
1398 mov byte ptr [edi], al
1399 mov byte ptr [esi], ah
1407 * unsigned int inverse_nofp( float f );
1409 * Calculate the inverse of a float without using the FPU.
1410 * This function returns a float in eax, so its return
1411 * type should be 'int' when called from C (and converted
1412 * to float with pointer/union abuse).
1417 /* get mantissa in eax */
1419 and ecx, HEX(7fffff)
1421 /* set implicit integer */
1424 /* div 0x10000:0x00000000 by mantissa */
1434 /* get exponent in ecx */
1435 mov ecx, HEX(7f800000)
1439 /* negate exponent and decrement it */
1440 mov edx, HEX(7E800000)
1443 /* if bit 24 is set, shift and adjust exponent */
1444 test eax, HEX(1000000)
1445 jz inverse_nofp_combine
1448 add edx, HEX(800000)
1450 /* combine mantissa and exponent, then set sign */
1451 inverse_nofp_combine:
1452 and eax, HEX(7fffff)
1455 and ecx, HEX(80000000)
1462 * void gl_xform_normals_3fv( GLuint n, GLfloat d[][4], GLfloat m[16],
1463 * GLfloat s[][4], GLboolean normalize );
1465 PUBLIC _gl_xform_normals_3fv
1466 _gl_xform_normals_3fv:
1470 mov ecx, [esp + 12] /* ecx = n */
1471 mov edi, [esp + 16] /* edi = d */
1472 mov edx, [esp + 20] /* edx = m */
1473 mov esi, [esp + 24] /* esi = s */
1476 jz _gl_xform_normals_3fv_end
1479 _gl_xform_normals_3fv_loop:
1495 * st(5) = S(0) * M(0, 0)
1496 * st(4) = S(0) * M(1, 0)
1497 * st(3) = S(0) * M(2, 0)
1498 * st(2) = S(1) * M(0, 1)
1499 * st(1) = S(1) * M(1, 1)
1500 * st(0) = S(1) * M(2, 1)
1503 fxch st(2) /* 2 1 0 3 4 5 */
1504 faddp st(5), st /* 1 0 3 4 5 */
1505 faddp st(3), st /* 0 3 4 5 */
1506 faddp st(1), st /* 3 4 5 */
1509 * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1)
1510 * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1)
1511 * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1)
1522 * st(5) = S(0) * M(0, 0) + S(1) * M(0, 1)
1523 * st(4) = S(0) * M(1, 0) + S(1) * M(1, 1)
1524 * st(3) = S(0) * M(2, 0) + S(1) * M(2, 1)
1525 * st(2) = S(2) * M(0, 2)
1526 * st(1) = S(2) * M(1, 2)
1527 * st(0) = S(2) * M(2, 2)
1530 fxch st(2) /* 2 1 0 3 4 5 */
1531 faddp st(5), st /* 1 0 3 4 5 */
1532 faddp st(3), st /* 0 3 4 5 */
1533 faddp st(1), st /* 3 4 5 */
1536 * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1) + S(2) * M(0, 2)
1537 * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1) + S(2) * M(1, 2)
1538 * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1) + S(2) * M(2, 2)
1541 fxch st(2) /* 2 1 0 */
1551 jnz _gl_xform_normals_3fv_loop
1554 * Skip normalize if it isn't needed
1556 cmp dword ptr [esp + 28], DEC(0)
1557 jz _gl_xform_normals_3fv_end
1559 /* Normalize required */
1561 mov esi, [esp + 12] /* esi = n */
1562 mov edi, [esp + 16] /* edi = d */
1564 sub esp, DEC(4) /* temp var for 1.0 / len */
1567 * (%esp) = length of first normal
1579 fstp dword ptr [esp]
1581 jmp _gl_xform_normals_3fv_loop2_end
1584 _gl_xform_normals_3fv_loop2:
1585 /* %st(0) = length of next normal */
1598 * invert the length of the current normal, which is
1599 * already at (%esp). This should overlap the prev
1605 /* multiply normal by 1/len */
1607 fmul dword ptr [esp]
1609 fmul dword ptr [esp]
1611 fmul dword ptr [esp]
1613 fstp dword ptr [esp] /* store length of next normal */
1619 _gl_xform_normals_3fv_loop2_end:
1621 jnz _gl_xform_normals_3fv_loop2
1623 /* finish up the last normal */
1627 fmul dword ptr [esp]
1629 fmul dword ptr [esp]
1631 fmul dword ptr [esp]
1639 _gl_xform_normals_3fv_end: