[MESA]
[reactos.git] / reactos / dll / opengl / mesa / x86 / x86_xform4.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 3.5
5 *
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32 #include "assyntax.h"
33 #include "matypes.h"
34 #include "xform_args.h"
35
36 SEG_TEXT
37
/*
 * Integer bit patterns of the IEEE-754 single-precision values 1.0f
 * (1065353216 == 0x3F800000) and 0.0f.  Neither constant is referenced
 * by the routines in this file.
 */
38 #define FP_ONE 1065353216
39 #define FP_ZERO 0
40
/*
 * Memory operands used inside the per-point loops below.
 *
 *   SRCn  - 4-byte component n (0..3) of the current source point,
 *           addressed relative to ESI.
 *   DSTn  - 4-byte component n (0..3) of the current destination point,
 *           addressed relative to EDI.
 *   MATn  - 4-byte matrix element n (0..15), addressed relative to EDX
 *           at byte offset 4*n.
 *
 * The routines are responsible for loading ESI, EDI and EDX before any of
 * these macros is used.
 */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
65
66
/*
 * _mesa_x86_transform_points4_general
 *
 * Transforms every 4-component point of the source vector by all sixteen
 * matrix elements, using x87 arithmetic.  For each point:
 *
 *   DST[j] = SRC0*MAT[j] + SRC1*MAT[4+j] + SRC2*MAT[8+j] + SRC3*MAT[12+j]
 *   for j = 0..3
 *
 * The arguments are fetched through the ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX macros (xform_args.h).
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * Up to eight values are live on the x87 stack at once, and every value
 * loaded is popped again before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
67 ALIGNTEXT16
68 GLOBL GLNAME( _mesa_x86_transform_points4_general )
69 HIDDEN(_mesa_x86_transform_points4_general)
70 GLNAME( _mesa_x86_transform_points4_general ):
71
/* 8 == bytes pushed below (2 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
72 #define FRAME_OFFSET 8
73 PUSH_L( ESI )
74 PUSH_L( EDI )
75
76 MOV_L( ARG_SOURCE, ESI )
77 MOV_L( ARG_DEST, EDI )
78
79 MOV_L( ARG_MATRIX, EDX )
80 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
81
/* Nothing to do for an empty source vector. */
82 TEST_L( ECX, ECX )
83 JZ( LLBL(x86_p4_gr_done) )
84
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
85 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
86 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
87
88 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
89 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
90
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
91 SHL_L( CONST(4), ECX )
92 MOV_L( REGOFF(V4F_START, ESI), ESI )
93
94 MOV_L( REGOFF(V4F_START, EDI), EDI )
95 ADD_L( EDI, ECX )
96
97 ALIGNTEXT16
98 LLBL(x86_p4_gr_loop):
99
/* The trailing comments show the x87 stack, top of stack first.
 * F4..F7 accumulate DST0..DST3; F0..F3 are temporary products. */
/* SRC0 times MAT0..MAT3 starts the four accumulators. */
100 FLD_S( SRC0 ) /* F4 */
101 FMUL_S( MAT0 )
102 FLD_S( SRC0 ) /* F5 F4 */
103 FMUL_S( MAT1 )
104 FLD_S( SRC0 ) /* F6 F5 F4 */
105 FMUL_S( MAT2 )
106 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
107 FMUL_S( MAT3 )
108
/* SRC1 times MAT4..MAT7 ... */
109 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
110 FMUL_S( MAT4 )
111 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
112 FMUL_S( MAT5 )
113 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
114 FMUL_S( MAT6 )
115 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
116 FMUL_S( MAT7 )
117
/* ... added into F4..F7; the exchanges bring each product to the top so
 * it can be added to its accumulator and popped. */
118 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
119 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
120 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
123 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
124
/* SRC2 times MAT8..MAT11, accumulated the same way. */
125 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
126 FMUL_S( MAT8 )
127 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
128 FMUL_S( MAT9 )
129 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
130 FMUL_S( MAT10 )
131 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
132 FMUL_S( MAT11 )
133
134 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
135 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
136 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
139 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
140
/* SRC3 times MAT12..MAT15, accumulated the same way. */
141 FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
142 FMUL_S( MAT12 )
143 FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
144 FMUL_S( MAT13 )
145 FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
146 FMUL_S( MAT14 )
147 FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
148 FMUL_S( MAT15 )
149
150 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
151 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
152 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
153 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
154 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
155 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
156
/* Store the four sums.  All source components were read above, before
 * the first store to the destination. */
157 FXCH( ST(3) ) /* F4 F6 F5 F7 */
158 FSTP_S( DST0 ) /* F6 F5 F7 */
159 FXCH( ST(1) ) /* F5 F6 F7 */
160 FSTP_S( DST1 ) /* F6 F7 */
161 FSTP_S( DST2 ) /* F7 */
162 FSTP_S( DST3 ) /* */
163
/* This label is not referenced anywhere in this file. */
164 LLBL(x86_p4_gr_skip):
165
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
166 ADD_L( CONST(16), EDI )
167 ADD_L( EAX, ESI )
168 CMP_L( ECX, EDI )
169 JNE( LLBL(x86_p4_gr_loop) )
170
171 LLBL(x86_p4_gr_done):
172
173 POP_L( EDI )
174 POP_L( ESI )
175 RET
176 #undef FRAME_OFFSET
177
178
179
180
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transforms every 4-component point of the source vector using only the
 * matrix elements MAT0, MAT5, MAT8, MAT9, MAT10 and MAT14.  For each point:
 *
 *   DST0 = SRC0*MAT0  + SRC2*MAT8
 *   DST1 = SRC1*MAT5  + SRC2*MAT9
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = -SRC2        (sign bit flipped with integer instructions)
 *
 * All other matrix elements are ignored; presumably the caller selects
 * this routine only for matrices of that shape - confirm at the call site.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX  scratch holding the bit pattern written to DST3
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  Every value loaded onto the x87 stack is popped again
 * before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
181 ALIGNTEXT16
182 GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
183 HIDDEN(_mesa_x86_transform_points4_perspective)
184 GLNAME( _mesa_x86_transform_points4_perspective ):
185
/* 12 == bytes pushed below (3 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
186 #define FRAME_OFFSET 12
187 PUSH_L( ESI )
188 PUSH_L( EDI )
189 PUSH_L( EBX )
190
191 MOV_L( ARG_SOURCE, ESI )
192 MOV_L( ARG_DEST, EDI )
193
194 MOV_L( ARG_MATRIX, EDX )
195 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
196
/* Nothing to do for an empty source vector. */
197 TEST_L( ECX, ECX )
198 JZ( LLBL(x86_p4_pr_done) )
199
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
200 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
201 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
202
203 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
204 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
205
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
206 SHL_L( CONST(4), ECX )
207 MOV_L( REGOFF(V4F_START, ESI), ESI )
208
209 MOV_L( REGOFF(V4F_START, EDI), EDI )
210 ADD_L( EDI, ECX )
211
212 ALIGNTEXT16
213 LLBL(x86_p4_pr_loop):
214
/* The trailing comments show the x87 stack, top of stack first.
 * F4, F5 and F6 accumulate DST0, DST1 and DST2. */
215 FLD_S( SRC0 ) /* F4 */
216 FMUL_S( MAT0 )
217
218 FLD_S( SRC1 ) /* F5 F4 */
219 FMUL_S( MAT5 )
220
/* SRC2 contributes to all three sums; its MAT10 product becomes F6. */
221 FLD_S( SRC2 ) /* F0 F5 F4 */
222 FMUL_S( MAT8 )
223 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
224 FMUL_S( MAT9 )
225 FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
226 FMUL_S( MAT10 )
227
228 FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
229 FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
230 FADDP( ST0, ST(2) ) /* F6 F5 F4 */
231
232 FLD_S( SRC3 ) /* F2 F6 F5 F4 */
233 FMUL_S( MAT14 )
234
235 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
236
/* Fetch SRC2 as raw bits and toggle bit 31 (the float sign bit).  This
 * read happens before any store to the destination below. */
237 MOV_L( SRC2, EBX )
238 XOR_L( CONST(-2147483648), EBX )/* change sign */
239
240 FXCH( ST(2) ) /* F4 F5 F6 */
241 FSTP_S( DST0 ) /* F5 F6 */
242 FSTP_S( DST1 ) /* F6 */
243 FSTP_S( DST2 ) /* */
244 MOV_L( EBX, DST3 )
245
/* This label is not referenced anywhere in this file. */
246 LLBL(x86_p4_pr_skip):
247
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
248 ADD_L( CONST(16), EDI )
249 ADD_L( EAX, ESI )
250 CMP_L( ECX, EDI )
251 JNE( LLBL(x86_p4_pr_loop) )
252
253 LLBL(x86_p4_pr_done):
254
255 POP_L( EBX )
256 POP_L( EDI )
257 POP_L( ESI )
258 RET
259 #undef FRAME_OFFSET
260
261
262
263
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transforms every 4-component point of the source vector using matrix
 * elements 0-2, 4-6, 8-10 and 12-14.  For each point:
 *
 *   DST[j] = SRC0*MAT[j] + SRC1*MAT[4+j] + SRC2*MAT[8+j] + SRC3*MAT[12+j]
 *   for j = 0..2
 *   DST3   = SRC3        (copied bit-for-bit through EBX)
 *
 * MAT3, MAT7, MAT11 and MAT15 are never read; presumably the caller
 * selects this routine only for matrices where that is valid - confirm
 * at the call site.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX  scratch holding the SRC3 bits written to DST3
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  Every value loaded onto the x87 stack is popped again
 * before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
264 ALIGNTEXT16
265 GLOBL GLNAME( _mesa_x86_transform_points4_3d )
266 HIDDEN(_mesa_x86_transform_points4_3d)
267 GLNAME( _mesa_x86_transform_points4_3d ):
268
/* 12 == bytes pushed below (3 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
269 #define FRAME_OFFSET 12
270 PUSH_L( ESI )
271 PUSH_L( EDI )
272 PUSH_L( EBX )
273
274 MOV_L( ARG_SOURCE, ESI )
275 MOV_L( ARG_DEST, EDI )
276
277 MOV_L( ARG_MATRIX, EDX )
278 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
279
/* Nothing to do for an empty source vector. */
280 TEST_L( ECX, ECX )
281 JZ( LLBL(x86_p4_3dr_done) )
282
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
283 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
284 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
285
286 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
287 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
288
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
289 SHL_L( CONST(4), ECX )
290 MOV_L( REGOFF(V4F_START, ESI), ESI )
291
292 MOV_L( REGOFF(V4F_START, EDI), EDI )
293 ADD_L( EDI, ECX )
294
295 ALIGNTEXT16
296 LLBL(x86_p4_3dr_loop):
297
/* The trailing comments show the x87 stack, top of stack first.
 * F4, F5 and F6 accumulate DST0, DST1 and DST2; F0..F2 are temporary
 * products. */
/* SRC0 times MAT0..MAT2 starts the three accumulators. */
298 FLD_S( SRC0 ) /* F4 */
299 FMUL_S( MAT0 )
300 FLD_S( SRC0 ) /* F5 F4 */
301 FMUL_S( MAT1 )
302 FLD_S( SRC0 ) /* F6 F5 F4 */
303 FMUL_S( MAT2 )
304
/* SRC1 times MAT4..MAT6, then added into F4..F6. */
305 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
306 FMUL_S( MAT4 )
307 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
308 FMUL_S( MAT5 )
309 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
310 FMUL_S( MAT6 )
311
312 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
313 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
314 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
315 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
316
/* SRC2 times MAT8..MAT10, accumulated the same way. */
317 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
318 FMUL_S( MAT8 )
319 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
320 FMUL_S( MAT9 )
321 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
322 FMUL_S( MAT10 )
323
324 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
325 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
326 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
327 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
328
/* SRC3 times MAT12..MAT14, accumulated the same way. */
329 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
330 FMUL_S( MAT12 )
331 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
332 FMUL_S( MAT13 )
333 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
334 FMUL_S( MAT14 )
335
336 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
337 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
338 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
339 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
340
/* Fetch SRC3 as raw bits before any store to the destination below. */
341 MOV_L( SRC3, EBX )
342
343 FXCH( ST(2) ) /* F4 F5 F6 */
344 FSTP_S( DST0 ) /* F5 F6 */
345 FSTP_S( DST1 ) /* F6 */
346 FSTP_S( DST2 ) /* */
347 MOV_L( EBX, DST3 )
348
/* This label is not referenced anywhere in this file. */
349 LLBL(x86_p4_3dr_skip):
350
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
351 ADD_L( CONST(16), EDI )
352 ADD_L( EAX, ESI )
353 CMP_L( ECX, EDI )
354 JNE( LLBL(x86_p4_3dr_loop) )
355
356 LLBL(x86_p4_3dr_done):
357
358 POP_L( EBX )
359 POP_L( EDI )
360 POP_L( ESI )
361 RET
362 #undef FRAME_OFFSET
363
364
365
366
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transforms every 4-component point of the source vector using only the
 * matrix elements MAT0, MAT5, MAT10, MAT12, MAT13 and MAT14.  For each
 * point:
 *
 *   DST0 = SRC0*MAT0  + SRC3*MAT12
 *   DST1 = SRC1*MAT5  + SRC3*MAT13
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3        (copied bit-for-bit through EBX)
 *
 * All other matrix elements are ignored; presumably the caller selects
 * this routine only for matrices of that shape - confirm at the call site.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX  scratch holding the SRC3 bits written to DST3
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  Every value loaded onto the x87 stack is popped again
 * before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
367 ALIGNTEXT16
368 GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
369 HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
370 GLNAME(_mesa_x86_transform_points4_3d_no_rot):
371
/* 12 == bytes pushed below (3 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
372 #define FRAME_OFFSET 12
373 PUSH_L( ESI )
374 PUSH_L( EDI )
375 PUSH_L( EBX )
376
377 MOV_L( ARG_SOURCE, ESI )
378 MOV_L( ARG_DEST, EDI )
379
380 MOV_L( ARG_MATRIX, EDX )
381 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
382
/* Nothing to do for an empty source vector. */
383 TEST_L( ECX, ECX )
384 JZ( LLBL(x86_p4_3dnrr_done) )
385
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
386 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
387 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
388
389 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
390 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
391
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
392 SHL_L( CONST(4), ECX )
393 MOV_L( REGOFF(V4F_START, ESI), ESI )
394
395 MOV_L( REGOFF(V4F_START, EDI), EDI )
396 ADD_L( EDI, ECX )
397
398 ALIGNTEXT16
399 LLBL(x86_p4_3dnrr_loop):
400
/* The trailing comments show the x87 stack, top of stack first.
 * F4, F5 and F6 accumulate DST0, DST1 and DST2; each starts as one
 * source component times one matrix element. */
401 FLD_S( SRC0 ) /* F4 */
402 FMUL_S( MAT0 )
403
404 FLD_S( SRC1 ) /* F5 F4 */
405 FMUL_S( MAT5 )
406
407 FLD_S( SRC2 ) /* F6 F5 F4 */
408 FMUL_S( MAT10 )
409
/* SRC3 times MAT12..MAT14, then added into F4..F6. */
410 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
411 FMUL_S( MAT12 )
412 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
413 FMUL_S( MAT13 )
414 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
415 FMUL_S( MAT14 )
416
417 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
418 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
419 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
420 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
421
/* Fetch SRC3 as raw bits before any store to the destination below. */
422 MOV_L( SRC3, EBX )
423
424 FXCH( ST(2) ) /* F4 F5 F6 */
425 FSTP_S( DST0 ) /* F5 F6 */
426 FSTP_S( DST1 ) /* F6 */
427 FSTP_S( DST2 ) /* */
428 MOV_L( EBX, DST3 )
429
/* This label is not referenced anywhere in this file. */
430 LLBL(x86_p4_3dnrr_skip):
431
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
432 ADD_L( CONST(16), EDI )
433 ADD_L( EAX, ESI )
434 CMP_L( ECX, EDI )
435 JNE( LLBL(x86_p4_3dnrr_loop) )
436
437 LLBL(x86_p4_3dnrr_done):
438
439 POP_L( EBX )
440 POP_L( EDI )
441 POP_L( ESI )
442 RET
443 #undef FRAME_OFFSET
444
445
446
447
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transforms every 4-component point of the source vector using only the
 * matrix elements MAT0, MAT1, MAT4, MAT5, MAT12 and MAT13.  For each point:
 *
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2        (copied bit-for-bit through EBX)
 *   DST3 = SRC3        (copied bit-for-bit through EBP)
 *
 * All other matrix elements are ignored; presumably the caller selects
 * this routine only for matrices of that shape - confirm at the call site.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX  scratch holding the SRC2 bits written to DST2
 *   EBP  scratch holding the SRC3 bits written to DST3 (not a frame pointer)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * overwritten.  Every value loaded onto the x87 stack is popped again
 * before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
448 ALIGNTEXT16
449 GLOBL GLNAME( _mesa_x86_transform_points4_2d )
450 HIDDEN(_mesa_x86_transform_points4_2d)
451 GLNAME( _mesa_x86_transform_points4_2d ):
452
/* 16 == bytes pushed below (4 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
453 #define FRAME_OFFSET 16
454 PUSH_L( ESI )
455 PUSH_L( EDI )
456 PUSH_L( EBX )
457 PUSH_L( EBP )
458
459 MOV_L( ARG_SOURCE, ESI )
460 MOV_L( ARG_DEST, EDI )
461
462 MOV_L( ARG_MATRIX, EDX )
463 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
464
/* Nothing to do for an empty source vector. */
465 TEST_L( ECX, ECX )
466 JZ( LLBL(x86_p4_2dr_done) )
467
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
468 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
469 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
470
471 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
472 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
473
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
474 SHL_L( CONST(4), ECX )
475 MOV_L( REGOFF(V4F_START, ESI), ESI )
476
477 MOV_L( REGOFF(V4F_START, EDI), EDI )
478 ADD_L( EDI, ECX )
479
480 ALIGNTEXT16
481 LLBL(x86_p4_2dr_loop):
482
/* The trailing comments show the x87 stack, top of stack first.
 * F4 and F5 accumulate DST0 and DST1; F0 and F1 are temporary products. */
483 FLD_S( SRC0 ) /* F4 */
484 FMUL_S( MAT0 )
485 FLD_S( SRC0 ) /* F5 F4 */
486 FMUL_S( MAT1 )
487
/* SRC1 times MAT4 and MAT5, then added into F4 and F5. */
488 FLD_S( SRC1 ) /* F0 F5 F4 */
489 FMUL_S( MAT4 )
490 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
491 FMUL_S( MAT5 )
492
493 FXCH( ST(1) ) /* F0 F1 F5 F4 */
494 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
495 FADDP( ST0, ST(1) ) /* F5 F4 */
496
/* SRC3 times MAT12 and MAT13, accumulated the same way. */
497 FLD_S( SRC3 ) /* F0 F5 F4 */
498 FMUL_S( MAT12 )
499 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
500 FMUL_S( MAT13 )
501
502 FXCH( ST(1) ) /* F0 F1 F5 F4 */
503 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
504 FADDP( ST0, ST(1) ) /* F5 F4 */
505
/* Fetch SRC2 and SRC3 as raw bits before any store to the destination
 * below. */
506 MOV_L( SRC2, EBX )
507 MOV_L( SRC3, EBP )
508
509 FXCH( ST(1) ) /* F4 F5 */
510 FSTP_S( DST0 ) /* F5 */
511 FSTP_S( DST1 ) /* */
512 MOV_L( EBX, DST2 )
513 MOV_L( EBP, DST3 )
514
/* This label is not referenced anywhere in this file. */
515 LLBL(x86_p4_2dr_skip):
516
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
517 ADD_L( CONST(16), EDI )
518 ADD_L( EAX, ESI )
519 CMP_L( ECX, EDI )
520 JNE( LLBL(x86_p4_2dr_loop) )
521
522 LLBL(x86_p4_2dr_done):
523
524 POP_L( EBP )
525 POP_L( EBX )
526 POP_L( EDI )
527 POP_L( ESI )
528 RET
529 #undef FRAME_OFFSET
530
531
532
533
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transforms every 4-component point of the source vector using only the
 * matrix elements MAT0, MAT5, MAT12 and MAT13.  For each point:
 *
 *   DST0 = SRC0*MAT0 + SRC3*MAT12
 *   DST1 = SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2        (copied bit-for-bit through EBX)
 *   DST3 = SRC3        (copied bit-for-bit through EBP)
 *
 * All other matrix elements are ignored; presumably the caller selects
 * this routine only for matrices of that shape - confirm at the call site.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   EDX  matrix base pointer
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX  scratch holding the SRC2 bits written to DST2
 *   EBP  scratch holding the SRC3 bits written to DST3 (not a frame pointer)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * overwritten.  Every value loaded onto the x87 stack is popped again
 * before the loop iterates.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 */
534 ALIGNTEXT16
535 GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
536 HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
537 GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
538
/* 16 == bytes pushed below (4 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
539 #define FRAME_OFFSET 16
540 PUSH_L( ESI )
541 PUSH_L( EDI )
542 PUSH_L( EBX )
543 PUSH_L( EBP )
544
545 MOV_L( ARG_SOURCE, ESI )
546 MOV_L( ARG_DEST, EDI )
547
548 MOV_L( ARG_MATRIX, EDX )
549 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
550
/* Nothing to do for an empty source vector. */
551 TEST_L( ECX, ECX )
552 JZ( LLBL(x86_p4_2dnrr_done) )
553
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
554 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
555 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
556
557 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
558 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
559
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
560 SHL_L( CONST(4), ECX )
561 MOV_L( REGOFF(V4F_START, ESI), ESI )
562
563 MOV_L( REGOFF(V4F_START, EDI), EDI )
564 ADD_L( EDI, ECX )
565
566 ALIGNTEXT16
567 LLBL(x86_p4_2dnrr_loop):
568
/* The trailing comments show the x87 stack, top of stack first.
 * F4 and F5 accumulate DST0 and DST1; F0 and F1 are temporary products. */
569 FLD_S( SRC0 ) /* F4 */
570 FMUL_S( MAT0 )
571
572 FLD_S( SRC1 ) /* F5 F4 */
573 FMUL_S( MAT5 )
574
/* SRC3 times MAT12 and MAT13, then added into F4 and F5. */
575 FLD_S( SRC3 ) /* F0 F5 F4 */
576 FMUL_S( MAT12 )
577 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
578 FMUL_S( MAT13 )
579
580 FXCH( ST(1) ) /* F0 F1 F5 F4 */
581 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
582 FADDP( ST0, ST(1) ) /* F5 F4 */
583
/* Fetch SRC2 and SRC3 as raw bits before any store to the destination
 * below. */
584 MOV_L( SRC2, EBX )
585 MOV_L( SRC3, EBP )
586
587 FXCH( ST(1) ) /* F4 F5 */
588 FSTP_S( DST0 ) /* F5 */
589 FSTP_S( DST1 ) /* */
590 MOV_L( EBX, DST2 )
591 MOV_L( EBP, DST3 )
592
/* This label is not referenced anywhere in this file. */
593 LLBL(x86_p4_2dnrr_skip):
594
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
595 ADD_L( CONST(16), EDI )
596 ADD_L( EAX, ESI )
597 CMP_L( ECX, EDI )
598 JNE( LLBL(x86_p4_2dnrr_loop) )
599
600 LLBL(x86_p4_2dnrr_done):
601
602 POP_L( EBP )
603 POP_L( EBX )
604 POP_L( EDI )
605 POP_L( ESI )
606 RET
607 #undef FRAME_OFFSET
608
609
610
611
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every 4-component point of the source vector to the destination
 * unchanged, using integer moves only (no x87 instructions):
 *
 *   DST0..DST3 = SRC0..SRC3
 *
 * The copy loop is skipped when the source and destination data pointers
 * are equal; the destination's flags, count and size are still updated in
 * that case.
 *
 * Register roles:
 *   ESI  source vector descriptor, then pointer to the current source point
 *   EDI  dest vector descriptor, then pointer to the current dest point
 *   ECX  point count, then end-of-output pointer (dest start + count*16)
 *   EAX  source stride in bytes, added to ESI after every point
 *   EBX, EDX  scratch for the component copies
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.
 *
 * NOTE(review): when the source count is zero the routine leaves before
 * the destination's flags, count and size fields are written - confirm
 * callers do not depend on them in that case.
 *
 * NOTE(review): unlike the other routines in this file, no
 * "#undef FRAME_OFFSET" follows the RET here.
 */
612 ALIGNTEXT16
613 GLOBL GLNAME( _mesa_x86_transform_points4_identity )
614 HIDDEN(_mesa_x86_transform_points4_identity)
615 GLNAME( _mesa_x86_transform_points4_identity ):
616
/* 12 == bytes pushed below (3 registers x 4); presumably consumed by the
 * ARG_* macros to locate the stack arguments - confirm in xform_args.h. */
617 #define FRAME_OFFSET 12
618 PUSH_L( ESI )
619 PUSH_L( EDI )
620 PUSH_L( EBX )
621
622 MOV_L( ARG_SOURCE, ESI )
623 MOV_L( ARG_DEST, EDI )
624
/* The matrix pointer is loaded but never dereferenced in this routine;
 * EDX is reused as a scratch register inside the loop. */
625 MOV_L( ARG_MATRIX, EDX )
626 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
627
/* Nothing to do for an empty source vector. */
628 TEST_L( ECX, ECX )
629 JZ( LLBL(x86_p4_ir_done) )
630
/* Record the result's shape in the destination descriptor: set the
 * VEC_SIZE_4 flag bits, copy the count, and store a size of 4. */
631 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
632 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
633
634 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
635 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
636
/* ECX = count * 16 bytes; after adding the dest start it becomes the
 * address one past the last output point.  ESI/EDI switch from the
 * descriptors to the data they point at. */
637 SHL_L( CONST(4), ECX )
638 MOV_L( REGOFF(V4F_START, ESI), ESI )
639
640 MOV_L( REGOFF(V4F_START, EDI), EDI )
641 ADD_L( EDI, ECX )
642
/* Source and destination data start at the same address: skip the copy. */
643 CMP_L( ESI, EDI )
644 JE( LLBL(x86_p4_ir_done) )
645
646 ALIGNTEXT16
647 LLBL(x86_p4_ir_loop):
648
/* Copy the point two components at a time through EBX and EDX. */
649 MOV_L( SRC0, EBX )
650 MOV_L( SRC1, EDX )
651
652 MOV_L( EBX, DST0 )
653 MOV_L( EDX, DST1 )
654
655 MOV_L( SRC2, EBX )
656 MOV_L( SRC3, EDX )
657
658 MOV_L( EBX, DST2 )
659 MOV_L( EDX, DST3 )
660
/* This label is not referenced anywhere in this file. */
661 LLBL(x86_p4_ir_skip):
662
/* Output advances by a fixed 16 bytes per point, input by the source
 * stride; loop until the output pointer reaches the end address. */
663 ADD_L( CONST(16), EDI )
664 ADD_L( EAX, ESI )
665 CMP_L( ECX, EDI )
666 JNE( LLBL(x86_p4_ir_loop) )
667
668 LLBL(x86_p4_ir_done):
669
670 POP_L( EBX )
671 POP_L( EDI )
672 POP_L( ESI )
673 RET
674
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section.  GNU
 * toolchains read the presence of this section (with no executable flag)
 * as a statement that the object does not need an executable stack.
 */
675 #if defined (__ELF__) && defined (__linux__)
676 .section .note.GNU-stack,"",%progbits
677 #endif