migrate substitution keywords to SVN
[reactos.git] / reactos / lib / mesa32 / src / x86 / 3dnow_normal.S
1 /* $Id$ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*
28 * 3Dnow assembly code by Holger Waechtler
29 */
30
31 #ifdef USE_3DNOW_ASM
32
33 #include "matypes.h"
34 #include "norm_args.h"
35
36 SEG_TEXT
37
/* M(i): the i-th 4-byte element of the matrix whose base address is in ECX.
 * The argument is parenthesized so that an expression argument expands
 * correctly. */
#define M(i)    REGOFF((i) * 4, ECX)

/* STRIDE: the dword at byte offset 12 of the vector addressed by ESI.  The
 * loops add it to the source pointer to step to the next input normal.
 * NOTE(review): presumably this is the vector's stride field (V4F_STRIDE in
 * matypes.h) -- confirm before replacing the literal. */
#define STRIDE  REGOFF(12, ESI)
40
41
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Two passes over the normals:
 *   pass 1  dest[i] = in[i] * M, where M is built from elements
 *           0,1,2 / 4,5,6 / 8,9,10 of mat->inv.  When ARG_LENGTHS is NULL
 *           the matrix is first multiplied by ARG_SCALE.
 *   pass 2  ARG_LENGTHS != NULL: dest[i] *= lengths[i]
 *           ARG_LENGTHS == NULL: dest[i] *= 1/sqrt(dest[i] . dest[i])
 *
 * Arguments are read through the ARG_* macros, which depend on
 * FRAME_OFFSET; FRAME_OFFSET is therefore updated whenever registers are
 * pushed before an ARG_* access.
 *
 * Register roles:
 *   EDI  lengths pointer (0 when lengths have to be computed)
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EBP  remaining-normal counter
 * EDI, ESI and EBP are saved and restored; dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */

    CMP_L      ( CONST(0), EBP )                /* nothing to do?          */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    /* Pass 2 walks the destination again, so keep the counter and both
     * start pointers on the stack while pass 1 consumes them. */
    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )

#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                    /* m1             | m0     */
    MOVQ       ( M(4), MM4 )                    /* m5             | m4     */

    MOVD       ( M(2), MM5 )                    /*                | m2     */
    PUNPCKLDQ  ( M(6), MM5 )                    /* m6             | m2     */

    MOVQ       ( M(8), MM6 )                    /* m9             | m8     */
    MOVD       ( M(10), MM7 )                   /* 0              | m10    */

    CMP_L      ( CONST(0), EDI )                /* lengths supplied?       */
    JNE        ( LLBL (G3TN_scale_end) )        /* yes: leave matrix as is */

    MOVD       ( ARG_SCALE, MM0 )               /*                | scale  */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale          | scale  */

    PFMUL      ( MM0, MM3 )                     /* scale * m1 | scale * m0 */
    PFMUL      ( MM0, MM4 )                     /* scale * m5 | scale * m4 */
    PFMUL      ( MM0, MM5 )                     /* scale * m6 | scale * m2 */
    PFMUL      ( MM0, MM6 )                     /* scale * m9 | scale * m8 */
    PFMUL      ( MM0, MM7 )                     /*            | scale * m10 */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )            /* x1             | x0     */
    MOVD       ( REGOFF (8, EDX), MM2 )         /*                | x2     */

    MOVQ       ( MM0, MM1 )                     /* x1             | x0     */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2             | x2     */

    PFMUL      ( MM3, MM0 )                     /* x1*m1          | x0*m0  */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                     /* x1*m5          | x0*m4  */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                     /* x2*m6          | x2*m2  */
    PFADD      ( MM2, MM0 )                     /* r1             | r0     */

    MOVQ       ( REGIND (EDX), MM1 )            /* x1             | x0     */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write r0, r1            */

    PFMUL      ( MM6, MM1 )                     /* x1*m9          | x0*m8  */
    MOVD       ( REGOFF (8, EDX), MM2 )         /*                | x2     */

    PFMUL      ( MM7, MM2 )                     /*                | x2*m10 */
    PFACC      ( MM1, MM1 )                     /* *not used* | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                     /* *not used* | r2         */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write r2                */
    DEC_L      ( EBP )                          /* decrement normal counter */
    /* DEC leaves CF untouched, so branch on ZF only (JA would also test
     * the stale carry of the pointer ADD above). */
    JNE        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                          /* end of transform ---    */
    POP_L      ( EAX )                          /* now normalizing ...     */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )                /* lengths == 0 ?          */
    JE         ( LLBL (G3TN_norm) )             /* calculate lengths       */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                     /* use supplied lengths    */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EAX), MM1 )          /*                | x2     */

    MOVD       ( REGIND (EDI), MM3 )            /*                | length */
    PFMUL      ( MM3, MM1 )                     /*       | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                     /* length         | length */
    PFMUL      ( MM3, MM0 )                     /* x1, x0 (normalized)     */

    ADD_L      ( CONST(4), EDI )                /* next length             */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )             /* write new x0, x1        */
    MOVD       ( MM1, REGOFF(8, EAX) )          /* write new x2            */

    ADD_L      ( CONST(16), EAX )               /* next r                  */
    DEC_L      ( EBP )                          /* decrement normal counter */

    JNE        ( LLBL (G3TN_norm_w_lengths) )   /* ZF from DEC             */
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                               /* compute 1/length        */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )            /* x1             | x0     */
    MOVD       ( REGOFF(8, EAX), MM1 )          /*                | x2     */

    MOVQ       ( MM0, MM3 )                     /* x1             | x0     */
    MOVQ       ( MM1, MM4 )                     /*                | x2     */

    PFMUL      ( MM0, MM3 )                     /* x1*x1          | x0*x0  */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PFMUL      ( MM1, MM4 )                     /*                | x2*x2  */
    PFADD      ( MM4, MM3 )                     /* x1*x1    | x0*x0+x2*x2  */

    PFACC      ( MM3, MM3 )                     /* both lanes: x0^2+x1^2+x2^2 */
    PFRSQRT    ( MM3, MM5 )                     /* approx 1/sqrt (sum)     */

    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )

    DEC_L      ( EBP )                          /* decrement normal counter */
    PFMUL      ( MM5, MM5 )

    /* PFRSQIT1 / PFRCPIT2 refine the PFRSQRT estimate. */
    PFRSQIT1   ( MM3, MM5 )
    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )                     /* x1, x0 (normalized)     */

    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write new x0, x1        */
    PFMUL      ( MM5, MM1 )                     /*       | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write new x2            */
    /* No instruction since DEC_L writes EFLAGS; branch on its ZF. */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
216
217
218
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass: each input normal is multiplied component-wise by elements
 * 0, 5 and 10 of mat->inv and then normalized.
 *   ARG_LENGTHS != NULL: the result is multiplied by lengths[i]
 *   ARG_LENGTHS == NULL: the matrix elements are first multiplied by
 *                        ARG_SCALE and the result is multiplied by
 *                        1/sqrt(r . r)
 *
 * Register roles:
 *   EDI  lengths cursor (0 when lengths have to be computed)
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EBP  remaining-normal counter
 *   MM0  m5 | m0        MM2  m10 | m10
 * EDI, ESI and EBP are saved and restored; dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */

    CMP_L      ( CONST(0), EBP )                /* nothing to do?          */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                    /*                | m0     */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5             | m0     */

    MOVD       ( M(10), MM2 )                   /*                | m10    */
    PUNPCKLDQ  ( MM2, MM2 )                     /* m10            | m10    */

    CMP_L      ( CONST(0), EDI )                /* lengths supplied?       */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) ) /* yes: no scaling needed  */

    MOVD       ( ARG_SCALE, MM7 )               /*                | scale  */
    PUNPCKLDQ  ( MM7, MM7 )                     /* scale          | scale  */

    PFMUL      ( MM7, MM0 )                     /* scale * m5 | scale * m0 */
    PFMUL      ( MM7, MM2 )                     /* scale * m10 | scale * m10 */

    JMP        ( LLBL (G3TNNR_norm) )           /* need to calculate lengths */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                   /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM7 )          /*                | x2     */

    /* Load this normal's factor here rather than pre-loading the next one
     * at the loop bottom, which read past the end of the lengths array on
     * the final iteration. */
    MOVD       ( REGIND(EDI), MM3 )             /*                | length */

    PFMUL      ( MM0, MM6 )                     /* x1*m5          | x0*m0  */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                     /*                | x2*m10 */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PFMUL      ( MM3, MM7 )                     /*       | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                     /* length         | length */

    ADD_L      ( CONST(4), EDI )                /* next length             */
    PFMUL      ( MM3, MM6 )                     /* x1, x0 (normalized)     */

    MOVQ       ( MM6, REGOFF(-16, EAX) )        /* write r0, r1            */
    MOVD       ( MM7, REGOFF(-8, EAX) )         /* write r2                */

    DEC_L      ( EBP )                          /* decrement normal counter */
    /* DEC does not write CF; test ZF only. */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                             /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM7 )          /*                | x2     */

    PFMUL      ( MM0, MM6 )                     /* x1*m5          | x0*m0  */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PFMUL      ( MM2, MM7 )                     /*                | x2*m10 */
    MOVQ       ( MM6, MM3 )                     /* x1, x0 (transformed)    */

    MOVQ       ( MM7, MM4 )                     /*       | x2 (transformed) */
    PFMUL      ( MM6, MM3 )                     /* x1*x1          | x0*x0  */


    PFMUL      ( MM7, MM4 )                     /*                | x2*x2  */
    PFACC      ( MM3, MM3 )                     /* **not used** | x0*x0+x1*x1 */

    PFADD      ( MM4, MM3 )                     /*    | x0*x0+x1*x1+x2*x2  */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                     /* approx 1/sqrt (sum)     */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    /* PFRSQIT1 / PFRCPIT2 refine the PFRSQRT estimate. */
    PFRSQIT1   ( MM3, MM5 )
    DEC_L      ( EBP )                          /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )
    PFMUL      ( MM5, MM6 )                     /* x1, x0 (normalized)     */

    MOVQ       ( MM6, REGOFF(-16, EAX) )        /* write r0, r1            */
    PFMUL      ( MM5, MM7 )                     /*       | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )         /* write r2                */
    /* No instruction since DEC_L writes EFLAGS; branch on its ZF. */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
352
353
354
355
356
357
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For every input normal writes
 *   r0 = x0 * scale * m0,  r1 = x1 * scale * m5,  r2 = x2 * scale * m10
 * where m0, m5, m10 are elements of mat->inv and scale is ARG_SCALE.
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EBP  remaining-normal counter
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used, but
 * its push is part of the FRAME_OFFSET of 12).  dest->count is set to
 * in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */

    CMP_L      ( CONST(0), EBP )                /* nothing to do?          */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )               /*                | scale  */
    PUNPCKLDQ  ( MM6, MM6 )                     /* scale          | scale  */

    MOVD       ( M(0), MM0 )                    /*                | m0     */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5             | m0     */

    PFMUL      ( MM6, MM0 )                     /* scale*m5       | scale*m0 */
    MOVD       ( M(10), MM2 )                   /*                | m10    */

    PFMUL      ( MM6, MM2 )                     /*                | scale*m10 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM5 )          /*                | x2     */

    PFMUL      ( MM0, MM4 )                     /* x1*m5          | x0*m0  */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                     /*                | x2*m10 */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    DEC_L      ( EBP )                          /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-16, EAX) )        /* write r0, r1            */

    MOVD       ( MM5, REGOFF(-8, EAX) )         /* write r2                */
    /* The MMX stores leave EFLAGS alone, so ZF is still the one set by
     * DEC_L.  DEC does not write CF, hence JNE rather than JA. */
    JNE        ( LLBL (G3TRNR_rescale) )        /* cnt != 0 -> next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
424
425
426
427
428
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For every input normal writes r = x * (scale * M), where M is built from
 * elements 0,1,2 / 4,5,6 / 8,9,10 of mat->inv and scale is ARG_SCALE:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10      (each m pre-multiplied by scale)
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EDI  remaining-normal counter
 * EDI and ESI are saved and restored; dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */

    CMP_L      ( CONST(0), EDI )                /* nothing to do?          */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )                    /* m1             | m0     */

    MOVQ       ( M(4), MM4 )                    /* m5             | m4     */
    MOVD       ( ARG_SCALE, MM0 )               /*                | scale  */

    MOVD       ( M(2), MM5 )                    /*                | m2     */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale          | scale  */

    PUNPCKLDQ  ( M(6), MM5 )                    /* m6             | m2     */
    PFMUL      ( MM0, MM3 )                     /* scale*m1       | scale*m0 */

    MOVQ       ( M(8), MM6 )                    /* m9             | m8     */
    PFMUL      ( MM0, MM4 )                     /* scale*m5       | scale*m4 */

    MOVD       ( M(10), MM7 )                   /*                | m10    */
    PFMUL      ( MM0, MM5 )                     /* scale*m6       | scale*m2 */

    PFMUL      ( MM0, MM6 )                     /* scale*m9       | scale*m8 */

    PFMUL      ( MM0, MM7 )                     /*                | scale*m10 */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                | x2     */

    MOVQ       ( MM0, MM1 )                     /* x1             | x0     */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2             | x2     */

    PFMUL      ( MM3, MM0 )                     /* x1*m1          | x0*m0  */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PFMUL      ( MM4, MM1 )                     /* x1*m5          | x0*m4  */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    MOVQ       ( REGIND(EDX), MM1 )             /* x1             | x0     */

    PFMUL      ( MM5, MM2 )                     /* x2*m6          | x2*m2  */
    PFADD      ( MM2, MM0 )                     /* r1             | r0     */

    MOVD       ( REGOFF(8, EDX), MM2 )          /*                | x2     */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write r0, r1            */
    PFMUL      ( MM6, MM1 )                     /* x1*m9          | x0*m8  */

    PFMUL      ( MM7, MM2 )                     /*                | x2*m10 */
    PFACC      ( MM1, MM1 )                     /* *not used* | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                     /* *not used* | r2         */
    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write r2                */

    DEC_L      ( EDI )                          /* decrement normal counter */
    /* DEC does not write CF, so test ZF only (JA also depended on the
     * carry left by the pointer ADD above). */
    JNE        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
519
520
521
522
523
524
525
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For every input normal writes
 *   r0 = x0 * m0,  r1 = x1 * m5,  r2 = x2 * m10
 * where m0, m5, m10 are elements of mat->inv.
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EDI  remaining-normal counter
 * EDI and ESI are saved and restored; dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */

    CMP_L      ( CONST(0), EDI )                /* nothing to do?          */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                    /*                | m0     */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5             | m0     */

    MOVD       ( M(10), MM2 )                   /*                | m10    */
    PUNPCKLDQ  ( MM2, MM2 )                     /* m10            | m10    */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM5 )          /*                | x2     */

    PFMUL      ( MM0, MM4 )                     /* x1*m5          | x0*m0  */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                     /*                | x2*m10 */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    DEC_L      ( EDI )                          /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-16, EAX) )        /* write r0, r1            */

    MOVD       ( MM5, REGOFF(-8, EAX) )         /* write r2                */
    /* The MMX stores leave EFLAGS alone, so ZF is still the one set by
     * DEC_L.  DEC does not write CF, hence JNE rather than JA. */
    JNE        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
584
585
586
587
588
589
590
591
/*
 * _mesa_3dnow_transform_normals
 *
 * For every input normal writes r = x * M, where M is built from elements
 * 0,1,2 / 4,5,6 / 8,9,10 of mat->inv:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   EDX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   ECX  mat->inv
 *   EDI  remaining-normal counter
 * EDI and ESI are saved and restored; dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start               */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                */

    CMP_L      ( CONST(0), EDI )                /* nothing to do?          */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )                    /* m1             | m0     */
    MOVQ       ( M(4), MM4 )                    /* m5             | m4     */

    MOVD       ( M(2), MM5 )                    /*                | m2     */
    PUNPCKLDQ  ( M(6), MM5 )                    /* m6             | m2     */

    MOVQ       ( M(8), MM6 )                    /* m9             | m8     */
    MOVD       ( M(10), MM7 )                   /*                | m10    */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                | x2     */

    MOVQ       ( MM0, MM1 )                     /* x1             | x0     */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2             | x2     */

    PFMUL      ( MM3, MM0 )                     /* x1*m1          | x0*m0  */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    PFMUL      ( MM4, MM1 )                     /* x1*m5          | x0*m4  */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                     /* x2*m6          | x2*m2  */
    PFADD      ( MM2, MM0 )                     /* r1             | r0     */

    MOVQ       ( REGIND(EDX), MM1 )             /* x1             | x0     */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write r0, r1            */

    PFMUL      ( MM6, MM1 )                     /* x1*m9          | x0*m8  */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                | x2     */

    PFMUL      ( MM7, MM2 )                     /*                | x2*m10 */
    ADD_L      ( STRIDE, EDX )                  /* next normal             */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                     /* *not used* | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                     /* *not used* | r2         */

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write r2                */
    DEC_L      ( EDI )                          /* decrement normal counter */

    /* DEC does not write CF, so test ZF only (JA also depended on the
     * carry left by the pointer ADD above). */
    JNE        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
670
671
672
673
674
675
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes every input normal without transforming it:
 *   ARG_LENGTHS != NULL: dest[i] = in[i] * lengths[i]
 *   ARG_LENGTHS == NULL: dest[i] = in[i] * 1/sqrt(in[i] . in[i])
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   ECX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   EDX  lengths cursor (0 when lengths have to be computed)
 *   EBP  remaining-normal counter
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used, but
 * its push is part of the FRAME_OFFSET of 12).  dest->count is set to
 * in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* in->start               */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                /* nothing to do?          */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )                /* lengths == 0 ?          */
    JE         ( LLBL (G3N_norm2) )             /* calculate lengths       */

ALIGNTEXT32
LLBL (G3N_norm1):                               /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )                  /* destination is written  */

    MOVQ       ( REGIND(ECX), MM0 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, ECX), MM1 )          /*                | x2     */

    MOVD       ( REGIND(EDX), MM3 )             /*                | length */
    PFMUL      ( MM3, MM1 )                     /*       | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                     /* length         | length */
    ADD_L      ( STRIDE, ECX )                  /* next normal             */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                     /* x1, x0 (normalized)     */
    MOVQ       ( MM0, REGIND(EAX) )             /* write new x0, x1        */

    MOVD       ( MM1, REGOFF(8, EAX) )          /* write new x2            */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    ADD_L      ( CONST(4), EDX )                /* next length             */
    DEC_L      ( EBP )                          /* decrement normal counter */

    /* DEC does not write CF, so test ZF only. */
    JNE        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                               /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    PREFETCH   ( REGIND(ECX) )

    MOVQ       ( REGIND(ECX), MM0 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, ECX), MM1 )          /*                | x2     */

    MOVQ       ( MM0, MM3 )                     /* x1             | x0     */
    ADD_L      ( STRIDE, ECX )                  /* next normal             */

    PFMUL      ( MM0, MM3 )                     /* x1*x1          | x0*x0  */
    MOVQ       ( MM1, MM4 )                     /*                | x2     */

    ADD_L      ( CONST(16), EAX )               /* next r                  */
    PFMUL      ( MM1, MM4 )                     /*                | x2*x2  */

    PFADD      ( MM4, MM3 )                     /* x1*x1    | x0*x0+x2*x2  */
    PFACC      ( MM3, MM3 )                     /* both lanes: x0^2+x1^2+x2^2 */

    PFRSQRT    ( MM3, MM5 )                     /* approx 1/sqrt (sum)     */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    /* PFRSQIT1 / PFRCPIT2 refine the PFRSQRT estimate. */
    PFRSQIT1   ( MM3, MM5 )
    DEC_L      ( EBP )                          /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )                     /* x1, x0 (normalized)     */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write new x0, x1        */

    PFMUL      ( MM5, MM1 )                     /*       | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write new x2            */

    /* No instruction since DEC_L writes EFLAGS; branch on its ZF. */
    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
781
782
783
784
785
786
/*
 * _mesa_3dnow_rescale_normals
 *
 * For every input normal writes dest[i] = in[i] * ARG_SCALE (x0, x1, x2).
 *
 * Register roles:
 *   ESI  in (only used through STRIDE)
 *   ECX  source cursor, starts at in->start
 *   EAX  destination cursor, starts at dest->start, 16 bytes per normal
 *   EDX  remaining-normal counter
 *   MM0  scale | scale
 * EDI and ESI are saved and restored (EDI is not otherwise used, but its
 * push is part of the FRAME_OFFSET of 8).  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )  /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start             */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* in->start               */

    CMP_L      ( CONST(0), EDX )                /* nothing to do?          */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )               /*                | scale  */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale          | scale  */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )             /* x1             | x0     */
    MOVD       ( REGOFF(8, ECX), MM2 )          /*                | x2     */

    PFMUL      ( MM0, MM1 )                     /* x1*scale       | x0*scale */
    ADD_L      ( STRIDE, ECX )                  /* next normal             */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )                     /*                | x2*scale */
    ADD_L      ( CONST(16), EAX )               /* next r                  */

    MOVQ       ( MM1, REGOFF(-16, EAX) )        /* write r0, r1            */
    MOVD       ( MM2, REGOFF(-8, EAX) )         /* write r2                */

    DEC_L      ( EDX )                          /* decrement normal counter */
    /* DEC does not write CF, so test ZF only (JA also depended on the
     * carry left by the pointer ADD above). */
    JNE        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
839
840 #endif