[PROPSYS]
[reactos.git] / reactos / dll / opengl / mesa / x86 / 3dnow_normal.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 5.1
5 *
6 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * 3Dnow assembly code by Holger Waechtler
28 */
29
30 #ifdef USE_3DNOW_ASM
31
32 #include "assyntax.h"
33 #include "matypes.h"
34 #include "norm_args.h"
35
36 SEG_TEXT
37
/*
 * M(i): memory operand for the i-th 32-bit element (byte offset i * 4)
 * relative to ECX.  The routines that use it load mat->inv into ECX first,
 * so M(i) then names element i of that matrix.
 */
38 #define M(i) REGOFF(i * 4, ECX)
/*
 * STRIDE: memory operand for the 32-bit field at byte offset 12 from ESI.
 * ESI holds the ARG_IN pointer in every routine below; the field is added to
 * the input cursor to step to the next normal.
 * NOTE(review): offset 12 is hard-coded; presumably the stride member of the
 * input vector structure -- confirm against matypes.h.
 */
39 #define STRIDE REGOFF(12, ESI)
40
41
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Arguments are fetched through ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS and
 * ARG_DEST (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * Pass 1 (G3TN_transform): for each of in->count normals (x0, x1, x2)
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10        (m = mat->inv)
 *   is written to dest->start in 16-byte steps.  When lengths == 0 the
 *   matrix elements are multiplied by scale beforehand.
 * Pass 2: the results are normalized in place, either with the supplied
 *   lengths[] factors (G3TN_norm_w_lengths) or with a computed
 *   1/sqrt(r0*r0 + r1*r1 + r2*r2) (G3TN_norm).
 *
 * Register roles: EDI = lengths, ESI = in, ECX = mat->inv,
 *                 EDX = input cursor, EAX = output cursor, EBP = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TN_end) )

    /* EBP still holds in->count here; no reload is needed. */
    FEMMS

    PUSH_L     ( EBP )                          /* save counter & pointers for */
    PUSH_L     ( EAX )                          /* the normalize pass          */
    PUSH_L     ( EDX )

    /* Three more dwords are on the stack, so ARG_* offsets grow by 12. */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                    /* m1              | m0              */
    MOVQ       ( M(4), MM4 )                    /* m5              | m4              */

    MOVD       ( M(2), MM5 )                    /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )                    /* m6              | m2              */

    MOVQ       ( M(8), MM6 )                    /* m9              | m8              */
    MOVD       ( M(10), MM7 )                   /*                 | m10             */

    CMP_L      ( CONST(0), EDI )                /* lengths supplied ?                */
    JNE        ( LLBL (G3TN_scale_end) )        /* yes: leave the matrix unscaled    */

    MOVD       ( ARG_SCALE, MM0 )               /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale           | scale           */

    PFMUL      ( MM0, MM3 )                     /* scale * m1      | scale * m0      */
    PFMUL      ( MM0, MM4 )                     /* scale * m5      | scale * m4      */
    PFMUL      ( MM0, MM5 )                     /* scale * m6      | scale * m2      */
    PFMUL      ( MM0, MM6 )                     /* scale * m9      | scale * m8      */
    PFMUL      ( MM0, MM7 )                     /*                 | scale * m10     */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )            /* x1              | x0              */
    MOVD       ( REGOFF (8, EDX), MM2 )         /*                 | x2              */

    MOVQ       ( MM0, MM1 )                     /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2              | x2              */

    PFMUL      ( MM3, MM0 )                     /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )               /* next r                            */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                     /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL      ( MM5, MM2 )                     /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )                     /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND (EDX), MM1 )            /* x1              | x0              */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write r0, r1                      */

    PFMUL      ( MM6, MM1 )                     /* x1*m9           | x0*m8           */
    MOVD       ( REGOFF (8, EDX), MM2 )         /*                 | x2              */

    PFMUL      ( MM7, MM2 )                     /*                 | x2*m10          */
    PFACC      ( MM1, MM1 )                     /* *not used*      | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )                     /* *not used*      | x0*m8+x1*m9+x2*m10 */
    ADD_L      ( STRIDE, EDX )                  /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write r2                          */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */
    JNZ        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                          /* end of transform ---              */
    POP_L      ( EAX )                          /* now normalizing ...               */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )                /* lengths == 0 ?                    */
    JE         ( LLBL (G3TN_norm) )             /* yes: calculate lengths            */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                     /* in-place, using supplied lengths  */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )          /*                 | x2              */

    MOVD       ( REGIND (EDI), MM3 )            /*                 | length (x)      */
    PFMUL      ( MM3, MM1 )                     /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                     /* length (x)      | length (x)      */
    PFMUL      ( MM3, MM0 )                     /* x1 (normalized) | x0 (normalized) */

    /* This pass only walks dest (EAX) and lengths (EDI); the input cursor
     * in EDX is no longer needed and is not advanced. */
    ADD_L      ( CONST(4), EDI )                /* next length                       */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )             /* write new x0, x1                  */
    MOVD       ( MM1, REGOFF(8, EAX) )          /* write new x2                      */

    ADD_L      ( CONST(16), EAX )               /* next r                            */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */

    JNZ        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                               /* in-place, lengths computed here   */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )            /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )          /*                 | x2              */

    MOVQ       ( MM0, MM3 )                     /* x1              | x0              */
    MOVQ       ( MM1, MM4 )                     /*                 | x2              */

    PFMUL      ( MM0, MM3 )                     /* x1*x1           | x0*x0           */
    ADD_L      ( CONST(16), EAX )               /* next r                            */

    PFMUL      ( MM1, MM4 )                     /*                 | x2*x2           */
    PFADD      ( MM4, MM3 )                     /* x1*x1           | x0*x0+x2*x2     */

    PFACC      ( MM3, MM3 )                     /* **not used**    | x0*x0+x1*x1+x2*x2 */
    PFRSQRT    ( MM3, MM5 )                     /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */

    MOVQ       ( MM5, MM4 )                     /* keep the first approximation      */
    PUNPCKLDQ  ( MM3, MM3 )

    /* Flags from this SUB are consumed by the JNZ at the loop end; only
     * MMX/3DNow! instructions and moves follow, none of which touch flags. */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                     /* refinement step 1                 */
    PFRCPIT2   ( MM4, MM5 )                     /* refinement step 2                 */

    PFMUL      ( MM5, MM0 )                     /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write new x0, x1                  */
    PFMUL      ( MM5, MM1 )                     /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write new x2                      */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
217
218
219
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Arguments are fetched through ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS and
 * ARG_DEST (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * Uses only the diagonal of mat->inv: each of the in->count normals
 * (x0, x1, x2) becomes (x0*m0, x1*m5, x2*m10) and is then normalized, in a
 * single pass, writing to dest->start in 16-byte steps:
 *   - lengths != 0: multiply by the supplied lengths[i];
 *   - lengths == 0: pre-scale the diagonal by scale and multiply by a
 *                   computed 1/sqrt of the squared length.
 *
 * Register roles: EDI = lengths cursor, ESI = in, ECX = mat->inv,
 *                 EDX = input cursor, EAX = output cursor, EBP = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start */

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                    /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5              | m0              */

    MOVD       ( M(10), MM2 )                   /*                 | m10             */
    PUNPCKLDQ  ( MM2, MM2 )                     /* m10             | m10             */

    CMP_L      ( CONST(0), EDI )                /* lengths supplied ?                */
    JNE        ( LLBL (G3TNNR_scale_end) )      /* yes: leave the diagonal unscaled  */

    MOVD       ( ARG_SCALE, MM7 )               /*                 | scale           */
    PUNPCKLDQ  ( MM7, MM7 )                     /* scale           | scale           */

    PFMUL      ( MM7, MM0 )                     /* scale * m5      | scale * m0      */
    PFMUL      ( MM7, MM2 )                     /* scale * m10     | scale * m10     */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )                /* lengths == 0 ?                    */
    JE         ( LLBL (G3TNNR_norm) )           /* need to calculate lengths         */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                   /* use precalculated lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )          /*                 | x2              */

    /* The length is loaded at the top of the iteration that uses it, so
     * exactly lengths[0 .. count-1] are read.  (Fetching the next length at
     * the bottom of the loop would read one element past the array on the
     * final iteration.) */
    MOVD       ( REGIND(EDI), MM3 )             /*                 | length (x)      */

    PFMUL      ( MM0, MM6 )                     /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )                  /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                     /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )               /* next r                            */

    PFMUL      ( MM3, MM7 )                     /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                     /* length (x)      | length (x)      */

    ADD_L      ( CONST(4), EDI )                /* next length                       */
    PFMUL      ( MM3, MM6 )                     /* x1 (normalized) | x0 (normalized) */

    /* Flags from this SUB feed the JNZ below; the stores do not alter them. */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */
    MOVQ       ( MM6, REGOFF(-16, EAX) )        /* write r0, r1                      */

    MOVD       ( MM7, REGOFF(-8, EAX) )         /* write r2                          */

    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                             /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )          /*                 | x2              */

    PFMUL      ( MM0, MM6 )                     /* x1*m5           | x0*m0           */
    ADD_L      ( CONST(16), EAX )               /* next r                            */

    PFMUL      ( MM2, MM7 )                     /*                 | x2*m10          */
    MOVQ       ( MM6, MM3 )                     /* x1 (transformed)| x0 (transformed) */

    MOVQ       ( MM7, MM4 )                     /*                 | x2 (transformed) */
    PFMUL      ( MM6, MM3 )                     /* x1*x1           | x0*x0           */


    PFMUL      ( MM7, MM4 )                     /*                 | x2*x2           */
    PFACC      ( MM3, MM3 )                     /* **not used**    | x0*x0+x1*x1     */

    PFADD      ( MM4, MM3 )                     /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )                  /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                     /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )                     /* keep the first approximation      */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                     /* refinement step 1                 */
    /* Flags from this SUB feed the JNZ at the loop end; only MMX/3DNow!
     * instructions and moves follow. */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */

    PFRCPIT2   ( MM4, MM5 )                     /* refinement step 2                 */
    PFMUL      ( MM5, MM6 )                     /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )        /* write r0, r1                      */
    PFMUL      ( MM5, MM7 )                     /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )         /* write r2                          */
    JNZ        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
354
355
356
357
358
359
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Arguments are fetched through ARG_MAT, ARG_SCALE, ARG_IN and ARG_DEST
 * (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * For each of the in->count normals (x0, x1, x2) writes
 *     (scale*m0*x0, scale*m5*x1, scale*m10*x2)        (m = mat->inv)
 * to dest->start in 16-byte steps.
 *
 * Register roles: ESI = in, ECX = mat->inv, EDX = input cursor,
 *                 EAX = output cursor, EBP = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EDX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* EBP = in->count                   */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EDX) )  /* dest->count = in->count           */
    MOV_L      ( REGOFF(V4F_START, EDX), EAX )  /* EAX = dest->start                 */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start                   */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv                    */

    CMP_L      ( CONST(0), EBP )                /* empty input: skip the 3DNow! part */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    /* Fold the scale factor into the diagonal once, outside the loop. */
    MOVD       ( ARG_SCALE, MM6 )               /*                 | scale           */
    PUNPCKLDQ  ( MM6, MM6 )                     /* scale           | scale           */

    MOVD       ( M(0), MM0 )                    /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5              | m0              */

    PFMUL      ( MM6, MM0 )                     /* scale*m5        | scale*m0        */
    MOVD       ( M(10), MM2 )                   /*                 | m10             */

    PFMUL      ( MM6, MM2 )                     /*                 | scale*m10       */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM5 )          /*                 | x2              */

    PFMUL      ( MM0, MM4 )                     /* x1*scale*m5     | x0*scale*m0     */
    ADD_L      ( STRIDE, EDX )                  /* advance input cursor              */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                     /*                 | x2*scale*m10    */
    ADD_L      ( CONST(16), EAX )               /* advance output cursor             */

    /* The stores below leave the flags from this SUB intact for the JNZ. */
    SUB_L      ( CONST(1), EBP )                /* one normal done                   */
    MOVQ       ( MM4, REGOFF(-16, EAX) )        /* store r0, r1                      */

    MOVD       ( MM5, REGOFF(-8, EAX) )         /* store r2                          */
    JNZ        ( LLBL (G3TRNR_rescale) )        /* more normals left ?               */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
427
428
429
430
431
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Arguments are fetched through ARG_MAT, ARG_SCALE, ARG_IN and ARG_DEST
 * (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * For each of the in->count normals (x0, x1, x2) writes
 *     r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *     r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *     r2 = scale * (x0*m8 + x1*m9 + x2*m10)           (m = mat->inv)
 * to dest->start in 16-byte steps; the scale is folded into the matrix
 * registers before the loop.
 *
 * Register roles: ESI = in, ECX = mat->inv, EDX = input cursor,
 *                 EAX = output cursor, EDI = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count                   */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count           */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start                 */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start                   */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv                    */

    CMP_L      ( CONST(0), EDI )                /* empty input: skip the 3DNow! part */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    /* Load scale and the upper-left 3x3 of the matrix ... */
    MOVD       ( ARG_SCALE, MM0 )               /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale           | scale           */

    MOVQ       ( M(0), MM3 )                    /* m1              | m0              */
    MOVQ       ( M(4), MM4 )                    /* m5              | m4              */

    MOVD       ( M(2), MM5 )                    /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )                    /* m6              | m2              */

    MOVQ       ( M(8), MM6 )                    /* m9              | m8              */
    MOVD       ( M(10), MM7 )                   /*                 | m10             */

    /* ... and pre-multiply every element by scale. */
    PFMUL      ( MM0, MM3 )                     /* scale*m1        | scale*m0        */
    PFMUL      ( MM0, MM4 )                     /* scale*m5        | scale*m4        */
    PFMUL      ( MM0, MM5 )                     /* scale*m6        | scale*m2        */
    PFMUL      ( MM0, MM6 )                     /* scale*m9        | scale*m8        */
    PFMUL      ( MM0, MM7 )                     /*                 | scale*m10       */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                 | x2              */

    MOVQ       ( MM0, MM1 )                     /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2              | x2              */

    PFMUL      ( MM3, MM0 )                     /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )               /* advance output cursor             */

    PFMUL      ( MM4, MM1 )                     /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    MOVQ       ( REGIND(EDX), MM1 )             /* x1              | x0 (for row 3)  */

    PFMUL      ( MM5, MM2 )                     /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )                     /* r1              | r0              */

    MOVD       ( REGOFF(8, EDX), MM2 )          /*                 | x2 (for row 3)  */
    ADD_L      ( STRIDE, EDX )                  /* advance input cursor              */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* store r0, r1                      */
    PFMUL      ( MM6, MM1 )                     /* x1*m9           | x0*m8           */

    PFMUL      ( MM7, MM2 )                     /*                 | x2*m10          */
    PFACC      ( MM1, MM1 )                     /* (unused)        | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )                     /* (unused)        | r2              */
    MOVD       ( MM1, REGOFF(-8, EAX) )         /* store r2                          */

    SUB_L      ( CONST(1), EDI )                /* one normal done                   */
    JNZ        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
523
524
525
526
527
528
529
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Arguments are fetched through ARG_MAT, ARG_IN and ARG_DEST (norm_args.h),
 * which are relative to FRAME_OFFSET.
 *
 * For each of the in->count normals (x0, x1, x2) writes
 *     (x0*m0, x1*m5, x2*m10)                          (m = mat->inv)
 * to dest->start in 16-byte steps.
 *
 * Register roles: ESI = in, ECX = mat->inv, EDX = input cursor,
 *                 EAX = output cursor, EDI = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv                    */
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count                   */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count           */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start                 */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start                   */

    CMP_L      ( CONST(0), EDI )                /* empty input: skip the 3DNow! part */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                    /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )                    /* m5              | m0              */

    MOVD       ( M(10), MM2 )                   /*                 | m10             */
    PUNPCKLDQ  ( MM2, MM2 )                     /* m10             | m10             */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM5 )          /*                 | x2              */

    PFMUL      ( MM0, MM4 )                     /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )                  /* advance input cursor              */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                     /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )               /* advance output cursor             */

    /* The stores below leave the flags from this SUB intact for the JNZ. */
    SUB_L      ( CONST(1), EDI )                /* one normal done                   */
    MOVQ       ( MM4, REGOFF(-16, EAX) )        /* store r0, r1                      */

    MOVD       ( MM5, REGOFF(-8, EAX) )         /* store r2                          */
    JNZ        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
589
590
591
592
593
594
595
596
/*
 * _mesa_3dnow_transform_normals
 *
 * Arguments are fetched through ARG_MAT, ARG_IN and ARG_DEST (norm_args.h),
 * which are relative to FRAME_OFFSET.
 *
 * For each of the in->count normals (x0, x1, x2) writes
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10                     (m = mat->inv)
 * to dest->start in 16-byte steps.
 *
 * Register roles: ESI = in, ECX = mat->inv, EDX = input cursor,
 *                 EAX = output cursor, EDI = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv                    */
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count                   */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count           */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start                 */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start                   */

    CMP_L      ( CONST(0), EDI )                /* empty input: skip the 3DNow! part */
    JE         ( LLBL (G3T_end) )

    FEMMS

    /* Keep the upper-left 3x3 of the matrix in MM3..MM7 for the whole loop. */
    MOVQ       ( M(0), MM3 )                    /* m1              | m0              */
    MOVQ       ( M(4), MM4 )                    /* m5              | m4              */

    MOVD       ( M(2), MM5 )                    /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )                    /* m6              | m2              */

    MOVQ       ( M(8), MM6 )                    /* m9              | m8              */
    MOVD       ( M(10), MM7 )                   /*                 | m10             */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                 | x2              */

    MOVQ       ( MM0, MM1 )                     /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )                     /* x2              | x2              */

    PFMUL      ( MM3, MM0 )                     /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )               /* advance output cursor             */

    PFMUL      ( MM4, MM1 )                     /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )                     /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL      ( MM5, MM2 )                     /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )                     /* r1              | r0              */

    MOVQ       ( REGIND(EDX), MM1 )             /* x1              | x0 (for row 3)  */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* store r0, r1                      */

    PFMUL      ( MM6, MM1 )                     /* x1*m9           | x0*m8           */
    MOVD       ( REGOFF(8, EDX), MM2 )          /*                 | x2 (for row 3)  */

    PFMUL      ( MM7, MM2 )                     /*                 | x2*m10          */
    ADD_L      ( STRIDE, EDX )                  /* advance input cursor              */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                     /* (unused)        | x0*m8+x1*m9     */
    PFADD      ( MM2, MM1 )                     /* (unused)        | r2              */

    MOVD       ( MM1, REGOFF(-8, EAX) )         /* store r2                          */
    SUB_L      ( CONST(1), EDI )                /* one normal done                   */

    JNZ        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
676
677
678
679
680
681
/*
 * _mesa_3dnow_normalize_normals
 *
 * Arguments are fetched through ARG_IN, ARG_LENGTHS and ARG_DEST
 * (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * Normalizes each of the in->count normals (x0, x1, x2) and writes the
 * result to dest->start in 16-byte steps:
 *   - lengths != 0 (G3N_norm1): multiply by the supplied lengths[i];
 *   - lengths == 0 (G3N_norm2): multiply by a computed
 *                               1/sqrt(x0*x0 + x1*x1 + x2*x2).
 *
 * Register roles: ESI = in, ECX = input cursor, EAX = output cursor,
 *                 EDX = lengths cursor, EBP = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                /* nothing to do for count == 0 */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )                /* lengths == 0 ?                    */
    JE         ( LLBL (G3N_norm2) )             /* yes: calculate lengths            */

ALIGNTEXT32
LLBL (G3N_norm1):                               /* use precalculated lengths         */

    /* The destination is about to be written, so hint with PREFETCHW like
     * every other output prefetch in this file. */
    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM1 )          /*                 | x2              */

    MOVD       ( REGIND(EDX), MM3 )             /*                 | length (x)      */
    PFMUL      ( MM3, MM1 )                     /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                     /* length (x)      | length (x)      */
    ADD_L      ( STRIDE, ECX )                  /* next normal                       */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                     /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )             /* write new x0, x1                  */

    MOVD       ( MM1, REGOFF(8, EAX) )          /* write new x2                      */
    ADD_L      ( CONST(16), EAX )               /* next r                            */

    ADD_L      ( CONST(4), EDX )                /* next length                       */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */

    JNZ        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                               /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM1 )          /*                 | x2              */

    MOVQ       ( MM0, MM3 )                     /* x1              | x0              */
    ADD_L      ( STRIDE, ECX )                  /* next normal                       */

    /* Prefetch the NEXT normal (after the stride step), matching the other
     * loops; prefetching the element just loaded would gain nothing. */
    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )                     /* x1*x1           | x0*x0           */
    MOVQ       ( MM1, MM4 )                     /*                 | x2              */

    ADD_L      ( CONST(16), EAX )               /* next r                            */
    PFMUL      ( MM1, MM4 )                     /*                 | x2*x2           */

    PFADD      ( MM4, MM3 )                     /* x1*x1           | x0*x0+x2*x2     */
    PFACC      ( MM3, MM3 )                     /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2 */

    PFRSQRT    ( MM3, MM5 )                     /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )                     /* keep the first approximation      */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                     /* refinement step 1                 */
    /* Flags from this SUB feed the JNZ at the loop end; only MMX/3DNow!
     * instructions and moves follow. */
    SUB_L      ( CONST(1), EBP )                /* decrement normal counter          */

    PFRCPIT2   ( MM4, MM5 )                     /* refinement step 2                 */

    PFMUL      ( MM5, MM0 )                     /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) )        /* write new x0, x1                  */

    PFMUL      ( MM5, MM1 )                     /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) )         /* write new x2                      */

    JNZ        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
788
789
790
791
792
793
/*
 * _mesa_3dnow_rescale_normals
 *
 * Arguments are fetched through ARG_SCALE, ARG_IN and ARG_DEST
 * (norm_args.h), which are relative to FRAME_OFFSET.
 *
 * For each of the in->count normals (x0, x1, x2) writes
 *     (x0*scale, x1*scale, x2*scale)
 * to dest->start in 16-byte steps.
 *
 * Register roles: ESI = in, ECX = input cursor, EAX = output cursor,
 *                 EDX = counter.
 * dest->count is set to in->count even when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )  /* EDX = in->count                   */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count           */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* ECX = in->start                   */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start                 */

    CMP_L      ( CONST(0), EDX )                /* empty input: skip the 3DNow! part */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )               /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )                     /* scale           | scale           */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )             /* x1              | x0              */
    MOVD       ( REGOFF(8, ECX), MM2 )          /*                 | x2              */

    PFMUL      ( MM0, MM1 )                     /* x1*scale        | x0*scale        */
    PFMUL      ( MM0, MM2 )                     /*                 | x2*scale        */

    MOVQ       ( MM1, REGIND(EAX) )             /* store r0, r1                      */
    MOVD       ( MM2, REGOFF(8, EAX) )          /* store r2                          */

    ADD_L      ( STRIDE, ECX )                  /* advance input cursor              */

    PREFETCH   ( REGIND(ECX) )

    ADD_L      ( CONST(16), EAX )               /* advance output cursor             */

    SUB_L      ( CONST(1), EDX )                /* one normal done                   */
    JNZ        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
847
848 #endif
849
850 #if defined (__ELF__) && defined (__linux__)
851 .section .note.GNU-stack,"",%progbits
852 #endif