move mesa32 over to new dir
[reactos.git] / reactos / lib / mesa32 / src / sparc / xform.S
1 /* $Id: xform.S,v 1.4 2005/07/28 00:11:11 idr Exp $ */
2
3 /* TODO
4 *
5 * 1) It would be nice if load/store double could be used
6 * at least for the matrix parts. I think for the matrices
7 * it is safe, but for the vertices it probably is not due to
8 * things like glInterleavedArrays etc.
9 *
10 * UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
11 *
12 * 2) One extremely slick trick would be if we could enclose
13 * groups of xform calls on the same vertices such that
14 * we just load the matrix into f16-->f31 before the calls
15 * and then we would not have to do them here. This may be
16 * tricky and not much of a gain though.
17 */
18
19 #include "sparc_matrix.h"
20
21 #if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
22 /* Solaris requires this for 64-bit. */
23 .register %g2, #scratch
24 .register %g3, #scratch
25 #endif
26
27 .text
28 .align 64
29
/*
 * __set_v4f_1: file-local tail routine.
 * In: %o0 = vector structure to update.
 * Stores 1 to [%o0 + V4F_SIZE], ORs VEC_SIZE_1 into [%o0 + V4F_FLAGS]
 * and returns to the caller with retl.  Uses %g1 and %g2 as scratch.
 */
30 __set_v4f_1:
31 ld [%o0 + V4F_FLAGS], %g2
32 mov 1, %g1
33 st %g1, [%o0 + V4F_SIZE]
34 or %g2, VEC_SIZE_1, %g2
35 retl
36 st %g2, [%o0 + V4F_FLAGS] /* flags written back in the retl delay slot */
/*
 * __set_v4f_2: file-local tail routine.
 * In: %o0 = vector structure to update.
 * Stores 2 to [%o0 + V4F_SIZE], ORs VEC_SIZE_2 into [%o0 + V4F_FLAGS]
 * and returns to the caller with retl.  Uses %g1 and %g2 as scratch.
 */
37 __set_v4f_2:
38 ld [%o0 + V4F_FLAGS], %g2
39 mov 2, %g1
40 st %g1, [%o0 + V4F_SIZE]
41 or %g2, VEC_SIZE_2, %g2
42 retl
43 st %g2, [%o0 + V4F_FLAGS] /* flags written back in the retl delay slot */
/*
 * __set_v4f_3: file-local tail routine.
 * In: %o0 = vector structure to update.
 * Stores 3 to [%o0 + V4F_SIZE], ORs VEC_SIZE_3 into [%o0 + V4F_FLAGS]
 * and returns to the caller with retl.  Uses %g1 and %g2 as scratch.
 */
44 __set_v4f_3:
45 ld [%o0 + V4F_FLAGS], %g2
46 mov 3, %g1
47 st %g1, [%o0 + V4F_SIZE]
48 or %g2, VEC_SIZE_3, %g2
49 retl
50 st %g2, [%o0 + V4F_FLAGS] /* flags written back in the retl delay slot */
/*
 * __set_v4f_4: file-local tail routine.
 * In: %o0 = vector structure to update.
 * Stores 4 to [%o0 + V4F_SIZE], ORs VEC_SIZE_4 into [%o0 + V4F_FLAGS]
 * and returns to the caller with retl.  Uses %g1 and %g2 as scratch.
 */
51 __set_v4f_4:
52 ld [%o0 + V4F_FLAGS], %g2
53 mov 4, %g1
54 st %g1, [%o0 + V4F_SIZE]
55 or %g2, VEC_SIZE_4, %g2
56 retl
57 st %g2, [%o0 + V4F_FLAGS] /* flags written back in the retl delay slot */
58
59 /* First the raw versions. */
60
/*
 * _mesa_sparc_transform_points1_general
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes four floats:
 *      out[k] = x * M<k> + M<12+k>,  k = 0..3
 * The source pointer advances by V4F_STRIDE bytes, the destination by
 * 0x10 bytes per element.  The source count is copied to the
 * destination.  Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass (%o2 = count with bit 0 cleared);
 * label 2 handles a single odd trailing element.
 * Exits by branching to __set_v4f_4, which returns to the caller.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 * Scratch: %g1 (src), %g2 (dst), %g3 (count), %o1 (index), %o2, %o5.
 */
61 .globl _mesa_sparc_transform_points1_general
62 _mesa_sparc_transform_points1_general:
63 ld [%o2 + V4F_STRIDE], %o5
64 LDPTR [%o2 + V4F_START], %g1
65 LDPTR [%o0 + V4F_START], %g2
66 ld [%o2 + V4F_COUNT], %g3
67
68 LDMATRIX_0_1_2_3_12_13_14_15(%o1)
69
70 cmp %g3, 1
71 st %g3, [%o0 + V4F_COUNT]
72 bl 3f
73 clr %o1
74
75 be 2f
76 andn %g3, 1, %o2
77
78 1: ld [%g1 + 0x00], %f0 ! LSU Group
79 add %g1, %o5, %g1 ! IEU0
80 ld [%g1 + 0x00], %f8 ! LSU Group
81 add %o1, 2, %o1 ! IEU0
82 add %g1, %o5, %g1 ! IEU1
83 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
84 fmuls %f0, M1, %f2 ! FGM Group
85 fmuls %f0, M2, %f3 ! FGM Group
86 fmuls %f0, M3, %f4 ! FGM Group
87 fmuls %f8, M0, %f9 ! FGM Group f1 available
88 fadds %f1, M12, %f1 ! FGA
89 st %f1, [%g2 + 0x00] ! LSU
90 fmuls %f8, M1, %f10 ! FGM Group f2 available
91 fadds %f2, M13, %f2 ! FGA
92 st %f2, [%g2 + 0x04] ! LSU
93 fmuls %f8, M2, %f11 ! FGM Group f3 available
94 fadds %f3, M14, %f3 ! FGA
95 st %f3, [%g2 + 0x08] ! LSU
96 fmuls %f8, M3, %f12 ! FGM Group f4 available
97 fadds %f4, M15, %f4 ! FGA
98 st %f4, [%g2 + 0x0c] ! LSU
99 fadds %f9, M12, %f9 ! FGA Group f9 available
100 st %f9, [%g2 + 0x10] ! LSU
101 fadds %f10, M13, %f10 ! FGA Group f10 available
102 st %f10, [%g2 + 0x14] ! LSU
103 fadds %f11, M14, %f11 ! FGA Group f11 available
104 st %f11, [%g2 + 0x18] ! LSU
105 fadds %f12, M15, %f12 ! FGA Group f12 available
106 st %f12, [%g2 + 0x1c] ! LSU
107 cmp %o1, %o2 ! IEU1
108 bne 1b ! CTI
109 add %g2, 0x20, %g2 ! IEU0 Group
110
111 cmp %o1, %g3
112 be 3f
113 nop
114
115 2: ld [%g1 + 0x00], %f0 ! LSU Group
116 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
117 fmuls %f0, M1, %f2 ! FGM Group
118 fmuls %f0, M2, %f3 ! FGM Group
119 fmuls %f0, M3, %f4 ! FGM Group
120 fadds %f1, M12, %f1 ! FGA Group
121 st %f1, [%g2 + 0x00] ! LSU
122 fadds %f2, M13, %f2 ! FGA Group
123 st %f2, [%g2 + 0x04] ! LSU
124 fadds %f3, M14, %f3 ! FGA Group
125 st %f3, [%g2 + 0x08] ! LSU
126 fadds %f4, M15, %f4 ! FGA Group
127 st %f4, [%g2 + 0x0c] ! LSU
128
129 3:
130 ba __set_v4f_4
131 nop
132
/*
 * _mesa_sparc_transform_points1_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read;
 *      it is reused as the element index).
 * Returns immediately (label 4) when %o0 == %o2.  Otherwise copies the
 * first float of every source element to the first float of the
 * corresponding destination element.  The source advances by V4F_STRIDE
 * bytes, the destination by 0x10 bytes per element.  The source count
 * is copied to the destination.  Nothing is copied when count < 1.
 * Loop 1 copies two elements per pass; label 2 copies an odd trailing
 * element.  Exits by branching to __set_v4f_1, which returns.
 */
133 .globl _mesa_sparc_transform_points1_identity
134 _mesa_sparc_transform_points1_identity:
135 cmp %o0, %o2
136 be 4f
137 ld [%o2 + V4F_STRIDE], %o5
138 LDPTR [%o2 + V4F_START], %g1
139 LDPTR [%o0 + V4F_START], %g2
140 ld [%o2 + V4F_COUNT], %g3
141
142 cmp %g3, 1
143 st %g3, [%o0 + V4F_COUNT]
144 bl 3f
145 clr %o1
146
147 be 2f
148 andn %g3, 1, %o2
149
150 1: ld [%g1 + 0x00], %f0 ! LSU Group
151 add %g1, %o5, %g1 ! IEU0
152 ld [%g1 + 0x00], %f1 ! LSU Group
153 add %o1, 2, %o1 ! IEU0
154 add %g1, %o5, %g1 ! IEU1
155 st %f0, [%g2 + 0x00] ! LSU Group
156 cmp %o1, %o2 ! IEU1
157 st %f1, [%g2 + 0x10] ! LSU Group
158 bne 1b ! CTI
159 add %g2, 0x20, %g2 ! IEU0
160
161 cmp %o1, %g3
162 be 3f
163 nop
164
165 2: ld [%g1 + 0x00], %f0
166 addx %g0, %g0, %g0 /* result goes to %g0 and is discarded; NOTE(review): looks like scheduling filler -- confirm */
167 st %f0, [%g2 + 0x00]
168
169 3:
170 ba __set_v4f_1
171 nop
172
173 4: retl
174 nop
175
/*
 * _mesa_sparc_transform_points1_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes two floats:
 *      out[0] = x * M0 + M12
 *      out[1] = x * M1 + M13
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_2, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
176 .globl _mesa_sparc_transform_points1_2d
177 _mesa_sparc_transform_points1_2d:
178 ld [%o2 + V4F_STRIDE], %o5
179 LDPTR [%o2 + V4F_START], %g1
180 LDPTR [%o0 + V4F_START], %g2
181 ld [%o2 + V4F_COUNT], %g3
182
183 LDMATRIX_0_1_12_13(%o1)
184
185 cmp %g3, 1
186 st %g3, [%o0 + V4F_COUNT]
187 bl 3f
188 clr %o1
189
190 be 2f
191 andn %g3, 1, %o2
192
193 1: ld [%g1 + 0x00], %f0 ! LSU Group
194 add %g1, %o5, %g1 ! IEU0
195 ld [%g1 + 0x00], %f8 ! LSU Group
196 add %o1, 2, %o1 ! IEU0
197 add %g1, %o5, %g1 ! IEU1
198 fmuls %f0, M0, %f1 ! FGM Group
199 fmuls %f0, M1, %f2 ! FGM Group
200 fmuls %f8, M0, %f9 ! FGM Group
201 fmuls %f8, M1, %f10 ! FGM Group
202 fadds %f1, M12, %f3 ! FGA Group f1 available
203 st %f3, [%g2 + 0x00] ! LSU
204 fadds %f2, M13, %f4 ! FGA Group f2 available
205 st %f4, [%g2 + 0x04] ! LSU
206 fadds %f9, M12, %f11 ! FGA Group f9 available
207 st %f11, [%g2 + 0x10] ! LSU
208 fadds %f10, M13, %f12 ! FGA Group f10 available
209 st %f12, [%g2 + 0x14] ! LSU
210 cmp %o1, %o2 ! IEU1
211 bne 1b ! CTI
212 add %g2, 0x20, %g2 ! IEU0 Group
213
214 cmp %o1, %g3
215 be 3f
216 nop
217
218 2: ld [%g1 + 0x00], %f0
219 fmuls %f0, M0, %f1
220 fmuls %f0, M1, %f2
221 fadds %f1, M12, %f3
222 st %f3, [%g2 + 0x00]
223 fadds %f2, M13, %f4
224 st %f4, [%g2 + 0x04]
225
226 3:
227 ba __set_v4f_2
228 nop
229
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes two floats:
 *      out[0] = x * M0 + M12
 *      out[1] = M13
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_2, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
230 .globl _mesa_sparc_transform_points1_2d_no_rot
231 _mesa_sparc_transform_points1_2d_no_rot:
232 ld [%o2 + V4F_STRIDE], %o5
233 LDPTR [%o2 + V4F_START], %g1
234 LDPTR [%o0 + V4F_START], %g2
235 ld [%o2 + V4F_COUNT], %g3
236
237 LDMATRIX_0_12_13(%o1)
238
239 cmp %g3, 1
240 st %g3, [%o0 + V4F_COUNT]
241 bl 3f
242 clr %o1
243
244 be 2f
245 andn %g3, 1, %o2
246
247 1: ld [%g1 + 0x00], %f0 ! LSU Group
248 add %g1, %o5, %g1 ! IEU0
249 ld [%g1 + 0x00], %f4 ! LSU Group
250 add %o1, 2, %o1 ! IEU0
251 add %g1, %o5, %g1 ! IEU1
252 fmuls %f0, M0, %f1 ! FGM Group
253 fmuls %f4, M0, %f5 ! FGM Group
254 fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
255 st %f3, [%g2 + 0x00] ! LSU
256 st M13, [%g2 + 0x04] ! LSU Group, f5 available
257 fadds %f5, M12, %f6 ! FGA
258 st %f6, [%g2 + 0x10] ! LSU Group
259 st M13, [%g2 + 0x14] ! LSU Group
260 cmp %o1, %o2 ! IEU1
261 bne 1b ! CTI
262 add %g2, 0x20, %g2 ! IEU0 Group
263
264 cmp %o1, %g3
265 be 3f
266 nop
267
268 2: ld [%g1 + 0x00], %f0
269 fmuls %f0, M0, %f1
270 fadds %f1, M12, %f3
271 st %f3, [%g2 + 0x00]
272 st M13, [%g2 + 0x04]
273
274 3:
275 ba __set_v4f_2
276 nop
277
/*
 * _mesa_sparc_transform_points1_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes three floats:
 *      out[k] = x * M<k> + M<12+k>,  k = 0..2
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
278 .globl _mesa_sparc_transform_points1_3d
279 _mesa_sparc_transform_points1_3d:
280 ld [%o2 + V4F_STRIDE], %o5
281 LDPTR [%o2 + V4F_START], %g1
282 LDPTR [%o0 + V4F_START], %g2
283 ld [%o2 + V4F_COUNT], %g3
284
285 LDMATRIX_0_1_2_12_13_14(%o1)
286
287 cmp %g3, 1
288 st %g3, [%o0 + V4F_COUNT]
289 bl 3f
290 clr %o1
291
292 be 2f
293 andn %g3, 1, %o2
294
295 1: ld [%g1 + 0x00], %f0 ! LSU Group
296 add %g1, %o5, %g1 ! IEU0
297 ld [%g1 + 0x00], %f4 ! LSU Group
298 add %o1, 2, %o1 ! IEU0
299 add %g1, %o5, %g1 ! IEU1
300 fmuls %f0, M0, %f1 ! FGM Group
301 fmuls %f0, M1, %f2 ! FGM Group
302 fmuls %f0, M2, %f3 ! FGM Group
303 fmuls %f4, M0, %f5 ! FGM Group
304 fadds %f1, M12, %f1 ! FGA Group, f1 available
305 st %f1, [%g2 + 0x00] ! LSU
306 fmuls %f4, M1, %f6 ! FGM
307 fadds %f2, M13, %f2 ! FGA Group, f2 available
308 st %f2, [%g2 + 0x04] ! LSU
309 fmuls %f4, M2, %f7 ! FGM
310 fadds %f3, M14, %f3 ! FGA Group, f3 available
311 st %f3, [%g2 + 0x08] ! LSU
312 fadds %f5, M12, %f5 ! FGA Group, f5 available
313 st %f5, [%g2 + 0x10] ! LSU
314 fadds %f6, M13, %f6 ! FGA Group, f6 available
315 st %f6, [%g2 + 0x14] ! LSU
316 fadds %f7, M14, %f7 ! FGA Group, f7 available
317 st %f7, [%g2 + 0x18] ! LSU
318 cmp %o1, %o2 ! IEU1
319 bne 1b ! CTI
320 add %g2, 0x20, %g2 ! IEU0 Group
321
322 cmp %o1, %g3
323 be 3f
324 nop
325
326 2: ld [%g1 + 0x00], %f0
327 fmuls %f0, M0, %f1
328 fmuls %f0, M1, %f2
329 fmuls %f0, M2, %f3
330 fadds %f1, M12, %f1
331 st %f1, [%g2 + 0x00]
332 fadds %f2, M13, %f2
333 st %f2, [%g2 + 0x04]
334 fadds %f3, M14, %f3
335 st %f3, [%g2 + 0x08]
336
337 3:
338 ba __set_v4f_3
339 nop
340
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes three floats:
 *      out[0] = x * M0 + M12
 *      out[1] = M13
 *      out[2] = M14
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
341 .globl _mesa_sparc_transform_points1_3d_no_rot
342 _mesa_sparc_transform_points1_3d_no_rot:
343 ld [%o2 + V4F_STRIDE], %o5
344 LDPTR [%o2 + V4F_START], %g1
345 LDPTR [%o0 + V4F_START], %g2
346 ld [%o2 + V4F_COUNT], %g3
347
348 LDMATRIX_0_12_13_14(%o1)
349
350 cmp %g3, 1
351 st %g3, [%o0 + V4F_COUNT]
352 bl 3f
353 clr %o1
354
355 be 2f
356 andn %g3, 1, %o2
357
358 1: ld [%g1 + 0x00], %f0 ! LSU Group
359 add %g1, %o5, %g1 ! IEU0
360 ld [%g1 + 0x00], %f2 ! LSU Group
361 add %o1, 2, %o1 ! IEU0
362 add %g1, %o5, %g1 ! IEU1
363 fmuls %f0, M0, %f1 ! FGM Group
364 fmuls %f2, M0, %f3 ! FGM Group
365 fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
366 st %f1, [%g2 + 0x00] ! LSU
367 fadds %f3, M12, %f3 ! FGA Group, f3 available
368 st M13, [%g2 + 0x04] ! LSU
369 st M14, [%g2 + 0x08] ! LSU Group
370 st %f3, [%g2 + 0x10] ! LSU Group
371 st M13, [%g2 + 0x14] ! LSU Group
372 st M14, [%g2 + 0x18] ! LSU Group
373 cmp %o1, %o2 ! IEU1
374 bne 1b ! CTI
375 add %g2, 0x20, %g2 ! IEU0 Group
376
377 cmp %o1, %g3
378 be 3f
379 nop
380
381 2: ld [%g1 + 0x00], %f0
382 fmuls %f0, M0, %f1
383 fadds %f1, M12, %f1
384 st %f1, [%g2 + 0x00]
385 st M13, [%g2 + 0x04]
386 st M14, [%g2 + 0x08]
387
388 3:
389 ba __set_v4f_3
390 nop
391
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads one float x and writes four words:
 *      out[0] = x * M0
 *      out[1] = 0      (zero word stored from %g0)
 *      out[2] = M14
 *      out[3] = 0      (zero word stored from %g0)
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
392 .globl _mesa_sparc_transform_points1_perspective
393 _mesa_sparc_transform_points1_perspective:
394 ld [%o2 + V4F_STRIDE], %o5
395 LDPTR [%o2 + V4F_START], %g1
396 LDPTR [%o0 + V4F_START], %g2
397 ld [%o2 + V4F_COUNT], %g3
398
399 LDMATRIX_0_14(%o1)
400
401 cmp %g3, 1
402 st %g3, [%o0 + V4F_COUNT]
403 bl 3f
404 clr %o1
405
406 be 2f
407 andn %g3, 1, %o2
408
409 1: ld [%g1 + 0x00], %f0 ! LSU Group
410 add %g1, %o5, %g1 ! IEU0
411 ld [%g1 + 0x00], %f2 ! LSU Group
412 add %o1, 2, %o1 ! IEU0
413 add %g1, %o5, %g1 ! IEU1
414 fmuls %f0, M0, %f1 ! FGM Group
415 st %f1, [%g2 + 0x00] ! LSU
416 fmuls %f2, M0, %f3 ! FGM Group
417 st %g0, [%g2 + 0x04] ! LSU
418 st M14, [%g2 + 0x08] ! LSU Group
419 st %g0, [%g2 + 0x0c] ! LSU Group
420 st %f3, [%g2 + 0x10] ! LSU Group
421 st %g0, [%g2 + 0x14] ! LSU Group
422 st M14, [%g2 + 0x18] ! LSU Group
423 st %g0, [%g2 + 0x1c] ! LSU Group
424 cmp %o1, %o2 ! IEU1
425 bne 1b ! CTI
426 add %g2, 0x20, %g2 ! IEU0 Group
427
428 cmp %o1, %g3
429 be 3f
430 nop
431
432 2: ld [%g1 + 0x00], %f0
433 fmuls %f0, M0, %f1
434 st %f1, [%g2 + 0x00]
435 st %g0, [%g2 + 0x04]
436 st M14, [%g2 + 0x08]
437 st %g0, [%g2 + 0x0c]
438
439 3:
440 ba __set_v4f_4
441 nop
442
/*
 * _mesa_sparc_transform_points2_general
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes four:
 *      out[k] = (x * M<k> + M<12+k>) + y * M<4+k>,  k = 0..3
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
443 .globl _mesa_sparc_transform_points2_general
444 _mesa_sparc_transform_points2_general:
445 ld [%o2 + V4F_STRIDE], %o5
446 LDPTR [%o2 + V4F_START], %g1
447 LDPTR [%o0 + V4F_START], %g2
448 ld [%o2 + V4F_COUNT], %g3
449
450 LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
451
452 cmp %g3, 0
453 st %g3, [%o0 + V4F_COUNT]
454 be 2f
455 clr %o1
456
457 1: ld [%g1 + 0x00], %f0 ! LSU Group
458 ld [%g1 + 0x04], %f1 ! LSU Group
459 add %o1, 1, %o1 ! IEU0
460 add %g1, %o5, %g1 ! IEU1
461 fmuls %f0, M0, %f2 ! FGM Group
462 fmuls %f0, M1, %f3 ! FGM Group
463 fmuls %f0, M2, %f4 ! FGM Group
464 fmuls %f0, M3, %f5 ! FGM Group
465 fadds %f2, M12, %f2 ! FGA Group f2 available
466 fmuls %f1, M4, %f6 ! FGM
467 fadds %f3, M13, %f3 ! FGA Group f3 available
468 fmuls %f1, M5, %f7 ! FGM
469 fadds %f4, M14, %f4 ! FGA Group f4 available
470 fmuls %f1, M6, %f8 ! FGM
471 fadds %f5, M15, %f5 ! FGA Group f5 available
472 fmuls %f1, M7, %f9 ! FGM
473 fadds %f2, %f6, %f2 ! FGA Group f6 available
474 st %f2, [%g2 + 0x00] ! LSU
475 fadds %f3, %f7, %f3 ! FGA Group f7 available
476 st %f3, [%g2 + 0x04] ! LSU
477 fadds %f4, %f8, %f4 ! FGA Group f8 available
478 st %f4, [%g2 + 0x08] ! LSU
479 fadds %f5, %f9, %f5 ! FGA Group f9 available
480 st %f5, [%g2 + 0x0c] ! LSU
481 cmp %o1, %g3 ! IEU1
482 bne 1b ! CTI
483 add %g2, 0x10, %g2 ! IEU0 Group
484 2:
485 ba __set_v4f_4
486 nop
487
/*
 * _mesa_sparc_transform_points2_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read;
 *      it is reused as the element index).
 * Returns immediately (label 3) when %o2 == %o0.  Otherwise copies the
 * first two floats of every source element to the destination, one
 * element per loop pass.  The source advances by V4F_STRIDE bytes, the
 * destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_2, which returns.
 */
488 .globl _mesa_sparc_transform_points2_identity
489 _mesa_sparc_transform_points2_identity:
490 cmp %o2, %o0
491 be 3f
492 ld [%o2 + V4F_STRIDE], %o5
493 LDPTR [%o2 + V4F_START], %g1
494 LDPTR [%o0 + V4F_START], %g2
495 ld [%o2 + V4F_COUNT], %g3
496
497 cmp %g3, 0
498 st %g3, [%o0 + V4F_COUNT]
499 be 2f
500 clr %o1
501
502 1: ld [%g1 + 0x00], %f0 ! LSU Group
503 add %o1, 1, %o1 ! IEU0
504 ld [%g1 + 0x04], %f1 ! LSU Group
505 add %g1, %o5, %g1 ! IEU0
506 cmp %o1, %g3 ! IEU1
507 st %f0, [%g2 + 0x00] ! LSU Group
508 st %f1, [%g2 + 0x04] ! LSU Group
509 bne 1b ! CTI
510 add %g2, 0x10, %g2 ! IEU0
511 2:
512 ba __set_v4f_2
513 nop
514
515 3: retl
516 nop
517
/*
 * _mesa_sparc_transform_points2_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes two:
 *      out[0] = (x * M0 + M12) + y * M4
 *      out[1] = (x * M1 + M13) + y * M5
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_2, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
518 .globl _mesa_sparc_transform_points2_2d
519 _mesa_sparc_transform_points2_2d:
520 ld [%o2 + V4F_STRIDE], %o5
521 LDPTR [%o2 + V4F_START], %g1
522 LDPTR [%o0 + V4F_START], %g2
523 ld [%o2 + V4F_COUNT], %g3
524
525 LDMATRIX_0_1_4_5_12_13(%o1)
526
527 cmp %g3, 1
528 st %g3, [%o0 + V4F_COUNT]
529 bl 3f
530 clr %o1
531
532 be 2f
533 andn %g3, 1, %o2
534
535 1: ld [%g1 + 0x00], %f0 ! LSU Group
536 ld [%g1 + 0x04], %f1 ! LSU Group
537 add %o1, 2, %o1 ! IEU0
538 add %g1, %o5, %g1 ! IEU1
539 fmuls %f0, M0, %f2 ! FGM
540 ld [%g1 + 0x00], %f8 ! LSU Group
541 fmuls %f0, M1, %f3 ! FGM
542 ld [%g1 + 0x04], %f9 ! LSU Group
543 fmuls %f1, M4, %f6 ! FGM
544 fmuls %f1, M5, %f7 ! FGM Group
545 add %g1, %o5, %g1 ! IEU0
546 fmuls %f8, M0, %f10 ! FGM Group f2 available
547 fadds %f2, M12, %f2 ! FGA
548 fmuls %f8, M1, %f11 ! FGM Group f3 available
549 fadds %f3, M13, %f3 ! FGA
550 fmuls %f9, M4, %f12 ! FGM Group
551 fmuls %f9, M5, %f13 ! FGM Group
552 fadds %f10, M12, %f10 ! FGA Group f2, f10 available
553 fadds %f2, %f6, %f2 ! FGA Group f3, f11 available
554 st %f2, [%g2 + 0x00] ! LSU
555 fadds %f11, M13, %f11 ! FGA Group f12 available
556 fadds %f3, %f7, %f3 ! FGA Group f13 available
557 st %f3, [%g2 + 0x04] ! LSU
558 fadds %f10, %f12, %f10 ! FGA Group f10 available
559 st %f10, [%g2 + 0x10] ! LSU
560 fadds %f11, %f13, %f11 ! FGA Group f11 available
561 st %f11, [%g2 + 0x14] ! LSU
562 cmp %o1, %o2 ! IEU1
563 bne 1b ! CTI
564 add %g2, 0x20, %g2 ! IEU0 Group
565
566 cmp %o1, %g3
567 be 3f
568 nop
569
570 2: ld [%g1 + 0x00], %f0 ! LSU Group
571 ld [%g1 + 0x04], %f1 ! LSU Group
572 fmuls %f0, M0, %f2 ! FGM Group
573 fmuls %f0, M1, %f3 ! FGM Group
574 fmuls %f1, M4, %f6 ! FGM Group
575 fmuls %f1, M5, %f7 ! FGM Group
576 fadds %f2, M12, %f2 ! FGA Group f2 available
577 fadds %f3, M13, %f3 ! FGA Group f3 available
578 fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available
579 st %f2, [%g2 + 0x00] ! LSU
580 fadds %f3, %f7, %f3 ! FGA Group f3 available
581 st %f3, [%g2 + 0x04] ! LSU
582
583 3:
584 ba __set_v4f_2
585 nop
586
/*
 * _mesa_sparc_transform_points2_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes two:
 *      out[0] = x * M0 + M12
 *      out[1] = y * M5 + M13
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_2, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
587 .globl _mesa_sparc_transform_points2_2d_no_rot
588 _mesa_sparc_transform_points2_2d_no_rot:
589 ld [%o2 + V4F_STRIDE], %o5
590 LDPTR [%o2 + V4F_START], %g1
591 LDPTR [%o0 + V4F_START], %g2
592 ld [%o2 + V4F_COUNT], %g3
593
594 LDMATRIX_0_5_12_13(%o1)
595
596 cmp %g3, 1
597 st %g3, [%o0 + V4F_COUNT]
598 bl 3f
599 clr %o1
600
601 be 2f
602 andn %g3, 1, %o2
603
604 1: ld [%g1 + 0x00], %f0 ! LSU Group
605 ld [%g1 + 0x04], %f1 ! LSU Group
606 add %o1, 2, %o1 ! IEU0
607 add %g1, %o5, %g1 ! IEU1
608 ld [%g1 + 0x00], %f4 ! LSU Group
609 fmuls %f0, M0, %f2 ! FGM
610 ld [%g1 + 0x04], %f5 ! LSU Group
611 fmuls %f1, M5, %f3 ! FGM
612 fmuls %f4, M0, %f6 ! FGM Group
613 add %g1, %o5, %g1 ! IEU0
614 fmuls %f5, M5, %f7 ! FGM Group
615 fadds %f2, M12, %f2 ! FGA Group f2 available
616 st %f2, [%g2 + 0x00] ! LSU
617 fadds %f3, M13, %f3 ! FGA Group f3 available
618 st %f3, [%g2 + 0x04] ! LSU
619 fadds %f6, M12, %f6 ! FGA Group f6 available
620 st %f6, [%g2 + 0x10] ! LSU
621 fadds %f7, M13, %f7 ! FGA Group f7 available
622 st %f7, [%g2 + 0x14] ! LSU
623 cmp %o1, %o2 ! IEU1
624 bne 1b ! CTI
625 add %g2, 0x20, %g2 ! IEU0 Group
626
627 cmp %o1, %g3
628 be 3f
629 nop
630
631 2: ld [%g1 + 0x00], %f0 ! LSU Group
632 ld [%g1 + 0x04], %f1 ! LSU Group
633 fmuls %f0, M0, %f2 ! FGM Group
634 fmuls %f1, M5, %f3 ! FGM Group
635 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
636 st %f2, [%g2 + 0x00] ! LSU
637 fadds %f3, M13, %f3 ! FGA Group f3 available
638 st %f3, [%g2 + 0x04] ! LSU
639
640 3:
641 ba __set_v4f_2
642 nop
643
/*
 * _mesa_sparc_transform_points2_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes three:
 *      out[k] = (x * M<k> + M<12+k>) + y * M<4+k>,  k = 0..2
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  Exits by branching to __set_v4f_3, which returns.
 *
 * Both V4F_START pointers are fetched with LDPTR, as in every other
 * routine of this file, rather than with a plain 32-bit ld.
 * NOTE(review): LDPTR and M<n> come from sparc_matrix.h; LDPTR is
 * presumably the pointer-width load and M<n> FP register names --
 * confirm there.
 */
644 /* orig: 12 cycles */
645 .globl _mesa_sparc_transform_points2_3d
646 _mesa_sparc_transform_points2_3d:
647 ld [%o2 + V4F_STRIDE], %o5
648 LDPTR [%o2 + V4F_START], %g1
649 LDPTR [%o0 + V4F_START], %g2
650 ld [%o2 + V4F_COUNT], %g3
651
652 LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
653
654 cmp %g3, 1
655 st %g3, [%o0 + V4F_COUNT]
656 bl 3f
657 clr %o1
658
659 be 2f
660 andn %g3, 1, %o2
661
662 1: ld [%g1 + 0x00], %f0 ! LSU Group
663 ld [%g1 + 0x04], %f1 ! LSU Group
664 add %o1, 2, %o1 ! IEU0
665 add %g1, %o5, %g1 ! IEU1
666 ld [%g1 + 0x00], %f9 ! LSU Group
667 fmuls %f0, M0, %f2 ! FGM
668 ld [%g1 + 0x04], %f10 ! LSU Group
669 fmuls %f0, M1, %f3 ! FGM
670 fmuls %f0, M2, %f4 ! FGM Group
671 add %g1, %o5, %g1 ! IEU0
672 fmuls %f1, M4, %f6 ! FGM Group
673 fmuls %f1, M5, %f7 ! FGM Group f2 available
674 fadds %f2, M12, %f2 ! FGA
675 fmuls %f1, M6, %f8 ! FGM Group f3 available
676 fadds %f3, M13, %f3 ! FGA
677 fmuls %f9, M0, %f11 ! FGM Group f4 available
678 fadds %f4, M14, %f4 ! FGA
679 fmuls %f9, M1, %f12 ! FGM Group f6 available
680 fmuls %f9, M2, %f13 ! FGM Group f2, f7 available
681 fadds %f2, %f6, %f2 ! FGA
682 st %f2, [%g2 + 0x00] ! LSU
683 fmuls %f10, M4, %f14 ! FGM Group f3, f8 available
684 fadds %f3, %f7, %f3 ! FGA
685 st %f3, [%g2 + 0x04] ! LSU
686 fmuls %f10, M5, %f15 ! FGM Group f4, f11 available
687 fadds %f11, M12, %f11 ! FGA
688 fmuls %f10, M6, %f0 ! FGM Group f12 available
689 fadds %f12, M13, %f12 ! FGA
690 fadds %f13, M14, %f13 ! FGA Group f13 available
691 fadds %f4, %f8, %f4 ! FGA Group f14 available
692 st %f4, [%g2 + 0x08] ! LSU
693 fadds %f11, %f14, %f11 ! FGA Group f15, f11 available
694 st %f11, [%g2 + 0x10] ! LSU
695 fadds %f12, %f15, %f12 ! FGA Group f0, f12 available
696 st %f12, [%g2 + 0x14] ! LSU
697 fadds %f13, %f0, %f13 ! FGA Group f13 available
698 st %f13, [%g2 + 0x18] ! LSU
699
700 cmp %o1, %o2 ! IEU1
701 bne 1b ! CTI
702 add %g2, 0x20, %g2 ! IEU0 Group
703
704 cmp %o1, %g3
705 be 3f
706 nop
707
708 2: ld [%g1 + 0x00], %f0 ! LSU Group
709 ld [%g1 + 0x04], %f1 ! LSU Group
710 fmuls %f0, M0, %f2 ! FGM Group
711 fmuls %f0, M1, %f3 ! FGM Group
712 fmuls %f0, M2, %f4 ! FGM Group
713 fmuls %f1, M4, %f6 ! FGM Group
714 fmuls %f1, M5, %f7 ! FGM Group f2 available
715 fadds %f2, M12, %f2 ! FGA
716 fmuls %f1, M6, %f8 ! FGM Group f3 available
717 fadds %f3, M13, %f3 ! FGA
718 fadds %f4, M14, %f4 ! FGA Group f4 available
719 fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available
720 st %f2, [%g2 + 0x00] ! LSU
721 fadds %f3, %f7, %f3 ! FGA Group f3, f8 available
722 st %f3, [%g2 + 0x04] ! LSU
723 fadds %f4, %f8, %f4 ! FGA Group f4 available
724 st %f4, [%g2 + 0x08] ! LSU
725
726 3:
727 ba __set_v4f_3
728 nop
729
/*
 * _mesa_sparc_transform_points2_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro and read again at label 3), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes three:
 *      out[0] = x * M0 + M12
 *      out[1] = y * M5 + M13
 *      out[2] = M14
 * The source advances by V4F_STRIDE bytes, the destination by 0x10
 * bytes per element.  The source count is copied to the destination.
 * Nothing is transformed when count < 1.
 * Loop 1 handles two elements per pass; label 2 handles an odd trailing
 * element.  The element index lives in %o3 here, so %o1 still holds the
 * matrix pointer at label 3.
 * Exit: the 32-bit word at matrix offset 14 * 4 is loaded as an integer.
 * A non-zero word branches to __set_v4f_3, a zero word to __set_v4f_2;
 * either one returns to the caller.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
730 .globl _mesa_sparc_transform_points2_3d_no_rot
731 _mesa_sparc_transform_points2_3d_no_rot:
732 ld [%o2 + V4F_STRIDE], %o5
733 LDPTR [%o2 + V4F_START], %g1
734 LDPTR [%o0 + V4F_START], %g2
735 ld [%o2 + V4F_COUNT], %g3
736
737 LDMATRIX_0_5_12_13_14(%o1)
738
739 cmp %g3, 1
740 st %g3, [%o0 + V4F_COUNT]
741 bl 3f
742 clr %o3
743
744 be 2f
745 andn %g3, 1, %o2
746
747 1: ld [%g1 + 0x00], %f0 ! LSU Group
748 ld [%g1 + 0x04], %f1 ! LSU Group
749 add %o3, 2, %o3 ! IEU0
750 add %g1, %o5, %g1 ! IEU1
751 ld [%g1 + 0x00], %f4 ! LSU Group
752 fmuls %f0, M0, %f2 ! FGM
753 ld [%g1 + 0x04], %f5 ! LSU Group
754 fmuls %f1, M5, %f3 ! FGM
755 fmuls %f4, M0, %f6 ! FGM Group
756 add %g1, %o5, %g1 ! IEU0
757 fmuls %f5, M5, %f7 ! FGM Group
758 fadds %f2, M12, %f2 ! FGA Group f2 available
759 st %f2, [%g2 + 0x00] ! LSU
760 fadds %f3, M13, %f3 ! FGA Group f3 available
761 st %f3, [%g2 + 0x04] ! LSU
762 fadds %f6, M12, %f6 ! FGA Group f6 available
763 st M14, [%g2 + 0x08] ! LSU
764 fadds %f7, M13, %f7 ! FGA Group f7 available
765 st %f6, [%g2 + 0x10] ! LSU
766 st %f7, [%g2 + 0x14] ! LSU Group
767 st M14, [%g2 + 0x18] ! LSU Group
768 cmp %o3, %o2 ! IEU1
769 bne 1b ! CTI
770 add %g2, 0x20, %g2 ! IEU0 Group
771
772 cmp %o3, %g3
773 be 3f
774 nop
775
776 2: ld [%g1 + 0x00], %f0 ! LSU Group
777 ld [%g1 + 0x04], %f1 ! LSU Group
778 fmuls %f0, M0, %f2 ! FGM Group
779 fmuls %f1, M5, %f3 ! FGM Group
780 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
781 st %f2, [%g2 + 0x00] ! LSU
782 fadds %f3, M13, %f3 ! FGA Group f3 available
783 st %f3, [%g2 + 0x04] ! LSU
784 st M14, [%g2 + 0x08] ! LSU Group
785
786 3: ld [%o1 + (14 * 0x4)], %g3
787 cmp %g3, 0
788 bne __set_v4f_3
789 nop
790 ba __set_v4f_2
791 nop
792
/*
 * _mesa_sparc_transform_points2_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads two floats (x, y) and writes four words:
 *      out[0] = x * M0
 *      out[1] = y * M5
 *      out[2] = M14
 *      out[3] = 0      (zero word stored from %g0)
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
793 .globl _mesa_sparc_transform_points2_perspective
794 _mesa_sparc_transform_points2_perspective:
795 ld [%o2 + V4F_STRIDE], %o5
796 LDPTR [%o2 + V4F_START], %g1
797 LDPTR [%o0 + V4F_START], %g2
798 ld [%o2 + V4F_COUNT], %g3
799
800 LDMATRIX_0_5_14(%o1)
801
802 cmp %g3, 0
803 st %g3, [%o0 + V4F_COUNT]
804 be 2f
805 clr %o1
806
807 1: ld [%g1 + 0x00], %f0
808 ld [%g1 + 0x04], %f1
809 add %o1, 1, %o1
810 add %g1, %o5, %g1
811 fmuls %f0, M0, %f2
812 st %f2, [%g2 + 0x00]
813 fmuls %f1, M5, %f3
814 st %f3, [%g2 + 0x04]
815 st M14, [%g2 + 0x08]
816 st %g0, [%g2 + 0x0c]
817 cmp %o1, %g3
818 bne 1b
819 add %g2, 0x10, %g2
820 2:
821 ba __set_v4f_4
822 nop
823
/*
 * _mesa_sparc_transform_points3_general
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes four:
 *      out[k] = ((x * M<k> + y * M<4+k>) + M<12+k>) + z * M<8+k>,  k = 0..3
 * One element per loop pass.  %f7-%f10 first hold the y products and
 * are then reused for the z products.  The source advances by V4F_STRIDE
 * bytes, the destination by 0x10 bytes per element.  The source count is
 * copied to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
824 .globl _mesa_sparc_transform_points3_general
825 _mesa_sparc_transform_points3_general:
826 ld [%o2 + V4F_STRIDE], %o5
827 LDPTR [%o2 + V4F_START], %g1
828 LDPTR [%o0 + V4F_START], %g2
829 ld [%o2 + V4F_COUNT], %g3
830
831 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
832
833 cmp %g3, 0
834 st %g3, [%o0 + V4F_COUNT]
835 be 2f
836 clr %o1
837
838 1: ld [%g1 + 0x00], %f0 ! LSU Group
839 ld [%g1 + 0x04], %f1 ! LSU Group
840 ld [%g1 + 0x08], %f2 ! LSU Group
841 add %o1, 1, %o1 ! IEU0
842 add %g1, %o5, %g1 ! IEU1
843 fmuls %f0, M0, %f3 ! FGM
844 fmuls %f1, M4, %f7 ! FGM Group
845 fmuls %f0, M1, %f4 ! FGM Group
846 fmuls %f1, M5, %f8 ! FGM Group
847 fmuls %f0, M2, %f5 ! FGM Group f3 available
848 fmuls %f1, M6, %f9 ! FGM Group f7 available
849 fadds %f3, %f7, %f3 ! FGA
850 fmuls %f0, M3, %f6 ! FGM Group f4 available
851 fmuls %f1, M7, %f10 ! FGM Group f8 available
852 fadds %f4, %f8, %f4 ! FGA
853 fmuls %f2, M8, %f7 ! FGM Group f5 available
854 fmuls %f2, M9, %f8 ! FGM Group f9,f3 available
855 fadds %f5, %f9, %f5 ! FGA
856 fmuls %f2, M10, %f9 ! FGM Group f6 available
857 fadds %f6, %f10, %f6 ! FGA Group f10,f4 available
858 fmuls %f2, M11, %f10 ! FGM
859 fadds %f3, M12, %f3 ! FGA Group f7 available
860 fadds %f4, M13, %f4 ! FGA Group f8,f5 available
861 fadds %f5, M14, %f5 ! FGA Group f9 available
862 fadds %f6, M15, %f6 ! FGA Group f10,f6 available
863 fadds %f3, %f7, %f3 ! FGA Group f3 available
864 st %f3, [%g2 + 0x00] ! LSU
865 fadds %f4, %f8, %f4 ! FGA Group f4 available
866 st %f4, [%g2 + 0x04] ! LSU
867 fadds %f5, %f9, %f5 ! FGA Group f5 available
868 st %f5, [%g2 + 0x08] ! LSU
869 fadds %f6, %f10, %f6 ! FGA Group f6 available
870 st %f6, [%g2 + 0x0c] ! LSU
871 cmp %o1, %g3 ! IEU1
872 bne 1b ! CTI
873 add %g2, 0x10, %g2 ! IEU0 Group
874 2:
875 ba __set_v4f_4
876 nop
877
/*
 * _mesa_sparc_transform_points3_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read;
 *      it is reused as the element index).
 * Copies the first three floats of every source element to the
 * destination, one element per loop pass.  The source advances by
 * V4F_STRIDE bytes, the destination by 0x10 bytes per element.  The
 * source count is copied to the destination, and the loop is skipped
 * when count is 0.
 * Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): unlike the points1/points2 identity routines there is
 * no early return when %o0 == %o2 -- confirm this is intended.
 */
878 .globl _mesa_sparc_transform_points3_identity
879 _mesa_sparc_transform_points3_identity:
880 ld [%o2 + V4F_STRIDE], %o5
881 LDPTR [%o2 + V4F_START], %g1
882 LDPTR [%o0 + V4F_START], %g2
883 ld [%o2 + V4F_COUNT], %g3
884
885 cmp %g3, 0
886 st %g3, [%o0 + V4F_COUNT]
887 be 2f
888 clr %o1
889
890 1: ld [%g1 + 0x00], %f0
891 ld [%g1 + 0x04], %f1
892 ld [%g1 + 0x08], %f2
893 add %o1, 1, %o1
894 add %g1, %o5, %g1
895 cmp %o1, %g3
896 st %f0, [%g2 + 0x00]
897 st %f1, [%g2 + 0x04]
898 st %f2, [%g2 + 0x08]
899 bne 1b
900 add %g2, 0x10, %g2
901 2:
902 ba __set_v4f_3
903 nop
904
/*
 * _mesa_sparc_transform_points3_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes three:
 *      out[0] = (x * M0 + M12) + y * M4
 *      out[1] = (x * M1 + M13) + y * M5
 *      out[2] = z      (copied unchanged)
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
905 .globl _mesa_sparc_transform_points3_2d
906 _mesa_sparc_transform_points3_2d:
907 ld [%o2 + V4F_STRIDE], %o5
908 LDPTR [%o2 + V4F_START], %g1
909 LDPTR [%o0 + V4F_START], %g2
910 ld [%o2 + V4F_COUNT], %g3
911
912 LDMATRIX_0_1_4_5_12_13(%o1)
913
914 cmp %g3, 0
915 st %g3, [%o0 + V4F_COUNT]
916 be 2f
917 clr %o1
918
919 1: ld [%g1 + 0x00], %f0 ! LSU Group
920 ld [%g1 + 0x04], %f1 ! LSU Group
921 ld [%g1 + 0x08], %f2 ! LSU Group
922 add %o1, 1, %o1 ! IEU0
923 add %g1, %o5, %g1 ! IEU1
924 fmuls %f0, M0, %f3 ! FGM
925 fmuls %f0, M1, %f4 ! FGM Group
926 fmuls %f1, M4, %f6 ! FGM Group
927 fmuls %f1, M5, %f7 ! FGM Group
928 fadds %f3, M12, %f3 ! FGA Group f3 available
929 fadds %f4, M13, %f4 ! FGA Group f4 available
930 fadds %f3, %f6, %f3 ! FGA Group f6 available
931 st %f3, [%g2 + 0x00] ! LSU
932 fadds %f4, %f7, %f4 ! FGA Group f7 available
933 st %f4, [%g2 + 0x04] ! LSU
934 st %f2, [%g2 + 0x08] ! LSU Group
935 cmp %o1, %g3 ! IEU1
936 bne 1b ! CTI
937 add %g2, 0x10, %g2 ! IEU0 Group
938 2:
939 ba __set_v4f_3
940 nop
941
/*
 * _mesa_sparc_transform_points3_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes three:
 *      out[0] = x * M0 + M12
 *      out[1] = y * M5 + M13
 *      out[2] = z      (copied unchanged)
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
942 .globl _mesa_sparc_transform_points3_2d_no_rot
943 _mesa_sparc_transform_points3_2d_no_rot:
944 ld [%o2 + V4F_STRIDE], %o5
945 LDPTR [%o2 + V4F_START], %g1
946 LDPTR [%o0 + V4F_START], %g2
947 ld [%o2 + V4F_COUNT], %g3
948
949 LDMATRIX_0_5_12_13(%o1)
950
951 cmp %g3, 0
952 st %g3, [%o0 + V4F_COUNT]
953 be 2f
954 clr %o1
955
956 1: ld [%g1 + 0x00], %f0 ! LSU Group
957 ld [%g1 + 0x04], %f1 ! LSU Group
958 ld [%g1 + 0x08], %f2 ! LSU Group
959 add %o1, 1, %o1 ! IEU0
960 add %g1, %o5, %g1 ! IEU1
961 fmuls %f0, M0, %f3 ! FGM
962 fmuls %f1, M5, %f4 ! FGM Group
963 st %f2, [%g2 + 0x08] ! LSU
964 fadds %f3, M12, %f3 ! FGA Group
965 st %f3, [%g2 + 0x00] ! LSU
966 fadds %f4, M13, %f4 ! FGA Group
967 st %f4, [%g2 + 0x04] ! LSU
968 cmp %o1, %g3 ! IEU1
969 bne 1b ! CTI
970 add %g2, 0x10, %g2 ! IEU0 Group
971 2:
972 ba __set_v4f_3
973 nop
974
/*
 * _mesa_sparc_transform_points3_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes three:
 *      out[k] = ((x * M<k> + y * M<4+k>) + z * M<8+k>) + M<12+k>,  k = 0..2
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
975 .globl _mesa_sparc_transform_points3_3d
976 _mesa_sparc_transform_points3_3d:
977 ld [%o2 + V4F_STRIDE], %o5
978 LDPTR [%o2 + V4F_START], %g1
979 LDPTR [%o0 + V4F_START], %g2
980 ld [%o2 + V4F_COUNT], %g3
981
982 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
983
984 cmp %g3, 0
985 st %g3, [%o0 + V4F_COUNT]
986 be 2f
987 clr %o1
988
989 1: ld [%g1 + 0x00], %f0 ! LSU Group
990 ld [%g1 + 0x04], %f1 ! LSU Group
991 ld [%g1 + 0x08], %f2 ! LSU Group
992 add %o1, 1, %o1 ! IEU0
993 add %g1, %o5, %g1 ! IEU1
994 fmuls %f0, M0, %f3 ! FGM
995 fmuls %f1, M4, %f6 ! FGM Group
996 fmuls %f0, M1, %f4 ! FGM Group
997 fmuls %f1, M5, %f7 ! FGM Group
998 fmuls %f0, M2, %f5 ! FGM Group f3 available
999 fmuls %f1, M6, %f8 ! FGM Group f6 available
1000 fadds %f3, %f6, %f3 ! FGA
1001 fmuls %f2, M8, %f9 ! FGM Group f4 available
1002 fmuls %f2, M9, %f10 ! FGM Group f7 available
1003 fadds %f4, %f7, %f4 ! FGA
1004 fmuls %f2, M10, %f11 ! FGM Group f5 available
1005 fadds %f5, %f8, %f5 ! FGA Group f8, f3 available
1006 fadds %f3, %f9, %f3 ! FGA Group f9 available
1007 fadds %f4, %f10, %f4 ! FGA Group f10, f4 available
1008 fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available
1009 fadds %f3, M12, %f3 ! FGA Group f3 available
1010 st %f3, [%g2 + 0x00] ! LSU
1011 fadds %f4, M13, %f4 ! FGA Group f4 available
1012 st %f4, [%g2 + 0x04] ! LSU
1013 fadds %f5, M14, %f5 ! FGA Group f5 available
1014 st %f5, [%g2 + 0x08] ! LSU
1015 cmp %o1, %g3 ! IEU1
1016 bne 1b ! CTI
1017 add %g2, 0x10, %g2 ! IEU0 Group
1018 2:
1019 ba __set_v4f_3
1020 nop
1021
/*
 * _mesa_sparc_transform_points3_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes three:
 *      out[0] = x * M0  + M12
 *      out[1] = y * M5  + M13
 *      out[2] = z * M10 + M14
 * One element per loop pass.  The loop-end cmp is issued right after the
 * index update; only FP operations and stores sit between it and the
 * bne that consumes the condition codes.  The source advances by
 * V4F_STRIDE bytes, the destination by 0x10 bytes per element.  The
 * source count is copied to the destination, and the loop is skipped
 * when count is 0.
 * Exits by branching to __set_v4f_3, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
1022 .globl _mesa_sparc_transform_points3_3d_no_rot
1023 _mesa_sparc_transform_points3_3d_no_rot:
1024 ld [%o2 + V4F_STRIDE], %o5
1025 LDPTR [%o2 + V4F_START], %g1
1026 LDPTR [%o0 + V4F_START], %g2
1027 ld [%o2 + V4F_COUNT], %g3
1028
1029 LDMATRIX_0_5_10_12_13_14(%o1)
1030
1031 cmp %g3, 0
1032 st %g3, [%o0 + V4F_COUNT]
1033 be 2f
1034 clr %o1
1035
1036 1: ld [%g1 + 0x00], %f0 ! LSU Group
1037 ld [%g1 + 0x04], %f1 ! LSU Group
1038 ld [%g1 + 0x08], %f2 ! LSU Group
1039 add %o1, 1, %o1 ! IEU0
1040 add %g1, %o5, %g1 ! IEU1
1041 cmp %o1, %g3 ! IEU1 Group
1042 fmuls %f0, M0, %f3 ! FGM
1043 fmuls %f1, M5, %f4 ! FGM Group
1044 fmuls %f2, M10, %f5 ! FGM Group
1045 fadds %f3, M12, %f3 ! FGA Group, stall, f3 available
1046 st %f3, [%g2 + 0x00] ! LSU
1047 fadds %f4, M13, %f4 ! FGA Group, f4 available
1048 st %f4, [%g2 + 0x04] ! LSU
1049 fadds %f5, M14, %f5 ! FGA Group, f5 available
1050 st %f5, [%g2 + 0x08] ! LEU
1051 bne 1b ! CTI
1052 add %g2, 0x10, %g2 ! IEU0 Group
1053 2:
1054 ba __set_v4f_3
1055 nop
1056
/*
 * _mesa_sparc_transform_points3_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads three floats (x, y, z) and writes four:
 *      out[0] = x * M0  + z * M8
 *      out[1] = y * M5  + z * M9
 *      out[2] = z * M10 + M14
 *      out[3] = -z     (fnegs)
 * One element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
1057 .globl _mesa_sparc_transform_points3_perspective
1058 _mesa_sparc_transform_points3_perspective:
1059 ld [%o2 + V4F_STRIDE], %o5
1060 LDPTR [%o2 + V4F_START], %g1
1061 LDPTR [%o0 + V4F_START], %g2
1062 ld [%o2 + V4F_COUNT], %g3
1063
1064 LDMATRIX_0_5_8_9_10_14(%o1)
1065
1066 cmp %g3, 0
1067 st %g3, [%o0 + V4F_COUNT]
1068 be 2f
1069 clr %o1
1070
1071 1: ld [%g1 + 0x00], %f0 ! LSU Group
1072 ld [%g1 + 0x04], %f1 ! LSU Group
1073 ld [%g1 + 0x08], %f2 ! LSU Group
1074 add %o1, 1, %o1 ! IEU0
1075 add %g1, %o5, %g1 ! IEU1
1076 fmuls %f0, M0, %f3 ! FGM
1077 fmuls %f2, M8, %f6 ! FGM Group
1078 fmuls %f1, M5, %f4 ! FGM Group
1079 fmuls %f2, M9, %f7 ! FGM Group
1080 fmuls %f2, M10, %f5 ! FGM Group f3 available
1081 fadds %f3, %f6, %f3 ! FGA Group f6 available
1082 st %f3, [%g2 + 0x00] ! LSU
1083 fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available
1084 st %f4, [%g2 + 0x04] ! LSU
1085 fadds %f5, M14, %f5 ! FGA Group
1086 st %f5, [%g2 + 0x08] ! LSU
1087 fnegs %f2, %f6 ! FGA Group
1088 st %f6, [%g2 + 0x0c] ! LSU
1089 cmp %o1, %g3 ! IEU1
1090 bne 1b ! CTI
1091 add %g2, 0x10, %g2 ! IEU0 Group
1092 2:
1093 ba __set_v4f_4
1094 nop
1095
/*
 * _mesa_sparc_transform_points4_general
 *
 * In:  %o0 = destination vector, %o1 = matrix (handed to the LDMATRIX
 *      macro), %o2 = source vector.
 * For each source element, reads four floats (x, y, z, w) and writes four:
 *      out[k] = ((x * M<k> + y * M<4+k>) + z * M<8+k>) + w * M<12+k>,
 *      k = 0..3
 * One element per loop pass.  %f8-%f11 first hold the y products and
 * are then reused for the w products.  The source advances by V4F_STRIDE
 * bytes, the destination by 0x10 bytes per element.  The source count is
 * copied to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): M<n> presumably name FP registers filled by LDMATRIX_*
 * in sparc_matrix.h -- confirm there.
 */
1096 .globl _mesa_sparc_transform_points4_general
1097 _mesa_sparc_transform_points4_general:
1098 ld [%o2 + V4F_STRIDE], %o5
1099 LDPTR [%o2 + V4F_START], %g1
1100 LDPTR [%o0 + V4F_START], %g2
1101 ld [%o2 + V4F_COUNT], %g3
1102
1103 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
1104
1105 cmp %g3, 0
1106 st %g3, [%o0 + V4F_COUNT]
1107 be 2f
1108 clr %o1
1109
1110 1: ld [%g1 + 0x00], %f0 ! LSU Group
1111 ld [%g1 + 0x04], %f1 ! LSU Group
1112 ld [%g1 + 0x08], %f2 ! LSU Group
1113 ld [%g1 + 0x0c], %f3 ! LSU Group
1114 add %o1, 1, %o1 ! IEU0
1115 add %g1, %o5, %g1 ! IEU1
1116 fmuls %f0, M0, %f4 ! FGM Group
1117 fmuls %f1, M4, %f8 ! FGM Group
1118 fmuls %f0, M1, %f5 ! FGM Group
1119 fmuls %f1, M5, %f9 ! FGM Group
1120 fmuls %f0, M2, %f6 ! FGM Group f4 available
1121 fmuls %f1, M6, %f10 ! FGM Group f8 available
1122 fadds %f4, %f8, %f4 ! FGA
1123 fmuls %f0, M3, %f7 ! FGM Group f5 available
1124 fmuls %f1, M7, %f11 ! FGM Group f9 available
1125 fadds %f5, %f9, %f5 ! FGA
1126 fmuls %f2, M8, %f12 ! FGM Group f6 available
1127 fmuls %f2, M9, %f13 ! FGM Group f10, f4 available
1128 fadds %f6, %f10, %f6 ! FGA
1129 fmuls %f2, M10, %f14 ! FGM Group f7 available
1130 fmuls %f2, M11, %f15 ! FGM Group f11, f5 available
1131 fadds %f7, %f11, %f7 ! FGA
1132 fmuls %f3, M12, %f8 ! FGM Group f12 available
1133 fadds %f4, %f12, %f4 ! FGA
1134 fmuls %f3, M13, %f9 ! FGM Group f13, f6 available
1135 fadds %f5, %f13, %f5 ! FGA
1136 fmuls %f3, M14, %f10 ! FGM Group f14 available
1137 fadds %f6, %f14, %f6 ! FGA
1138 fmuls %f3, M15, %f11 ! FGM Group f15, f7 available
1139 fadds %f7, %f15, %f7 ! FGA
1140 fadds %f4, %f8, %f4 ! FGA Group f8, f4 available
1141 st %f4, [%g2 + 0x00] ! LSU
1142 fadds %f5, %f9, %f5 ! FGA Group f9, f5 available
1143 st %f5, [%g2 + 0x04] ! LSU
1144 fadds %f6, %f10, %f6 ! FGA Group f10, f6 available
1145 st %f6, [%g2 + 0x08] ! LSU
1146 fadds %f7, %f11, %f7 ! FGA Group f11, f7 available
1147 st %f7, [%g2 + 0x0c] ! LSU
1148 cmp %o1, %g3 ! IEU1
1149 bne 1b ! CTI
1150 add %g2, 0x10, %g2 ! IEU0 Group
1151 2:
1152 ba __set_v4f_4
1153 nop
1154
/*
 * _mesa_sparc_transform_points4_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read;
 *      it is reused as the element index).
 * Copies the four floats of every source element to the destination,
 * one element per loop pass.  The source advances by V4F_STRIDE bytes,
 * the destination by 0x10 bytes per element.  The source count is copied
 * to the destination, and the loop is skipped when count is 0.
 * Exits by branching to __set_v4f_4, which returns.
 * NOTE(review): unlike the points1/points2 identity routines there is
 * no early return when %o0 == %o2 -- confirm this is intended.
 */
1155 .globl _mesa_sparc_transform_points4_identity
1156 _mesa_sparc_transform_points4_identity:
1157 ld [%o2 + V4F_STRIDE], %o5
1158 LDPTR [%o2 + V4F_START], %g1
1159 LDPTR [%o0 + V4F_START], %g2
1160 ld [%o2 + V4F_COUNT], %g3
1161
1162 cmp %g3, 0
1163 st %g3, [%o0 + V4F_COUNT]
1164 be 2f
1165 clr %o1
1166
1167 1: ld [%g1 + 0x00], %f0
1168 ld [%g1 + 0x04], %f1
1169 ld [%g1 + 0x08], %f2
1170 add %o1, 1, %o1
1171 ld [%g1 + 0x0c], %f3
1172 add %g1, %o5, %g1
1173 st %f0, [%g2 + 0x00]
1174 st %f1, [%g2 + 0x04]
1175 st %f2, [%g2 + 0x08]
1176 cmp %o1, %g3
1177 st %f3, [%g2 + 0x0c]
1178 bne 1b
1179 add %g2, 0x10, %g2
1180 2:
1181 ba __set_v4f_4
1182 nop
1183
/*
 * _mesa_sparc_transform_points4_2d
 *
 * For each source element (x, y, z, w), loaded as four single-precision
 * words from offsets 0x00, 0x04, 0x08 and 0x0c, the loop stores:
 *     out[0] = (x*M0 + y*M4) + w*M12
 *     out[1] = (x*M1 + y*M5) + w*M13
 *     out[2] = z
 *     out[3] = w
 *
 * In:   %o0 = destination vector descriptor (accessed via V4F_* offsets)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13, then reused
 *             as the loop counter
 *       %o2 = source vector descriptor (accessed via V4F_* offsets)
 * Out:  destination V4F_COUNT receives the source count.  The routine
 *       ends in __set_v4f_4, which stores 4 to V4F_SIZE, ORs VEC_SIZE_4
 *       into V4F_FLAGS of %o0 and returns with retl.
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = count,
 *       %o5 = source stride in bytes, %f0-%f5, %f8, %f9, %f12, %f13
 *       plus whatever registers the Mn names expand to.
 *
 * LDPTR, LDMATRIX_* and the Mn names are not defined in this part of
 * the file; presumably the macro loads the listed matrix elements from
 * the matrix at %o1 -- confirm in sparc_matrix.h.
 */
1184 .globl _mesa_sparc_transform_points4_2d
1185 _mesa_sparc_transform_points4_2d:
1186 ld [%o2 + V4F_STRIDE], %o5
1187 LDPTR [%o2 + V4F_START], %g1
1188 LDPTR [%o0 + V4F_START], %g2
1189 ld [%o2 + V4F_COUNT], %g3
1190
1191 LDMATRIX_0_1_4_5_12_13(%o1)
1192
/* Publish the count first, then skip the loop entirely when it is zero. */
1193 cmp %g3, 0
1194 st %g3, [%o0 + V4F_COUNT]
1195 be 2f
/* Branch delay slot: %o1 is no longer needed as macro argument and becomes the counter. */
1196 clr %o1
1197
/*
 * Transform loop, one element per pass.  The instruction order is
 * hand-scheduled (see the per-line group notes); do not reorder
 * without re-checking the pipeline annotations.
 */
1198 1: ld [%g1 + 0x00], %f0 ! LSU Group
1199 ld [%g1 + 0x04], %f1 ! LSU Group
1200 ld [%g1 + 0x08], %f2 ! LSU Group
1201 ld [%g1 + 0x0c], %f3 ! LSU Group
1202 add %o1, 1, %o1 ! IEU0
1203 add %g1, %o5, %g1 ! IEU1
1204 fmuls %f0, M0, %f4 ! FGM
1205 fmuls %f1, M4, %f8 ! FGM Group
1206 fmuls %f0, M1, %f5 ! FGM Group
1207 fmuls %f1, M5, %f9 ! FGM Group f4 available
1208 fmuls %f3, M12, %f12 ! FGM Group
1209 fmuls %f3, M13, %f13 ! FGM Group f8 available
1210 fadds %f4, %f8, %f4 ! FGA
1211 fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available
1212 fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail
1213 st %f4, [%g2 + 0x00] ! LSU
1214 fadds %f5, %f13, %f5 ! FGA Group f5 available
1215 st %f5, [%g2 + 0x04] ! LSU
/* z and w are passed through untouched. */
1216 st %f2, [%g2 + 0x08] ! LSU Group
1217 st %f3, [%g2 + 0x0c] ! LSU Group
1218 cmp %o1, %g3 ! IEU1
1219 bne 1b ! CTI
/* Branch delay slot: step the destination to the next 16-byte element. */
1220 add %g2, 0x10, %g2 ! IEU0 Group
1221 2:
/* Tail-branch: __set_v4f_4 issues the retl on behalf of this routine. */
1222 ba __set_v4f_4
1223 nop
1224
/*
 * _mesa_sparc_transform_points4_2d_no_rot
 *
 * For each source element (x, y, z, w), loaded as four single-precision
 * words from offsets 0x00, 0x04, 0x08 and 0x0c, the loop stores:
 *     out[0] = x*M0 + w*M12
 *     out[1] = y*M5 + w*M13
 *     out[2] = z
 *     out[3] = w
 *
 * In:   %o0 = destination vector descriptor (accessed via V4F_* offsets)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13, then reused
 *             as the loop counter
 *       %o2 = source vector descriptor (accessed via V4F_* offsets)
 * Out:  destination V4F_COUNT receives the source count.  The routine
 *       ends in __set_v4f_4, which stores 4 to V4F_SIZE, ORs VEC_SIZE_4
 *       into V4F_FLAGS of %o0 and returns with retl.
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = count,
 *       %o5 = source stride in bytes, %f0-%f5, %f8, %f9 plus whatever
 *       registers the Mn names expand to.
 *
 * LDPTR, LDMATRIX_* and the Mn names are not defined in this part of
 * the file; presumably the macro loads the listed matrix elements from
 * the matrix at %o1 -- confirm in sparc_matrix.h.
 *
 * NOTE(review): the macro name lists elements 1 and 4, but M1 and M4
 * are never referenced by this loop; a narrower load macro may be
 * sufficient if sparc_matrix.h provides one -- verify before changing.
 */
1225 .globl _mesa_sparc_transform_points4_2d_no_rot
1226 _mesa_sparc_transform_points4_2d_no_rot:
1227 ld [%o2 + V4F_STRIDE], %o5
1228 LDPTR [%o2 + V4F_START], %g1
1229 LDPTR [%o0 + V4F_START], %g2
1230 ld [%o2 + V4F_COUNT], %g3
1231
1232 LDMATRIX_0_1_4_5_12_13(%o1)
1233
/* Publish the count first, then skip the loop entirely when it is zero. */
1234 cmp %g3, 0
1235 st %g3, [%o0 + V4F_COUNT]
1236 be 2f
/* Branch delay slot: %o1 is no longer needed as macro argument and becomes the counter. */
1237 clr %o1
1238
/*
 * Transform loop, one element per pass: the source cursor advances by
 * the stride, the destination cursor by a fixed 0x10 bytes.
 */
1239 1: ld [%g1 + 0x00], %f0
1240 ld [%g1 + 0x04], %f1
1241 ld [%g1 + 0x08], %f2
1242 ld [%g1 + 0x0c], %f3
1243 add %o1, 1, %o1
1244 add %g1, %o5, %g1
/* Products are interleaved so each sum's two operands are issued apart. */
1245 fmuls %f0, M0, %f4
1246 fmuls %f3, M12, %f8
1247 fmuls %f1, M5, %f5
1248 fmuls %f3, M13, %f9
1249 fadds %f4, %f8, %f4
1250 st %f4, [%g2 + 0x00]
1251 fadds %f5, %f9, %f5
1252 st %f5, [%g2 + 0x04]
/* z and w are passed through untouched. */
1253 st %f2, [%g2 + 0x08]
1254 st %f3, [%g2 + 0x0c]
1255 cmp %o1, %g3
1256 bne 1b
/* Branch delay slot: step the destination to the next 16-byte element. */
1257 add %g2, 0x10, %g2
1258 2:
/* Tail-branch: __set_v4f_4 issues the retl on behalf of this routine. */
1259 ba __set_v4f_4
1260 nop
1261
/*
 * _mesa_sparc_transform_points4_3d
 *
 * For each source element (x, y, z, w), loaded as four single-precision
 * words from offsets 0x00, 0x04, 0x08 and 0x0c, the loop stores:
 *     out[0] = ((x*M0 + y*M4) + z*M8)  + w*M12
 *     out[1] = ((x*M1 + y*M5) + z*M9)  + w*M13
 *     out[2] = ((x*M2 + y*M6) + z*M10) + w*M14
 *     out[3] = w
 *
 * In:   %o0 = destination vector descriptor (accessed via V4F_* offsets)
 *       %o1 = argument handed to
 *             LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14, then reused as the
 *             loop counter
 *       %o2 = source vector descriptor (accessed via V4F_* offsets)
 * Out:  destination V4F_COUNT receives the source count.  The routine
 *       ends in __set_v4f_4, which stores 4 to V4F_SIZE, ORs VEC_SIZE_4
 *       into V4F_FLAGS of %o0 and returns with retl.
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = count,
 *       %o5 = source stride in bytes, %f0-%f15 plus whatever registers
 *       the Mn names expand to.
 *
 * LDPTR, LDMATRIX_* and the Mn names are not defined in this part of
 * the file; presumably the macro loads the listed matrix elements from
 * the matrix at %o1 -- confirm in sparc_matrix.h.
 */
1262 .globl _mesa_sparc_transform_points4_3d
1263 _mesa_sparc_transform_points4_3d:
1264 ld [%o2 + V4F_STRIDE], %o5
1265 LDPTR [%o2 + V4F_START], %g1
1266 LDPTR [%o0 + V4F_START], %g2
1267 ld [%o2 + V4F_COUNT], %g3
1268
1269 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1270
/* Publish the count first, then skip the loop entirely when it is zero. */
1271 cmp %g3, 0
1272 st %g3, [%o0 + V4F_COUNT]
1273 be 2f
/* Branch delay slot: %o1 is no longer needed as macro argument and becomes the counter. */
1274 clr %o1
1275
/*
 * Transform loop, one element per pass.  Accumulators: %f4 -> out[0],
 * %f5 -> out[1], %f6 -> out[2]; %f7-%f15 hold the partial products.
 * The instruction order is hand-scheduled (see the per-line group
 * notes); do not reorder without re-checking the pipeline annotations.
 */
1276 1: ld [%g1 + 0x00], %f0 ! LSU Group
1277 ld [%g1 + 0x04], %f1 ! LSU Group
1278 ld [%g1 + 0x08], %f2 ! LSU Group
1279 ld [%g1 + 0x0c], %f3 ! LSU Group
1280 add %o1, 1, %o1 ! IEU0
1281 add %g1, %o5, %g1 ! IEU1
1282 fmuls %f0, M0, %f4 ! FGM
1283 fmuls %f1, M4, %f7 ! FGM Group
1284 fmuls %f0, M1, %f5 ! FGM Group
1285 fmuls %f1, M5, %f8 ! FGM Group
1286 fmuls %f0, M2, %f6 ! FGM Group f4 available
1287 fmuls %f1, M6, %f9 ! FGM Group f7 available
1288 fadds %f4, %f7, %f4 ! FGA
1289 fmuls %f2, M8, %f10 ! FGM Group f5 available
1290 fmuls %f2, M9, %f11 ! FGM Group f8 available
1291 fadds %f5, %f8, %f5 ! FGA
1292 fmuls %f2, M10, %f12 ! FGM Group f6 available
1293 fmuls %f3, M12, %f13 ! FGM Group f9, f4 available
1294 fadds %f6, %f9, %f6 ! FGA
1295 fmuls %f3, M13, %f14 ! FGM Group f10 available
1296 fadds %f4, %f10, %f4 ! FGA
1297 fmuls %f3, M14, %f15 ! FGM Group f11, f5 available
1298 fadds %f5, %f11, %f5 ! FGA
1299 fadds %f6, %f12, %f6 ! FGA Group stall, f12, f13, f6 available
1300 fadds %f4, %f13, %f4 ! FGA Group f14, f4 available
1301 st %f4, [%g2 + 0x00] ! LSU
1302 fadds %f5, %f14, %f5 ! FGA Group f15, f5 available
1303 st %f5, [%g2 + 0x04] ! LSU
1304 fadds %f6, %f15, %f6 ! FGA Group f6 available
1305 st %f6, [%g2 + 0x08] ! LSU
/* w is passed through untouched. */
1306 st %f3, [%g2 + 0x0c] ! LSU Group
1307 cmp %o1, %g3 ! IEU1
1308 bne 1b ! CTI
/* Branch delay slot: step the destination to the next 16-byte element. */
1309 add %g2, 0x10, %g2 ! IEU0 Group
1310 2:
/* Tail-branch: __set_v4f_4 issues the retl on behalf of this routine. */
1311 ba __set_v4f_4
1312 nop
1313
/*
 * _mesa_sparc_transform_points4_3d_no_rot
 *
 * For each source element (x, y, z, w), loaded as four single-precision
 * words from offsets 0x00, 0x04, 0x08 and 0x0c, the loop stores:
 *     out[0] = x*M0  + w*M12
 *     out[1] = y*M5  + w*M13
 *     out[2] = z*M10 + w*M14
 *     out[3] = w
 *
 * In:   %o0 = destination vector descriptor (accessed via V4F_* offsets)
 *       %o1 = argument handed to LDMATRIX_0_5_10_12_13_14, then reused
 *             as the loop counter
 *       %o2 = source vector descriptor (accessed via V4F_* offsets)
 * Out:  destination V4F_COUNT receives the source count.  The routine
 *       ends in __set_v4f_4, which stores 4 to V4F_SIZE, ORs VEC_SIZE_4
 *       into V4F_FLAGS of %o0 and returns with retl.
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = count,
 *       %o5 = source stride in bytes, %f0-%f9 plus whatever registers
 *       the Mn names expand to.
 *
 * LDPTR, LDMATRIX_* and the Mn names are not defined in this part of
 * the file; presumably the macro loads the listed matrix elements from
 * the matrix at %o1 -- confirm in sparc_matrix.h.
 */
1314 .globl _mesa_sparc_transform_points4_3d_no_rot
1315 _mesa_sparc_transform_points4_3d_no_rot:
1316 ld [%o2 + V4F_STRIDE], %o5
1317 LDPTR [%o2 + V4F_START], %g1
1318 LDPTR [%o0 + V4F_START], %g2
1319 ld [%o2 + V4F_COUNT], %g3
1320
1321 LDMATRIX_0_5_10_12_13_14(%o1)
1322
/* Publish the count first, then skip the loop entirely when it is zero. */
1323 cmp %g3, 0
1324 st %g3, [%o0 + V4F_COUNT]
1325 be 2f
/* Branch delay slot: %o1 is no longer needed as macro argument and becomes the counter. */
1326 clr %o1
1327
/*
 * Transform loop, one element per pass.  %f4-%f6 hold the scaled
 * x, y, z terms and %f7-%f9 the w-scaled terms added to them.  The
 * instruction order is hand-scheduled (see the per-line group notes);
 * do not reorder without re-checking the pipeline annotations.
 */
1328 1: ld [%g1 + 0x00], %f0 ! LSU Group
1329 ld [%g1 + 0x04], %f1 ! LSU Group
1330 ld [%g1 + 0x08], %f2 ! LSU Group
1331 ld [%g1 + 0x0c], %f3 ! LSU Group
1332 add %o1, 1, %o1 ! IEU0
1333 add %g1, %o5, %g1 ! IEU1
1334 fmuls %f0, M0, %f4 ! FGM
1335 fmuls %f3, M12, %f7 ! FGM Group
1336 fmuls %f1, M5, %f5 ! FGM Group
1337 fmuls %f3, M13, %f8 ! FGM Group
1338 fmuls %f2, M10, %f6 ! FGM Group f4 available
1339 fmuls %f3, M14, %f9 ! FGM Group f7 available
1340 fadds %f4, %f7, %f4 ! FGA
1341 st %f4, [%g2 + 0x00] ! LSU
1342 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1343 st %f5, [%g2 + 0x04] ! LSU
1344 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1345 st %f6, [%g2 + 0x08] ! LSU
/* w is passed through untouched. */
1346 st %f3, [%g2 + 0x0c] ! LSU Group
1347 cmp %o1, %g3 ! IEU1
1348 bne 1b ! CTI
/* Branch delay slot: step the destination to the next 16-byte element. */
1349 add %g2, 0x10, %g2 ! IEU0 Group
1350 2:
/* Tail-branch: __set_v4f_4 issues the retl on behalf of this routine. */
1351 ba __set_v4f_4
1352 nop
1353
/*
 * _mesa_sparc_transform_points4_perspective
 *
 * For each source element (x, y, z, w), loaded as four single-precision
 * words from offsets 0x00, 0x04, 0x08 and 0x0c, the loop stores:
 *     out[0] = x*M0  + z*M8
 *     out[1] = y*M5  + z*M9
 *     out[2] = z*M10 + w*M14
 *     out[3] = -z
 *
 * In:   %o0 = destination vector descriptor (accessed via V4F_* offsets)
 *       %o1 = argument handed to LDMATRIX_0_5_8_9_10_14, then reused
 *             as the loop counter
 *       %o2 = source vector descriptor (accessed via V4F_* offsets)
 * Out:  destination V4F_COUNT receives the source count.  The routine
 *       ends in __set_v4f_4, which stores 4 to V4F_SIZE, ORs VEC_SIZE_4
 *       into V4F_FLAGS of %o0 and returns with retl.
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = count,
 *       %o5 = source stride in bytes, %f0-%f9 plus whatever registers
 *       the Mn names expand to.
 *
 * LDPTR, LDMATRIX_* and the Mn names are not defined in this part of
 * the file; presumably the macro loads the listed matrix elements from
 * the matrix at %o1 -- confirm in sparc_matrix.h.
 */
1354 .globl _mesa_sparc_transform_points4_perspective
1355 _mesa_sparc_transform_points4_perspective:
1356 ld [%o2 + V4F_STRIDE], %o5
1357 LDPTR [%o2 + V4F_START], %g1
1358 LDPTR [%o0 + V4F_START], %g2
1359 ld [%o2 + V4F_COUNT], %g3
1360
1361 LDMATRIX_0_5_8_9_10_14(%o1)
1362
/* Publish the count first, then skip the loop entirely when it is zero. */
1363 cmp %g3, 0
1364 st %g3, [%o0 + V4F_COUNT]
1365 be 2f
/* Branch delay slot: %o1 is no longer needed as macro argument and becomes the counter. */
1366 clr %o1
1367
/*
 * Transform loop, one element per pass.  The instruction order is
 * hand-scheduled (see the per-line group notes); do not reorder
 * without re-checking the pipeline annotations.
 */
1368 1: ld [%g1 + 0x00], %f0 ! LSU Group
1369 ld [%g1 + 0x04], %f1 ! LSU Group
1370 ld [%g1 + 0x08], %f2 ! LSU Group
1371 ld [%g1 + 0x0c], %f3 ! LSU Group
1372 add %o1, 1, %o1 ! IEU0
1373 add %g1, %o5, %g1 ! IEU1
1374 fmuls %f0, M0, %f4 ! FGM
1375 fmuls %f2, M8, %f7 ! FGM Group
1376 fmuls %f1, M5, %f5 ! FGM Group
1377 fmuls %f2, M9, %f8 ! FGM Group
1378 fmuls %f2, M10, %f6 ! FGM Group f4 available
1379 fmuls %f3, M14, %f9 ! FGM Group f7 available
1380 fadds %f4, %f7, %f4 ! FGA
1381 st %f4, [%g2 + 0x00] ! LSU
1382 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1383 st %f5, [%g2 + 0x04] ! LSU
1384 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1385 st %f6, [%g2 + 0x08] ! LSU
/*
 * The fourth output is the negated z input.  %f7 is free to reuse:
 * its z*M8 product was already folded into %f4 above.
 */
1386 fnegs %f2, %f7 ! FGA Group
1387 st %f7, [%g2 + 0x0c] ! LSU
1388 cmp %o1, %g3 ! IEU1
1389 bne 1b ! CTI
/* Branch delay slot: step the destination to the next 16-byte element. */
1390 add %g2, 0x10, %g2 ! IEU0 Group
1391 2:
/* Tail-branch: __set_v4f_4 issues the retl on behalf of this routine. */
1392 ba __set_v4f_4
1393 nop