migrate substitution keywords to SVN
[reactos.git] / reactos / lib / mesa32 / src / sparc / norm.S
1 /* $Id$ */
2
3 #include "sparc_matrix.h"
4
5 #ifdef SVR4
6 /* Solaris requires this for 64-bit. */
7 .register %g2, #scratch
8 .register %g3, #scratch
9 #endif
10
11 .text
12
/* STACK_VAR_OFF: byte offset from %sp of the scratch words that the
 * routines in this file use to move a 32-bit value from an integer
 * register into an FP register (st from %g2 / %o1, then ld into
 * %f12 / %f15), after temporarily lowering %sp by 16.
 * NOTE(review): 2047 is presumably the 64-bit SPARC stack bias, and
 * 8 * 16 / 4 * 16 the size of a 16-register window save area with
 * 8-byte / 4-byte registers -- confirm against the platform ABI.
 */
13 #if defined(__sparc_v9__) && !defined(__linux__)
14 #define STACK_VAR_OFF (2047 + (8 * 16))
15 #else
16 #define STACK_VAR_OFF (4 * 16)
17 #endif
18
19 /* Newton-Raphson approximation turns out to be slower
20 * (and less accurate) than direct fsqrts/fdivs.
21 */
/* ONE_DOT_ZERO: bit pattern that the routines below load into %f12 and
 * use as the single-precision constant 1.0f.  Its low 10 bits are zero,
 * so a lone sethi %hi() is enough to build the whole value.
 */
22 #define ONE_DOT_ZERO 0x3f800000
23
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * In:  %o0 = mat, %o1 = scale (32-bit float bit pattern),
 *      %o2 = in, %o3 = lengths (may be NULL), %o4 = dest
 *
 * For each of the in->count input normals (ux, uy, uz), computes
 * (tx, ty, tz) from entries 0,1,2,4,5,6,8,9,10 of mat->inv, then:
 *   lengths == NULL: stores t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz));
 *   lengths != NULL: the nine matrix entries are multiplied by scale
 *                    once up front, and t * lengths[i] is stored.
 * dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (the compare is signed).  The input pointer advances
 * by in->stride bytes, the output pointer by 0x0c bytes per normal.
 *
 * Leaf routine: returns through retl and opens no register window.
 * Overwrites %g1-%g3, %o0, %o3-%o5 and FP registers %f0-%f8, %f10,
 * %f12, %f13, %f15, plus the matrix registers.
 *
 * NOTE(review): M0..M10, LDMATRIX_*, LDPTR, MAT_INV and V4F_* come from
 * sparc_matrix.h, which is not visible here; M<n> presumably names the
 * FP register holding matrix entry n.
 * NOTE(review): scale is taken from integer register %o1 -- confirm this
 * matches how the 64-bit ABI passes a float argument.
 */
24 .globl _mesa_sparc_transform_normalize_normals
25 _mesa_sparc_transform_normalize_normals:
26 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
27
/* 1.0f and scale are bounced through a temporary 16-byte stack area to
 * get them from integer registers into FP registers.
 */
28 sethi %hi(ONE_DOT_ZERO), %g2
29 sub %sp, 16, %sp
30 st %g2, [%sp + STACK_VAR_OFF+0x0]
31 st %o1, [%sp + STACK_VAR_OFF+0x4]
32 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
33 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
34 add %sp, 16, %sp
35
36 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
37 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
38 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
39 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
40 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
41
42 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
43
44 /* dest->count = in->count */
45 st %g1, [%o4 + V4F_COUNT]
46
/* The cmp in the bl delay slot sets the condition codes for the bne
 * that follows; the clr in the bne delay slot zeroes i on both paths
 * (%o4 is free because dest is no longer needed).
 */
47 cmp %g1, 1
48 bl 7f
49 cmp %o3, 0
50 bne 4f
51 clr %o4 ! 'i' for STRIDE_LOOP
52
53 1: /* LENGTHS == NULL */
54 ld [%o5 + 0x00], %f0 ! ux = from[0]
55 ld [%o5 + 0x04], %f1 ! uy = from[1]
56 ld [%o5 + 0x08], %f2 ! uz = from[2]
57 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
58 add %o4, 1, %o4 ! i++
59
60 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
61 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
62 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
63 */
/* %f0 and %f4 are reused as temporaries below once ux and the first
 * uy product have been consumed.
 */
64 fmuls %f0, M0, %f3 ! FGM Group
65 fmuls %f1, M1, %f4 ! FGM Group
66 fmuls %f0, M4, %f5 ! FGM Group
67 fmuls %f1, M5, %f6 ! FGM Group
68 fmuls %f0, M8, %f7 ! FGM Group f3 available
69 fmuls %f1, M9, %f8 ! FGM Group f4 available
70 fadds %f3, %f4, %f3 ! FGA
71 fmuls %f2, M2, %f10 ! FGM Group f5 available
72 fmuls %f2, M6, %f0 ! FGM Group f6 available
73 fadds %f5, %f6, %f5 ! FGA
74 fmuls %f2, M10, %f4 ! FGM Group f7 available
75 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
76 fadds %f3, %f10, %f3 ! FGA Group f10 available
77 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
78 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
79
80 /* f3=tx, f5=ty, f7=tz */
81
82 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
83 fmuls %f3, %f3, %f6 ! FGM Group f3 available
84 fmuls %f5, %f5, %f8 ! FGM Group f5 available
85 fmuls %f7, %f7, %f10 ! FGM Group f7 available
86 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
87 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
88
89 /* scale (f6) = 1.0 / sqrt(len) */
/* NOTE(review): len is not tested against zero before the divide. */
90 fsqrts %f6, %f6 ! FDIV 20 cycles
91 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
92
93 fmuls %f3, %f6, %f3
94 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
95 fmuls %f5, %f6, %f5
96 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
97 fmuls %f7, %f6, %f7
98 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
99
/* The pointer advance sits in the branch delay slot. */
100 cmp %o4, %g1 ! continue if (i < count)
101 bl 1b
102 add %g3, 0x0c, %g3 ! advance out vector pointer
103
104 ba 7f
105 nop
106
107 4: /* LENGTHS != NULL */
/* Fold scale into the nine matrix entries once, outside the loop. */
108 fmuls M0, %f15, M0
109 fmuls M1, %f15, M1
110 fmuls M2, %f15, M2
111 fmuls M4, %f15, M4
112 fmuls M5, %f15, M5
113 fmuls M6, %f15, M6
114 fmuls M8, %f15, M8
115 fmuls M9, %f15, M9
116 fmuls M10, %f15, M10
117
118 5:
119 ld [%o5 + 0x00], %f0 ! ux = from[0]
120 ld [%o5 + 0x04], %f1 ! uy = from[1]
121 ld [%o5 + 0x08], %f2 ! uz = from[2]
122 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
123 add %o4, 1, %o4 ! i++
124
125 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
126 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
127 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
128 */
/* The lengths[i] load and pointer bump are interleaved with the final
 * adds of the matrix product.
 */
129 fmuls %f0, M0, %f3 ! FGM Group
130 fmuls %f1, M1, %f4 ! FGM Group
131 fmuls %f0, M4, %f5 ! FGM Group
132 fmuls %f1, M5, %f6 ! FGM Group
133 fmuls %f0, M8, %f7 ! FGM Group f3 available
134 fmuls %f1, M9, %f8 ! FGM Group f4 available
135 fadds %f3, %f4, %f3 ! FGA
136 fmuls %f2, M2, %f10 ! FGM Group f5 available
137 fmuls %f2, M6, %f0 ! FGM Group f6 available
138 fadds %f5, %f6, %f5 ! FGA
139 fmuls %f2, M10, %f4 ! FGM Group f7 available
140 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
141 fadds %f3, %f10, %f3 ! FGA Group f10 available
142 ld [%o3], %f13 ! LSU
143 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
144 add %o3, 4, %o3 ! IEU0
145 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
146
147 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
148
149 fmuls %f3, %f13, %f3
150 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
151 fmuls %f5, %f13, %f5
152 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
153 fmuls %f7, %f13, %f7
154 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
155
156 cmp %o4, %g1 ! continue if (i < count)
157 bl 5b
158 add %g3, 0x0c, %g3 ! advance out vector pointer
159
160 7: retl
161 nop
162
/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * In:  %o0 = mat, %o1 = scale (32-bit float bit pattern),
 *      %o2 = in, %o3 = lengths (may be NULL), %o4 = dest
 *
 * Uses only matrix entries 0, 5 and 10 of mat->inv:
 * (tx, ty, tz) = (ux * m0, uy * m5, uz * m10).  Then:
 *   lengths == NULL: stores t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz));
 *   lengths != NULL: m0, m5 and m10 are multiplied by scale once up
 *                    front, and t * lengths[i] is stored.
 * dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (signed compare).  Input advances by in->stride bytes,
 * output by 0x0c bytes per normal.
 *
 * Leaf routine (retl, no register window).  Overwrites %g1-%g3, %o0,
 * %o3-%o5 and FP registers %f0-%f3, %f5-%f8, %f10, %f12, %f13, %f15,
 * plus the matrix registers.
 *
 * NOTE(review): M0/M5/M10, LDMATRIX_0_5_10, LDPTR, MAT_INV and V4F_*
 * come from sparc_matrix.h, which is not visible here.
 * NOTE(review): scale is taken from integer register %o1 -- confirm this
 * matches how the 64-bit ABI passes a float argument.
 */
163 .globl _mesa_sparc_transform_normalize_normals_no_rot
164 _mesa_sparc_transform_normalize_normals_no_rot:
165 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
166
/* 1.0f and scale go through a temporary stack area to reach the FP
 * register file.
 */
167 sethi %hi(ONE_DOT_ZERO), %g2
168 sub %sp, 16, %sp
169 st %g2, [%sp + STACK_VAR_OFF+0x0]
170 st %o1, [%sp + STACK_VAR_OFF+0x4]
171 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
172 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
173 add %sp, 16, %sp
174
175 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
176 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
177 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
178 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
179 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
180
181 LDMATRIX_0_5_10(%o0)
182
183 /* dest->count = in->count */
184 st %g1, [%o4 + V4F_COUNT]
185
/* The cmp in the bl delay slot feeds the bne; the clr in the bne delay
 * slot zeroes i on both paths.
 */
186 cmp %g1, 1
187 bl 7f
188 cmp %o3, 0
189 bne 4f
190 clr %o4 ! 'i' for STRIDE_LOOP
191
192 1: /* LENGTHS == NULL */
193 ld [%o5 + 0x00], %f0 ! ux = from[0]
194 ld [%o5 + 0x04], %f1 ! uy = from[1]
195 ld [%o5 + 0x08], %f2 ! uz = from[2]
196 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
197 add %o4, 1, %o4 ! i++
198
199 /* tx (f3) = (ux * m0)
200 * ty (f5) = (uy * m5)
201 * tz (f7) = (uz * m10)
202 */
203 fmuls %f0, M0, %f3 ! FGM Group
204 fmuls %f1, M5, %f5 ! FGM Group
205 fmuls %f2, M10, %f7 ! FGM Group
206
207 /* f3=tx, f5=ty, f7=tz */
208
209 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
210 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
211 fmuls %f5, %f5, %f8 ! FGM Group f5 available
212 fmuls %f7, %f7, %f10 ! FGM Group f7 available
213 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
214 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
215
216 /* scale (f6) = 1.0 / sqrt(len) */
/* NOTE(review): len is not tested against zero before the divide. */
217 fsqrts %f6, %f6 ! FDIV 20 cycles
218 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
219
220 fmuls %f3, %f6, %f3
221 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
222 fmuls %f5, %f6, %f5
223 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
224 fmuls %f7, %f6, %f7
225 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
226
/* The pointer advance sits in the branch delay slot. */
227 cmp %o4, %g1 ! continue if (i < count)
228 bl 1b
229 add %g3, 0x0c, %g3 ! advance out vector pointer
230
231 ba 7f
232 nop
233
234 4: /* LENGTHS != NULL */
/* Fold scale into the three matrix entries once, outside the loop. */
235 fmuls M0, %f15, M0
236 fmuls M5, %f15, M5
237 fmuls M10, %f15, M10
238
239 5:
240 ld [%o5 + 0x00], %f0 ! ux = from[0]
241 ld [%o5 + 0x04], %f1 ! uy = from[1]
242 ld [%o5 + 0x08], %f2 ! uz = from[2]
243 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
244 add %o4, 1, %o4 ! i++
245
246 /* tx (f3) = (ux * m0)
247 * ty (f5) = (uy * m5)
248 * tz (f7) = (uz * m10)
249 */
250 fmuls %f0, M0, %f3 ! FGM Group
251 ld [%o3], %f13 ! LSU
252 fmuls %f1, M5, %f5 ! FGM Group
253 add %o3, 4, %o3 ! IEU0
254 fmuls %f2, M10, %f7 ! FGM Group
255
256 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
257
258 fmuls %f3, %f13, %f3
259 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
260 fmuls %f5, %f13, %f5
261 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
262 fmuls %f7, %f13, %f7
263 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
264
265 cmp %o4, %g1 ! continue if (i < count)
266 bl 5b
267 add %g3, 0x0c, %g3 ! advance out vector pointer
268
269 7: retl
270 nop
271
/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * In:  %o0 = mat, %o1 = scale (32-bit float bit pattern),
 *      %o2 = in, %o3 = lengths (not read here), %o4 = dest
 *
 * Multiplies matrix entries 0, 5 and 10 of mat->inv by scale once, then
 * stores (ux * m0, uy * m5, uz * m10) for each of the in->count input
 * normals.  dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (signed compare).  Input advances by in->stride bytes,
 * output by 0x0c bytes per normal.
 *
 * Leaf routine (retl, no register window).  Overwrites %g1-%g3, %o0,
 * %o4, %o5 and FP registers %f0-%f3, %f5, %f7, %f15, plus the matrix
 * registers.
 *
 * NOTE(review): M0/M5/M10, LDMATRIX_0_5_10, LDPTR, MAT_INV and V4F_*
 * come from sparc_matrix.h, which is not visible here.
 * NOTE(review): scale is taken from integer register %o1 -- confirm this
 * matches how the 64-bit ABI passes a float argument.
 */
272 .globl _mesa_sparc_transform_rescale_normals_no_rot
273 _mesa_sparc_transform_rescale_normals_no_rot:
274 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* scale goes through a temporary stack slot to reach %f15. */
275 sub %sp, 16, %sp
276 st %o1, [%sp + STACK_VAR_OFF+0x0]
277 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
278 add %sp, 16, %sp
279
280 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
281 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
282 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
283 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
284 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
285
286 LDMATRIX_0_5_10(%o0)
287
288 /* dest->count = in->count */
289 st %g1, [%o4 + V4F_COUNT]
290
/* The clr sits in the bl delay slot, so i is zeroed on both paths. */
291 cmp %g1, 1
292 bl 7f
293 clr %o4 ! 'i' for STRIDE_LOOP
294
/* Fold scale into the three matrix entries once, outside the loop. */
295 fmuls M0, %f15, M0
296 fmuls M5, %f15, M5
297 fmuls M10, %f15, M10
298
299 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
300 ld [%o5 + 0x04], %f1 ! uy = from[1]
301 ld [%o5 + 0x08], %f2 ! uz = from[2]
302 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
303 add %o4, 1, %o4 ! i++
304
305 /* tx (f3) = (ux * m0)
306 * ty (f5) = (uy * m5)
307 * tz (f7) = (uz * m10)
308 */
309 fmuls %f0, M0, %f3 ! FGM Group
310 st %f3, [%g3 + 0x00] ! LSU
311 fmuls %f1, M5, %f5 ! FGM Group
312 st %f5, [%g3 + 0x04] ! LSU
313 fmuls %f2, M10, %f7 ! FGM Group
314 st %f7, [%g3 + 0x08] ! LSU
315
/* The pointer advance sits in the branch delay slot. */
316 cmp %o4, %g1 ! continue if (i < count)
317 bl 1b
318 add %g3, 0x0c, %g3 ! advance out vector pointer
319
320 7: retl
321 nop
322
/*
 * _mesa_sparc_transform_rescale_normals
 *
 * In:  %o0 = mat, %o1 = scale (32-bit float bit pattern),
 *      %o2 = in, %o3 = lengths (not read here), %o4 = dest
 *
 * Multiplies matrix entries 0,1,2,4,5,6,8,9,10 of mat->inv by scale
 * once, then for each of the in->count input normals stores
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 * dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (signed compare).  Input advances by in->stride bytes,
 * output by 0x0c bytes per normal.
 *
 * Leaf routine (retl, no register window).  Overwrites %g1-%g3, %o0,
 * %o4, %o5 and FP registers %f0-%f8, %f10, %f15, plus the matrix
 * registers.
 *
 * NOTE(review): M0..M10, LDMATRIX_*, LDPTR, MAT_INV and V4F_* come from
 * sparc_matrix.h, which is not visible here.
 * NOTE(review): scale is taken from integer register %o1 -- confirm this
 * matches how the 64-bit ABI passes a float argument.
 */
323 .globl _mesa_sparc_transform_rescale_normals
324 _mesa_sparc_transform_rescale_normals:
325 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* scale goes through a temporary stack slot to reach %f15. */
326 sub %sp, 16, %sp
327 st %o1, [%sp + STACK_VAR_OFF+0x0]
328 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
329 add %sp, 16, %sp
330
331 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
332 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
333 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
334 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
335 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
336
337 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
338
339 /* dest->count = in->count */
340 st %g1, [%o4 + V4F_COUNT]
341
/* The clr sits in the bl delay slot, so i is zeroed on both paths. */
342 cmp %g1, 1
343 bl 7f
344 clr %o4 ! 'i' for STRIDE_LOOP
345
/* Fold scale into the nine matrix entries once, outside the loop. */
346 fmuls M0, %f15, M0
347 fmuls M1, %f15, M1
348 fmuls M2, %f15, M2
349 fmuls M4, %f15, M4
350 fmuls M5, %f15, M5
351 fmuls M6, %f15, M6
352 fmuls M8, %f15, M8
353 fmuls M9, %f15, M9
354 fmuls M10, %f15, M10
355
356 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
357 ld [%o5 + 0x04], %f1 ! uy = from[1]
358 ld [%o5 + 0x08], %f2 ! uz = from[2]
359 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
360 add %o4, 1, %o4 ! i++
361
/* Matrix product; %f0 and %f4 are reused as temporaries once their
 * first values have been consumed.  Each store follows the add that
 * completes its component.
 */
362 fmuls %f0, M0, %f3 ! FGM Group
363 fmuls %f1, M1, %f4 ! FGM Group
364 fmuls %f0, M4, %f5 ! FGM Group
365 fmuls %f1, M5, %f6 ! FGM Group
366 fmuls %f0, M8, %f7 ! FGM Group f3 available
367 fmuls %f1, M9, %f8 ! FGM Group f4 available
368 fadds %f3, %f4, %f3 ! FGA
369 fmuls %f2, M2, %f10 ! FGM Group f5 available
370 fmuls %f2, M6, %f0 ! FGM Group f6 available
371 fadds %f5, %f6, %f5 ! FGA
372 fmuls %f2, M10, %f4 ! FGM Group f7 available
373 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
374 fadds %f3, %f10, %f3 ! FGA Group f10 available
375 st %f3, [%g3 + 0x00] ! LSU
376 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
377 st %f5, [%g3 + 0x04] ! LSU
378 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
379 st %f7, [%g3 + 0x08] ! LSU
380
/* The pointer advance sits in the branch delay slot. */
381 cmp %o4, %g1 ! continue if (i < count)
382 bl 1b
383 add %g3, 0x0c, %g3 ! advance out vector pointer
384
385 7: retl
386 nop
387
/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * In:  %o0 = mat, %o1 = scale (not read here), %o2 = in,
 *      %o3 = lengths (not read here), %o4 = dest
 *
 * For each of the in->count input normals stores
 * (ux * m0, uy * m5, uz * m10), using entries 0, 5 and 10 of mat->inv.
 * dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (signed compare).  Input advances by in->stride bytes,
 * output by 0x0c bytes per normal.
 *
 * Leaf routine (retl, no register window, no stack use).  Overwrites
 * %g1-%g3, %o0, %o4, %o5 and FP registers %f0-%f3, %f5, %f7, plus the
 * matrix registers.
 *
 * NOTE(review): M0/M5/M10, LDMATRIX_0_5_10, LDPTR, MAT_INV and V4F_*
 * come from sparc_matrix.h, which is not visible here.
 */
388 .globl _mesa_sparc_transform_normals_no_rot
389 _mesa_sparc_transform_normals_no_rot:
390 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
391 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
392 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
393 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
394 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
395 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
396
397 LDMATRIX_0_5_10(%o0)
398
399 /* dest->count = in->count */
400 st %g1, [%o4 + V4F_COUNT]
401
/* The clr sits in the bl delay slot, so i is zeroed on both paths. */
402 cmp %g1, 1
403 bl 7f
404 clr %o4 ! 'i' for STRIDE_LOOP
405
406 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
407 ld [%o5 + 0x04], %f1 ! uy = from[1]
408 ld [%o5 + 0x08], %f2 ! uz = from[2]
409 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
410 add %o4, 1, %o4 ! i++
411
412 /* tx (f3) = (ux * m0)
413 * ty (f5) = (uy * m5)
414 * tz (f7) = (uz * m10)
415 */
416 fmuls %f0, M0, %f3 ! FGM Group
417 st %f3, [%g3 + 0x00] ! LSU
418 fmuls %f1, M5, %f5 ! FGM Group
419 st %f5, [%g3 + 0x04] ! LSU
420 fmuls %f2, M10, %f7 ! FGM Group
421 st %f7, [%g3 + 0x08] ! LSU
422
/* The pointer advance sits in the branch delay slot. */
423 cmp %o4, %g1 ! continue if (i < count)
424 bl 1b
425 add %g3, 0x0c, %g3 ! advance out vector pointer
426
427 7: retl
428 nop
429
/*
 * _mesa_sparc_transform_normals
 *
 * In:  %o0 = mat, %o1 = scale (not read here), %o2 = in,
 *      %o3 = lengths (not read here), %o4 = dest
 *
 * For each of the in->count input normals stores
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 * using entries 0,1,2,4,5,6,8,9,10 of mat->inv.  dest->count is set to
 * in->count.  Nothing is processed when in->count < 1 (signed
 * compare).  Input advances by in->stride bytes, output by 0x0c bytes
 * per normal.
 *
 * Leaf routine (retl, no register window, no stack use).  Overwrites
 * %g1-%g3, %o0, %o4, %o5 and FP registers %f0-%f8, %f10, plus the
 * matrix registers.
 *
 * NOTE(review): M0..M10, LDMATRIX_*, LDPTR, MAT_INV and V4F_* come from
 * sparc_matrix.h, which is not visible here.
 */
430 .globl _mesa_sparc_transform_normals
431 _mesa_sparc_transform_normals:
432 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
433 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
434 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
435 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
436 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
437 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
438
439 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
440
441 /* dest->count = in->count */
442 st %g1, [%o4 + V4F_COUNT]
443
/* The clr sits in the bl delay slot, so i is zeroed on both paths. */
444 cmp %g1, 1
445 bl 7f
446 clr %o4 ! 'i' for STRIDE_LOOP
447
448 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
449 ld [%o5 + 0x04], %f1 ! uy = from[1]
450 ld [%o5 + 0x08], %f2 ! uz = from[2]
451 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
452 add %o4, 1, %o4 ! i++
453
/* Matrix product; %f0 and %f4 are reused as temporaries once their
 * first values have been consumed.  Each store follows the add that
 * completes its component.
 */
454 fmuls %f0, M0, %f3 ! FGM Group
455 fmuls %f1, M1, %f4 ! FGM Group
456 fmuls %f0, M4, %f5 ! FGM Group
457 fmuls %f1, M5, %f6 ! FGM Group
458 fmuls %f0, M8, %f7 ! FGM Group f3 available
459 fmuls %f1, M9, %f8 ! FGM Group f4 available
460 fadds %f3, %f4, %f3 ! FGA
461 fmuls %f2, M2, %f10 ! FGM Group f5 available
462 fmuls %f2, M6, %f0 ! FGM Group f6 available
463 fadds %f5, %f6, %f5 ! FGA
464 fmuls %f2, M10, %f4 ! FGM Group f7 available
465 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
466 fadds %f3, %f10, %f3 ! FGA Group f10 available
467 st %f3, [%g3 + 0x00] ! LSU
468 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
469 st %f5, [%g3 + 0x04] ! LSU
470 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
471 st %f7, [%g3 + 0x08] ! LSU
472
/* The pointer advance sits in the branch delay slot. */
473 cmp %o4, %g1 ! continue if (i < count)
474 bl 1b
475 add %g3, 0x0c, %g3 ! advance out vector pointer
476
477 7: retl
478 nop
479
/*
 * _mesa_sparc_normalize_normals
 *
 * In:  %o0 = mat (not read here), %o1 = scale (not read here),
 *      %o2 = in, %o3 = lengths (may be NULL), %o4 = dest
 *
 * No matrix is applied; each input normal (x, y, z) is loaded straight
 * into %f3/%f5/%f7.  Then:
 *   lengths == NULL: stores the normal * (1.0 / sqrt(x*x + y*y + z*z));
 *   lengths != NULL: stores the normal * lengths[i].
 * dest->count is set to in->count.  Nothing is processed when
 * in->count < 1 (signed compare).  Input advances by in->stride bytes,
 * output by 0x0c bytes per normal.
 *
 * Leaf routine (retl, no register window).  Overwrites %g1-%g3,
 * %o3-%o5 and FP registers %f3, %f5-%f8, %f10, %f12, %f13.
 *
 * NOTE(review): LDPTR and V4F_* come from sparc_matrix.h, which is not
 * visible here.
 */
480 .globl _mesa_sparc_normalize_normals
481 _mesa_sparc_normalize_normals:
482 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
483
/* 1.0f goes through a temporary stack slot to reach %f12. */
484 sethi %hi(ONE_DOT_ZERO), %g2
485 sub %sp, 16, %sp
486 st %g2, [%sp + STACK_VAR_OFF+0x0]
487 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
488 add %sp, 16, %sp
489
490 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
491 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
492 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
493 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
494
495 /* dest->count = in->count */
496 st %g1, [%o4 + V4F_COUNT]
497
/* The cmp in the bl delay slot feeds the bne; the clr in the bne delay
 * slot zeroes i on both paths.
 */
498 cmp %g1, 1
499 bl 7f
500 cmp %o3, 0
501 bne 4f
502 clr %o4 ! 'i' for STRIDE_LOOP
503
504 1: /* LENGTHS == NULL */
505 ld [%o5 + 0x00], %f3 ! ux = from[0]
506 ld [%o5 + 0x04], %f5 ! uy = from[1]
507 ld [%o5 + 0x08], %f7 ! uz = from[2]
508 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
509 add %o4, 1, %o4 ! i++
510
511 /* f3=tx, f5=ty, f7=tz */
512
513 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
514 fmuls %f3, %f3, %f6 ! FGM Group f3 available
515 fmuls %f5, %f5, %f8 ! FGM Group f5 available
516 fmuls %f7, %f7, %f10 ! FGM Group f7 available
517 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
518 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
519
520 /* scale (f6) = 1.0 / sqrt(len) */
/* NOTE(review): len is not tested against zero before the divide. */
521 fsqrts %f6, %f6 ! FDIV 20 cycles
522 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
523
524 fmuls %f3, %f6, %f3
525 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
526 fmuls %f5, %f6, %f5
527 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
528 fmuls %f7, %f6, %f7
529 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
530
/* The pointer advance sits in the branch delay slot. */
531 cmp %o4, %g1 ! continue if (i < count)
532 bl 1b
533 add %g3, 0x0c, %g3 ! advance out vector pointer
534
535 ba 7f
536 nop
537
/* Labels 4 and 5 mark the same address: there is no per-call setup on
 * this path, so the branch target and the loop head coincide.
 */
538 4: /* LENGTHS != NULL */
539
540 5:
541 ld [%o5 + 0x00], %f3 ! ux = from[0]
542 ld [%o5 + 0x04], %f5 ! uy = from[1]
543 ld [%o5 + 0x08], %f7 ! uz = from[2]
544 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
545 add %o4, 1, %o4 ! i++
546
547 ld [%o3], %f13 ! LSU
548 add %o3, 4, %o3 ! IEU0
549
550 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
551
552 fmuls %f3, %f13, %f3
553 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
554 fmuls %f5, %f13, %f5
555 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
556 fmuls %f7, %f13, %f7
557 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
558
559 cmp %o4, %g1 ! continue if (i < count)
560 bl 5b
561 add %g3, 0x0c, %g3 ! advance out vector pointer
562
563 7: retl
564 nop
565
/*
 * _mesa_sparc_rescale_normals
 *
 * In:  %o0 = mat (not read here), %o1 = scale (32-bit float bit
 *      pattern), %o2 = in, %o3 = lengths (not read here), %o4 = dest
 *
 * Stores (x * scale, y * scale, z * scale) for each of the in->count
 * input normals; no matrix is applied.  dest->count is set to
 * in->count.  Nothing is processed when in->count < 1 (signed
 * compare).  Input advances by in->stride bytes, output by 0x0c bytes
 * per normal.
 *
 * Leaf routine (retl, no register window).  Overwrites %g1-%g3, %o4,
 * %o5 and FP registers %f3, %f5, %f7, %f15.
 *
 * NOTE(review): LDPTR and V4F_* come from sparc_matrix.h, which is not
 * visible here.
 * NOTE(review): scale is taken from integer register %o1 -- confirm this
 * matches how the 64-bit ABI passes a float argument.
 */
566 .globl _mesa_sparc_rescale_normals
567 _mesa_sparc_rescale_normals:
568 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
569
/* scale goes through a temporary stack slot to reach %f15.  The 1.0f
 * constant is not needed by this routine, so it is not materialized.
 */
571 sub %sp, 16, %sp
572 st %o1, [%sp + STACK_VAR_OFF+0x0]
573 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
574 add %sp, 16, %sp
575
576 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
577 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
578 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
579 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
580
581 /* dest->count = in->count */
582 st %g1, [%o4 + V4F_COUNT]
583
/* The clr sits in the bl delay slot, so i is zeroed on both paths. */
584 cmp %g1, 1
585 bl 7f
586 clr %o4 ! 'i' for STRIDE_LOOP
587
588 1:
589 ld [%o5 + 0x00], %f3 ! ux = from[0]
590 ld [%o5 + 0x04], %f5 ! uy = from[1]
591 ld [%o5 + 0x08], %f7 ! uz = from[2]
592 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
593 add %o4, 1, %o4 ! i++
594
595 /* f3=tx, f5=ty, f7=tz */
596
597 fmuls %f3, %f15, %f3
598 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
599 fmuls %f5, %f15, %f5
600 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
601 fmuls %f7, %f15, %f7
602 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
603
/* The pointer advance sits in the branch delay slot. */
604 cmp %o4, %g1 ! continue if (i < count)
605 bl 1b
606 add %g3, 0x0c, %g3 ! advance out vector pointer
607
608 7: retl
609 nop