[MSACM32]
[reactos.git] / reactos / dll / opengl / mesa / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from several other masks (CondMask, LoopMask,
45 * ContMask, Switch.mask and FuncMask) which are controlled by the flow
46 * control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_*() helpers instead of powf()/logf().
 */
#define FAST_MATH   0
66
/* Lane indices within a quad, as used by micro_ddx() and micro_ddy(). */
#define TILE_TOP_LEFT       0
#define TILE_TOP_RIGHT      1
#define TILE_BOTTOM_LEFT    2
#define TILE_BOTTOM_RIGHT   3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_clamp(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src0,
115 const union tgsi_exec_channel *src1,
116 const union tgsi_exec_channel *src2)
117 {
118 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
122 }
123
124 static void
125 micro_cmp(union tgsi_exec_channel *dst,
126 const union tgsi_exec_channel *src0,
127 const union tgsi_exec_channel *src1,
128 const union tgsi_exec_channel *src2)
129 {
130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
134 }
135
136 static void
137 micro_cnd(union tgsi_exec_channel *dst,
138 const union tgsi_exec_channel *src0,
139 const union tgsi_exec_channel *src1,
140 const union tgsi_exec_channel *src2)
141 {
142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
145 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
146 }
147
148 static void
149 micro_cos(union tgsi_exec_channel *dst,
150 const union tgsi_exec_channel *src)
151 {
152 dst->f[0] = cosf(src->f[0]);
153 dst->f[1] = cosf(src->f[1]);
154 dst->f[2] = cosf(src->f[2]);
155 dst->f[3] = cosf(src->f[3]);
156 }
157
158 static void
159 micro_ddx(union tgsi_exec_channel *dst,
160 const union tgsi_exec_channel *src)
161 {
162 dst->f[0] =
163 dst->f[1] =
164 dst->f[2] =
165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
166 }
167
168 static void
169 micro_ddy(union tgsi_exec_channel *dst,
170 const union tgsi_exec_channel *src)
171 {
172 dst->f[0] =
173 dst->f[1] =
174 dst->f[2] =
175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
176 }
177
178 static void
179 micro_exp2(union tgsi_exec_channel *dst,
180 const union tgsi_exec_channel *src)
181 {
182 #if FAST_MATH
183 dst->f[0] = util_fast_exp2(src->f[0]);
184 dst->f[1] = util_fast_exp2(src->f[1]);
185 dst->f[2] = util_fast_exp2(src->f[2]);
186 dst->f[3] = util_fast_exp2(src->f[3]);
187 #else
188 #if DEBUG
189 /* Inf is okay for this instruction, so clamp it to silence assertions. */
190 uint i;
191 union tgsi_exec_channel clamped;
192
193 for (i = 0; i < 4; i++) {
194 if (src->f[i] > 127.99999f) {
195 clamped.f[i] = 127.99999f;
196 } else if (src->f[i] < -126.99999f) {
197 clamped.f[i] = -126.99999f;
198 } else {
199 clamped.f[i] = src->f[i];
200 }
201 }
202 src = &clamped;
203 #endif /* DEBUG */
204
205 dst->f[0] = powf(2.0f, src->f[0]);
206 dst->f[1] = powf(2.0f, src->f[1]);
207 dst->f[2] = powf(2.0f, src->f[2]);
208 dst->f[3] = powf(2.0f, src->f[3]);
209 #endif /* FAST_MATH */
210 }
211
212 static void
213 micro_flr(union tgsi_exec_channel *dst,
214 const union tgsi_exec_channel *src)
215 {
216 dst->f[0] = floorf(src->f[0]);
217 dst->f[1] = floorf(src->f[1]);
218 dst->f[2] = floorf(src->f[2]);
219 dst->f[3] = floorf(src->f[3]);
220 }
221
222 static void
223 micro_frc(union tgsi_exec_channel *dst,
224 const union tgsi_exec_channel *src)
225 {
226 dst->f[0] = src->f[0] - floorf(src->f[0]);
227 dst->f[1] = src->f[1] - floorf(src->f[1]);
228 dst->f[2] = src->f[2] - floorf(src->f[2]);
229 dst->f[3] = src->f[3] - floorf(src->f[3]);
230 }
231
232 static void
233 micro_iabs(union tgsi_exec_channel *dst,
234 const union tgsi_exec_channel *src)
235 {
236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
239 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
240 }
241
242 static void
243 micro_ineg(union tgsi_exec_channel *dst,
244 const union tgsi_exec_channel *src)
245 {
246 dst->i[0] = -src->i[0];
247 dst->i[1] = -src->i[1];
248 dst->i[2] = -src->i[2];
249 dst->i[3] = -src->i[3];
250 }
251
252 static void
253 micro_lg2(union tgsi_exec_channel *dst,
254 const union tgsi_exec_channel *src)
255 {
256 #if FAST_MATH
257 dst->f[0] = util_fast_log2(src->f[0]);
258 dst->f[1] = util_fast_log2(src->f[1]);
259 dst->f[2] = util_fast_log2(src->f[2]);
260 dst->f[3] = util_fast_log2(src->f[3]);
261 #else
262 dst->f[0] = logf(src->f[0]) * 1.442695f;
263 dst->f[1] = logf(src->f[1]) * 1.442695f;
264 dst->f[2] = logf(src->f[2]) * 1.442695f;
265 dst->f[3] = logf(src->f[3]) * 1.442695f;
266 #endif
267 }
268
269 static void
270 micro_lrp(union tgsi_exec_channel *dst,
271 const union tgsi_exec_channel *src0,
272 const union tgsi_exec_channel *src1,
273 const union tgsi_exec_channel *src2)
274 {
275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
279 }
280
281 static void
282 micro_mad(union tgsi_exec_channel *dst,
283 const union tgsi_exec_channel *src0,
284 const union tgsi_exec_channel *src1,
285 const union tgsi_exec_channel *src2)
286 {
287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
291 }
292
293 static void
294 micro_mov(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->u[0] = src->u[0];
298 dst->u[1] = src->u[1];
299 dst->u[2] = src->u[2];
300 dst->u[3] = src->u[3];
301 }
302
303 static void
304 micro_rcp(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 #if 0 /* for debugging */
308 assert(src->f[0] != 0.0f);
309 assert(src->f[1] != 0.0f);
310 assert(src->f[2] != 0.0f);
311 assert(src->f[3] != 0.0f);
312 #endif
313 dst->f[0] = 1.0f / src->f[0];
314 dst->f[1] = 1.0f / src->f[1];
315 dst->f[2] = 1.0f / src->f[2];
316 dst->f[3] = 1.0f / src->f[3];
317 }
318
319 static void
320 micro_rnd(union tgsi_exec_channel *dst,
321 const union tgsi_exec_channel *src)
322 {
323 dst->f[0] = floorf(src->f[0] + 0.5f);
324 dst->f[1] = floorf(src->f[1] + 0.5f);
325 dst->f[2] = floorf(src->f[2] + 0.5f);
326 dst->f[3] = floorf(src->f[3] + 0.5f);
327 }
328
329 static void
330 micro_rsq(union tgsi_exec_channel *dst,
331 const union tgsi_exec_channel *src)
332 {
333 #if 0 /* for debugging */
334 assert(src->f[0] != 0.0f);
335 assert(src->f[1] != 0.0f);
336 assert(src->f[2] != 0.0f);
337 assert(src->f[3] != 0.0f);
338 #endif
339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
343 }
344
345 static void
346 micro_seq(union tgsi_exec_channel *dst,
347 const union tgsi_exec_channel *src0,
348 const union tgsi_exec_channel *src1)
349 {
350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
354 }
355
356 static void
357 micro_sge(union tgsi_exec_channel *dst,
358 const union tgsi_exec_channel *src0,
359 const union tgsi_exec_channel *src1)
360 {
361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
365 }
366
367 static void
368 micro_sgn(union tgsi_exec_channel *dst,
369 const union tgsi_exec_channel *src)
370 {
371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
372 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
375 }
376
377 static void
378 micro_isgn(union tgsi_exec_channel *dst,
379 const union tgsi_exec_channel *src)
380 {
381 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
382 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
383 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
384 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
385 }
386
387 static void
388 micro_sgt(union tgsi_exec_channel *dst,
389 const union tgsi_exec_channel *src0,
390 const union tgsi_exec_channel *src1)
391 {
392 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
393 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
394 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
395 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
396 }
397
398 static void
399 micro_sin(union tgsi_exec_channel *dst,
400 const union tgsi_exec_channel *src)
401 {
402 dst->f[0] = sinf(src->f[0]);
403 dst->f[1] = sinf(src->f[1]);
404 dst->f[2] = sinf(src->f[2]);
405 dst->f[3] = sinf(src->f[3]);
406 }
407
408 static void
409 micro_sle(union tgsi_exec_channel *dst,
410 const union tgsi_exec_channel *src0,
411 const union tgsi_exec_channel *src1)
412 {
413 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
414 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
415 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
416 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
417 }
418
419 static void
420 micro_slt(union tgsi_exec_channel *dst,
421 const union tgsi_exec_channel *src0,
422 const union tgsi_exec_channel *src1)
423 {
424 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
425 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
426 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
427 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
428 }
429
430 static void
431 micro_sne(union tgsi_exec_channel *dst,
432 const union tgsi_exec_channel *src0,
433 const union tgsi_exec_channel *src1)
434 {
435 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
436 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
437 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
438 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
439 }
440
441 static void
442 micro_sfl(union tgsi_exec_channel *dst)
443 {
444 dst->f[0] = 0.0f;
445 dst->f[1] = 0.0f;
446 dst->f[2] = 0.0f;
447 dst->f[3] = 0.0f;
448 }
449
450 static void
451 micro_str(union tgsi_exec_channel *dst)
452 {
453 dst->f[0] = 1.0f;
454 dst->f[1] = 1.0f;
455 dst->f[2] = 1.0f;
456 dst->f[3] = 1.0f;
457 }
458
459 static void
460 micro_trunc(union tgsi_exec_channel *dst,
461 const union tgsi_exec_channel *src)
462 {
463 dst->f[0] = (float)(int)src->f[0];
464 dst->f[1] = (float)(int)src->f[1];
465 dst->f[2] = (float)(int)src->f[2];
466 dst->f[3] = (float)(int)src->f[3];
467 }
468
469
/* Numeric indices of the X, Y, Z and W channels. */
#define CHAN_X   0
#define CHAN_Y   1
#define CHAN_Z   2
#define CHAN_W   3
474
/** Tags for the float, signed integer and unsigned integer data types. */
enum tgsi_exec_datatype
{
   TGSI_EXEC_DATA_FLOAT,   /* first enumerator, value 0 */
   TGSI_EXEC_DATA_INT,
   TGSI_EXEC_DATA_UINT
};
480
/*
 * Short aliases for the TGSI_EXEC_TEMP_* locations of various utility
 * registers.  The _I suffix is the register index, _C is the channel.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
490
491
/**
 * Recompute the execution mask as the bitwise AND of CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer to the machine.  It is parenthesized so that any
 * pointer expression may be passed, but it is evaluated several times and
 * therefore must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
495
496
497 static const union tgsi_exec_channel ZeroVec =
498 { { 0.0, 0.0, 0.0, 0.0 } };
499
500 static const union tgsi_exec_channel OneVec = {
501 {1.0f, 1.0f, 1.0f, 1.0f}
502 };
503
504 static const union tgsi_exec_channel P128Vec = {
505 {128.0f, 128.0f, 128.0f, 128.0f}
506 };
507
508 static const union tgsi_exec_channel M128Vec = {
509 {-128.0f, -128.0f, -128.0f, -128.0f}
510 };
511
512
513 /**
514 * Assert that none of the float values in 'chan' are infinite or NaN.
515 * NaN and Inf may occur normally during program execution and should
516 * not lead to crashes, etc. But when debugging, it's helpful to catch
517 * them.
518 */
519 static INLINE void
520 check_inf_or_nan(const union tgsi_exec_channel *chan)
521 {
522 assert(!util_is_inf_or_nan((chan)->f[0]));
523 assert(!util_is_inf_or_nan((chan)->f[1]));
524 assert(!util_is_inf_or_nan((chan)->f[2]));
525 assert(!util_is_inf_or_nan((chan)->f[3]));
526 }
527
528
529 #ifdef DEBUG
530 static void
531 print_chan(const char *msg, const union tgsi_exec_channel *chan)
532 {
533 debug_printf("%s = {%f, %f, %f, %f}\n",
534 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
535 }
536 #endif
537
538
539 #ifdef DEBUG
540 static void
541 print_temp(const struct tgsi_exec_machine *mach, uint index)
542 {
543 const struct tgsi_exec_vector *tmp = &mach->Temps[index];
544 int i;
545 debug_printf("Temp[%u] =\n", index);
546 for (i = 0; i < 4; i++) {
547 debug_printf(" %c: { %f, %f, %f, %f }\n",
548 "XYZW"[i],
549 tmp->xyzw[i].f[0],
550 tmp->xyzw[i].f[1],
551 tmp->xyzw[i].f[2],
552 tmp->xyzw[i].f[3]);
553 }
554 }
555 #endif
556
557
558 void
559 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
560 unsigned num_bufs,
561 const void **bufs,
562 const unsigned *buf_sizes)
563 {
564 unsigned i;
565
566 for (i = 0; i < num_bufs; i++) {
567 mach->Consts[i] = bufs[i];
568 mach->ConstsSize[i] = buf_sizes[i];
569 }
570 }
571
572
573 /**
574 * Check if there's a potential src/dst register data dependency when
575 * using SOA execution.
576 * Example:
577 * MOV T, T.yxwz;
578 * This would expand into:
579 * MOV t0, t1;
580 * MOV t1, t0;
581 * MOV t2, t3;
582 * MOV t3, t2;
583 * The second instruction will have the wrong value for t0 if executed as-is.
584 */
585 boolean
586 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
587 {
588 uint i, chan;
589
590 uint writemask = inst->Dst[0].Register.WriteMask;
591 if (writemask == TGSI_WRITEMASK_X ||
592 writemask == TGSI_WRITEMASK_Y ||
593 writemask == TGSI_WRITEMASK_Z ||
594 writemask == TGSI_WRITEMASK_W ||
595 writemask == TGSI_WRITEMASK_NONE) {
596 /* no chance of data dependency */
597 return FALSE;
598 }
599
600 /* loop over src regs */
601 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
602 if ((inst->Src[i].Register.File ==
603 inst->Dst[0].Register.File) &&
604 ((inst->Src[i].Register.Index ==
605 inst->Dst[0].Register.Index) ||
606 inst->Src[i].Register.Indirect ||
607 inst->Dst[0].Register.Indirect)) {
608 /* loop over dest channels */
609 uint channelsWritten = 0x0;
610 for (chan = 0; chan < NUM_CHANNELS; chan++) {
611 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
612 /* check if we're reading a channel that's been written */
613 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
614 if (channelsWritten & (1 << swizzle)) {
615 return TRUE;
616 }
617
618 channelsWritten |= (1 << chan);
619 }
620 }
621 }
622 }
623 return FALSE;
624 }
625
626
627 /**
628 * Initialize machine state by expanding tokens to full instructions,
629 * allocating temporary storage, setting up constants, etc.
630 * After this, we can call tgsi_exec_machine_run() many times.
631 */
632 void
633 tgsi_exec_machine_bind_shader(
634 struct tgsi_exec_machine *mach,
635 const struct tgsi_token *tokens,
636 uint numSamplers,
637 struct tgsi_sampler **samplers)
638 {
639 uint k;
640 struct tgsi_parse_context parse;
641 struct tgsi_full_instruction *instructions;
642 struct tgsi_full_declaration *declarations;
643 uint maxInstructions = 10, numInstructions = 0;
644 uint maxDeclarations = 10, numDeclarations = 0;
645
646 #if 0
647 tgsi_dump(tokens, 0);
648 #endif
649
650 util_init_math();
651
652 if (numSamplers) {
653 assert(samplers);
654 }
655
656 mach->Tokens = tokens;
657 mach->Samplers = samplers;
658
659 if (!tokens) {
660 /* unbind and free all */
661 if (mach->Declarations) {
662 FREE( mach->Declarations );
663 }
664 mach->Declarations = NULL;
665 mach->NumDeclarations = 0;
666
667 if (mach->Instructions) {
668 FREE( mach->Instructions );
669 }
670 mach->Instructions = NULL;
671 mach->NumInstructions = 0;
672
673 return;
674 }
675
676 k = tgsi_parse_init (&parse, mach->Tokens);
677 if (k != TGSI_PARSE_OK) {
678 debug_printf( "Problem parsing!\n" );
679 return;
680 }
681
682 mach->Processor = parse.FullHeader.Processor.Processor;
683 mach->ImmLimit = 0;
684
685 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
686 !mach->UsedGeometryShader) {
687 struct tgsi_exec_vector *inputs;
688 struct tgsi_exec_vector *outputs;
689
690 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
691 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
692 16);
693
694 if (!inputs)
695 return;
696
697 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
698 TGSI_MAX_TOTAL_VERTICES, 16);
699
700 if (!outputs) {
701 align_free(inputs);
702 return;
703 }
704
705 align_free(mach->Inputs);
706 align_free(mach->Outputs);
707
708 mach->Inputs = inputs;
709 mach->Outputs = outputs;
710 mach->UsedGeometryShader = TRUE;
711 }
712
713 declarations = (struct tgsi_full_declaration *)
714 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
715
716 if (!declarations) {
717 return;
718 }
719
720 instructions = (struct tgsi_full_instruction *)
721 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
722
723 if (!instructions) {
724 FREE( declarations );
725 return;
726 }
727
728 while( !tgsi_parse_end_of_tokens( &parse ) ) {
729 uint i;
730
731 tgsi_parse_token( &parse );
732 switch( parse.FullToken.Token.Type ) {
733 case TGSI_TOKEN_TYPE_DECLARATION:
734 /* save expanded declaration */
735 if (numDeclarations == maxDeclarations) {
736 declarations = REALLOC(declarations,
737 maxDeclarations
738 * sizeof(struct tgsi_full_declaration),
739 (maxDeclarations + 10)
740 * sizeof(struct tgsi_full_declaration));
741 maxDeclarations += 10;
742 }
743 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
744 unsigned reg;
745 for (reg = parse.FullToken.FullDeclaration.Range.First;
746 reg <= parse.FullToken.FullDeclaration.Range.Last;
747 ++reg) {
748 ++mach->NumOutputs;
749 }
750 }
751 if (parse.FullToken.FullDeclaration.Declaration.File ==
752 TGSI_FILE_IMMEDIATE_ARRAY) {
753 unsigned reg;
754 struct tgsi_full_declaration *decl =
755 &parse.FullToken.FullDeclaration;
756 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES);
757 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) {
758 for( i = 0; i < 4; i++ ) {
759 int idx = reg * 4 + i;
760 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float;
761 }
762 }
763 }
764 memcpy(declarations + numDeclarations,
765 &parse.FullToken.FullDeclaration,
766 sizeof(declarations[0]));
767 numDeclarations++;
768 break;
769
770 case TGSI_TOKEN_TYPE_IMMEDIATE:
771 {
772 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
773 assert( size <= 4 );
774 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
775
776 for( i = 0; i < size; i++ ) {
777 mach->Imms[mach->ImmLimit][i] =
778 parse.FullToken.FullImmediate.u[i].Float;
779 }
780 mach->ImmLimit += 1;
781 }
782 break;
783
784 case TGSI_TOKEN_TYPE_INSTRUCTION:
785
786 /* save expanded instruction */
787 if (numInstructions == maxInstructions) {
788 instructions = REALLOC(instructions,
789 maxInstructions
790 * sizeof(struct tgsi_full_instruction),
791 (maxInstructions + 10)
792 * sizeof(struct tgsi_full_instruction));
793 maxInstructions += 10;
794 }
795
796 memcpy(instructions + numInstructions,
797 &parse.FullToken.FullInstruction,
798 sizeof(instructions[0]));
799
800 numInstructions++;
801 break;
802
803 case TGSI_TOKEN_TYPE_PROPERTY:
804 break;
805
806 default:
807 assert( 0 );
808 }
809 }
810 tgsi_parse_free (&parse);
811
812 if (mach->Declarations) {
813 FREE( mach->Declarations );
814 }
815 mach->Declarations = declarations;
816 mach->NumDeclarations = numDeclarations;
817
818 if (mach->Instructions) {
819 FREE( mach->Instructions );
820 }
821 mach->Instructions = instructions;
822 mach->NumInstructions = numInstructions;
823 }
824
825
826 struct tgsi_exec_machine *
827 tgsi_exec_machine_create( void )
828 {
829 struct tgsi_exec_machine *mach;
830 uint i;
831
832 mach = align_malloc( sizeof *mach, 16 );
833 if (!mach)
834 goto fail;
835
836 memset(mach, 0, sizeof(*mach));
837
838 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
839 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
840 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
841
842 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
843 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
844 if (!mach->Inputs || !mach->Outputs)
845 goto fail;
846
847 /* Setup constants needed by the SSE2 executor. */
848 for( i = 0; i < 4; i++ ) {
849 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
850 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
851 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
852 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
853 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
854 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
855 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
856 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
857 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
858 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
859 }
860
861 #ifdef DEBUG
862 /* silence warnings */
863 (void) print_chan;
864 (void) print_temp;
865 #endif
866
867 return mach;
868
869 fail:
870 if (mach) {
871 align_free(mach->Inputs);
872 align_free(mach->Outputs);
873 align_free(mach);
874 }
875 return NULL;
876 }
877
878
879 void
880 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
881 {
882 if (mach) {
883 if (mach->Instructions)
884 FREE(mach->Instructions);
885 if (mach->Declarations)
886 FREE(mach->Declarations);
887
888 align_free(mach->Inputs);
889 align_free(mach->Outputs);
890
891 align_free(mach);
892 }
893 }
894
895 static void
896 micro_add(union tgsi_exec_channel *dst,
897 const union tgsi_exec_channel *src0,
898 const union tgsi_exec_channel *src1)
899 {
900 dst->f[0] = src0->f[0] + src1->f[0];
901 dst->f[1] = src0->f[1] + src1->f[1];
902 dst->f[2] = src0->f[2] + src1->f[2];
903 dst->f[3] = src0->f[3] + src1->f[3];
904 }
905
906 static void
907 micro_div(
908 union tgsi_exec_channel *dst,
909 const union tgsi_exec_channel *src0,
910 const union tgsi_exec_channel *src1 )
911 {
912 if (src1->f[0] != 0) {
913 dst->f[0] = src0->f[0] / src1->f[0];
914 }
915 if (src1->f[1] != 0) {
916 dst->f[1] = src0->f[1] / src1->f[1];
917 }
918 if (src1->f[2] != 0) {
919 dst->f[2] = src0->f[2] / src1->f[2];
920 }
921 if (src1->f[3] != 0) {
922 dst->f[3] = src0->f[3] / src1->f[3];
923 }
924 }
925
926 static void
927 micro_rcc(union tgsi_exec_channel *dst,
928 const union tgsi_exec_channel *src)
929 {
930 uint i;
931
932 for (i = 0; i < 4; i++) {
933 float recip = 1.0f / src->f[i];
934
935 if (recip > 0.0f) {
936 if (recip > 1.884467e+019f) {
937 dst->f[i] = 1.884467e+019f;
938 }
939 else if (recip < 5.42101e-020f) {
940 dst->f[i] = 5.42101e-020f;
941 }
942 else {
943 dst->f[i] = recip;
944 }
945 }
946 else {
947 if (recip < -1.884467e+019f) {
948 dst->f[i] = -1.884467e+019f;
949 }
950 else if (recip > -5.42101e-020f) {
951 dst->f[i] = -5.42101e-020f;
952 }
953 else {
954 dst->f[i] = recip;
955 }
956 }
957 }
958 }
959
960 static void
961 micro_lt(
962 union tgsi_exec_channel *dst,
963 const union tgsi_exec_channel *src0,
964 const union tgsi_exec_channel *src1,
965 const union tgsi_exec_channel *src2,
966 const union tgsi_exec_channel *src3 )
967 {
968 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
969 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
970 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
971 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
972 }
973
974 static void
975 micro_max(union tgsi_exec_channel *dst,
976 const union tgsi_exec_channel *src0,
977 const union tgsi_exec_channel *src1)
978 {
979 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
980 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
981 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
982 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
983 }
984
985 static void
986 micro_min(union tgsi_exec_channel *dst,
987 const union tgsi_exec_channel *src0,
988 const union tgsi_exec_channel *src1)
989 {
990 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
991 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
992 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
993 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
994 }
995
996 static void
997 micro_mul(union tgsi_exec_channel *dst,
998 const union tgsi_exec_channel *src0,
999 const union tgsi_exec_channel *src1)
1000 {
1001 dst->f[0] = src0->f[0] * src1->f[0];
1002 dst->f[1] = src0->f[1] * src1->f[1];
1003 dst->f[2] = src0->f[2] * src1->f[2];
1004 dst->f[3] = src0->f[3] * src1->f[3];
1005 }
1006
1007 static void
1008 micro_neg(
1009 union tgsi_exec_channel *dst,
1010 const union tgsi_exec_channel *src )
1011 {
1012 dst->f[0] = -src->f[0];
1013 dst->f[1] = -src->f[1];
1014 dst->f[2] = -src->f[2];
1015 dst->f[3] = -src->f[3];
1016 }
1017
1018 static void
1019 micro_pow(
1020 union tgsi_exec_channel *dst,
1021 const union tgsi_exec_channel *src0,
1022 const union tgsi_exec_channel *src1 )
1023 {
1024 #if FAST_MATH
1025 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1026 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1027 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1028 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1029 #else
1030 dst->f[0] = powf( src0->f[0], src1->f[0] );
1031 dst->f[1] = powf( src0->f[1], src1->f[1] );
1032 dst->f[2] = powf( src0->f[2], src1->f[2] );
1033 dst->f[3] = powf( src0->f[3], src1->f[3] );
1034 #endif
1035 }
1036
1037 static void
1038 micro_sub(union tgsi_exec_channel *dst,
1039 const union tgsi_exec_channel *src0,
1040 const union tgsi_exec_channel *src1)
1041 {
1042 dst->f[0] = src0->f[0] - src1->f[0];
1043 dst->f[1] = src0->f[1] - src1->f[1];
1044 dst->f[2] = src0->f[2] - src1->f[2];
1045 dst->f[3] = src0->f[3] - src1->f[3];
1046 }
1047
1048 static void
1049 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1050 const uint chan_index,
1051 const uint file,
1052 const uint swizzle,
1053 const union tgsi_exec_channel *index,
1054 const union tgsi_exec_channel *index2D,
1055 union tgsi_exec_channel *chan)
1056 {
1057 uint i;
1058
1059 assert(swizzle < 4);
1060
1061 switch (file) {
1062 case TGSI_FILE_CONSTANT:
1063 for (i = 0; i < QUAD_SIZE; i++) {
1064 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1065 assert(mach->Consts[index2D->i[i]]);
1066
1067 if (index->i[i] < 0) {
1068 chan->u[i] = 0;
1069 } else {
1070 /* NOTE: copying the const value as a uint instead of float */
1071 const uint constbuf = index2D->i[i];
1072 const uint *buf = (const uint *)mach->Consts[constbuf];
1073 const int pos = index->i[i] * 4 + swizzle;
1074 /* const buffer bounds check */
1075 if (pos < 0 || pos >= mach->ConstsSize[constbuf]) {
1076 if (0) {
1077 /* Debug: print warning */
1078 static int count = 0;
1079 if (count++ < 100)
1080 debug_printf("TGSI Exec: const buffer index %d"
1081 " out of bounds\n", pos);
1082 }
1083 chan->u[i] = 0;
1084 }
1085 else
1086 chan->u[i] = buf[pos];
1087 }
1088 }
1089 break;
1090
1091 case TGSI_FILE_INPUT:
1092 for (i = 0; i < QUAD_SIZE; i++) {
1093 /*
1094 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1095 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1096 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1097 index2D->i[i], index->i[i]);
1098 }*/
1099 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1100 assert(pos >= 0);
1101 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1102 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1103 }
1104 break;
1105
1106 case TGSI_FILE_SYSTEM_VALUE:
1107 /* XXX no swizzling at this point. Will be needed if we put
1108 * gl_FragCoord, for example, in a sys value register.
1109 */
1110 for (i = 0; i < QUAD_SIZE; i++) {
1111 chan->u[i] = mach->SystemValue[index->i[i]].u[i];
1112 }
1113 break;
1114
1115 case TGSI_FILE_TEMPORARY:
1116 for (i = 0; i < QUAD_SIZE; i++) {
1117 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1118 assert(index2D->i[i] == 0);
1119
1120 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1121 }
1122 break;
1123
1124 case TGSI_FILE_TEMPORARY_ARRAY:
1125 for (i = 0; i < QUAD_SIZE; i++) {
1126 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1127 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS);
1128
1129 chan->u[i] =
1130 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i];
1131 }
1132 break;
1133
1134 case TGSI_FILE_IMMEDIATE:
1135 for (i = 0; i < QUAD_SIZE; i++) {
1136 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1137 assert(index2D->i[i] == 0);
1138
1139 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1140 }
1141 break;
1142
1143 case TGSI_FILE_IMMEDIATE_ARRAY:
1144 for (i = 0; i < QUAD_SIZE; i++) {
1145 assert(index2D->i[i] == 0);
1146
1147 chan->f[i] = mach->ImmArray[index->i[i]][swizzle];
1148 }
1149 break;
1150
1151 case TGSI_FILE_ADDRESS:
1152 for (i = 0; i < QUAD_SIZE; i++) {
1153 assert(index->i[i] >= 0);
1154 assert(index2D->i[i] == 0);
1155
1156 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1157 }
1158 break;
1159
1160 case TGSI_FILE_PREDICATE:
1161 for (i = 0; i < QUAD_SIZE; i++) {
1162 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1163 assert(index2D->i[i] == 0);
1164
1165 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1166 }
1167 break;
1168
1169 case TGSI_FILE_OUTPUT:
1170 /* vertex/fragment output vars can be read too */
1171 for (i = 0; i < QUAD_SIZE; i++) {
1172 assert(index->i[i] >= 0);
1173 assert(index2D->i[i] == 0);
1174
1175 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1176 }
1177 break;
1178
1179 default:
1180 assert(0);
1181 for (i = 0; i < QUAD_SIZE; i++) {
1182 chan->u[i] = 0;
1183 }
1184 }
1185 }
1186
1187 static void
1188 fetch_source(const struct tgsi_exec_machine *mach,
1189 union tgsi_exec_channel *chan,
1190 const struct tgsi_full_src_register *reg,
1191 const uint chan_index,
1192 enum tgsi_exec_datatype src_datatype)
1193 {
1194 union tgsi_exec_channel index;
1195 union tgsi_exec_channel index2D;
1196 uint swizzle;
1197
1198 /* We start with a direct index into a register file.
1199 *
1200 * file[1],
1201 * where:
1202 * file = Register.File
1203 * [1] = Register.Index
1204 */
1205 index.i[0] =
1206 index.i[1] =
1207 index.i[2] =
1208 index.i[3] = reg->Register.Index;
1209
1210 /* There is an extra source register that indirectly subscripts
1211 * a register file. The direct index now becomes an offset
1212 * that is being added to the indirect register.
1213 *
1214 * file[ind[2].x+1],
1215 * where:
1216 * ind = Indirect.File
1217 * [2] = Indirect.Index
1218 * .x = Indirect.SwizzleX
1219 */
1220 if (reg->Register.Indirect) {
1221 union tgsi_exec_channel index2;
1222 union tgsi_exec_channel indir_index;
1223 const uint execmask = mach->ExecMask;
1224 uint i;
1225
1226 /* which address register (always zero now) */
1227 index2.i[0] =
1228 index2.i[1] =
1229 index2.i[2] =
1230 index2.i[3] = reg->Indirect.Index;
1231 assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
1232 /* get current value of address register[swizzle] */
1233 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1234 fetch_src_file_channel(mach,
1235 chan_index,
1236 reg->Indirect.File,
1237 swizzle,
1238 &index2,
1239 &ZeroVec,
1240 &indir_index);
1241
1242 /* add value of address register to the offset */
1243 index.i[0] += indir_index.i[0];
1244 index.i[1] += indir_index.i[1];
1245 index.i[2] += indir_index.i[2];
1246 index.i[3] += indir_index.i[3];
1247
1248 /* for disabled execution channels, zero-out the index to
1249 * avoid using a potential garbage value.
1250 */
1251 for (i = 0; i < QUAD_SIZE; i++) {
1252 if ((execmask & (1 << i)) == 0)
1253 index.i[i] = 0;
1254 }
1255 }
1256
1257 /* There is an extra source register that is a second
1258 * subscript to a register file. Effectively it means that
1259 * the register file is actually a 2D array of registers.
1260 *
1261 * file[3][1],
1262 * where:
1263 * [3] = Dimension.Index
1264 */
1265 if (reg->Register.Dimension) {
1266 index2D.i[0] =
1267 index2D.i[1] =
1268 index2D.i[2] =
1269 index2D.i[3] = reg->Dimension.Index;
1270
1271 /* Again, the second subscript index can be addressed indirectly
1272 * identically to the first one.
1273 * Nothing stops us from indirectly addressing the indirect register,
1274 * but there is no need for that, so we won't exercise it.
1275 *
1276 * file[ind[4].y+3][1],
1277 * where:
1278 * ind = DimIndirect.File
1279 * [4] = DimIndirect.Index
1280 * .y = DimIndirect.SwizzleX
1281 */
1282 if (reg->Dimension.Indirect) {
1283 union tgsi_exec_channel index2;
1284 union tgsi_exec_channel indir_index;
1285 const uint execmask = mach->ExecMask;
1286 uint i;
1287
1288 index2.i[0] =
1289 index2.i[1] =
1290 index2.i[2] =
1291 index2.i[3] = reg->DimIndirect.Index;
1292
1293 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1294 fetch_src_file_channel(mach,
1295 chan_index,
1296 reg->DimIndirect.File,
1297 swizzle,
1298 &index2,
1299 &ZeroVec,
1300 &indir_index);
1301
1302 index2D.i[0] += indir_index.i[0];
1303 index2D.i[1] += indir_index.i[1];
1304 index2D.i[2] += indir_index.i[2];
1305 index2D.i[3] += indir_index.i[3];
1306
1307 /* for disabled execution channels, zero-out the index to
1308 * avoid using a potential garbage value.
1309 */
1310 for (i = 0; i < QUAD_SIZE; i++) {
1311 if ((execmask & (1 << i)) == 0) {
1312 index2D.i[i] = 0;
1313 }
1314 }
1315 }
1316
1317 /* If by any chance there was a need for a 3D array of register
1318 * files, we would have to check whether Dimension is followed
1319 * by a dimension register and continue the saga.
1320 */
1321 } else {
1322 index2D.i[0] =
1323 index2D.i[1] =
1324 index2D.i[2] =
1325 index2D.i[3] = 0;
1326 }
1327
1328 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1329 fetch_src_file_channel(mach,
1330 chan_index,
1331 reg->Register.File,
1332 swizzle,
1333 &index,
1334 &index2D,
1335 chan);
1336
1337 if (reg->Register.Absolute) {
1338 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1339 micro_abs(chan, chan);
1340 } else {
1341 micro_iabs(chan, chan);
1342 }
1343 }
1344
1345 if (reg->Register.Negate) {
1346 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1347 micro_neg(chan, chan);
1348 } else {
1349 micro_ineg(chan, chan);
1350 }
1351 }
1352 }
1353
1354 static void
1355 store_dest(struct tgsi_exec_machine *mach,
1356 const union tgsi_exec_channel *chan,
1357 const struct tgsi_full_dst_register *reg,
1358 const struct tgsi_full_instruction *inst,
1359 uint chan_index,
1360 enum tgsi_exec_datatype dst_datatype)
1361 {
1362 uint i;
1363 union tgsi_exec_channel null;
1364 union tgsi_exec_channel *dst;
1365 union tgsi_exec_channel index2D;
1366 uint execmask = mach->ExecMask;
1367 int offset = 0; /* indirection offset */
1368 int index;
1369
1370 /* for debugging */
1371 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1372 check_inf_or_nan(chan);
1373 }
1374
1375 /* There is an extra source register that indirectly subscripts
1376 * a register file. The direct index now becomes an offset
1377 * that is being added to the indirect register.
1378 *
1379 * file[ind[2].x+1],
1380 * where:
1381 * ind = Indirect.File
1382 * [2] = Indirect.Index
1383 * .x = Indirect.SwizzleX
1384 */
1385 if (reg->Register.Indirect) {
1386 union tgsi_exec_channel index;
1387 union tgsi_exec_channel indir_index;
1388 uint swizzle;
1389
1390 /* which address register (always zero for now) */
1391 index.i[0] =
1392 index.i[1] =
1393 index.i[2] =
1394 index.i[3] = reg->Indirect.Index;
1395
1396 /* get current value of address register[swizzle] */
1397 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1398
1399 /* fetch values from the address/indirection register */
1400 fetch_src_file_channel(mach,
1401 chan_index,
1402 reg->Indirect.File,
1403 swizzle,
1404 &index,
1405 &ZeroVec,
1406 &indir_index);
1407
1408 /* save indirection offset */
1409 offset = indir_index.i[0];
1410 }
1411
1412 /* There is an extra source register that is a second
1413 * subscript to a register file. Effectively it means that
1414 * the register file is actually a 2D array of registers.
1415 *
1416 * file[3][1],
1417 * where:
1418 * [3] = Dimension.Index
1419 */
1420 if (reg->Register.Dimension) {
1421 index2D.i[0] =
1422 index2D.i[1] =
1423 index2D.i[2] =
1424 index2D.i[3] = reg->Dimension.Index;
1425
1426 /* Again, the second subscript index can be addressed indirectly
1427 * identically to the first one.
1428 * Nothing stops us from indirectly addressing the indirect register,
1429 * but there is no need for that, so we won't exercise it.
1430 *
1431 * file[ind[4].y+3][1],
1432 * where:
1433 * ind = DimIndirect.File
1434 * [4] = DimIndirect.Index
1435 * .y = DimIndirect.SwizzleX
1436 */
1437 if (reg->Dimension.Indirect) {
1438 union tgsi_exec_channel index2;
1439 union tgsi_exec_channel indir_index;
1440 const uint execmask = mach->ExecMask;
1441 unsigned swizzle;
1442 uint i;
1443
1444 index2.i[0] =
1445 index2.i[1] =
1446 index2.i[2] =
1447 index2.i[3] = reg->DimIndirect.Index;
1448
1449 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1450 fetch_src_file_channel(mach,
1451 chan_index,
1452 reg->DimIndirect.File,
1453 swizzle,
1454 &index2,
1455 &ZeroVec,
1456 &indir_index);
1457
1458 index2D.i[0] += indir_index.i[0];
1459 index2D.i[1] += indir_index.i[1];
1460 index2D.i[2] += indir_index.i[2];
1461 index2D.i[3] += indir_index.i[3];
1462
1463 /* for disabled execution channels, zero-out the index to
1464 * avoid using a potential garbage value.
1465 */
1466 for (i = 0; i < QUAD_SIZE; i++) {
1467 if ((execmask & (1 << i)) == 0) {
1468 index2D.i[i] = 0;
1469 }
1470 }
1471 }
1472
1473 /* If by any chance there was a need for a 3D array of register
1474 * files, we would have to check whether Dimension is followed
1475 * by a dimension register and continue the saga.
1476 */
1477 } else {
1478 index2D.i[0] =
1479 index2D.i[1] =
1480 index2D.i[2] =
1481 index2D.i[3] = 0;
1482 }
1483
1484 switch (reg->Register.File) {
1485 case TGSI_FILE_NULL:
1486 dst = &null;
1487 break;
1488
1489 case TGSI_FILE_OUTPUT:
1490 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1491 + reg->Register.Index;
1492 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1493 #if 0
1494 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1495 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1496 for (i = 0; i < QUAD_SIZE; i++)
1497 if (execmask & (1 << i))
1498 fprintf(stderr, "%f, ", chan->f[i]);
1499 fprintf(stderr, ")\n");
1500 }
1501 #endif
1502 break;
1503
1504 case TGSI_FILE_TEMPORARY:
1505 index = reg->Register.Index;
1506 assert( index < TGSI_EXEC_NUM_TEMPS );
1507 dst = &mach->Temps[offset + index].xyzw[chan_index];
1508 break;
1509
1510 case TGSI_FILE_TEMPORARY_ARRAY:
1511 index = reg->Register.Index;
1512 assert( index < TGSI_EXEC_NUM_TEMPS );
1513 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS );
1514 /* XXX we use index2D.i[0] here but somehow we might
1515 * end up with someone trying to store indirectly in
1516 * different buffers */
1517 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index];
1518 break;
1519
1520 case TGSI_FILE_ADDRESS:
1521 index = reg->Register.Index;
1522 dst = &mach->Addrs[index].xyzw[chan_index];
1523 break;
1524
1525 case TGSI_FILE_PREDICATE:
1526 index = reg->Register.Index;
1527 assert(index < TGSI_EXEC_NUM_PREDS);
1528 dst = &mach->Predicates[index].xyzw[chan_index];
1529 break;
1530
1531 default:
1532 assert( 0 );
1533 return;
1534 }
1535
1536 if (inst->Instruction.Predicate) {
1537 uint swizzle;
1538 union tgsi_exec_channel *pred;
1539
1540 switch (chan_index) {
1541 case CHAN_X:
1542 swizzle = inst->Predicate.SwizzleX;
1543 break;
1544 case CHAN_Y:
1545 swizzle = inst->Predicate.SwizzleY;
1546 break;
1547 case CHAN_Z:
1548 swizzle = inst->Predicate.SwizzleZ;
1549 break;
1550 case CHAN_W:
1551 swizzle = inst->Predicate.SwizzleW;
1552 break;
1553 default:
1554 assert(0);
1555 return;
1556 }
1557
1558 assert(inst->Predicate.Index == 0);
1559
1560 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1561
1562 if (inst->Predicate.Negate) {
1563 for (i = 0; i < QUAD_SIZE; i++) {
1564 if (pred->u[i]) {
1565 execmask &= ~(1 << i);
1566 }
1567 }
1568 } else {
1569 for (i = 0; i < QUAD_SIZE; i++) {
1570 if (!pred->u[i]) {
1571 execmask &= ~(1 << i);
1572 }
1573 }
1574 }
1575 }
1576
1577 switch (inst->Instruction.Saturate) {
1578 case TGSI_SAT_NONE:
1579 for (i = 0; i < QUAD_SIZE; i++)
1580 if (execmask & (1 << i))
1581 dst->i[i] = chan->i[i];
1582 break;
1583
1584 case TGSI_SAT_ZERO_ONE:
1585 for (i = 0; i < QUAD_SIZE; i++)
1586 if (execmask & (1 << i)) {
1587 if (chan->f[i] < 0.0f)
1588 dst->f[i] = 0.0f;
1589 else if (chan->f[i] > 1.0f)
1590 dst->f[i] = 1.0f;
1591 else
1592 dst->i[i] = chan->i[i];
1593 }
1594 break;
1595
1596 case TGSI_SAT_MINUS_PLUS_ONE:
1597 for (i = 0; i < QUAD_SIZE; i++)
1598 if (execmask & (1 << i)) {
1599 if (chan->f[i] < -1.0f)
1600 dst->f[i] = -1.0f;
1601 else if (chan->f[i] > 1.0f)
1602 dst->f[i] = 1.0f;
1603 else
1604 dst->i[i] = chan->i[i];
1605 }
1606 break;
1607
1608 default:
1609 assert( 0 );
1610 }
1611 }
1612
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL, applying source modifiers as floats.  Expects `mach` and
 * `inst` to be in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

/* Same as FETCH, but source modifiers are applied as integers. */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
1618
1619
1620 /**
1621 * Execute ARB-style KIL which is predicated by a src register.
1622 * Kill fragment if any of the four values is less than zero.
1623 */
1624 static void
1625 exec_kil(struct tgsi_exec_machine *mach,
1626 const struct tgsi_full_instruction *inst)
1627 {
1628 uint uniquemask;
1629 uint chan_index;
1630 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1631 union tgsi_exec_channel r[1];
1632
1633 /* This mask stores component bits that were already tested. */
1634 uniquemask = 0;
1635
1636 for (chan_index = 0; chan_index < 4; chan_index++)
1637 {
1638 uint swizzle;
1639 uint i;
1640
1641 /* unswizzle channel */
1642 swizzle = tgsi_util_get_full_src_register_swizzle (
1643 &inst->Src[0],
1644 chan_index);
1645
1646 /* check if the component has not been already tested */
1647 if (uniquemask & (1 << swizzle))
1648 continue;
1649 uniquemask |= 1 << swizzle;
1650
1651 FETCH(&r[0], 0, chan_index);
1652 for (i = 0; i < 4; i++)
1653 if (r[0].f[i] < 0.0f)
1654 kilmask |= 1 << i;
1655 }
1656
1657 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1658 }
1659
1660 /**
1661 * Execute NVIDIA-style KIL which is predicated by a condition code.
1662 * Kill fragment if the condition code is TRUE.
1663 */
1664 static void
1665 exec_kilp(struct tgsi_exec_machine *mach,
1666 const struct tgsi_full_instruction *inst)
1667 {
1668 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1669
1670 /* "unconditional" kil */
1671 kilmask = mach->ExecMask;
1672 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1673 }
1674
1675 static void
1676 emit_vertex(struct tgsi_exec_machine *mach)
1677 {
1678 /* FIXME: check for exec mask correctly
1679 unsigned i;
1680 for (i = 0; i < QUAD_SIZE; ++i) {
1681 if ((mach->ExecMask & (1 << i)))
1682 */
1683 if (mach->ExecMask) {
1684 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1685 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1686 }
1687 }
1688
1689 static void
1690 emit_primitive(struct tgsi_exec_machine *mach)
1691 {
1692 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1693 /* FIXME: check for exec mask correctly
1694 unsigned i;
1695 for (i = 0; i < QUAD_SIZE; ++i) {
1696 if ((mach->ExecMask & (1 << i)))
1697 */
1698 if (mach->ExecMask) {
1699 ++(*prim_count);
1700 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1701 mach->Primitives[*prim_count] = 0;
1702 }
1703 }
1704
1705 static void
1706 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1707 {
1708 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1709 int emitted_verts =
1710 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1711 if (emitted_verts) {
1712 emit_primitive(mach);
1713 }
1714 }
1715 }
1716
1717
1718 /*
1719 * Fetch four texture samples using STR texture coordinates.
1720 */
1721 static void
1722 fetch_texel( struct tgsi_sampler *sampler,
1723 const union tgsi_exec_channel *s,
1724 const union tgsi_exec_channel *t,
1725 const union tgsi_exec_channel *p,
1726 const union tgsi_exec_channel *c0,
1727 enum tgsi_sampler_control control,
1728 union tgsi_exec_channel *r,
1729 union tgsi_exec_channel *g,
1730 union tgsi_exec_channel *b,
1731 union tgsi_exec_channel *a )
1732 {
1733 uint j;
1734 float rgba[NUM_CHANNELS][QUAD_SIZE];
1735
1736 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1737
1738 for (j = 0; j < 4; j++) {
1739 r->f[j] = rgba[0][j];
1740 g->f[j] = rgba[1][j];
1741 b->f[j] = rgba[2][j];
1742 a->f[j] = rgba[3][j];
1743 }
1744 }
1745
1746
/* Texture instruction variants understood by the sampling helpers below. */
#define TEX_MODIFIER_NONE           0  /* plain lookup */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by W */
#define TEX_MODIFIER_LOD_BIAS       2  /* extra operand is a LOD bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* extra operand is the LOD itself */
1751
1752
1753 static void
1754 exec_tex(struct tgsi_exec_machine *mach,
1755 const struct tgsi_full_instruction *inst,
1756 uint modifier)
1757 {
1758 const uint unit = inst->Src[1].Register.Index;
1759 union tgsi_exec_channel r[4];
1760 const union tgsi_exec_channel *lod = &ZeroVec;
1761 enum tgsi_sampler_control control;
1762 uint chan;
1763
1764 if (modifier != TEX_MODIFIER_NONE) {
1765 FETCH(&r[3], 0, CHAN_W);
1766 if (modifier != TEX_MODIFIER_PROJECTED) {
1767 lod = &r[3];
1768 }
1769 }
1770
1771 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1772 control = tgsi_sampler_lod_explicit;
1773 } else {
1774 control = tgsi_sampler_lod_bias;
1775 }
1776
1777 switch (inst->Texture.Texture) {
1778 case TGSI_TEXTURE_1D:
1779 FETCH(&r[0], 0, CHAN_X);
1780
1781 if (modifier == TEX_MODIFIER_PROJECTED) {
1782 micro_div(&r[0], &r[0], &r[3]);
1783 }
1784
1785 fetch_texel(mach->Samplers[unit],
1786 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1787 control,
1788 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1789 break;
1790 case TGSI_TEXTURE_SHADOW1D:
1791 FETCH(&r[0], 0, CHAN_X);
1792 FETCH(&r[2], 0, CHAN_Z);
1793
1794 if (modifier == TEX_MODIFIER_PROJECTED) {
1795 micro_div(&r[0], &r[0], &r[3]);
1796 }
1797
1798 fetch_texel(mach->Samplers[unit],
1799 &r[0], &ZeroVec, &r[2], lod, /* S, T, P, LOD */
1800 control,
1801 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1802 break;
1803
1804 case TGSI_TEXTURE_2D:
1805 case TGSI_TEXTURE_RECT:
1806 case TGSI_TEXTURE_SHADOW2D:
1807 case TGSI_TEXTURE_SHADOWRECT:
1808 FETCH(&r[0], 0, CHAN_X);
1809 FETCH(&r[1], 0, CHAN_Y);
1810 FETCH(&r[2], 0, CHAN_Z);
1811
1812 if (modifier == TEX_MODIFIER_PROJECTED) {
1813 micro_div(&r[0], &r[0], &r[3]);
1814 micro_div(&r[1], &r[1], &r[3]);
1815 micro_div(&r[2], &r[2], &r[3]);
1816 }
1817
1818 fetch_texel(mach->Samplers[unit],
1819 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1820 control,
1821 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1822 break;
1823
1824 case TGSI_TEXTURE_1D_ARRAY:
1825 FETCH(&r[0], 0, CHAN_X);
1826 FETCH(&r[1], 0, CHAN_Y);
1827
1828 if (modifier == TEX_MODIFIER_PROJECTED) {
1829 micro_div(&r[0], &r[0], &r[3]);
1830 }
1831
1832 fetch_texel(mach->Samplers[unit],
1833 &r[0], &r[1], &ZeroVec, lod, /* S, T, P, LOD */
1834 control,
1835 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1836 break;
1837 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1838 FETCH(&r[0], 0, CHAN_X);
1839 FETCH(&r[1], 0, CHAN_Y);
1840 FETCH(&r[2], 0, CHAN_Z);
1841
1842 if (modifier == TEX_MODIFIER_PROJECTED) {
1843 micro_div(&r[0], &r[0], &r[3]);
1844 }
1845
1846 fetch_texel(mach->Samplers[unit],
1847 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1848 control,
1849 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1850 break;
1851
1852 case TGSI_TEXTURE_2D_ARRAY:
1853 FETCH(&r[0], 0, CHAN_X);
1854 FETCH(&r[1], 0, CHAN_Y);
1855 FETCH(&r[2], 0, CHAN_Z);
1856
1857 if (modifier == TEX_MODIFIER_PROJECTED) {
1858 micro_div(&r[0], &r[0], &r[3]);
1859 micro_div(&r[1], &r[1], &r[3]);
1860 }
1861
1862 fetch_texel(mach->Samplers[unit],
1863 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1864 control,
1865 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1866 break;
1867 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1868 case TGSI_TEXTURE_SHADOWCUBE:
1869 FETCH(&r[0], 0, CHAN_X);
1870 FETCH(&r[1], 0, CHAN_Y);
1871 FETCH(&r[2], 0, CHAN_Z);
1872 FETCH(&r[3], 0, CHAN_W);
1873
1874 fetch_texel(mach->Samplers[unit],
1875 &r[0], &r[1], &r[2], &r[3], /* S, T, P, LOD */
1876 control,
1877 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1878 break;
1879 case TGSI_TEXTURE_3D:
1880 case TGSI_TEXTURE_CUBE:
1881 FETCH(&r[0], 0, CHAN_X);
1882 FETCH(&r[1], 0, CHAN_Y);
1883 FETCH(&r[2], 0, CHAN_Z);
1884
1885 if (modifier == TEX_MODIFIER_PROJECTED) {
1886 micro_div(&r[0], &r[0], &r[3]);
1887 micro_div(&r[1], &r[1], &r[3]);
1888 micro_div(&r[2], &r[2], &r[3]);
1889 }
1890
1891 fetch_texel(mach->Samplers[unit],
1892 &r[0], &r[1], &r[2], lod,
1893 control,
1894 &r[0], &r[1], &r[2], &r[3]);
1895 break;
1896
1897 default:
1898 assert(0);
1899 }
1900
1901 #if 0
1902 debug_printf("fetch r: %g %g %g %g\n",
1903 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
1904 debug_printf("fetch g: %g %g %g %g\n",
1905 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
1906 debug_printf("fetch b: %g %g %g %g\n",
1907 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
1908 debug_printf("fetch a: %g %g %g %g\n",
1909 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
1910 #endif
1911
1912 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1913 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1914 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1915 }
1916 }
1917 }
1918
1919 static void
1920 exec_txd(struct tgsi_exec_machine *mach,
1921 const struct tgsi_full_instruction *inst)
1922 {
1923 const uint unit = inst->Src[3].Register.Index;
1924 union tgsi_exec_channel r[4];
1925 uint chan;
1926
1927 /*
1928 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1929 */
1930
1931 switch (inst->Texture.Texture) {
1932 case TGSI_TEXTURE_1D:
1933 case TGSI_TEXTURE_SHADOW1D:
1934
1935 FETCH(&r[0], 0, CHAN_X);
1936
1937 fetch_texel(mach->Samplers[unit],
1938 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1939 tgsi_sampler_lod_bias,
1940 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1941 break;
1942
1943 case TGSI_TEXTURE_1D_ARRAY:
1944 case TGSI_TEXTURE_2D:
1945 case TGSI_TEXTURE_RECT:
1946 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1947 case TGSI_TEXTURE_SHADOW2D:
1948 case TGSI_TEXTURE_SHADOWRECT:
1949
1950 FETCH(&r[0], 0, CHAN_X);
1951 FETCH(&r[1], 0, CHAN_Y);
1952 FETCH(&r[2], 0, CHAN_Z);
1953
1954 fetch_texel(mach->Samplers[unit],
1955 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1956 tgsi_sampler_lod_bias,
1957 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1958 break;
1959
1960 case TGSI_TEXTURE_2D_ARRAY:
1961 case TGSI_TEXTURE_3D:
1962 case TGSI_TEXTURE_CUBE:
1963
1964 FETCH(&r[0], 0, CHAN_X);
1965 FETCH(&r[1], 0, CHAN_Y);
1966 FETCH(&r[2], 0, CHAN_Z);
1967
1968 fetch_texel(mach->Samplers[unit],
1969 &r[0], &r[1], &r[2], &ZeroVec,
1970 tgsi_sampler_lod_bias,
1971 &r[0], &r[1], &r[2], &r[3]);
1972 break;
1973
1974 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1975
1976 FETCH(&r[0], 0, CHAN_X);
1977 FETCH(&r[1], 0, CHAN_Y);
1978 FETCH(&r[2], 0, CHAN_Z);
1979 FETCH(&r[3], 0, CHAN_W);
1980
1981 fetch_texel(mach->Samplers[unit],
1982 &r[0], &r[1], &r[2], &r[3],
1983 tgsi_sampler_lod_bias,
1984 &r[0], &r[1], &r[2], &r[3]);
1985 break;
1986
1987 default:
1988 assert(0);
1989 }
1990
1991 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1992 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1993 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1994 }
1995 }
1996 }
1997
1998
1999 static void
2000 exec_txf(struct tgsi_exec_machine *mach,
2001 const struct tgsi_full_instruction *inst)
2002 {
2003 struct tgsi_sampler *sampler;
2004 const uint unit = inst->Src[2].Register.Index;
2005 union tgsi_exec_channel r[4];
2006 union tgsi_exec_channel offset[3];
2007 uint chan;
2008 float rgba[NUM_CHANNELS][QUAD_SIZE];
2009 int j;
2010 int8_t offsets[3];
2011
2012 if (inst->Texture.NumOffsets == 1) {
2013 union tgsi_exec_channel index;
2014 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2015 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2016 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2017 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2018 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2019 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2020 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2021 offsets[0] = offset[0].i[0];
2022 offsets[1] = offset[1].i[0];
2023 offsets[2] = offset[2].i[0];
2024 } else
2025 offsets[0] = offsets[1] = offsets[2] = 0;
2026
2027 IFETCH(&r[3], 0, CHAN_W);
2028
2029 switch(inst->Texture.Texture) {
2030 case TGSI_TEXTURE_3D:
2031 case TGSI_TEXTURE_2D_ARRAY:
2032 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2033 IFETCH(&r[2], 0, CHAN_Z);
2034 /* fallthrough */
2035 case TGSI_TEXTURE_2D:
2036 case TGSI_TEXTURE_RECT:
2037 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2038 case TGSI_TEXTURE_SHADOW2D:
2039 case TGSI_TEXTURE_SHADOWRECT:
2040 case TGSI_TEXTURE_1D_ARRAY:
2041 IFETCH(&r[1], 0, CHAN_Y);
2042 /* fallthrough */
2043 case TGSI_TEXTURE_1D:
2044 case TGSI_TEXTURE_SHADOW1D:
2045 IFETCH(&r[0], 0, CHAN_X);
2046 break;
2047 default:
2048 assert(0);
2049 break;
2050 }
2051
2052 sampler = mach->Samplers[unit];
2053 sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i,
2054 offsets, rgba);
2055
2056 for (j = 0; j < QUAD_SIZE; j++) {
2057 r[0].f[j] = rgba[0][j];
2058 r[1].f[j] = rgba[1][j];
2059 r[2].f[j] = rgba[2][j];
2060 r[3].f[j] = rgba[3][j];
2061 }
2062
2063 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2064 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2065 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2066 }
2067 }
2068 }
2069
2070 static void
2071 exec_txq(struct tgsi_exec_machine *mach,
2072 const struct tgsi_full_instruction *inst)
2073 {
2074 struct tgsi_sampler *sampler;
2075 const uint unit = inst->Src[1].Register.Index;
2076 int result[4];
2077 union tgsi_exec_channel r[4], src;
2078 uint chan;
2079 int i,j;
2080
2081 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT);
2082 sampler = mach->Samplers[unit];
2083
2084 sampler->get_dims(sampler, src.i[0], result);
2085
2086 for (i = 0; i < QUAD_SIZE; i++) {
2087 for (j = 0; j < 4; j++) {
2088 r[j].i[i] = result[j];
2089 }
2090 }
2091
2092 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2093 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2094 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2095 TGSI_EXEC_DATA_INT);
2096 }
2097 }
2098 }
2099
2100 static void
2101 exec_sample(struct tgsi_exec_machine *mach,
2102 const struct tgsi_full_instruction *inst,
2103 uint modifier)
2104 {
2105 const uint resource_unit = inst->Src[1].Register.Index;
2106 const uint sampler_unit = inst->Src[2].Register.Index;
2107 union tgsi_exec_channel r[4];
2108 const union tgsi_exec_channel *lod = &ZeroVec;
2109 enum tgsi_sampler_control control;
2110 uint chan;
2111
2112 if (modifier != TEX_MODIFIER_NONE) {
2113 if (modifier == TEX_MODIFIER_LOD_BIAS)
2114 FETCH(&r[3], 3, CHAN_X);
2115 else /*TEX_MODIFIER_LOD*/
2116 FETCH(&r[3], 0, CHAN_W);
2117
2118 if (modifier != TEX_MODIFIER_PROJECTED) {
2119 lod = &r[3];
2120 }
2121 }
2122
2123 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2124 control = tgsi_sampler_lod_explicit;
2125 } else {
2126 control = tgsi_sampler_lod_bias;
2127 }
2128
2129 switch (mach->Resources[resource_unit].Resource) {
2130 case TGSI_TEXTURE_1D:
2131 case TGSI_TEXTURE_SHADOW1D:
2132 FETCH(&r[0], 0, CHAN_X);
2133
2134 if (modifier == TEX_MODIFIER_PROJECTED) {
2135 micro_div(&r[0], &r[0], &r[3]);
2136 }
2137
2138 fetch_texel(mach->Samplers[sampler_unit],
2139 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
2140 control,
2141 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2142 break;
2143
2144 case TGSI_TEXTURE_1D_ARRAY:
2145 case TGSI_TEXTURE_2D:
2146 case TGSI_TEXTURE_RECT:
2147 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2148 case TGSI_TEXTURE_SHADOW2D:
2149 case TGSI_TEXTURE_SHADOWRECT:
2150 FETCH(&r[0], 0, CHAN_X);
2151 FETCH(&r[1], 0, CHAN_Y);
2152 FETCH(&r[2], 0, CHAN_Z);
2153
2154 if (modifier == TEX_MODIFIER_PROJECTED) {
2155 micro_div(&r[0], &r[0], &r[3]);
2156 micro_div(&r[1], &r[1], &r[3]);
2157 micro_div(&r[2], &r[2], &r[3]);
2158 }
2159
2160 fetch_texel(mach->Samplers[sampler_unit],
2161 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
2162 control,
2163 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2164 break;
2165
2166 case TGSI_TEXTURE_2D_ARRAY:
2167 case TGSI_TEXTURE_3D:
2168 case TGSI_TEXTURE_CUBE:
2169 FETCH(&r[0], 0, CHAN_X);
2170 FETCH(&r[1], 0, CHAN_Y);
2171 FETCH(&r[2], 0, CHAN_Z);
2172
2173 if (modifier == TEX_MODIFIER_PROJECTED) {
2174 micro_div(&r[0], &r[0], &r[3]);
2175 micro_div(&r[1], &r[1], &r[3]);
2176 micro_div(&r[2], &r[2], &r[3]);
2177 }
2178
2179 fetch_texel(mach->Samplers[sampler_unit],
2180 &r[0], &r[1], &r[2], lod,
2181 control,
2182 &r[0], &r[1], &r[2], &r[3]);
2183 break;
2184
2185 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2186 case TGSI_TEXTURE_SHADOWCUBE:
2187 FETCH(&r[0], 0, CHAN_X);
2188 FETCH(&r[1], 0, CHAN_Y);
2189 FETCH(&r[2], 0, CHAN_Z);
2190 FETCH(&r[3], 0, CHAN_W);
2191
2192 assert(modifier != TEX_MODIFIER_PROJECTED);
2193
2194 fetch_texel(mach->Samplers[sampler_unit],
2195 &r[0], &r[1], &r[2], &r[3],
2196 control,
2197 &r[0], &r[1], &r[2], &r[3]);
2198 break;
2199
2200 default:
2201 assert(0);
2202 }
2203
2204 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2205 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2206 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2207 }
2208 }
2209 }
2210
2211 static void
2212 exec_sample_d(struct tgsi_exec_machine *mach,
2213 const struct tgsi_full_instruction *inst)
2214 {
2215 const uint resource_unit = inst->Src[1].Register.Index;
2216 const uint sampler_unit = inst->Src[2].Register.Index;
2217 union tgsi_exec_channel r[4];
2218 uint chan;
2219 /*
2220 * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet.
2221 */
2222
2223 switch (mach->Resources[resource_unit].Resource) {
2224 case TGSI_TEXTURE_1D:
2225 case TGSI_TEXTURE_SHADOW1D:
2226
2227 FETCH(&r[0], 0, CHAN_X);
2228
2229 fetch_texel(mach->Samplers[sampler_unit],
2230 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
2231 tgsi_sampler_lod_bias,
2232 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2233 break;
2234
2235 case TGSI_TEXTURE_2D:
2236 case TGSI_TEXTURE_RECT:
2237 case TGSI_TEXTURE_SHADOW2D:
2238 case TGSI_TEXTURE_SHADOWRECT:
2239
2240 FETCH(&r[0], 0, CHAN_X);
2241 FETCH(&r[1], 0, CHAN_Y);
2242 FETCH(&r[2], 0, CHAN_Z);
2243
2244 fetch_texel(mach->Samplers[sampler_unit],
2245 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
2246 tgsi_sampler_lod_bias,
2247 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2248 break;
2249
2250 case TGSI_TEXTURE_3D:
2251 case TGSI_TEXTURE_CUBE:
2252
2253 FETCH(&r[0], 0, CHAN_X);
2254 FETCH(&r[1], 0, CHAN_Y);
2255 FETCH(&r[2], 0, CHAN_Z);
2256
2257 fetch_texel(mach->Samplers[sampler_unit],
2258 &r[0], &r[1], &r[2], &ZeroVec,
2259 tgsi_sampler_lod_bias,
2260 &r[0], &r[1], &r[2], &r[3]);
2261 break;
2262
2263 default:
2264 assert(0);
2265 }
2266
2267 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2268 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2269 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2270 }
2271 }
2272 }
2273
2274
2275 /**
2276 * Evaluate a constant-valued coefficient at the position of the
2277 * current quad.
2278 */
2279 static void
2280 eval_constant_coef(
2281 struct tgsi_exec_machine *mach,
2282 unsigned attrib,
2283 unsigned chan )
2284 {
2285 unsigned i;
2286
2287 for( i = 0; i < QUAD_SIZE; i++ ) {
2288 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2289 }
2290 }
2291
2292 /**
2293 * Evaluate a linear-valued coefficient at the position of the
2294 * current quad.
2295 */
2296 static void
2297 eval_linear_coef(
2298 struct tgsi_exec_machine *mach,
2299 unsigned attrib,
2300 unsigned chan )
2301 {
2302 const float x = mach->QuadPos.xyzw[0].f[0];
2303 const float y = mach->QuadPos.xyzw[1].f[0];
2304 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2305 const float dady = mach->InterpCoefs[attrib].dady[chan];
2306 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2307 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2308 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2309 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2310 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2311 }
2312
2313 /**
2314 * Evaluate a perspective-valued coefficient at the position of the
2315 * current quad.
2316 */
2317 static void
2318 eval_perspective_coef(
2319 struct tgsi_exec_machine *mach,
2320 unsigned attrib,
2321 unsigned chan )
2322 {
2323 const float x = mach->QuadPos.xyzw[0].f[0];
2324 const float y = mach->QuadPos.xyzw[1].f[0];
2325 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2326 const float dady = mach->InterpCoefs[attrib].dady[chan];
2327 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2328 const float *w = mach->QuadPos.xyzw[3].f;
2329 /* divide by W here */
2330 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2331 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2332 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2333 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2334 }
2335
2336
2337 typedef void (* eval_coef_func)(
2338 struct tgsi_exec_machine *mach,
2339 unsigned attrib,
2340 unsigned chan );
2341
2342 static void
2343 exec_declaration(struct tgsi_exec_machine *mach,
2344 const struct tgsi_full_declaration *decl)
2345 {
2346 if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
2347 mach->Resources[decl->Range.First] = decl->Resource;
2348 return;
2349 }
2350
2351 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2352 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2353 uint first, last, mask;
2354
2355 first = decl->Range.First;
2356 last = decl->Range.Last;
2357 mask = decl->Declaration.UsageMask;
2358
2359 /* XXX we could remove this special-case code since
2360 * mach->InterpCoefs[first].a0 should already have the
2361 * front/back-face value. But we should first update the
2362 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2363 * Then, we could remove the tgsi_exec_machine::Face field.
2364 */
2365 /* XXX make FACE a system value */
2366 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2367 uint i;
2368
2369 assert(decl->Semantic.Index == 0);
2370 assert(first == last);
2371
2372 for (i = 0; i < QUAD_SIZE; i++) {
2373 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2374 }
2375 } else {
2376 eval_coef_func eval;
2377 uint i, j;
2378
2379 switch (decl->Declaration.Interpolate) {
2380 case TGSI_INTERPOLATE_CONSTANT:
2381 eval = eval_constant_coef;
2382 break;
2383
2384 case TGSI_INTERPOLATE_LINEAR:
2385 eval = eval_linear_coef;
2386 break;
2387
2388 case TGSI_INTERPOLATE_PERSPECTIVE:
2389 eval = eval_perspective_coef;
2390 break;
2391
2392 case TGSI_INTERPOLATE_COLOR:
2393 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2394 break;
2395
2396 default:
2397 assert(0);
2398 return;
2399 }
2400
2401 for (j = 0; j < NUM_CHANNELS; j++) {
2402 if (mask & (1 << j)) {
2403 for (i = first; i <= last; i++) {
2404 eval(mach, i, j);
2405 }
2406 }
2407 }
2408 }
2409 }
2410 }
2411
2412 if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
2413 mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
2414 }
2415 }
2416
2417
2418 typedef void (* micro_op)(union tgsi_exec_channel *dst);
2419
2420 static void
2421 exec_vector(struct tgsi_exec_machine *mach,
2422 const struct tgsi_full_instruction *inst,
2423 micro_op op,
2424 enum tgsi_exec_datatype dst_datatype)
2425 {
2426 unsigned int chan;
2427
2428 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2429 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2430 union tgsi_exec_channel dst;
2431
2432 op(&dst);
2433 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2434 }
2435 }
2436 }
2437
2438 typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
2439 const union tgsi_exec_channel *src);
2440
2441 static void
2442 exec_scalar_unary(struct tgsi_exec_machine *mach,
2443 const struct tgsi_full_instruction *inst,
2444 micro_unary_op op,
2445 enum tgsi_exec_datatype dst_datatype,
2446 enum tgsi_exec_datatype src_datatype)
2447 {
2448 unsigned int chan;
2449 union tgsi_exec_channel src;
2450 union tgsi_exec_channel dst;
2451
2452 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
2453 op(&dst, &src);
2454 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2455 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2456 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2457 }
2458 }
2459 }
2460
2461 static void
2462 exec_vector_unary(struct tgsi_exec_machine *mach,
2463 const struct tgsi_full_instruction *inst,
2464 micro_unary_op op,
2465 enum tgsi_exec_datatype dst_datatype,
2466 enum tgsi_exec_datatype src_datatype)
2467 {
2468 unsigned int chan;
2469 struct tgsi_exec_vector dst;
2470
2471 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2472 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2473 union tgsi_exec_channel src;
2474
2475 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2476 op(&dst.xyzw[chan], &src);
2477 }
2478 }
2479 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2480 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2481 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2482 }
2483 }
2484 }
2485
2486 typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
2487 const union tgsi_exec_channel *src0,
2488 const union tgsi_exec_channel *src1);
2489
2490 static void
2491 exec_scalar_binary(struct tgsi_exec_machine *mach,
2492 const struct tgsi_full_instruction *inst,
2493 micro_binary_op op,
2494 enum tgsi_exec_datatype dst_datatype,
2495 enum tgsi_exec_datatype src_datatype)
2496 {
2497 unsigned int chan;
2498 union tgsi_exec_channel src[2];
2499 union tgsi_exec_channel dst;
2500
2501 fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype);
2502 fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype);
2503 op(&dst, &src[0], &src[1]);
2504 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2505 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2506 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2507 }
2508 }
2509 }
2510
2511 static void
2512 exec_vector_binary(struct tgsi_exec_machine *mach,
2513 const struct tgsi_full_instruction *inst,
2514 micro_binary_op op,
2515 enum tgsi_exec_datatype dst_datatype,
2516 enum tgsi_exec_datatype src_datatype)
2517 {
2518 unsigned int chan;
2519 struct tgsi_exec_vector dst;
2520
2521 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2522 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2523 union tgsi_exec_channel src[2];
2524
2525 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2526 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2527 op(&dst.xyzw[chan], &src[0], &src[1]);
2528 }
2529 }
2530 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2531 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2532 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2533 }
2534 }
2535 }
2536
2537 typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,
2538 const union tgsi_exec_channel *src0,
2539 const union tgsi_exec_channel *src1,
2540 const union tgsi_exec_channel *src2);
2541
2542 static void
2543 exec_vector_trinary(struct tgsi_exec_machine *mach,
2544 const struct tgsi_full_instruction *inst,
2545 micro_trinary_op op,
2546 enum tgsi_exec_datatype dst_datatype,
2547 enum tgsi_exec_datatype src_datatype)
2548 {
2549 unsigned int chan;
2550 struct tgsi_exec_vector dst;
2551
2552 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2553 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2554 union tgsi_exec_channel src[3];
2555
2556 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2557 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2558 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2559 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2560 }
2561 }
2562 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2563 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2564 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2565 }
2566 }
2567 }
2568
2569 static void
2570 exec_dp3(struct tgsi_exec_machine *mach,
2571 const struct tgsi_full_instruction *inst)
2572 {
2573 unsigned int chan;
2574 union tgsi_exec_channel arg[3];
2575
2576 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2577 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2578 micro_mul(&arg[2], &arg[0], &arg[1]);
2579
2580 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
2581 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2582 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2583 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2584 }
2585
2586 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2587 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2588 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2589 }
2590 }
2591 }
2592
2593 static void
2594 exec_dp4(struct tgsi_exec_machine *mach,
2595 const struct tgsi_full_instruction *inst)
2596 {
2597 unsigned int chan;
2598 union tgsi_exec_channel arg[3];
2599
2600 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2601 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2602 micro_mul(&arg[2], &arg[0], &arg[1]);
2603
2604 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
2605 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2606 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2607 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2608 }
2609
2610 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2611 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2612 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2613 }
2614 }
2615 }
2616
2617 static void
2618 exec_dp2a(struct tgsi_exec_machine *mach,
2619 const struct tgsi_full_instruction *inst)
2620 {
2621 unsigned int chan;
2622 union tgsi_exec_channel arg[3];
2623
2624 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2625 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2626 micro_mul(&arg[2], &arg[0], &arg[1]);
2627
2628 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2629 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2630 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2631
2632 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2633 micro_add(&arg[0], &arg[0], &arg[1]);
2634
2635 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2636 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2637 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2638 }
2639 }
2640 }
2641
2642 static void
2643 exec_dph(struct tgsi_exec_machine *mach,
2644 const struct tgsi_full_instruction *inst)
2645 {
2646 unsigned int chan;
2647 union tgsi_exec_channel arg[3];
2648
2649 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2650 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2651 micro_mul(&arg[2], &arg[0], &arg[1]);
2652
2653 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2654 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2655 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2656
2657 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2658 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2659 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2660
2661 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2662 micro_add(&arg[0], &arg[0], &arg[1]);
2663
2664 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2665 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2666 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2667 }
2668 }
2669 }
2670
2671 static void
2672 exec_dp2(struct tgsi_exec_machine *mach,
2673 const struct tgsi_full_instruction *inst)
2674 {
2675 unsigned int chan;
2676 union tgsi_exec_channel arg[3];
2677
2678 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2679 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2680 micro_mul(&arg[2], &arg[0], &arg[1]);
2681
2682 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2683 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2684 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2685
2686 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2687 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2688 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2689 }
2690 }
2691 }
2692
2693 static void
2694 exec_nrm4(struct tgsi_exec_machine *mach,
2695 const struct tgsi_full_instruction *inst)
2696 {
2697 unsigned int chan;
2698 union tgsi_exec_channel arg[4];
2699 union tgsi_exec_channel scale;
2700
2701 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2702 micro_mul(&scale, &arg[0], &arg[0]);
2703
2704 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
2705 union tgsi_exec_channel product;
2706
2707 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2708 micro_mul(&product, &arg[chan], &arg[chan]);
2709 micro_add(&scale, &scale, &product);
2710 }
2711
2712 micro_rsq(&scale, &scale);
2713
2714 for (chan = CHAN_X; chan <= CHAN_W; chan++) {
2715 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2716 micro_mul(&arg[chan], &arg[chan], &scale);
2717 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2718 }
2719 }
2720 }
2721
2722 static void
2723 exec_nrm3(struct tgsi_exec_machine *mach,
2724 const struct tgsi_full_instruction *inst)
2725 {
2726 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2727 unsigned int chan;
2728 union tgsi_exec_channel arg[3];
2729 union tgsi_exec_channel scale;
2730
2731 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2732 micro_mul(&scale, &arg[0], &arg[0]);
2733
2734 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
2735 union tgsi_exec_channel product;
2736
2737 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2738 micro_mul(&product, &arg[chan], &arg[chan]);
2739 micro_add(&scale, &scale, &product);
2740 }
2741
2742 micro_rsq(&scale, &scale);
2743
2744 for (chan = CHAN_X; chan <= CHAN_Z; chan++) {
2745 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2746 micro_mul(&arg[chan], &arg[chan], &scale);
2747 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2748 }
2749 }
2750 }
2751
2752 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2753 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2754 }
2755 }
2756
2757 static void
2758 exec_scs(struct tgsi_exec_machine *mach,
2759 const struct tgsi_full_instruction *inst)
2760 {
2761 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2762 union tgsi_exec_channel arg;
2763 union tgsi_exec_channel result;
2764
2765 fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2766
2767 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2768 micro_cos(&result, &arg);
2769 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2770 }
2771 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2772 micro_sin(&result, &arg);
2773 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2774 }
2775 }
2776 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2777 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2778 }
2779 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2780 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2781 }
2782 }
2783
2784 static void
2785 exec_x2d(struct tgsi_exec_machine *mach,
2786 const struct tgsi_full_instruction *inst)
2787 {
2788 union tgsi_exec_channel r[4];
2789 union tgsi_exec_channel d[2];
2790
2791 fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2792 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2793 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2794 fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2795 micro_mul(&r[2], &r[2], &r[0]);
2796 fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2797 micro_mul(&r[3], &r[3], &r[1]);
2798 micro_add(&r[2], &r[2], &r[3]);
2799 fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2800 micro_add(&d[0], &r[2], &r[3]);
2801 }
2802 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2803 fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2804 micro_mul(&r[2], &r[2], &r[0]);
2805 fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2806 micro_mul(&r[3], &r[3], &r[1]);
2807 micro_add(&r[2], &r[2], &r[3]);
2808 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2809 micro_add(&d[1], &r[2], &r[3]);
2810 }
2811 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2812 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2813 }
2814 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2815 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2816 }
2817 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2818 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2819 }
2820 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2821 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2822 }
2823 }
2824
2825 static void
2826 exec_rfl(struct tgsi_exec_machine *mach,
2827 const struct tgsi_full_instruction *inst)
2828 {
2829 union tgsi_exec_channel r[9];
2830
2831 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2832 /* r0 = dp3(src0, src0) */
2833 fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2834 micro_mul(&r[0], &r[2], &r[2]);
2835 fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2836 micro_mul(&r[8], &r[4], &r[4]);
2837 micro_add(&r[0], &r[0], &r[8]);
2838 fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2839 micro_mul(&r[8], &r[6], &r[6]);
2840 micro_add(&r[0], &r[0], &r[8]);
2841
2842 /* r1 = dp3(src0, src1) */
2843 fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2844 micro_mul(&r[1], &r[2], &r[3]);
2845 fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2846 micro_mul(&r[8], &r[4], &r[5]);
2847 micro_add(&r[1], &r[1], &r[8]);
2848 fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2849 micro_mul(&r[8], &r[6], &r[7]);
2850 micro_add(&r[1], &r[1], &r[8]);
2851
2852 /* r1 = 2 * r1 / r0 */
2853 micro_add(&r[1], &r[1], &r[1]);
2854 micro_div(&r[1], &r[1], &r[0]);
2855
2856 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2857 micro_mul(&r[2], &r[2], &r[1]);
2858 micro_sub(&r[2], &r[2], &r[3]);
2859 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2860 }
2861 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2862 micro_mul(&r[4], &r[4], &r[1]);
2863 micro_sub(&r[4], &r[4], &r[5]);
2864 store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2865 }
2866 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2867 micro_mul(&r[6], &r[6], &r[1]);
2868 micro_sub(&r[6], &r[6], &r[7]);
2869 store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2870 }
2871 }
2872 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2873 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2874 }
2875 }
2876
2877 static void
2878 exec_xpd(struct tgsi_exec_machine *mach,
2879 const struct tgsi_full_instruction *inst)
2880 {
2881 union tgsi_exec_channel r[6];
2882 union tgsi_exec_channel d[3];
2883
2884 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2885 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2886
2887 micro_mul(&r[2], &r[0], &r[1]);
2888
2889 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2890 fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2891
2892 micro_mul(&r[5], &r[3], &r[4] );
2893 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2894
2895 fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2896
2897 micro_mul(&r[3], &r[3], &r[2]);
2898
2899 fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2900
2901 micro_mul(&r[1], &r[1], &r[5]);
2902 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2903
2904 micro_mul(&r[5], &r[5], &r[4]);
2905 micro_mul(&r[0], &r[0], &r[2]);
2906 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2907
2908 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2909 store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2910 }
2911 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2912 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2913 }
2914 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2915 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2916 }
2917 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2918 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2919 }
2920 }
2921
2922 static void
2923 exec_dst(struct tgsi_exec_machine *mach,
2924 const struct tgsi_full_instruction *inst)
2925 {
2926 union tgsi_exec_channel r[2];
2927 union tgsi_exec_channel d[4];
2928
2929 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2930 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2931 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2932 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2933 }
2934 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2935 fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2936 }
2937 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2938 fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2939 }
2940
2941 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2942 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2943 }
2944 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2945 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2946 }
2947 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2948 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2949 }
2950 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2951 store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2952 }
2953 }
2954
2955 static void
2956 exec_log(struct tgsi_exec_machine *mach,
2957 const struct tgsi_full_instruction *inst)
2958 {
2959 union tgsi_exec_channel r[3];
2960
2961 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2962 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
2963 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
2964 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
2965 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2966 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2967 }
2968 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2969 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
2970 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
2971 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2972 }
2973 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2974 store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2975 }
2976 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2977 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2978 }
2979 }
2980
2981 static void
2982 exec_exp(struct tgsi_exec_machine *mach,
2983 const struct tgsi_full_instruction *inst)
2984 {
2985 union tgsi_exec_channel r[3];
2986
2987 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2988 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
2989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2990 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
2991 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2992 }
2993 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2994 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
2995 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2996 }
2997 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2998 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
2999 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3000 }
3001 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3002 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
3003 }
3004 }
3005
3006 static void
3007 exec_lit(struct tgsi_exec_machine *mach,
3008 const struct tgsi_full_instruction *inst)
3009 {
3010 union tgsi_exec_channel r[3];
3011 union tgsi_exec_channel d[3];
3012
3013 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3014 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
3015 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3016 fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3017 micro_max(&r[1], &r[1], &ZeroVec);
3018
3019 fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT);
3020 micro_min(&r[2], &r[2], &P128Vec);
3021 micro_max(&r[2], &r[2], &M128Vec);
3022 micro_pow(&r[1], &r[1], &r[2]);
3023 micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3024 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3025 }
3026 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3027 micro_max(&d[CHAN_Y], &r[0], &ZeroVec);
3028 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3029 }
3030 }
3031 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3032 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
3033 }
3034
3035 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3036 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
3037 }
3038 }
3039
3040 static void
3041 exec_break(struct tgsi_exec_machine *mach)
3042 {
3043 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3044 /* turn off loop channels for each enabled exec channel */
3045 mach->LoopMask &= ~mach->ExecMask;
3046 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3047 UPDATE_EXEC_MASK(mach);
3048 } else {
3049 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3050
3051 mach->Switch.mask = 0x0;
3052
3053 UPDATE_EXEC_MASK(mach);
3054 }
3055 }
3056
3057 static void
3058 exec_switch(struct tgsi_exec_machine *mach,
3059 const struct tgsi_full_instruction *inst)
3060 {
3061 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3062 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3063
3064 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3065 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
3066 mach->Switch.mask = 0x0;
3067 mach->Switch.defaultMask = 0x0;
3068
3069 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3070 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3071
3072 UPDATE_EXEC_MASK(mach);
3073 }
3074
3075 static void
3076 exec_case(struct tgsi_exec_machine *mach,
3077 const struct tgsi_full_instruction *inst)
3078 {
3079 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3080 union tgsi_exec_channel src;
3081 uint mask = 0;
3082
3083 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
3084
3085 if (mach->Switch.selector.u[0] == src.u[0]) {
3086 mask |= 0x1;
3087 }
3088 if (mach->Switch.selector.u[1] == src.u[1]) {
3089 mask |= 0x2;
3090 }
3091 if (mach->Switch.selector.u[2] == src.u[2]) {
3092 mask |= 0x4;
3093 }
3094 if (mach->Switch.selector.u[3] == src.u[3]) {
3095 mask |= 0x8;
3096 }
3097
3098 mach->Switch.defaultMask |= mask;
3099
3100 mach->Switch.mask |= mask & prevMask;
3101
3102 UPDATE_EXEC_MASK(mach);
3103 }
3104
3105 static void
3106 exec_default(struct tgsi_exec_machine *mach)
3107 {
3108 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3109
3110 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3111
3112 UPDATE_EXEC_MASK(mach);
3113 }
3114
3115 static void
3116 exec_endswitch(struct tgsi_exec_machine *mach)
3117 {
3118 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3119 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3120
3121 UPDATE_EXEC_MASK(mach);
3122 }
3123
3124 static void
3125 micro_i2f(union tgsi_exec_channel *dst,
3126 const union tgsi_exec_channel *src)
3127 {
3128 dst->f[0] = (float)src->i[0];
3129 dst->f[1] = (float)src->i[1];
3130 dst->f[2] = (float)src->i[2];
3131 dst->f[3] = (float)src->i[3];
3132 }
3133
3134 static void
3135 micro_not(union tgsi_exec_channel *dst,
3136 const union tgsi_exec_channel *src)
3137 {
3138 dst->u[0] = ~src->u[0];
3139 dst->u[1] = ~src->u[1];
3140 dst->u[2] = ~src->u[2];
3141 dst->u[3] = ~src->u[3];
3142 }
3143
3144 static void
3145 micro_shl(union tgsi_exec_channel *dst,
3146 const union tgsi_exec_channel *src0,
3147 const union tgsi_exec_channel *src1)
3148 {
3149 dst->u[0] = src0->u[0] << src1->u[0];
3150 dst->u[1] = src0->u[1] << src1->u[1];
3151 dst->u[2] = src0->u[2] << src1->u[2];
3152 dst->u[3] = src0->u[3] << src1->u[3];
3153 }
3154
3155 static void
3156 micro_and(union tgsi_exec_channel *dst,
3157 const union tgsi_exec_channel *src0,
3158 const union tgsi_exec_channel *src1)
3159 {
3160 dst->u[0] = src0->u[0] & src1->u[0];
3161 dst->u[1] = src0->u[1] & src1->u[1];
3162 dst->u[2] = src0->u[2] & src1->u[2];
3163 dst->u[3] = src0->u[3] & src1->u[3];
3164 }
3165
3166 static void
3167 micro_or(union tgsi_exec_channel *dst,
3168 const union tgsi_exec_channel *src0,
3169 const union tgsi_exec_channel *src1)
3170 {
3171 dst->u[0] = src0->u[0] | src1->u[0];
3172 dst->u[1] = src0->u[1] | src1->u[1];
3173 dst->u[2] = src0->u[2] | src1->u[2];
3174 dst->u[3] = src0->u[3] | src1->u[3];
3175 }
3176
3177 static void
3178 micro_xor(union tgsi_exec_channel *dst,
3179 const union tgsi_exec_channel *src0,
3180 const union tgsi_exec_channel *src1)
3181 {
3182 dst->u[0] = src0->u[0] ^ src1->u[0];
3183 dst->u[1] = src0->u[1] ^ src1->u[1];
3184 dst->u[2] = src0->u[2] ^ src1->u[2];
3185 dst->u[3] = src0->u[3] ^ src1->u[3];
3186 }
3187
3188 static void
3189 micro_mod(union tgsi_exec_channel *dst,
3190 const union tgsi_exec_channel *src0,
3191 const union tgsi_exec_channel *src1)
3192 {
3193 dst->i[0] = src0->i[0] % src1->i[0];
3194 dst->i[1] = src0->i[1] % src1->i[1];
3195 dst->i[2] = src0->i[2] % src1->i[2];
3196 dst->i[3] = src0->i[3] % src1->i[3];
3197 }
3198
3199 static void
3200 micro_f2i(union tgsi_exec_channel *dst,
3201 const union tgsi_exec_channel *src)
3202 {
3203 dst->i[0] = (int)src->f[0];
3204 dst->i[1] = (int)src->f[1];
3205 dst->i[2] = (int)src->f[2];
3206 dst->i[3] = (int)src->f[3];
3207 }
3208
3209 static void
3210 micro_idiv(union tgsi_exec_channel *dst,
3211 const union tgsi_exec_channel *src0,
3212 const union tgsi_exec_channel *src1)
3213 {
3214 dst->i[0] = src0->i[0] / src1->i[0];
3215 dst->i[1] = src0->i[1] / src1->i[1];
3216 dst->i[2] = src0->i[2] / src1->i[2];
3217 dst->i[3] = src0->i[3] / src1->i[3];
3218 }
3219
3220 static void
3221 micro_imax(union tgsi_exec_channel *dst,
3222 const union tgsi_exec_channel *src0,
3223 const union tgsi_exec_channel *src1)
3224 {
3225 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
3226 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
3227 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
3228 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
3229 }
3230
3231 static void
3232 micro_imin(union tgsi_exec_channel *dst,
3233 const union tgsi_exec_channel *src0,
3234 const union tgsi_exec_channel *src1)
3235 {
3236 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
3237 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
3238 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
3239 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
3240 }
3241
3242 static void
3243 micro_isge(union tgsi_exec_channel *dst,
3244 const union tgsi_exec_channel *src0,
3245 const union tgsi_exec_channel *src1)
3246 {
3247 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
3248 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
3249 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
3250 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
3251 }
3252
3253 static void
3254 micro_ishr(union tgsi_exec_channel *dst,
3255 const union tgsi_exec_channel *src0,
3256 const union tgsi_exec_channel *src1)
3257 {
3258 dst->i[0] = src0->i[0] >> src1->i[0];
3259 dst->i[1] = src0->i[1] >> src1->i[1];
3260 dst->i[2] = src0->i[2] >> src1->i[2];
3261 dst->i[3] = src0->i[3] >> src1->i[3];
3262 }
3263
3264 static void
3265 micro_islt(union tgsi_exec_channel *dst,
3266 const union tgsi_exec_channel *src0,
3267 const union tgsi_exec_channel *src1)
3268 {
3269 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
3270 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
3271 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
3272 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
3273 }
3274
3275 static void
3276 micro_f2u(union tgsi_exec_channel *dst,
3277 const union tgsi_exec_channel *src)
3278 {
3279 dst->u[0] = (uint)src->f[0];
3280 dst->u[1] = (uint)src->f[1];
3281 dst->u[2] = (uint)src->f[2];
3282 dst->u[3] = (uint)src->f[3];
3283 }
3284
3285 static void
3286 micro_u2f(union tgsi_exec_channel *dst,
3287 const union tgsi_exec_channel *src)
3288 {
3289 dst->f[0] = (float)src->u[0];
3290 dst->f[1] = (float)src->u[1];
3291 dst->f[2] = (float)src->u[2];
3292 dst->f[3] = (float)src->u[3];
3293 }
3294
3295 static void
3296 micro_uadd(union tgsi_exec_channel *dst,
3297 const union tgsi_exec_channel *src0,
3298 const union tgsi_exec_channel *src1)
3299 {
3300 dst->u[0] = src0->u[0] + src1->u[0];
3301 dst->u[1] = src0->u[1] + src1->u[1];
3302 dst->u[2] = src0->u[2] + src1->u[2];
3303 dst->u[3] = src0->u[3] + src1->u[3];
3304 }
3305
3306 static void
3307 micro_udiv(union tgsi_exec_channel *dst,
3308 const union tgsi_exec_channel *src0,
3309 const union tgsi_exec_channel *src1)
3310 {
3311 dst->u[0] = src0->u[0] / src1->u[0];
3312 dst->u[1] = src0->u[1] / src1->u[1];
3313 dst->u[2] = src0->u[2] / src1->u[2];
3314 dst->u[3] = src0->u[3] / src1->u[3];
3315 }
3316
3317 static void
3318 micro_umad(union tgsi_exec_channel *dst,
3319 const union tgsi_exec_channel *src0,
3320 const union tgsi_exec_channel *src1,
3321 const union tgsi_exec_channel *src2)
3322 {
3323 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
3324 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
3325 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
3326 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
3327 }
3328
3329 static void
3330 micro_umax(union tgsi_exec_channel *dst,
3331 const union tgsi_exec_channel *src0,
3332 const union tgsi_exec_channel *src1)
3333 {
3334 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
3335 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
3336 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
3337 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
3338 }
3339
3340 static void
3341 micro_umin(union tgsi_exec_channel *dst,
3342 const union tgsi_exec_channel *src0,
3343 const union tgsi_exec_channel *src1)
3344 {
3345 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
3346 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
3347 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
3348 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
3349 }
3350
3351 static void
3352 micro_umod(union tgsi_exec_channel *dst,
3353 const union tgsi_exec_channel *src0,
3354 const union tgsi_exec_channel *src1)
3355 {
3356 dst->u[0] = src0->u[0] % src1->u[0];
3357 dst->u[1] = src0->u[1] % src1->u[1];
3358 dst->u[2] = src0->u[2] % src1->u[2];
3359 dst->u[3] = src0->u[3] % src1->u[3];
3360 }
3361
3362 static void
3363 micro_umul(union tgsi_exec_channel *dst,
3364 const union tgsi_exec_channel *src0,
3365 const union tgsi_exec_channel *src1)
3366 {
3367 dst->u[0] = src0->u[0] * src1->u[0];
3368 dst->u[1] = src0->u[1] * src1->u[1];
3369 dst->u[2] = src0->u[2] * src1->u[2];
3370 dst->u[3] = src0->u[3] * src1->u[3];
3371 }
3372
3373 static void
3374 micro_useq(union tgsi_exec_channel *dst,
3375 const union tgsi_exec_channel *src0,
3376 const union tgsi_exec_channel *src1)
3377 {
3378 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
3379 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
3380 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
3381 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
3382 }
3383
3384 static void
3385 micro_usge(union tgsi_exec_channel *dst,
3386 const union tgsi_exec_channel *src0,
3387 const union tgsi_exec_channel *src1)
3388 {
3389 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
3390 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
3391 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
3392 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
3393 }
3394
3395 static void
3396 micro_ushr(union tgsi_exec_channel *dst,
3397 const union tgsi_exec_channel *src0,
3398 const union tgsi_exec_channel *src1)
3399 {
3400 dst->u[0] = src0->u[0] >> src1->u[0];
3401 dst->u[1] = src0->u[1] >> src1->u[1];
3402 dst->u[2] = src0->u[2] >> src1->u[2];
3403 dst->u[3] = src0->u[3] >> src1->u[3];
3404 }
3405
3406 static void
3407 micro_uslt(union tgsi_exec_channel *dst,
3408 const union tgsi_exec_channel *src0,
3409 const union tgsi_exec_channel *src1)
3410 {
3411 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
3412 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
3413 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
3414 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
3415 }
3416
3417 static void
3418 micro_usne(union tgsi_exec_channel *dst,
3419 const union tgsi_exec_channel *src0,
3420 const union tgsi_exec_channel *src1)
3421 {
3422 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
3423 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
3424 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
3425 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
3426 }
3427
3428 static void
3429 micro_uarl(union tgsi_exec_channel *dst,
3430 const union tgsi_exec_channel *src)
3431 {
3432 dst->i[0] = src->u[0];
3433 dst->i[1] = src->u[1];
3434 dst->i[2] = src->u[2];
3435 dst->i[3] = src->u[3];
3436 }
3437
3438 static void
3439 micro_ucmp(union tgsi_exec_channel *dst,
3440 const union tgsi_exec_channel *src0,
3441 const union tgsi_exec_channel *src1,
3442 const union tgsi_exec_channel *src2)
3443 {
3444 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
3445 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
3446 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
3447 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
3448 }
3449
3450 static void
3451 exec_instruction(
3452 struct tgsi_exec_machine *mach,
3453 const struct tgsi_full_instruction *inst,
3454 int *pc )
3455 {
3456 union tgsi_exec_channel r[10];
3457
3458 (*pc)++;
3459
3460 switch (inst->Instruction.Opcode) {
3461 case TGSI_OPCODE_ARL:
3462 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3463 break;
3464
3465 case TGSI_OPCODE_MOV:
3466 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3467 break;
3468
3469 case TGSI_OPCODE_LIT:
3470 exec_lit(mach, inst);
3471 break;
3472
3473 case TGSI_OPCODE_RCP:
3474 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3475 break;
3476
3477 case TGSI_OPCODE_RSQ:
3478 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3479 break;
3480
3481 case TGSI_OPCODE_EXP:
3482 exec_exp(mach, inst);
3483 break;
3484
3485 case TGSI_OPCODE_LOG:
3486 exec_log(mach, inst);
3487 break;
3488
3489 case TGSI_OPCODE_MUL:
3490 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3491 break;
3492
3493 case TGSI_OPCODE_ADD:
3494 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3495 break;
3496
3497 case TGSI_OPCODE_DP3:
3498 exec_dp3(mach, inst);
3499 break;
3500
3501 case TGSI_OPCODE_DP4:
3502 exec_dp4(mach, inst);
3503 break;
3504
3505 case TGSI_OPCODE_DST:
3506 exec_dst(mach, inst);
3507 break;
3508
3509 case TGSI_OPCODE_MIN:
3510 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3511 break;
3512
3513 case TGSI_OPCODE_MAX:
3514 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3515 break;
3516
3517 case TGSI_OPCODE_SLT:
3518 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3519 break;
3520
3521 case TGSI_OPCODE_SGE:
3522 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3523 break;
3524
3525 case TGSI_OPCODE_MAD:
3526 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3527 break;
3528
3529 case TGSI_OPCODE_SUB:
3530 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3531 break;
3532
3533 case TGSI_OPCODE_LRP:
3534 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3535 break;
3536
3537 case TGSI_OPCODE_CND:
3538 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3539 break;
3540
3541 case TGSI_OPCODE_DP2A:
3542 exec_dp2a(mach, inst);
3543 break;
3544
3545 case TGSI_OPCODE_FRC:
3546 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3547 break;
3548
3549 case TGSI_OPCODE_CLAMP:
3550 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3551 break;
3552
3553 case TGSI_OPCODE_FLR:
3554 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3555 break;
3556
3557 case TGSI_OPCODE_ROUND:
3558 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3559 break;
3560
3561 case TGSI_OPCODE_EX2:
3562 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3563 break;
3564
3565 case TGSI_OPCODE_LG2:
3566 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3567 break;
3568
3569 case TGSI_OPCODE_POW:
3570 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3571 break;
3572
3573 case TGSI_OPCODE_XPD:
3574 exec_xpd(mach, inst);
3575 break;
3576
3577 case TGSI_OPCODE_ABS:
3578 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3579 break;
3580
3581 case TGSI_OPCODE_RCC:
3582 exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3583 break;
3584
3585 case TGSI_OPCODE_DPH:
3586 exec_dph(mach, inst);
3587 break;
3588
3589 case TGSI_OPCODE_COS:
3590 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3591 break;
3592
3593 case TGSI_OPCODE_DDX:
3594 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3595 break;
3596
3597 case TGSI_OPCODE_DDY:
3598 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3599 break;
3600
3601 case TGSI_OPCODE_KILP:
3602 exec_kilp (mach, inst);
3603 break;
3604
3605 case TGSI_OPCODE_KIL:
3606 exec_kil (mach, inst);
3607 break;
3608
3609 case TGSI_OPCODE_PK2H:
3610 assert (0);
3611 break;
3612
3613 case TGSI_OPCODE_PK2US:
3614 assert (0);
3615 break;
3616
3617 case TGSI_OPCODE_PK4B:
3618 assert (0);
3619 break;
3620
3621 case TGSI_OPCODE_PK4UB:
3622 assert (0);
3623 break;
3624
3625 case TGSI_OPCODE_RFL:
3626 exec_rfl(mach, inst);
3627 break;
3628
3629 case TGSI_OPCODE_SEQ:
3630 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3631 break;
3632
3633 case TGSI_OPCODE_SFL:
3634 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3635 break;
3636
3637 case TGSI_OPCODE_SGT:
3638 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3639 break;
3640
3641 case TGSI_OPCODE_SIN:
3642 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3643 break;
3644
3645 case TGSI_OPCODE_SLE:
3646 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3647 break;
3648
3649 case TGSI_OPCODE_SNE:
3650 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3651 break;
3652
3653 case TGSI_OPCODE_STR:
3654 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3655 break;
3656
3657 case TGSI_OPCODE_TEX:
3658 /* simple texture lookup */
3659 /* src[0] = texcoord */
3660 /* src[1] = sampler unit */
3661 exec_tex(mach, inst, TEX_MODIFIER_NONE);
3662 break;
3663
3664 case TGSI_OPCODE_TXB:
3665 /* Texture lookup with lod bias */
3666 /* src[0] = texcoord (src[0].w = LOD bias) */
3667 /* src[1] = sampler unit */
3668 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
3669 break;
3670
3671 case TGSI_OPCODE_TXD:
3672 /* Texture lookup with explict partial derivatives */
3673 /* src[0] = texcoord */
3674 /* src[1] = d[strq]/dx */
3675 /* src[2] = d[strq]/dy */
3676 /* src[3] = sampler unit */
3677 exec_txd(mach, inst);
3678 break;
3679
3680 case TGSI_OPCODE_TXL:
3681 /* Texture lookup with explit LOD */
3682 /* src[0] = texcoord (src[0].w = LOD) */
3683 /* src[1] = sampler unit */
3684 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
3685 break;
3686
3687 case TGSI_OPCODE_TXP:
3688 /* Texture lookup with projection */
3689 /* src[0] = texcoord (src[0].w = projection) */
3690 /* src[1] = sampler unit */
3691 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
3692 break;
3693
3694 case TGSI_OPCODE_UP2H:
3695 assert (0);
3696 break;
3697
3698 case TGSI_OPCODE_UP2US:
3699 assert (0);
3700 break;
3701
3702 case TGSI_OPCODE_UP4B:
3703 assert (0);
3704 break;
3705
3706 case TGSI_OPCODE_UP4UB:
3707 assert (0);
3708 break;
3709
3710 case TGSI_OPCODE_X2D:
3711 exec_x2d(mach, inst);
3712 break;
3713
3714 case TGSI_OPCODE_ARA:
3715 assert (0);
3716 break;
3717
3718 case TGSI_OPCODE_ARR:
3719 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3720 break;
3721
3722 case TGSI_OPCODE_BRA:
3723 assert (0);
3724 break;
3725
3726 case TGSI_OPCODE_CAL:
3727 /* skip the call if no execution channels are enabled */
3728 if (mach->ExecMask) {
3729 /* do the call */
3730
3731 /* First, record the depths of the execution stacks.
3732 * This is important for deeply nested/looped return statements.
3733 * We have to unwind the stacks by the correct amount. For a
3734 * real code generator, we could determine the number of entries
3735 * to pop off each stack with simple static analysis and avoid
3736 * implementing this data structure at run time.
3737 */
3738 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3739 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3740 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3741 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3742 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3743 /* note that PC was already incremented above */
3744 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3745
3746 mach->CallStackTop++;
3747
3748 /* Second, push the Cond, Loop, Cont, Func stacks */
3749 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3750 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3751 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3752 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3753 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3754 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3755
3756 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3757 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3758 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3759 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3760 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3761 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3762
3763 /* Finally, jump to the subroutine */
3764 *pc = inst->Label.Label;
3765 }
3766 break;
3767
3768 case TGSI_OPCODE_RET:
3769 mach->FuncMask &= ~mach->ExecMask;
3770 UPDATE_EXEC_MASK(mach);
3771
3772 if (mach->FuncMask == 0x0) {
3773 /* really return now (otherwise, keep executing */
3774
3775 if (mach->CallStackTop == 0) {
3776 /* returning from main() */
3777 mach->CondStackTop = 0;
3778 mach->LoopStackTop = 0;
3779 *pc = -1;
3780 return;
3781 }
3782
3783 assert(mach->CallStackTop > 0);
3784 mach->CallStackTop--;
3785
3786 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3787 mach->CondMask = mach->CondStack[mach->CondStackTop];
3788
3789 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3790 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3791
3792 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3793 mach->ContMask = mach->ContStack[mach->ContStackTop];
3794
3795 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3796 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3797
3798 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3799 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3800
3801 assert(mach->FuncStackTop > 0);
3802 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3803
3804 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3805
3806 UPDATE_EXEC_MASK(mach);
3807 }
3808 break;
3809
3810 case TGSI_OPCODE_SSG:
3811 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3812 break;
3813
3814 case TGSI_OPCODE_CMP:
3815 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3816 break;
3817
3818 case TGSI_OPCODE_SCS:
3819 exec_scs(mach, inst);
3820 break;
3821
3822 case TGSI_OPCODE_NRM:
3823 exec_nrm3(mach, inst);
3824 break;
3825
3826 case TGSI_OPCODE_NRM4:
3827 exec_nrm4(mach, inst);
3828 break;
3829
3830 case TGSI_OPCODE_DIV:
3831 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3832 break;
3833
3834 case TGSI_OPCODE_DP2:
3835 exec_dp2(mach, inst);
3836 break;
3837
3838 case TGSI_OPCODE_IF:
3839 /* push CondMask */
3840 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3841 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3842 FETCH( &r[0], 0, CHAN_X );
3843 /* update CondMask */
3844 if( ! r[0].u[0] ) {
3845 mach->CondMask &= ~0x1;
3846 }
3847 if( ! r[0].u[1] ) {
3848 mach->CondMask &= ~0x2;
3849 }
3850 if( ! r[0].u[2] ) {
3851 mach->CondMask &= ~0x4;
3852 }
3853 if( ! r[0].u[3] ) {
3854 mach->CondMask &= ~0x8;
3855 }
3856 UPDATE_EXEC_MASK(mach);
3857 /* Todo: If CondMask==0, jump to ELSE */
3858 break;
3859
3860 case TGSI_OPCODE_ELSE:
3861 /* invert CondMask wrt previous mask */
3862 {
3863 uint prevMask;
3864 assert(mach->CondStackTop > 0);
3865 prevMask = mach->CondStack[mach->CondStackTop - 1];
3866 mach->CondMask = ~mach->CondMask & prevMask;
3867 UPDATE_EXEC_MASK(mach);
3868 /* Todo: If CondMask==0, jump to ENDIF */
3869 }
3870 break;
3871
3872 case TGSI_OPCODE_ENDIF:
3873 /* pop CondMask */
3874 assert(mach->CondStackTop > 0);
3875 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3876 UPDATE_EXEC_MASK(mach);
3877 break;
3878
3879 case TGSI_OPCODE_END:
3880 /* make sure we end primitives which haven't
3881 * been explicitly emitted */
3882 conditional_emit_primitive(mach);
3883 /* halt execution */
3884 *pc = -1;
3885 break;
3886
3887 case TGSI_OPCODE_PUSHA:
3888 assert (0);
3889 break;
3890
3891 case TGSI_OPCODE_POPA:
3892 assert (0);
3893 break;
3894
3895 case TGSI_OPCODE_CEIL:
3896 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3897 break;
3898
3899 case TGSI_OPCODE_I2F:
3900 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3901 break;
3902
3903 case TGSI_OPCODE_NOT:
3904 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3905 break;
3906
3907 case TGSI_OPCODE_TRUNC:
3908 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3909 break;
3910
3911 case TGSI_OPCODE_SHL:
3912 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3913 break;
3914
3915 case TGSI_OPCODE_AND:
3916 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3917 break;
3918
3919 case TGSI_OPCODE_OR:
3920 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3921 break;
3922
3923 case TGSI_OPCODE_MOD:
3924 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3925 break;
3926
3927 case TGSI_OPCODE_XOR:
3928 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3929 break;
3930
3931 case TGSI_OPCODE_SAD:
3932 assert (0);
3933 break;
3934
3935 case TGSI_OPCODE_TXF:
3936 exec_txf(mach, inst);
3937 break;
3938
3939 case TGSI_OPCODE_TXQ:
3940 exec_txq(mach, inst);
3941 break;
3942
3943 case TGSI_OPCODE_EMIT:
3944 emit_vertex(mach);
3945 break;
3946
3947 case TGSI_OPCODE_ENDPRIM:
3948 emit_primitive(mach);
3949 break;
3950
3951 case TGSI_OPCODE_BGNLOOP:
3952 /* push LoopMask and ContMasks */
3953 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3954 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3955 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3956 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3957
3958 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3959 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3960 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3961 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3962 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3963 break;
3964
3965 case TGSI_OPCODE_ENDLOOP:
3966 /* Restore ContMask, but don't pop */
3967 assert(mach->ContStackTop > 0);
3968 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3969 UPDATE_EXEC_MASK(mach);
3970 if (mach->ExecMask) {
3971 /* repeat loop: jump to instruction just past BGNLOOP */
3972 assert(mach->LoopLabelStackTop > 0);
3973 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3974 }
3975 else {
3976 /* exit loop: pop LoopMask */
3977 assert(mach->LoopStackTop > 0);
3978 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3979 /* pop ContMask */
3980 assert(mach->ContStackTop > 0);
3981 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3982 assert(mach->LoopLabelStackTop > 0);
3983 --mach->LoopLabelStackTop;
3984
3985 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3986 }
3987 UPDATE_EXEC_MASK(mach);
3988 break;
3989
3990 case TGSI_OPCODE_BRK:
3991 exec_break(mach);
3992 break;
3993
3994 case TGSI_OPCODE_CONT:
3995 /* turn off cont channels for each enabled exec channel */
3996 mach->ContMask &= ~mach->ExecMask;
3997 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3998 UPDATE_EXEC_MASK(mach);
3999 break;
4000
4001 case TGSI_OPCODE_BGNSUB:
4002 /* no-op */
4003 break;
4004
4005 case TGSI_OPCODE_ENDSUB:
4006 /*
4007 * XXX: This really should be a no-op. We should never reach this opcode.
4008 */
4009
4010 assert(mach->CallStackTop > 0);
4011 mach->CallStackTop--;
4012
4013 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
4014 mach->CondMask = mach->CondStack[mach->CondStackTop];
4015
4016 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
4017 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
4018
4019 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
4020 mach->ContMask = mach->ContStack[mach->ContStackTop];
4021
4022 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
4023 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
4024
4025 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
4026 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
4027
4028 assert(mach->FuncStackTop > 0);
4029 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
4030
4031 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
4032
4033 UPDATE_EXEC_MASK(mach);
4034 break;
4035
4036 case TGSI_OPCODE_NOP:
4037 break;
4038
4039 case TGSI_OPCODE_BREAKC:
4040 FETCH(&r[0], 0, CHAN_X);
4041 /* update CondMask */
4042 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
4043 mach->LoopMask &= ~0x1;
4044 }
4045 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
4046 mach->LoopMask &= ~0x2;
4047 }
4048 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
4049 mach->LoopMask &= ~0x4;
4050 }
4051 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
4052 mach->LoopMask &= ~0x8;
4053 }
4054 /* Todo: if mach->LoopMask == 0, jump to end of loop */
4055 UPDATE_EXEC_MASK(mach);
4056 break;
4057
4058 case TGSI_OPCODE_F2I:
4059 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
4060 break;
4061
4062 case TGSI_OPCODE_IDIV:
4063 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4064 break;
4065
4066 case TGSI_OPCODE_IMAX:
4067 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4068 break;
4069
4070 case TGSI_OPCODE_IMIN:
4071 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4072 break;
4073
4074 case TGSI_OPCODE_INEG:
4075 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4076 break;
4077
4078 case TGSI_OPCODE_ISGE:
4079 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4080 break;
4081
4082 case TGSI_OPCODE_ISHR:
4083 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4084 break;
4085
4086 case TGSI_OPCODE_ISLT:
4087 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4088 break;
4089
4090 case TGSI_OPCODE_F2U:
4091 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
4092 break;
4093
4094 case TGSI_OPCODE_U2F:
4095 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
4096 break;
4097
4098 case TGSI_OPCODE_UADD:
4099 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4100 break;
4101
4102 case TGSI_OPCODE_UDIV:
4103 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4104 break;
4105
4106 case TGSI_OPCODE_UMAD:
4107 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4108 break;
4109
4110 case TGSI_OPCODE_UMAX:
4111 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4112 break;
4113
4114 case TGSI_OPCODE_UMIN:
4115 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4116 break;
4117
4118 case TGSI_OPCODE_UMOD:
4119 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4120 break;
4121
4122 case TGSI_OPCODE_UMUL:
4123 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4124 break;
4125
4126 case TGSI_OPCODE_USEQ:
4127 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4128 break;
4129
4130 case TGSI_OPCODE_USGE:
4131 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4132 break;
4133
4134 case TGSI_OPCODE_USHR:
4135 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4136 break;
4137
4138 case TGSI_OPCODE_USLT:
4139 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4140 break;
4141
4142 case TGSI_OPCODE_USNE:
4143 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4144 break;
4145
4146 case TGSI_OPCODE_SWITCH:
4147 exec_switch(mach, inst);
4148 break;
4149
4150 case TGSI_OPCODE_CASE:
4151 exec_case(mach, inst);
4152 break;
4153
4154 case TGSI_OPCODE_DEFAULT:
4155 exec_default(mach);
4156 break;
4157
4158 case TGSI_OPCODE_ENDSWITCH:
4159 exec_endswitch(mach);
4160 break;
4161
4162 case TGSI_OPCODE_LOAD:
4163 assert(0);
4164 break;
4165
4166 case TGSI_OPCODE_LOAD_MS:
4167 assert(0);
4168 break;
4169
4170 case TGSI_OPCODE_SAMPLE:
4171 exec_sample(mach, inst, TEX_MODIFIER_NONE);
4172 break;
4173
4174 case TGSI_OPCODE_SAMPLE_B:
4175 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
4176 break;
4177
4178 case TGSI_OPCODE_SAMPLE_C:
4179 exec_sample(mach, inst, TEX_MODIFIER_NONE);
4180 break;
4181
4182 case TGSI_OPCODE_SAMPLE_C_LZ:
4183 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
4184 break;
4185
4186 case TGSI_OPCODE_SAMPLE_D:
4187 exec_sample_d(mach, inst);
4188 break;
4189
4190 case TGSI_OPCODE_SAMPLE_L:
4191 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
4192 break;
4193
4194 case TGSI_OPCODE_GATHER4:
4195 assert(0);
4196 break;
4197
4198 case TGSI_OPCODE_RESINFO:
4199 assert(0);
4200 break;
4201
4202 case TGSI_OPCODE_SAMPLE_POS:
4203 assert(0);
4204 break;
4205
4206 case TGSI_OPCODE_SAMPLE_INFO:
4207 assert(0);
4208 break;
4209
4210 case TGSI_OPCODE_UARL:
4211 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
4212 break;
4213
4214 case TGSI_OPCODE_UCMP:
4215 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4216 break;
4217
4218 case TGSI_OPCODE_IABS:
4219 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4220 break;
4221
4222 case TGSI_OPCODE_ISSG:
4223 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4224 break;
4225
4226 default:
4227 assert( 0 );
4228 }
4229 }
4230
4231
/*
 * Set to a non-zero value to compile per-instruction tracing into
 * tgsi_exec_machine_run(): each instruction is dumped before it executes
 * and TEMP registers whose contents changed are reported afterwards.
 */
#define DEBUG_EXECUTION 0
4233
4234
4235 /**
4236 * Run TGSI interpreter.
4237 * \return bitmask of "alive" quad components
4238 */
4239 uint
4240 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
4241 {
4242 uint i;
4243 int pc = 0;
4244
4245 mach->CondMask = 0xf;
4246 mach->LoopMask = 0xf;
4247 mach->ContMask = 0xf;
4248 mach->FuncMask = 0xf;
4249 mach->ExecMask = 0xf;
4250
4251 mach->Switch.mask = 0xf;
4252
4253 assert(mach->CondStackTop == 0);
4254 assert(mach->LoopStackTop == 0);
4255 assert(mach->ContStackTop == 0);
4256 assert(mach->SwitchStackTop == 0);
4257 assert(mach->BreakStackTop == 0);
4258 assert(mach->CallStackTop == 0);
4259
4260 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
4261 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
4262
4263 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
4264 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
4265 mach->Primitives[0] = 0;
4266 }
4267
4268 /* execute declarations (interpolants) */
4269 for (i = 0; i < mach->NumDeclarations; i++) {
4270 exec_declaration( mach, mach->Declarations+i );
4271 }
4272
4273 {
4274 #if DEBUG_EXECUTION
4275 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
4276 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
4277 uint inst = 1;
4278
4279 memcpy(temps, mach->Temps, sizeof(temps));
4280 memcpy(outputs, mach->Outputs, sizeof(outputs));
4281 #endif
4282
4283 /* execute instructions, until pc is set to -1 */
4284 while (pc != -1) {
4285
4286 #if DEBUG_EXECUTION
4287 uint i;
4288
4289 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
4290 #endif
4291
4292 assert(pc < (int) mach->NumInstructions);
4293 exec_instruction(mach, mach->Instructions + pc, &pc);
4294
4295 #if DEBUG_EXECUTION
4296 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
4297 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
4298 uint j;
4299
4300 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
4301 debug_printf("TEMP[%2u] = ", i);
4302 for (j = 0; j < 4; j++) {
4303 if (j > 0) {
4304 debug_printf(" ");
4305 }
4306 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4307 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
4308 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
4309 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
4310 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
4311 }
4312 }
4313 }
4314 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
4315 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
4316 uint j;
4317
4318 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
4319 debug_printf("OUT[%2u] = ", i);
4320 for (j = 0; j < 4; j++) {
4321 if (j > 0) {
4322 debug_printf(" ");
4323 }
4324 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4325 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
4326 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
4327 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
4328 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
4329 }
4330 }
4331 }
4332 #endif
4333 }
4334 }
4335
4336 #if 0
4337 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4338 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
4339 /*
4340 * Scale back depth component.
4341 */
4342 for (i = 0; i < 4; i++)
4343 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
4344 }
4345 #endif
4346
4347 /* Strictly speaking, these assertions aren't really needed but they
4348 * can potentially catch some bugs in the control flow code.
4349 */
4350 assert(mach->CondStackTop == 0);
4351 assert(mach->LoopStackTop == 0);
4352 assert(mach->ContStackTop == 0);
4353 assert(mach->SwitchStackTop == 0);
4354 assert(mach->BreakStackTop == 0);
4355 assert(mach->CallStackTop == 0);
4356
4357 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4358 }