[AMSTREAM] We don't need to define WIDL_C_INLINE_WRAPPERS here anymore.
[reactos.git] / dll / directx / wine / d3dx9_36 / preshader.c
1 /*
2 * Copyright 2016 Paul Gofman
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #include "d3dx9_36_private.h"
20
21 #include <assert.h>
22
/* ReactOS FIXME: fmin()/fmax() are redirected to the min()/max() macros here.
 * NOTE(review): presumably a workaround for the ReactOS build environment;
 * min()/max() are not visible in this file, so confirm that their NaN handling
 * and argument evaluation are acceptable for pres_min()/pres_max(). */
#define fmin min
#define fmax max
26
/* Preshader operations. parse_pres_ins() stores an index into pres_op_info[]
 * in a variable of this type, so the enumerators must stay in the same order
 * as the entries of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,
    PRESHADER_OP_CMP,
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
55
/* Evaluator for one preshader operation. 'args' holds the input values; 'n' is
 * only read by pres_dot(), which treats it as the length of each input vector. */
typedef double (*pres_op_func)(double *args, int n);
57
58 static double to_signed_nan(double v)
59 {
60 static const union
61 {
62 ULONG64 ulong64_value;
63 double double_value;
64 }
65 signed_nan =
66 {
67 0xfff8000000000000
68 };
69
70 return isnan(v) ? signed_nan.double_value : v;
71 }
72
/* mov: passes the first argument through unchanged; n is unused. */
static double pres_mov(double *args, int n)
{
    const double value = args[0];

    return value;
}

/* add: sum of the first two arguments; n is unused. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs + rhs;
}

/* mul: product of the first two arguments; n is unused. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs * rhs;
}
/* dot: dot product of two n-component vectors stored back to back in args,
 * i.e. args[0..n-1] and args[n..2n-1]. Products are accumulated in index order. */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int k = 0;

    while (k < n)
    {
        acc += args[k] * args[k + n];
        ++k;
    }
    return acc;
}
86
/* d3ds_dotswiz with 6 inputs: dot product of two 3-component vectors taken
 * from args[0..5]. The n parameter is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    const int vector_size = 3;

    return pres_dot(args, vector_size);
}

/* d3ds_dotswiz with 8 inputs: dot product of two 4-component vectors taken
 * from args[0..7]. The n parameter is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    const int vector_size = 4;

    return pres_dot(args, vector_size);
}
96
97 static double pres_neg(double *args, int n) {return -args[0];}
98 static double pres_rcp(double *args, int n) {return 1.0 / args[0];}
99 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;}
100 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;}
101 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
102 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
103 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
104 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
105 static double pres_sin(double *args, int n) {return sin(args[0]);}
106 static double pres_cos(double *args, int n) {return cos(args[0]);}
107 static double pres_rsq(double *args, int n)
108 {
109 double v;
110
111 v = fabs(args[0]);
112 if (v == 0.0)
113 return INFINITY;
114 else
115 return 1.0 / sqrt(v);
116 }
117 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
118 static double pres_log(double *args, int n)
119 {
120 double v;
121
122 v = fabs(args[0]);
123 if (v == 0.0)
124 return 0.0;
125 else
126 #ifdef HAVE_LOG2
127 return log2(v);
128 #else
129 return log(v) / log(2);
130 #endif
131 }
/* asin: arc sine of args[0]; a NaN result is normalised by to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double result = asin(args[0]);

    return to_signed_nan(result);
}

/* acos: arc cosine of args[0]; a NaN result is normalised by to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double result = acos(args[0]);

    return to_signed_nan(result);
}
134 static double pres_atan(double *args, int n) {return atan(args[0]);}
135 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
136
/* Tests show that the 'div' operation always produces 0. The compiler does not
 * appear to emit it (it uses rcp + mul instead), so it is probably not
 * implemented in native d3dx. Both parameters are therefore ignored. */
static double pres_div(double *args, int n)
{
    (void)args;
    (void)n;
    return 0.0;
}
140
/* Layout of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(): the opcode is (word & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT,
 * PRES_SCALAR_FLAG marks a scalar operation and the low 16 bits hold the
 * component count. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK 0x0000ffff

/* Section tags. Read as little-endian bytes, the values spell the ASCII
 * strings "PRES", "CLIT", "FXLC" and "PRSI" respectively. */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
/* NOTE(review): presumably the version/signature token of a preshader blob
 * (upper 16 bits are ASCII 'F', 'X'); its use is not visible in this part of the file. */
#define PRES_SIGN 0x46580000
151
/* Static description of one preshader operation (one entry of pres_op_info[]). */
struct op_info
{
    /* Opcode value as extracted from the instruction word. */
    unsigned int opcode;
    /* Mnemonic used in trace output. */
    char mnem[16];
    /* Number of input operands; matched together with the opcode when parsing. */
    unsigned int input_count;
    /* When set, parse_pres_ins() and dump_ins() treat the instruction as
     * producing a single output component regardless of component_count. */
    BOOL func_all_comps;
    /* Evaluator, NULL for "nop". */
    pres_op_func func;
};
160
/* Operation table indexed by enum pres_ops. parse_pres_ins() looks an
 * instruction up by (opcode, input_count), which is how the two "d3ds_dotswiz"
 * entries sharing opcode 0x70e are told apart. */
static const struct op_info pres_op_info[] =
{
    {0x000, "nop",          0, 0, NULL      }, /* PRESHADER_OP_NOP */
    {0x100, "mov",          1, 0, pres_mov  }, /* PRESHADER_OP_MOV */
    {0x101, "neg",          1, 0, pres_neg  }, /* PRESHADER_OP_NEG */
    {0x103, "rcp",          1, 0, pres_rcp  }, /* PRESHADER_OP_RCP */
    {0x104, "frc",          1, 0, pres_frc  }, /* PRESHADER_OP_FRC */
    {0x105, "exp",          1, 0, pres_exp  }, /* PRESHADER_OP_EXP */
    {0x106, "log",          1, 0, pres_log  }, /* PRESHADER_OP_LOG */
    {0x107, "rsq",          1, 0, pres_rsq  }, /* PRESHADER_OP_RSQ */
    {0x108, "sin",          1, 0, pres_sin  }, /* PRESHADER_OP_SIN */
    {0x109, "cos",          1, 0, pres_cos  }, /* PRESHADER_OP_COS */
    {0x10a, "asin",         1, 0, pres_asin }, /* PRESHADER_OP_ASIN */
    {0x10b, "acos",         1, 0, pres_acos }, /* PRESHADER_OP_ACOS */
    {0x10c, "atan",         1, 0, pres_atan }, /* PRESHADER_OP_ATAN */
    {0x200, "min",          2, 0, pres_min  }, /* PRESHADER_OP_MIN */
    {0x201, "max",          2, 0, pres_max  }, /* PRESHADER_OP_MAX */
    {0x202, "lt",           2, 0, pres_lt   }, /* PRESHADER_OP_LT */
    {0x203, "ge",           2, 0, pres_ge   }, /* PRESHADER_OP_GE */
    {0x204, "add",          2, 0, pres_add  }, /* PRESHADER_OP_ADD */
    {0x205, "mul",          2, 0, pres_mul  }, /* PRESHADER_OP_MUL */
    {0x206, "atan2",        2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
    {0x208, "div",          2, 0, pres_div  }, /* PRESHADER_OP_DIV */
    {0x300, "cmp",          3, 0, pres_cmp  }, /* PRESHADER_OP_CMP */
    {0x500, "dot",          2, 1, pres_dot  }, /* PRESHADER_OP_DOT */
    {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
    {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
};
189
/* Storage type of the components held in a register table. PRES_VT_COUNT is
 * also returned by table_type_from_param_type() to signal an unsupported type. */
enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,
    PRES_VT_COUNT
};
198
/* Per register table: size in bytes of one component and the type it is stored
 * as. Indexed by register table (see the per-entry comments). */
static const struct
{
    unsigned int component_size;
    enum pres_value_type type;
}
table_info[] =
{
    {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
    {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
    {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
    /* TODO: use double precision for 64 bit */
    {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
};
214
/* Short register table names used in trace output, indexed by register table.
 * NOTE(review): the trailing "(null)" entry presumably corresponds to
 * PRES_REGTAB_COUNT - confirm against the enum definition. */
static const char *table_symbol[] =
{
    "imm", "c", "oc", "ob", "oi", "r", "(null)",
};
219
/* Maps a D3DX register set (see the per-entry comments) to a register table.
 * Samplers have no table and map to PRES_REGTAB_COUNT. Float constants map to
 * PRES_REGTAB_CONST here. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
    PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
};
227
/* Maps a D3DX register set (see the per-entry comments) to a register table.
 * Samplers have no table and map to PRES_REGTAB_COUNT. Float constants map to
 * PRES_REGTAB_OCONST here. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
};
235
/* Reference to a location inside one of the register tables. */
struct d3dx_pres_reg
{
    /* Register table the location belongs to. */
    enum pres_reg_tables table;
    /* offset is component index, not register index, e. g.
       offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};
243
/* Instruction operand: a register plus an optional index register used for
 * relative addressing. parse_pres_arg() sets index_reg.table to
 * PRES_REGTAB_COUNT when the operand has no index register. */
struct d3dx_pres_operand
{
    struct d3dx_pres_reg reg;
    struct d3dx_pres_reg index_reg;
};
249
/* Capacity of d3dx_pres_ins.inputs; parse_pres_ins() rejects instructions
 * with more input operands than this. */
#define MAX_INPUTS_COUNT 8

/* One parsed preshader instruction. */
struct d3dx_pres_ins
{
    /* Index into pres_op_info[]. */
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    /* Component count from the instruction word; parse_pres_ins() only
     * accepts values from 1 to 4. */
    unsigned int component_count;
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    struct d3dx_pres_operand output;
};
262
/* Layout information computed by get_const_upload_info() for copying a
 * parameter into a constant's registers. */
struct const_upload_info
{
    /* TRUE when the parameter and the constant differ in row/column major order. */
    BOOL transpose;
    /* Parameter dimensions in the constant's orientation (columns/rows for a
     * column-major constant, rows/columns otherwise). */
    unsigned int major, minor;
    /* Components between consecutive major lines in the register table. */
    unsigned int major_stride;
    /* Number of complete major lines covered by the constant's registers. */
    unsigned int major_count;
    /* major_count * minor + minor_remainder. */
    unsigned int count;
    /* Components of a trailing incomplete major line, if any. */
    unsigned int minor_remainder;
};
272
273 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
274 {
275 switch (type)
276 {
277 case D3DXPT_FLOAT:
278 return PRES_VT_FLOAT;
279 case D3DXPT_INT:
280 return PRES_VT_INT;
281 case D3DXPT_BOOL:
282 return PRES_VT_BOOL;
283 default:
284 FIXME("Unsupported type %u.\n", type);
285 return PRES_VT_COUNT;
286 }
287 }
288
289 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
290 {
291 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
292 }
293
294 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
295 {
296 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
297 }
298
/* Number of components in a single register of the given table, i.e. the
 * component offset of register 1. */
static unsigned int get_reg_components(unsigned int table)
{
    const unsigned int one_register = 1;

    return get_offset_reg(table, one_register);
}
303
/* sizeof(unsigned int) * 8, i.e. the number of bits in an unsigned int when
 * bytes are 8 bits wide. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
305
306 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
307 {
308 unsigned int size;
309
310 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
311 if (size)
312 {
313 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
314 if (!rs->tables[table])
315 return E_OUTOFMEMORY;
316 }
317 return D3D_OK;
318 }
319
320 static void regstore_free_tables(struct d3dx_regstore *rs)
321 {
322 unsigned int i;
323
324 for (i = 0; i < PRES_REGTAB_COUNT; ++i)
325 {
326 HeapFree(GetProcessHeap(), 0, rs->tables[i]);
327 }
328 }
329
330 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
331 unsigned int start_offset, unsigned int count)
332 {
333 BYTE *dst = rs->tables[table];
334 const BYTE *src = data;
335 unsigned int size;
336
337 dst += start_offset * table_info[table].component_size;
338 size = count * table_info[table].component_size;
339 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
340 memcpy(dst, src, size);
341 }
342
343 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
344 {
345 BYTE *p;
346
347 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
348 switch (table_info[table].type)
349 {
350 case PRES_VT_FLOAT:
351 return *(float *)p;
352 case PRES_VT_DOUBLE:
353 return *(double *)p;
354 default:
355 FIXME("Unexpected preshader input from table %u.\n", table);
356 return NAN;
357 }
358 }
359
360 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
361 {
362 BYTE *p;
363
364 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
365 switch (table_info[table].type)
366 {
367 case PRES_VT_FLOAT : *(float *)p = v; break;
368 case PRES_VT_DOUBLE: *(double *)p = v; break;
369 case PRES_VT_INT : *(int *)p = lrint(v); break;
370 case PRES_VT_BOOL : *(BOOL *)p = !!v; break;
371 default:
372 FIXME("Bad type %u.\n", table_info[table].type);
373 break;
374 }
375 }
376
/* Traces a byte code buffer as hexadecimal 32-bit words, eight words per
 * line. 'size' is in bytes; a trailing partial word is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    const unsigned int word_count = size / sizeof(*words);
    unsigned int row_start, column, row_length;

    for (row_start = 0; row_start < word_count; row_start += row_length)
    {
        row_length = min(word_count - row_start, 8);
        for (column = 0; column < row_length; ++column)
            TRACE("0x%08x,", words[row_start + column]);
        TRACE("\n");
    }
}
393
394 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
395 unsigned int fourcc, unsigned int *size)
396 {
397 /* Provide at least one value in comment section on non-NULL return. */
398 while (count > 2 && (*ptr & 0xffff) == 0xfffe)
399 {
400 unsigned int section_size;
401
402 section_size = (*ptr >> 16);
403 if (!section_size || section_size + 1 > count)
404 break;
405 if (*(ptr + 1) == fourcc)
406 {
407 *size = section_size;
408 return ptr + 2;
409 }
410 count -= section_size + 1;
411 ptr += section_size + 1;
412 }
413 return NULL;
414 }
415
416 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
417 {
418 static const enum pres_reg_tables reg_table[8] =
419 {
420 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
421 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
422 };
423
424 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
425 {
426 FIXME("Unsupported register table %#x.\n", *ptr);
427 return NULL;
428 }
429
430 reg->table = reg_table[*ptr++];
431 reg->offset = *ptr++;
432 return ptr;
433 }
434
435 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
436 {
437 if (count < 3 || (*ptr && count < 5))
438 {
439 WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
440 return NULL;
441 }
442
443 if (*ptr)
444 {
445 if (*ptr != 1)
446 {
447 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
448 return NULL;
449 }
450 ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
451 if (!ptr)
452 return NULL;
453 }
454 else
455 {
456 opr->index_reg.table = PRES_REGTAB_COUNT;
457 ++ptr;
458 }
459
460 ptr = parse_pres_reg(ptr, &opr->reg);
461
462 if (opr->reg.table == PRES_REGTAB_OBCONST)
463 opr->reg.offset /= 4;
464 return ptr;
465 }
466
467 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
468 {
469 unsigned int ins_code, ins_raw;
470 unsigned int input_count;
471 unsigned int i;
472
473 if (count < 2)
474 {
475 WARN("Byte code buffer ends unexpectedly.\n");
476 return NULL;
477 }
478
479 ins_raw = *ptr++;
480 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
481 ins->component_count = ins_raw & PRES_NCOMP_MASK;
482 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
483
484 if (ins->component_count < 1 || ins->component_count > 4)
485 {
486 FIXME("Unsupported number of components %u.\n", ins->component_count);
487 return NULL;
488 }
489 input_count = *ptr++;
490 count -= 2;
491 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
492 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
493 break;
494 if (i == ARRAY_SIZE(pres_op_info))
495 {
496 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
497 return NULL;
498 }
499 ins->op = i;
500 if (input_count > ARRAY_SIZE(ins->inputs))
501 {
502 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
503 pres_op_info[i].mnem);
504 return NULL;
505 }
506 for (i = 0; i < input_count; ++i)
507 {
508 unsigned int *p;
509
510 p = parse_pres_arg(ptr, count, &ins->inputs[i]);
511 if (!p)
512 return NULL;
513 count -= p - ptr;
514 ptr = p;
515 }
516 ptr = parse_pres_arg(ptr, count, &ins->output);
517 if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
518 {
519 FIXME("Relative addressing in output register not supported.\n");
520 return NULL;
521 }
522 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
523 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
524 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
525 {
526 FIXME("Instructions outputting multiple registers are not supported.\n");
527 return NULL;
528 }
529 return ptr;
530 }
531
532 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
533 WORD *constantinfo_reserved)
534 {
535 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
536
537 if (!constant)
538 {
539 FIXME("Could not get constant desc.\n");
540 return D3DERR_INVALIDCALL;
541 }
542 *desc = constant->desc;
543 if (constantinfo_reserved)
544 *constantinfo_reserved = constant->constantinfo_reserved;
545 return D3D_OK;
546 }
547
548 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
549 struct const_upload_info *info)
550 {
551 struct d3dx_parameter *param = const_set->param;
552 unsigned int table = const_set->table;
553
554 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
555 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
556 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
557 {
558 info->major = param->columns;
559 info->minor = param->rows;
560 }
561 else
562 {
563 info->major = param->rows;
564 info->minor = param->columns;
565 }
566
567 if (get_reg_components(table) == 1)
568 {
569 unsigned int const_length = get_offset_reg(table, const_set->register_count);
570
571 info->major_stride = info->minor;
572 info->major_count = const_length / info->major_stride;
573 info->minor_remainder = const_length % info->major_stride;
574 }
575 else
576 {
577 info->major_stride = get_reg_components(table);
578 info->major_count = const_set->register_count;
579 info->minor_remainder = 0;
580 }
581 info->count = info->major_count * info->minor + info->minor_remainder;
582 }
583
584 #define INITIAL_CONST_SET_SIZE 16
585
586 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
587 {
588 if (const_tab->const_set_count >= const_tab->const_set_size)
589 {
590 unsigned int new_size;
591 struct d3dx_const_param_eval_output *new_alloc;
592
593 if (!const_tab->const_set_size)
594 {
595 new_size = INITIAL_CONST_SET_SIZE;
596 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
597 if (!new_alloc)
598 {
599 ERR("Out of memory.\n");
600 return E_OUTOFMEMORY;
601 }
602 }
603 else
604 {
605 new_size = const_tab->const_set_size * 2;
606 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
607 sizeof(*const_tab->const_set) * new_size);
608 if (!new_alloc)
609 {
610 ERR("Out of memory.\n");
611 return E_OUTOFMEMORY;
612 }
613 }
614 const_tab->const_set = new_alloc;
615 const_tab->const_set_size = new_size;
616 }
617 const_tab->const_set[const_tab->const_set_count++] = *set;
618 return D3D_OK;
619 }
620
621 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
622 struct d3dx_preshader *pres)
623 {
624 unsigned int i;
625 struct d3dx_const_param_eval_output const_set = {NULL};
626
627 for (i = 0; i < pres->ins_count; ++i)
628 {
629 const struct d3dx_pres_ins *ins = &pres->ins[i];
630 const struct d3dx_pres_reg *reg = &ins->output.reg;
631
632 if (reg->table == PRES_REGTAB_TEMP)
633 continue;
634
635 const_set.register_index = get_reg_offset(reg->table, reg->offset);
636 const_set.register_count = 1;
637 const_set.table = reg->table;
638 const_set.constant_class = D3DXPC_FORCE_DWORD;
639 const_set.element_count = 1;
640 append_const_set(const_tab, &const_set);
641 }
642 }
643
644 static int compare_const_set(const void *a, const void *b)
645 {
646 const struct d3dx_const_param_eval_output *r1 = a;
647 const struct d3dx_const_param_eval_output *r2 = b;
648
649 if (r1->table != r2->table)
650 return r1->table - r2->table;
651 return r1->register_index - r2->register_index;
652 }
653
/* Merges runs of adjacent const set entries, starting at 'index', into single
 * entries.
 *
 * Entries are merged when they continue each other both in the register table
 * (same table, next component offset) and in parameter data (next data
 * pointer), and agree on direct_copy. Entries that are not direct copies must
 * additionally match in parameter type, class, dimensions and register count.
 *
 * const_tab - table whose const_set array is compacted in place.
 * param     - parent parameter of the entries; used for tracing and as the
 *             new parameter of a merged direct-copy run that starts at 'index'.
 * index     - first entry to consider.
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL when 'param' has a type that
 * table_type_from_param_type() does not support. */
static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
        struct d3dx_parameter *param, unsigned int index)
{
    unsigned int i, start_index = index;
    DWORD *current_data;
    enum pres_reg_tables current_table;
    unsigned int current_start_offset, element_count;
    struct d3dx_const_param_eval_output *first_const;

    /* Guards the unsigned 'const_set_count - 1' below against wrapping. */
    if (!const_tab->const_set_count)
        return D3D_OK;

    while (index < const_tab->const_set_count - 1)
    {
        first_const = &const_tab->const_set[index];
        current_data = first_const->param->data;
        current_table = first_const->table;
        current_start_offset = get_offset_reg(current_table, first_const->register_index);
        element_count = 0;
        /* Find the end of the run starting at 'index'; the first iteration
         * always accepts first_const itself. */
        for (i = index; i < const_tab->const_set_count; ++i)
        {
            struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
            unsigned int count = get_offset_reg(const_set->table,
                    const_set->register_count * const_set->element_count);
            unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);

            if (!(const_set->table == current_table && current_start_offset == start_offset
                    && const_set->direct_copy == first_const->direct_copy
                    && current_data == const_set->param->data
                    && (const_set->direct_copy || (first_const->param->type == const_set->param->type
                    && first_const->param->class == const_set->param->class
                    && first_const->param->columns == const_set->param->columns
                    && first_const->param->rows == const_set->param->rows
                    && first_const->register_count == const_set->register_count
                    && (i == const_tab->const_set_count - 1
                    || first_const->param->element_count == const_set->param->element_count)))))
                break;

            /* Advance the expected register offset and data pointer past this entry. */
            current_start_offset += count;
            current_data += const_set->direct_copy ? count : const_set->param->rows
                    * const_set->param->columns * const_set->element_count;
            element_count += const_set->element_count;
        }

        if (i > index + 1)
        {
            TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
                    debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);

            first_const->element_count = element_count;
            if (first_const->direct_copy)
            {
                /* A merged direct copy is one contiguous block of registers. */
                first_const->element_count = 1;
                if (index == start_index
                        && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
                {
                    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
                        return D3DERR_INVALIDCALL;
                    first_const->param = param;
                }
                first_const->register_count = get_reg_offset(current_table, current_start_offset)
                        - first_const->register_index;
            }
            /* Drop the merged entries by moving the tail right after first_const. */
            memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
                    sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
            const_tab->const_set_count -= i - index - 1;
        }
        else
        {
            TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
                    const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
        }
        index = i;
    }
    return D3D_OK;
}
730
/* Creates the const set entries needed to upload 'param' to the constant 'hc'.
 *
 * Arrays and structures are handled by recursing into each element or member
 * and then merging the resulting entries with merge_const_set_entries(). A
 * leaf parameter produces at most one entry describing the target registers
 * and whether the data can be copied directly.
 *
 * Returns D3D_OK on success (including the case where nothing needs to be
 * uploaded), D3DERR_INVALIDCALL when the constant and the parameter do not
 * match, or the error returned by append_const_set(). */
static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
        D3DXHANDLE hc, struct d3dx_parameter *param)
{
    D3DXCONSTANT_DESC desc;
    unsigned int const_count, param_count, i;
    BOOL get_element;
    struct d3dx_const_param_eval_output const_set;
    struct const_upload_info info;
    enum pres_value_type table_type;
    HRESULT hr;

    if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
        return D3DERR_INVALIDCALL;

    /* Arrays are walked by element, everything else by struct member. */
    if (param->element_count)
    {
        param_count = param->element_count;
        const_count = desc.Elements;
        get_element = TRUE;
    }
    else
    {
        if (desc.Elements > 1)
        {
            FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
            return D3DERR_INVALIDCALL;
        }
        param_count = param->member_count;
        const_count = desc.StructMembers;
        get_element = FALSE;
    }
    if (const_count != param_count)
    {
        FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
                param_count, const_count);
        return D3DERR_INVALIDCALL;
    }
    if (const_count)
    {
        HRESULT ret = D3D_OK;
        D3DXHANDLE hc_element;
        /* Entries appended by the children start here. */
        unsigned int index = const_tab->const_set_count;

        /* All children are processed even after a failure; the last failure
         * code is the one reported. */
        for (i = 0; i < const_count; ++i)
        {
            if (get_element)
                hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
            else
                hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
            if (!hc_element)
            {
                FIXME("Could not get constant.\n");
                hr = D3DERR_INVALIDCALL;
            }
            else
            {
                hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
            }
            if (FAILED(hr))
                ret = hr;
        }
        if (FAILED(ret))
            return ret;
        return merge_const_set_entries(const_tab, param, index);
    }

    TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
            debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
    TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
            debugstr_a(param->name), param->rows, param->columns, param->class,
            param->flags, param->bytes);

    /* Leaf parameter: describe its target registers. */
    const_set.element_count = 1;
    const_set.param = param;
    const_set.constant_class = desc.Class;
    if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
    {
        FIXME("Unknown register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    const_set.register_index = desc.RegisterIndex;
    const_set.table = const_tab->regset2table[desc.RegisterSet];
    if (const_set.table >= PRES_REGTAB_COUNT)
    {
        ERR("Unexpected register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    /* Both the table components and the parameter components are expected to
     * be 32 bits wide. */
    assert(table_info[const_set.table].component_size == sizeof(unsigned int));
    assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
    const_set.register_count = desc.RegisterCount;
    table_type = table_info[const_set.table].type;
    get_const_upload_info(&const_set, &info);
    if (!info.count)
    {
        TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
        return D3D_OK;
    }

    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
        return D3DERR_INVALIDCALL;

    /* A direct copy needs matching value types, no transpose, tightly packed
     * major lines that fill the registers exactly, and enough parameter data. */
    const_set.direct_copy = table_type_from_param_type(param->type) == table_type
            && !info.transpose && info.minor == info.major_stride
            && info.count == get_offset_reg(const_set.table, const_set.register_count)
            && info.count * sizeof(unsigned int) <= param->bytes;
    if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
        FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
                debugstr_a(param->name));

    if (info.major_count > info.major
            || (info.major_count == info.major && info.minor_remainder))
    {
        WARN("Constant dimensions exceed parameter size.\n");
        return D3DERR_INVALIDCALL;
    }

    if (FAILED(hr = append_const_set(const_tab, &const_set)))
        return hr;

    return D3D_OK;
}
852
853 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
854 struct d3dx9_base_effect *base, const char **skip_constants,
855 unsigned int skip_constants_count, struct d3dx_preshader *pres)
856 {
857 ID3DXConstantTable *ctab;
858 D3DXCONSTANT_DESC *cdesc;
859 struct d3dx_parameter **inputs_param;
860 D3DXCONSTANTTABLE_DESC desc;
861 HRESULT hr;
862 D3DXHANDLE hc;
863 unsigned int i, j;
864
865 hr = D3DXGetShaderConstantTable(byte_code, &ctab);
866 if (FAILED(hr) || !ctab)
867 {
868 TRACE("Could not get CTAB data, hr %#x.\n", hr);
869 /* returning OK, shaders and preshaders without CTAB are valid */
870 return D3D_OK;
871 }
872 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
873 {
874 FIXME("Could not get CTAB desc, hr %#x.\n", hr);
875 goto cleanup;
876 }
877
878 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
879 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
880 if (!cdesc || !inputs_param)
881 {
882 hr = E_OUTOFMEMORY;
883 goto cleanup;
884 }
885
886 for (i = 0; i < desc.Constants; ++i)
887 {
888 unsigned int index = out->input_count;
889 WORD constantinfo_reserved;
890
891 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
892 if (!hc)
893 {
894 FIXME("Null constant handle.\n");
895 goto cleanup;
896 }
897 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
898 goto cleanup;
899 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
900 if (!inputs_param[index])
901 {
902 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
903 continue;
904 }
905 if (cdesc[index].Class == D3DXPC_OBJECT)
906 {
907 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
908 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
909 || !is_param_type_sampler(inputs_param[index]->type))
910 {
911 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
912 hr = D3DERR_INVALIDCALL;
913 goto cleanup;
914 }
915 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
916 {
917 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
918 hr = D3DERR_INVALIDCALL;
919 goto cleanup;
920 }
921 }
922 if (!is_top_level_parameter(inputs_param[index]))
923 {
924 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
925 hr = E_FAIL;
926 goto cleanup;
927 }
928
929 for (j = 0; j < skip_constants_count; ++j)
930 {
931 if (!strcmp(cdesc[index].Name, skip_constants[j]))
932 {
933 if (!constantinfo_reserved)
934 {
935 WARN("skip_constants parameter %s is not register bound.\n",
936 cdesc[index].Name);
937 hr = D3DERR_INVALIDCALL;
938 goto cleanup;
939 }
940 TRACE("Skipping constant %s.\n", cdesc[index].Name);
941 break;
942 }
943 }
944 if (j < skip_constants_count)
945 continue;
946 ++out->input_count;
947 if (inputs_param[index]->class == D3DXPC_OBJECT)
948 continue;
949 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
950 goto cleanup;
951 }
952 if (pres)
953 append_pres_const_sets_for_shader_input(out, pres);
954 if (out->const_set_count)
955 {
956 struct d3dx_const_param_eval_output *new_alloc;
957
958 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
959
960 i = 0;
961 while (i < out->const_set_count - 1)
962 {
963 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
964 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
965 && out->const_set[i].table == out->const_set[i + 1].table
966 && out->const_set[i].register_index + out->const_set[i].register_count
967 >= out->const_set[i + 1].register_index)
968 {
969 assert(out->const_set[i].register_index + out->const_set[i].register_count
970 <= out->const_set[i + 1].register_index + 1);
971 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
972 - out->const_set[i].register_index;
973 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
974 * (out->const_set_count - i - 2));
975 --out->const_set_count;
976 }
977 else
978 {
979 ++i;
980 }
981 }
982
983 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
984 sizeof(*out->const_set) * out->const_set_count);
985 if (new_alloc)
986 {
987 out->const_set = new_alloc;
988 out->const_set_size = out->const_set_count;
989 }
990 else
991 {
992 WARN("Out of memory.\n");
993 }
994 }
995 cleanup:
996 ID3DXConstantTable_Release(ctab);
997 return hr;
998 }
999
1000 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1001 {
1002 if (table < PRES_REGTAB_COUNT)
1003 table_sizes[table] = max(table_sizes[table], max_register + 1);
1004 }
1005
1006 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1007 {
1008 unsigned int i, table, max_register;
1009
1010 for (i = 0; i < ctab->input_count; ++i)
1011 {
1012 if (!ctab->inputs[i].RegisterCount)
1013 continue;
1014 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1015 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1016 update_table_size(table_sizes, table, max_register);
1017 }
1018 }
1019
1020 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1021 {
1022 static const char *xyzw_str = "xyzw";
1023 unsigned int i, table;
1024
1025 table = arg->reg.table;
1026 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1027 {
1028 TRACE("(");
1029 for (i = 0; i < component_count; ++i)
1030 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1031 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1032 TRACE(")");
1033 }
1034 else
1035 {
1036 if (arg->index_reg.table == PRES_REGTAB_COUNT)
1037 {
1038 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1039 }
1040 else
1041 {
1042 unsigned int index_reg;
1043
1044 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1045 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1046 table_symbol[arg->index_reg.table], index_reg,
1047 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1048 }
1049 for (i = 0; i < component_count; ++i)
1050 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1051 }
1052 }
1053
1054 static void dump_registers(struct d3dx_const_tab *ctab)
1055 {
1056 unsigned int table, i;
1057
1058 for (i = 0; i < ctab->input_count; ++i)
1059 {
1060 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1061 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1062 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1063 }
1064 }
1065
1066 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1067 {
1068 unsigned int i;
1069
1070 TRACE("%s ", pres_op_info[ins->op].mnem);
1071 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1072 for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1073 {
1074 TRACE(", ");
1075 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1076 }
1077 TRACE("\n");
1078 }
1079
1080 static void dump_preshader(struct d3dx_preshader *pres)
1081 {
1082 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1083 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1084
1085 if (immediate_count)
1086 TRACE("// Immediates:\n");
1087 for (i = 0; i < immediate_count; ++i)
1088 {
1089 if (!(i % 4))
1090 TRACE("// ");
1091 TRACE("%.8e", immediates[i]);
1092 if (i % 4 == 3)
1093 TRACE("\n");
1094 else
1095 TRACE(", ");
1096 }
1097 TRACE("// Preshader registers:\n");
1098 dump_registers(&pres->inputs);
1099 TRACE("preshader\n");
1100 for (i = 0; i < pres->ins_count; ++i)
1101 dump_ins(&pres->regs, &pres->ins[i]);
1102 }
1103
1104 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
1105 {
1106 unsigned int *p;
1107 unsigned int i, j, const_count;
1108 double *dconst;
1109 HRESULT hr;
1110 unsigned int saved_word;
1111 unsigned int section_size;
1112
1113 TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1114
1115 if (!count)
1116 {
1117 WARN("Unexpected end of byte code buffer.\n");
1118 return D3DXERR_INVALIDDATA;
1119 }
1120
1121 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1122 if (p)
1123 {
1124 const_count = *p++;
1125 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1126 {
1127 WARN("Byte code buffer ends unexpectedly.\n");
1128 return D3DXERR_INVALIDDATA;
1129 }
1130 dconst = (double *)p;
1131 }
1132 else
1133 {
1134 const_count = 0;
1135 dconst = NULL;
1136 }
1137 TRACE("%u double constants.\n", const_count);
1138
1139 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1140 if (!p)
1141 {
1142 WARN("Could not find preshader code.\n");
1143 return D3D_OK;
1144 }
1145 pres->ins_count = *p++;
1146 --section_size;
1147 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1148 {
1149 WARN("Invalid instruction count %u.\n", pres->ins_count);
1150 return D3DXERR_INVALIDDATA;
1151 }
1152 TRACE("%u instructions.\n", pres->ins_count);
1153 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1154 if (!pres->ins)
1155 return E_OUTOFMEMORY;
1156 for (i = 0; i < pres->ins_count; ++i)
1157 {
1158 unsigned int *ptr_next;
1159
1160 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1161 if (!ptr_next)
1162 return D3DXERR_INVALIDDATA;
1163 section_size -= ptr_next - p;
1164 p = ptr_next;
1165 }
1166
1167 pres->inputs.regset2table = pres_regset2table;
1168
1169 saved_word = *ptr;
1170 *ptr = 0xfffe0000;
1171 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
1172 *ptr = saved_word;
1173 if (FAILED(hr))
1174 return hr;
1175
1176 if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1177 {
1178 FIXME("const_count %u is not a multiple of %u.\n", const_count,
1179 get_reg_components(PRES_REGTAB_IMMED));
1180 return D3DXERR_INVALIDDATA;
1181 }
1182 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1183
1184 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1185 for (i = 0; i < pres->ins_count; ++i)
1186 {
1187 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1188 {
1189 enum pres_reg_tables table;
1190 unsigned int reg_idx;
1191
1192 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1193 {
1194 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1195 : pres->ins[i].component_count - 1;
1196
1197 table = pres->ins[i].inputs[j].reg.table;
1198 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1199 + last_component_index);
1200 }
1201 else
1202 {
1203 table = pres->ins[i].inputs[j].index_reg.table;
1204 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1205 }
1206 if (reg_idx >= pres->regs.table_sizes[table])
1207 {
1208 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u.\n",
1209 i, j, table, reg_idx);
1210 return D3DXERR_INVALIDDATA;
1211 }
1212 }
1213 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1214 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1215 }
1216 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1217 return E_OUTOFMEMORY;
1218 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1219
1220 return D3D_OK;
1221 }
1222
1223 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
1224 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1225 const char **skip_constants, unsigned int skip_constants_count)
1226 {
1227 struct d3dx_param_eval *peval;
1228 unsigned int *ptr, *shader_ptr = NULL;
1229 unsigned int i;
1230 BOOL shader;
1231 unsigned int count, pres_size;
1232 HRESULT ret;
1233
1234 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1235 base_effect, byte_code, byte_code_size, type, peval_out);
1236
1237 count = byte_code_size / sizeof(unsigned int);
1238 if (!byte_code || !count)
1239 {
1240 *peval_out = NULL;
1241 return D3D_OK;
1242 }
1243
1244 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1245 if (!peval)
1246 {
1247 ret = E_OUTOFMEMORY;
1248 goto err_out;
1249 }
1250 peval->version_counter = version_counter;
1251
1252 peval->param_type = type;
1253 switch (type)
1254 {
1255 case D3DXPT_VERTEXSHADER:
1256 case D3DXPT_PIXELSHADER:
1257 shader = TRUE;
1258 break;
1259 default:
1260 shader = FALSE;
1261 break;
1262 }
1263 peval->shader_inputs.regset2table = shad_regset2table;
1264
1265 ptr = (unsigned int *)byte_code;
1266 if (shader)
1267 {
1268 if ((*ptr & 0xfffe0000) != 0xfffe0000)
1269 {
1270 FIXME("Invalid shader signature %#x.\n", *ptr);
1271 ret = D3DXERR_INVALIDDATA;
1272 goto err_out;
1273 }
1274 TRACE("Shader version %#x.\n", *ptr & 0xffff);
1275 shader_ptr = ptr;
1276 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1277 if (!ptr)
1278 TRACE("No preshader found.\n");
1279 }
1280 else
1281 {
1282 pres_size = count;
1283 }
1284
1285 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
1286 {
1287 FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1288 dump_bytecode(byte_code, byte_code_size);
1289 goto err_out;
1290 }
1291
1292 if (shader)
1293 {
1294 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
1295 skip_constants, skip_constants_count, &peval->pres)))
1296 {
1297 TRACE("Could not get shader constant table, hr %#x.\n", ret);
1298 goto err_out;
1299 }
1300 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1301 }
1302
1303 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1304 {
1305 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1306 goto err_out;
1307 }
1308
1309 if (TRACE_ON(d3dx))
1310 {
1311 dump_bytecode(byte_code, byte_code_size);
1312 dump_preshader(&peval->pres);
1313 if (shader)
1314 {
1315 TRACE("// Shader registers:\n");
1316 dump_registers(&peval->shader_inputs);
1317 }
1318 }
1319 *peval_out = peval;
1320 TRACE("Created parameter evaluator %p.\n", *peval_out);
1321 return D3D_OK;
1322
1323 err_out:
1324 WARN("Error creating parameter evaluator.\n");
1325 if (TRACE_ON(d3dx))
1326 dump_bytecode(byte_code, byte_code_size);
1327
1328 d3dx_free_param_eval(peval);
1329 *peval_out = NULL;
1330 return ret;
1331 }
1332
1333 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1334 {
1335 HeapFree(GetProcessHeap(), 0, ctab->inputs);
1336 HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1337 HeapFree(GetProcessHeap(), 0, ctab->const_set);
1338 }
1339
1340 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1341 {
1342 HeapFree(GetProcessHeap(), 0, pres->ins);
1343
1344 regstore_free_tables(&pres->regs);
1345 d3dx_free_const_tab(&pres->inputs);
1346 }
1347
1348 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1349 {
1350 TRACE("peval %p.\n", peval);
1351
1352 if (!peval)
1353 return;
1354
1355 d3dx_free_preshader(&peval->pres);
1356 d3dx_free_const_tab(&peval->shader_inputs);
1357 HeapFree(GetProcessHeap(), 0, peval);
1358 }
1359
/* Convert 'count' floats from 'in' into ints in 'out' using the C
 * float-to-int conversion (truncation toward zero). */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}
1369
1370 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1371 {
1372 unsigned int i;
1373 const DWORD *in_dword = in;
1374 BOOL *out_bool = out;
1375
1376 for (i = 0; i < count; ++i)
1377 out_bool[i] = !!in_dword[i];
1378 }
1379
/* Convert 'count' ints from 'in' into floats in 'out'. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}
1389
1390 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1391 {
1392 unsigned int i;
1393 const BOOL *in_bool = in;
1394 float *out_float = out;
1395
1396 for (i = 0; i < count; ++i)
1397 out_float[i] = !!in_bool[i];
1398 }
1399
/* Convert 'count' boolean values from 'in' into ints in 'out': any non-zero
 * input word yields 1, zero yields 0.
 *
 * The input is read as raw 32-bit words, which is how regstore_set_data()
 * hands the data over. Reading it as float would misclassify words whose
 * float interpretation compares equal to zero, e.g. 0x80000000 (-0.0f). */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const unsigned int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1409
1410 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1411 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1412 {
1413 typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1414 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1415 {
1416 {NULL, NULL, pres_int_from_float, pres_bool_from_value},
1417 {NULL, NULL, NULL, NULL},
1418 {pres_float_from_int, NULL, NULL, pres_bool_from_value},
1419 {pres_float_from_bool, NULL, pres_int_from_bool, NULL}
1420 };
1421 enum pres_value_type table_type = table_info[table].type;
1422
1423 if (param_type == table_type)
1424 {
1425 regstore_set_values(rs, table, in, offset, count);
1426 return;
1427 }
1428
1429 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1430 }
1431
1432 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1433 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1434 unsigned int start, unsigned int count)
1435 {
1436 if (type == D3DXPT_VERTEXSHADER)
1437 {
1438 switch(table)
1439 {
1440 case PRES_REGTAB_OCONST:
1441 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1442 case PRES_REGTAB_OICONST:
1443 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1444 case PRES_REGTAB_OBCONST:
1445 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1446 default:
1447 FIXME("Unexpected register table %u.\n", table);
1448 return D3DERR_INVALIDCALL;
1449 }
1450 }
1451 else if (type == D3DXPT_PIXELSHADER)
1452 {
1453 switch(table)
1454 {
1455 case PRES_REGTAB_OCONST:
1456 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1457 case PRES_REGTAB_OICONST:
1458 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1459 case PRES_REGTAB_OBCONST:
1460 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1461 default:
1462 FIXME("Unexpected register table %u.\n", table);
1463 return D3DERR_INVALIDCALL;
1464 }
1465 }
1466 else
1467 {
1468 FIXME("Unexpected parameter type %u.\n", type);
1469 return D3DERR_INVALIDCALL;
1470 }
1471 }
1472
1473 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1474 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1475 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1476 {
1477 unsigned int const_idx;
1478 unsigned int current_start = 0, current_count = 0;
1479 enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1480 BOOL update_device = manager || device;
1481 HRESULT hr, result = D3D_OK;
1482 ULONG64 update_version = const_tab->update_version;
1483
1484 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1485 {
1486 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1487 enum pres_reg_tables table = const_set->table;
1488 struct d3dx_parameter *param = const_set->param;
1489 unsigned int element, i, j, start_offset;
1490 struct const_upload_info info;
1491 unsigned int *data;
1492 enum pres_value_type param_type;
1493
1494 if (!(param && is_param_dirty(param, update_version)))
1495 continue;
1496
1497 data = param->data;
1498 start_offset = get_offset_reg(table, const_set->register_index);
1499 if (const_set->direct_copy)
1500 {
1501 regstore_set_values(rs, table, data, start_offset,
1502 get_offset_reg(table, const_set->register_count));
1503 continue;
1504 }
1505 param_type = table_type_from_param_type(param->type);
1506 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1507 {
1508 unsigned int count = max(param->rows, param->columns);
1509
1510 if (count >= get_reg_components(table))
1511 {
1512 regstore_set_data(rs, table, start_offset, data,
1513 count * const_set->element_count, param_type);
1514 }
1515 else
1516 {
1517 for (element = 0; element < const_set->element_count; ++element)
1518 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1519 &data[element * count], count, param_type);
1520 }
1521 continue;
1522 }
1523 get_const_upload_info(const_set, &info);
1524 for (element = 0; element < const_set->element_count; ++element)
1525 {
1526 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1527
1528 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1529 * All the supported types of parameters and table values have the same size. */
1530 if (info.transpose)
1531 {
1532 for (i = 0; i < info.major_count; ++i)
1533 for (j = 0; j < info.minor; ++j)
1534 out[i * info.major_stride + j] = data[i + j * info.major];
1535
1536 for (j = 0; j < info.minor_remainder; ++j)
1537 out[i * info.major_stride + j] = data[i + j * info.major];
1538 }
1539 else
1540 {
1541 for (i = 0; i < info.major_count; ++i)
1542 for (j = 0; j < info.minor; ++j)
1543 out[i * info.major_stride + j] = data[i * info.minor + j];
1544 }
1545 start_offset += get_offset_reg(table, const_set->register_count);
1546 data += param->rows * param->columns;
1547 }
1548 start_offset = get_offset_reg(table, const_set->register_index);
1549 if (table_info[table].type != param_type)
1550 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1551 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1552 }
1553 const_tab->update_version = new_update_version;
1554 if (!update_device)
1555 return D3D_OK;
1556
1557 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1558 {
1559 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1560
1561 if (device_update_all || (const_set->param
1562 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1563 {
1564 enum pres_reg_tables table = const_set->table;
1565
1566 if (table == current_table && current_start + current_count == const_set->register_index)
1567 {
1568 current_count += const_set->register_count * const_set->element_count;
1569 }
1570 else
1571 {
1572 if (current_count)
1573 {
1574 if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1575 (DWORD *)rs->tables[current_table]
1576 + get_offset_reg(current_table, current_start), current_start, current_count)))
1577 result = hr;
1578 }
1579 current_table = table;
1580 current_start = const_set->register_index;
1581 current_count = const_set->register_count * const_set->element_count;
1582 }
1583 }
1584 }
1585 if (current_count)
1586 {
1587 if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1588 (DWORD *)rs->tables[current_table]
1589 + get_offset_reg(current_table, current_start), current_start, current_count)))
1590 result = hr;
1591 }
1592 return result;
1593 }
1594
1595 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1596 {
1597 return regstore_get_double(rs, table, offset);
1598 }
1599
1600 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1601 {
1602 unsigned int offset, base_index, reg_index, table;
1603
1604 table = opr->reg.table;
1605
1606 if (opr->index_reg.table == PRES_REGTAB_COUNT)
1607 base_index = 0;
1608 else
1609 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1610
1611 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1612 reg_index = get_reg_offset(table, offset);
1613
1614 if (reg_index >= rs->table_sizes[table])
1615 {
1616 unsigned int wrap_size;
1617
1618 if (table == PRES_REGTAB_CONST)
1619 {
1620 /* As it can be guessed from tests, offset into floating constant table is wrapped
1621 * to the nearest power of 2 and not to the actual table size. */
1622 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1623 ;
1624 }
1625 else
1626 {
1627 wrap_size = rs->table_sizes[table];
1628 }
1629 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1630 reg_index, table, wrap_size, rs->table_sizes[table]);
1631 reg_index %= wrap_size;
1632
1633 if (reg_index >= rs->table_sizes[table])
1634 return 0.0;
1635
1636 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1637 }
1638
1639 return exec_get_reg_value(rs, table, offset);
1640 }
1641
1642 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1643 unsigned int comp, double res)
1644 {
1645 regstore_set_double(rs, reg->table, reg->offset + comp, res);
1646 }
1647
1648 #define ARGS_ARRAY_SIZE 8
1649 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1650 {
1651 unsigned int i, j, k;
1652 double args[ARGS_ARRAY_SIZE];
1653 double res;
1654
1655 for (i = 0; i < pres->ins_count; ++i)
1656 {
1657 const struct d3dx_pres_ins *ins;
1658 const struct op_info *oi;
1659
1660 ins = &pres->ins[i];
1661 oi = &pres_op_info[ins->op];
1662 if (oi->func_all_comps)
1663 {
1664 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1665 {
1666 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1667 return E_FAIL;
1668 }
1669 for (k = 0; k < oi->input_count; ++k)
1670 for (j = 0; j < ins->component_count; ++j)
1671 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1672 ins->scalar_op && !k ? 0 : j);
1673 res = oi->func(args, ins->component_count);
1674
1675 /* only 'dot' instruction currently falls here */
1676 exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1677 }
1678 else
1679 {
1680 for (j = 0; j < ins->component_count; ++j)
1681 {
1682 for (k = 0; k < oi->input_count; ++k)
1683 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1684 res = oi->func(args, ins->component_count);
1685 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1686 }
1687 }
1688 }
1689 return D3D_OK;
1690 }
1691
1692 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1693 {
1694 unsigned int i;
1695
1696 if (update_version == ULONG64_MAX)
1697 update_version = ctab->update_version;
1698 for (i = 0; i < ctab->input_count; ++i)
1699 {
1700 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1701 update_version))
1702 return TRUE;
1703 }
1704 return FALSE;
1705 }
1706
1707 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1708 {
1709 return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1710 || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1711 }
1712
1713 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1714 void *param_value)
1715 {
1716 HRESULT hr;
1717 unsigned int i;
1718 unsigned int elements, elements_param, elements_table;
1719 float *oc;
1720
1721 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1722
1723 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1724 {
1725 set_constants(&peval->pres.regs, &peval->pres.inputs,
1726 next_update_version(peval->version_counter),
1727 NULL, NULL, peval->param_type, FALSE, FALSE);
1728
1729 if (FAILED(hr = execute_preshader(&peval->pres)))
1730 return hr;
1731 }
1732
1733 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1734 elements_param = param->bytes / sizeof(unsigned int);
1735 elements = min(elements_table, elements_param);
1736 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1737 for (i = 0; i < elements; ++i)
1738 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1739 return D3D_OK;
1740 }
1741
1742 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1743 struct d3dx_param_eval *peval, BOOL update_all)
1744 {
1745 HRESULT hr;
1746 struct d3dx_preshader *pres = &peval->pres;
1747 struct d3dx_regstore *rs = &pres->regs;
1748 ULONG64 new_update_version = next_update_version(peval->version_counter);
1749 BOOL pres_dirty = FALSE;
1750
1751 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1752
1753 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1754 {
1755 set_constants(rs, &pres->inputs, new_update_version,
1756 NULL, NULL, peval->param_type, FALSE, FALSE);
1757 if (FAILED(hr = execute_preshader(pres)))
1758 return hr;
1759 pres_dirty = TRUE;
1760 }
1761
1762 return set_constants(rs, &peval->shader_inputs, new_update_version,
1763 manager, device, peval->param_type, update_all, pres_dirty);
1764 }