- import ddraw from Wine and use it for now
[reactos.git] / reactos / dll / directx / wine / wined3d / vertexshader.c
1 /*
2 * shaders implementation
3 *
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
9 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
/* TODO: Vertex and pixel shaders are almost identical. The only exceptions are the way some of
 * the data is looked up and the availability of some of the data, i.e. some instructions are
 * only valid for pixel shaders and some only for vertex shaders. Because of this, the bulk of
 * the software pipeline can be shared between pixel and vertex shaders, and it would not be
 * surprising if the programs could be cross-compiled using a large body of shared code. */
39
40 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
41 /* This table is not order or position dependent. */
42
43 /* Arithmetic */
44 {WINED3DSIO_NOP, "nop", 0, 0, WINED3DSIH_NOP, 0, 0 },
45 {WINED3DSIO_MOV, "mov", 1, 2, WINED3DSIH_MOV, 0, 0 },
46 {WINED3DSIO_MOVA, "mova", 1, 2, WINED3DSIH_MOVA, WINED3DVS_VERSION(2,0), -1 },
47 {WINED3DSIO_ADD, "add", 1, 3, WINED3DSIH_ADD, 0, 0 },
48 {WINED3DSIO_SUB, "sub", 1, 3, WINED3DSIH_SUB, 0, 0 },
49 {WINED3DSIO_MAD, "mad", 1, 4, WINED3DSIH_MAD, 0, 0 },
50 {WINED3DSIO_MUL, "mul", 1, 3, WINED3DSIH_MUL, 0, 0 },
51 {WINED3DSIO_RCP, "rcp", 1, 2, WINED3DSIH_RCP, 0, 0 },
52 {WINED3DSIO_RSQ, "rsq", 1, 2, WINED3DSIH_RSQ, 0, 0 },
53 {WINED3DSIO_DP3, "dp3", 1, 3, WINED3DSIH_DP3, 0, 0 },
54 {WINED3DSIO_DP4, "dp4", 1, 3, WINED3DSIH_DP4, 0, 0 },
55 {WINED3DSIO_MIN, "min", 1, 3, WINED3DSIH_MIN, 0, 0 },
56 {WINED3DSIO_MAX, "max", 1, 3, WINED3DSIH_MAX, 0, 0 },
57 {WINED3DSIO_SLT, "slt", 1, 3, WINED3DSIH_SLT, 0, 0 },
58 {WINED3DSIO_SGE, "sge", 1, 3, WINED3DSIH_SGE, 0, 0 },
59 {WINED3DSIO_ABS, "abs", 1, 2, WINED3DSIH_ABS, 0, 0 },
60 {WINED3DSIO_EXP, "exp", 1, 2, WINED3DSIH_EXP, 0, 0 },
61 {WINED3DSIO_LOG, "log", 1, 2, WINED3DSIH_LOG, 0, 0 },
62 {WINED3DSIO_EXPP, "expp", 1, 2, WINED3DSIH_EXPP, 0, 0 },
63 {WINED3DSIO_LOGP, "logp", 1, 2, WINED3DSIH_LOGP, 0, 0 },
64 {WINED3DSIO_LIT, "lit", 1, 2, WINED3DSIH_LIT, 0, 0 },
65 {WINED3DSIO_DST, "dst", 1, 3, WINED3DSIH_DST, 0, 0 },
66 {WINED3DSIO_LRP, "lrp", 1, 4, WINED3DSIH_LRP, 0, 0 },
67 {WINED3DSIO_FRC, "frc", 1, 2, WINED3DSIH_FRC, 0, 0 },
68 {WINED3DSIO_POW, "pow", 1, 3, WINED3DSIH_POW, 0, 0 },
69 {WINED3DSIO_CRS, "crs", 1, 3, WINED3DSIH_CRS, 0, 0 },
70 /* TODO: sng can possibly be performed as
71 RCP tmp, vec
72 MUL out, tmp, vec*/
73 {WINED3DSIO_SGN, "sgn", 1, 2, WINED3DSIH_SGN, 0, 0 },
74 {WINED3DSIO_NRM, "nrm", 1, 2, WINED3DSIH_NRM, 0, 0 },
75 {WINED3DSIO_SINCOS, "sincos", 1, 4, WINED3DSIH_SINCOS, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
76 {WINED3DSIO_SINCOS, "sincos", 1, 2, WINED3DSIH_SINCOS, WINED3DVS_VERSION(3,0), -1 },
77 /* Matrix */
78 {WINED3DSIO_M4x4, "m4x4", 1, 3, WINED3DSIH_M4x4, 0, 0 },
79 {WINED3DSIO_M4x3, "m4x3", 1, 3, WINED3DSIH_M4x3, 0, 0 },
80 {WINED3DSIO_M3x4, "m3x4", 1, 3, WINED3DSIH_M3x4, 0, 0 },
81 {WINED3DSIO_M3x3, "m3x3", 1, 3, WINED3DSIH_M3x3, 0, 0 },
82 {WINED3DSIO_M3x2, "m3x2", 1, 3, WINED3DSIH_M3x2, 0, 0 },
83 /* Declare registers */
84 {WINED3DSIO_DCL, "dcl", 0, 2, WINED3DSIH_DCL, 0, 0 },
85 /* Constant definitions */
86 {WINED3DSIO_DEF, "def", 1, 5, WINED3DSIH_DEF, 0, 0 },
87 {WINED3DSIO_DEFB, "defb", 1, 2, WINED3DSIH_DEFB, 0, 0 },
88 {WINED3DSIO_DEFI, "defi", 1, 5, WINED3DSIH_DEFI, 0, 0 },
89 /* Flow control - requires GLSL or software shaders */
90 {WINED3DSIO_REP , "rep", 0, 1, WINED3DSIH_REP, WINED3DVS_VERSION(2,0), -1 },
91 {WINED3DSIO_ENDREP, "endrep", 0, 0, WINED3DSIH_ENDREP, WINED3DVS_VERSION(2,0), -1 },
92 {WINED3DSIO_IF, "if", 0, 1, WINED3DSIH_IF, WINED3DVS_VERSION(2,0), -1 },
93 {WINED3DSIO_IFC, "ifc", 0, 2, WINED3DSIH_IFC, WINED3DVS_VERSION(2,1), -1 },
94 {WINED3DSIO_ELSE, "else", 0, 0, WINED3DSIH_ELSE, WINED3DVS_VERSION(2,0), -1 },
95 {WINED3DSIO_ENDIF, "endif", 0, 0, WINED3DSIH_ENDIF, WINED3DVS_VERSION(2,0), -1 },
96 {WINED3DSIO_BREAK, "break", 0, 0, WINED3DSIH_BREAK, WINED3DVS_VERSION(2,1), -1 },
97 {WINED3DSIO_BREAKC, "breakc", 0, 2, WINED3DSIH_BREAKC, WINED3DVS_VERSION(2,1), -1 },
98 {WINED3DSIO_BREAKP, "breakp", 0, 1, WINED3DSIH_BREAKP, 0, 0 },
99 {WINED3DSIO_CALL, "call", 0, 1, WINED3DSIH_CALL, WINED3DVS_VERSION(2,0), -1 },
100 {WINED3DSIO_CALLNZ, "callnz", 0, 2, WINED3DSIH_CALLNZ, WINED3DVS_VERSION(2,0), -1 },
101 {WINED3DSIO_LOOP, "loop", 0, 2, WINED3DSIH_LOOP, WINED3DVS_VERSION(2,0), -1 },
102 {WINED3DSIO_RET, "ret", 0, 0, WINED3DSIH_RET, WINED3DVS_VERSION(2,0), -1 },
103 {WINED3DSIO_ENDLOOP, "endloop", 0, 0, WINED3DSIH_ENDLOOP, WINED3DVS_VERSION(2,0), -1 },
104 {WINED3DSIO_LABEL, "label", 0, 1, WINED3DSIH_LABEL, WINED3DVS_VERSION(2,0), -1 },
105
106 {WINED3DSIO_SETP, "setp", 1, 3, WINED3DSIH_SETP, 0, 0 },
107 {WINED3DSIO_TEXLDL, "texldl", 1, 3, WINED3DSIH_TEXLDL, WINED3DVS_VERSION(3,0), -1 },
108 {0, NULL, 0, 0, 0, 0, 0 }
109 };
110
111 static void vshader_set_limits(
112 IWineD3DVertexShaderImpl *This) {
113
114 This->baseShader.limits.texcoord = 0;
115 This->baseShader.limits.attributes = 16;
116 This->baseShader.limits.packed_input = 0;
117
118 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
119 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
120
121 switch (This->baseShader.reg_maps.shader_version)
122 {
123 case WINED3DVS_VERSION(1,0):
124 case WINED3DVS_VERSION(1,1):
125 This->baseShader.limits.temporary = 12;
126 This->baseShader.limits.constant_bool = 0;
127 This->baseShader.limits.constant_int = 0;
128 This->baseShader.limits.address = 1;
129 This->baseShader.limits.packed_output = 0;
130 This->baseShader.limits.sampler = 0;
131 This->baseShader.limits.label = 0;
132 break;
133
134 case WINED3DVS_VERSION(2,0):
135 case WINED3DVS_VERSION(2,1):
136 This->baseShader.limits.temporary = 12;
137 This->baseShader.limits.constant_bool = 16;
138 This->baseShader.limits.constant_int = 16;
139 This->baseShader.limits.address = 1;
140 This->baseShader.limits.packed_output = 0;
141 This->baseShader.limits.sampler = 0;
142 This->baseShader.limits.label = 16;
143 break;
144
145 case WINED3DVS_VERSION(3,0):
146 This->baseShader.limits.temporary = 32;
147 This->baseShader.limits.constant_bool = 32;
148 This->baseShader.limits.constant_int = 32;
149 This->baseShader.limits.address = 1;
150 This->baseShader.limits.packed_output = 12;
151 This->baseShader.limits.sampler = 4;
152 This->baseShader.limits.label = 16; /* FIXME: 2048 */
153 break;
154
155 default: This->baseShader.limits.temporary = 12;
156 This->baseShader.limits.constant_bool = 16;
157 This->baseShader.limits.constant_int = 16;
158 This->baseShader.limits.address = 1;
159 This->baseShader.limits.packed_output = 0;
160 This->baseShader.limits.sampler = 0;
161 This->baseShader.limits.label = 16;
162 FIXME("Unrecognized vertex shader version %#x\n",
163 This->baseShader.reg_maps.shader_version);
164 }
165 }
166
167 /* This is an internal function,
168 * used to create fake semantics for shaders
169 * that don't have them - d3d8 shaders where the declaration
170 * stores the register for each input
171 */
172 static void vshader_set_input(
173 IWineD3DVertexShaderImpl* This,
174 unsigned int regnum,
175 BYTE usage, BYTE usage_idx) {
176
177 /* Fake usage: set reserved bit, usage, usage_idx */
178 DWORD usage_token = (0x1 << 31) |
179 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
180
181 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
182 DWORD reg_token = (0x1 << 31) |
183 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
184
185 This->semantics_in[regnum].usage = usage_token;
186 This->semantics_in[regnum].reg = reg_token;
187 }
188
189 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
190 if (usage_idx1 != usage_idx2) return FALSE;
191 if (usage1 == usage2) return TRUE;
192 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
193 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
194
195 return FALSE;
196 }
197
198 BOOL vshader_get_input(
199 IWineD3DVertexShader* iface,
200 BYTE usage_req, BYTE usage_idx_req,
201 unsigned int* regnum) {
202
203 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
204 int i;
205
206 for (i = 0; i < MAX_ATTRIBS; i++) {
207 DWORD usage_token = This->semantics_in[i].usage;
208 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
209 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
210
211 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
212 *regnum = i;
213 return TRUE;
214 }
215 }
216 return FALSE;
217 }
218
219 /* *******************************************
220 IWineD3DVertexShader IUnknown parts follow
221 ******************************************* */
222 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
223 TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);
224
225 if (IsEqualGUID(riid, &IID_IWineD3DVertexShader)
226 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
227 || IsEqualGUID(riid, &IID_IWineD3DBase)
228 || IsEqualGUID(riid, &IID_IUnknown))
229 {
230 IUnknown_AddRef(iface);
231 *ppobj = iface;
232 return S_OK;
233 }
234
235 WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));
236
237 *ppobj = NULL;
238 return E_NOINTERFACE;
239 }
240
241 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
242 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
243 ULONG refcount = InterlockedIncrement(&This->baseShader.ref);
244
245 TRACE("%p increasing refcount to %u\n", This, refcount);
246
247 return refcount;
248 }
249
250 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
251 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
252 ULONG refcount = InterlockedDecrement(&This->baseShader.ref);
253
254 TRACE("%p decreasing refcount to %u\n", This, refcount);
255
256 if (!refcount)
257 {
258 shader_cleanup((IWineD3DBaseShader *)iface);
259 HeapFree(GetProcessHeap(), 0, This);
260 }
261
262 return refcount;
263 }
264
265 /* *******************************************
266 IWineD3DVertexShader IWineD3DVertexShader parts follow
267 ******************************************* */
268
269 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
270 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
271
272 *parent = This->parent;
273 IUnknown_AddRef(*parent);
274 TRACE("(%p) : returning %p\n", This, *parent);
275 return WINED3D_OK;
276 }
277
278 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
279 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
280 IWineD3DDevice_AddRef(This->baseShader.device);
281 *pDevice = This->baseShader.device;
282 TRACE("(%p) returning %p\n", This, *pDevice);
283 return WINED3D_OK;
284 }
285
286 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
287 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
288 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
289
290 if (NULL == pData) {
291 *pSizeOfData = This->baseShader.functionLength;
292 return WINED3D_OK;
293 }
294 if (*pSizeOfData < This->baseShader.functionLength) {
295 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
296 * than the required size we should write the required size and
297 * return D3DERR_MOREDATA. That's not actually true. */
298 return WINED3DERR_INVALIDCALL;
299 }
300
301 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
302 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
303
304 return WINED3D_OK;
305 }
306
307 /* Note that for vertex shaders CompileShader isn't called until the
308 * shader is first used. The reason for this is that we need the vertex
309 * declaration the shader will be used with in order to determine if
310 * the data in a register is of type D3DCOLOR, and needs swizzling. */
311 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
312
313 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
314 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
315 HRESULT hr;
316 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
317
318 TRACE("(%p) : pFunction %p\n", iface, pFunction);
319
320 /* First pass: trace shader */
321 if (TRACE_ON(d3d_shader)) shader_trace_init(pFunction, This->baseShader.shader_ins);
322
323 /* Initialize immediate constant lists */
324 list_init(&This->baseShader.constantsF);
325 list_init(&This->baseShader.constantsB);
326 list_init(&This->baseShader.constantsI);
327
328 /* Second pass: figure out registers used, semantics, etc.. */
329 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
330 This->max_rel_offset = 0;
331 memset(reg_maps, 0, sizeof(shader_reg_maps));
332 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
333 This->semantics_in, This->semantics_out, pFunction);
334 if (hr != WINED3D_OK) return hr;
335
336 vshader_set_limits(This);
337
338 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
339
340 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
341 (GLINFO_LOCATION).arb_vs_offset_limit &&
342 This->min_rel_offset <= This->max_rel_offset) {
343
344 if(This->max_rel_offset - This->min_rel_offset > 127) {
345 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
346 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
347 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
348 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
349 This->rel_offset = This->min_rel_offset + 63;
350 } else if(This->max_rel_offset > 63) {
351 This->rel_offset = This->min_rel_offset;
352 } else {
353 This->rel_offset = 0;
354 }
355 }
356 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
357
358 /* copy the function ... because it will certainly be released by application */
359 This->baseShader.function = HeapAlloc(GetProcessHeap(), 0, This->baseShader.functionLength);
360 if (!This->baseShader.function) return E_OUTOFMEMORY;
361 memcpy(This->baseShader.function, pFunction, This->baseShader.functionLength);
362
363 return WINED3D_OK;
364 }
365
366 /* Preload semantics for d3d8 shaders */
367 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
368 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
369 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
370
371 unsigned int i;
372 for (i = 0; i < vdecl->element_count; ++i)
373 {
374 const struct wined3d_vertex_declaration_element *e = &vdecl->elements[i];
375 vshader_set_input(This, e->output_slot, e->usage, e->usage_idx);
376 }
377 }
378
379 /* Set local constants for d3d8 shaders */
380 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
381 UINT start_idx, const float *src_data, UINT count) {
382 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
383 UINT i, end_idx;
384
385 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
386
387 end_idx = start_idx + count;
388 if (end_idx > GL_LIMITS(vshader_constantsF)) {
389 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
390 end_idx = GL_LIMITS(vshader_constantsF);
391 }
392
393 for (i = start_idx; i < end_idx; ++i) {
394 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
395 if (!lconst) return E_OUTOFMEMORY;
396
397 lconst->idx = i;
398 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
399 list_add_head(&This->baseShader.constantsF, &lconst->entry);
400 }
401
402 return WINED3D_OK;
403 }
404
405 static GLuint vertexshader_compile(IWineD3DVertexShaderImpl *This, const struct vs_compile_args *args) {
406 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
407 SHADER_BUFFER buffer;
408 GLuint ret;
409
410 /* Generate the HW shader */
411 TRACE("(%p) : Generating hardware program\n", This);
412 shader_buffer_init(&buffer);
413 This->cur_args = args;
414 ret = deviceImpl->shader_backend->shader_generate_vshader((IWineD3DVertexShader *)This, &buffer, args);
415 This->cur_args = NULL;
416 shader_buffer_free(&buffer);
417
418 return ret;
419 }
420
421 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
422 {
423 /*** IUnknown methods ***/
424 IWineD3DVertexShaderImpl_QueryInterface,
425 IWineD3DVertexShaderImpl_AddRef,
426 IWineD3DVertexShaderImpl_Release,
427 /*** IWineD3DBase methods ***/
428 IWineD3DVertexShaderImpl_GetParent,
429 /*** IWineD3DBaseShader methods ***/
430 IWineD3DVertexShaderImpl_SetFunction,
431 /*** IWineD3DVertexShader methods ***/
432 IWineD3DVertexShaderImpl_GetDevice,
433 IWineD3DVertexShaderImpl_GetFunction,
434 IWineD3DVertexShaderImpl_FakeSemantics,
435 IWIneD3DVertexShaderImpl_SetLocalConstantsF
436 };
437
438 void find_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct vs_compile_args *args) {
439 args->fog_src = stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
440 args->swizzle_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.swizzle_map;
441 }
442
443 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
444 const DWORD use_map) {
445 if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
446 return stored->fog_src == new->fog_src;
447 }
448
449 GLuint find_gl_vshader(IWineD3DVertexShaderImpl *shader, const struct vs_compile_args *args)
450 {
451 UINT i;
452 DWORD new_size = shader->shader_array_size;
453 struct vs_compiled_shader *new_array;
454 DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
455
456 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
457 * so a linear search is more performant than a hashmap or a binary search
458 * (cache coherency etc)
459 */
460 for(i = 0; i < shader->num_gl_shaders; i++) {
461 if(vs_args_equal(&shader->gl_shaders[i].args, args, use_map)) {
462 return shader->gl_shaders[i].prgId;
463 }
464 }
465
466 TRACE("No matching GL shader found, compiling a new shader\n");
467
468 if(shader->shader_array_size == shader->num_gl_shaders) {
469 if (shader->num_gl_shaders)
470 {
471 new_size = shader->shader_array_size + max(1, shader->shader_array_size / 2);
472 new_array = HeapReAlloc(GetProcessHeap(), 0, shader->gl_shaders,
473 new_size * sizeof(*shader->gl_shaders));
474 } else {
475 new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders));
476 new_size = 1;
477 }
478
479 if(!new_array) {
480 ERR("Out of memory\n");
481 return 0;
482 }
483 shader->gl_shaders = new_array;
484 shader->shader_array_size = new_size;
485 }
486
487 shader->gl_shaders[shader->num_gl_shaders].args = *args;
488 shader->gl_shaders[shader->num_gl_shaders].prgId = vertexshader_compile(shader, args);
489 return shader->gl_shaders[shader->num_gl_shaders++].prgId;
490 }