[D3D8][D3D9][DDRAW][WINED3D] Sync with Wine Staging 3.9. CORE-14656
[reactos.git] / dll / directx / wine / wined3d / shader.c
index 0513c9e..9075fc7 100644 (file)
@@ -419,6 +419,15 @@ static const struct wined3d_shader_frontend *shader_select_frontend(enum wined3d
     }
 }
 
+static enum wined3d_shader_type shader_get_shader_type(const struct wined3d_shader_desc *desc)
+{
+    if (desc->format == WINED3D_SHADER_BYTE_CODE_FORMAT_SM4)
+        return wined3d_get_sm4_shader_type(desc->byte_code, desc->byte_code_size);
+
+    FIXME("Could not get shader type for byte code format %#x.\n", desc->format);
+    return WINED3D_SHADER_TYPE_INVALID;
+}
+
 void string_buffer_clear(struct wined3d_string_buffer *buffer)
 {
     buffer->buffer[0] = '\0';
@@ -801,6 +810,10 @@ static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct w
             reg_maps->vocp = 1;
             break;
 
+        case WINED3DSPR_SAMPLEMASK:
+            reg_maps->sample_mask = 1;
+            break;
+
         default:
             TRACE("Not recording register of type %#x and [%#x][%#x].\n",
                     reg->type, reg->idx[0].offset, reg->idx[1].offset);
@@ -983,13 +996,49 @@ static void wined3d_insert_interpolation_mode(DWORD *packed_interpolation_mode,
             register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT, WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
 }
 
+static HRESULT shader_scan_output_signature(struct wined3d_shader *shader)
+{
+    const struct wined3d_shader_signature *output_signature = &shader->output_signature;
+    struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
+    unsigned int i;
+    HRESULT hr;
+
+    for (i = 0; i < output_signature->element_count; ++i)
+    {
+        const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
+        unsigned int mask;
+
+        reg_maps->output_registers |= 1u << e->register_idx;
+        if (e->sysval_semantic == WINED3D_SV_CLIP_DISTANCE)
+        {
+            if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
+                return hr;
+            reg_maps->clip_distance_mask |= mask;
+        }
+        else if (e->sysval_semantic == WINED3D_SV_CULL_DISTANCE)
+        {
+            if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
+                return hr;
+            reg_maps->cull_distance_mask |= mask;
+        }
+        else if (e->sysval_semantic == WINED3D_SV_VIEWPORT_ARRAY_INDEX)
+        {
+            reg_maps->viewport_array = 1;
+        }
+    }
+
+    return WINED3D_OK;
+}
+
 /* Note that this does not count the loop register as an address register. */
-static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const struct wined3d_shader_frontend *fe,
-        struct wined3d_shader_reg_maps *reg_maps, struct wined3d_shader_signature *input_signature,
-        struct wined3d_shader_signature *output_signature, DWORD constf_size)
+static HRESULT shader_get_registers_used(struct wined3d_shader *shader, DWORD constf_size)
 {
     struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
     struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
+    struct wined3d_shader_signature *output_signature = &shader->output_signature;
+    struct wined3d_shader_signature *input_signature = &shader->input_signature;
+    struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
+    const struct wined3d_shader_frontend *fe = shader->frontend;
     unsigned int cur_loop_depth = 0, max_loop_depth = 0;
     struct wined3d_shader_version shader_version;
     struct wined3d_shader_phase *phase = NULL;
@@ -1667,7 +1716,8 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
                 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
                         ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
             }
-            else if (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
+            else if ((ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
+                    || (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE))
             {
                 shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
                         WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
@@ -1780,25 +1830,8 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
 
     if (output_signature->elements)
     {
-        for (i = 0; i < output_signature->element_count; ++i)
-        {
-            const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
-            unsigned int mask;
-
-            reg_maps->output_registers |= 1u << e->register_idx;
-            if (e->sysval_semantic == WINED3D_SV_CLIP_DISTANCE)
-            {
-                if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
-                    return hr;
-                reg_maps->clip_distance_mask |= mask;
-            }
-            else if (e->sysval_semantic == WINED3D_SV_CULL_DISTANCE)
-            {
-                if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
-                    return hr;
-                reg_maps->cull_distance_mask |= mask;
-            }
-        }
+        if (FAILED(hr = shader_scan_output_signature(shader)))
+            return hr;
     }
     else if (reg_maps->output_registers)
     {
@@ -1889,6 +1922,23 @@ static void shader_dump_sync_flags(struct wined3d_string_buffer *buffer, DWORD s
         shader_addline(buffer, "_unknown_flags(%#x)", sync_flags);
 }
 
+static void shader_dump_precise_flags(struct wined3d_string_buffer *buffer, DWORD flags)
+{
+    if (!(flags & WINED3DSI_PRECISE_XYZW))
+        return;
+
+    shader_addline(buffer, " [precise");
+    if (flags != WINED3DSI_PRECISE_XYZW)
+    {
+        shader_addline(buffer, "(%s%s%s%s)",
+                flags & WINED3DSI_PRECISE_X ? "x" : "",
+                flags & WINED3DSI_PRECISE_Y ? "y" : "",
+                flags & WINED3DSI_PRECISE_Z ? "z" : "",
+                flags & WINED3DSI_PRECISE_W ? "w" : "");
+    }
+    shader_addline(buffer, "]");
+}
+
 static void shader_dump_uav_flags(struct wined3d_string_buffer *buffer, DWORD uav_flags)
 {
     if (uav_flags & WINED3DSUF_GLOBALLY_COHERENT)
@@ -2291,6 +2341,10 @@ static void shader_dump_register(struct wined3d_string_buffer *buffer,
             shader_addline(buffer, "null");
             break;
 
+        case WINED3DSPR_RASTERIZER:
+            shader_addline(buffer, "rasterizer");
+            break;
+
         case WINED3DSPR_RESOURCE:
             shader_addline(buffer, "t");
             break;
@@ -3052,6 +3106,10 @@ static void shader_trace_init(const struct wined3d_shader_frontend *fe, void *fe
             {
                 shader_dump_sync_flags(&buffer, ins.flags);
             }
+            else
+            {
+                shader_dump_precise_flags(&buffer, ins.flags);
+            }
 
             if (wined3d_shader_instruction_has_texel_offset(&ins))
                 shader_addline(&buffer, "(%d,%d,%d)", ins.texel_offset.u, ins.texel_offset.v, ins.texel_offset.w);
@@ -3266,11 +3324,11 @@ const struct wined3d_shader_backend_ops none_shader_backend =
 static HRESULT shader_set_function(struct wined3d_shader *shader, DWORD float_const_count,
         enum wined3d_shader_type type, unsigned int max_version)
 {
+    const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
     struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
     const struct wined3d_shader_frontend *fe;
-    HRESULT hr;
     unsigned int backend_version;
-    const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
+    HRESULT hr;
 
     TRACE("shader %p, float_const_count %u, type %#x, max_version %u.\n",
             shader, float_const_count, type, max_version);
@@ -3288,8 +3346,7 @@ static HRESULT shader_set_function(struct wined3d_shader *shader, DWORD float_co
         shader_trace_init(fe, shader->frontend_data);
 
     /* Second pass: figure out which registers are used, what the semantics are, etc. */
-    if (FAILED(hr = shader_get_registers_used(shader, fe, reg_maps, &shader->input_signature,
-            &shader->output_signature, float_const_count)))
+    if (FAILED(hr = shader_get_registers_used(shader, float_const_count)))
         return hr;
 
     if (reg_maps->shader_version.type != type)
@@ -3591,9 +3648,6 @@ static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device
     TRACE("byte_code %p, byte_code_size %#lx, format %#x, max_version %#x.\n",
             desc->byte_code, (long)desc->byte_code_size, desc->format, desc->max_version);
 
-    if (!desc->byte_code)
-        return WINED3DERR_INVALIDCALL;
-
     if (!(shader->frontend = shader_select_frontend(desc->format)))
     {
         FIXME("Unable to find frontend for shader.\n");
@@ -3668,25 +3722,39 @@ static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device
         byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
     }
 
-    if (!(shader->function = heap_alloc(byte_code_size)))
+    if (desc->byte_code && byte_code_size)
     {
-        shader_cleanup(shader);
-        return E_OUTOFMEMORY;
-    }
-    memcpy(shader->function, desc->byte_code, byte_code_size);
-    shader->functionLength = byte_code_size;
+        if (!(shader->function = heap_alloc(byte_code_size)))
+        {
+            shader_cleanup(shader);
+            return E_OUTOFMEMORY;
+        }
+        memcpy(shader->function, desc->byte_code, byte_code_size);
+        shader->functionLength = byte_code_size;
 
-    if (FAILED(hr = shader_set_function(shader, float_const_count, type, desc->max_version)))
+        if (FAILED(hr = shader_set_function(shader, float_const_count, type, desc->max_version)))
+        {
+            WARN("Failed to set function, hr %#x.\n", hr);
+            shader_cleanup(shader);
+            return hr;
+        }
+    }
+    else
     {
-        WARN("Failed to set function, hr %#x.\n", hr);
-        shader_cleanup(shader);
-        return hr;
+        shader->reg_maps.shader_version.type = type;
+        shader->reg_maps.shader_version.major = 4;
+        shader->reg_maps.shader_version.minor = 0;
+        shader_set_limits(shader);
+
+        if (FAILED(hr = shader_scan_output_signature(shader)))
+        {
+            shader_cleanup(shader);
+            return hr;
+        }
     }
 
     shader->load_local_constsF = shader->lconst_inf_or_nan;
 
-    wined3d_cs_init_object(shader->device->cs, wined3d_shader_init_object, shader);
-
     return hr;
 }
 
@@ -3723,20 +3791,40 @@ static HRESULT geometry_shader_init(struct wined3d_shader *shader, struct wined3
         const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
         void *parent, const struct wined3d_parent_ops *parent_ops)
 {
-    struct wined3d_stream_output_element *elements = NULL;
+    struct wined3d_shader_desc shader_desc = *desc;
+    struct wined3d_stream_output_element *elements;
+    enum wined3d_shader_type shader_type;
     HRESULT hr;
 
-    if (so_desc && !(elements = heap_calloc(so_desc->element_count, sizeof(*elements))))
-        return E_OUTOFMEMORY;
-
-    if (FAILED(hr = shader_init(shader, device, desc, 0, WINED3D_SHADER_TYPE_GEOMETRY, parent, parent_ops)))
+    if (so_desc)
     {
-        heap_free(elements);
-        return hr;
+        shader_type = shader_get_shader_type(desc);
+        switch (shader_type)
+        {
+            case WINED3D_SHADER_TYPE_VERTEX:
+                shader_desc.byte_code = NULL;
+                shader_desc.byte_code_size = 0;
+                break;
+            case WINED3D_SHADER_TYPE_DOMAIN:
+                FIXME("Stream output not supported for %s.\n", debug_shader_type(shader_type));
+                return E_NOTIMPL;
+            default:
+                break;
+        }
     }
 
+    if (FAILED(hr = shader_init(shader, device, &shader_desc, 0,
+            WINED3D_SHADER_TYPE_GEOMETRY, parent, parent_ops)))
+        return hr;
+
     if (so_desc)
     {
+        if (!(elements = heap_calloc(so_desc->element_count, sizeof(*elements))))
+        {
+            shader_cleanup(shader);
+            return E_OUTOFMEMORY;
+        }
+
         shader->u.gs.so_desc = *so_desc;
         shader->u.gs.so_desc.elements = elements;
         memcpy(elements, so_desc->elements, so_desc->element_count * sizeof(*elements));
@@ -3776,6 +3864,9 @@ void find_gs_compile_args(const struct wined3d_state *state, const struct wined3
 
     args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
 
+    if (!(args->primitive_type = shader->u.gs.input_type))
+        args->primitive_type = d3d_primitive_type_from_gl(state->gl_primitive_type);
+
     init_interpolation_compile_args(args->interpolation_mode, pixel_shader, gl_info);
 }
 
@@ -3873,8 +3964,7 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3
             switch (texture->target)
             {
                 /* RECT textures are distinguished from 2D textures via np2_fixup */
-                case GL_TEXTURE_RECTANGLE_ARB:
-                case GL_TEXTURE_2D:
+                default:
                     break;
 
                 case GL_TEXTURE_3D:
@@ -3926,16 +4016,16 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3
     if (shader->reg_maps.shader_version.major >= 3)
     {
         if (position_transformed)
-            args->vp_mode = pretransformed;
+            args->vp_mode = WINED3D_VP_MODE_NONE;
         else if (use_vs(state))
-            args->vp_mode = vertexshader;
+            args->vp_mode = WINED3D_VP_MODE_SHADER;
         else
-            args->vp_mode = fixedfunction;
+            args->vp_mode = WINED3D_VP_MODE_FF;
         args->fog = WINED3D_FFP_PS_FOG_OFF;
     }
     else
     {
-        args->vp_mode = vertexshader;
+        args->vp_mode = WINED3D_VP_MODE_SHADER;
         if (state->render_states[WINED3D_RS_FOGENABLE])
         {
             switch (state->render_states[WINED3D_RS_FOGTABLEMODE])
@@ -4110,6 +4200,9 @@ HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4120,6 +4213,8 @@ HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created compute shader %p.\n", object);
     *shader = object;
 
@@ -4135,6 +4230,9 @@ HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4145,6 +4243,8 @@ HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created domain shader %p.\n", object);
     *shader = object;
 
@@ -4161,6 +4261,9 @@ HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, so_desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, so_desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4171,6 +4274,8 @@ HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created geometry shader %p.\n", object);
     *shader = object;
 
@@ -4186,6 +4291,9 @@ HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4196,6 +4304,8 @@ HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created hull shader %p.\n", object);
     *shader = object;
 
@@ -4211,6 +4321,9 @@ HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4221,6 +4334,8 @@ HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created pixel shader %p.\n", object);
     *shader = object;
 
@@ -4236,6 +4351,9 @@ HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const stru
     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
             device, desc, parent, parent_ops, shader);
 
+    if (!desc->byte_code)
+        return WINED3DERR_INVALIDCALL;
+
     if (!(object = heap_alloc_zero(sizeof(*object))))
         return E_OUTOFMEMORY;
 
@@ -4246,6 +4364,8 @@ HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const stru
         return hr;
     }
 
+    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
+
     TRACE("Created vertex shader %p.\n", object);
     *shader = object;