[D3D8][D3D9][DDRAW][D3DCOMPILER_43][WINED3D] Revert to Wine Staging 3.3
[reactos.git] / dll / directx / wine / wined3d / arb_program_shader.c
index 4fbb3a5..f86c167 100644 (file)
@@ -611,7 +611,7 @@ static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *g
     unsigned char i;
 
     /* Upload the position fixup */
-    shader_get_position_fixup(context, state, 1, position_fixup);
+    shader_get_position_fixup(context, state, position_fixup);
     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup));
 
     if (!gl_shader->num_int_consts) return;
@@ -1061,7 +1061,7 @@ static void shader_arb_get_register_name(const struct wined3d_shader_instruction
                             /* This is better than nothing for now */
                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
                         }
-                        else if(ctx->cur_ps_args->super.vp_mode != WINED3D_VP_MODE_SHADER)
+                        else if(ctx->cur_ps_args->super.vp_mode != vertexshader)
                         {
                             /* This is problematic because we'd have to consult the ctx->ps_input strings
                              * for where to find the varying. Some may be "0.0", others can be texcoords or
@@ -1416,6 +1416,9 @@ static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD
     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
     const char *mod;
     BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
+    const struct wined3d_shader *shader;
+    const struct wined3d_device *device;
+    const struct wined3d_gl_info *gl_info;
     const char *tex_dst = dst_str;
     struct color_fixup_masks masks;
 
@@ -1429,8 +1432,12 @@ static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD
             break;
 
         case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
+            shader = ins->ctx->shader;
+            device = shader->device;
+            gl_info = &device->adapter->gl_info;
+
             if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx)
-                    && ins->ctx->gl_info->supported[ARB_TEXTURE_RECTANGLE])
+                    && gl_info->supported[ARB_TEXTURE_RECTANGLE])
                 tex_type = "RECT";
             else
                 tex_type = "2D";
@@ -3415,68 +3422,73 @@ static void init_ps_input(const struct wined3d_shader *shader,
     const char *semantic_name;
     DWORD semantic_idx;
 
-    if (args->super.vp_mode == WINED3D_VP_MODE_SHADER)
-    {
-        /* That one is easy. The vertex shaders provide v0-v7 in
-         * fragment.texcoord and v8 and v9 in fragment.color. */
-        for (i = 0; i < 8; ++i)
-        {
-            priv->ps_input[i] = texcoords[i];
-        }
-        priv->ps_input[8] = "fragment.color.primary";
-        priv->ps_input[9] = "fragment.color.secondary";
-        return;
-    }
+    switch(args->super.vp_mode)
+    {
+        case pretransformed:
+        case fixedfunction:
+            /* The pixelshader has to collect the varyings on its own. In any case properly load
+             * color0 and color1. In the case of pretransformed vertices also load texcoords. Set
+             * other attribs to 0.0.
+             *
+             * For fixedfunction this behavior is correct, according to the tests. For pretransformed
+             * we'd either need a replacement shader that can load other attribs like BINORMAL, or
+             * load the texcoord attrib pointers to match the pixel shader signature
+             */
+            for (i = 0; i < shader->input_signature.element_count; ++i)
+            {
+                input = &shader->input_signature.elements[i];
+                if (!(semantic_name = input->semantic_name))
+                    continue;
+                semantic_idx = input->semantic_idx;
 
-    /* The fragment shader has to collect the varyings on its own. In any case
-     * properly load color0 and color1. In the case of pre-transformed
-     * vertices also load texture coordinates. Set other attributes to 0.0.
-     *
-     * For fixed-function this behavior is correct, according to the tests.
-     * For pre-transformed we'd either need a replacement shader that can load
-     * other attributes like BINORMAL, or load the texture coordinate
-     * attribute pointers to match the fragment shader signature. */
-    for (i = 0; i < shader->input_signature.element_count; ++i)
-    {
-        input = &shader->input_signature.elements[i];
-        if (!(semantic_name = input->semantic_name))
-            continue;
-        semantic_idx = input->semantic_idx;
+                if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
+                {
+                    if (!semantic_idx)
+                        priv->ps_input[input->register_idx] = "fragment.color.primary";
+                    else if (semantic_idx == 1)
+                        priv->ps_input[input->register_idx] = "fragment.color.secondary";
+                    else
+                        priv->ps_input[input->register_idx] = "0.0";
+                }
+                else if (args->super.vp_mode == fixedfunction)
+                {
+                    priv->ps_input[input->register_idx] = "0.0";
+                }
+                else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
+                {
+                    if (semantic_idx < 8)
+                        priv->ps_input[input->register_idx] = texcoords[semantic_idx];
+                    else
+                        priv->ps_input[input->register_idx] = "0.0";
+                }
+                else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
+                {
+                    if (!semantic_idx)
+                        priv->ps_input[input->register_idx] = "fragment.fogcoord";
+                    else
+                        priv->ps_input[input->register_idx] = "0.0";
+                }
+                else
+                {
+                    priv->ps_input[input->register_idx] = "0.0";
+                }
 
-        if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
-        {
-            if (!semantic_idx)
-                priv->ps_input[input->register_idx] = "fragment.color.primary";
-            else if (semantic_idx == 1)
-                priv->ps_input[input->register_idx] = "fragment.color.secondary";
-            else
-                priv->ps_input[input->register_idx] = "0.0";
-        }
-        else if (args->super.vp_mode == WINED3D_VP_MODE_FF)
-        {
-            priv->ps_input[input->register_idx] = "0.0";
-        }
-        else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
-        {
-            if (semantic_idx < 8)
-                priv->ps_input[input->register_idx] = texcoords[semantic_idx];
-            else
-                priv->ps_input[input->register_idx] = "0.0";
-        }
-        else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
-        {
-            if (!semantic_idx)
-                priv->ps_input[input->register_idx] = "fragment.fogcoord";
-            else
-                priv->ps_input[input->register_idx] = "0.0";
-        }
-        else
-        {
-            priv->ps_input[input->register_idx] = "0.0";
-        }
+                TRACE("v%u, semantic %s%u is %s\n", input->register_idx,
+                        semantic_name, semantic_idx, priv->ps_input[input->register_idx]);
+            }
+            break;
 
-        TRACE("v%u, semantic %s%u is %s\n", input->register_idx,
-                semantic_name, semantic_idx, priv->ps_input[input->register_idx]);
+        case vertexshader:
+            /* That one is easy. The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in
+             * fragment.color
+             */
+            for(i = 0; i < 8; i++)
+            {
+                priv->ps_input[i] = texcoords[i];
+            }
+            priv->ps_input[8] = "fragment.color.primary";
+            priv->ps_input[9] = "fragment.color.secondary";
+            break;
     }
 }
 
@@ -6072,19 +6084,17 @@ static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum
 }
 
 static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color,
-        BOOL alpha, BOOL tmp_dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
+        BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
 {
     const char *dstmask, *dstreg, *arg0, *arg1, *arg2;
     unsigned int mul = 1;
 
-    if (color && alpha)
-        dstmask = "";
-    else if (color)
-        dstmask = ".xyz";
-    else
-        dstmask = ".w";
+    if(color && alpha) dstmask = "";
+    else if(color) dstmask = ".xyz";
+    else dstmask = ".w";
 
-    dstreg = tmp_dst ? "tempreg" : "ret";
+    if(dst == tempreg) dstreg = "tempreg";
+    else dstreg = "ret";
 
     arg0 = get_argreg(buffer, 0, stage, dw_arg0);
     arg1 = get_argreg(buffer, 1, stage, dw_arg1);
@@ -6263,7 +6273,7 @@ static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, con
 
         if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
             tex_read |= 1u << stage;
-        if (settings->op[stage].tmp_dst)
+        if (settings->op[stage].dst == tempreg)
             tempreg_used = TRUE;
         if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
             tempreg_used = TRUE;
@@ -6381,17 +6391,12 @@ static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, con
 
         textype = arbfp_texture_target(settings->op[stage].tex_type);
 
-        if (settings->op[stage].projected == WINED3D_PROJECTION_NONE)
-        {
+        if(settings->op[stage].projected == proj_none) {
             instr = "TEX";
-        }
-        else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4
-                || settings->op[stage].projected == WINED3D_PROJECTION_COUNT3)
-        {
+        } else if(settings->op[stage].projected == proj_count4 ||
+                  settings->op[stage].projected == proj_count3) {
             instr = "TXP";
-        }
-        else
-        {
+        } else {
             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
             instr = "TXP";
         }
@@ -6405,27 +6410,18 @@ static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, con
             shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1);
             shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1);
 
-            /* With projective textures, texbem only divides the static
-             * texture coordinate, not the displacement, so multiply the
-             * displacement with the dividing parameter before passing it to
-             * TXP. */
-            if (settings->op[stage].projected != WINED3D_PROJECTION_NONE)
-            {
-                if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4)
-                {
+            /* with projective textures, texbem only divides the static texture coord, not the displacement,
+             * so multiply the displacement with the dividing parameter before passing it to TXP
+             */
+            if (settings->op[stage].projected != proj_none) {
+                if(settings->op[stage].projected == proj_count4) {
                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage);
-                    shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n",
-                            stage, stage);
-                }
-                else
-                {
+                    shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage);
+                } else {
                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage);
-                    shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n",
-                            stage, stage);
+                    shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage);
                 }
-            }
-            else
-            {
+            } else {
                 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage);
             }
 
@@ -6437,16 +6433,12 @@ static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, con
                                stage - 1, stage - 1, stage - 1);
                 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage);
             }
-        }
-        else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3)
-        {
+        } else if(settings->op[stage].projected == proj_count3) {
             shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage);
             shader_addline(&buffer, "MOV ret.w, ret.z;\n");
             shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n",
                             instr, stage, stage, textype);
-        }
-        else
-        {
+        } else {
             shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n",
                             instr, stage, stage, stage, textype);
         }
@@ -6495,23 +6487,23 @@ static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, con
 
         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
         {
-            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst,
+            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
                           settings->op[stage].cop, settings->op[stage].carg0,
                           settings->op[stage].carg1, settings->op[stage].carg2);
         }
         else if (op_equal)
         {
-            gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].tmp_dst,
+            gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst,
                           settings->op[stage].cop, settings->op[stage].carg0,
                           settings->op[stage].carg1, settings->op[stage].carg2);
         }
         else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP
                 && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE)
         {
-            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst,
+            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
                           settings->op[stage].cop, settings->op[stage].carg0,
                           settings->op[stage].carg1, settings->op[stage].carg2);
-            gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].tmp_dst,
+            gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst,
                           settings->op[stage].aop, settings->op[stage].aarg0,
                           settings->op[stage].aarg1, settings->op[stage].aarg2);
         }
@@ -6936,8 +6928,8 @@ static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3
     heap_free(arbfp_blitter);
 }
 
-static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer,
-        const struct arbfp_blit_type *type, char *luminance)
+static BOOL gen_planar_yuv_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
+        char *luminance)
 {
     char chroma;
     const char *tex, *texinstr = "TXP";
@@ -6985,12 +6977,13 @@ static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer,
     shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
     shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");
 
-    /* Multiply the x coordinate by 0.5 and get the fraction. This gives 0.25
-     * and 0.75 for the even and odd pixels respectively. */
+    /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the
+     * even and odd pixels respectively
+     */
     shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");
     shader_addline(buffer, "FRC texcrd2, texcrd2;\n");
 
-    /* Sample Pixel 1. */
+    /* Sample Pixel 1 */
     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
 
     /* Put the value into either of the chroma values */
@@ -7019,10 +7012,12 @@ static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer,
 
     /* This gives the correctly filtered luminance value */
     shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
+
+    return TRUE;
 }
 
-static void gen_yv12_read(struct wined3d_string_buffer *buffer,
-        const struct arbfp_blit_type *type, char *luminance)
+static BOOL gen_yv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
+        char *luminance)
 {
     const char *tex;
     static const float yv12_coef[]
@@ -7084,6 +7079,7 @@ static void gen_yv12_read(struct wined3d_string_buffer *buffer,
      */
     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
     {
+
         shader_addline(buffer, "RCP chroma.w, size.y;\n");
 
         shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");
@@ -7091,7 +7087,7 @@ static void gen_yv12_read(struct wined3d_string_buffer *buffer,
         shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");
         shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n");
 
-        /* Read odd lines from the right side (add size * 0.5 to the x coordinate). */
+        /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
         shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
@@ -7105,11 +7101,11 @@ static void gen_yv12_read(struct wined3d_string_buffer *buffer,
     }
     else
     {
-        /* The y coordinate for V is in the range [size, size + size / 4). */
+        /* Read from [size - size+size/4] */
         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n");
 
-        /* Read odd lines from the right side (add size * 0.5 to the x coordinate). */
+        /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
         shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
@@ -7165,10 +7161,12 @@ static void gen_yv12_read(struct wined3d_string_buffer *buffer,
         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
     }
     *luminance = 'a';
+
+    return TRUE;
 }
 
-static void gen_nv12_read(struct wined3d_string_buffer *buffer,
-        const struct arbfp_blit_type *type, char *luminance)
+static BOOL gen_nv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
+        char *luminance)
 {
     const char *tex;
     static const float nv12_coef[]
@@ -7244,7 +7242,7 @@ static void gen_nv12_read(struct wined3d_string_buffer *buffer,
     }
     else
     {
-        /* The y coordinate for chroma is in the range [size, size + size / 2). */
+        /* Read from [size - size+size/2] */
         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n");
 
         shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
@@ -7299,6 +7297,8 @@ static void gen_nv12_read(struct wined3d_string_buffer *buffer,
         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
     }
     *luminance = 'a';
+
+    return TRUE;
 }
 
 /* Context activation is done by the caller. */
@@ -7464,15 +7464,27 @@ static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct
     {
         case COMPLEX_FIXUP_UYVY:
         case COMPLEX_FIXUP_YUY2:
-            gen_packed_yuv_read(&buffer, type, &luminance_component);
+            if (!gen_planar_yuv_read(&buffer, type, &luminance_component))
+            {
+                string_buffer_free(&buffer);
+                return 0;
+            }
             break;
 
         case COMPLEX_FIXUP_YV12:
-            gen_yv12_read(&buffer, type, &luminance_component);
+            if (!gen_yv12_read(&buffer, type, &luminance_component))
+            {
+                string_buffer_free(&buffer);
+                return 0;
+            }
             break;
 
         case COMPLEX_FIXUP_NV12:
-            gen_nv12_read(&buffer, type, &luminance_component);
+            if (!gen_nv12_read(&buffer, type, &luminance_component))
+            {
+                string_buffer_free(&buffer);
+                return 0;
+            }
             break;
 
         default:
@@ -7698,7 +7710,7 @@ static BOOL arbfp_blit_supported(enum wined3d_blit_op blit_op, const struct wine
     enum complex_fixup src_fixup;
     BOOL decompress;
 
-    if (src_resource->type != WINED3D_RTYPE_TEXTURE_2D)
+    if (!context->gl_info->supported[ARB_FRAGMENT_PROGRAM])
         return FALSE;
 
     if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id)
@@ -7783,105 +7795,59 @@ static BOOL arbfp_blit_supported(enum wined3d_blit_op blit_op, const struct wine
 }
 
 static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op,
-        struct wined3d_context *context, struct wined3d_texture *src_texture, unsigned int src_sub_resource_idx,
-        DWORD src_location, const RECT *src_rect, struct wined3d_texture *dst_texture,
-        unsigned int dst_sub_resource_idx, DWORD dst_location, const RECT *dst_rect,
+        struct wined3d_context *context, struct wined3d_surface *src_surface, DWORD src_location,
+        const RECT *src_rect, struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect,
         const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter)
 {
+    unsigned int src_sub_resource_idx = surface_get_sub_resource_idx(src_surface);
+    struct wined3d_texture *src_texture = src_surface->container;
+    struct wined3d_texture *dst_texture = dst_surface->container;
     struct wined3d_device *device = dst_texture->resource.device;
-    struct wined3d_texture *staging_texture = NULL;
     struct wined3d_arbfp_blitter *arbfp_blitter;
     struct wined3d_color_key alpha_test_key;
     struct wined3d_blitter *next;
-    unsigned int src_level;
     RECT s, d;
 
-    TRACE("blitter %p, op %#x, context %p, src_texture %p, src_sub_resource_idx %u, src_location %s, src_rect %s, "
-            "dst_texture %p, dst_sub_resource_idx %u, dst_location %s, dst_rect %s, colour_key %p, filter %s.\n",
-            blitter, op, context, src_texture, src_sub_resource_idx, wined3d_debug_location(src_location),
-            wine_dbgstr_rect(src_rect), dst_texture, dst_sub_resource_idx, wined3d_debug_location(dst_location),
-            wine_dbgstr_rect(dst_rect), color_key, debug_d3dtexturefiltertype(filter));
-
     if (!arbfp_blit_supported(op, context, &src_texture->resource, src_location,
             &dst_texture->resource, dst_location))
     {
-        if (!(next = blitter->next))
-        {
-            ERR("No blitter to handle blit op %#x.\n", op);
-            return dst_location;
-        }
-
-        TRACE("Forwarding to blitter %p.\n", next);
-        return next->ops->blitter_blit(next, op, context, src_texture, src_sub_resource_idx, src_location,
-                src_rect, dst_texture, dst_sub_resource_idx, dst_location, dst_rect, color_key, filter);
+        if ((next = blitter->next))
+            return next->ops->blitter_blit(next, op, context, src_surface, src_location,
+                    src_rect, dst_surface, dst_location, dst_rect, color_key, filter);
     }
 
     arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter);
 
-    if (!(src_texture->resource.access & WINED3D_RESOURCE_ACCESS_GPU))
-    {
-        struct wined3d_resource_desc desc;
-        struct wined3d_box upload_box;
-        HRESULT hr;
-
-        TRACE("Source texture is not GPU accessible, creating a staging texture.\n");
-
-        src_level = src_sub_resource_idx % src_texture->level_count;
-        desc.resource_type = WINED3D_RTYPE_TEXTURE_2D;
-        desc.format = src_texture->resource.format->id;
-        desc.multisample_type = src_texture->resource.multisample_type;
-        desc.multisample_quality = src_texture->resource.multisample_quality;
-        desc.usage = WINED3DUSAGE_PRIVATE;
-        desc.access = WINED3D_RESOURCE_ACCESS_GPU;
-        desc.width = wined3d_texture_get_level_width(src_texture, src_level);
-        desc.height = wined3d_texture_get_level_height(src_texture, src_level);
-        desc.depth = 1;
-        desc.size = 0;
-
-        if (FAILED(hr = wined3d_texture_create(device, &desc, 1, 1, 0,
-                NULL, NULL, &wined3d_null_parent_ops, &staging_texture)))
-        {
-            ERR("Failed to create staging texture, hr %#x.\n", hr);
-            return dst_location;
-        }
-
-        wined3d_box_set(&upload_box, 0, 0, desc.width, desc.height, 0, desc.depth);
-        wined3d_texture_upload_from_texture(staging_texture, 0, 0, 0, 0,
-                src_texture, src_sub_resource_idx, &upload_box);
-
-        src_texture = staging_texture;
-        src_sub_resource_idx = 0;
-    }
-    else if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
-            && (src_texture->sub_resources[src_sub_resource_idx].locations
-            & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) == WINED3D_LOCATION_DRAWABLE
+    /* Now load the surface */
+    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
+            && (surface_get_sub_resource(src_surface)->locations
+            & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE))
+            == WINED3D_LOCATION_DRAWABLE
             && !wined3d_resource_is_offscreen(&src_texture->resource))
     {
+        unsigned int src_level = src_sub_resource_idx % src_texture->level_count;
 
         /* Without FBO blits transferring from the drawable to the texture is
          * expensive, because we have to flip the data in sysmem. Since we can
          * flip in the blitter, we don't actually need that flip anyway. So we
          * use the surface's texture as scratch texture, and flip the source
          * rectangle instead. */
-        texture2d_load_fb_texture(src_texture, src_sub_resource_idx, FALSE, context);
+        surface_load_fb_texture(src_surface, FALSE, context);
 
         s = *src_rect;
-        src_level = src_sub_resource_idx % src_texture->level_count;
         s.top = wined3d_texture_get_level_height(src_texture, src_level) - s.top;
         s.bottom = wined3d_texture_get_level_height(src_texture, src_level) - s.bottom;
         src_rect = &s;
     }
     else
-    {
         wined3d_texture_load(src_texture, context, FALSE);
-    }
 
-    context_apply_ffp_blit_state(context, device);
+    context_apply_blit_state(context, device);
 
     if (dst_location == WINED3D_LOCATION_DRAWABLE)
     {
         d = *dst_rect;
-        wined3d_texture_translate_drawable_coords(dst_texture, context->win_handle, &d);
+        surface_translate_drawable_coords(dst_surface, context->win_handle, &d);
         dst_rect = &d;
     }
 
@@ -7891,16 +7857,15 @@ static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_bl
 
         if (dst_location == WINED3D_LOCATION_DRAWABLE)
         {
-            TRACE("Destination texture %p is onscreen.\n", dst_texture);
+            TRACE("Destination surface %p is onscreen.\n", dst_surface);
             buffer = wined3d_texture_get_gl_buffer(dst_texture);
         }
         else
         {
-            TRACE("Destination texture %p is offscreen.\n", dst_texture);
+            TRACE("Destination surface %p is offscreen.\n", dst_surface);
             buffer = GL_COLOR_ATTACHMENT0;
         }
-        context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER,
-                &dst_texture->resource, dst_sub_resource_idx, NULL, 0, dst_location);
+        context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
         context_set_draw_buffer(context, buffer);
         context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
         context_invalidate_state(context, STATE_FRAMEBUFFER);
@@ -7917,16 +7882,14 @@ static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_bl
     arbfp_blit_set(arbfp_blitter, context, src_texture, src_sub_resource_idx, color_key);
 
     /* Draw a textured quad */
-    context_draw_textured_quad(context, src_texture, src_sub_resource_idx, src_rect, dst_rect, filter);
+    draw_textured_quad(src_texture, src_sub_resource_idx, context, src_rect, dst_rect, filter);
 
     /* Leave the opengl state valid for blitting */
     arbfp_blit_unset(context->gl_info);
 
-    if (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture))
-        context->gl_info->gl_ops.gl.p_glFlush();
-
-    if (staging_texture)
-        wined3d_texture_decref(staging_texture);
+    if (wined3d_settings.strict_draw_ordering
+            || (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture)))
+        context->gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
 
     return dst_location;
 }