+
+/* Lookup key identifying one blit program variant. Keys are compared as raw
+ * bytes with memcmp() in glsl_blitter_args_compare(), and
+ * glsl_blitter_get_program() zeroes the whole structure before filling in
+ * the individual fields. */
+struct glsl_blitter_args
+{
+ /* GL texture target of the source texture; matched against GL_TEXTURE_2D,
+ * GL_TEXTURE_CUBE_MAP and GL_TEXTURE_RECTANGLE_ARB when generating. */
+ GLenum texture_type;
+ /* Colour fixup of the source format; selects the fragment shader body. */
+ struct color_fixup_desc fixup;
+ /* Never accessed by name in this file. Presumably explicit tail padding so
+ * that the byte-wise key comparison never covers indeterminate bytes -
+ * TODO confirm against the size of struct color_fixup_desc. */
+ unsigned short padding;
+};
+
+/* One cached blit program, stored in the "programs" tree of
+ * struct wined3d_glsl_blitter and allocated with heap_alloc(). */
+struct glsl_blitter_program
+{
+ /* Red-black tree linkage; WINE_RB_ENTRY_VALUE() maps it back to the program. */
+ struct wine_rb_entry entry;
+ /* Key this program was generated for. */
+ struct glsl_blitter_args args;
+ /* GL program object name; released with glDeleteProgram(). */
+ GLuint id;
+};
+
+/* GLSL based blitter. Instances are recovered from the embedded generic
+ * blitter with CONTAINING_RECORD(). */
+struct wined3d_glsl_blitter
+{
+ /* Generic blitter header (ops table and next blitter in the chain). */
+ struct wined3d_blitter blitter;
+ /* Pool of scratch string buffers used while generating shader source. */
+ struct wined3d_string_buffer_list string_buffers;
+ /* Cache of struct glsl_blitter_program, keyed by struct glsl_blitter_args. */
+ struct wine_rb_tree programs;
+ /* 1D palette texture for P8 blits; 0 until the first palette upload. */
+ GLuint palette_texture;
+};
+
+/* Red-black tree comparison callback for the program cache.
+ *
+ * "key" points to a struct glsl_blitter_args, "entry" is the tree linkage of
+ * a cached struct glsl_blitter_program. The two keys are ordered by a raw
+ * byte comparison, so the result has memcmp() semantics. */
+static int glsl_blitter_args_compare(const void *key, const struct wine_rb_entry *entry)
+{
+    const struct glsl_blitter_program *cached;
+
+    cached = WINE_RB_ENTRY_VALUE(entry, const struct glsl_blitter_program, entry);
+
+    return memcmp(key, &cached->args, sizeof(cached->args));
+}
+
+/* wine_rb_destroy() callback: deletes the GL program object of a cached blit
+ * program and frees the cache entry itself. "ctx" is the wined3d context.
+ *
+ * Context activation is done by the caller. */
+static void glsl_free_blitter_program(struct wine_rb_entry *entry, void *ctx)
+{
+    /* GL_EXTCALL() picks up the local named "gl_info". */
+    const struct wined3d_gl_info *gl_info = ((struct wined3d_context *)ctx)->gl_info;
+    struct glsl_blitter_program *cached;
+
+    cached = WINE_RB_ENTRY_VALUE(entry, struct glsl_blitter_program, entry);
+
+    GL_EXTCALL(glDeleteProgram(cached->id));
+    checkGLcall("glDeleteProgram()");
+    heap_free(cached);
+}
+
+/* Destroys the GLSL blitter. The rest of the blitter chain is destroyed
+ * first, then the palette texture (if one was ever created), every cached
+ * program, the string buffer pool and finally the blitter itself.
+ *
+ * Context activation is done by the caller. */
+static void glsl_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context)
+{
+    struct wined3d_glsl_blitter *self = CONTAINING_RECORD(blitter, struct wined3d_glsl_blitter, blitter);
+    const struct wined3d_gl_info *gl_info = context->gl_info;
+    struct wined3d_blitter *successor = blitter->next;
+
+    if (successor)
+        successor->ops->blitter_destroy(successor, context);
+
+    if (self->palette_texture)
+        gl_info->gl_ops.gl.p_glDeleteTextures(1, &self->palette_texture);
+
+    /* Each tree entry is released through glsl_free_blitter_program(). */
+    wine_rb_destroy(&self->programs, glsl_free_blitter_program, context);
+    string_buffer_list_cleanup(&self->string_buffers);
+
+    heap_free(self);
+}
+
+/* Appends the fragment shader body for palettised (P8) sources to "buffer".
+ *
+ * The generated main() reads the palette index from the source texture and
+ * uses it to look up the final colour in the 1D "sampler_palette" texture,
+ * writing the result to "output". "tex_type" is only appended to the
+ * texture function name when legacy GLSL syntax is needed; "swizzle" selects
+ * the texture coordinate components. "args" is not used. */
+static void glsl_blitter_generate_p8_shader(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const struct glsl_blitter_args *args,
+        const char *output, const char *tex_type, const char *swizzle)
+{
+    const char *source_suffix = needs_legacy_glsl_syntax(gl_info) ? tex_type : "";
+    /* The palette index lives in the alpha component on legacy contexts and
+     * in the first component otherwise. */
+    const char index_channel = gl_info->supported[WINED3D_GL_LEGACY_CONTEXT] ? 'w' : 'x';
+    const char *palette_suffix = needs_legacy_glsl_syntax(gl_info) ? "1D" : "";
+
+    shader_addline(buffer, "uniform sampler1D sampler_palette;\n");
+    shader_addline(buffer, "\nvoid main()\n{\n");
+    shader_addline(buffer, " float index = texture%s(sampler, out_texcoord.%s).%c;\n",
+            source_suffix, swizzle, index_channel);
+    /* Rescale the index by 255/256 and bias it by half a texel so that the
+     * lookup hits the centre of the palette entry. */
+    shader_addline(buffer, " index = (index * 255.0 + 0.5) / 256.0;\n");
+    shader_addline(buffer, " %s = texture%s(sampler_palette, index);\n", output, palette_suffix);
+    shader_addline(buffer, "}\n");
+}
+
+/* Appends the GLSL statements that read "luminance" and "chroma" from a
+ * packed YUY2 or UYVY source; which of the two is taken from args->fixup.
+ *
+ * YUY2 and UYVY pack two pixels into a 32 bit macropixel, i.e. 16 bits per
+ * pixel. Each macropixel carries one luminance (Y) value per pixel plus a
+ * single U and a single V value shared by both pixels.
+ *
+ * The data is loaded into an A8L8 texture. For YUY2 the luminance component
+ * holds Y and the alpha component holds the chroma; for UYVY it is the other
+ * way around, so the read swizzles depend on the format.
+ *
+ * Y is easy: a plain texture sample, with the hardware filtering in both
+ * directions.
+ *
+ * U and V are harder. Horizontal filtering must be avoided because it would
+ * blend the U and V values of one pixel or of two neighbouring pixels, so the
+ * x coordinate is floored and biased by 0.5 to force an unfiltered read
+ * regardless of the filter setting. Vertical filtering still works and
+ * nicely blends the U and V values of two rows.
+ *
+ * Apart from avoiding filtering issues, the shader has to know which value
+ * it just read and where the other one is. It checks whether the sampled
+ * pixel is even or odd and shifts the second read accordingly.
+ *
+ * Proper horizontal filtering of U and V would need a second pixel pair to
+ * be read, split into U and V and mixed. This is not implemented yet.
+ *
+ * An alternative would be to load the data as an A8R8G8B8 texture of half
+ * the width. One read then yields all three values and finding U and V is
+ * trivial when unfiltered, but finding the luminance requires the odd/even
+ * test. The real drawback is filtering: it would have to be emulated
+ * entirely in the shader, reading up to 2 packed pixels in up to 2 rows and
+ * interpolating horizontally and vertically, and the texture code would need
+ * changes to cope with the width scaling. */
+static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const struct glsl_blitter_args *args,
+        const char *tex_type)
+{
+    const int is_uyvy = get_complex_fixup(args->fixup) == COMPLEX_FIXUP_UYVY;
+    /* Component that holds the second channel of the two-channel texture. */
+    const char second_channel = gl_info->supported[WINED3D_GL_LEGACY_CONTEXT] ? 'w' : 'y';
+    const char chroma_channel = is_uyvy ? 'x' : second_channel;
+    const char luma_channel = is_uyvy ? second_channel : 'x';
+    const char *sample_suffix = needs_legacy_glsl_syntax(gl_info) ? tex_type : "";
+
+    /* Chroma comes first. At least two pixels are needed (four with
+     * filtering), and the sampling has to be unfiltered to get the raw
+     * values. */
+    shader_addline(buffer, " texcoord.xy = out_texcoord.xy;\n");
+    /* Snap x to the texel centre (0.5) so that pixel x and x+1 are never
+     * blended; vertical filtering is left alone. */
+    shader_addline(buffer, " texcoord.x = (floor(texcoord.x * size.x) + 0.5) / size.x;\n");
+    shader_addline(buffer, " luminance = texture%s(sampler, texcoord.xy).%c;\n",
+            sample_suffix, chroma_channel);
+
+    /* Half the x coordinate has a fraction of 0.25 for even and 0.75 for odd
+     * pixels; store the value just read in the matching chroma slot. */
+    shader_addline(buffer, " bool even = fract(texcoord.x * size.x * 0.5) < 0.5;\n");
+    shader_addline(buffer, " if (even)\n");
+    shader_addline(buffer, " chroma.y = luminance;\n");
+    shader_addline(buffer, " else\n");
+    shader_addline(buffer, " chroma.x = luminance;\n");
+
+    /* Second read: the right neighbour of an even pixel, the left neighbour
+     * of an odd one. */
+    shader_addline(buffer, " texcoord.x += even ? 1.0 / size.x : -1.0 / size.x;\n");
+    shader_addline(buffer, " luminance = texture%s(sampler, texcoord.xy).%c;\n",
+            sample_suffix, chroma_channel);
+
+    /* That value goes into the other chroma slot. */
+    shader_addline(buffer, " if (even)\n");
+    shader_addline(buffer, " chroma.x = luminance;\n");
+    shader_addline(buffer, " else\n");
+    shader_addline(buffer, " chroma.y = luminance;\n");
+
+    /* TODO: With filtering enabled, sample a second pixel pair to the left
+     * or right and lerp the two U and V values. */
+
+    /* Finally the properly filtered luminance. */
+    shader_addline(buffer, " luminance = texture%s(sampler, out_texcoord.xy).%c;\n",
+            sample_suffix, luma_channel);
+}
+
+/* Appends the GLSL statements that read "luminance" and "chroma" from a
+ * planar YV12 source.
+ *
+ * A YV12 surface is a WxH luminance plane followed by a (W/2)x(H/2) V plane
+ * and a (W/2)x(H/2) U plane, all at 8 bits per sample, which gives 12 bits
+ * per pixel. Because the chroma planes have half the pitch of the luminance
+ * plane, the layout inside the GL texture is awkward. Interpreting the whole
+ * texture as luminance data it looks roughly like this:
+ *
+ *   +----------------------------------+----
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |      2
+ *   |            LUMINANCE             |      -
+ *   |                                  |      3
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   +----------------+-----------------+----
+ *   |                |                 |
+ *   |  V even rows   |  V odd rows     |
+ *   |                |                 |      1
+ *   +----------------+------------------      -
+ *   |                |                 |      3
+ *   |  U even rows   |  U odd rows     |
+ *   |                |                 |
+ *   +----------------+-----------------+----
+ *   |                |                 |
+ *   |     0.5        |       0.5       |
+ *
+ * It looks like four chroma images, but the odd chroma rows simply share a
+ * texture row with the even ones, which makes them tricky to read. */
+static void gen_yv12_read(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const char *tex_type)
+{
+    const char channel = gl_info->supported[WINED3D_GL_LEGACY_CONTEXT] ? 'w' : 'x';
+    const char *sample_suffix = needs_legacy_glsl_syntax(gl_info) ? tex_type : "";
+
+    /* Chroma first; the chroma planes are only half as wide. */
+    shader_addline(buffer, " texcoord.xy = out_texcoord.xy;\n");
+    shader_addline(buffer, " texcoord.x *= 0.5;\n");
+
+    /* The first chroma plane sits between 2/3 and 5/6 of the texture height,
+     * so y gets scaled and biased. Odd lines live in the right half of the
+     * image, hence the 0.5 added to x for them. */
+    shader_addline(buffer, " if (fract(floor(texcoord.y * size.y) * 0.5 + 1.0 / 6.0) >= 0.5)\n");
+    shader_addline(buffer, " texcoord.x += 0.5;\n");
+
+    /* Clamp y into the plane, minding the half pixel origin; otherwise
+     * filtering would bleed in neighbouring data. */
+    shader_addline(buffer, " texcoord.y = clamp(2.0 / 3.0 + texcoord.y / 6.0, 2.0 / 3.0 + 0.5 / size.y, "
+            "5.0 / 6.0 - 0.5 / size.y);\n");
+
+    shader_addline(buffer, " chroma.x = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+
+    /* The second chroma plane is 1/6th of the texture further down (5/6 to
+     * 6/6). The coordinate is already clamped, so no extra clamp is needed. */
+    shader_addline(buffer, " texcoord.y += 1.0 / 6.0;\n");
+    shader_addline(buffer, " chroma.y = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+
+    /* Luminance occupies the top 2/3 of the texture: scale y by 2/3 and
+     * clamp it so that filtering cannot pull chroma data into the result. */
+    shader_addline(buffer, " texcoord.xy = out_texcoord.xy;\n");
+    shader_addline(buffer, " texcoord.y = min(texcoord.y * 2.0 / 3.0, 2.0 / 3.0 - 0.5 / size.y);\n");
+    shader_addline(buffer, " luminance = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+}
+
+/* Appends the GLSL statements that read "luminance" and "chroma" from an
+ * NV12 source.
+ *
+ * An NV12 surface is a WxH luminance plane followed by a (W/2)x(H/2) plane
+ * whose elements are UV pairs, for an effective 12 bits per pixel.
+ * Interpreting the whole texture as luminance data it looks roughly like
+ * this:
+ *
+ *   +----------------------------------+----
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |      2
+ *   |            LUMINANCE             |      -
+ *   |                                  |      3
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   |                                  |
+ *   +----------------------------------+----
+ *   |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+ *   |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+ *   |                                  |      1
+ *   |                                  |      -
+ *   |                                  |      3
+ *   |                                  |
+ *   |                                  |
+ *   +----------------------------------+---- */
+static void gen_nv12_read(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const char *tex_type)
+{
+    const char channel = gl_info->supported[WINED3D_GL_LEGACY_CONTEXT] ? 'w' : 'x';
+    const char *sample_suffix = needs_legacy_glsl_syntax(gl_info) ? tex_type : "";
+
+    /* Chroma first. There are only half as many chroma pixels per row, and
+     * the chroma plane is the bottom third of the texture. */
+    shader_addline(buffer, " texcoord.xy = out_texcoord.xy;\n");
+    shader_addline(buffer, " texcoord.x *= 0.5;\n");
+    shader_addline(buffer, " texcoord.y = (texcoord.y + 2.0) / 3.0;\n");
+
+    /* Horizontal filtering would mix U and V and must be avoided; vertical
+     * filtering is fine. Switch to non-normalised coordinates to locate the
+     * individual pixel. */
+    shader_addline(buffer, " texcoord.x = floor(texcoord.x * size.x);\n");
+    /* Chroma is stored as UV pairs, so double the index; add 0.5 to hit the
+     * texel centre and go back to normalised coordinates. */
+    shader_addline(buffer, " texcoord.x = (texcoord.x * 2.0 + 0.5) / size.x;\n");
+    /* Clamp y, minding the half pixel origin. */
+    shader_addline(buffer, " texcoord.y = max(texcoord.y, 2.0 / 3.0 + 0.5 / size.y);\n");
+
+    shader_addline(buffer, " chroma.y = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+    /* The other half of the pair is the adjacent texel, 1.0 / size.x away. */
+    shader_addline(buffer, " texcoord.x += 1.0 / size.x;\n");
+    shader_addline(buffer, " chroma.x = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+
+    /* Luminance occupies the top 2/3 of the texture: scale y by 2/3 and
+     * clamp it so that filtering cannot pull chroma data into the result. */
+    shader_addline(buffer, " texcoord.xy = out_texcoord.xy;\n");
+    shader_addline(buffer, " texcoord.y = min(texcoord.y * 2.0 / 3.0, 2.0 / 3.0 - 0.5 / size.y);\n");
+    shader_addline(buffer, " luminance = texture%s(sampler, texcoord.xy).%c;\n", sample_suffix, channel);
+}
+
+/* Appends the fragment shader body for YUV sources (YUY2, UYVY, YV12, NV12)
+ * to "buffer".
+ *
+ * The format specific reader selected by args->fixup fills the GLSL globals
+ * "luminance" and "chroma"; the common tail converts them to RGB and writes
+ * the x, y and z components of "output". "tex_type" is forwarded to the
+ * readers; "swizzle" is not used.
+ *
+ * "buffer" stays owned by the caller on every path: the caller reads
+ * buffer->buffer and releases the buffer back to its pool after this function
+ * returns. For an unsupported fixup the function therefore only logs a FIXME
+ * and returns, leaving main() unterminated. */
+static void glsl_blitter_generate_yuv_shader(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const struct glsl_blitter_args *args,
+        const char *output, const char *tex_type, const char *swizzle)
+{
+    enum complex_fixup complex_fixup = get_complex_fixup(args->fixup);
+
+    shader_addline(buffer, "const vec4 yuv_coef = vec4(1.403, -0.344, -0.714, 1.770);\n");
+    shader_addline(buffer, "float luminance;\n");
+    shader_addline(buffer, "vec2 texcoord;\n");
+    shader_addline(buffer, "vec2 chroma;\n");
+    shader_addline(buffer, "uniform vec2 size;\n");
+
+    shader_addline(buffer, "\nvoid main()\n{\n");
+
+    switch (complex_fixup)
+    {
+        case COMPLEX_FIXUP_UYVY:
+        case COMPLEX_FIXUP_YUY2:
+            gen_packed_yuv_read(buffer, gl_info, args, tex_type);
+            break;
+
+        case COMPLEX_FIXUP_YV12:
+            gen_yv12_read(buffer, gl_info, tex_type);
+            break;
+
+        case COMPLEX_FIXUP_NV12:
+            gen_nv12_read(buffer, gl_info, tex_type);
+            break;
+
+        default:
+            /* Do not free the buffer here; the caller still uses it and
+             * returns it to the string buffer pool. */
+            FIXME("Unsupported fixup %#x.\n", complex_fixup);
+            return;
+    }
+
+    /* Calculate the final result. Formula is taken from
+     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
+     * ranges from -0.5 to 0.5. */
+    shader_addline(buffer, "\n chroma.xy -= 0.5;\n");
+
+    shader_addline(buffer, " %s.x = luminance + chroma.x * yuv_coef.x;\n", output);
+    shader_addline(buffer, " %s.y = luminance + chroma.y * yuv_coef.y + chroma.x * yuv_coef.z;\n", output);
+    shader_addline(buffer, " %s.z = luminance + chroma.y * yuv_coef.w;\n", output);
+
+    shader_addline(buffer, "}\n");
+}
+
+/* Appends the fragment shader body for sources without a complex fixup to
+ * "buffer": a single texture sample written to "output", followed by the
+ * colour correction code for args->fixup. "tex_type" is only appended to the
+ * texture function name when legacy GLSL syntax is needed. */
+static void glsl_blitter_generate_plain_shader(struct wined3d_string_buffer *buffer,
+        const struct wined3d_gl_info *gl_info, const struct glsl_blitter_args *args,
+        const char *output, const char *tex_type, const char *swizzle)
+{
+    const char *sample_suffix;
+
+    shader_addline(buffer, "\nvoid main()\n{\n");
+
+    sample_suffix = needs_legacy_glsl_syntax(gl_info) ? tex_type : "";
+    shader_addline(buffer, " %s = texture%s(sampler, out_texcoord.%s);\n", output, sample_suffix, swizzle);
+
+    shader_glsl_color_correction_ext(buffer, output, WINED3DSP_WRITEMASK_ALL, args->fixup);
+    shader_addline(buffer, "}\n");
+}
+
+/* Generates, compiles and links the GLSL blit program for "args".
+ *
+ * Returns the GL program object name, or 0 if args->texture_type is not one
+ * of the supported texture targets. Compile and link problems are only
+ * reported through print_glsl_info_log() / shader_glsl_validate_link(); the
+ * program name is returned regardless.
+ *
+ * On success the new program is left bound with glUseProgram(), with the
+ * "sampler" uniform set to texture unit 0 and, for P8, "sampler_palette" set
+ * to texture unit 1.
+ *
+ * Context activation is done by the caller. */
+static GLuint glsl_blitter_generate_program(struct wined3d_glsl_blitter *blitter,
+ const struct wined3d_gl_info *gl_info, const struct glsl_blitter_args *args)
+{
+ /* Per texture target: the GLSL sampler / texture function suffix and the
+ * texture coordinate components to use. */
+ static const struct
+ {
+ GLenum texture_target;
+ const char texture_type[7];
+ const char texcoord_swizzle[4];
+ }
+ texture_data[] =
+ {
+ {GL_TEXTURE_2D, "2D", "xy"},
+ {GL_TEXTURE_CUBE_MAP, "Cube", "xyz"},
+ {GL_TEXTURE_RECTANGLE_ARB, "2DRect", "xy"},
+ };
+ /* Pass-through vertex shader body. */
+ static const char vshader_main[] =
+ "\n"
+ "void main()\n"
+ "{\n"
+ " gl_Position = vec4(pos, 0.0, 1.0);\n"
+ " out_texcoord = texcoord;\n"
+ "}\n";
+ enum complex_fixup complex_fixup = get_complex_fixup(args->fixup);
+ struct wined3d_string_buffer *buffer, *output;
+ GLuint program, vshader_id, fshader_id;
+ const char *tex_type, *swizzle, *ptr;
+ unsigned int i;
+ GLint loc;
+
+ /* tex_type and swizzle are only valid if the loop exits through the
+ * break; the check after the loop bails out otherwise. */
+ for (i = 0; i < ARRAY_SIZE(texture_data); ++i)
+ {
+ if (args->texture_type == texture_data[i].texture_target)
+ {
+ tex_type = texture_data[i].texture_type;
+ swizzle = texture_data[i].texcoord_swizzle;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE(texture_data))
+ {
+ FIXME("Unsupported texture type %#x.\n", args->texture_type);
+ return 0;
+ }
+
+ program = GL_EXTCALL(glCreateProgram());
+
+ vshader_id = GL_EXTCALL(glCreateShader(GL_VERTEX_SHADER));
+
+ buffer = string_buffer_get(&blitter->string_buffers);
+ shader_glsl_add_version_declaration(buffer, gl_info);
+ shader_addline(buffer, "%s vec2 pos;\n", get_attribute_keyword(gl_info));
+ shader_addline(buffer, "%s vec3 texcoord;\n", get_attribute_keyword(gl_info));
+ declare_out_varying(gl_info, buffer, FALSE, "vec3 out_texcoord;\n");
+ shader_addline(buffer, vshader_main);
+
+ /* NOTE(review): the shader objects are flagged for deletion right after
+ * being attached, but are still compiled further down. This presumably
+ * relies on GL keeping an attached shader object alive until it is
+ * detached or the program is deleted - confirm against glDeleteShader(). */
+ ptr = buffer->buffer;
+ GL_EXTCALL(glShaderSource(vshader_id, 1, &ptr, NULL));
+ GL_EXTCALL(glAttachShader(program, vshader_id));
+ GL_EXTCALL(glDeleteShader(vshader_id));
+
+ fshader_id = GL_EXTCALL(glCreateShader(GL_FRAGMENT_SHADER));
+
+ /* The same string buffer is reused for the fragment shader source. */
+ string_buffer_clear(buffer);
+ shader_glsl_add_version_declaration(buffer, gl_info);
+ shader_addline(buffer, "uniform sampler%s sampler;\n", tex_type);
+ declare_in_varying(gl_info, buffer, FALSE, "vec3 out_texcoord;\n");
+ if (!needs_legacy_glsl_syntax(gl_info))
+ shader_addline(buffer, "out vec4 ps_out[1];\n");
+
+ /* "output" holds the name of the fragment output the body writes to. */
+ output = string_buffer_get(&blitter->string_buffers);
+ string_buffer_sprintf(output, "%s[0]", get_fragment_output(gl_info));
+
+ /* There is no default case: for any other fixup value no main() is
+ * emitted at all. */
+ switch (complex_fixup)
+ {
+ case COMPLEX_FIXUP_P8:
+ glsl_blitter_generate_p8_shader(buffer, gl_info, args, output->buffer, tex_type, swizzle);
+ break;
+ case COMPLEX_FIXUP_YUY2:
+ case COMPLEX_FIXUP_UYVY:
+ case COMPLEX_FIXUP_YV12:
+ case COMPLEX_FIXUP_NV12:
+ glsl_blitter_generate_yuv_shader(buffer, gl_info, args, output->buffer, tex_type, swizzle);
+ break;
+ case COMPLEX_FIXUP_NONE:
+ glsl_blitter_generate_plain_shader(buffer, gl_info, args, output->buffer, tex_type, swizzle);
+ }
+
+ string_buffer_release(&blitter->string_buffers, output);
+
+ /* The source is handed to GL before the buffer goes back to the pool. */
+ ptr = buffer->buffer;
+ GL_EXTCALL(glShaderSource(fshader_id, 1, &ptr, NULL));
+ string_buffer_release(&blitter->string_buffers, buffer);
+ GL_EXTCALL(glAttachShader(program, fshader_id));
+ GL_EXTCALL(glDeleteShader(fshader_id));
+
+ /* Fixed attribute locations: 0 is the position, 1 the texture coordinate. */
+ GL_EXTCALL(glBindAttribLocation(program, 0, "pos"));
+ GL_EXTCALL(glBindAttribLocation(program, 1, "texcoord"));
+
+ if (!needs_legacy_glsl_syntax(gl_info))
+ GL_EXTCALL(glBindFragDataLocation(program, 0, "ps_out"));
+
+ GL_EXTCALL(glCompileShader(vshader_id));
+ print_glsl_info_log(gl_info, vshader_id, FALSE);
+ GL_EXTCALL(glCompileShader(fshader_id));
+ print_glsl_info_log(gl_info, fshader_id, FALSE);
+ GL_EXTCALL(glLinkProgram(program));
+ shader_glsl_validate_link(gl_info, program);
+
+ /* Bind the program to set its sampler uniforms; it is left bound. */
+ GL_EXTCALL(glUseProgram(program));
+ loc = GL_EXTCALL(glGetUniformLocation(program, "sampler"));
+ GL_EXTCALL(glUniform1i(loc, 0));
+ if (complex_fixup == COMPLEX_FIXUP_P8)
+ {
+ loc = GL_EXTCALL(glGetUniformLocation(program, "sampler_palette"));
+ GL_EXTCALL(glUniform1i(loc, 1));
+ }
+
+ return program;
+}
+
+/* Uploads the palette used for P8 blits into the blitter's 1D palette
+ * texture on texture unit 1, creating the texture on first use. The palette
+ * is taken from the swapchain of "texture"; when there is none a single
+ * black texel is uploaded instead. Texture unit 0 is made active again
+ * before returning.
+ *
+ * Context activation is done by the caller. */
+static void glsl_blitter_upload_palette(struct wined3d_glsl_blitter *blitter,
+        struct wined3d_context *context, const struct wined3d_texture *texture)
+{
+    static const DWORD black;
+    const struct wined3d_gl_info *gl_info = context->gl_info;
+    const struct wined3d_palette *palette = NULL;
+    const void *texels = &black;
+    int texel_count = 1;
+
+    if (texture->swapchain)
+        palette = texture->swapchain->palette;
+
+    if (!blitter->palette_texture)
+        gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture);
+
+    context_active_texture(context, gl_info, 1);
+    gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture);
+    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+
+    if (palette)
+    {
+        texels = palette->colors;
+        texel_count = 256;
+    }
+    else
+    {
+        FIXME("P8 texture loaded without a palette.\n");
+    }
+
+    gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, texel_count, 0, GL_BGRA,
+            GL_UNSIGNED_INT_8_8_8_8_REV, texels);
+
+    context_active_texture(context, gl_info, 0);
+}
+
+/* Returns the blit program matching the target and colour fixup of
+ * "texture", generating and caching it on first use. Returns NULL if the
+ * allocation, the program generation or the insertion into the cache fails.
+ *
+ * Context activation is done by the caller. */
+static struct glsl_blitter_program *glsl_blitter_get_program(struct wined3d_glsl_blitter *blitter,
+        struct wined3d_context *context, const struct wined3d_texture *texture)
+{
+    const struct wined3d_gl_info *gl_info = context->gl_info;
+    struct glsl_blitter_program *created;
+    struct wine_rb_entry *cached;
+    struct glsl_blitter_args key;
+
+    /* Keys are compared byte-wise, so start from an all-zero key. */
+    memset(&key, 0, sizeof(key));
+    key.fixup = texture->resource.format->color_fixup;
+    key.texture_type = texture->target;
+
+    cached = wine_rb_get(&blitter->programs, &key);
+    if (cached)
+        return WINE_RB_ENTRY_VALUE(cached, struct glsl_blitter_program, entry);
+
+    created = heap_alloc(sizeof(*created));
+    if (!created)
+    {
+        ERR("Failed to allocate blitter program memory.\n");
+        return NULL;
+    }
+
+    created->args = key;
+    created->id = glsl_blitter_generate_program(blitter, gl_info, &key);
+    if (!created->id)
+    {
+        WARN("Failed to generate blitter program.\n");
+        heap_free(created);
+        return NULL;
+    }
+
+    if (wine_rb_put(&blitter->programs, &created->args, &created->entry) == -1)
+    {
+        ERR("Failed to store blitter program.\n");
+        GL_EXTCALL(glDeleteProgram(created->id));
+        heap_free(created);
+        return NULL;
+    }
+
+    return created;
+}
+
+/* Decides whether the GLSL blitter can handle the requested blit.
+ *
+ * Only colour blits are supported; a raw blit between identical formats is
+ * treated as a depth blit for depth/stencil formats and as a colour blit
+ * otherwise. The source has to be a 2D texture, neither texture may be
+ * multi-sampled, both resources need GPU access unless the blit decompresses
+ * a compressed source, and a destination with a non-identity colour fixup is
+ * only accepted for same-format blits to the drawable. "context" and
+ * "src_location" are not used. */
+static BOOL glsl_blitter_supported(enum wined3d_blit_op blit_op, const struct wined3d_context *context,
+        const struct wined3d_texture *src_texture, DWORD src_location,
+        const struct wined3d_texture *dst_texture, DWORD dst_location)
+{
+    const struct wined3d_resource *src_resource = &src_texture->resource;
+    const struct wined3d_resource *dst_resource = &dst_texture->resource;
+    const struct wined3d_format *src_format = src_resource->format;
+    const struct wined3d_format *dst_format = dst_resource->format;
+    BOOL decompress;
+
+    if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id)
+        blit_op = (dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
+                ? WINED3D_BLIT_OP_DEPTH_BLIT : WINED3D_BLIT_OP_COLOR_BLIT;
+
+    if (blit_op != WINED3D_BLIT_OP_COLOR_BLIT)
+    {
+        TRACE("Unsupported blit_op %#x.\n", blit_op);
+        return FALSE;
+    }
+
+    if (src_resource->type != WINED3D_RTYPE_TEXTURE_2D)
+        return FALSE;
+
+    if (src_texture->target == GL_TEXTURE_2D_MULTISAMPLE
+            || src_texture->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY
+            || dst_texture->target == GL_TEXTURE_2D_MULTISAMPLE
+            || dst_texture->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+    {
+        TRACE("Multi-sample textures not supported.\n");
+        return FALSE;
+    }
+
+    /* Blitting from resources without WINED3D_RESOURCE_ACCESS_GPU is not
+     * necessarily desirable, but it may be the only way to decompress
+     * compressed textures. */
+    decompress = src_format && (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED)
+            && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED);
+    if (!(decompress || (src_resource->access & dst_resource->access & WINED3D_RESOURCE_ACCESS_GPU)))
+    {
+        TRACE("Source or destination resource does not have GPU access.\n");
+        return FALSE;
+    }
+
+    if (!is_identity_fixup(dst_format->color_fixup))
+    {
+        if (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE)
+        {
+            TRACE("Destination fixups are not supported.\n");
+            return FALSE;
+        }
+    }
+
+    TRACE("Returning supported.\n");
+    return TRUE;
+}
+
+/* Blit entry point of the GLSL blitter.
+ *
+ * Blits that glsl_blitter_supported() rejects are forwarded to the next
+ * blitter in the chain, and that blitter's result is returned. Otherwise the
+ * source is made available as a GL texture, the matching blit program is
+ * bound and a shaded quad is drawn to the destination. All paths handled
+ * here return "dst_location", including the error paths.
+ *
+ * A source without GPU access is first copied into a temporary staging
+ * texture; that texture is released on every path that leaves the function
+ * after it was created, including failure to obtain the blit program. */
+static DWORD glsl_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op,
+        struct wined3d_context *context, struct wined3d_texture *src_texture, unsigned int src_sub_resource_idx,
+        DWORD src_location, const RECT *src_rect, struct wined3d_texture *dst_texture,
+        unsigned int dst_sub_resource_idx, DWORD dst_location, const RECT *dst_rect,
+        const struct wined3d_color_key *colour_key, enum wined3d_texture_filter_type filter)
+{
+    struct wined3d_device *device = dst_texture->resource.device;
+    const struct wined3d_gl_info *gl_info = context->gl_info;
+    struct wined3d_texture *staging_texture = NULL;
+    struct wined3d_glsl_blitter *glsl_blitter;
+    struct glsl_blitter_program *program;
+    struct wined3d_blitter *next;
+    unsigned int src_level;
+    GLint location;
+    RECT s, d;
+
+    TRACE("blitter %p, op %#x, context %p, src_texture %p, src_sub_resource_idx %u, src_location %s, src_rect %s, "
+            "dst_texture %p, dst_sub_resource_idx %u, dst_location %s, dst_rect %s, colour_key %p, filter %s.\n",
+            blitter, op, context, src_texture, src_sub_resource_idx, wined3d_debug_location(src_location),
+            wine_dbgstr_rect(src_rect), dst_texture, dst_sub_resource_idx, wined3d_debug_location(dst_location),
+            wine_dbgstr_rect(dst_rect), colour_key, debug_d3dtexturefiltertype(filter));
+
+    if (!glsl_blitter_supported(op, context, src_texture, src_location, dst_texture, dst_location))
+    {
+        if (!(next = blitter->next))
+        {
+            ERR("No blitter to handle blit op %#x.\n", op);
+            return dst_location;
+        }
+
+        TRACE("Forwarding to blitter %p.\n", next);
+        return next->ops->blitter_blit(next, op, context, src_texture, src_sub_resource_idx, src_location,
+                src_rect, dst_texture, dst_sub_resource_idx, dst_location, dst_rect, colour_key, filter);
+    }
+
+    glsl_blitter = CONTAINING_RECORD(blitter, struct wined3d_glsl_blitter, blitter);
+
+    if (!(src_texture->resource.access & WINED3D_RESOURCE_ACCESS_GPU))
+    {
+        struct wined3d_resource_desc desc;
+        struct wined3d_box upload_box;
+        HRESULT hr;
+
+        TRACE("Source texture is not GPU accessible, creating a staging texture.\n");
+
+        /* Single level, GPU accessible copy of the source sub-resource. */
+        src_level = src_sub_resource_idx % src_texture->level_count;
+        desc.resource_type = WINED3D_RTYPE_TEXTURE_2D;
+        desc.format = src_texture->resource.format->id;
+        desc.multisample_type = src_texture->resource.multisample_type;
+        desc.multisample_quality = src_texture->resource.multisample_quality;
+        desc.usage = WINED3DUSAGE_PRIVATE;
+        desc.access = WINED3D_RESOURCE_ACCESS_GPU;
+        desc.width = wined3d_texture_get_level_width(src_texture, src_level);
+        desc.height = wined3d_texture_get_level_height(src_texture, src_level);
+        desc.depth = 1;
+        desc.size = 0;
+
+        if (FAILED(hr = wined3d_texture_create(device, &desc, 1, 1, 0,
+                NULL, NULL, &wined3d_null_parent_ops, &staging_texture)))
+        {
+            ERR("Failed to create staging texture, hr %#x.\n", hr);
+            return dst_location;
+        }
+
+        wined3d_box_set(&upload_box, 0, 0, desc.width, desc.height, 0, desc.depth);
+        wined3d_texture_upload_from_texture(staging_texture, 0, 0, 0, 0,
+                src_texture, src_sub_resource_idx, &upload_box);
+
+        /* From here on the staging texture is the blit source. */
+        src_texture = staging_texture;
+        src_sub_resource_idx = 0;
+    }
+    else if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
+            && (src_texture->sub_resources[src_sub_resource_idx].locations
+            & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) == WINED3D_LOCATION_DRAWABLE
+            && !wined3d_resource_is_offscreen(&src_texture->resource))
+    {
+
+        /* Without FBO blits transferring from the drawable to the texture is
+         * expensive, because we have to flip the data in sysmem. Since we can
+         * flip in the blitter, we don't actually need that flip anyway. So we
+         * use the surface's texture as scratch texture, and flip the source
+         * rectangle instead. */
+        texture2d_load_fb_texture(src_texture, src_sub_resource_idx, FALSE, context);
+
+        s = *src_rect;
+        src_level = src_sub_resource_idx % src_texture->level_count;
+        s.top = wined3d_texture_get_level_height(src_texture, src_level) - s.top;
+        s.bottom = wined3d_texture_get_level_height(src_texture, src_level) - s.bottom;
+        src_rect = &s;
+    }
+    else
+    {
+        wined3d_texture_load(src_texture, context, FALSE);
+    }
+
+    context_apply_blit_state(context, device);
+
+    if (dst_location == WINED3D_LOCATION_DRAWABLE)
+    {
+        d = *dst_rect;
+        wined3d_texture_translate_drawable_coords(dst_texture, context->win_handle, &d);
+        dst_rect = &d;
+    }
+
+    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
+    {
+        GLenum buffer;
+
+        if (dst_location == WINED3D_LOCATION_DRAWABLE)
+        {
+            TRACE("Destination texture %p is onscreen.\n", dst_texture);
+            buffer = wined3d_texture_get_gl_buffer(dst_texture);
+        }
+        else
+        {
+            TRACE("Destination texture %p is offscreen.\n", dst_texture);
+            buffer = GL_COLOR_ATTACHMENT0;
+        }
+        context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER,
+                &dst_texture->resource, dst_sub_resource_idx, NULL, 0, dst_location);
+        context_set_draw_buffer(context, buffer);
+        context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
+        context_invalidate_state(context, STATE_FRAMEBUFFER);
+    }
+
+    if (!(program = glsl_blitter_get_program(glsl_blitter, context, src_texture)))
+    {
+        ERR("Failed to get blitter program.\n");
+        /* Don't leak the staging texture created above. */
+        if (staging_texture)
+            wined3d_texture_decref(staging_texture);
+        return dst_location;
+    }
+    GL_EXTCALL(glUseProgram(program->id));
+    switch (get_complex_fixup(program->args.fixup))
+    {
+        case COMPLEX_FIXUP_P8:
+            glsl_blitter_upload_palette(glsl_blitter, context, src_texture);
+            break;
+
+        case COMPLEX_FIXUP_YUY2:
+        case COMPLEX_FIXUP_UYVY:
+        case COMPLEX_FIXUP_YV12:
+        case COMPLEX_FIXUP_NV12:
+            /* The YUV shaders need the texture dimensions in "size". */
+            src_level = src_sub_resource_idx % src_texture->level_count;
+            location = GL_EXTCALL(glGetUniformLocation(program->id, "size"));
+            GL_EXTCALL(glUniform2f(location, wined3d_texture_get_level_pow2_width(src_texture, src_level),
+                    wined3d_texture_get_level_pow2_height(src_texture, src_level)));
+            break;
+
+        default:
+            break;
+    }
+    context_draw_shaded_quad(context, src_texture, src_sub_resource_idx, src_rect, dst_rect, filter);
+    GL_EXTCALL(glUseProgram(0));
+
+    /* Flush when the destination is the front buffer of its swapchain. */
+    if (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture))
+        gl_info->gl_ops.gl.p_glFlush();
+
+    if (staging_texture)
+        wined3d_texture_decref(staging_texture);
+
+    return dst_location;
+}
+
+/* The GLSL blitter does not implement clears itself; the request is passed
+ * unchanged to the next blitter in the chain, if there is one. */
+static void glsl_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device,
+        unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects,
+        const RECT *draw_rect, DWORD flags, const struct wined3d_color *color, float depth, DWORD stencil)
+{
+    struct wined3d_blitter *successor = blitter->next;
+
+    if (!successor)
+        return;
+
+    successor->ops->blitter_clear(successor, device, rt_count, fb, rect_count,
+            clear_rects, draw_rect, flags, color, depth, stencil);
+}
+
+/* Operations table installed by wined3d_glsl_blitter_create(). The
+ * initialiser is positional, so the entries have to follow the member order
+ * of struct wined3d_blitter_ops (declared outside this file section). */
+static const struct wined3d_blitter_ops glsl_blitter_ops =
+{
+ glsl_blitter_destroy,
+ glsl_blitter_clear,
+ glsl_blitter_blit,
+};
+
+/* Creates a GLSL blitter and puts it at the head of the blitter chain.
+ *
+ * "next" points to the current head of the chain; on success it is updated
+ * to the new blitter, which is also returned. NULL is returned, and *next is
+ * left untouched, if the device does not use the GLSL shader backend, if
+ * vertex or fragment shaders are not supported, or if the allocation fails. */
+struct wined3d_blitter *wined3d_glsl_blitter_create(struct wined3d_blitter **next,
+        const struct wined3d_device *device)
+{
+    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
+    struct wined3d_glsl_blitter *object;
+
+    if (device->shader_backend != &glsl_shader_backend
+            || !gl_info->supported[ARB_VERTEX_SHADER]
+            || !gl_info->supported[ARB_FRAGMENT_SHADER])
+        return NULL;
+
+    object = heap_alloc(sizeof(*object));
+    if (!object)
+    {
+        ERR("Failed to allocate blitter.\n");
+        return NULL;
+    }
+
+    TRACE("Created blitter %p.\n", object);
+
+    object->blitter.ops = &glsl_blitter_ops;
+    object->blitter.next = *next;
+    string_buffer_list_init(&object->string_buffers);
+    wine_rb_init(&object->programs, glsl_blitter_args_compare);
+    object->palette_texture = 0;
+
+    /* The new blitter becomes the head of the chain. */
+    *next = &object->blitter;
+
+    return *next;
+}