GLboolean inputs_safe;
GLboolean outputs_safe;
GLboolean have_sse2;
-
+
struct x86_reg identity;
struct x86_reg chan0;
};
return p->identity;
}
/* Four-component load from a four-component source.
 *
 * Calls sse_movups on p->func with operands (dest, arg0); presumably this
 * appends a single SSE MOVUPS that reads all four floats from arg0 into
 * dest.
 */
-static void emit_load4f_4( struct x86_program *p,
+static void emit_load4f_4( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movups(&p->func, dest, arg0);
}
/* Four-component load from a three-component source.
 *
 * The visible body calls sse_movlps on p->func with operands (dest, arg0).
 *
 * NOTE(review): a movlps on its own presumably moves only two floats; how
 * the third component and the default fourth component reach dest is not
 * visible here - confirm against the complete function body.
 */
-static void emit_load4f_3( struct x86_program *p,
+static void emit_load4f_3( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movlps(&p->func, dest, arg0);
}
/* Four-component load from a two-component source.
 *
 * The visible body calls sse_movlps on p->func with operands (dest, arg0),
 * presumably placing the two source floats in the low half of dest.
 *
 * NOTE(review): how the upper two components of dest are initialised is
 * not visible here - confirm against the complete function body.
 */
-static void emit_load4f_2( struct x86_program *p,
+static void emit_load4f_2( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movlps(&p->func, dest, arg0);
}
-static void emit_load4f_1( struct x86_program *p,
+static void emit_load4f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
-static void emit_load3f_3( struct x86_program *p,
+static void emit_load3f_3( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
*/
if (p->inputs_safe) {
sse_movups(&p->func, dest, arg0);
- }
+ }
else {
/* c 0 0 0
* c c c c
- * a b c c
+ * a b c c
*/
sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
}
}
/* Three-component load from a two-component source (load[2][1]).
 *
 * No dedicated code path: forwards p, dest and arg0 unchanged to
 * emit_load4f_2.
 */
-static void emit_load3f_2( struct x86_program *p,
+static void emit_load3f_2( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
emit_load4f_2(p, dest, arg0);
}
/* Three-component load from a one-component source (load[2][0]).
 *
 * No dedicated code path: forwards p, dest and arg0 unchanged to
 * emit_load4f_1.
 */
-static void emit_load3f_1( struct x86_program *p,
+static void emit_load3f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
emit_load4f_1(p, dest, arg0);
}
/* Two-component load from a source with at least two components
 * (load[1][1] through load[1][3] all point here).
 *
 * Calls sse_movlps on p->func with operands (dest, arg0); presumably this
 * appends one SSE MOVLPS that reads two floats from arg0 into the low half
 * of dest.
 */
-static void emit_load2f_2( struct x86_program *p,
+static void emit_load2f_2( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movlps(&p->func, dest, arg0);
}
/* Two-component load from a one-component source (load[1][0]).
 *
 * No dedicated code path: forwards p, dest and arg0 unchanged to
 * emit_load4f_1.
 */
-static void emit_load2f_1( struct x86_program *p,
+static void emit_load2f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
emit_load4f_1(p, dest, arg0);
}
/* One-component load; used for every source size (the whole of load[0]
 * points here).
 *
 * Calls sse_movss on p->func with operands (dest, arg0); presumably this
 * appends one SSE MOVSS that reads a single float from arg0 into dest.
 */
-static void emit_load1f_1( struct x86_program *p,
+static void emit_load1f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movss(&p->func, dest, arg0);
}
/* Dispatch table of load emitters, selected by emit_load as
 * load[sz-1][src_sz-1].
 *
 * Row N-1 holds the emit_loadNf_* functions and column M-1 the
 * emit_load*f_M variant, so the row follows sz and the column follows
 * src_sz (presumably the number of components wanted and the number
 * actually present in the source).  Where src_sz exceeds sz the row's
 * widest emitter is repeated, i.e. surplus source components are not
 * given their own code path.
 */
-static void (*load[4][4])( struct x86_program *p,
+static void (*load[4][4])( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 ) = {
- { emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1,
+ { emit_load1f_1,
+ emit_load1f_1,
+ emit_load1f_1,
emit_load1f_1 },
- { emit_load2f_1,
- emit_load2f_2,
- emit_load2f_2,
+ { emit_load2f_1,
+ emit_load2f_2,
+ emit_load2f_2,
emit_load2f_2 },
- { emit_load3f_1,
- emit_load3f_2,
- emit_load3f_3,
+ { emit_load3f_1,
+ emit_load3f_2,
+ emit_load3f_3,
emit_load3f_3 },
- { emit_load4f_1,
- emit_load4f_2,
- emit_load4f_3,
- emit_load4f_4 }
+ { emit_load4f_1,
+ emit_load4f_2,
+ emit_load4f_3,
+ emit_load4f_4 }
};
static void emit_load( struct x86_program *p,
load[sz-1][src_sz-1](p, dest, src);
}
/* Four-component store (last entry of the store[] table).
 *
 * Calls sse_movups on p->func with operands (dest, arg0); presumably this
 * appends one SSE MOVUPS that writes all four floats of arg0 to dest.
 */
-static void emit_store4f( struct x86_program *p,
+static void emit_store4f( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movups(&p->func, dest, arg0);
}
-static void emit_store3f( struct x86_program *p,
+static void emit_store3f( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
}
}
/* Two-component store (second entry of the store[] table).
 *
 * Calls sse_movlps on p->func with operands (dest, arg0); presumably this
 * appends one SSE MOVLPS that writes the low two floats of arg0 to dest.
 */
-static void emit_store2f( struct x86_program *p,
+static void emit_store2f( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
sse_movlps(&p->func, dest, arg0);
}
-static void emit_store1f( struct x86_program *p,
+static void emit_store1f( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
}
/* Dispatch table of store emitters, ordered emit_store1f .. emit_store4f.
 *
 * NOTE(review): presumably indexed by (component count - 1) from
 * emit_store, mirroring the load[] table - confirm against emit_store's
 * body.
 */
-static void (*store[4])( struct x86_program *p,
+static void (*store[4])( struct x86_program *p,
struct x86_reg dest,
- struct x86_reg arg0 ) =
+ struct x86_reg arg0 ) =
{
- emit_store1f,
- emit_store2f,
- emit_store3f,
- emit_store4f
+ emit_store1f,
+ emit_store2f,
+ emit_store3f,
+ emit_store4f
};
static void emit_store( struct x86_program *p,
* pull the stride value from memory each time).
*/
x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
-
- /* save new value of a[j].inputptr
+
+ /* save new value of a[j].inputptr
*/
x86_mov(&p->func, ptr_to_src, srcREG);
}
/* Lots of hardcoding
*
* EAX -- pointer to current output vertex
- * ECX -- pointer to current attribute
- *
+ * ECX -- pointer to current attribute
+ *
*/
static GLboolean build_vertex_emit( struct x86_program *p )
{
GLubyte *fixup, *label;
x86_init_func(&p->func);
-
+
/* Push a few regs?
*/
x86_push(&p->func, countEBP);
x86_cmp(&p->func, countEBP, srcECX);
fixup = x86_jcc_forward(&p->func, cc_E);
- /* Initialize destination register.
+ /* Initialize destination register.
*/
x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
x86_mov(&p->func, vtxESI, x86_make_disp(vtxESI, get_offset(ctx, &ctx->swtnl_context)));
vtxESI = x86_make_disp(vtxESI, get_offset(tnl, &tnl->clipspace));
-
+
/* Possibly load vp0, vp1 for viewport calcs:
*/
if (vtx->need_viewport) {
emit_store(p, dest, 4, temp);
update_src_ptr(p, srcECX, vtxESI, a);
break;
- case EMIT_2F_VIEWPORT:
+ case EMIT_2F_VIEWPORT:
get_src_ptr(p, srcECX, vtxESI, a);
emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
sse_mulps(&p->func, temp, vp0);
emit_store(p, dest, 2, temp);
update_src_ptr(p, srcECX, vtxESI, a);
break;
- case EMIT_3F_VIEWPORT:
+ case EMIT_3F_VIEWPORT:
get_src_ptr(p, srcECX, vtxESI, a);
emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
sse_mulps(&p->func, temp, vp0);
emit_store(p, dest, 3, temp);
update_src_ptr(p, srcECX, vtxESI, a);
break;
- case EMIT_4F_VIEWPORT:
+ case EMIT_4F_VIEWPORT:
get_src_ptr(p, srcECX, vtxESI, a);
emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
sse_mulps(&p->func, temp, vp0);
update_src_ptr(p, srcECX, vtxESI, a);
break;
- case EMIT_1UB_1F:
+ case EMIT_1UB_1F:
/* Test for PAD3 + 1UB:
*/
if (j > 0 &&
_mesa_printf("unknown a[%d].format %d\n", j, a->format);
return GL_FALSE; /* catch any new opcodes */
}
-
+
/* Increment j by at least 1 - may have been incremented above also:
*/
j++;
/* decr count, loop if not zero
*/
x86_dec(&p->func, countEBP);
- x86_test(&p->func, countEBP, countEBP);
+ x86_test(&p->func, countEBP, countEBP);
x86_jcc(&p->func, cc_NZ, label);
/* Exit mmx state?
void _tnl_generate_sse_emit( GLcontext *ctx )
{
struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
- struct x86_program p;
+ struct x86_program p;
if (!cpu_has_xmm) {
vtx->codegen_emit = NULL;