mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 12:32:43 +00:00
rsx/interpreter: Improve instruction accuracy
- Fix DIV instruction
- Add EXP_TEX modifier
- Implement WPOS register read
- Swap 3D and Cubemap enums to match RSX ids
- Add two extra instruction classes: flow control and packing control
- Implement remaining FP instructions, with the exception of the rare projected texture lookups
- Fix typo causing output color index > 0 to not work
- Fix KIL instruction
- Implement conditional vertex program writes
This commit is contained in:
parent
fc5b4026e1
commit
2ed50ba263
@ -818,7 +818,7 @@ namespace glsl
|
||||
case FUNCTION::FUNCTION_FRACT:
|
||||
return "fract($0)";
|
||||
case FUNCTION::FUNCTION_REFL:
|
||||
return "$Ty($0 - 2.0 * (dot($0, $1)) * $1)";
|
||||
return "reflect($0, $1)";
|
||||
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D:
|
||||
return "TEX1D($_i, $0.x)";
|
||||
case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_BIAS:
|
||||
|
@ -142,11 +142,22 @@ const float modifier_scale[] = {1.f, 2.f, 4.f, 8.f, 1.f, 0.5f, 0.25f, 0.125f};
|
||||
|
||||
vec4 regs16[48];
|
||||
vec4 regs32[48];
|
||||
vec4 cc[2];
|
||||
vec4 cc[2] = { vec4(0.), vec4(0.) };
|
||||
int inst_length = 1;
|
||||
int ip = -1;
|
||||
instruction_t inst;
|
||||
|
||||
#ifdef WITH_FLOW_CTRL
|
||||
int test_addr = -1;
|
||||
int jump_addr = -1;
|
||||
int loop_start_addr = -1;
|
||||
int loop_end_addr = -1;
|
||||
int counter = 0;
|
||||
#endif
|
||||
|
||||
vec4 wpos = gl_FragCoord * vec4(abs(wpos_scale), wpos_scale, 1., 1.) + vec4(0., wpos_bias, 0., 0.);
|
||||
vec4 fogc = fetch_fog_value(fog_mode, in_regs[5]);
|
||||
|
||||
vec4 read_src(const in int index)
|
||||
{
|
||||
const uint type = GET_BITS(index + 1, 0, 2);
|
||||
@ -173,14 +184,13 @@ vec4 read_src(const in int index)
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
// TODO: wpos
|
||||
value = vec4(0.); break;
|
||||
value = wpos; break;
|
||||
case 1:
|
||||
value = gl_FrontFacing? in_regs[3] : in_regs[1]; break;
|
||||
case 2:
|
||||
value = gl_FrontFacing? in_regs[4] : in_regs[2]; break;
|
||||
case 3:
|
||||
value = fetch_fog_value(fog_mode, in_regs[5]); break;
|
||||
value = fogc; break;
|
||||
case 13:
|
||||
value = in_regs[6]; break;
|
||||
case 14:
|
||||
@ -223,6 +233,40 @@ vec4 read_cond()
|
||||
return shuffle(cc[GET_BITS(1, 31, 1)], GET_BITS(1, 21, 8));
|
||||
}
|
||||
|
||||
#if defined(WITH_FLOW_CTRL) || defined(WITH_KIL)
|
||||
|
||||
bool check_cond()
|
||||
{
|
||||
const uint exec_mask = GET_BITS(1, 18, 3);
|
||||
if (exec_mask == 0x7)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
const vec4 cond = read_cond();
|
||||
switch (exec_mask)
|
||||
{
|
||||
case EXEC_GT | EXEC_EQ:
|
||||
return any(greaterThanEqual(cond, vec4(0.)));
|
||||
case EXEC_LT | EXEC_EQ:
|
||||
return any(lessThanEqual(cond, vec4(0.)));
|
||||
case EXEC_LT | EXEC_GT:
|
||||
return any(notEqual(cond, vec4(0.)));
|
||||
case EXEC_GT:
|
||||
return any(greaterThan(cond, vec4(0.)));
|
||||
case EXEC_LT:
|
||||
return any(lessThan(cond, vec4(0.)));
|
||||
case EXEC_EQ:
|
||||
return any(equal(cond, vec4(0.)));
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef WITH_TEXTURES
|
||||
|
||||
vec4 _texture(in vec4 coord, float bias)
|
||||
@ -236,19 +280,25 @@ vec4 _texture(in vec4 coord, float bias)
|
||||
const uint type = bitfieldExtract(texture_control, int(tex_num + tex_num), 2);
|
||||
coord.xy *= texture_parameters[tex_num].scale;
|
||||
|
||||
vec4 value;
|
||||
switch (type)
|
||||
{
|
||||
case 0:
|
||||
return texture(SAMPLER1D(tex_num), coord.x, bias);
|
||||
value = texture(SAMPLER1D(tex_num), coord.x, bias); break;
|
||||
case 1:
|
||||
return texture(SAMPLER2D(tex_num), coord.xy, bias);
|
||||
value = texture(SAMPLER2D(tex_num), coord.xy, bias); break;
|
||||
case 2:
|
||||
return texture(SAMPLER3D(tex_num), coord.xyz, bias);
|
||||
value = texture(SAMPLERCUBE(tex_num), coord.xyz, bias); break;
|
||||
case 3:
|
||||
return texture(SAMPLERCUBE(tex_num), coord.xyz, bias);
|
||||
value = texture(SAMPLER3D(tex_num), coord.xyz, bias); break;
|
||||
}
|
||||
|
||||
return vec4(0.);
|
||||
if (TEST_BIT(0, 21))
|
||||
{
|
||||
value = fma(value, vec4(2.), vec4(-1.));
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
vec4 _textureLod(in vec4 coord, float lod)
|
||||
@ -262,19 +312,25 @@ vec4 _textureLod(in vec4 coord, float lod)
|
||||
const uint type = bitfieldExtract(texture_control, int(tex_num + tex_num), 2);
|
||||
coord.xy *= texture_parameters[tex_num].scale;
|
||||
|
||||
vec4 value;
|
||||
switch (type)
|
||||
{
|
||||
case 0:
|
||||
return textureLod(SAMPLER1D(tex_num), coord.x, lod);
|
||||
value = textureLod(SAMPLER1D(tex_num), coord.x, lod); break;
|
||||
case 1:
|
||||
return textureLod(SAMPLER2D(tex_num), coord.xy, lod);
|
||||
value = textureLod(SAMPLER2D(tex_num), coord.xy, lod); break;
|
||||
case 2:
|
||||
return textureLod(SAMPLER3D(tex_num), coord.xyz, lod);
|
||||
value = textureLod(SAMPLERCUBE(tex_num), coord.xyz, lod); break;
|
||||
case 3:
|
||||
return textureLod(SAMPLERCUBE(tex_num), coord.xyz, lod);
|
||||
value = textureLod(SAMPLER3D(tex_num), coord.xyz, lod); break;
|
||||
}
|
||||
|
||||
return vec4(0.);
|
||||
if (TEST_BIT(0, 21))
|
||||
{
|
||||
value = fma(value, vec4(2.), vec4(-1.));
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -359,7 +415,9 @@ void initialize()
|
||||
regs16[j++] = vec4(0.);
|
||||
register_count--;
|
||||
}
|
||||
}
|
||||
})"
|
||||
|
||||
R"(
|
||||
|
||||
void main()
|
||||
{
|
||||
@ -374,6 +432,28 @@ void main()
|
||||
ip += inst_length;
|
||||
inst_length = 1;
|
||||
|
||||
#ifdef WITH_FLOW_CTRL
|
||||
if (ip == test_addr)
|
||||
{
|
||||
ip = jump_addr;
|
||||
test_addr = -1;
|
||||
jump_addr = -1;
|
||||
}
|
||||
else if (ip == loop_end_addr)
|
||||
{
|
||||
if (counter > 0)
|
||||
{
|
||||
counter--;
|
||||
ip = loop_start_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
loop_end_addr = -1;
|
||||
loop_start_addr = -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Decode instruction
|
||||
// endian swap + word swap
|
||||
inst.words =
|
||||
@ -383,6 +463,64 @@ void main()
|
||||
inst.opcode = GET_BITS(0, 24, 6);
|
||||
inst.end = TEST_BIT(0, 0);
|
||||
|
||||
#ifdef WITH_FLOW_CTRL
|
||||
if (TEST_BIT(2, 31))
|
||||
{
|
||||
// Flow control
|
||||
switch (inst.opcode | (1 << 6))
|
||||
{
|
||||
//case RSX_FP_OPCODE_CAL:
|
||||
// Function call not yet found in the wild for this hw class
|
||||
case RSX_FP_OPCODE_RET:
|
||||
inst.end = true;
|
||||
continue;
|
||||
case RSX_FP_OPCODE_IFE:
|
||||
if (check_cond())
|
||||
{
|
||||
// Go down IF path
|
||||
if (inst.words.z < inst.words.w)
|
||||
{
|
||||
test_addr = int(inst.words.z >> 2);
|
||||
jump_addr = int(inst.words.w >> 2);
|
||||
}
|
||||
// If simple IF..ENDIF, do nothing
|
||||
}
|
||||
else
|
||||
{
|
||||
// Go to ELSE path
|
||||
ip = int(inst.words.z >> 2);
|
||||
inst_length = 0;
|
||||
}
|
||||
continue;
|
||||
case RSX_FP_OPCODE_LOOP:
|
||||
case RSX_FP_OPCODE_REP:
|
||||
if (check_cond())
|
||||
{
|
||||
counter = int(GET_BITS(2, 2, 8) - GET_BITS(2, 10, 8));
|
||||
counter /= int(GET_BITS(2, 19, 8));
|
||||
loop_start_addr = ip + 1;
|
||||
loop_end_addr = int(inst.words.w >> 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
ip = int(inst.words.w >> 2);
|
||||
inst_length = 0;
|
||||
}
|
||||
continue;
|
||||
case RSX_FP_OPCODE_BRK:
|
||||
if (loop_end_addr > 0)
|
||||
{
|
||||
ip = loop_end_addr;
|
||||
inst_length = 0;
|
||||
counter = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Class 1, no input/output
|
||||
switch (inst.opcode)
|
||||
{
|
||||
@ -390,8 +528,15 @@ void main()
|
||||
case RSX_FP_OPCODE_FENCT:
|
||||
case RSX_FP_OPCODE_FENCB:
|
||||
continue;
|
||||
#ifdef WITH_KIL
|
||||
case RSX_FP_OPCODE_KIL:
|
||||
discard; return;
|
||||
if (check_cond())
|
||||
{
|
||||
discard;
|
||||
return;
|
||||
}
|
||||
continue;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Class 2, 1 input
|
||||
@ -431,6 +576,31 @@ void main()
|
||||
#ifdef WITH_TEXTURES
|
||||
case RSX_FP_OPCODE_TEX:
|
||||
value = _texture(s0, 0.f); break;
|
||||
case RSX_FP_OPCODE_TXP:
|
||||
value = _texture(vec4(s0.xyz / s0.w, s0.w), 0.f); break;
|
||||
#endif
|
||||
|
||||
#ifdef WITH_PACKING
|
||||
case RSX_FP_OPCODE_PK2:
|
||||
value = vec4(uintBitsToFloat(packHalf2x16(s0.xy))); break;
|
||||
case RSX_FP_OPCODE_PK4:
|
||||
value = vec4(uintBitsToFloat(packSnorm4x8(s0))); break;
|
||||
case RSX_FP_OPCODE_PK16:
|
||||
value = vec4(uintBitsToFloat(packSnorm2x16(s0.xy))); break;
|
||||
case RSX_FP_OPCODE_PKG:
|
||||
// Should be similar to PKB but with gamma correction, see description of PK4UBG in khronos page
|
||||
case RSX_FP_OPCODE_PKB:
|
||||
value = vec4(uintBitsToFloat(packUnorm4x8(s0))); break;
|
||||
case RSX_FP_OPCODE_UP2:
|
||||
value = unpackHalf2x16(floatBitsToUint(s0.x)).xyxy; break;
|
||||
case RSX_FP_OPCODE_UP4:
|
||||
value = unpackSnorm4x8(floatBitsToUint(s0.x)); break;
|
||||
case RSX_FP_OPCODE_UP16:
|
||||
value = unpackSnorm2x16(floatBitsToUint(s0.x)).xyxy; break;
|
||||
case RSX_FP_OPCODE_UPG:
|
||||
// Same as UPB with gamma correction
|
||||
case RSX_FP_OPCODE_UPB:
|
||||
value = unpackUnorm4x8(floatBitsToUint(s0.x)); break;
|
||||
#endif
|
||||
default:
|
||||
handled = false;
|
||||
@ -474,12 +644,13 @@ void main()
|
||||
case RSX_FP_OPCODE_POW:
|
||||
value = pow(s0, s1).xxxx; break;
|
||||
case RSX_FP_OPCODE_DIV:
|
||||
value = s0 / s1.xxxx;
|
||||
value = s0 / s1.xxxx; break;
|
||||
case RSX_FP_OPCODE_DIVSQ:
|
||||
value = s0 * inversesqrt(s1.xxxx); break;
|
||||
case RSX_FP_OPCODE_REFL:
|
||||
value = reflect(s0, s1); break;
|
||||
|
||||
#ifdef WITH_TEXTURES
|
||||
//case RSX_FP_OPCODE_TXP:
|
||||
//case RSX_FP_OPCODE_TXD:
|
||||
case RSX_FP_OPCODE_TXL:
|
||||
value = _textureLod(s0, s1.x); break;
|
||||
@ -507,48 +678,27 @@ void main()
|
||||
value = dot(s0.xy, s1.xy).xxxx + s2.xxxx; break;
|
||||
}
|
||||
}
|
||||
|
||||
// Flow control
|
||||
/* case RSX_FP_OPCODE_BRK:
|
||||
case RSX_FP_OPCODE_CAL:
|
||||
case RSX_FP_OPCODE_IFE:
|
||||
case RSX_FP_OPCODE_LOOP:
|
||||
case RSX_FP_OPCODE_REP:
|
||||
case RSX_FP_OPCODE_RET:
|
||||
|
||||
#if 0
|
||||
// Other
|
||||
case RSX_FP_OPCODE_PK4:
|
||||
case RSX_FP_OPCODE_UP4:
|
||||
case RSX_FP_OPCODE_BEM:
|
||||
case RSX_FP_OPCODE_BEMLUM:
|
||||
case RSX_FP_OPCODE_LIT:
|
||||
case RSX_FP_OPCODE_LIF:
|
||||
case RSX_FP_OPCODE_PK2:
|
||||
case RSX_FP_OPCODE_FENCT:
|
||||
case RSX_FP_OPCODE_FENCB:
|
||||
case RSX_FP_OPCODE_UP2:
|
||||
case RSX_FP_OPCODE_PKB:
|
||||
case RSX_FP_OPCODE_UPB:
|
||||
case RSX_FP_OPCODE_PK16:
|
||||
case RSX_FP_OPCODE_UP16:
|
||||
case RSX_FP_OPCODE_BEM:
|
||||
case RSX_FP_OPCODE_PKG:
|
||||
case RSX_FP_OPCODE_UPG:
|
||||
case RSX_FP_OPCODE_BEMLUM:
|
||||
case RSX_FP_OPCODE_REFL:
|
||||
case RSX_FP_OPCODE_TIMESWTEX:*/
|
||||
|
||||
case RSX_FP_OPCODE_TIMESWTEX:
|
||||
#endif
|
||||
write_dst(value);
|
||||
}
|
||||
|
||||
#ifdef WITH_HALF_OUTPUT_REGISTER
|
||||
ocol0 = regs16[0];
|
||||
ocol1 = regs16[4];
|
||||
ocol1 = regs16[6];
|
||||
ocol1 = regs16[8];
|
||||
ocol2 = regs16[6];
|
||||
ocol3 = regs16[8];
|
||||
#else
|
||||
ocol0 = regs32[0];
|
||||
ocol1 = regs32[2];
|
||||
ocol1 = regs32[3];
|
||||
ocol1 = regs32[4];
|
||||
ocol2 = regs32[3];
|
||||
ocol3 = regs32[4];
|
||||
#endif
|
||||
|
||||
#ifdef WITH_DEPTH_EXPORT
|
||||
@ -560,7 +710,7 @@ void main()
|
||||
if (ocol0.a < alpha_ref) discard; // gequal
|
||||
#endif
|
||||
#ifdef ALPHA_TEST_GREATER
|
||||
if (ocol0.a > alpha_ref) discard; // greater
|
||||
if (ocol0.a <= alpha_ref) discard; // greater
|
||||
#endif
|
||||
#ifdef ALPHA_TEST_LESS
|
||||
if (ocol0.a >= alpha_ref) discard; // less
|
||||
|
@ -191,6 +191,29 @@ vec4 _distance(const in vec4 a, const in vec4 b)
|
||||
return vec4(1., a.y * b.y, a.z, b.w);
|
||||
}
|
||||
|
||||
bvec4 test_cond(const in vec4 cond, const in uint mode)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case EXEC_GT | EXEC_EQ | EXEC_LT:
|
||||
return bvec4(true);
|
||||
case EXEC_GT | EXEC_EQ:
|
||||
return greaterThanEqual(cond, vec4(0.));
|
||||
case EXEC_LT | EXEC_EQ:
|
||||
return lessThanEqual(cond, vec4(0.));
|
||||
case EXEC_LT | EXEC_GT:
|
||||
return notEqual(cond, vec4(0.));
|
||||
case EXEC_GT:
|
||||
return greaterThan(cond, vec4(0.));
|
||||
case EXEC_LT:
|
||||
return lessThan(cond, vec4(0.));
|
||||
case EXEC_EQ:
|
||||
return equal(cond, vec4(0.));
|
||||
default:
|
||||
return bvec4(false);
|
||||
}
|
||||
}
|
||||
|
||||
// Local registers
|
||||
uvec4 instr;
|
||||
vec4 temp[32];
|
||||
@ -202,6 +225,11 @@ D1 d1;
|
||||
D2 d2;
|
||||
D3 d3;
|
||||
|
||||
vec4 get_cond()
|
||||
{
|
||||
return shuffle(cc[d0.cond_reg_sel_1], d0.swizzle);
|
||||
}
|
||||
|
||||
void write_sca(in float value)
|
||||
{
|
||||
if (d0.saturate)
|
||||
@ -229,23 +257,30 @@ void write_vec(in vec4 value)
|
||||
value = clamp(value, 0, 1);
|
||||
}
|
||||
|
||||
bvec4 write_mask = d3.vec_mask;
|
||||
if (d0.cond_test_enable)
|
||||
{
|
||||
const bvec4 mask = test_cond(get_cond(), d0.cond);
|
||||
write_mask = bvec4(uvec4(write_mask) & uvec4(mask));
|
||||
}
|
||||
|
||||
if (d0.dst_tmp == 0x3f && !d0.vec_result)
|
||||
{
|
||||
if (d0.cond_update_enable_1)
|
||||
{
|
||||
reg_mov(cc[d0.cond_reg_sel_1], value, d3.vec_mask);
|
||||
reg_mov(cc[d0.cond_reg_sel_1], value, write_mask);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (d0.vec_result && d3.dst < 16)
|
||||
{
|
||||
reg_mov(dest[d3.dst], value, d3.vec_mask);
|
||||
reg_mov(dest[d3.dst], value, write_mask);
|
||||
}
|
||||
|
||||
if (d0.dst_tmp != 0x3f)
|
||||
{
|
||||
reg_mov(temp[d0.dst_tmp], value, d3.vec_mask);
|
||||
reg_mov(temp[d0.dst_tmp], value, write_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -286,34 +321,12 @@ bool static_branch()
|
||||
return (cond == actual);
|
||||
}
|
||||
|
||||
bvec4 test_cond(vec4 cond, uint mode)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case EXEC_GT | EXEC_EQ:
|
||||
return greaterThanEqual(cond, vec4(0.));
|
||||
case EXEC_LT | EXEC_EQ:
|
||||
return lessThanEqual(cond, vec4(0.));
|
||||
case EXEC_LT | EXEC_GT:
|
||||
return notEqual(cond, vec4(0.));
|
||||
case EXEC_GT:
|
||||
return greaterThan(cond, vec4(0.));
|
||||
case EXEC_LT:
|
||||
return lessThan(cond, vec4(0.));
|
||||
case EXEC_EQ:
|
||||
return equal(cond, vec4(0.));
|
||||
}
|
||||
|
||||
return bvec4(false);
|
||||
}
|
||||
|
||||
bool dynamic_branch()
|
||||
{
|
||||
if (d0.cond == (EXEC_LT | EXEC_GT | EXEC_EQ)) return true;
|
||||
if (d0.cond == 0) return false;
|
||||
|
||||
vec4 cond = shuffle(cc[d0.cond_reg_sel_1], d0.swizzle);
|
||||
return any(test_cond(cond, d0.cond));
|
||||
return any(test_cond(get_cond(), d0.cond));
|
||||
}
|
||||
|
||||
vec4 read_src(const in int index)
|
||||
|
@ -328,61 +328,81 @@ size_t fragment_program_utils::get_fragment_program_ucode_size(const void* ptr)
|
||||
|
||||
fragment_program_utils::fragment_program_metadata fragment_program_utils::analyse_fragment_program(const void* ptr)
|
||||
{
|
||||
fragment_program_utils::fragment_program_metadata result{};
|
||||
result.program_start_offset = UINT32_MAX;
|
||||
const auto instBuffer = ptr;
|
||||
s32 index = 0;
|
||||
s32 program_offset = -1;
|
||||
u32 ucode_size = 0;
|
||||
u32 constants_size = 0;
|
||||
u16 textures_mask = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
const auto inst = v128::loadu(instBuffer, index);
|
||||
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
|
||||
|
||||
if (opcode)
|
||||
// Check for the opcode high bit, which indicates a branch instruction (opcodes 0x40...0x45)
|
||||
if (inst._u32[2] & (1 << 23))
|
||||
{
|
||||
if (program_offset < 0)
|
||||
program_offset = index * 16;
|
||||
result.has_branch_instructions = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
|
||||
if (opcode)
|
||||
{
|
||||
if (result.program_start_offset == umax)
|
||||
result.program_start_offset = index * 16;
|
||||
|
||||
switch(opcode)
|
||||
{
|
||||
case RSX_FP_OPCODE_TEX:
|
||||
case RSX_FP_OPCODE_TEXBEM:
|
||||
case RSX_FP_OPCODE_TXP:
|
||||
case RSX_FP_OPCODE_TXPBEM:
|
||||
case RSX_FP_OPCODE_TXD:
|
||||
case RSX_FP_OPCODE_TXB:
|
||||
case RSX_FP_OPCODE_TXL:
|
||||
{
|
||||
//Bits 17-20 of word 1, swapped within u16 sections
|
||||
//Bits 16-23 are swapped into the upper 8 bits (24-31)
|
||||
const u32 tex_num = (inst._u32[0] >> 25) & 15;
|
||||
textures_mask |= (1 << tex_num);
|
||||
break;
|
||||
}
|
||||
switch (opcode)
|
||||
{
|
||||
case RSX_FP_OPCODE_TEX:
|
||||
case RSX_FP_OPCODE_TEXBEM:
|
||||
case RSX_FP_OPCODE_TXP:
|
||||
case RSX_FP_OPCODE_TXPBEM:
|
||||
case RSX_FP_OPCODE_TXD:
|
||||
case RSX_FP_OPCODE_TXB:
|
||||
case RSX_FP_OPCODE_TXL:
|
||||
{
|
||||
//Bits 17-20 of word 1, swapped within u16 sections
|
||||
//Bits 16-23 are swapped into the upper 8 bits (24-31)
|
||||
const u32 tex_num = (inst._u32[0] >> 25) & 15;
|
||||
result.referenced_textures_mask |= (1 << tex_num);
|
||||
break;
|
||||
}
|
||||
case RSX_FP_OPCODE_PK4:
|
||||
case RSX_FP_OPCODE_UP4:
|
||||
case RSX_FP_OPCODE_PK2:
|
||||
case RSX_FP_OPCODE_UP2:
|
||||
case RSX_FP_OPCODE_PKB:
|
||||
case RSX_FP_OPCODE_UPB:
|
||||
case RSX_FP_OPCODE_PK16:
|
||||
case RSX_FP_OPCODE_UP16:
|
||||
case RSX_FP_OPCODE_PKG:
|
||||
case RSX_FP_OPCODE_UPG:
|
||||
{
|
||||
result.has_pack_instructions = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_constant(inst._u32[1]) || is_constant(inst._u32[2]) || is_constant(inst._u32[3]))
|
||||
{
|
||||
//Instruction references constant, skip one slot occupied by data
|
||||
index++;
|
||||
ucode_size += 16;
|
||||
constants_size += 16;
|
||||
result.program_ucode_length += 16;
|
||||
result.program_constants_buffer_length += 16;
|
||||
}
|
||||
}
|
||||
|
||||
if (program_offset >= 0)
|
||||
if (result.program_start_offset != umax)
|
||||
{
|
||||
ucode_size += 16;
|
||||
result.program_ucode_length += 16;
|
||||
}
|
||||
|
||||
if ((inst._u32[0] >> 8) & 0x1)
|
||||
{
|
||||
if (program_offset < 0)
|
||||
if (result.program_start_offset == umax)
|
||||
{
|
||||
program_offset = index * 16;
|
||||
ucode_size = 16;
|
||||
result.program_start_offset = index * 16;
|
||||
result.program_constants_buffer_length = 16;
|
||||
}
|
||||
|
||||
break;
|
||||
@ -391,7 +411,7 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
|
||||
index++;
|
||||
}
|
||||
|
||||
return{ static_cast<u32>(program_offset), ucode_size, constants_size, textures_mask };
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentProgram& program)
|
||||
|
@ -50,6 +50,9 @@ namespace program_hash_util
|
||||
u32 program_ucode_length;
|
||||
u32 program_constants_buffer_length;
|
||||
u16 referenced_textures_mask;
|
||||
|
||||
bool has_pack_instructions;
|
||||
bool has_branch_instructions;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -16,6 +16,9 @@ namespace program_common
|
||||
COMPILER_OPT_ENABLE_ALPHA_TEST_L = 64,
|
||||
COMPILER_OPT_ENABLE_ALPHA_TEST_EQ = 128,
|
||||
COMPILER_OPT_ENABLE_ALPHA_TEST_NE = 256,
|
||||
COMPILER_OPT_ENABLE_FLOW_CTRL = 512,
|
||||
COMPILER_OPT_ENABLE_PACKING = 1024,
|
||||
COMPILER_OPT_ENABLE_KIL = 2048
|
||||
};
|
||||
|
||||
static std::string get_vertex_interpreter()
|
||||
|
@ -95,8 +95,10 @@ namespace gl
|
||||
|
||||
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
|
||||
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT;
|
||||
|
||||
if (rsx::method_registers.shader_control() & RSX_SHADER_CONTROL_USES_KIL) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL;
|
||||
if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES;
|
||||
if (metadata.has_branch_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL;
|
||||
if (metadata.has_pack_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING;
|
||||
|
||||
if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]]
|
||||
{
|
||||
@ -250,6 +252,21 @@ namespace gl
|
||||
builder << "#define WITH_DEPTH_EXPORT\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL)
|
||||
{
|
||||
builder << "#define WITH_FLOW_CTRL\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING)
|
||||
{
|
||||
builder << "#define WITH_PACKING\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL)
|
||||
{
|
||||
builder << "#define WITH_KIL\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
|
||||
{
|
||||
builder << "#define WITH_TEXTURES\n\n";
|
||||
|
@ -142,6 +142,21 @@ namespace vk
|
||||
builder << "#define WITH_DEPTH_EXPORT\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL)
|
||||
{
|
||||
builder << "#define WITH_FLOW_CTRL\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING)
|
||||
{
|
||||
builder << "#define WITH_PACKING\n";
|
||||
}
|
||||
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL)
|
||||
{
|
||||
builder << "#define WITH_KIL\n";
|
||||
}
|
||||
|
||||
const char* type_names[] = { "sampler1D", "sampler2D", "sampler3D", "samplerCube" };
|
||||
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
|
||||
{
|
||||
@ -561,7 +576,10 @@ namespace vk
|
||||
|
||||
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
|
||||
if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT;
|
||||
if (rsx::method_registers.shader_control() & RSX_SHADER_CONTROL_USES_KIL) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL;
|
||||
if (metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES;
|
||||
if (metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL;
|
||||
if (metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING;
|
||||
|
||||
if (m_current_key == key) [[likely]]
|
||||
{
|
||||
|
@ -1,4 +1,4 @@
|
||||
#pragma once
|
||||
#pragma once
|
||||
#include "Utilities/types.h"
|
||||
|
||||
namespace rsx
|
||||
|
Loading…
x
Reference in New Issue
Block a user