mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-15 13:21:14 +00:00
rsx/interpreter: Improve instructions support
- The gl_ClipDistance registers must be written statically; otherwise they contain uninitialized trash. This problem is more readily apparent on NVIDIA technology, but even AMD is not completely immune.
This commit is contained in:
parent
b4bf48c33b
commit
930bc9179d
@ -176,9 +176,9 @@ vec4 read_src(const in int index)
|
||||
// TODO: wpos
|
||||
value = vec4(0.); break;
|
||||
case 1:
|
||||
value = gl_FrontFacing? in_regs[1] : in_regs[3]; break;
|
||||
value = gl_FrontFacing? in_regs[3] : in_regs[1]; break;
|
||||
case 2:
|
||||
value = gl_FrontFacing? in_regs[2] : in_regs[4]; break;
|
||||
value = gl_FrontFacing? in_regs[4] : in_regs[2]; break;
|
||||
case 3:
|
||||
value = fetch_fog_value(fog_mode, in_regs[5]); break;
|
||||
case 13:
|
||||
|
@ -258,6 +258,13 @@ void write_output(const in int oid, const in int mask_bit)
|
||||
}
|
||||
}
|
||||
|
||||
// Cannot dynamically index into the gl_ClipDistance array without causing problems due to its unknown size
|
||||
#define write_clip_distance(plane, mask_bit, test, value)\
|
||||
if (test && attribute_enabled(1 << mask_bit))\
|
||||
gl_ClipDistance[plane] = value;\
|
||||
else\
|
||||
gl_ClipDistance[plane] = 0.5f;\
|
||||
|
||||
ivec4 read_addr_reg()
|
||||
{
|
||||
return a[d0.addr_reg_sel_1];
|
||||
@ -524,15 +531,21 @@ void main()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: 2-sided lighting
|
||||
if (!attribute_enabled(1 << 0 | 1 << 2))
|
||||
{
|
||||
dest[1] = dest[3] = vec4(0, 0, 0, 1);
|
||||
}
|
||||
// Unconditionally update COLOR0 and SPECULAR0
|
||||
write_output(1, 0);
|
||||
write_output(2, 1);
|
||||
|
||||
if (!attribute_enabled(1 << 1 | 1 << 3))
|
||||
// Conditionally update COLOR1 and SPECULAR1 depending on 2-sided mask
|
||||
if (control == 0)
|
||||
{
|
||||
dest[2] = dest[4] = vec4(0, 0, 0, 1);
|
||||
dest[3] = dest[1];
|
||||
dest[4] = dest[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2-sided lighting
|
||||
write_output(3, 2);
|
||||
write_output(4, 3);
|
||||
}
|
||||
|
||||
if (!attribute_enabled(1 << 4))
|
||||
@ -549,19 +562,12 @@ void main()
|
||||
gl_PointSize = point_size;
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 6 | 1 << 7 | 1 << 8))
|
||||
{
|
||||
gl_ClipDistance[0] = (user_clip_enabled[0].x > 0)? dest[5].y * user_clip_factor[0].x : 0.5f;
|
||||
gl_ClipDistance[1] = (user_clip_enabled[0].y > 0)? dest[5].z * user_clip_factor[0].y : 0.5f;
|
||||
gl_ClipDistance[2] = (user_clip_enabled[0].z > 0)? dest[5].w * user_clip_factor[0].z : 0.5f;
|
||||
}
|
||||
|
||||
if (attribute_enabled(1 << 9 | 1 << 10 | 1 << 11))
|
||||
{
|
||||
gl_ClipDistance[3] = (user_clip_enabled[0].w > 0)? dest[6].y * user_clip_factor[0].w : 0.5f;
|
||||
gl_ClipDistance[4] = (user_clip_enabled[1].x > 0)? dest[6].z * user_clip_factor[1].x : 0.5f;
|
||||
gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f;
|
||||
}
|
||||
write_clip_distance(0, 6, user_clip_enabled[0].x > 0, dest[5].y * user_clip_factor[0].x);
|
||||
write_clip_distance(1, 7, user_clip_enabled[0].y > 0, dest[5].z * user_clip_factor[0].y);
|
||||
write_clip_distance(2, 8, user_clip_enabled[0].z > 0, dest[5].w * user_clip_factor[0].z);
|
||||
write_clip_distance(3, 9, user_clip_enabled[0].w > 0, dest[6].y * user_clip_factor[0].w);
|
||||
write_clip_distance(4, 10, user_clip_enabled[1].x > 0, dest[6].z * user_clip_factor[1].x);
|
||||
write_clip_distance(5, 11, user_clip_enabled[1].y > 0, dest[6].w * user_clip_factor[1].y);
|
||||
|
||||
write_output(15, 12);
|
||||
write_output(6, 13);
|
||||
|
@ -778,6 +778,7 @@ void GLGSRender::load_program_env()
|
||||
vp_config[0] = current_vertex_program.base_address;
|
||||
vp_config[1] = current_vertex_program.entry;
|
||||
vp_config[2] = current_vertex_program.output_mask;
|
||||
vp_config[3] = rsx::method_registers.two_side_light_en() ? 1u : 0u;
|
||||
|
||||
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
|
||||
|
||||
|
@ -114,7 +114,7 @@ namespace gl
|
||||
" uint base_address;\n"
|
||||
" uint entry;\n"
|
||||
" uint output_mask;\n"
|
||||
" uint reserved;\n"
|
||||
" uint control;\n"
|
||||
" uvec4 vp_instructions[];\n"
|
||||
"};\n\n";
|
||||
|
||||
@ -285,26 +285,22 @@ namespace gl
|
||||
return;
|
||||
}
|
||||
|
||||
if (get_driver_caps().vendor_AMD)
|
||||
// Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously
|
||||
for (unsigned i = 0; i < replacement_map.size(); ++i)
|
||||
{
|
||||
// AMD drivers don't like texture bindings overlapping which means workarounds are needed
|
||||
// Technically this is accurate to spec, but makes efficient usage of shader resources difficult
|
||||
for (unsigned i = 0; i < replacement_map.size(); ++i)
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
auto& pool = allocator.pools[j];
|
||||
for (int k = pool.num_used; k < pool.pool_size; ++k)
|
||||
{
|
||||
auto& pool = allocator.pools[j];
|
||||
for (int k = pool.num_used; k < pool.pool_size; ++k)
|
||||
if (pool.allocated[k] == replacement_map[i].second)
|
||||
{
|
||||
if (pool.allocated[k] == replacement_map[i].second)
|
||||
{
|
||||
pool.allocated[k] = replacement_map[i].first;
|
||||
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
|
||||
pool.allocated[k] = replacement_map[i].first;
|
||||
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
|
||||
|
||||
// Exit nested loop
|
||||
j = 4;
|
||||
break;
|
||||
}
|
||||
// Exit nested loop
|
||||
j = 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -467,8 +467,8 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
|
||||
if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled)
|
||||
{
|
||||
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "vertex instructions buffer", 512 * 16);
|
||||
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "fragment instructions buffer", 2048);
|
||||
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16);
|
||||
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048);
|
||||
}
|
||||
|
||||
const auto limits = m_device->gpu().get_limits();
|
||||
@ -1759,6 +1759,7 @@ void VKGSRender::load_program_env()
|
||||
vp_config[0] = current_vertex_program.base_address;
|
||||
vp_config[1] = current_vertex_program.entry;
|
||||
vp_config[2] = current_vertex_program.output_mask;
|
||||
vp_config[3] = rsx::method_registers.two_side_light_en()? 1u: 0u;
|
||||
|
||||
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
|
||||
m_vertex_instructions_buffer.unmap();
|
||||
|
@ -34,7 +34,7 @@ namespace vk
|
||||
" uint base_address;\n"
|
||||
" uint entry;\n"
|
||||
" uint output_mask;\n"
|
||||
" uint reserved;\n"
|
||||
" uint control;\n"
|
||||
" uvec4 vp_instructions[];\n"
|
||||
"};\n\n";
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user