rsx/interpreter: Improve instructions support

- The gl_ClipDistance registers must be written statically; otherwise they contain uninitialized trash.
  This problem is more readily apparent on NVIDIA technology but even AMD is not completely immune.
This commit is contained in:
kd-11 2020-04-18 20:38:56 +03:00 committed by Ivan
parent b4bf48c33b
commit 930bc9179d
6 changed files with 45 additions and 41 deletions

View File

@ -176,9 +176,9 @@ vec4 read_src(const in int index)
// TODO: wpos
value = vec4(0.); break;
case 1:
value = gl_FrontFacing? in_regs[1] : in_regs[3]; break;
value = gl_FrontFacing? in_regs[3] : in_regs[1]; break;
case 2:
value = gl_FrontFacing? in_regs[2] : in_regs[4]; break;
value = gl_FrontFacing? in_regs[4] : in_regs[2]; break;
case 3:
value = fetch_fog_value(fog_mode, in_regs[5]); break;
case 13:

View File

@ -258,6 +258,13 @@ void write_output(const in int oid, const in int mask_bit)
}
}
// Cannot dynamically index into the gl_ClipDistance array without causing problems due to its unknown size
#define write_clip_distance(plane, mask_bit, test, value)\
if (test && attribute_enabled(1 << mask_bit))\
gl_ClipDistance[plane] = value;\
else\
gl_ClipDistance[plane] = 0.5f;\
ivec4 read_addr_reg()
{
return a[d0.addr_reg_sel_1];
@ -524,15 +531,21 @@ void main()
}
}
// TODO: 2-sided lighting
if (!attribute_enabled(1 << 0 | 1 << 2))
{
dest[1] = dest[3] = vec4(0, 0, 0, 1);
}
// Unconditionally update COLOR0 and SPECULAR0
write_output(1, 0);
write_output(2, 1);
if (!attribute_enabled(1 << 1 | 1 << 3))
// Conditionally update COLOR1 and SPECULAR1 depending on 2-sided mask
if (control == 0)
{
dest[2] = dest[4] = vec4(0, 0, 0, 1);
dest[3] = dest[1];
dest[4] = dest[2];
}
else
{
// 2-sided lighting
write_output(3, 2);
write_output(4, 3);
}
if (!attribute_enabled(1 << 4))
@ -549,19 +562,12 @@ void main()
gl_PointSize = point_size;
}
if (attribute_enabled(1 << 6 | 1 << 7 | 1 << 8))
{
gl_ClipDistance[0] = (user_clip_enabled[0].x > 0)? dest[5].y * user_clip_factor[0].x : 0.5f;
gl_ClipDistance[1] = (user_clip_enabled[0].y > 0)? dest[5].z * user_clip_factor[0].y : 0.5f;
gl_ClipDistance[2] = (user_clip_enabled[0].z > 0)? dest[5].w * user_clip_factor[0].z : 0.5f;
}
if (attribute_enabled(1 << 9 | 1 << 10 | 1 << 11))
{
gl_ClipDistance[3] = (user_clip_enabled[0].w > 0)? dest[6].y * user_clip_factor[0].w : 0.5f;
gl_ClipDistance[4] = (user_clip_enabled[1].x > 0)? dest[6].z * user_clip_factor[1].x : 0.5f;
gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f;
}
write_clip_distance(0, 6, user_clip_enabled[0].x > 0, dest[5].y * user_clip_factor[0].x);
write_clip_distance(1, 7, user_clip_enabled[0].y > 0, dest[5].z * user_clip_factor[0].y);
write_clip_distance(2, 8, user_clip_enabled[0].z > 0, dest[5].w * user_clip_factor[0].z);
write_clip_distance(3, 9, user_clip_enabled[0].w > 0, dest[6].y * user_clip_factor[0].w);
write_clip_distance(4, 10, user_clip_enabled[1].x > 0, dest[6].z * user_clip_factor[1].x);
write_clip_distance(5, 11, user_clip_enabled[1].y > 0, dest[6].w * user_clip_factor[1].y);
write_output(15, 12);
write_output(6, 13);

View File

@ -778,6 +778,7 @@ void GLGSRender::load_program_env()
vp_config[0] = current_vertex_program.base_address;
vp_config[1] = current_vertex_program.entry;
vp_config[2] = current_vertex_program.output_mask;
vp_config[3] = rsx::method_registers.two_side_light_en() ? 1u : 0u;
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);

View File

@ -114,7 +114,7 @@ namespace gl
" uint base_address;\n"
" uint entry;\n"
" uint output_mask;\n"
" uint reserved;\n"
" uint control;\n"
" uvec4 vp_instructions[];\n"
"};\n\n";
@ -285,26 +285,22 @@ namespace gl
return;
}
if (get_driver_caps().vendor_AMD)
// Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously
for (unsigned i = 0; i < replacement_map.size(); ++i)
{
// AMD drivers don't like texture bindings overlapping which means workarounds are needed
// Technically this is accurate to spec, but makes efficient usage of shader resources difficult
for (unsigned i = 0; i < replacement_map.size(); ++i)
for (int j = 0; j < 4; ++j)
{
for (int j = 0; j < 4; ++j)
auto& pool = allocator.pools[j];
for (int k = pool.num_used; k < pool.pool_size; ++k)
{
auto& pool = allocator.pools[j];
for (int k = pool.num_used; k < pool.pool_size; ++k)
if (pool.allocated[k] == replacement_map[i].second)
{
if (pool.allocated[k] == replacement_map[i].second)
{
pool.allocated[k] = replacement_map[i].first;
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
pool.allocated[k] = replacement_map[i].first;
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
// Exit nested loop
j = 4;
break;
}
// Exit nested loop
j = 4;
break;
}
}
}

View File

@ -467,8 +467,8 @@ VKGSRender::VKGSRender() : GSRender()
if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled)
{
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "vertex instructions buffer", 512 * 16);
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "fragment instructions buffer", 2048);
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16);
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048);
}
const auto limits = m_device->gpu().get_limits();
@ -1759,6 +1759,7 @@ void VKGSRender::load_program_env()
vp_config[0] = current_vertex_program.base_address;
vp_config[1] = current_vertex_program.entry;
vp_config[2] = current_vertex_program.output_mask;
vp_config[3] = rsx::method_registers.two_side_light_en()? 1u: 0u;
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
m_vertex_instructions_buffer.unmap();

View File

@ -34,7 +34,7 @@ namespace vk
" uint base_address;\n"
" uint entry;\n"
" uint output_mask;\n"
" uint reserved;\n"
" uint control;\n"
" uvec4 vp_instructions[];\n"
"};\n\n";