rsx/interpreter: Improve instructions support

- The gl_ClipDistance registers must be written statically, or else they contain uninitialized trash.
  This problem is more readily apparent on NVIDIA technology, but even AMD is not completely immune.
This commit is contained in:
kd-11 2020-04-18 20:38:56 +03:00 committed by Ivan
parent b4bf48c33b
commit 930bc9179d
6 changed files with 45 additions and 41 deletions

View File

@ -176,9 +176,9 @@ vec4 read_src(const in int index)
// TODO: wpos // TODO: wpos
value = vec4(0.); break; value = vec4(0.); break;
case 1: case 1:
value = gl_FrontFacing? in_regs[1] : in_regs[3]; break; value = gl_FrontFacing? in_regs[3] : in_regs[1]; break;
case 2: case 2:
value = gl_FrontFacing? in_regs[2] : in_regs[4]; break; value = gl_FrontFacing? in_regs[4] : in_regs[2]; break;
case 3: case 3:
value = fetch_fog_value(fog_mode, in_regs[5]); break; value = fetch_fog_value(fog_mode, in_regs[5]); break;
case 13: case 13:

View File

@ -258,6 +258,13 @@ void write_output(const in int oid, const in int mask_bit)
} }
} }
// Cannot dynamically index into the gl_ClipDistance array without causing problems due to its unknown size,
// so each clip plane is written through this macro using a literal plane index at the call site.
//
// write_clip_distance(plane, mask_bit, test, value)
//   plane    - index into gl_ClipDistance that is assigned
//   mask_bit - bit position checked with attribute_enabled(1 << mask_bit)
//   test     - additional boolean condition that must hold for 'value' to be written
//   value    - clip distance stored when both 'test' and the attribute check pass
//
// Both branches assign gl_ClipDistance[plane]: when either condition fails, 0.5f is stored instead,
// so the plane is always written and never left uninitialized.
// NOTE: this expands to an unbraced if/else, so use it only as a standalone statement.
#define write_clip_distance(plane, mask_bit, test, value)\
if (test && attribute_enabled(1 << mask_bit))\
gl_ClipDistance[plane] = value;\
else\
gl_ClipDistance[plane] = 0.5f;\
ivec4 read_addr_reg() ivec4 read_addr_reg()
{ {
return a[d0.addr_reg_sel_1]; return a[d0.addr_reg_sel_1];
@ -524,15 +531,21 @@ void main()
} }
} }
// TODO: 2-sided lighting // Unconditionally update COLOR0 and SPECULAR0
if (!attribute_enabled(1 << 0 | 1 << 2)) write_output(1, 0);
{ write_output(2, 1);
dest[1] = dest[3] = vec4(0, 0, 0, 1);
}
if (!attribute_enabled(1 << 1 | 1 << 3)) // Conditionally update COLOR1 and SPECULAR1 depending on 2-sided mask
if (control == 0)
{ {
dest[2] = dest[4] = vec4(0, 0, 0, 1); dest[3] = dest[1];
dest[4] = dest[2];
}
else
{
// 2-sided lighting
write_output(3, 2);
write_output(4, 3);
} }
if (!attribute_enabled(1 << 4)) if (!attribute_enabled(1 << 4))
@ -549,19 +562,12 @@ void main()
gl_PointSize = point_size; gl_PointSize = point_size;
} }
if (attribute_enabled(1 << 6 | 1 << 7 | 1 << 8)) write_clip_distance(0, 6, user_clip_enabled[0].x > 0, dest[5].y * user_clip_factor[0].x);
{ write_clip_distance(1, 7, user_clip_enabled[0].y > 0, dest[5].z * user_clip_factor[0].y);
gl_ClipDistance[0] = (user_clip_enabled[0].x > 0)? dest[5].y * user_clip_factor[0].x : 0.5f; write_clip_distance(2, 8, user_clip_enabled[0].z > 0, dest[5].w * user_clip_factor[0].z);
gl_ClipDistance[1] = (user_clip_enabled[0].y > 0)? dest[5].z * user_clip_factor[0].y : 0.5f; write_clip_distance(3, 9, user_clip_enabled[0].w > 0, dest[6].y * user_clip_factor[0].w);
gl_ClipDistance[2] = (user_clip_enabled[0].z > 0)? dest[5].w * user_clip_factor[0].z : 0.5f; write_clip_distance(4, 10, user_clip_enabled[1].x > 0, dest[6].z * user_clip_factor[1].x);
} write_clip_distance(5, 11, user_clip_enabled[1].y > 0, dest[6].w * user_clip_factor[1].y);
if (attribute_enabled(1 << 9 | 1 << 10 | 1 << 11))
{
gl_ClipDistance[3] = (user_clip_enabled[0].w > 0)? dest[6].y * user_clip_factor[0].w : 0.5f;
gl_ClipDistance[4] = (user_clip_enabled[1].x > 0)? dest[6].z * user_clip_factor[1].x : 0.5f;
gl_ClipDistance[5] = (user_clip_enabled[1].y > 0)? dest[6].w * user_clip_factor[1].y : 0.5f;
}
write_output(15, 12); write_output(15, 12);
write_output(6, 13); write_output(6, 13);

View File

@ -778,6 +778,7 @@ void GLGSRender::load_program_env()
vp_config[0] = current_vertex_program.base_address; vp_config[0] = current_vertex_program.base_address;
vp_config[1] = current_vertex_program.entry; vp_config[1] = current_vertex_program.entry;
vp_config[2] = current_vertex_program.output_mask; vp_config[2] = current_vertex_program.output_mask;
vp_config[3] = rsx::method_registers.two_side_light_en() ? 1u : 0u;
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);

View File

@ -114,7 +114,7 @@ namespace gl
" uint base_address;\n" " uint base_address;\n"
" uint entry;\n" " uint entry;\n"
" uint output_mask;\n" " uint output_mask;\n"
" uint reserved;\n" " uint control;\n"
" uvec4 vp_instructions[];\n" " uvec4 vp_instructions[];\n"
"};\n\n"; "};\n\n";
@ -285,26 +285,22 @@ namespace gl
return; return;
} }
if (get_driver_caps().vendor_AMD) // Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously
for (unsigned i = 0; i < replacement_map.size(); ++i)
{ {
// AMD drivers don't like texture bindings overlapping which means workarounds are needed for (int j = 0; j < 4; ++j)
// Technically this is accurate to spec, but makes efficient usage of shader resources difficult
for (unsigned i = 0; i < replacement_map.size(); ++i)
{ {
for (int j = 0; j < 4; ++j) auto& pool = allocator.pools[j];
for (int k = pool.num_used; k < pool.pool_size; ++k)
{ {
auto& pool = allocator.pools[j]; if (pool.allocated[k] == replacement_map[i].second)
for (int k = pool.num_used; k < pool.pool_size; ++k)
{ {
if (pool.allocated[k] == replacement_map[i].second) pool.allocated[k] = replacement_map[i].first;
{ pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
pool.allocated[k] = replacement_map[i].first;
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
// Exit nested loop // Exit nested loop
j = 4; j = 4;
break; break;
}
} }
} }
} }

View File

@ -467,8 +467,8 @@ VKGSRender::VKGSRender() : GSRender()
if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled) if (g_cfg.video.shader_interpreter_mode != shader_interpreter_mode::disabled)
{ {
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "vertex instructions buffer", 512 * 16); m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16);
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 16 * 0x100000, "fragment instructions buffer", 2048); m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048);
} }
const auto limits = m_device->gpu().get_limits(); const auto limits = m_device->gpu().get_limits();
@ -1759,6 +1759,7 @@ void VKGSRender::load_program_env()
vp_config[0] = current_vertex_program.base_address; vp_config[0] = current_vertex_program.base_address;
vp_config[1] = current_vertex_program.entry; vp_config[1] = current_vertex_program.entry;
vp_config[2] = current_vertex_program.output_mask; vp_config[2] = current_vertex_program.output_mask;
vp_config[3] = rsx::method_registers.two_side_light_en()? 1u: 0u;
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
m_vertex_instructions_buffer.unmap(); m_vertex_instructions_buffer.unmap();

View File

@ -34,7 +34,7 @@ namespace vk
" uint base_address;\n" " uint base_address;\n"
" uint entry;\n" " uint entry;\n"
" uint output_mask;\n" " uint output_mask;\n"
" uint reserved;\n" " uint control;\n"
" uvec4 vp_instructions[];\n" " uvec4 vp_instructions[];\n"
"};\n\n"; "};\n\n";