From bc5c4c920561fc627cfdf8af9eb35bc4ede8d809 Mon Sep 17 00:00:00 2001
From: kd-11
Date: Sun, 19 Apr 2020 20:23:12 +0300
Subject: [PATCH] rsx/gl: Implement variable path interpreter for optimal performance

Build the GL fragment interpreter as a family of programs selected by a
bitmask of compiler options instead of a single program that branches on
shader control bits at run time.

- FragmentInterpreter.glsl: texture sampling helpers and the texture
  opcodes are compiled only when WITH_TEXTURES is defined. The output
  register file is chosen by WITH_HALF_OUTPUT_REGISTER, depth export by
  WITH_DEPTH_EXPORT, and the alpha test is compiled in through one of the
  ALPHA_TEST_* defines.
- ShaderInterpreter.h: add the compiler_option bit flags.
- GLShaderInterpreter: keep one cached_program (fragment shader, linked
  program, texture pool allocator) per option mask in m_program_cache.
  get() derives the mask from the alpha test state, the shader control
  bits and the referenced texture mask, and builds the variant on a miss.
- GLGSRender: identify interpreter programs with is_interpreter() since
  get() no longer returns a single fixed pointer.
---
 .../Interpreter/FragmentInterpreter.glsl      |  50 +++-
 rpcs3/Emu/RSX/Common/ShaderInterpreter.h      |  13 +
 rpcs3/Emu/RSX/GL/GLGSRender.cpp               |  26 +-
 rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp      | 281 +++++++++++++-----
 rpcs3/Emu/RSX/GL/GLShaderInterpreter.h        |  24 +-
 5 files changed, 285 insertions(+), 109 deletions(-)

Review notes (this area is ignored when the patch is applied):

* The line structure was rebuilt from a copy whose newlines and
  indentation had been collapsed. Hunk line counts were used to place
  blank lines; indentation depth (tabs) is a best-effort guess, so
  applying may need --ignore-whitespace.
* Changes made to added lines relative to the text as received (line
  counts are unchanged, the index hashes no longer describe the result):
  - ALPHA_TEST_GREATER discarded fragments whose alpha was greater than
    alpha_ref; it now discards when alpha <= alpha_ref, matching the
    negated-condition form of the other five tests.
  - is_interpreter() formed &m_current_interpreter->prog even while
    m_current_interpreter was still nullptr; it now checks the pointer.
  - Template arguments that had been stripped were restored. The map type
    follows from its use here (u64 key, .reset()/.get() on the value).
    The std::array element type is assumed to be
    std::unique_ptr<rsx::sampled_image_descriptor_base> - TODO confirm
    against the tree.
* NOTE(review), not changed here because they sit in context lines or
  would alter hunk sizes:
  - The export block assigns ocol1 three times; ocol2/ocol3 look
    intended - confirm against the pre-image.
  - The RSX_FP_OPCODE_DIV case appears to have no break before
    RSX_FP_OPCODE_DIVSQ - confirm against the pre-image.
  - get() returns nullptr for comparison_function::never and
    load_program() stores that result in m_program; confirm every caller
    tolerates a null program.
  - destroy() removes the GL objects but leaves m_program_cache and
    m_current_interpreter populated; confirm create() is not expected to
    run again on the same object.

diff --git a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl
index bb1ce2b6c0..ac4d614937 100644
--- a/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl
+++ b/rpcs3/Emu/RSX/Common/Interpreter/FragmentInterpreter.glsl
@@ -223,6 +223,8 @@ vec4 read_cond()
 	return shuffle(cc[GET_BITS(1, 31, 1)], GET_BITS(1, 21, 8));
 }
 
+#ifdef WITH_TEXTURES
+
 vec4 _texture(in vec4 coord, float bias)
 {
 	const uint tex_num = GET_BITS(0, 17, 4);
@@ -275,6 +277,8 @@ vec4 _textureLod(in vec4 coord, float lod)
 	return vec4(0.);
 }
 
+#endif
+
 void write_dst(in vec4 value)
 {
 	bvec4 inst_mask = bvec4(
@@ -423,8 +427,11 @@ void main()
 				value = sin(s0.xxxx); break;
 			case RSX_FP_OPCODE_NRM:
 				value.xyz = normalize(s0.xyz); break;
+
+#ifdef WITH_TEXTURES
 			case RSX_FP_OPCODE_TEX:
 				value = _texture(s0, 0.f); break;
+#endif
 			default:
 				handled = false;
 			}
@@ -470,6 +477,8 @@ void main()
 					value = s0 / s1.xxxx;
 				case RSX_FP_OPCODE_DIVSQ:
 					value = s0 * inversesqrt(s1.xxxx); break;
+
+#ifdef WITH_TEXTURES
 				//case RSX_FP_OPCODE_TXP:
 				//case RSX_FP_OPCODE_TXD:
 				case RSX_FP_OPCODE_TXL:
@@ -478,6 +487,7 @@ void main()
 					value = _texture(s0, s1.x); break;
 				//case RSX_FP_OPCODE_TEXBEM:
 				//case RSX_FP_OPCODE_TXPBEM:
+#endif
 				default:
 					handled = false;
 				}
@@ -529,29 +539,41 @@ void main()
 		write_dst(value);
 	}
 
-	if (!shader_attribute(CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS))
-	{
+#ifdef WITH_HALF_OUTPUT_REGISTER
 		ocol0 = regs16[0];
 		ocol1 = regs16[4];
 		ocol1 = regs16[6];
 		ocol1 = regs16[8];
-	}
-	else
-	{
+#else
 		ocol0 = regs32[0];
 		ocol1 = regs32[2];
 		ocol1 = regs32[3];
 		ocol1 = regs32[4];
-	}
+#endif
 
-	if (shader_attribute(CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT))
-	{
-		gl_FragDepth = regs32[1].z;
-	}
-	else
-	{
-		gl_FragDepth = gl_FragCoord.z;
-	}
+#ifdef WITH_DEPTH_EXPORT
+	gl_FragDepth = regs32[1].z;
+#endif
+
+// Typically an application will pick one strategy and stick with it
+#ifdef ALPHA_TEST_GEQUAL
+	if (ocol0.a < alpha_ref) discard; // gequal
+#endif
+#ifdef ALPHA_TEST_GREATER
+	if (ocol0.a <= alpha_ref) discard; // greater
+#endif
+#ifdef ALPHA_TEST_LESS
+	if (ocol0.a >= alpha_ref) discard; // less
+#endif
+#ifdef ALPHA_TEST_LEQUAL
+	if (ocol0.a > alpha_ref) discard; // lequal
+#endif
+#ifdef ALPHA_TEST_EQUAL
+	if (ocol0.a != alpha_ref) discard; // equal
+#endif
+#ifdef ALPHA_TEST_NEQUAL
+	if (ocol0.a == alpha_ref) discard; // nequal
+#endif
 }
 
 )"
diff --git a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h
index 7ceedf224c..c11bf1aae9 100644
--- a/rpcs3/Emu/RSX/Common/ShaderInterpreter.h
+++ b/rpcs3/Emu/RSX/Common/ShaderInterpreter.h
@@ -5,6 +5,19 @@ namespace program_common
 {
 	namespace interpreter
 	{
+		enum compiler_option
+		{
+			COMPILER_OPT_ENABLE_TEXTURES = 1,
+			COMPILER_OPT_ENABLE_DEPTH_EXPORT = 2,
+			COMPILER_OPT_ENABLE_F32_EXPORT = 4,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_GE = 8,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_G = 16,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_LE = 32,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_L = 64,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_EQ = 128,
+			COMPILER_OPT_ENABLE_ALPHA_TEST_NE = 256,
+		};
+
 		static std::string get_vertex_interpreter()
 		{
 			const char* s =
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 64c0ca3ef1..ed4a150c00 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -621,13 +621,21 @@ bool GLGSRender::load_program()
 		current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
 		current_fragment_program.unnormalized_coords = 0; //unused
 	}
-	else if (m_program &&
-		(m_program != m_shader_interpreter.get() || interpreter_mode == shader_interpreter_mode::forced))
+	else if (m_program)
 	{
-		return true;
+		if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
+		{
+			return true;
+		}
+
+		if (interpreter_mode == shader_interpreter_mode::forced)
+		{
+			m_program = m_shader_interpreter.get(current_fp_metadata);
+			return true;
+		}
 	}
 
-	auto old_program = m_program;
+	const bool was_interpreter = m_shader_interpreter.is_interpreter(m_program);
 	if (interpreter_mode != shader_interpreter_mode::forced) [[likely]]
 	{
 		void* pipeline_properties = nullptr;
@@ -660,12 +668,16 @@ bool GLGSRender::load_program()
 			m_program->sync();
 		}
 	}
+	else
+	{
+		m_program = nullptr;
+	}
 
 	if (!m_program && interpreter_mode != shader_interpreter_mode::disabled)
 	{
 		// Fall back to interpreter
-		m_program = m_shader_interpreter.get();
-		if (old_program != m_program)
+		m_program = m_shader_interpreter.get(current_fp_metadata);
+		if (was_interpreter != m_shader_interpreter.is_interpreter(m_program))
 		{
 			// Program has changed, reupload
 			m_interpreter_state = rsx::invalidate_pipeline_bits;
@@ -689,7 +701,7 @@ void GLGSRender::load_program_env()
 	const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
 	const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
 	const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
-	const bool update_instruction_buffers = (!!m_interpreter_state && m_program == m_shader_interpreter.get());
+	const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
 
 	m_program->use();
 
diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
index 256174abfe..24a98f0da3 100644
--- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
+++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
@@ -44,46 +44,70 @@ namespace gl
 
 	void shader_interpreter::create()
 	{
-		texture_pools[0].create(shader::type::vertex);
-		texture_pools[1].create(shader::type::fragment);
-
 		build_vs();
-		build_fs();
-
-		program_handle.create().
-			attach(vs).
-			attach(fs).
-			link();
-
-		program_handle.uniforms[0] = GL_STREAM_BUFFER_START + 0;
-		program_handle.uniforms[1] = GL_STREAM_BUFFER_START + 1;
-
-		// Initialize texture bindings
-		int assigned = 0;
-		auto& allocator = texture_pools[1];
-		const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
-
-		for (int i = 0; i < 4; ++i)
-		{
-			for (int j = 0; j < allocator.pools[i].pool_size; ++j)
-			{
-				allocator.pools[i].allocate(assigned++);
-			}
-
-			program_handle.uniforms[type_names[i]] = allocator.pools[i].allocated;
-		}
+		build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES);
+		build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES | ::program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT);
 	}
 
 	void shader_interpreter::destroy()
 	{
-		program_handle.remove();
-		vs.remove();
-		fs.remove();
+		for (auto& prog : m_program_cache)
+		{
+			prog.second->fs.remove();
+			prog.second->prog.remove();
+		}
+
+		m_vs.remove();
 	}
 
-	glsl::program* shader_interpreter::get()
+	glsl::program* shader_interpreter::get(const interpreter::program_metadata& metadata)
 	{
-		return &program_handle;
+		// Build options
+		u64 opt = 0;
+		if (rsx::method_registers.alpha_test_enabled()) [[unlikely]]
+		{
+			switch (rsx::method_registers.alpha_func())
+			{
+			case rsx::comparison_function::always:
+				break;
+			case rsx::comparison_function::never:
+				return nullptr;
+			case rsx::comparison_function::greater_or_equal:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
+				break;
+			case rsx::comparison_function::greater:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G;
+				break;
+			case rsx::comparison_function::less_or_equal:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE;
+				break;
+			case rsx::comparison_function::less:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L;
+				break;
+			case rsx::comparison_function::equal:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ;
+				break;
+			case rsx::comparison_function::not_equal:
+				opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
+				break;
+			}
+		}
+
+		if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
+		if (rsx::method_registers.shader_control() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT;
+
+		if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES;
+
+		if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]]
+		{
+			m_current_interpreter = it->second.get();
+		}
+		else
+		{
+			m_current_interpreter = build_program(opt);
+		}
+
+		return &m_current_interpreter->prog;
 	}
 
 	void shader_interpreter::build_vs()
@@ -124,43 +148,47 @@ namespace gl
 		builder << program_common::interpreter::get_vertex_interpreter();
 		const std::string s = builder.str();
 
-		vs.create(glsl::shader::type::vertex);
-		vs.source(s);
-		vs.compile();
+		m_vs.create(glsl::shader::type::vertex);
+		m_vs.source(s);
+		m_vs.compile();
 	}
 
-	void shader_interpreter::build_fs()
+	void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data)
 	{
 		// Allocate TIUs
-		auto& allocator = texture_pools[1];
-		if (allocator.max_image_units >= 32)
+		auto& allocator = prog_data.allocator;
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
 		{
-			// 16 + 4 + 4 + 4
-			allocator.allocate(4);   // 1D
-			allocator.allocate(16);  // 2D
-			allocator.allocate(4);   // CUBE
-			allocator.allocate(4);   // 3D
-		}
-		else if (allocator.max_image_units >= 24)
-		{
-			// 16 + 4 + 2 + 2
-			allocator.allocate(2);   // 1D
-			allocator.allocate(16);  // 2D
-			allocator.allocate(2);   // CUBE
-			allocator.allocate(4);   // 3D
-		}
-		else if (allocator.max_image_units >= 16)
-		{
-			// 10 + 2 + 2 + 2
-			allocator.allocate(2);   // 1D
-			allocator.allocate(10);  // 2D
-			allocator.allocate(2);   // CUBE
-			allocator.allocate(2);   // 3D
-		}
-		else
-		{
-			// Unusable
-			rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
+			allocator.create(glsl::shader::type::fragment);
+			if (allocator.max_image_units >= 32)
+			{
+				// 16 + 4 + 4 + 4
+				allocator.allocate(4);   // 1D
+				allocator.allocate(16);  // 2D
+				allocator.allocate(4);   // CUBE
+				allocator.allocate(4);   // 3D
+			}
+			else if (allocator.max_image_units >= 24)
+			{
+				// 16 + 4 + 2 + 2
+				allocator.allocate(2);   // 1D
+				allocator.allocate(16);  // 2D
+				allocator.allocate(2);   // CUBE
+				allocator.allocate(4);   // 3D
+			}
+			else if (allocator.max_image_units >= 16)
+			{
+				// 10 + 2 + 2 + 2
+				allocator.allocate(2);   // 1D
+				allocator.allocate(10);  // 2D
+				allocator.allocate(2);   // CUBE
+				allocator.allocate(2);   // 3D
+			}
+			else
+			{
+				// Unusable
+				rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
+			}
 		}
 
 		::glsl::shader_properties properties{};
@@ -182,18 +210,67 @@ namespace gl
 		::glsl::insert_subheader_block(builder);
 		comp.insertConstants(builder);
 
-		const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
-		for (int i = 0; i < 4; ++i)
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE)
 		{
-			builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
+			builder << "#define ALPHA_TEST_GEQUAL\n";
 		}
 
-		builder << "\n"
-			"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
-			"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
-			"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
-			"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
-			"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G)
+		{
+			builder << "#define ALPHA_TEST_GREATER\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE)
+		{
+			builder << "#define ALPHA_TEST_LEQUAL\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L)
+		{
+			builder << "#define ALPHA_TEST_LESS\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ)
+		{
+			builder << "#define ALPHA_TEST_EQUAL\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE)
+		{
+			builder << "#define ALPHA_TEST_NEQUAL\n";
+		}
+
+		if (!(compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT))
+		{
+			builder << "#define WITH_HALF_OUTPUT_REGISTER\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT)
+		{
+			builder << "#define WITH_DEPTH_EXPORT\n";
+		}
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
+		{
+			builder << "#define WITH_TEXTURES\n\n";
+
+			const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
+			for (int i = 0; i < 4; ++i)
+			{
+				builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
+			}
+
+			builder << "\n"
+				"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
+				"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
+				"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
+				"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
+				"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
+		}
+		else if (compiler_options)
+		{
+			builder << "\n";
+		}
 
 		builder <<
 		"layout(std430, binding =" << GL_INTERPRETER_FRAGMENT_BLOCK << ") readonly restrict buffer FragmentInstructionBlock\n"
@@ -211,22 +288,62 @@ namespace gl
 		builder << program_common::interpreter::get_fragment_interpreter();
 		const std::string s = builder.str();
 
-		fs.create(glsl::shader::type::fragment);
-		fs.source(s);
-		fs.compile();
+		prog_data.fs.create(glsl::shader::type::fragment);
+		prog_data.fs.source(s);
+		prog_data.fs.compile();
+	}
+
+	interpreter::cached_program* shader_interpreter::build_program(u64 compiler_options)
+	{
+		auto data = new interpreter::cached_program();
+		build_fs(compiler_options, *data);
+
+		data->prog.create().
+			attach(m_vs).
+			attach(data->fs).
+			link();
+
+		data->prog.uniforms[0] = GL_STREAM_BUFFER_START + 0;
+		data->prog.uniforms[1] = GL_STREAM_BUFFER_START + 1;
+
+		if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
+		{
+			// Initialize texture bindings
+			int assigned = 0;
+			auto& allocator = data->allocator;
+			const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
+
+			for (int i = 0; i < 4; ++i)
+			{
+				for (int j = 0; j < allocator.pools[i].pool_size; ++j)
+				{
+					allocator.pools[i].allocate(assigned++);
+				}
+
+				data->prog.uniforms[type_names[i]] = allocator.pools[i].allocated;
+			}
+		}
+
+		m_program_cache[compiler_options].reset(data);
+		return data;
+	}
+
+	bool shader_interpreter::is_interpreter(const glsl::program* program)
+	{
+		return (m_current_interpreter && program == &m_current_interpreter->prog);
 	}
 
 	void shader_interpreter::update_fragment_textures(
 		const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
 		u16 reference_mask, u32* out)
 	{
-		if (reference_mask == 0)
+		if (reference_mask == 0 || !m_current_interpreter)
 		{
 			return;
 		}
 
 		// Reset allocation
-		auto& allocator = texture_pools[1];
+		auto& allocator = m_current_interpreter->allocator;
 		for (unsigned i = 0; i < 4; ++i)
 		{
 			allocator.pools[i].num_used = 0;
@@ -306,9 +423,9 @@ namespace gl
 			}
 		}
 
-		if (allocator.pools[0].flags) program_handle.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
-		if (allocator.pools[1].flags) program_handle.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
-		if (allocator.pools[2].flags) program_handle.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
-		if (allocator.pools[3].flags) program_handle.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
+		if (allocator.pools[0].flags) m_current_interpreter->prog.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
+		if (allocator.pools[1].flags) m_current_interpreter->prog.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
+		if (allocator.pools[2].flags) m_current_interpreter->prog.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
+		if (allocator.pools[3].flags) m_current_interpreter->prog.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
 	}
 }
diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
index c7e26542d6..efea49f531 100644
--- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
+++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
@@ -1,10 +1,13 @@
 #pragma once
 #include "GLHelpers.h"
+#include "../Common/ProgramStateCache.h"
 
 namespace gl
 {
 	namespace interpreter
 	{
+		using program_metadata = program_hash_util::fragment_program_utils::fragment_program_metadata;
+
 		enum class texture_pool_flags
 		{
 			dirty = 1
@@ -48,17 +51,25 @@ namespace gl
 			void create(::gl::glsl::shader::type domain);
 			void allocate(int size);
 		};
+
+		struct cached_program
+		{
+			glsl::shader fs;
+			glsl::program prog;
+			texture_pool_allocator allocator;
+		};
 	}
 
 	class shader_interpreter
 	{
-		glsl::shader vs;
-		glsl::shader fs;
-		glsl::program program_handle;
-		interpreter::texture_pool_allocator texture_pools[2];
+		glsl::shader m_vs;
+		std::unordered_map<u64, std::unique_ptr<interpreter::cached_program>> m_program_cache;
 
 		void build_vs();
-		void build_fs();
+		void build_fs(u64 compiler_options, interpreter::cached_program& prog_data);
+		interpreter::cached_program* build_program(u64 compiler_options);
+
+		interpreter::cached_program* m_current_interpreter = nullptr;
 
 	public:
 		void create();
@@ -66,6 +77,7 @@ namespace gl
 
 		void update_fragment_textures(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors, u16 reference_mask, u32* out);
 
-		glsl::program* get();
+		glsl::program* get(const interpreter::program_metadata& fp_metadata);
+		bool is_interpreter(const glsl::program* program);
 	};
 }