From d77a78cdf1b3ca523641ec4ee966e92a9ffc99a4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 20 Jun 2023 14:54:32 +0300 Subject: [PATCH] rsx: Rework texture coordinate handling to support clamping and a more sane scale-bias setup --- rpcs3/Emu/RSX/Common/TextureUtils.h | 14 +- rpcs3/Emu/RSX/Common/texture_cache.h | 22 +++- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 5 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 +- .../RSX/Program/FragmentProgramDecompiler.cpp | 4 + .../RSX/Program/FragmentProgramDecompiler.h | 4 + rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 97 ++++++++------ rpcs3/Emu/RSX/Program/GLSLCommon.h | 1 + .../GLSLSnippets/RSXProg/RSXDefines2.glsl | 10 +- .../RSXFragmentTextureDepthConversion.glsl | 6 +- .../RSXProg/RSXFragmentTextureMSAAOps.glsl | 4 +- .../RSXProg/RSXFragmentTextureOps.glsl | 120 ++++++++++++++---- rpcs3/Emu/RSX/Program/GLSLTypes.h | 15 ++- rpcs3/Emu/RSX/Program/program_util.h | 5 +- rpcs3/Emu/RSX/RSXThread.cpp | 16 ++- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 5 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 +- 17 files changed, 239 insertions(+), 99 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index dd64418dd3..de2cbf5f44 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -133,9 +133,17 @@ namespace rsx u8 samples = 1; u32 ref_address = 0; u64 surface_cache_tag = 0; - f32 scale_x = 1.f; - f32 scale_y = 1.f; - f32 scale_z = 1.f; + +#pragma pack(push, 1) + struct + { + f32 scale[3] = { 1.f, 1.f, 1.f }; /* Identity scale, same default as the removed scale_x/y/z */ + f32 bias[3] = { 0.f, 0.f, 0.f }; + f32 clamp_min[2] = { 0.f, 0.f }; /* Never left indeterminate; these floats are copied verbatim into the shader constants */ + f32 clamp_max[2] = { 0.f, 0.f }; + bool clamp = false; + } texcoord_xform; +#pragma pack(pop) virtual ~sampled_image_descriptor_base() = default; virtual u32 encoded_component_map() const = 0; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 479590f855..e822d4e5dc 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -179,11 +179,16 @@ namespace rsx upload_context = ctx; 
format_class = ftype; is_cyclic_reference = cyclic_reference; - scale_x = scale.width; - scale_y = scale.height; - scale_z = scale.depth; image_type = type; samples = msaa_samples; + + texcoord_xform.scale[0] = scale.width; + texcoord_xform.scale[1] = scale.height; + texcoord_xform.scale[2] = scale.depth; + texcoord_xform.bias[0] = 0.; + texcoord_xform.bias[1] = 0.; + texcoord_xform.bias[2] = 0.; + texcoord_xform.clamp = false; } sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason, @@ -196,10 +201,15 @@ namespace rsx image_handle = 0; upload_context = ctx; format_class = ftype; - scale_x = scale.width; - scale_y = scale.height; - scale_z = scale.depth; image_type = type; + + texcoord_xform.scale[0] = scale.width; + texcoord_xform.scale[1] = scale.height; + texcoord_xform.scale[2] = scale.depth; + texcoord_xform.bias[0] = 0.; + texcoord_xform.bias[1] = 0.; + texcoord_xform.bias[2] = 0.; + texcoord_xform.clamp = false; } void simplify() diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 23c4f90954..933459af3d 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -187,7 +187,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_depth_conversion = properties.redirected_sampler_mask != 0; m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos); m_shader_props.require_texture_ops = properties.has_tex_op; - m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0; + m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0; m_shader_props.require_texture_expand = properties.has_exp_tex_op; m_shader_props.require_srgb_to_linear = properties.has_upg; m_shader_props.require_linear_to_srgb = properties.has_pkg; @@ -198,6 +198,9 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.disable_early_discard 
= !::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; m_shader_props.ROP_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA; + m_shader_props.require_tex1D_ops = properties.has_tex1D; + m_shader_props.require_tex2D_ops = properties.has_tex2D; + m_shader_props.require_tex3D_ops = properties.has_tex3D; glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index ca1a1f4dba..d6c50ddeca 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -867,10 +867,10 @@ void GLGSRender::load_program_env() if (update_fragment_texture_env) { // Fragment texture parameters - auto mapping = m_texture_parameters_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); + auto mapping = m_texture_parameters_buffer->alloc_from_heap(768, m_uniform_buffer_offset_align); current_fragment_program.texture_params.write_to(mapping.first, current_fp_metadata.referenced_textures_mask); - m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 512); + m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 768); } if (update_raster_env) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index f644bd95ec..78cb3479ec 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -253,15 +253,19 @@ std::string FragmentProgramDecompiler::AddTex() switch (m_prog.get_texture_dimension(dst.tex_num)) { case rsx::texture_dimension_extended::texture_dimension_1d: + properties.has_tex1D = true; sampler = "sampler1D"; break; case rsx::texture_dimension_extended::texture_dimension_cubemap: + properties.has_tex3D = true; sampler = "samplerCube"; break; case rsx::texture_dimension_extended::texture_dimension_2d: + 
properties.has_tex2D = true; sampler = "sampler2D"; break; case rsx::texture_dimension_extended::texture_dimension_3d: + properties.has_tex3D = true; sampler = "sampler3D"; break; } diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index c275eb2623..e866a240c7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -288,6 +288,10 @@ public: bool has_pkg = false; bool has_upg = false; bool has_dynamic_register_load = false; + + bool has_tex1D = false; + bool has_tex2D = false; + bool has_tex3D = false; } properties; diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 369933cf2a..f62ac0db09 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -119,10 +119,10 @@ namespace glsl { "VTX_FMT_SNORM16", RSX_VERTEX_BASE_TYPE_SNORM16 }, { "VTX_FMT_FLOAT32", RSX_VERTEX_BASE_TYPE_FLOAT }, { "VTX_FMT_FLOAT16", RSX_VERTEX_BASE_TYPE_HALF_FLOAT }, - { "VTX_FMT_UNORM8", RSX_VERTEX_BASE_TYPE_UNORM8 }, - { "VTX_FMT_SINT16", RSX_VERTEX_BASE_TYPE_SINT16 }, - { "VTX_FMT_COMP32", RSX_VERTEX_BASE_TYPE_CMP32 }, - { "VTX_FMT_UINT8", RSX_VERTEX_BASE_TYPE_UINT8 } + { "VTX_FMT_UNORM8 ", RSX_VERTEX_BASE_TYPE_UNORM8 }, + { "VTX_FMT_SINT16 ", RSX_VERTEX_BASE_TYPE_SINT16 }, + { "VTX_FMT_COMP32 ", RSX_VERTEX_BASE_TYPE_CMP32 }, + { "VTX_FMT_UINT8 ", RSX_VERTEX_BASE_TYPE_UINT8 } }); // For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?) 
@@ -258,17 +258,17 @@ namespace glsl OS << "// ROP control\n"; program_common::define_glsl_constants(OS, { - { "ALPHA_TEST_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT }, - { "SRGB_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT }, - { "ALPHA_TO_COVERAGE_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT }, - { "MSAA_WRITE_ENABLE_BIT ", rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT }, - { "INT_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT }, - { "POLYGON_STIPPLE_ENABLE_BIT ", rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT }, - { "ALPHA_TEST_FUNC_OFFSET ", rsx::ROP_control_bits::ALPHA_FUNC_OFFSET }, - { "ALPHA_TEST_FUNC_LENGTH ", rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS }, - { "MSAA_SAMPLE_CTRL_OFFSET ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET }, - { "MSAA_SAMPLE_CTRL_LENGTH ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS }, - { "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK } + { "ALPHA_TEST_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT }, + { "SRGB_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT }, + { "ALPHA_TO_COVERAGE_ENABLE_BIT", rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT }, + { "MSAA_WRITE_ENABLE_BIT ", rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT }, + { "INT_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT }, + { "POLYGON_STIPPLE_ENABLE_BIT ", rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT }, + { "ALPHA_TEST_FUNC_OFFSET ", rsx::ROP_control_bits::ALPHA_FUNC_OFFSET }, + { "ALPHA_TEST_FUNC_LENGTH ", rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS }, + { "MSAA_SAMPLE_CTRL_OFFSET ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET }, + { "MSAA_SAMPLE_CTRL_LENGTH ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS }, + { "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK } }); if (props.fp32_outputs || !props.supports_native_fp16) @@ -343,12 +343,12 @@ namespace glsl { program_common::define_glsl_constants(OS, { - { "FOG_LINEAR", 
rsx::fog_mode::linear }, - { "FOG_EXP", rsx::fog_mode::exponential }, - { "FOG_EXP2", rsx::fog_mode::exponential2 }, + { "FOG_LINEAR ", rsx::fog_mode::linear }, + { "FOG_EXP ", rsx::fog_mode::exponential }, + { "FOG_EXP2 ", rsx::fog_mode::exponential2 }, { "FOG_LINEAR_ABS", rsx::fog_mode::linear_abs }, - { "FOG_EXP_ABS", rsx::fog_mode::exponential_abs }, - { "FOG_EXP2_ABS", rsx::fog_mode::exponential2_abs }, + { "FOG_EXP_ABS ", rsx::fog_mode::exponential_abs }, + { "FOG_EXP2_ABS ", rsx::fog_mode::exponential2_abs }, }); enabled_options.push_back("_ENABLE_FOG_READ"); @@ -365,25 +365,26 @@ namespace glsl if (props.require_texture_ops) { // Declare special texture control flags - OS << "#define GAMMA_R_MASK (1 << " << rsx::texture_control_bits::GAMMA_R << ")\n"; - OS << "#define GAMMA_G_MASK (1 << " << rsx::texture_control_bits::GAMMA_G << ")\n"; - OS << "#define GAMMA_B_MASK (1 << " << rsx::texture_control_bits::GAMMA_B << ")\n"; - OS << "#define GAMMA_A_MASK (1 << " << rsx::texture_control_bits::GAMMA_A << ")\n"; - OS << "#define EXPAND_R_MASK (1 << " << rsx::texture_control_bits::EXPAND_R << ")\n"; - OS << "#define EXPAND_G_MASK (1 << " << rsx::texture_control_bits::EXPAND_G << ")\n"; - OS << "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n"; - OS << "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n"; + program_common::define_glsl_constants(OS, + { + { "GAMMA_R_BIT " , rsx::texture_control_bits::GAMMA_R }, + { "GAMMA_G_BIT " , rsx::texture_control_bits::GAMMA_G }, + { "GAMMA_B_BIT " , rsx::texture_control_bits::GAMMA_B }, + { "GAMMA_A_BIT " , rsx::texture_control_bits::GAMMA_A }, + { "EXPAND_R_BIT" , rsx::texture_control_bits::EXPAND_R }, + { "EXPAND_G_BIT" , rsx::texture_control_bits::EXPAND_G }, + { "EXPAND_B_BIT" , rsx::texture_control_bits::EXPAND_B }, + { "EXPAND_A_BIT" , rsx::texture_control_bits::EXPAND_A }, - OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n"; - OS << 
"#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n"; - OS << "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n"; - OS << "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n"; - OS << "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n"; - OS << "#define FILTERED_MIN_BIT " << rsx::texture_control_bits::FILTERED_MIN << "\n"; - OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n"; - OS << "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n"; - OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n"; - OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n"; + { "ALPHAKILL ", rsx::texture_control_bits::ALPHAKILL }, + { "RENORMALIZE ", rsx::texture_control_bits::RENORMALIZE }, + { "DEPTH_FLOAT ", rsx::texture_control_bits::DEPTH_FLOAT }, + { "DEPTH_COMPARE", rsx::texture_control_bits::DEPTH_COMPARE_OP }, + { "FILTERED_MAG_BIT", rsx::texture_control_bits::FILTERED_MAG }, + { "FILTERED_MIN_BIT", rsx::texture_control_bits::FILTERED_MIN }, + { "INT_COORDS_BIT ", rsx::texture_control_bits::UNNORMALIZED_COORDS }, + { "CLAMP_COORDS_BIT", rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT } + }); if (props.require_texture_expand) { @@ -395,6 +396,26 @@ namespace glsl enabled_options.push_back("_EMULATED_TEXSHADOW"); } + if (props.require_tex_shadow_ops) + { + enabled_options.push_back("_ENABLE_SHADOW"); + } + + if (props.require_tex1D_ops) + { + enabled_options.push_back("_ENABLE_TEX1D"); + } + + if (props.require_tex2D_ops) + { + enabled_options.push_back("_ENABLE_TEX2D"); + } + + if (props.require_tex3D_ops) + { + enabled_options.push_back("_ENABLE_TEX3D"); + } + program_common::define_glsl_switches(OS, enabled_options); enabled_options.clear(); diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.h b/rpcs3/Emu/RSX/Program/GLSLCommon.h index dba5c1c630..0ad55477ae 100644 --- 
a/rpcs3/Emu/RSX/Program/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.h @@ -29,6 +29,7 @@ namespace rsx FILTERED_MAG, FILTERED_MIN, UNNORMALIZED_COORDS, + CLAMP_TEXCOORDS_BIT, GAMMA_CTRL_MASK = (1 << GAMMA_R) | (1 << GAMMA_G) | (1 << GAMMA_B) | (1 << GAMMA_A), EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A), diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl index eaf21d252c..2b07c25f6f 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl @@ -1,10 +1,14 @@ R"( // Small structures that should be defined before any backend logic +// Avoid arrays and sub-vec4 members because of std140 padding constraints struct sampler_info { - vec4 scale_bias; - uint remap; - uint flags; + float scale_x, scale_y, scale_z; // 12 + float bias_x, bias_y, bias_z; // 24 + float clamp_min_x, clamp_min_y; // 32 + float clamp_max_x, clamp_max_y; // 40 + uint remap; // 44 + uint flags; // 48 }; )" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl index 68cea535ce..01eb79803f 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl @@ -1,8 +1,8 @@ R"( #define ZS_READ(index, coord) vec2(texture(TEX_NAME(index), coord).r, float(texture(TEX_NAME_STENCIL(index), coord).x)) -#define TEX1D_Z24X8_RGBA8(index, coord1) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) -#define TEX2D_Z24X8_RGBA8(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index].remap, TEX_FLAGS(index)), 
TEX_FLAGS(index)) -#define TEX3D_Z24X8_RGBA8(index, coord3) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) // NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) // The A component (Z) is useless (should contain stencil8 or just 1) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl index 5a3a38539e..2ceef0d6a2 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl @@ -1,10 +1,10 @@ R"( #define ZCOMPARE_FUNC(index) _get_bits(TEX_FLAGS(index), DEPTH_COMPARE, 3) #define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, index).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, index).x)) -#define TEX2D_MS(index, coord2) process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index)) +#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index)) #define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index))) #define TEX2D_SHADOWPROJ_MS(index, coord4) 
TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w)) -#define TEX2D_Z24X8_RGBA8_MS(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl index df897eeef4..d8a127ad1e 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl @@ -1,4 +1,17 @@ R"( +#define GAMMA_R_MASK (1 << GAMMA_R_BIT) +#define GAMMA_G_MASK (1 << GAMMA_G_BIT) +#define GAMMA_B_MASK (1 << GAMMA_B_BIT) +#define GAMMA_A_MASK (1 << GAMMA_A_BIT) +#define EXPAND_R_MASK (1 << EXPAND_R_BIT) +#define EXPAND_G_MASK (1 << EXPAND_G_BIT) +#define EXPAND_B_MASK (1 << EXPAND_B_BIT) +#define EXPAND_A_MASK (1 << EXPAND_A_BIT) + +#define GAMMA_CTRL_MASK (GAMMA_R_MASK | GAMMA_G_MASK | GAMMA_B_MASK | GAMMA_A_MASK) +#define SIGN_EXPAND_MASK (EXPAND_R_MASK | EXPAND_G_MASK | EXPAND_B_MASK | EXPAND_A_MASK) +#define FILTERED_MASK (FILTERED_MAG_BIT | FILTERED_MIN_BIT) + #ifdef _ENABLE_TEXTURE_EXPAND uint _texture_flag_override = 0; #define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK @@ -11,22 +24,27 @@ R"( #define TEX_NAME(index) tex##index #define TEX_NAME_STENCIL(index) tex##index##_stencil -#define COORD_SCALE1(index, coord1) ((coord1 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.x) -#define COORD_SCALE2(index, coord2) ((coord2 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xy) -#define COORD_SCALE3(index, 
coord3) ((coord3 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xyz) +#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, texture_parameters[index]) +#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, texture_parameters[index]) +#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, texture_parameters[index]) -#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index)) -#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index)) -#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index)) -#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index)) -#define TEX1D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord4.x), coord4.w)), TEX_FLAGS(index)) +#ifdef _ENABLE_TEX1D +#define TEX1D(index, coord1) _process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index)) +#define TEX1D_BIAS(index, coord1, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index)) +#define TEX1D_LOD(index, coord1, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index)) +#define TEX1D_GRAD(index, coord1, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index)) +#define TEX1D_PROJ(index, coord4) _process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord4.x), coord4.w)), TEX_FLAGS(index)) +#endif -#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index)) -#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), 
TEX_FLAGS(index)) -#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index)) -#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index)) -#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index)) +#ifdef _ENABLE_TEX2D +#define TEX2D(index, coord2) _process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index)) +#define TEX2D_BIAS(index, coord2, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index)) +#define TEX2D_LOD(index, coord2, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index)) +#define TEX2D_GRAD(index, coord2, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index)) +#define TEX2D_PROJ(index, coord4) _process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index)) +#endif +#ifdef _ENABLE_SHADOW #ifdef _EMULATED_TEXSHADOW #define SHADOW_COORD(index, coord3) vec3(COORD_SCALE2(index, coord3.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? 
coord3.z : min(float(coord3.z), 1.0)) #define SHADOW_COORD4(index, coord4) vec4(SHADOW_COORD(index, coord4.xyz), coord4.w) @@ -40,20 +58,72 @@ R"( #define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w)) #define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.zw)) #endif +#endif -#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index)) -#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index)) -#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index)) -#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index)) -#define TEX3D_PROJ(index, coord4) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord4.xyz) / coord4.w), TEX_FLAGS(index)) +#ifdef _ENABLE_TEX3D +#define TEX3D(index, coord3) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index)) +#define TEX3D_BIAS(index, coord3, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index)) +#define TEX3D_LOD(index, coord3, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index)) +#define TEX3D_GRAD(index, coord3, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index)) +#define TEX3D_PROJ(index, coord4) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord4.xyz) / coord4.w), TEX_FLAGS(index)) +#endif -vec4 process_texel(in vec4 rgba, const in uint control_bits) +#ifdef _ENABLE_TEX1D +float _texcoord_xform(const in float coord, const in sampler_info params) +{ + float result = fma(coord, params.scale_x, params.bias_x); + if 
(_test_bit(params.flags, CLAMP_COORDS_BIT)) + { + result = clamp(result, params.clamp_min_x, params.clamp_max_x); + } + + return result; +} +#endif + +#ifdef _ENABLE_TEX2D +vec2 _texcoord_xform(const in vec2 coord, const in sampler_info params) +{ + vec2 result = fma( + coord, + vec2(params.scale_x, params.scale_y), + vec2(params.bias_x, params.bias_y) + ); + + if (_test_bit(params.flags, CLAMP_COORDS_BIT)) + { + result = clamp( + result, + vec2(params.clamp_min_x, params.clamp_min_y), + vec2(params.clamp_max_x, params.clamp_max_y) + ); + } + + return result; +} +#endif + +#ifdef _ENABLE_TEX3D +vec3 _texcoord_xform(const in vec3 coord, const in sampler_info params) +{ + vec3 result = fma( + coord, + vec3(params.scale_x, params.scale_y, params.scale_z), + vec3(params.bias_x, params.bias_y, params.bias_z) + ); + + // NOTE: Coordinate clamping not supported for CUBE and 3D textures + return result; +} +#endif + +vec4 _process_texel(in vec4 rgba, const in uint control_bits) { if (control_bits == 0) { return rgba; } - + if (_test_bit(control_bits, ALPHAKILL)) { // Alphakill @@ -63,18 +133,18 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits) return rgba; } } - + if (_test_bit(control_bits, RENORMALIZE)) { // Renormalize to 8-bit (PS3) accuracy rgba = floor(rgba * 255.); rgba /= 255.; } - + uvec4 mask; vec4 convert; uint op_mask = control_bits & uint(SIGN_EXPAND_MASK); - + if (op_mask != 0) { // Expand to signed normalized @@ -82,7 +152,7 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits) convert = (rgba * 2.f - 1.f); rgba = _select(rgba, convert, notEqual(mask, uvec4(0))); } - + op_mask = control_bits & uint(GAMMA_CTRL_MASK); if (op_mask != 0u) { @@ -91,7 +161,7 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits) convert = srgb_to_linear(rgba); return _select(rgba, convert, notEqual(mask, uvec4(0))); } - + return rgba; } diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 4e31a369ef..0de51066fc 
100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -24,11 +24,6 @@ namespace glsl // Only relevant for fragment programs bool fp32_outputs : 1; bool require_wpos : 1; - bool require_depth_conversion : 1; - bool require_texture_ops : 1; - bool require_shadow_ops : 1; - bool require_msaa_ops : 1; - bool require_texture_expand : 1; bool require_srgb_to_linear : 1; bool require_linear_to_srgb : 1; bool require_explicit_invariance: 1; @@ -41,5 +36,15 @@ namespace glsl bool disable_early_discard : 1; bool supports_native_fp16 : 1; bool ROP_output_rounding : 1; + + // Texturing spec + bool require_texture_ops : 1; // Global switch to enable/disable all texture code + bool require_depth_conversion : 1; // Include DSV<->RTV bitcast emulation + bool require_tex_shadow_ops : 1; // Include shadow compare emulation + bool require_msaa_ops : 1; // Include MSAA<->Resolved bitcast emulation + bool require_texture_expand : 1; // Include sign-expansion emulation + bool require_tex1D_ops : 1; // Include 1D texture stuff + bool require_tex2D_ops : 1; // Include 2D texture stuff + bool require_tex3D_ops : 1; // Include 3D texture stuff (including cubemap) }; }; diff --git a/rpcs3/Emu/RSX/Program/program_util.h b/rpcs3/Emu/RSX/Program/program_util.h index d6fd991101..7974290180 100644 --- a/rpcs3/Emu/RSX/Program/program_util.h +++ b/rpcs3/Emu/RSX/Program/program_util.h @@ -17,10 +17,11 @@ namespace rsx struct TIU_slot { float scale[3]; - float subpixel_bias; + float bias[3]; + float clamp_min[2]; + float clamp_max[2]; u32 remap; u32 control; - u32 padding[2]; } slots_[16]; // QT headers will collide with any variable named 'slots' because reasons diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 259a390678..283cee7b84 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2296,10 +2296,7 @@ namespace rsx if (tex.enabled() && sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_UNDEFINED) { - 
current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x; - current_fragment_program.texture_params[i].scale[1] = sampler_descriptors[i]->scale_y; - current_fragment_program.texture_params[i].scale[2] = sampler_descriptors[i]->scale_z; - current_fragment_program.texture_params[i].subpixel_bias = 0.f; + std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 10 * sizeof(float)); current_fragment_program.texture_params[i].remap = tex.remap(); m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; @@ -2307,6 +2304,11 @@ namespace rsx u32 texture_control = 0; current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i); + if (sampler_descriptors[i]->texcoord_xform.clamp) + { + texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT); + } + if (tex.alpha_kill_enabled()) { //alphakill can be ignored unless a valid comparison function is set @@ -2324,7 +2326,11 @@ namespace rsx { // Subpixel offset so that (X + bias) * scale will round correctly. // This is done to work around fdiv precision issues in some GPUs (NVIDIA) - current_fragment_program.texture_params[i].subpixel_bias = 0.01f; + // We apply the simplification where (x + bias) * z = xz + zbias here. 
+ const auto subpixel_bias = 0.01f; + current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]); + current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]); + current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]); } } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index dd7a83d8cd..e2e9b1039c 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -238,7 +238,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_depth_conversion = properties.redirected_sampler_mask != 0; m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos); m_shader_props.require_texture_ops = properties.has_tex_op; - m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0; + m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0; m_shader_props.require_msaa_ops = m_prog.texture_state.multisampled_textures != 0; m_shader_props.require_texture_expand = properties.has_exp_tex_op; m_shader_props.require_srgb_to_linear = properties.has_upg; @@ -250,6 +250,9 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; m_shader_props.ROP_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; + m_shader_props.require_tex1D_ops = properties.has_tex1D; + m_shader_props.require_tex2D_ops = properties.has_tex2D; + m_shader_props.require_tex3D_ops = properties.has_tex3D; glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 
f5cf2de26a..56a38e6345 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2176,12 +2176,12 @@ void VKGSRender::load_program_env() { check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE); - auto mem = m_fragment_texture_params_ring_info.alloc<256>(512); - auto buf = m_fragment_texture_params_ring_info.map(mem, 512); + auto mem = m_fragment_texture_params_ring_info.alloc<256>(768); + auto buf = m_fragment_texture_params_ring_info.map(mem, 768); current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask); m_fragment_texture_params_ring_info.unmap(); - m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 512 }; + m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 768 }; } if (update_raster_env)