rsx: Rework texture coordinate handling to support clamping and a more sane scale-bias setup

This commit is contained in:
kd-11 2023-06-20 14:54:32 +03:00 committed by kd-11
parent 66cb855db0
commit d77a78cdf1
17 changed files with 239 additions and 99 deletions

View File

@ -133,9 +133,17 @@ namespace rsx
u8 samples = 1;
u32 ref_address = 0;
u64 surface_cache_tag = 0;
f32 scale_x = 1.f;
f32 scale_y = 1.f;
f32 scale_z = 1.f;
#pragma pack(push, 1)
// Texture coordinate transform parameters for this sampled image.
// The ten floats (scale, bias, clamp_min, clamp_max) are laid out back-to-back and in this
// exact order because consumers copy them as one contiguous run starting at 'scale'.
// Do not reorder the members or insert anything between them.
struct
{
	// Per-axis multiplier applied to the incoming coordinate. Defaults to identity,
	// matching the previous scale_x/scale_y/scale_z defaults of 1.f.
	f32 scale[3] = { 1.f, 1.f, 1.f };

	// Per-axis offset added after scaling. Defaults to no offset.
	f32 bias[3] = { 0.f, 0.f, 0.f };

	// Lower/upper bounds for the first two axes. Zero-initialized so the block never
	// carries indeterminate values; the shader-side transform only reads these when
	// the clamp flag is set.
	f32 clamp_min[2] = { 0.f, 0.f };
	f32 clamp_max[2] = { 0.f, 0.f };

	// Enables coordinate clamping against clamp_min/clamp_max.
	bool clamp = false;
} texcoord_xform;
#pragma pack(pop)
virtual ~sampled_image_descriptor_base() = default;
virtual u32 encoded_component_map() const = 0;

View File

@ -179,11 +179,16 @@ namespace rsx
upload_context = ctx;
format_class = ftype;
is_cyclic_reference = cyclic_reference;
scale_x = scale.width;
scale_y = scale.height;
scale_z = scale.depth;
image_type = type;
samples = msaa_samples;
texcoord_xform.scale[0] = scale.width;
texcoord_xform.scale[1] = scale.height;
texcoord_xform.scale[2] = scale.depth;
texcoord_xform.bias[0] = 0.;
texcoord_xform.bias[1] = 0.;
texcoord_xform.bias[2] = 0.;
texcoord_xform.clamp = false;
}
sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason,
@ -196,10 +201,15 @@ namespace rsx
image_handle = 0;
upload_context = ctx;
format_class = ftype;
scale_x = scale.width;
scale_y = scale.height;
scale_z = scale.depth;
image_type = type;
texcoord_xform.scale[0] = scale.width;
texcoord_xform.scale[1] = scale.height;
texcoord_xform.scale[2] = scale.depth;
texcoord_xform.bias[0] = 0.;
texcoord_xform.bias[1] = 0.;
texcoord_xform.bias[2] = 0.;
texcoord_xform.clamp = false;
}
void simplify()

View File

@ -187,7 +187,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_depth_conversion = properties.redirected_sampler_mask != 0;
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_texture_expand = properties.has_exp_tex_op;
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
@ -198,6 +198,9 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA;
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}

View File

@ -867,10 +867,10 @@ void GLGSRender::load_program_env()
if (update_fragment_texture_env)
{
// Fragment texture parameters
auto mapping = m_texture_parameters_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
auto mapping = m_texture_parameters_buffer->alloc_from_heap(768, m_uniform_buffer_offset_align);
current_fragment_program.texture_params.write_to(mapping.first, current_fp_metadata.referenced_textures_mask);
m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 512);
m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 768);
}
if (update_raster_env)

View File

@ -253,15 +253,19 @@ std::string FragmentProgramDecompiler::AddTex()
switch (m_prog.get_texture_dimension(dst.tex_num))
{
case rsx::texture_dimension_extended::texture_dimension_1d:
properties.has_tex1D = true;
sampler = "sampler1D";
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
properties.has_tex3D = true;
sampler = "samplerCube";
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
properties.has_tex2D = true;
sampler = "sampler2D";
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
properties.has_tex3D = true;
sampler = "sampler3D";
break;
}

View File

@ -288,6 +288,10 @@ public:
bool has_pkg = false;
bool has_upg = false;
bool has_dynamic_register_load = false;
bool has_tex1D = false;
bool has_tex2D = false;
bool has_tex3D = false;
}
properties;

View File

@ -119,10 +119,10 @@ namespace glsl
{ "VTX_FMT_SNORM16", RSX_VERTEX_BASE_TYPE_SNORM16 },
{ "VTX_FMT_FLOAT32", RSX_VERTEX_BASE_TYPE_FLOAT },
{ "VTX_FMT_FLOAT16", RSX_VERTEX_BASE_TYPE_HALF_FLOAT },
{ "VTX_FMT_UNORM8", RSX_VERTEX_BASE_TYPE_UNORM8 },
{ "VTX_FMT_SINT16", RSX_VERTEX_BASE_TYPE_SINT16 },
{ "VTX_FMT_COMP32", RSX_VERTEX_BASE_TYPE_CMP32 },
{ "VTX_FMT_UINT8", RSX_VERTEX_BASE_TYPE_UINT8 }
{ "VTX_FMT_UNORM8 ", RSX_VERTEX_BASE_TYPE_UNORM8 },
{ "VTX_FMT_SINT16 ", RSX_VERTEX_BASE_TYPE_SINT16 },
{ "VTX_FMT_COMP32 ", RSX_VERTEX_BASE_TYPE_CMP32 },
{ "VTX_FMT_UINT8 ", RSX_VERTEX_BASE_TYPE_UINT8 }
});
// For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?)
@ -258,17 +258,17 @@ namespace glsl
OS << "// ROP control\n";
program_common::define_glsl_constants<rsx::ROP_control_bits>(OS,
{
{ "ALPHA_TEST_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT },
{ "SRGB_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT },
{ "ALPHA_TO_COVERAGE_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT },
{ "MSAA_WRITE_ENABLE_BIT ", rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT },
{ "INT_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT },
{ "POLYGON_STIPPLE_ENABLE_BIT ", rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT },
{ "ALPHA_TEST_FUNC_OFFSET ", rsx::ROP_control_bits::ALPHA_FUNC_OFFSET },
{ "ALPHA_TEST_FUNC_LENGTH ", rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS },
{ "MSAA_SAMPLE_CTRL_OFFSET ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET },
{ "MSAA_SAMPLE_CTRL_LENGTH ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS },
{ "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK }
{ "ALPHA_TEST_ENABLE_BIT ", rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT },
{ "SRGB_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT },
{ "ALPHA_TO_COVERAGE_ENABLE_BIT", rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT },
{ "MSAA_WRITE_ENABLE_BIT ", rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT },
{ "INT_FRAMEBUFFER_BIT ", rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT },
{ "POLYGON_STIPPLE_ENABLE_BIT ", rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT },
{ "ALPHA_TEST_FUNC_OFFSET ", rsx::ROP_control_bits::ALPHA_FUNC_OFFSET },
{ "ALPHA_TEST_FUNC_LENGTH ", rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS },
{ "MSAA_SAMPLE_CTRL_OFFSET ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET },
{ "MSAA_SAMPLE_CTRL_LENGTH ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS },
{ "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK }
});
if (props.fp32_outputs || !props.supports_native_fp16)
@ -343,12 +343,12 @@ namespace glsl
{
program_common::define_glsl_constants<rsx::fog_mode>(OS,
{
{ "FOG_LINEAR", rsx::fog_mode::linear },
{ "FOG_EXP", rsx::fog_mode::exponential },
{ "FOG_EXP2", rsx::fog_mode::exponential2 },
{ "FOG_LINEAR ", rsx::fog_mode::linear },
{ "FOG_EXP ", rsx::fog_mode::exponential },
{ "FOG_EXP2 ", rsx::fog_mode::exponential2 },
{ "FOG_LINEAR_ABS", rsx::fog_mode::linear_abs },
{ "FOG_EXP_ABS", rsx::fog_mode::exponential_abs },
{ "FOG_EXP2_ABS", rsx::fog_mode::exponential2_abs },
{ "FOG_EXP_ABS ", rsx::fog_mode::exponential_abs },
{ "FOG_EXP2_ABS ", rsx::fog_mode::exponential2_abs },
});
enabled_options.push_back("_ENABLE_FOG_READ");
@ -365,25 +365,26 @@ namespace glsl
if (props.require_texture_ops)
{
// Declare special texture control flags
OS << "#define GAMMA_R_MASK (1 << " << rsx::texture_control_bits::GAMMA_R << ")\n";
OS << "#define GAMMA_G_MASK (1 << " << rsx::texture_control_bits::GAMMA_G << ")\n";
OS << "#define GAMMA_B_MASK (1 << " << rsx::texture_control_bits::GAMMA_B << ")\n";
OS << "#define GAMMA_A_MASK (1 << " << rsx::texture_control_bits::GAMMA_A << ")\n";
OS << "#define EXPAND_R_MASK (1 << " << rsx::texture_control_bits::EXPAND_R << ")\n";
OS << "#define EXPAND_G_MASK (1 << " << rsx::texture_control_bits::EXPAND_G << ")\n";
OS << "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n";
OS << "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n";
program_common::define_glsl_constants<rsx::texture_control_bits>(OS,
{
{ "GAMMA_R_BIT " , rsx::texture_control_bits::GAMMA_R },
{ "GAMMA_G_BIT " , rsx::texture_control_bits::GAMMA_G },
{ "GAMMA_B_BIT " , rsx::texture_control_bits::GAMMA_B },
{ "GAMMA_A_BIT " , rsx::texture_control_bits::GAMMA_A },
{ "EXPAND_R_BIT" , rsx::texture_control_bits::EXPAND_R },
{ "EXPAND_G_BIT" , rsx::texture_control_bits::EXPAND_G },
{ "EXPAND_B_BIT" , rsx::texture_control_bits::EXPAND_B },
{ "EXPAND_A_BIT" , rsx::texture_control_bits::EXPAND_A },
OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n";
OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n";
OS << "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n";
OS << "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n";
OS << "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n";
OS << "#define FILTERED_MIN_BIT " << rsx::texture_control_bits::FILTERED_MIN << "\n";
OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n";
OS << "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n";
OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n";
OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n";
{ "ALPHAKILL ", rsx::texture_control_bits::ALPHAKILL },
{ "RENORMALIZE ", rsx::texture_control_bits::RENORMALIZE },
{ "DEPTH_FLOAT ", rsx::texture_control_bits::DEPTH_FLOAT },
{ "DEPTH_COMPARE", rsx::texture_control_bits::DEPTH_COMPARE_OP },
{ "FILTERED_MAG_BIT", rsx::texture_control_bits::FILTERED_MAG },
{ "FILTERED_MIN_BIT", rsx::texture_control_bits::FILTERED_MIN },
{ "INT_COORDS_BIT ", rsx::texture_control_bits::UNNORMALIZED_COORDS },
{ "CLAMP_COORDS_BIT", rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT }
});
if (props.require_texture_expand)
{
@ -395,6 +396,26 @@ namespace glsl
enabled_options.push_back("_EMULATED_TEXSHADOW");
}
if (props.require_tex_shadow_ops)
{
enabled_options.push_back("_ENABLE_SHADOW");
}
if (props.require_tex1D_ops)
{
enabled_options.push_back("_ENABLE_TEX1D");
}
if (props.require_tex2D_ops)
{
enabled_options.push_back("_ENABLE_TEX2D");
}
if (props.require_tex3D_ops)
{
enabled_options.push_back("_ENABLE_TEX3D");
}
program_common::define_glsl_switches(OS, enabled_options);
enabled_options.clear();

View File

@ -29,6 +29,7 @@ namespace rsx
FILTERED_MAG,
FILTERED_MIN,
UNNORMALIZED_COORDS,
CLAMP_TEXCOORDS_BIT,
GAMMA_CTRL_MASK = (1 << GAMMA_R) | (1 << GAMMA_G) | (1 << GAMMA_B) | (1 << GAMMA_A),
EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A),

View File

@ -1,10 +1,14 @@
R"(
// Small structures that should be defined before any backend logic
// Avoid arrays and sub-vec4 members because of std140 padding constraints
struct sampler_info
{
vec4 scale_bias;
uint remap;
uint flags;
float scale_x, scale_y, scale_z; // 12
float bias_x, bias_y, bias_z; // 24
float clamp_min_x, clamp_min_y; // 32
float clamp_max_x, clamp_max_y; // 40
uint remap; // 44
uint flags; // 48
};
)"

View File

@ -1,8 +1,8 @@
R"(
#define ZS_READ(index, coord) vec2(texture(TEX_NAME(index), coord).r, float(texture(TEX_NAME_STENCIL(index), coord).x))
#define TEX1D_Z24X8_RGBA8(index, coord1) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX3D_Z24X8_RGBA8(index, coord3) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
// NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
// The A component (Z) is useless (should contain stencil8 or just 1)

View File

@ -1,10 +1,10 @@
R"(
#define ZCOMPARE_FUNC(index) _get_bits(TEX_FLAGS(index), DEPTH_COMPARE, 3)
#define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, index).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, index).x))
#define TEX2D_MS(index, coord2) process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index))
#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index))
#define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index)))
#define TEX2D_SHADOWPROJ_MS(index, coord4) TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w))
#define TEX2D_Z24X8_RGBA8_MS(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step)
{

View File

@ -1,4 +1,17 @@
R"(
#define GAMMA_R_MASK (1 << GAMMA_R_BIT)
#define GAMMA_G_MASK (1 << GAMMA_G_BIT)
#define GAMMA_B_MASK (1 << GAMMA_B_BIT)
#define GAMMA_A_MASK (1 << GAMMA_A_BIT)
#define EXPAND_R_MASK (1 << EXPAND_R_BIT)
#define EXPAND_G_MASK (1 << EXPAND_G_BIT)
#define EXPAND_B_MASK (1 << EXPAND_B_BIT)
#define EXPAND_A_MASK (1 << EXPAND_A_BIT)
#define GAMMA_CTRL_MASK (GAMMA_R_MASK | GAMMA_G_MASK | GAMMA_B_MASK | GAMMA_A_MASK)
#define SIGN_EXPAND_MASK (EXPAND_R_MASK | EXPAND_G_MASK | EXPAND_B_MASK | EXPAND_A_MASK)
#define FILTERED_MASK (FILTERED_MAG_BIT | FILTERED_MIN_BIT)
#ifdef _ENABLE_TEXTURE_EXPAND
uint _texture_flag_override = 0;
#define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK
@ -11,22 +24,27 @@ R"(
#define TEX_NAME(index) tex##index
#define TEX_NAME_STENCIL(index) tex##index##_stencil
#define COORD_SCALE1(index, coord1) ((coord1 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.x)
#define COORD_SCALE2(index, coord2) ((coord2 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xy)
#define COORD_SCALE3(index, coord3) ((coord3 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xyz)
#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, texture_parameters[index])
#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, texture_parameters[index])
#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, texture_parameters[index])
#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index))
#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index))
#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index))
#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index))
#define TEX1D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord4.x), coord4.w)), TEX_FLAGS(index))
#ifdef _ENABLE_TEX1D
#define TEX1D(index, coord1) _process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index))
#define TEX1D_BIAS(index, coord1, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index))
#define TEX1D_LOD(index, coord1, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index))
#define TEX1D_GRAD(index, coord1, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index))
#define TEX1D_PROJ(index, coord4) _process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord4.x), coord4.w)), TEX_FLAGS(index))
#endif
#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index))
#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index))
#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index))
#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index))
#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index))
#ifdef _ENABLE_TEX2D
#define TEX2D(index, coord2) _process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index))
#define TEX2D_BIAS(index, coord2, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index))
#define TEX2D_LOD(index, coord2, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index))
#define TEX2D_GRAD(index, coord2, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index))
#define TEX2D_PROJ(index, coord4) _process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index))
#endif
#ifdef _ENABLE_SHADOW
#ifdef _EMULATED_TEXSHADOW
#define SHADOW_COORD(index, coord3) vec3(COORD_SCALE2(index, coord3.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0))
#define SHADOW_COORD4(index, coord4) vec4(SHADOW_COORD(index, coord4.xyz), coord4.w)
@ -40,20 +58,72 @@ R"(
#define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w))
#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.zw))
#endif
#endif
#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index))
#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index))
#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index))
#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index))
#define TEX3D_PROJ(index, coord4) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord4.xyz) / coord4.w), TEX_FLAGS(index))
#ifdef _ENABLE_TEX3D
#define TEX3D(index, coord3) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index))
#define TEX3D_BIAS(index, coord3, bias) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index))
#define TEX3D_LOD(index, coord3, lod) _process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index))
#define TEX3D_GRAD(index, coord3, dpdx, dpdy) _process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index))
#define TEX3D_PROJ(index, coord4) _process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord4.xyz) / coord4.w), TEX_FLAGS(index))
#endif
vec4 process_texel(in vec4 rgba, const in uint control_bits)
#ifdef _ENABLE_TEX1D
// 1D texture coordinate transform: scales and biases the coordinate, then optionally
// clamps it to [clamp_min_x, clamp_max_x] when CLAMP_COORDS_BIT is set in params.flags.
float _texcoord_xform(const in float coord, const in sampler_info params)
{
	// coord * scale + bias
	float xformed = fma(coord, params.scale_x, params.bias_x);

	if (_test_bit(params.flags, CLAMP_COORDS_BIT))
	{
		return clamp(xformed, params.clamp_min_x, params.clamp_max_x);
	}

	return xformed;
}
#endif
#ifdef _ENABLE_TEX2D
// 2D texture coordinate transform: scales and biases the XY coordinate, then optionally
// clamps it component-wise to [clamp_min, clamp_max] when CLAMP_COORDS_BIT is set in params.flags.
vec2 _texcoord_xform(const in vec2 coord, const in sampler_info params)
{
	// coord * scale + bias, per component.
	// The result must be a vec2: fma() on vec2 operands yields a vec2, and GLSL has no
	// implicit vector-to-scalar conversion.
	vec2 result = fma(
		coord,
		vec2(params.scale_x, params.scale_y),
		vec2(params.bias_x, params.bias_y)
	);

	if (_test_bit(params.flags, CLAMP_COORDS_BIT))
	{
		result = clamp(
			result,
			vec2(params.clamp_min_x, params.clamp_min_y),
			vec2(params.clamp_max_x, params.clamp_max_y)
		);
	}

	return result;
}
#endif
#ifdef _ENABLE_TEX3D
// 3D/cubemap texture coordinate transform: scales and biases the XYZ coordinate.
vec3 _texcoord_xform(const in vec3 coord, const in sampler_info params)
{
	// coord * scale + bias, per component.
	// The result must be a vec3: fma() on vec3 operands yields a vec3, and GLSL has no
	// implicit vector-to-scalar conversion.
	vec3 result = fma(
		coord,
		vec3(params.scale_x, params.scale_y, params.scale_z),
		vec3(params.bias_x, params.bias_y, params.bias_z)
	);

	// NOTE: Coordinate clamping not supported for CUBE and 3D textures
	return result;
}
#endif
vec4 _process_texel(in vec4 rgba, const in uint control_bits)
{
if (control_bits == 0)
{
return rgba;
}
if (_test_bit(control_bits, ALPHAKILL))
{
// Alphakill
@ -63,18 +133,18 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits)
return rgba;
}
}
if (_test_bit(control_bits, RENORMALIZE))
{
// Renormalize to 8-bit (PS3) accuracy
rgba = floor(rgba * 255.);
rgba /= 255.;
}
uvec4 mask;
vec4 convert;
uint op_mask = control_bits & uint(SIGN_EXPAND_MASK);
if (op_mask != 0)
{
// Expand to signed normalized
@ -82,7 +152,7 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits)
convert = (rgba * 2.f - 1.f);
rgba = _select(rgba, convert, notEqual(mask, uvec4(0)));
}
op_mask = control_bits & uint(GAMMA_CTRL_MASK);
if (op_mask != 0u)
{
@ -91,7 +161,7 @@ vec4 process_texel(in vec4 rgba, const in uint control_bits)
convert = srgb_to_linear(rgba);
return _select(rgba, convert, notEqual(mask, uvec4(0)));
}
return rgba;
}

View File

@ -24,11 +24,6 @@ namespace glsl
// Only relevant for fragment programs
bool fp32_outputs : 1;
bool require_wpos : 1;
bool require_depth_conversion : 1;
bool require_texture_ops : 1;
bool require_shadow_ops : 1;
bool require_msaa_ops : 1;
bool require_texture_expand : 1;
bool require_srgb_to_linear : 1;
bool require_linear_to_srgb : 1;
bool require_explicit_invariance: 1;
@ -41,5 +36,15 @@ namespace glsl
bool disable_early_discard : 1;
bool supports_native_fp16 : 1;
bool ROP_output_rounding : 1;
// Texturing spec
bool require_texture_ops : 1; // Global switch to enable/disable all texture code
bool require_depth_conversion : 1; // Include DSV<->RTV bitcast emulation
bool require_tex_shadow_ops : 1; // Include shadow compare emulation
bool require_msaa_ops : 1; // Include MSAA<->Resolved bitcast emulation
bool require_texture_expand : 1; // Include sign-expansion emulation
bool require_tex1D_ops : 1; // Include 1D texture stuff
bool require_tex2D_ops : 1; // Include 2D texture stuff
bool require_tex3D_ops : 1; // Include 3D texture stuff (including cubemap)
};
};

View File

@ -17,10 +17,11 @@ namespace rsx
struct TIU_slot
{
float scale[3];
float subpixel_bias;
float bias[3];
float clamp_min[2];
float clamp_max[2];
u32 remap;
u32 control;
u32 padding[2];
}
slots_[16]; // QT headers will collide with any variable named 'slots' because reasons

View File

@ -2296,10 +2296,7 @@ namespace rsx
if (tex.enabled() && sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_UNDEFINED)
{
current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x;
current_fragment_program.texture_params[i].scale[1] = sampler_descriptors[i]->scale_y;
current_fragment_program.texture_params[i].scale[2] = sampler_descriptors[i]->scale_z;
current_fragment_program.texture_params[i].subpixel_bias = 0.f;
std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 10 * sizeof(float));
current_fragment_program.texture_params[i].remap = tex.remap();
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
@ -2307,6 +2304,11 @@ namespace rsx
u32 texture_control = 0;
current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);
if (sampler_descriptors[i]->texcoord_xform.clamp)
{
texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT);
}
if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
@ -2324,7 +2326,11 @@ namespace rsx
{
// Subpixel offset so that (X + bias) * scale will round correctly.
// This is done to work around fdiv precision issues in some GPUs (NVIDIA)
current_fragment_program.texture_params[i].subpixel_bias = 0.01f;
// We apply the simplification (x + bias) * scale = (x * scale) + (bias * scale) here, folding the pre-scaled offset into the per-axis bias.
const auto subpixel_bias = 0.01f;
current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]);
current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]);
current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]);
}
}

View File

@ -238,7 +238,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_depth_conversion = properties.redirected_sampler_mask != 0;
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_tex_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_msaa_ops = m_prog.texture_state.multisampled_textures != 0;
m_shader_props.require_texture_expand = properties.has_exp_tex_op;
m_shader_props.require_srgb_to_linear = properties.has_upg;
@ -250,6 +250,9 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}

View File

@ -2176,12 +2176,12 @@ void VKGSRender::load_program_env()
{
check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE);
auto mem = m_fragment_texture_params_ring_info.alloc<256>(512);
auto buf = m_fragment_texture_params_ring_info.map(mem, 512);
auto mem = m_fragment_texture_params_ring_info.alloc<256>(768);
auto buf = m_fragment_texture_params_ring_info.map(mem, 768);
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
m_fragment_texture_params_ring_info.unmap();
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 512 };
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 768 };
}
if (update_raster_env)