diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 97ceb37c01..5c175c8ae6 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -94,7 +94,7 @@ static size_t s_state_writes_in_queue; static std::condition_variable s_state_write_queue_is_empty; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 154; // Last changed in PR 11177 +constexpr u32 STATE_VERSION = 155; // Last changed in PR 10890 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.cpp b/Source/Core/VideoBackends/D3D12/DX12Context.cpp index 5cf0e414c4..955f26413e 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Context.cpp @@ -358,7 +358,7 @@ bool DXContext::CreateGXRootSignature() SetRootParamTable(&params[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 1, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; - SetRootParamConstant(&params[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX); + SetRootParamConstant(&params[param_count], 3, 1, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; // Since these must be contiguous, pixel lighting goes to bbox if not enabled. 
diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index c3427ba37f..6e60929056 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -102,9 +102,18 @@ struct VertexShaderConstants std::array vertex_offset_texcoords; }; +enum class VSExpand : u32 +{ + None = 0, + Point, + Line, +}; + struct GeometryShaderConstants { float4 stereoparams; float4 lineptparams; int4 texoffset; + VSExpand vs_expand; // Used by VS point/line expansion in ubershaders + u32 pad[3]; }; diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h index c226739b7d..fdb76a6646 100644 --- a/Source/Core/VideoCommon/GXPipelineTypes.h +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -19,7 +19,7 @@ namespace VideoCommon // As pipelines encompass both shader UIDs and render states, changes to either of these should // also increment the pipeline UID version. Incrementing the UID version will cause all UID // caches to be invalidated. 
-constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747 +constexpr u32 GX_PIPELINE_UID_VERSION = 6; // Last changed in PR 10890 struct GXPipelineUid { diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 818a22bfa4..04d0062982 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -97,10 +97,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& else out.Write("cbuffer GSBlock {{\n"); - out.Write("\tfloat4 " I_STEREOPARAMS ";\n" - "\tfloat4 " I_LINEPTPARAMS ";\n" - "\tint4 " I_TEXOFFSET ";\n" - "}};\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", diff --git a/Source/Core/VideoCommon/GeometryShaderManager.cpp b/Source/Core/VideoCommon/GeometryShaderManager.cpp index f71a687ce8..459df67868 100644 --- a/Source/Core/VideoCommon/GeometryShaderManager.cpp +++ b/Source/Core/VideoCommon/GeometryShaderManager.cpp @@ -8,6 +8,7 @@ #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/RenderState.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" @@ -36,10 +37,22 @@ void GeometryShaderManager::Dirty() // Any constants that can changed based on settings should be re-calculated s_projection_changed = true; + // Uses EFB scale config + SetLinePtWidthChanged(); + dirty = true; } -void GeometryShaderManager::SetConstants() +static void SetVSExpand(VSExpand expand) +{ + if (GeometryShaderManager::constants.vs_expand != expand) + { + GeometryShaderManager::constants.vs_expand = expand; + GeometryShaderManager::dirty = true; + } +} + +void GeometryShaderManager::SetConstants(PrimitiveType prim) { if (s_projection_changed && g_ActiveConfig.stereo_mode != StereoMode::Off) { @@ -63,6 +76,16 @@ void 
GeometryShaderManager::SetConstants() dirty = true; } + if (g_ActiveConfig.UseVSForLinePointExpand()) + { + if (prim == PrimitiveType::Points) + SetVSExpand(VSExpand::Point); + else if (prim == PrimitiveType::Lines) + SetVSExpand(VSExpand::Line); + else + SetVSExpand(VSExpand::None); + } + if (s_viewport_changed) { s_viewport_changed = false; diff --git a/Source/Core/VideoCommon/GeometryShaderManager.h b/Source/Core/VideoCommon/GeometryShaderManager.h index cbca02616c..2b449b9089 100644 --- a/Source/Core/VideoCommon/GeometryShaderManager.h +++ b/Source/Core/VideoCommon/GeometryShaderManager.h @@ -7,6 +7,7 @@ #include "VideoCommon/ConstantManager.h" class PointerWrap; +enum class PrimitiveType : u32; // The non-API dependent parts. class GeometryShaderManager @@ -16,7 +17,7 @@ public: static void Dirty(); static void DoState(PointerWrap& p); - static void SetConstants(); + static void SetConstants(PrimitiveType prim); static void SetViewportChanged(); static void SetProjectionChanged(); static void SetLinePtWidthChanged(); diff --git a/Source/Core/VideoCommon/IndexGenerator.cpp b/Source/Core/VideoCommon/IndexGenerator.cpp index be2dc99e3a..5d86561ffd 100644 --- a/Source/Core/VideoCommon/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/IndexGenerator.cpp @@ -190,6 +190,39 @@ u16* AddLineStrip(u16* index_ptr, u32 num_verts, u32 index) return index_ptr; } +template +u16* AddLines_VSExpand(u16* index_ptr, u32 num_verts, u32 index) +{ + // VS Expand uses (index >> 2) as the base vertex + // Bit 0 indicates which side of the line (left/right for a vertical line) + // Bit 1 indicates which point of the line (top/bottom for a vertical line) + // VS Expand assumes the two points will be adjacent vertices + constexpr u32 advance = linestrip ? 
1 : 2; + for (u32 i = 1; i < num_verts; i += advance) + { + u32 p0 = (index + i - 1) << 2; + u32 p1 = (index + i - 0) << 2; + if constexpr (pr) + { + *index_ptr++ = p0 + 0; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p1 + 3; + *index_ptr++ = s_primitive_restart; + } + else + { + *index_ptr++ = p0 + 0; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p1 + 3; + } + } + return index_ptr; +} + u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) { for (u32 i = 0; i != num_verts; ++i) @@ -198,6 +231,35 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) } return index_ptr; } + +template +u16* AddPoints_VSExpand(u16* index_ptr, u32 num_verts, u32 index) +{ + // VS Expand uses (index >> 2) as the base vertex + // Bottom two bits indicate which of (TL, TR, BL, BR) this is + for (u32 i = 0; i < num_verts; ++i) + { + u32 base = (index + i) << 2; + if constexpr (pr) + { + *index_ptr++ = base + 0; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 3; + *index_ptr++ = s_primitive_restart; + } + else + { + *index_ptr++ = base + 0; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 3; + } + } + return index_ptr; +} } // Anonymous namespace void IndexGenerator::Init() @@ -220,9 +282,27 @@ void IndexGenerator::Init() m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } - m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; - m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; - m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; + if (g_Config.UseVSForLinePointExpand()) + { + if (g_Config.backend_info.bSupportsPrimitiveRestart) + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand; + 
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand; + } + else + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand; + } + } + else + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; + } } void IndexGenerator::Start(u16* index_ptr) @@ -246,10 +326,14 @@ void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32 m_base_index += num_vertices; } -u32 IndexGenerator::GetRemainingIndices() const +u32 IndexGenerator::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const { - // -1 is reserved for primitive restart (OGL + DX11) - constexpr u32 max_index = 65534; + u32 max_index = USHRT_MAX; - return max_index - m_base_index; + if (g_Config.UseVSForLinePointExpand() && primitive >= OpcodeDecoder::Primitive::GX_DRAW_LINES) + max_index >>= 2; + + // -1 is reserved for primitive restart + + return max_index - m_base_index - 1; } diff --git a/Source/Core/VideoCommon/IndexGenerator.h b/Source/Core/VideoCommon/IndexGenerator.h index 32cf21e207..3c57ea7803 100644 --- a/Source/Core/VideoCommon/IndexGenerator.h +++ b/Source/Core/VideoCommon/IndexGenerator.h @@ -23,7 +23,7 @@ public: // returns numprimitives u32 GetNumVerts() const { return m_base_index; } u32 GetIndexLen() const { return static_cast(m_index_buffer_current - m_base_index_ptr); } - u32 GetRemainingIndices() const; + u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const; private: u16* m_index_buffer_current = nullptr; diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index f281024f22..59ae917686 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -10,6 +10,7 @@ #include 
"Common/MsgHandler.h" #include "Core/ConfigManager.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/FramebufferShaderGen.h" @@ -695,6 +696,35 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in) ps->ztest = EmulatedZ::EarlyWithZComplocHack; } + if (g_ActiveConfig.UseVSForLinePointExpand() && + (out.rasterization_state.primitive == PrimitiveType::Points || + out.rasterization_state.primitive == PrimitiveType::Lines)) + { + // All primitives are expanded to triangles in the vertex shader + vertex_shader_uid_data* vs = out.vs_uid.GetUidData(); + const PortableVertexDeclaration& decl = out.vertex_format->GetVertexDeclaration(); + vs->position_has_3_elems = decl.position.components >= 3; + vs->texcoord_elem_count = 0; + for (int i = 0; i < 8; i++) + { + if (decl.texcoords[i].enable) + { + ASSERT(decl.texcoords[i].components <= 3); + vs->texcoord_elem_count |= decl.texcoords[i].components << (i * 2); + } + } + out.vertex_format = nullptr; + if (out.rasterization_state.primitive == PrimitiveType::Points) + vs->vs_expand = VSExpand::Point; + else + vs->vs_expand = VSExpand::Line; + PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? + PrimitiveType::TriangleStrip : + PrimitiveType::Triangles; + out.rasterization_state.primitive = prim; + out.gs_uid.GetUidData()->primitive_type = static_cast(prim); + } + return out; } @@ -760,6 +790,17 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in) out.blending_state.usedualsrc = false; out.ps_uid.GetUidData()->no_dual_src = true; } + + if (g_ActiveConfig.UseVSForLinePointExpand()) + { + // All primitives are expanded to triangles in the vertex shader + PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? 
+ PrimitiveType::TriangleStrip : + PrimitiveType::Triangles; + out.rasterization_state.primitive = prim; + out.gs_uid.GetUidData()->primitive_type = static_cast(prim); + } + return out; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 7b407f9aaf..85757b64c5 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -5,6 +5,7 @@ #include +#include "Common/Assert.h" #include "Common/FileUtil.h" #include "Core/ConfigManager.h" #include "VideoCommon/VideoCommon.h" @@ -44,6 +45,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() g_ActiveConfig.ManualTextureSamplingWithHiResTextures(); bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler; bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader; + bits.backend_vs_point_line_expand = g_ActiveConfig.UseVSForLinePointExpand(); return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 73fa68af03..22bad1b220 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -178,6 +178,7 @@ union ShaderHostConfig BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes; BitField<26, 1, bool, u32> backend_sampler_lod_bias; BitField<27, 1, bool, u32> backend_dynamic_vertex_loader; + BitField<28, 1, bool, u32> backend_vs_point_line_expand; static ShaderHostConfig GetCurrent(); }; @@ -316,3 +317,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n" "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n" "\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n"; + +static const char s_geometry_shader_uniforms[] = "\tfloat4 " I_STEREOPARAMS ";\n" + "\tfloat4 " I_LINEPTPARAMS ";\n" + "\tint4 " I_TEXOFFSET ";\n" + "\tuint vs_expand;\n"; diff --git 
a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 04f2b27a14..0bfacf88ee 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -3,6 +3,7 @@ #include "VideoCommon/UberShaderVertex.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/UberShaderCommon.h" @@ -35,6 +36,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config const bool ssaa = host_config.ssaa; const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool vertex_rounding = host_config.vertex_rounding; + const bool vertex_loader = + host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand; const u32 num_texgen = uid_data->num_texgens; ShaderCode out; @@ -46,6 +49,13 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("{}", s_shader_uniforms); out.Write("}};\n"); + if (vertex_loader) + { + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); + } + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex); out.Write("}};\n\n"); @@ -54,7 +64,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); - if (host_config.backend_dynamic_vertex_loader) + if (vertex_loader) { out.Write(R"( SSBO_BINDING(1) readonly restrict buffer Vertices {{ @@ -73,17 +83,17 @@ SSBO_BINDING(1) readonly restrict buffer Vertices {{ // D3D12 uses a root constant for this uniform, since it changes with every draw. // D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something // out for it if we want to support it in the future. 
- out.Write("UBO_BINDING(std140, 3) uniform DX_Constants {{\n" + out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n" " uint base_vertex;\n" "}};\n\n" - "uint GetVertexBaseOffset() {{\n" - " return (gl_VertexID + base_vertex) * vertex_stride;\n" + "uint GetVertexBaseOffset(uint vertex_id) {{\n" + " return (vertex_id + base_vertex) * vertex_stride;\n" "}}\n"); } else { - out.Write("uint GetVertexBaseOffset() {{\n" - " return gl_VertexID * vertex_stride;\n" + out.Write("uint GetVertexBaseOffset(uint vertex_id) {{\n" + " return vertex_id * vertex_stride;\n" "}}\n"); } @@ -187,9 +197,17 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ out.Write("VS_OUTPUT o;\n" "\n"); - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_vs_point_line_expand) { - out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n"); + out.Write("uint vertex_id = gl_VertexID;\n" + "if (vs_expand != 0u) {{\n" + " vertex_id = vertex_id >> 2;\n" + "}}\n" + "uint vertex_base_offset = GetVertexBaseOffset(vertex_id);\n"); + } + else if (host_config.backend_dynamic_vertex_loader) + { + out.Write("uint vertex_base_offset = GetVertexBaseOffset(gl_VertexID);\n"); } // rawpos is always needed LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos"); @@ -320,6 +338,88 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ if (num_texgen > 0) GenVertexShaderTexGens(api_type, host_config, num_texgen, out); + if (host_config.backend_vs_point_line_expand) + { + out.Write("if (vs_expand == {}u) {{ // Line\n", static_cast(VSExpand::Line)); + out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n" + " bool is_right = (gl_VertexID & 1) != 0;\n" + " uint other_base_offset = vertex_base_offset;\n" + " if (is_bottom) {{\n" + " other_base_offset -= vertex_stride;\n" + " }} else {{\n" + " other_base_offset += vertex_stride;\n" + " }}\n" + " float4 other_rawpos = load_input_float4_rawpos(other_base_offset, " + 
"vertex_offset_rawpos);\n" + " float4 other_p0 = P0;\n" + " float4 other_p1 = P1;\n" + " float4 other_p2 = P2;\n" + " if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", + VB_HAS_POSMTXIDX); + out.Write(" uint other_posidx = int(load_input_uint4_ubyte4(other_base_offset, " + "vertex_offset_posmtx).r);\n" + " other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n" + " other_p1 = " I_TRANSFORMMATRICES "[other_posidx+1];\n" + " other_p2 = " I_TRANSFORMMATRICES "[other_posidx+2];\n" + " }}\n" + " float4 other_pos = float4(dot(other_p0, other_rawpos), " + "dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n" + " other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION + "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION + "[3], other_pos));\n" + "\n" + " float sign = is_right ? 1.0f : -1.0f;\n" + // GameCube/Wii's line drawing algorithm is a little quirky. It does not + // use the correct line caps. Instead, the line caps are vertical or + // horizontal depending the slope of the line. + " float2 offset;\n" + " float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. + " if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + " offset = float2(sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + " }} else {{\n" + // Line is more wide. Extend geometry up and down. 
+ // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + " offset = float2(0, sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + " }}\n" + "\n" + " o.pos.xy += offset * o.pos.w;\n"); + if (num_texgen > 0) + { + out.Write(" if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" + " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); + for (u32 i = 0; i < num_texgen; i++) + { + out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.x += texOffset;\n", i); + } + out.Write(" }}\n"); + } + out.Write("}} else if (vs_expand == {}u) {{ // Point\n", static_cast(VSExpand::Point)); + out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n" + " bool is_right = (gl_VertexID & 1) != 0;\n" + " float2 sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n" + " float2 offset = sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" + " o.pos.xy += offset * o.pos.w;\n"); + if (num_texgen > 0) + { + out.Write(" if (" I_TEXOFFSET "[3] != 0) {{\n" + " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" + " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " + "is_bottom ? 
texOffsetMagnitude : 0.0f);"); + for (u32 i = 0; i < num_texgen; i++) + { + out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.xy += texOffset;\n", i); + } + out.Write(" }}\n"); + } + out.Write("}}\n"); + } + if (per_pixel_lighting) { out.Write("// When per-pixel lighting is enabled, the vertex colors are passed through\n" @@ -574,7 +674,7 @@ static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& hos " {{\n"); out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", VB_HAS_TEXMTXIDX0); - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand) { out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, " "vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n" @@ -655,7 +755,7 @@ static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_c std::string_view name, std::string_view shader_type, std::string_view stored_type, std::string_view offset_name) { - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand) { code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent, shader_type, name, shader_type, stored_type, diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 41b3968b2b..f8e4970a51 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -140,12 +140,12 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive // Check for size in buffer, if the buffer gets full, call Flush() if (!m_is_flushed && - (count > m_index_generator.GetRemainingIndices() || count > GetRemainingIndices(primitive) || - needed_vertex_bytes > GetRemainingSize())) + (count > m_index_generator.GetRemainingIndices(primitive) || + count 
> GetRemainingIndices(primitive) || needed_vertex_bytes > GetRemainingSize())) { Flush(); - if (count > m_index_generator.GetRemainingIndices()) + if (count > m_index_generator.GetRemainingIndices(primitive)) { ERROR_LOG_FMT(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush."); } @@ -193,7 +193,55 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c { const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen(); - if (g_Config.backend_info.bSupportsPrimitiveRestart) + if (primitive >= Primitive::GX_DRAW_LINES) + { + if (g_Config.UseVSForLinePointExpand()) + { + if (g_Config.backend_info.bSupportsPrimitiveRestart) + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len / 5 * 2; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 5 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len / 5; + default: + return 0; + } + } + else + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len / 6 * 2; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 6 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len / 6; + default: + return 0; + } + } + } + else + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 2 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len; + default: + return 0; + } + } + } + else if (g_Config.backend_info.bSupportsPrimitiveRestart) { switch (primitive) { @@ -206,15 +254,6 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c return index_len / 1 - 1; case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 6 * 4 + 1; - - case Primitive::GX_DRAW_LINES: - return index_len; - case Primitive::GX_DRAW_LINE_STRIP: - return index_len / 2 + 1; - - case Primitive::GX_DRAW_POINTS: - return index_len; - default: return 0; } @@ -232,15 +271,6 @@ u32 
VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c return index_len / 3 + 2; case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 3 + 2; - - case Primitive::GX_DRAW_LINES: - return index_len; - case Primitive::GX_DRAW_LINE_STRIP: - return index_len / 2 + 1; - - case Primitive::GX_DRAW_POINTS: - return index_len; - default: return 0; } @@ -511,13 +541,24 @@ void VertexManagerBase::Flush() VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, &base_vertex, &base_index); + if (g_ActiveConfig.backend_info.api_type != APIType::D3D && + g_ActiveConfig.UseVSForLinePointExpand() && + (m_current_primitive_type == PrimitiveType::Points || + m_current_primitive_type == PrimitiveType::Lines)) + { + // VS point/line expansion puts the vertex id at gl_VertexID << 2 + // That means the base vertex has to be adjusted to match + // (The shader adds this after shifting right on D3D, so no need to do this) + base_vertex <<= 2; + } + // Texture loading can cause palettes to be applied (-> uniforms -> draws). // Palette application does not use vertices, only a full-screen quad, so this is okay. // Same with GPU texture decoding, which uses compute shaders. g_texture_cache->BindTextures(used_textures); // Now we can upload uniforms, as nothing else will override them. 
- GeometryShaderManager::SetConstants(); + GeometryShaderManager::SetConstants(m_current_primitive_type); PixelShaderManager::SetConstants(); UploadUniforms(); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 2eb5a53a97..948dc3ad34 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -6,6 +6,7 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VertexLoaderManager.h" @@ -83,6 +84,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho const bool ssaa = host_config.ssaa; const bool vertex_rounding = host_config.vertex_rounding; + ShaderCode input_extract; + out.Write("{}", s_lighting_struct); // uniforms @@ -91,6 +94,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("{}", s_shader_uniforms); out.Write("}};\n"); + if (uid_data->vs_expand != VSExpand::None) + { + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); + + if (api_type == APIType::D3D) + { + // D3D doesn't include the base vertex in SV_VertexID + out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n" + " uint base_vertex;\n" + "}};\n\n"); + } + } + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", ShaderStage::Vertex); @@ -98,31 +116,114 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho WriteIsNanHeader(out, api_type); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - if 
((uid_data->components & VB_HAS_NORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - if ((uid_data->components & VB_HAS_TANGENT) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - if ((uid_data->components & VB_HAS_BINORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - - if ((uid_data->components & VB_HAS_COL0) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - if ((uid_data->components & VB_HAS_COL1) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - - for (u32 i = 0; i < 8; ++i) + if (uid_data->vs_expand == VSExpand::None) { - const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) + if ((uid_data->components & VB_HAS_COL0) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + if ((uid_data->components & VB_HAS_COL1) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + + for (u32 i = 0; i < 8; ++i) { - out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, - has_texmtx != 0 ? 
3 : 2, i); + const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + + if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) + { + out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, + has_texmtx != 0 ? 3 : 2, i); + } } } + else + { + // Can't use float3, etc because we want 4-byte alignment + out.Write( + "uint4 unpack_ubyte4(uint value) {{\n" + " return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);\n" + "}}\n\n" + "struct InputData {{\n"); + if (uid_data->components & VB_HAS_POSMTXIDX) + { + out.Write(" uint posmtx;\n"); + input_extract.Write("uint4 posmtx = unpack_ubyte4(i.posmtx);\n"); + } + if (uid_data->position_has_3_elems) + { + out.Write(" float pos0;\n" + " float pos1;\n" + " float pos2;\n"); + input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, i.pos2, 1.0f);\n"); + } + else + { + out.Write(" float pos0;\n" + " float pos1;\n"); + input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, 0.0f, 1.0f);\n"); + } + std::array names = {"normal", "binormal", "tangent"}; + for (int i = 0; i < 3; i++) + { + if (uid_data->components & (VB_HAS_NORMAL << i)) + { + out.Write(" float {0}0;\n" + " float {0}1;\n" + " float {0}2;\n", + names[i]); + input_extract.Write("float3 raw{0} = float3(i.{0}0, i.{0}1, i.{0}2);\n", names[i]); + } + } + for (int i = 0; i < 2; i++) + { + if (uid_data->components & (VB_HAS_COL0 << i)) + { + out.Write(" uint color{};\n", i); + input_extract.Write("float4 rawcolor{0} = float4(unpack_ubyte4(i.color{0})) / 255.0f;\n", + i); + } + } + for (int i = 0; i < 8; i++) + { + if (uid_data->components & (VB_HAS_UV0 << i)) + { + u32 ncomponents = (uid_data->texcoord_elem_count >> (2 * i)) & 3; + if (ncomponents < 2) + { + out.Write(" float tex{};\n", i); + input_extract.Write("float3 rawtex{0} = float3(i.tex{0}, 0.0f, 0.0f);\n", i); + } + else if (ncomponents == 2) + { + out.Write(" float tex{0}_0;\n" + " float tex{0}_1;\n", + i); + 
input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, 0.0f);\n", i); + } + else + { + out.Write(" float tex{0}_0;\n" + " float tex{0}_1;\n" + " float tex{0}_2;\n", + i); + input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, i.tex{0}_2);\n", + i); + } + } + } + out.Write("}};\n\n" + "SSBO_BINDING(1) readonly restrict buffer InputBuffer {{\n" + " InputData input_buffer[];\n" + "}};\n\n"); + } if (host_config.backend_geometry_shaders) { @@ -161,6 +262,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("void main()\n{{\n"); + if (uid_data->vs_expand != VSExpand::None) + { + out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n" + "bool is_right = (gl_VertexID & 1) != 0;\n"); + // D3D doesn't include the base vertex in SV_VertexID + // See comment in UberShaderVertex for details + if (api_type == APIType::D3D) + out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n"); + else + out.Write("uint vertex_id = gl_VertexID >> 2;\n"); + out.Write("InputData i = input_buffer[vertex_id];\n" + "{}", + input_extract.GetBuffer()); + } + out.Write("VS_OUTPUT o;\n"); // xfmem.numColorChans controls the number of color channels available to TEV, but we still need @@ -403,6 +519,86 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("}}\n"); } + if (uid_data->vs_expand == VSExpand::Line) + { + out.Write("// Line expansion\n" + "uint other_id = vertex_id;\n" + "if (is_bottom) {{\n" + " other_id -= 1;\n" + "}} else {{\n" + " other_id += 1;\n" + "}}\n" + "InputData other = input_buffer[other_id];\n"); + if (uid_data->position_has_3_elems) + out.Write("float4 other_pos = float4(other.pos0, other.pos1, other.pos2, 1.0f);\n"); + else + out.Write("float4 other_pos = float4(other.pos0, other.pos1, 0.0f, 1.0f);\n"); + if (uid_data->components & VB_HAS_POSMTXIDX) + { + out.Write("uint other_posidx = other.posmtx & 0xff;\n" + "float4 other_p0 = " I_TRANSFORMMATRICES 
"[other_posidx];\n" + "float4 other_p1 = " I_TRANSFORMMATRICES "[other_posidx + 1];\n" + "float4 other_p2 = " I_TRANSFORMMATRICES "[other_posidx + 2];\n" + "other_pos = float4(dot(other_p0, other_pos), dot(other_p1, other_pos), " + "dot(other_p2, other_pos), 1.0f);\n"); + } + else + { + out.Write("other_pos = float4(dot(P0, other_pos), dot(P1, other_pos), dot(P2, other_pos), " + "1.0f);\n"); + } + out.Write("other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION + "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION + "[3], other_pos));\n" + "float expand_sign = is_right ? 1.0f : -1.0f;\n" + "float2 offset;\n" + "float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. + "if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + " offset = float2(expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + "}} else {{\n" + // Line is more wide. Extend geometry up and down. + // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + " offset = float2(0, expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + "}}\n" + "\n" + "o.pos.xy += offset * o.pos.w;\n"); + if (uid_data->numTexGens > 0) + { + out.Write("if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" + " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); + for (u32 i = 0; i < uid_data->numTexGens; i++) + { + out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.x += texOffset;\n", i); + } + out.Write("}}\n"); + } + } + else if (uid_data->vs_expand == VSExpand::Point) + { + out.Write("// Point expansion\n" + "float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 
1.0f : -1.0f);\n" + "float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" + "o.pos.xy += offset * o.pos.w;\n"); + if (uid_data->numTexGens > 0) + { + out.Write("if (" I_TEXOFFSET "[3] != 0) {{\n" + " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" + " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " + "is_bottom ? texOffsetMagnitude : 0.0f);"); + for (u32 i = 0; i < uid_data->numTexGens; i++) + { + out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.xy += texOffset;\n", i); + } + out.Write("}}\n"); + } + } + if (per_pixel_lighting) { // When per-pixel lighting is enabled, the vertex colors are passed through diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 028404c6e8..0c30223b9e 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -11,6 +11,7 @@ enum class APIType; enum class TexInputForm : u32; enum class TexGenType : u32; enum class SourceRow : u32; +enum class VSExpand : u32; // TODO should be reordered enum : int @@ -42,10 +43,12 @@ struct vertex_shader_uid_data u32 numTexGens : 4; u32 numColorChans : 2; u32 dualTexTrans_enabled : 1; + VSExpand vs_expand : 2; + u32 position_has_3_elems : 1; - u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is - // 8 bits wide - u32 pad : 18; + u16 texcoord_elem_count; // 2 bits per texcoord input + u16 texMtxInfo_n_projection; // Stored separately to guarantee that the texMtxInfo struct is + // 8 bits wide struct { diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 579cf5e30a..dfdd0ffe79 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -222,9 +222,16 @@ struct VideoConfig final bool bSupportsSettingObjectNames = false; bool bSupportsPartialMultisampleResolve = false; bool 
bSupportsDynamicVertexLoader = false; + bool bSupportsVSLinePointExpand = false; } backend_info; // Utility + bool UseVSForLinePointExpand() const + { + if (!backend_info.bSupportsVSLinePointExpand) + return false; + return !backend_info.bSupportsGeometryShaders; + } bool MultisamplingEnabled() const { return iMultisamples > 1; } bool ExclusiveFullscreenEnabled() const {