From 1ad76ad33180c1e33b40ed616f1c3d464994362e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 20 Oct 2018 17:43:00 +0300 Subject: [PATCH] rsx: Restructure programs - Also re-enable pipeline optimizations --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 42 ++-- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 26 ++- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 154 +++++++++----- rpcs3/Emu/RSX/GL/GLGSRender.h | 5 +- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 12 +- rpcs3/Emu/RSX/RSXFIFO.cpp | 235 +++++++++++----------- rpcs3/Emu/RSX/RSXFIFO.h | 10 +- rpcs3/Emu/RSX/RSXFragmentProgram.h | 4 +- rpcs3/Emu/RSX/RSXThread.cpp | 86 +++++--- rpcs3/Emu/RSX/RSXThread.h | 24 ++- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 33 ++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 265 ++++++++++++++++--------- rpcs3/Emu/RSX/VK/VKGSRender.h | 54 +++-- rpcs3/Emu/RSX/VK/VKHelpers.h | 40 ++-- rpcs3/Emu/RSX/VK/VKOverlays.h | 16 +- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 94 +++++---- rpcs3/Emu/RSX/VK/VKTexture.cpp | 4 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 4 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 6 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 26 ++- rpcs3/Emu/RSX/rsx_methods.cpp | 76 ++++++- 21 files changed, 777 insertions(+), 439 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 66962aea6a..e4840f9491 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include "ShaderParam.h" @@ -169,7 +169,6 @@ namespace glsl " int is_volatile;\n" " int frequency;\n" " int divisor;\n" - " int modulo;\n" "};\n\n" "uint get_bits(uvec4 v, int swap)\n" @@ -290,17 +289,24 @@ namespace glsl "attribute_desc fetch_desc(int location)\n" "{\n" + " // Each descriptor is 64 bits wide\n" + " // [0-8] attribute stride\n" + " // [8-20] attribute divisor\n" + " // [20-21] swap bytes flag\n" + " // [21-22] volatile flag\n" + " // [22-24] frequency op\n" + " // [24-27] attribute type\n" + " // [27-30] attribute size\n" " attribute_desc result;\n" - " int attribute_flags = input_attributes[location].w;\n" - " result.type = input_attributes[location].x;\n" - " result.attribute_size = input_attributes[location].y;\n" - " result.starting_offset = input_attributes[location].z;\n" + " int attribute_flags = input_attributes[location].x;\n" " result.stride = attribute_flags & 0xFF;\n" - " result.swap_bytes = (attribute_flags >> 8) & 0x1;\n" - " result.is_volatile = (attribute_flags >> 9) & 0x1;\n" - " result.frequency = (attribute_flags >> 10) & 0x3;\n" - " result.modulo = (attribute_flags >> 12) & 0x1;\n" - " result.divisor = (attribute_flags >> 13) & 0xFFFF;\n" + " result.divisor = (attribute_flags >> 8) & 0xFFF;\n" + " result.swap_bytes = (attribute_flags >> 20) & 0x1;\n" + " result.is_volatile = (attribute_flags >> 21) & 0x1;\n" + " result.frequency = (attribute_flags >> 22) & 0x3;\n" + " result.type = (attribute_flags >> 24) & 0x7;\n" + " result.attribute_size = (attribute_flags >> 27) & 0x7;\n" + " result.starting_offset = input_attributes[location].y;\n" " return result;\n" "}\n\n" @@ -325,14 +331,18 @@ namespace glsl " }\n\n" " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n" " if (desc.frequency == 0)\n" + " {\n" " vertex_id = 0;\n" - " else if (desc.frequency > 1)\n" + " }\n" + " else if (desc.frequency == 2)\n" " {\n" " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" - " if (desc.modulo != 0)\n" - " vertex_id = " << vertex_id_name << " % desc.divisor;\n" - " else\n" - " vertex_id = " << vertex_id_name << " / desc.divisor;\n" + " vertex_id = " << vertex_id_name << " / desc.divisor;\n" + " }\n" + " else if (desc.frequency == 3)\n" + " {\n" + " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" + " vertex_id = " << vertex_id_name << " % desc.divisor;\n" " }\n" "\n" " if (desc.is_volatile != 0)\n" diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 1ca0a87aca..64f006b65a 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include #include "Emu/Memory/vm.h" #include "Emu/System.h" @@ -132,9 +132,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) } OS << "\n"; - OS << "layout(std140, binding = 2) uniform FragmentConstantsBuffer\n"; - OS << "{\n"; + std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { if (PT.type == "sampler1D" || @@ -144,10 +143,21 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) continue; for (const ParamItem& PI : PT.items) - OS << " " << PT.type << " " << PI.name << ";\n"; + { + constants_block += " " + PT.type + " " + PI.name + ";\n"; + } } - // Fragment state parameters + if (!constants_block.empty()) + { + OS << "layout(std140, binding = 3) uniform FragmentConstantsBuffer\n"; + OS << "{\n"; + OS << constants_block; + OS << "};\n\n"; + } + + OS << "layout(std140, binding = 4) uniform FragmentStateBuffer\n"; + OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; OS << " uint rop_control;\n"; @@ -156,8 +166,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " uint fog_mode;\n"; OS << " float wpos_scale;\n"; OS << " float wpos_bias;\n"; + OS << "};\n\n"; + + OS << "layout(std140, binding = 5) uniform TextureParametersBuffer\n"; + OS << "{\n"; OS << " vec4 texture_parameters[16];\n"; //sampling: x,y scaling and (unused) offsets data - OS << "};\n"; + OS << "};\n\n"; } void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 2442266080..1423f0df1a 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -624,7 +624,10 @@ void GLGSRender::end() m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); - m_vertex_state_buffer->notify(); + m_fragment_env_buffer->notify(); + m_vertex_env_buffer->notify(); + m_texture_parameters_buffer->notify(); + m_vertex_layout_buffer->notify(); m_fragment_constants_buffer->notify(); m_transform_constants_buffer->notify(); @@ -795,7 +798,10 @@ void GLGSRender::on_init_thread() m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer()); m_transform_constants_buffer.reset(new gl::legacy_ring_buffer()); m_fragment_constants_buffer.reset(new gl::legacy_ring_buffer()); - m_vertex_state_buffer.reset(new gl::legacy_ring_buffer()); + m_fragment_env_buffer.reset(new gl::legacy_ring_buffer()); + m_vertex_env_buffer.reset(new gl::legacy_ring_buffer()); + m_texture_parameters_buffer.reset(new gl::legacy_ring_buffer()); + m_vertex_layout_buffer.reset(new gl::legacy_ring_buffer()); m_index_ring_buffer.reset(new gl::legacy_ring_buffer()); } else @@ -803,7 +809,10 @@ void GLGSRender::on_init_thread() m_attrib_ring_buffer.reset(new gl::ring_buffer()); m_transform_constants_buffer.reset(new gl::ring_buffer()); m_fragment_constants_buffer.reset(new gl::ring_buffer()); - m_vertex_state_buffer.reset(new gl::ring_buffer()); + m_fragment_env_buffer.reset(new gl::ring_buffer()); + m_vertex_env_buffer.reset(new gl::ring_buffer()); + m_texture_parameters_buffer.reset(new gl::ring_buffer()); + m_vertex_layout_buffer.reset(new gl::ring_buffer()); m_index_ring_buffer.reset(new gl::ring_buffer()); } @@ -811,7 +820,10 @@ void GLGSRender::on_init_thread() m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000); m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); - m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_vertex_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); if (gl_caps.vendor_AMD) { @@ -1013,9 +1025,24 @@ void GLGSRender::on_exit() m_fragment_constants_buffer->remove(); } - if (m_vertex_state_buffer) + if (m_fragment_env_buffer) { - m_vertex_state_buffer->remove(); + m_fragment_env_buffer->remove(); + } + + if (m_vertex_env_buffer) + { + m_vertex_env_buffer->remove(); + } + + if (m_texture_parameters_buffer) + { + m_texture_parameters_buffer->remove(); + } + + if (m_vertex_layout_buffer) + { + m_vertex_layout_buffer->remove(); } if (m_index_ring_buffer) @@ -1224,78 +1251,113 @@ bool GLGSRender::load_program() void GLGSRender::load_program_env(const gl::vertex_upload_info& upload_info) { - u8 *buf; - u32 vertex_state_offset; - u32 vertex_constants_offset; - u32 fragment_constants_offset; - - const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; - const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float)); - const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); - if (!m_program) { fmt::throw_exception("Unreachable right now" HERE); } + const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; + + const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); + const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty) && fragment_constants_size; + const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); + const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); + const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); + m_program->use(); if (manually_flush_ring_buffers) { - m_vertex_state_buffer->reserve_storage_on_heap(512); - m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256)); + if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128); + if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256); + if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256); + if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256)); if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192); + + m_vertex_layout_buffer->reserve_storage_on_heap(128 + 16); } - // Vertex state - auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); - buf = static_cast(mapping.first); - vertex_state_offset = mapping.second; - fill_scale_offset_data(buf, false); - fill_user_clip_data(buf + 64); - *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = upload_info.vertex_index_base; - *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); - *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); - fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); + if (update_vertex_env) + { + // Vertex state + auto mapping = m_vertex_env_buffer->alloc_from_heap(160, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); + fill_scale_offset_data(buf, false); + fill_user_clip_data(buf + 64); + *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); + *(reinterpret_cast(buf + 132)) = 0; // Reserved + *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + + m_vertex_env_buffer->bind_range(0, mapping.second, 160); + } + + { + // Vertex layout state + auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); + *buf = upload_info.vertex_index_base; + buf += 4; + fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); + + m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16); + } if (update_transform_constants) { // Vertex constants - mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); - buf = static_cast(mapping.first); - vertex_constants_offset = mapping.second; + auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); fill_vertex_program_constants_data(buf); + + m_transform_constants_buffer->bind_range(2, mapping.second, 8192); } - // Fragment constants - mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align); - buf = static_cast(mapping.first); - fragment_constants_offset = mapping.second; - if (fragment_constants_size) + if (update_fragment_constants) { + // Fragment constants + auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); + m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, current_fragment_program, gl::get_driver_caps().vendor_NVIDIA); + + m_fragment_constants_buffer->bind_range(3, mapping.second, fragment_constants_size); } - // Fragment state - fill_fragment_state_buffer(buf + fragment_constants_size, current_fragment_program); + if (update_fragment_env) + { + // Fragment state + auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); + fill_fragment_state_buffer(buf, current_fragment_program); - m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); - m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); + m_fragment_env_buffer->bind_range(4, mapping.second, 32); + } - if (update_transform_constants) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); + if (update_fragment_texture_env) + { + // Fragment texture parameters + auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align); + auto buf = static_cast(mapping.first); + fill_fragment_texture_parameters(buf, current_fragment_program); + + m_texture_parameters_buffer->bind_range(5, mapping.second, 256); + } if (manually_flush_ring_buffers) { - m_vertex_state_buffer->unmap(); - m_fragment_constants_buffer->unmap(); - + if (update_fragment_env) m_fragment_env_buffer->unmap(); + if (update_vertex_env) m_vertex_env_buffer->unmap(); + if (update_fragment_texture_env) m_texture_parameters_buffer->unmap(); + if (update_fragment_constants) m_fragment_constants_buffer->unmap(); if (update_transform_constants) m_transform_constants_buffer->unmap(); + + m_vertex_layout_buffer->unmap(); } - const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty); + const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty); m_graphics_state &= ~handled_flags; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 8d5eb06f58..2550934ca9 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -294,7 +294,10 @@ private: std::unique_ptr m_attrib_ring_buffer; std::unique_ptr m_fragment_constants_buffer; std::unique_ptr m_transform_constants_buffer; - std::unique_ptr m_vertex_state_buffer; + std::unique_ptr m_fragment_env_buffer; + std::unique_ptr m_vertex_env_buffer; + std::unique_ptr m_texture_parameters_buffer; + std::unique_ptr m_vertex_layout_buffer; std::unique_ptr m_index_ring_buffer; // Identity buffer used to fix broken gl_VertexID on ATI stack diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 27730d3695..793d805185 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/System.h" #include "GLVertexProgram.h" @@ -37,11 +37,15 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << " ivec4 user_clip_enabled[2];\n"; OS << " vec4 user_clip_factor[2];\n"; OS << " uint transform_branch_bits;\n"; - OS << " uint vertex_base_index;\n"; OS << " float point_size;\n"; OS << " float z_near;\n"; OS << " float z_far;\n"; - OS << " ivec4 input_attributes[16];\n"; + OS << "};\n\n"; + + OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n"; + OS << "{\n"; + OS << " uint vertex_base_index;\n"; + OS << " ivec2 input_attributes[16];\n"; OS << "};\n\n"; } @@ -53,7 +57,7 @@ void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) { - OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer\n"; + OS << "layout(std140, binding = 2) uniform VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[468];\n"; OS << "};\n\n"; diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 88d71571d5..51a3e59979 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -129,7 +129,7 @@ namespace rsx m_prefetcher_busy.store(true); read_ahead(m_prefetcher_info, m_prefetched_queue, m_prefetch_get); - //optimize(m_prefetcher_info, m_prefetched_queue); + optimize(m_prefetcher_info, m_prefetched_queue); m_prefetcher_busy.store(false); m_prefetch_mutex.unlock(); @@ -206,7 +206,9 @@ namespace rsx info.start_loc = get_pointer; info.num_draw_calls = 0; - info.draw_call_distance_weight = 0; + + u32 cmd; + u32 count; while (true) { @@ -218,8 +220,6 @@ namespace rsx // Validate put and get registers before reading the command // TODO: Who should handle graphics exceptions?? - u32 cmd; - if (u32 addr = RSXIOMem.RealAddr(get_pointer)) { cmd = vm::read32(addr); @@ -230,17 +230,19 @@ namespace rsx break; } - if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || - (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || - (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || - (cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) + if (UNLIKELY(cmd & 0xe0030003)) { - // Flow control, stop read ahead - commands.push_back({ cmd, 0, get_pointer }); - break; + if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || + (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || + (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || + (cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) + { + // Flow control, stop read ahead + commands.push_back({ cmd, 0, get_pointer }); + break; + } } - - if ((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD) + else if (UNLIKELY((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD)) { if (commands.empty() || commands.back().reg != RSX_METHOD_NOP_CMD) { @@ -251,49 +253,50 @@ namespace rsx get_pointer += 4; continue; } - - if (cmd & 0x3) + else if (UNLIKELY(cmd & 0x3)) { // Malformed command, optional recovery break; } - u32 count = (cmd >> 18) & 0x7ff; - //Validate the args ptr if the command attempts to read from it auto args = vm::ptr::make(RSXIOMem.RealAddr(get_pointer + 4)); - - if (!args && count) + if (UNLIKELY(!args)) { // Optional recovery break; } - // Stop command execution if put will be equal to get ptr during the execution itself - if (count * 4 + 4 > put - get_pointer) - { - count = (put - get_pointer) / 4 - 1; - } - + count = (cmd >> 18) & 0x7ff; if (count > 1) { + // Stop command execution if put will be equal to get ptr during the execution itself + if (UNLIKELY(count * 4 + 4 > put - get_pointer)) + { + count = (put - get_pointer) / 4 - 1; + } + // Queue packet header commands.push_back({ FIFO_PACKET_BEGIN, count, get_pointer }); - const bool no_increment = (cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD; - u32 reg = cmd & 0xfffc; - get_pointer += 4; // First executed command is at data[0] + // First executed command is at data[0] + get_pointer += 4; - for (u32 i = 0; i < count; i++, get_pointer += 4) + if (UNLIKELY((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD)) { - commands.push_back({ reg, args[i], get_pointer }); - - if (reg == (NV4097_SET_BEGIN_END << 2)) + const u32 reg = cmd & 0xfffc; + for (u32 i = 0; i < count; i++, get_pointer += 4) { - info.num_draw_calls++; + commands.push_back({ reg, args[i], get_pointer }); + } + } + else + { + u32 reg = cmd & 0xfffc; + for (u32 i = 0; i < count; i++, get_pointer += 4, reg += 4) + { + commands.push_back({ reg, args[i], get_pointer }); } - - if (!no_increment) reg += 4; } } else @@ -315,15 +318,14 @@ namespace rsx } info.length = get_pointer - info.start_loc; - if (!info.num_draw_calls) + if (info.num_draw_calls < 2) { return; } info.num_draw_calls /= 2; // Begin+End pairs - //info.draw_call_distance_weight = info.length / info.num_draw_calls; } -#pragma optimize("", on) + void FIFO_control::report_branch_hit(u32 source, u32 target) { const auto range = m_branch_prediction_table.equal_range(source); @@ -507,7 +509,7 @@ namespace rsx if (queue_size > 0) { - if (m_internal_get != m_ctrl->get) + if (UNLIKELY(m_internal_get != m_ctrl->get)) { // Control register changed registers_changed = true; @@ -545,7 +547,7 @@ namespace rsx } } - verify(HERE), m_queue.empty(); + //verify(HERE), m_queue.empty(); if (m_ctrl->put == m_ctrl->get) { @@ -573,7 +575,7 @@ namespace rsx } // Lock to disable the prefetcher - if (!m_prefetch_mutex.try_lock()) + if (0)//!m_prefetch_mutex.try_lock()) { return busy_cmd; } @@ -601,13 +603,13 @@ namespace rsx { m_internal_get = m_ctrl->get; read_ahead(m_fifo_info, m_queue, m_internal_get); - //optimize(m_fifo_info, m_queue); + optimize(m_fifo_info, m_queue); m_ctrl->get = m_internal_get; m_ctrl_tag++; } - m_prefetch_mutex.unlock(); + //m_prefetch_mutex.unlock(); if (!m_queue.empty()) { @@ -656,49 +658,58 @@ namespace rsx // Vertex { NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 }, { NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 }, - // Raster - { NV4097_SET_ALPHA_TEST_ENABLE, 1 }, - { NV4097_SET_ALPHA_FUNC, 1 }, - { NV4097_SET_ALPHA_REF, 1 }, - { NV4097_SET_FRONT_FACE, 1 }, }; - for (u32 reg = 0; reg < m_skippable_registers.size(); ++reg) + const std::pair ignorable_ranges[] = { - bool _continue = false; - for (const auto &method : skippable_ranges) - { - if (reg < method.first) - break; + // General + { NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2] + { NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 }, + { NV4097_INVALIDATE_L2, 1 }, + { NV4097_INVALIDATE_ZCULL, 1 }, + // FIFO + { (FIFO_DISABLED_COMMAND >> 2), 1}, + { (FIFO_PACKET_BEGIN >> 2), 1 }, + { (FIFO_DRAW_BARRIER >> 2), 1 }, + // ROP + { NV4097_SET_ALPHA_FUNC, 1 }, + { NV4097_SET_ALPHA_REF, 1 }, + { NV4097_SET_ALPHA_TEST_ENABLE, 1 }, + { NV4097_SET_ANTI_ALIASING_CONTROL, 1 }, + // Program + { NV4097_SET_SHADER_PACKER, 1 }, + { NV4097_SET_SHADER_WINDOW, 1 }, + // Vertex data offsets + { NV4097_SET_VERTEX_DATA_BASE_OFFSET, 1 }, + { NV4097_SET_VERTEX_DATA_BASE_INDEX, 1 } + }; - if (reg - method.first < method.second) - { - // Safe to ignore if value has not changed - m_skippable_registers[reg] = true; - _continue = true; - break; - } - } - - if (_continue) - continue; - - m_skippable_registers[reg] = false; - } + std::fill(m_register_properties.begin(), m_register_properties.end(), 0u); for (const auto &method : skippable_ranges) { - for (int subreg = 0; subreg < method.second; ++subreg) + for (int i = 0; i < method.second; ++i) { - // Safe to ignore if value has not changed - verify(HERE), m_skippable_registers[subreg] = true; + m_register_properties[method.first + i] = register_props::skippable; + } + } + + for (const auto &method : ignorable_ranges) + { + for (int i = 0; i < method.second; ++i) + { + m_register_properties[method.first + i] |= register_props::ignorable; } } } void flattening_pass::optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) { - __unused(info); + if (info.num_draw_calls < 20) + { + // Not enough draw calls + return; + } #if (ENABLE_OPTIMIZATION_DEBUGGING) auto copy = commands; @@ -750,31 +761,14 @@ namespace rsx for (auto &command : commands) { - //LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value); - bool flush_commands_flag = has_deferred_call; bool execute_method_flag = true; const auto reg = command.reg >> 2; const auto value = command.value; + switch (reg) { - case NV4097_INVALIDATE_VERTEX_FILE: // PSLight clears VERTEX_FILE[0-2] - case NV4097_PIPE_NOP: - case NV4097_INVALIDATE_VERTEX_FILE + 2: - case NV4097_INVALIDATE_VERTEX_CACHE_FILE: - case NV4097_INVALIDATE_L2: - case NV4097_INVALIDATE_ZCULL: - case (FIFO_DISABLED_COMMAND >> 2): - case (FIFO_PACKET_BEGIN >> 2): - case (FIFO_DRAW_BARRIER >> 2): - case (FIFO_EMPTY >> 2): - case (FIFO_BUSY >> 2): - { - // Ignore these completely - flush_commands_flag = false; - break; - } case NV4097_SET_BEGIN_END: { if (value && value != deferred_primitive_type) @@ -788,47 +782,50 @@ namespace rsx has_deferred_call = true; flush_commands_flag = false; execute_method_flag = false; - - // TODO: If END, insert draw barrier } - break; } case NV4097_DRAW_ARRAYS: { - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) - break; + if (has_deferred_call) + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) + break; - flush_commands_flag = false; + flush_commands_flag = false; + } break; } case NV4097_DRAW_INDEX_ARRAY: { - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) - break; + if (has_deferred_call) + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) + break; - flush_commands_flag = false; - break; - } - case NV4097_SET_VERTEX_DATA_BASE_INDEX: - case NV4097_SET_VERTEX_DATA_BASE_OFFSET: - { - // These can be executed when emitting geometry - flush_commands_flag = false; + flush_commands_flag = false; + } break; } default: { - // Hopefully this is skippable so the batch can keep growing - if (reg >= m_skippable_registers.size()) + if (reg >= m_register_properties.size()) { - // Likely flow control, unskippable + // Flow control or special command break; } - if (m_skippable_registers[reg]) + const auto properties = m_register_properties[reg]; + if (properties & register_props::ignorable) + { + // These have no effect on rendering behavior or can be handled within begin/end + flush_commands_flag = false; + break; + } + + if (properties & register_props::skippable) { if (has_deferred_call) { @@ -840,9 +837,10 @@ namespace rsx break; } } + + set_register(reg, value); } - set_register(reg, value); break; } } @@ -1211,7 +1209,7 @@ namespace rsx return; } - if (cmd == FIFO::FIFO_EMPTY || !Emu.IsRunning()) + if (cmd == FIFO::FIFO_EMPTY) { if (performance_counters.state == FIFO_state::running) { @@ -1219,7 +1217,6 @@ namespace rsx performance_counters.state = FIFO_state::empty; } - std::this_thread::yield(); return; } @@ -1227,7 +1224,7 @@ namespace rsx // TODO: Who should handle graphics exceptions?? if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) { - u32 offs = cmd & 0x1ffffffc; + const u32 offs = cmd & 0x1ffffffc; if (offs == command.loc) { //Jump to self. Often preceded by NOP @@ -1245,7 +1242,7 @@ namespace rsx } if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) { - u32 offs = cmd & 0xfffffffc; + const u32 offs = cmd & 0xfffffffc; if (offs == command.loc) { //Jump to self. Often preceded by NOP @@ -1271,8 +1268,7 @@ namespace rsx return; } - u32 offs = cmd & 0xfffffffc; - //LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get); + const u32 offs = cmd & 0xfffffffc; m_return_addr = command.loc + 4; fifo_ctrl->set_get(offs); return; @@ -1286,7 +1282,6 @@ namespace rsx return; } - //LOG_WARNING(RSX, "rsx return(0x%x)", get); fifo_ctrl->set_get(m_return_addr); m_return_addr = -1; return; diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h index 64a0a8ccdb..a832a92a62 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.h +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -47,7 +47,7 @@ namespace rsx u32 start_loc; u32 length; u32 num_draw_calls; - u32 draw_call_distance_weight; + u32 reserved; }; struct branch_target_info_t @@ -67,7 +67,13 @@ namespace rsx struct flattening_pass : public optimization_pass { private: - std::array m_skippable_registers; + enum register_props : u8 + { + skippable = 1, + ignorable = 2 + }; + + std::array m_register_properties; public: flattening_pass(); diff --git a/rpcs3/Emu/RSX/RSXFragmentProgram.h b/rpcs3/Emu/RSX/RSXFragmentProgram.h index 10dae66e21..244374bf47 100644 --- a/rpcs3/Emu/RSX/RSXFragmentProgram.h +++ b/rpcs3/Emu/RSX/RSXFragmentProgram.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "GCM.h" #include "RSXTexture.h" @@ -230,7 +230,7 @@ struct RSXFragmentProgram bool front_color_specular_output : 1; u32 texture_dimensions; - std::array texture_scale[16]; + float texture_scale[16][4]; u8 textures_alpha_kill[16]; u8 textures_zfunc[16]; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 65f31c7ef8..9037bc40d0 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -419,6 +419,15 @@ namespace rsx conditional_render_test_address = 0; } + if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty) + { + // Request for update of fragment constants if the program block is invalidated + m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty; + + // Request for update of texture parameters if the program is likely to have changed + m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; + } + in_begin_end = true; } @@ -545,7 +554,7 @@ namespace rsx fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); - //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); + fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); //fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!! //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); @@ -640,13 +649,20 @@ namespace rsx while (external_interrupt_lock.load()) _mm_pause(); } + // Idle if emulation paused + if (Emu.IsPaused()) + { + std::this_thread::sleep_for(1ms); + continue; + } + // Execute backend-local tasks first do_local_task(performance_counters.state); // Update sub-units zcull_ctrl->update(this); - // Execite FIFO queue + // Execute FIFO queue run_FIFO(); } } @@ -716,8 +732,9 @@ namespace rsx rsx::method_registers.clip_plane_5_enabled(), }; - s32 clip_enabled_flags[8] = {}; - f32 clip_distance_factors[8] = {}; + u8 data_block[64]; + s32* clip_enabled_flags = reinterpret_cast(data_block); + f32* clip_distance_factors = reinterpret_cast(data_block + 32); for (int index = 0; index < 6; ++index) { @@ -743,8 +760,7 @@ namespace rsx } } - memcpy(buffer, clip_enabled_flags, 32); - memcpy((char*)buffer + 32, clip_distance_factors, 32); + memcpy(buffer, data_block, 2 * 8 * sizeof(u32)); } /** @@ -814,16 +830,11 @@ namespace rsx u32 *dst = static_cast(buffer); stream_vector(dst, (u32&)fog0, (u32&)fog1, rop_control, (u32&)alpha_ref); stream_vector(dst + 4, alpha_func, fog_mode, (u32&)wpos_scale, (u32&)wpos_bias); + } - size_t offset = 8; - for (int index = 0; index < 16; ++index) - { - stream_vector(&dst[offset], - (u32&)fragment_program.texture_scale[index][0], (u32&)fragment_program.texture_scale[index][1], - (u32&)fragment_program.texture_scale[index][2], (u32&)fragment_program.texture_scale[index][3]); - - offset += 4; - } + void thread::fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program) + { + memcpy(buffer, fragment_program.texture_scale, 16 * 4 * sizeof(float)); } void thread::write_inline_array_to_buffer(void *dst_buffer) @@ -2020,15 +2031,23 @@ namespace rsx } } - //Fill the data + // Fill the data + // Each descriptor field is 64 bits wide + // [0-8] attribute stride\n" + // [8-20] attribute divisor\n" + // [20-21] swap bytes flag\n" + // [21-22] volatile flag\n" + // [22-24] frequency op\n" + // [24-27] attribute type\n" + // [27-30] attribute size\n" + memset(buffer, 0, 256); - const s32 swap_storage_mask = (1 << 8); - const s32 volatile_storage_mask = (1 << 9); - const s32 default_frequency_mask = (1 << 10); - const s32 repeating_frequency_mask = (3 << 10); - const s32 input_function_modulo_mask = (1 << 12); - const s32 input_divisor_mask = (0xFFFF << 13); + const s32 swap_storage_mask = (1 << 20); + const s32 volatile_storage_mask = (1 << 21); + const s32 default_frequency_mask = (1 << 22); + const s32 division_op_frequency_mask = (2 << 22); + const s32 modulo_op_frequency_mask = (3 << 22); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); @@ -2114,11 +2133,14 @@ namespace rsx } default: { - if (modulo_mask & (1 << index)) - attributes |= input_function_modulo_mask; + verify(HERE), frequency <= 4095u; - attributes |= repeating_frequency_mask; - attributes |= (frequency << 13) & input_divisor_mask; + if (modulo_mask & (1 << index)) + attributes |= modulo_op_frequency_mask; + else + attributes |= division_op_frequency_mask; + + attributes |= (frequency << 8); break; } } @@ -2144,10 +2166,11 @@ namespace rsx if (to_swap_bytes) attributes |= swap_storage_mask; - buffer[index * 4 + 0] = static_cast(type); - buffer[index * 4 + 1] = size; - buffer[index * 4 + 2] = offset_in_block[index]; - buffer[index * 4 + 3] = attributes; + attributes |= (static_cast(type) << 24); + attributes |= (size << 27); + + buffer[index * 4 + 0] = attributes; + buffer[index * 4 + 1] = offset_in_block[index]; } } @@ -2326,6 +2349,9 @@ namespace rsx { zcull_ctrl->sync(this); + // Fragment constants may have been updated + m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty; + //TODO: On sync every sub-unit should finish any pending tasks //Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently //verify (HERE), async_tasks_pending.load() == 0; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index cc19a52f6d..2813c31dbb 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -88,18 +88,21 @@ namespace rsx context_clear_all = context_clear_color | context_clear_depth }; - enum pipeline_state : u8 + enum pipeline_state : u32 { - fragment_program_dirty = 1, - vertex_program_dirty = 2, - fragment_state_dirty = 4, - vertex_state_dirty = 8, - transform_constants_dirty = 16, - framebuffer_reads_dirty = 32, + fragment_program_dirty = 0x1, // Fragment program changed + vertex_program_dirty = 0x2, // Vertex program changed + fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc) + vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc) + transform_constants_dirty = 0x10, // Transform constants changed + fragment_constants_dirty = 0x20, // Fragment constants changed + framebuffer_reads_dirty = 0x40, // Framebuffer contents changed + fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed + vertex_texture_state_dirty = 0x80, // Fragment texture parameters changed invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, memory_barrier_bits = framebuffer_reads_dirty, - all_dirty = 255 + all_dirty = -1u }; enum FIFO_state : u8 @@ -641,6 +644,11 @@ namespace rsx */ void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program); + /** + * Fill buffer with fragment texture parameter constants (texture matrix) + */ + void fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program); + /** * Write inlined array data to buffer. * The storage of inlined data looks different from memory stored arrays. diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index db907b66f8..ab311b402b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/Memory/vm.h" #include "Emu/System.h" #include "VKFragmentProgram.h" @@ -144,9 +144,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n"; - OS << "{\n"; - + std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { if (PT.type == "sampler1D" || @@ -156,9 +154,21 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) continue; for (const ParamItem& PI : PT.items) - OS << " " << PT.type << " " << PI.name << ";\n"; + { + constants_block += " " + PT.type + " " + PI.name + ";\n"; + } } + if (!constants_block.empty()) + { + OS << "layout(std140, set = 0, binding = 3) uniform FragmentConstantsBuffer\n"; + OS << "{\n"; + OS << constants_block; + OS << "};\n\n"; + } + + OS << "layout(std140, set = 0, binding = 4) uniform FragmentStateBuffer\n"; + OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; OS << " uint rop_control;\n"; @@ -167,15 +177,26 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " uint fog_mode;\n"; OS << " float wpos_scale;\n"; OS << " float wpos_bias;\n"; + OS << "};\n\n"; + + OS << "layout(std140, set = 0, binding = 5) uniform TextureParametersBuffer\n"; + OS << "{\n"; OS << " vec4 texture_parameters[16];\n"; - OS << "};\n"; + OS << "};\n\n"; vk::glsl::program_input in; in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; in.domain = glsl::glsl_fragment_program; in.name = "FragmentConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; + inputs.push_back(in); + in.location = FRAGMENT_STATE_BIND_SLOT; + in.name = "FragmentStateBuffer"; + inputs.push_back(in); + + in.location = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT; + in.name = "TextureParametersBuffer"; inputs.push_back(in); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b726ee90d3..0f86ce0b75 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -433,11 +433,12 @@ namespace std::tuple get_shared_pipeline_layout(VkDevice dev) { - std::array bindings = {}; + std::array bindings = {}; size_t idx = 0; - // Vertex buffer - for (int i = 0; i < 16; i++) + + // Vertex stream, one stream for cacheable data, one stream for transient data + for (int i = 0; i < 2; i++) { bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; bindings[idx].descriptorCount = 1; @@ -453,6 +454,20 @@ namespace idx++; + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = FRAGMENT_STATE_BIND_SLOT; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT; + + idx++; + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; @@ -460,7 +475,21 @@ namespace idx++; - for (int i = 0; i < 16; i++) + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + bindings[idx].binding = VERTEX_LAYOUT_BIND_SLOT; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + bindings[idx].binding = VERTEX_PARAMS_BIND_SLOT; + + idx++; + + for (int i = 0; i < rsx::limits::fragment_textures_count; i++) { bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorCount = 1; @@ -469,7 +498,7 @@ namespace idx++; } - for (int i = 0; i < 4; i++) + for (int i = 0; i < rsx::limits::vertex_textures_count; i++) { bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorCount = 1; @@ -478,10 +507,7 @@ namespace idx++; } - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; - bindings[idx].binding = SCALE_OFFSET_BIND_SLOT; + verify(HERE), idx == VK_NUM_DESCRIPTOR_BINDINGS; VkDescriptorSetLayoutCreateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -619,7 +645,11 @@ VKGSRender::VKGSRender() : GSRender() //VRAM allocation m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); - m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); + m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer"); + m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer"); + m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); + m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer"); + m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); @@ -719,10 +749,14 @@ VKGSRender::~VKGSRender() vk::destroy_global_resources(); //Heaps - m_index_buffer_ring_info.destroy(); - m_uniform_buffer_ring_info.destroy(); - m_transform_constants_ring_info.destroy(); m_attrib_ring_info.destroy(); + m_fragment_env_ring_info.destroy(); + m_vertex_env_ring_info.destroy(); + m_fragment_texture_params_ring_info.destroy(); + m_vertex_layout_ring_info.destroy(); + m_fragment_constants_ring_info.destroy(); + m_transform_constants_ring_info.destroy(); + m_index_buffer_ring_info.destroy(); m_texture_upload_buffer_ring_info.destroy(); //Fallback bindables @@ -938,7 +972,11 @@ void VKGSRender::check_heap_status() { if (m_attrib_ring_info.is_critical() || m_texture_upload_buffer_ring_info.is_critical() || - m_uniform_buffer_ring_info.is_critical() || + m_fragment_env_ring_info.is_critical() || + m_vertex_env_ring_info.is_critical() || + m_fragment_texture_params_ring_info.is_critical() || + m_vertex_layout_ring_info.is_critical() || + m_fragment_constants_ring_info.is_critical() || m_transform_constants_ring_info.is_critical() || m_index_buffer_ring_info.is_critical()) { @@ -963,7 +1001,11 @@ void VKGSRender::check_heap_status() m_vertex_cache->purge(); m_index_buffer_ring_info.reset_allocation_stats(); - m_uniform_buffer_ring_info.reset_allocation_stats(); + m_fragment_env_ring_info.reset_allocation_stats(); + m_vertex_env_ring_info.reset_allocation_stats(); + m_fragment_texture_params_ring_info.reset_allocation_stats(); + m_vertex_layout_ring_info.reset_allocation_stats(); + m_fragment_constants_ring_info.reset_allocation_stats(); m_transform_constants_ring_info.reset_allocation_stats(); m_attrib_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats(); @@ -1161,15 +1203,10 @@ void VKGSRender::emit_geometry(u32 sub_index) if (sub_index == 0) { - // Load program execution environment - load_program_env(upload_info); update_descriptors = true; } else { - // Update vertex fetch environment - update_vertex_env(upload_info); - if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) { /* VkDescriptorSetAllocateInfo alloc_info = {}; @@ -1192,10 +1229,13 @@ void VKGSRender::emit_geometry(u32 sub_index) } } + // Update vertex fetch parameters + update_vertex_env(upload_info); + if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set); + m_program->bind_uniform(persistent_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "persistent_input_stream", m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "volatile_input_stream", m_current_frame->descriptor_set); vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); } @@ -1203,8 +1243,6 @@ void VKGSRender::emit_geometry(u32 sub_index) //std::chrono::time_point draw_start = steady_clock::now(); //m_setup_time += std::chrono::duration_cast(draw_start - vertex_end).count(); - begin_render_pass(); - if (!upload_info.index_info) { if (draw_call.is_single_draw()) @@ -1247,8 +1285,6 @@ void VKGSRender::emit_geometry(u32 sub_index) } } - close_render_pass(); - //std::chrono::time_point draw_end = steady_clock::now(); //m_draw_time += std::chrono::duration_cast(draw_end - draw_start).count(); } @@ -1503,6 +1539,9 @@ void VKGSRender::end() return; } + // Load program execution environment + load_program_env(); + std::chrono::time_point program_end = steady_clock::now(); m_setup_time += std::chrono::duration_cast(program_end - program_start).count(); @@ -1608,6 +1647,7 @@ void VKGSRender::end() vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); update_draw_state(); + begin_render_pass(); u32 sub_index = 0; rsx::method_registers.current_draw_clause.begin(); @@ -1617,6 +1657,7 @@ void VKGSRender::end() } while (rsx::method_registers.current_draw_clause.next()); + close_render_pass(); vk::leave_uninterruptible(); if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) @@ -2022,7 +2063,11 @@ void VKGSRender::advance_queued_frames() m_vertex_cache->purge(); m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), - m_uniform_buffer_ring_info.get_current_put_pos_minus_one(), + m_vertex_env_ring_info.get_current_put_pos_minus_one(), + m_fragment_env_ring_info.get_current_put_pos_minus_one(), + m_vertex_layout_ring_info.get_current_put_pos_minus_one(), + m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(), + m_fragment_constants_ring_info.get_current_put_pos_minus_one(), m_transform_constants_ring_info.get_current_put_pos_minus_one(), m_index_buffer_ring_info.get_current_put_pos_minus_one(), m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); @@ -2148,14 +2193,22 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) //Heap cleanup; deallocates memory consumed by the frame if it is still held m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; - m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr; - m_transform_constants_ring_info.m_get_pos = ctx->vtxconst_heap_ptr; + m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr; + m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr; + m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr; + m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr; + m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr; + m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr; m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; m_attrib_ring_info.notify(); - m_uniform_buffer_ring_info.notify(); + m_vertex_env_ring_info.notify(); + m_fragment_env_ring_info.notify(); + m_fragment_constants_ring_info.notify(); m_transform_constants_ring_info.notify(); + m_vertex_layout_ring_info.notify(); + m_fragment_texture_params_ring_info.notify(); m_index_buffer_ring_info.notify(); m_texture_upload_buffer_ring_info.notify(); } @@ -2482,107 +2535,119 @@ bool VKGSRender::load_program() return m_program != nullptr; } -void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info) +void VKGSRender::load_program_env() { if (!m_program) { fmt::throw_exception("Unreachable right now" HERE); } - if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty)) + const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; + + const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); + const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty); + const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); + const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); + const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); + + if (update_vertex_env) { - const size_t fragment_constants_sz = current_fp_metadata.program_constants_buffer_length; - const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float)); - const size_t required_mem = 512 + fragment_buffer_sz; + // Vertex state + const auto mem = m_vertex_env_ring_info.alloc<256>(256); + auto buf = (u8*)m_vertex_env_ring_info.map(mem, 160); - const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem); - const size_t fragment_constants_offset = vertex_state_offset + 512; - - //We do this in one go - u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem); - - //Vertex state fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = vertex_info.vertex_index_base; + *(reinterpret_cast(buf + 132)) = 0; // Reserved *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); - fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast(buf + 160), - vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); - - //Fragment constants - buf = buf + 512; - if (fragment_constants_sz) - { - m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, - current_fragment_program, vk::sanitize_fp_values()); - } - - fill_fragment_state_buffer(buf + fragment_constants_sz, current_fragment_program); - - m_uniform_buffer_ring_info.unmap(); - - m_vertex_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }; - m_fragment_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }; + m_vertex_env_ring_info.unmap(); + m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 160 }; } - if (m_graphics_state & rsx::pipeline_state::transform_constants_dirty) + if (update_transform_constants) { - //Vertex constants - const size_t vertex_constants_offset = m_transform_constants_ring_info.alloc<256>(8192); - auto buf = m_transform_constants_ring_info.map(vertex_constants_offset, 8192); + // Transform constants + auto mem = m_transform_constants_ring_info.alloc<256>(8192); + auto buf = m_transform_constants_ring_info.map(mem, 8192); fill_vertex_program_constants_data(buf); m_transform_constants_ring_info.unmap(); - m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, vertex_constants_offset, 8192 }; + m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 }; } - if (1)//m_graphics_state || old_program != m_program) + if (update_fragment_constants) { - m_program->bind_uniform(m_vertex_state_buffer_info, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); + // Fragment constants + if (fragment_constants_size) + { + auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size); + auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size); + + m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_size) }, + current_fragment_program, vk::sanitize_fp_values()); + + m_fragment_constants_ring_info.unmap(); + m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size }; + } + else + { + m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE }; + } + } + + if (update_fragment_env) + { + auto mem = m_fragment_env_ring_info.alloc<256>(256); + auto buf = m_fragment_env_ring_info.map(mem, 32); + + fill_fragment_state_buffer(buf, current_fragment_program); + m_fragment_env_ring_info.unmap(); + m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 }; + } + + if (update_fragment_texture_env) + { + auto mem = m_fragment_texture_params_ring_info.alloc<256>(256); + auto buf = m_fragment_texture_params_ring_info.map(mem, 256); + + fill_fragment_texture_parameters(buf, current_fragment_program); + m_fragment_texture_params_ring_info.unmap(); + m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 }; + } + + //if (1) + { + m_program->bind_uniform(m_vertex_env_buffer_info, VERTEX_PARAMS_BIND_SLOT, m_current_frame->descriptor_set); m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_state_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_constants_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_env_buffer_info, FRAGMENT_STATE_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set); } //Clear flags - const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty); + const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty); m_graphics_state &= ~handled_flags; } void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info) { - // Vertex base index = vertex_offset + 132 - // Vertex layout = vertex_offset + 160 + auto mem = m_vertex_layout_ring_info.alloc<256>(256); + auto buf = (u32*)m_vertex_layout_ring_info.map(mem, 128 + 16); - std::array vertex_layout; - fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, vertex_layout.data(), + *buf = vertex_info.vertex_index_base; + buf += 4; + + fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf, vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); - vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512, - VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + m_vertex_layout_ring_info.unmap(); + m_vertex_layout_buffer_info = { m_vertex_layout_ring_info.heap->value, mem, 128 + 16 }; - vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset + 132, 4, &vertex_info.vertex_index_base); - - u32 write_offset = m_vertex_state_buffer_info.offset + 160; - s32 *src_ptr = vertex_layout.data(); - - for (const auto& placement : m_vertex_layout.attribute_placement) - { - constexpr u32 data_len = 4 * sizeof(s32); - if (placement != rsx::attribute_buffer_placement::none) - { - vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, write_offset, data_len, src_ptr); - } - - write_offset += data_len; - src_ptr += 4; - } - - vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT); + m_program->bind_uniform(m_vertex_layout_buffer_info, VERTEX_LAYOUT_BIND_SLOT, m_current_frame->descriptor_set); } void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) @@ -2633,7 +2698,11 @@ void VKGSRender::write_buffers() void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) { if (m_attrib_ring_info.dirty() || - m_uniform_buffer_ring_info.dirty() || + m_fragment_env_ring_info.dirty() || + m_vertex_env_ring_info.dirty() || + m_fragment_texture_params_ring_info.dirty() || + m_vertex_layout_ring_info.dirty() || + m_fragment_constants_ring_info.dirty() || m_index_buffer_ring_info.dirty() || m_transform_constants_ring_info.dirty() || m_texture_upload_buffer_ring_info.dirty()) @@ -2642,7 +2711,11 @@ void VKGSRender::close_and_submit_command_buffer(const std::vector m_secondary_command_buffer.begin(); m_attrib_ring_info.sync(m_secondary_command_buffer); - m_uniform_buffer_ring_info.sync(m_secondary_command_buffer); + m_fragment_env_ring_info.sync(m_secondary_command_buffer); + m_vertex_env_ring_info.sync(m_secondary_command_buffer); + m_fragment_texture_params_ring_info.sync(m_secondary_command_buffer); + m_vertex_layout_ring_info.sync(m_secondary_command_buffer); + m_fragment_constants_ring_info.sync(m_secondary_command_buffer); m_index_buffer_ring_info.sync(m_secondary_command_buffer); m_transform_constants_ring_info.sync(m_secondary_command_buffer); m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 98d22b9664..ce567b3a43 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -36,8 +36,9 @@ namespace vk //NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px) #define VK_ATTRIB_RING_BUFFER_SIZE_M 384 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 -#define VK_UBO_RING_BUFFER_SIZE_M 64 +#define VK_UBO_RING_BUFFER_SIZE_M 16 #define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64 +#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 64 #define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_MAX_ASYNC_CB_COUNT 64 @@ -161,8 +162,12 @@ struct frame_context_t //Heap pointers s64 attrib_heap_ptr = 0; - s64 ubo_heap_ptr = 0; - s64 vtxconst_heap_ptr = 0; + s64 vtx_env_heap_ptr = 0; + s64 frag_env_heap_ptr = 0; + s64 frag_const_heap_ptr = 0; + s64 vtx_const_heap_ptr = 0; + s64 vtx_layout_heap_ptr = 0; + s64 frag_texparam_heap_ptr = 0; s64 index_heap_ptr = 0; s64 texture_upload_heap_ptr = 0; @@ -177,9 +182,13 @@ struct frame_context_t used_descriptors = other.used_descriptors; attrib_heap_ptr = other.attrib_heap_ptr; - ubo_heap_ptr = other.attrib_heap_ptr; - vtxconst_heap_ptr = other.vtxconst_heap_ptr; - index_heap_ptr = other.attrib_heap_ptr; + vtx_env_heap_ptr = other.vtx_env_heap_ptr; + frag_env_heap_ptr = other.frag_env_heap_ptr; + vtx_layout_heap_ptr = other.vtx_layout_heap_ptr; + frag_texparam_heap_ptr = other.frag_texparam_heap_ptr; + frag_const_heap_ptr = other.frag_const_heap_ptr; + vtx_const_heap_ptr = other.vtx_const_heap_ptr; + index_heap_ptr = other.index_heap_ptr; texture_upload_heap_ptr = other.texture_upload_heap_ptr; } @@ -190,11 +199,15 @@ struct frame_context_t std::swap(samplers_to_clean, other.samplers_to_clean); } - void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc) + void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc) { attrib_heap_ptr = attrib_loc; - ubo_heap_ptr = ubo_loc; - vtxconst_heap_ptr = vtxconst_loc; + vtx_env_heap_ptr = vtxenv_loc; + frag_env_heap_ptr = fragenv_loc; + vtx_layout_heap_ptr = vtxlayout_loc; + frag_texparam_heap_ptr = fragtex_loc; + frag_const_heap_ptr = fragconst_loc; + vtx_const_heap_ptr = vtxconst_loc; index_heap_ptr = index_loc; texture_upload_heap_ptr = texture_loc; @@ -326,15 +339,22 @@ private: u64 m_last_heap_sync_time = 0; u32 m_texbuffer_view_size = 0; - vk::vk_data_heap m_attrib_ring_info; - vk::vk_data_heap m_uniform_buffer_ring_info; - vk::vk_data_heap m_transform_constants_ring_info; - vk::vk_data_heap m_index_buffer_ring_info; - vk::vk_data_heap m_texture_upload_buffer_ring_info; + vk::data_heap m_attrib_ring_info; // Vertex data + vk::data_heap m_fragment_constants_ring_info; // Fragment program constants + vk::data_heap m_transform_constants_ring_info; // Transform program constants + vk::data_heap m_fragment_env_ring_info; // Fragment environment params + vk::data_heap m_vertex_env_ring_info; // Vertex environment params + vk::data_heap m_fragment_texture_params_ring_info; // Fragment texture params + vk::data_heap m_vertex_layout_ring_info; // Vertex layout structure + vk::data_heap m_index_buffer_ring_info; // Index data + vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap - VkDescriptorBufferInfo m_vertex_state_buffer_info; + VkDescriptorBufferInfo m_vertex_env_buffer_info; + VkDescriptorBufferInfo m_fragment_env_buffer_info; VkDescriptorBufferInfo m_vertex_constants_buffer_info; - VkDescriptorBufferInfo m_fragment_state_buffer_info; + VkDescriptorBufferInfo m_fragment_constants_buffer_info; + VkDescriptorBufferInfo m_vertex_layout_buffer_info; + VkDescriptorBufferInfo m_fragment_texture_params_buffer_info; std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage @@ -404,7 +424,7 @@ private: vk::vertex_upload_info upload_vertex_data(); bool load_program(); - void load_program_env(const vk::vertex_upload_info& upload_info); + void load_program_env(); void update_vertex_env(const vk::vertex_upload_info& upload_info); public: diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 1dcb44264f..cdc8e26c87 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -34,12 +34,17 @@ #define DESCRIPTOR_MAX_DRAW_CALLS 4096 #define OCCLUSION_MAX_POOL_SIZE 8192 -#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3 -#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2 -#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 1 -#define SCALE_OFFSET_BIND_SLOT 0 -#define TEXTURES_FIRST_BIND_SLOT 19 -#define VERTEX_TEXTURES_FIRST_BIND_SLOT 35 //19+16 +#define VERTEX_PARAMS_BIND_SLOT 0 +#define VERTEX_LAYOUT_BIND_SLOT 1 +#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 2 +#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 3 +#define FRAGMENT_STATE_BIND_SLOT 4 +#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 5 +#define VERTEX_BUFFERS_FIRST_BIND_SLOT 6 +#define TEXTURES_FIRST_BIND_SLOT 8 +#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16 + +#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4) namespace rsx { @@ -80,7 +85,7 @@ namespace vk class command_buffer; struct image; struct buffer; - struct vk_data_heap; + struct data_heap; class mem_allocator_base; struct memory_type_mapping; struct gpu_formats_support; @@ -131,7 +136,7 @@ namespace vk */ void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); + VkImageAspectFlags flags, vk::data_heap &upload_heap); //Other texture management helpers void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); @@ -2723,12 +2728,14 @@ public: namespace glsl { - enum program_input_type + enum program_input_type : u32 { input_type_uniform_buffer = 0, input_type_texel_buffer = 1, input_type_texture = 2, - input_type_storage_buffer = 3 + input_type_storage_buffer = 3, + + input_type_max_enum = 4 }; struct bound_sampler @@ -2834,8 +2841,9 @@ public: class program { - std::vector uniforms; + std::array, input_type_max_enum> uniforms; VkDevice m_device; + public: VkPipeline pipeline; u64 attribute_location_mask; @@ -2848,10 +2856,10 @@ public: program& load_uniforms(::glsl::program_domain domain, const std::vector& inputs); - bool has_uniform(std::string uniform_name); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set); + bool has_uniform(program_input_type type, const std::string &uniform_name); + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorSet &descriptor_set); void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set); - void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set); + void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set); void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set); @@ -2859,7 +2867,7 @@ public: }; } - struct vk_data_heap : public data_heap + struct data_heap : public ::data_heap { std::unique_ptr heap; bool mapped = false; @@ -2874,7 +2882,7 @@ public: void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000) { - data_heap::init(size, name, guard); + ::data_heap::init(size, name, guard); const auto device = get_current_renderer(); const auto memory_map = device->get_memory_mapping(); diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 0b64178a11..d46f2d9989 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "VKHelpers.h" #include "VKVertexProgram.h" #include "VKFragmentProgram.h" @@ -26,8 +26,8 @@ namespace vk std::unordered_map> m_program_cache; std::unique_ptr m_sampler; std::unique_ptr m_draw_fbo; - vk_data_heap m_vao; - vk_data_heap m_ubo; + vk::data_heap m_vao; + vk::data_heap m_ubo; vk::render_device* m_device = nullptr; std::string vs_src; @@ -574,7 +574,7 @@ namespace vk } vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd, - vk::vk_data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid) + vk::data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid) { const VkFormat format = (font) ? VK_FORMAT_R8_UNORM : VK_FORMAT_B8G8R8A8_UNORM; const u32 pitch = (font) ? w : w * 4; @@ -627,7 +627,7 @@ namespace vk return result; } - void create(vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) + void create(vk::command_buffer &cmd, vk::data_heap &upload_heap) { auto& dev = cmd.get_command_pool().get_owner(); overlay_pass::create(dev); @@ -674,7 +674,7 @@ namespace vk } } - vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) + vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::data_heap &upload_heap) { u64 key = (u64)font; auto found = view_cache.find(key); @@ -686,7 +686,7 @@ namespace vk true, false, font->glyph_data.data(), UINT32_MAX); } - vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap, u32 owner_uid) + vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::data_heap &upload_heap, u32 owner_uid) { u64 key = (u64)desc; auto found = temp_view_cache.find(key); @@ -735,7 +735,7 @@ namespace vk } void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass, - vk::vk_data_heap &upload_heap, rsx::overlays::overlay &ui) + vk::data_heap &upload_heap, rsx::overlays::overlay &ui) { m_scale_offset = color4f((f32)ui.virtual_width, (f32)ui.virtual_height, 1.f, 1.f); m_time = (f32)(get_system_time() / 1000) * 0.005f; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index a93bab8ce6..769c44ed0d 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "VKHelpers.h" namespace vk @@ -23,23 +23,17 @@ namespace vk program& program::load_uniforms(program_domain domain, const std::vector& inputs) { - std::vector store = uniforms; - uniforms.resize(0); - - for (auto &item : store) - { - uniforms.push_back(item); - } - for (auto &item : inputs) - uniforms.push_back(item); + { + uniforms[item.type].push_back(item); + } return *this; } - bool program::has_uniform(std::string uniform_name) + bool program::has_uniform(program_input_type type, const std::string &uniform_name) { - for (auto &uniform : uniforms) + for (const auto &uniform : uniforms[type]) { if (uniform.name == uniform_name) return true; @@ -48,20 +42,25 @@ namespace vk return false; } - void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set) + void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorSet &descriptor_set) { - for (auto &uniform : uniforms) + for (const auto &uniform : uniforms[program_input_type::input_type_texture]) { if (uniform.name == uniform_name) { - VkWriteDescriptorSet descriptor_writer = {}; - descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_writer.dstSet = descriptor_set; - descriptor_writer.descriptorCount = 1; - descriptor_writer.pImageInfo = &image_descriptor; - descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_writer.dstArrayElement = 0; - descriptor_writer.dstBinding = uniform.location; + const VkWriteDescriptorSet descriptor_writer = + { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType + nullptr, // pNext + descriptor_set, // dstSet + uniform.location, // dstBinding + 0, // dstArrayElement + 1, // descriptorCount + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType + &image_descriptor, // pImageInfo + nullptr, // pBufferInfo + nullptr // pTexelBufferView + }; vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); attribute_location_mask |= (1ull << uniform.location); @@ -77,20 +76,25 @@ namespace vk bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptor_set); } - void program::bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set) + void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set) { - for (auto &uniform : uniforms) + for (const auto &uniform : uniforms[type]) { if (uniform.name == binding_name) { - VkWriteDescriptorSet descriptor_writer = {}; - descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_writer.dstSet = descriptor_set; - descriptor_writer.descriptorCount = 1; - descriptor_writer.pTexelBufferView = &buffer_view; - descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor_writer.dstArrayElement = 0; - descriptor_writer.dstBinding = uniform.location; + const VkWriteDescriptorSet descriptor_writer = + { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType + nullptr, // pNext + descriptor_set, // dstSet + uniform.location, // dstBinding + 0, // dstArrayElement + 1, // descriptorCount + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,// descriptorType + nullptr, // pImageInfo + nullptr, // pBufferInfo + &buffer_view // pTexelBufferView + }; vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); attribute_location_mask |= (1ull << uniform.location); @@ -103,14 +107,19 @@ namespace vk void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set) { - VkWriteDescriptorSet descriptor_writer = {}; - descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_writer.dstSet = descriptor_set; - descriptor_writer.descriptorCount = 1; - descriptor_writer.pBufferInfo = &buffer_descriptor; - descriptor_writer.descriptorType = type; - descriptor_writer.dstArrayElement = 0; - descriptor_writer.dstBinding = binding_point; + const VkWriteDescriptorSet descriptor_writer = + { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType + nullptr, // pNext + descriptor_set, // dstSet + binding_point, // dstBinding + 0, // dstArrayElement + 1, // descriptorCount + type, // descriptorType + nullptr, // pImageInfo + &buffer_descriptor, // pBufferInfo + nullptr // pTexelBufferView + }; vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); attribute_location_mask |= (1ull << binding_point); @@ -121,10 +130,9 @@ namespace vk if (vertex_attributes_mask) return vertex_attributes_mask; - for (auto &uniform : uniforms) + for (const auto &uniform : uniforms[program_input_type::input_type_texel_buffer]) { - if (uniform.domain == program_domain::glsl_vertex_program && - uniform.type == program_input_type::input_type_texel_buffer) + if (uniform.domain == program_domain::glsl_vertex_program) { vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT)); } diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 9a04f2e820..f77cfc70bf 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "VKHelpers.h" #include "../GCM.h" #include "../RSXThread.h" @@ -427,7 +427,7 @@ namespace vk void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::vk_data_heap &upload_heap) + VkImageAspectFlags flags, vk::data_heap &upload_heap) { u32 mipmap_level = 0; u32 block_in_pixel = get_format_block_size_in_texel(format); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index d2d42a90a8..06ccee4faf 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -427,7 +427,7 @@ namespace vk vk::memory_type_mapping m_memory_types; vk::gpu_formats_support m_formats_support; VkQueue m_submit_queue; - vk_data_heap* m_texture_upload_heap; + vk::data_heap* m_texture_upload_heap; //Stuff that has been dereferenced goes into these std::list m_discardable_storage; @@ -956,7 +956,7 @@ namespace vk public: using baseclass::texture_cache; - void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap) + void initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap) { m_device = &device; m_memory_types = device.get_memory_mapping(); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 7a6fb333bc..b6f2b0abe7 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -63,7 +63,7 @@ namespace std::tuple> generate_emulating_index_buffer( const rsx::draw_clause& clause, u32 vertex_count, - vk::vk_data_heap& m_index_buffer_ring_info) + vk::data_heap& m_index_buffer_ring_info) { u32 index_count = get_index_count(clause.primitive, vertex_count); u32 upload_size = index_count * sizeof(u16); @@ -91,7 +91,7 @@ namespace struct draw_command_visitor { - draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout) + draw_command_visitor(vk::data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout) : m_index_buffer_ring_info(index_buffer_ring_info) , m_vertex_layout(layout) { @@ -226,7 +226,7 @@ namespace } private: - vk::vk_data_heap& m_index_buffer_ring_info; + vk::data_heap& m_index_buffer_ring_info; rsx::vertex_input_layout& m_vertex_layout; }; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index f72970e29d..7181162821 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/System.h" #include "VKVertexProgram.h" @@ -28,33 +28,41 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { OS << "#version 450\n\n"; - OS << "#extension GL_ARB_separate_shader_objects : enable\n"; + OS << "#extension GL_ARB_separate_shader_objects : enable\n\n"; + OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"; OS << "{\n"; OS << " mat4 scale_offset_mat;\n"; OS << " ivec4 user_clip_enabled[2];\n"; OS << " vec4 user_clip_factor[2];\n"; OS << " uint transform_branch_bits;\n"; - OS << " uint vertex_base_index;\n"; OS << " float point_size;\n"; OS << " float z_near;\n"; OS << " float z_far;\n"; - OS << " ivec4 input_attributes[16];\n"; - OS << "};\n"; + OS << "};\n\n"; + + OS << "layout(std140, set = 0, binding = 1) uniform VertexLayoutBuffer\n"; + OS << "{\n"; + OS << " uint vertex_base_index;\n"; + OS << " ivec2 input_attributes[16];\n"; + OS << "};\n\n"; vk::glsl::program_input in; - in.location = SCALE_OFFSET_BIND_SLOT; + in.location = VERTEX_PARAMS_BIND_SLOT; in.domain = glsl::glsl_vertex_program; in.name = "VertexContextBuffer"; in.type = vk::glsl::input_type_uniform_buffer; + inputs.push_back(in); + in.location = VERTEX_LAYOUT_BIND_SLOT; + in.name = "VertexLayoutBuffer"; inputs.push_back(in); } void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) { - OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable) - OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data) + OS << "layout(set=0, binding=6) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable) + OS << "layout(set=0, binding=7) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data) vk::glsl::program_input in; in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT; @@ -72,7 +80,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) { - OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n"; + OS << "layout(std140, set=0, binding = 2) uniform VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[468];\n"; OS << "};\n\n"; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 18c2785cb8..f9a61229c0 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -584,18 +584,48 @@ namespace rsx rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; } - void set_surface_dirty_bit(thread* rsx, u32, u32) + void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg) { + if (reg == NV4097_SET_SURFACE_CLIP_VERTICAL || + reg == NV4097_SET_SURFACE_CLIP_HORIZONTAL) + { + if (arg != method_registers.register_previous_value) + { + rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty; + } + } + rsx->m_rtts_dirty = true; rsx->m_framebuffer_state_contested = false; } + void set_surface_format(thread* rsx, u32 reg, u32 arg) + { + // Special consideration - antialiasing control can affect ROP state + const auto aa_mask = (0xF << 12); + if ((arg & aa_mask) != (method_registers.register_previous_value & aa_mask)) + { + // Antialias control has changed, update ROP parameters + rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty; + } + + set_surface_dirty_bit(rsx, reg, arg); + } + void set_surface_options_dirty_bit(thread* rsx, u32, u32) { if (rsx->m_framebuffer_state_contested) rsx->m_rtts_dirty = true; } + void set_ROP_state_dirty_bit(thread* rsx, u32, u32 arg) + { + if (arg != method_registers.register_previous_value) + { + rsx->m_graphics_state |= rsx::fragment_state_dirty; + } + } + void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg) { if (rsx->in_begin_end) @@ -620,6 +650,22 @@ namespace rsx } } + void set_vertex_env_dirty_bit(thread* rsx, u32 reg, u32 arg) + { + if (arg != method_registers.register_previous_value) + { + rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty; + } + } + + void set_fragment_env_dirty_bit(thread* rsx, u32 reg, u32 arg) + { + if (arg != method_registers.register_previous_value) + { + rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty; + } + } + template struct set_texture_dirty_bit { @@ -647,6 +693,18 @@ namespace rsx } } }; + + template + struct set_viewport_dirty_bit + { + static void impl(thread* rsx, u32 _reg, u32 arg) + { + if (arg != method_registers.register_previous_value) + { + rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty; + } + } + }; } namespace nv308a @@ -2619,7 +2677,7 @@ namespace rsx bind(); bind(); bind(); - bind(); + bind(); bind(); bind(); bind(); @@ -2660,6 +2718,20 @@ namespace rsx bind(); bind(); bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind(); + bind_array(); + bind_range(); + bind_range(); //NV308A bind_range();