From fb778e4821f5241e75eecf5845f492c0d22c762f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 21 Jan 2019 21:07:27 +0300 Subject: [PATCH] rsx: Reimplement attrib divisor --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 +- rpcs3/Emu/RSX/GL/GLGSRender.h | 1 + rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 54 +++++----- rpcs3/Emu/RSX/RSXThread.cpp | 143 +++++++++------------------ rpcs3/Emu/RSX/RSXThread.h | 86 ++++++++++++++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 +- rpcs3/Emu/RSX/VK/VKGSRender.h | 1 + rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 50 +++++----- 9 files changed, 178 insertions(+), 169 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 29cf509963..e42bbba919 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -361,7 +361,7 @@ namespace glsl " }\n" " else\n" " {\n" - " vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) / int(desc.frequency); \n" + " vertex_id = vertex_id / int(desc.frequency); \n" " }\n" " }\n" "\n" diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index abc7ef20d4..bebc527bd6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -478,7 +478,7 @@ void GLGSRender::end() { if (!subdraw) { - m_vertex_layout = analyse_inputs_interleaved(); + analyse_inputs_interleaved(m_vertex_layout); if (!m_vertex_layout.validate()) { // Execute remainining pipeline barriers with NOP draw @@ -1427,7 +1427,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) buf[1] = upload_info.vertex_index_offset; buf += 4; - fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); + fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 5495d9478c..44d8ac1edc 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -24,6 +24,7 @@ namespace gl { u32 vertex_draw_count; u32 allocated_vertex_count; + u32 first_vertex; u32 vertex_index_base; u32 vertex_index_offset; u32 persistent_mapping_offset; diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index d2ef284349..df26a3c81f 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -48,10 +48,10 @@ namespace struct vertex_input_state { + bool index_rebase; + u32 min_index; + u32 max_index; u32 vertex_draw_count; - u32 allocated_vertex_count; - u32 vertex_data_base; - u32 vertex_index_base; u32 vertex_index_offset; std::optional> index_info; }; @@ -70,6 +70,7 @@ namespace { const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); const u32 min_index = rsx::method_registers.current_draw_clause.min_index(); + const u32 max_index = (min_index + vertex_count) - 1; if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { @@ -79,10 +80,10 @@ namespace rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, rsx::method_registers.current_draw_clause.get_elements_count()); - return{ index_count, vertex_count, min_index, 0, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; + return{ false, min_index, max_index, index_count, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; } - return{ vertex_count, vertex_count, min_index, 0, 0, std::optional>() }; + return{ false, min_index, max_index, vertex_count, 0, std::optional>() }; } vertex_input_state operator()(const rsx::draw_indexed_array_command& command) @@ -117,26 +118,13 @@ namespace if (min_index >= max_index) { //empty set, do not draw - return{ 0, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; + return{ false, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; } + // Prefer only reading the vertices that are referenced in the index buffer itself + // Offset data source by min_index verts, but also notify the shader to offset the vertexID (important for modulo op) const auto index_offset = rsx::method_registers.vertex_data_base_index(); - - //check for vertex arrays with frequency modifiers - for (auto &block : m_vertex_layout.interleaved_blocks) - { - if (block.min_divisor > 1) - { - //Ignore base offsets and return real results - //The upload function will optimize the uploaded range anyway - return{ index_count, max_index, 0, 0, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; - } - } - - //Prefer only reading the vertices that are referenced in the index buffer itself - //Offset data source by min_index verts, but also notify the shader to offset the vertexID - const auto data_offset = rsx::get_index_from_base(min_index, index_offset); - return{ index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; + return{ true, min_index, max_index, index_count, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; } vertex_input_state operator()(const rsx::draw_inlined_array& command) @@ -151,10 +139,10 @@ namespace std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count); - return{ index_count, vertex_count, 0, 0, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; + return{ false, index_count, vertex_count, 0, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; } - return{ vertex_count, vertex_count, 0, 0, 0, std::optional>() }; + return{ false, vertex_count, vertex_count, 0, 0, std::optional>() }; } private: @@ -170,18 +158,26 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() //Write index buffers and count verts auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); - auto &vertex_count = result.allocated_vertex_count; - auto &vertex_base = result.vertex_data_base; + const u32 vertex_count = (result.max_index - result.min_index) + 1; + u32 vertex_base = result.min_index; + u32 index_base = 0; + + if (result.index_rebase) + { + vertex_base = rsx::get_index_from_base(vertex_base, rsx::method_registers.vertex_data_base_index()); + index_base = result.min_index; + } //Do actual vertex upload - auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); + auto required = calculate_memory_requirements(m_vertex_layout, vertex_base, vertex_count); std::pair persistent_mapping = {}, volatile_mapping = {}; gl::vertex_upload_info upload_info = { result.vertex_draw_count, // Vertex count - result.allocated_vertex_count, // Allocated vertex count - result.vertex_index_base, // Index of attribute at data location 0 + vertex_count, // Allocated vertex count + vertex_base, // First vertex in block + index_base, // Index of attribute at data location 0 result.vertex_index_offset, // Hw index offset 0u, 0u, // Mapping result.index_info // Index buffer info diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 82f09f3fbd..0bae1fcae1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1342,15 +1342,15 @@ namespace rsx } } - vertex_input_layout thread::analyse_inputs_interleaved() const + void thread::analyse_inputs_interleaved(vertex_input_layout& result) const { const rsx_state& state = rsx::method_registers; const u32 input_mask = state.vertex_attrib_input_mask(); + result.clear(); + if (state.current_draw_clause.command == rsx::draw_command::inlined_array) { - vertex_input_layout result = {}; - interleaved_range_info info = {}; info.interleaved = true; info.locations.reserve(8); @@ -1363,7 +1363,7 @@ namespace rsx { // Stride must be updated even if the stream is disabled info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); - info.locations.push_back(index); + info.locations.push_back({ index, false, 1 }); if (input_mask & (1u << index)) { @@ -1378,12 +1378,11 @@ namespace rsx } } - result.interleaved_blocks.push_back(info); - return result; + result.interleaved_blocks.emplace_back(std::move(info)); + return; } const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); - vertex_input_layout result = {}; result.interleaved_blocks.reserve(16); result.referenced_registers.reserve(16); @@ -1433,6 +1432,7 @@ namespace rsx result.attribute_placement[index] = attribute_buffer_placement::persistent; const u32 base_address = info.offset() & 0x7fffffff; bool alloc_new_block = true; + bool modulo = !!(frequency_divider_mask & (1 << index)); for (auto &block : result.interleaved_blocks) { @@ -1471,13 +1471,8 @@ namespace rsx } alloc_new_block = false; - block.locations.push_back(index); + block.locations.push_back({ index, modulo, info.frequency() }); block.interleaved = true; - block.min_divisor = std::min(block.min_divisor, info.frequency()); - - if (block.all_modulus) - block.all_modulus = !!(frequency_divider_mask & (1 << index)); - break; } @@ -1488,9 +1483,7 @@ namespace rsx block.attribute_stride = info.stride(); block.memory_location = info.offset() >> 31; block.locations.reserve(16); - block.locations.push_back(index); - block.min_divisor = info.frequency(); - block.all_modulus = !!(frequency_divider_mask & (1 << index)); + block.locations.push_back({ index, modulo, info.frequency() }); if (block.attribute_stride == 0) { @@ -1498,7 +1491,7 @@ namespace rsx block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size()); } - result.interleaved_blocks.push_back(block); + result.interleaved_blocks.emplace_back(std::move(block)); } } } @@ -1508,8 +1501,6 @@ namespace rsx //Calculate real data address to be used during upload info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location); } - - return result; } void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) @@ -1835,7 +1826,7 @@ namespace rsx fmt::throw_exception("%s(addr=0x%x): RSXIO memory not mapped" HERE, __FUNCTION__, addr); } - std::pair thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count) + std::pair thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count) { u32 persistent_memory_size = 0; u32 volatile_memory_size = 0; @@ -1861,37 +1852,13 @@ namespace rsx } } - for (const auto &block : layout.interleaved_blocks) - { - u32 unique_verts; - - if (block.single_vertex) - { - unique_verts = 1; - } - else if (block.min_divisor > 1) - { - if (block.all_modulus) - unique_verts = block.min_divisor; - else - { - unique_verts = vertex_count / block.min_divisor; - if (vertex_count % block.min_divisor) unique_verts++; - } - } - else - { - unique_verts = vertex_count; - } - - persistent_memory_size += block.attribute_stride * unique_verts; - } + persistent_memory_size = layout.calculate_interleaved_memory_requirements(first_vertex, vertex_count); } return std::make_pair(persistent_memory_size, volatile_memory_size); } - void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base) + void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base) { std::array offset_in_block = {}; u32 volatile_offset = volatile_offset_base; @@ -1917,11 +1884,11 @@ namespace rsx { const auto &block = layout.interleaved_blocks[0]; u32 inline_data_offset = volatile_offset; - for (const u8 index : block.locations) + for (const auto& attrib : block.locations) { - auto &info = rsx::method_registers.vertex_arrays_info[index]; + auto &info = rsx::method_registers.vertex_arrays_info[attrib.index]; - offset_in_block[index] = inline_data_offset; + offset_in_block[attrib.index] = inline_data_offset; inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); } } @@ -1929,34 +1896,14 @@ namespace rsx { for (const auto &block : layout.interleaved_blocks) { - for (u8 index : block.locations) + for (const auto& attrib : block.locations) { - const u32 local_address = (rsx::method_registers.vertex_arrays_info[index].offset() & 0x7fffffff); - offset_in_block[index] = persistent_offset + (local_address - block.base_offset); + const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff); + offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset); } - u32 unique_verts; - - if (block.single_vertex) - { - unique_verts = 1; - } - else if (block.min_divisor > 1) - { - if (block.all_modulus) - unique_verts = block.min_divisor; - else - { - unique_verts = vertex_count / block.min_divisor; - if (vertex_count % block.min_divisor) unique_verts++; - } - } - else - { - unique_verts = vertex_count; - } - - persistent_offset += block.attribute_stride * unique_verts; + const auto range = block.calculate_required_range(first_vertex, vertex_count); + persistent_offset += block.attribute_stride * range.second; } } @@ -1978,6 +1925,7 @@ namespace rsx const s32 modulo_op_frequency_mask = (1 << 31); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + const auto max_index = (first_vertex + vertex_count) - 1; for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { @@ -2067,9 +2015,25 @@ namespace rsx default: { if (modulo_mask & (1 << index)) - attrib1 |= modulo_op_frequency_mask; - - attrib0 |= (frequency << 8); + { + if (max_index >= frequency) + { + // Only set modulo mask if a modulo op is actually necessary! + // This requires that the uploaded range for this attr = [0, freq-1] + // Ignoring modulo op if the rendered range does not wrap allows for range optimization + attrib0 |= (frequency << 8); + attrib1 |= modulo_op_frequency_mask; + } + else + { + attrib0 |= default_frequency_mask; + } + } + else + { + // Division + attrib0 |= (frequency << 8); + } break; } } @@ -2148,30 +2112,11 @@ namespace rsx { for (const auto &block : layout.interleaved_blocks) { - u32 unique_verts; - u32 vertex_base = 0; + auto range = block.calculate_required_range(first_vertex, vertex_count); - if (block.single_vertex) - { - unique_verts = 1; - } - else if (block.min_divisor > 1) - { - if (block.all_modulus) - unique_verts = block.min_divisor; - else - { - unique_verts = vertex_count / block.min_divisor; - if (vertex_count % block.min_divisor) unique_verts++; - } - } - else - { - unique_verts = vertex_count; - vertex_base = first_vertex * block.attribute_stride; - } + const u32 data_size = range.second * block.attribute_stride; + const u32 vertex_base = range.first * block.attribute_stride; - const u32 data_size = block.attribute_stride * unique_verts; memcpy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size); persistent += data_size; } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 323b0ff868..ab55b4c7fa 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -177,18 +177,69 @@ namespace rsx u32 __dummy2; }; + struct interleaved_attribute_t + { + u8 index; + bool modulo; + u16 frequency; + }; + struct interleaved_range_info { bool interleaved = false; - bool all_modulus = false; bool single_vertex = false; u32 base_offset = 0; u32 real_offset_address = 0; u8 memory_location = 0; u8 attribute_stride = 0; - u16 min_divisor = 0; - std::vector locations; + rsx::simple_array locations; + + // Check if we need to upload a full unoptimized range, i.e [0-max_index] + std::pair calculate_required_range(u32 first, u32 count) const + { + if (single_vertex) + { + return { 0, 1 }; + } + + const u32 max_index = (first + count) - 1; + u32 _max_index = first; + u32 _min_index = first; + + for (const auto &attrib : locations) + { + if (LIKELY(attrib.frequency <= 1)) + { + _max_index = max_index; + } + else + { + if (attrib.modulo) + { + if (max_index >= attrib.frequency) + { + // Actually uses the modulo operator, cannot safely optimize + _min_index = 0; + _max_index = std::max(_max_index, attrib.frequency - 1); + } + else + { + // Same as having no modulo + _max_index = max_index; + } + } + else + { + // Division operator + _min_index = std::min(_min_index, first / attrib.frequency); + _max_index = std::max(_max_index, max_index / attrib.frequency); + } + } + } + + return { _min_index, (_max_index - _min_index) + 1 }; + } }; enum attribute_buffer_placement : u8 @@ -201,8 +252,8 @@ namespace rsx struct vertex_input_layout { std::vector interleaved_blocks; // Interleaved blocks to be uploaded as-is - std::vector> volatile_blocks; // Volatile data blocks (immediate draw vertex data for example) - std::vector referenced_registers; // Volatile register data + std::vector> volatile_blocks; // Volatile data blocks (immediate draw vertex data for example) + rsx::simple_array referenced_registers; // Volatile register data std::array attribute_placement; @@ -211,6 +262,13 @@ namespace rsx attribute_placement.fill(attribute_buffer_placement::none); } + void clear() + { + interleaved_blocks.resize(0); + volatile_blocks.resize(0); + referenced_registers.resize(0); + } + bool validate() const { // Criteria: At least one array stream has to be defined to feed vertex positions @@ -252,6 +310,18 @@ namespace rsx return false; } + + u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const + { + u32 mem = 0; + for (auto &block : interleaved_blocks) + { + const auto range = block.calculate_required_range(first_vertex, vertex_count); + mem += range.second * block.attribute_stride; + } + + return mem; + } }; struct framebuffer_layout @@ -478,7 +548,7 @@ namespace rsx /** * Analyze vertex inputs and group all interleaved blocks */ - vertex_input_layout analyse_inputs_interleaved() const; + void analyse_inputs_interleaved(vertex_input_layout&) const; RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; @@ -592,12 +662,12 @@ namespace rsx * result.first contains persistent memory requirements * result.second contains volatile memory requirements */ - std::pair calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count); + std::pair calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count); /** * Generates vertex input descriptors as an array of 16x4 s32s */ - void fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0); + void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0); /** * Uploads vertex data described in the layout descriptor diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 2455e4ad3f..2085fd3067 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1175,7 +1175,7 @@ void VKGSRender::emit_geometry(u32 sub_index) if (sub_index == 0) { - m_vertex_layout = analyse_inputs_interleaved(); + analyse_inputs_interleaved(m_vertex_layout); } if (!m_vertex_layout.validate()) @@ -1640,7 +1640,7 @@ void VKGSRender::end() occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { - LOG_ERROR(RSX, "Occlusion pool overflow"); + //LOG_ERROR(RSX, "Occlusion pool overflow"); if (m_current_task) m_current_task->result = 1; } } @@ -2718,7 +2718,7 @@ void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info) buf[1] = vertex_info.vertex_index_offset; buf += 4; - fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf, + fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, (s32*)buf, vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); m_vertex_layout_ring_info.unmap(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 055acecb9b..7538b0c9f9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -25,6 +25,7 @@ namespace vk VkPrimitiveTopology primitive; u32 vertex_draw_count; u32 allocated_vertex_count; + u32 first_vertex; u32 vertex_index_base; u32 vertex_index_offset; u32 persistent_window_offset; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 2745fef444..6dfd49014d 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -82,10 +82,10 @@ namespace struct vertex_input_state { VkPrimitiveTopology native_primitive_type; + bool index_rebase; + u32 min_index; + u32 max_index; u32 vertex_draw_count; - u32 allocated_vertex_count; - u32 vertex_data_base; - u32 vertex_index_base; u32 vertex_index_offset; std::optional> index_info; }; @@ -106,6 +106,7 @@ namespace const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); const u32 min_index = rsx::method_registers.current_draw_clause.min_index(); + const u32 max_index = (min_index + vertex_count) - 1; if (primitives_emulated) { @@ -116,10 +117,10 @@ namespace generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, vertex_count, m_index_buffer_ring_info); - return{ prims, index_count, vertex_count, min_index, 0, 0, index_info }; + return{ prims, false, min_index, max_index, index_count, 0, index_info }; } - return{ prims, vertex_count, vertex_count, min_index, 0, {} }; + return{ prims, false, min_index, max_index, vertex_count, 0, {} }; } vertex_input_state operator()(const rsx::draw_indexed_array_command& command) @@ -173,7 +174,7 @@ namespace { //empty set, do not draw m_index_buffer_ring_info.unmap(); - return{ prims, 0, 0, 0, 0, 0, {} }; + return{ prims, false, 0, 0, 0, 0, {} }; } if (emulate_restart) @@ -194,20 +195,7 @@ namespace std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)); const auto index_offset = rsx::method_registers.vertex_data_base_index(); - - //check for vertex arrays with frequency modifiers - for (auto &block : m_vertex_layout.interleaved_blocks) - { - if (block.min_divisor > 1) - { - //Ignore base offsets and return real results - //The upload function will optimize the uploaded range anyway - return{ prims, index_count, max_index, 0, 0, index_offset, index_info }; - } - } - - const auto data_offset = rsx::get_index_from_base(min_index, index_offset); - return {prims, index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, index_info}; + return {prims, true, min_index, max_index, index_count, index_offset, index_info}; } vertex_input_state operator()(const rsx::draw_inlined_array& command) @@ -221,13 +209,13 @@ namespace if (!primitives_emulated) { - return{ prims, vertex_count, vertex_count, 0, 0, {} }; + return{ prims, false, 0, vertex_count - 1, vertex_count, 0, {} }; } u32 index_count; std::optional> index_info; std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info); - return{ prims, index_count, vertex_count, 0, 0, 0, index_info }; + return{ prims, false, 0, vertex_count - 1, index_count, 0, index_info }; } private: @@ -241,11 +229,18 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout); auto result = std::visit(visitor, get_draw_command(rsx::method_registers)); - auto &vertex_count = result.allocated_vertex_count; - auto &vertex_base = result.vertex_data_base; + const u32 vertex_count = (result.max_index - result.min_index) + 1; + u32 vertex_base = result.min_index; + u32 index_base = 0; + + if (result.index_rebase) + { + vertex_base = rsx::get_index_from_base(vertex_base, rsx::method_registers.vertex_data_base_index()); + index_base = result.min_index; + } //Do actual vertex upload - auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); + auto required = calculate_memory_requirements(m_vertex_layout, vertex_base, vertex_count); u32 persistent_range_base = UINT32_MAX, volatile_range_base = UINT32_MAX; size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX; @@ -358,8 +353,9 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() return{ result.native_primitive_type, // Primitive result.vertex_draw_count, // Vertex count - result.allocated_vertex_count, // Allocated vertex count - result.vertex_index_base, // Index of vertex at data location 0 + vertex_count, // Allocated vertex count + vertex_base, // First vertex in stream + index_base, // Index of vertex at data location 0 result.vertex_index_offset, // Index offset persistent_range_base, volatile_range_base, // Binding range result.index_info }; // Index buffer info