// Patch summary (commentary only; everything from the first "diff --git" header onward is the unmodified patch text).
// NOTE(review): this copy of the patch has lost its original line breaks and the template arguments that were
// inside angle brackets (e.g. "std::optional > index_info", "std::make_unique(*m_device, ..."), so it presumably
// will not apply as-is -- confirm against the original commit before using it.
//
// What each touched file changes:
// - Common/ring_buffer_helper.h: adds a const size() accessor that returns m_size.
// - GL/GLGSRender.h, GL/GLGSRender.cpp, GL/GLVertexBuffers.cpp: moves struct vertex_upload_info into namespace gl
//   and updates set_vertex_buffer() / load_program() to name it as gl::vertex_upload_info.
// - VK/VKGSRender.h: adds vk::vertex_upload_info (primitive, vertex_draw_count, allocated_vertex_count,
//   vertex_index_base, persistent_window_offset, volatile_window_offset, index_info). upload_vertex_data() now
//   returns this struct instead of a tuple and load_program() takes it by const reference.
//   m_persistent_attribute_storage / m_volatile_attribute_storage change from VkBufferView to std::unique_ptr members.
// - VK/VKHelpers.h: adds buffer_view::in_range(address, size, offset). It returns true when the span of `size`
//   bytes starting at `address` lies inside the view's info.offset / info.range window, and in that case writes
//   the address relative to info.offset into `offset`.
// - VK/VKVertexBuffers.cpp: upload_vertex_data() no longer creates a buffer view per upload. It records the heap
//   offset of the persistent and volatile data, keeps the existing view when in_range() succeeds (returning the
//   offset inside that view), and otherwise queues the old view on m_current_frame->buffer_views_to_clean and
//   creates a new view that starts at the data offset and is 0x4000000 bytes long, or shorter when that would run
//   past m_attrib_ring_info.size(); the returned offset is then 0.
// - VK/VKGSRender.cpp: end() binds the stored views (null_buffer_view when a view does not exist) and reads the
//   draw parameters from the struct fields; load_program() writes vertex_index_base and forwards the two window
//   offsets to fill_vertex_layout_state(); the destructor resets both views before vk::destroy_global_resources().
diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index 446a3b749a..baf1fdf7b2 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -131,4 +131,9 @@ public: else fmt::throw_exception("m_put_pos == m_get_pos!" HERE); } + + size_t size() const + { + return m_size; + } }; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index e881ddbc43..0f0ba6db2f 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1034,7 +1034,7 @@ bool GLGSRender::check_program_state() return (rsx::method_registers.shader_program_address() != 0); } -void GLGSRender::load_program(const vertex_upload_info& upload_info) +void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 002c96ccd2..2393b901f2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -21,6 +21,16 @@ namespace gl using null_vertex_cache = vertex_cache; using shader_cache = rsx::shaders_cache; + + struct vertex_upload_info + { + u32 vertex_draw_count; + u32 allocated_vertex_count; + u32 vertex_index_base; + u32 persistent_mapping_offset; + u32 volatile_mapping_offset; + std::optional > index_info; + }; } struct work_item @@ -255,16 +265,6 @@ struct driver_state } }; -struct vertex_upload_info -{ - u32 vertex_draw_count; - u32 allocated_vertex_count; - u32 vertex_index_base; - u32 persistent_mapping_offset; - u32 volatile_mapping_offset; - std::optional > index_info; -}; - class GLGSRender : public GSRender { private: @@ -340,14 +340,14 @@ private: driver_state gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer - vertex_upload_info set_vertex_buffer(); + gl::vertex_upload_info set_vertex_buffer(); 
rsx::vertex_input_layout m_vertex_layout = {}; void clear_surface(u32 arg); void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); bool check_program_state(); - void load_program(const vertex_upload_info& upload_info); + void load_program(const gl::vertex_upload_info& upload_info); void update_draw_state(); diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 3011493286..43c9d3ad3f 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -180,7 +180,7 @@ namespace }; } -vertex_upload_info GLGSRender::set_vertex_buffer() +gl::vertex_upload_info GLGSRender::set_vertex_buffer() { std::chrono::time_point then = steady_clock::now(); @@ -196,7 +196,7 @@ vertex_upload_info GLGSRender::set_vertex_buffer() auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); std::pair persistent_mapping = {}, volatile_mapping = {}; - vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info }; + gl::vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info }; if (required.first > 0) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 90c820f98e..5488974718 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -666,6 +666,9 @@ VKGSRender::~VKGSRender() vk::finalize_compiler_context(); m_prog_buffer->clear(); + m_persistent_attribute_storage.reset(); + m_volatile_attribute_storage.reset(); + //Global resources vk::destroy_global_resources(); @@ -1209,10 +1212,12 @@ void VKGSRender::end() //Load program std::chrono::time_point program_start = textures_end; - load_program(std::get<2>(upload_info), std::get<3>(upload_info)); + load_program(upload_info); - m_program->bind_uniform(m_persistent_attribute_storage, 
"persistent_input_stream", m_current_frame->descriptor_set); - m_program->bind_uniform(m_volatile_attribute_storage, "volatile_input_stream", m_current_frame->descriptor_set); + VkBufferView persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; + VkBufferView volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; + m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set); std::chrono::time_point program_stop = steady_clock::now(); m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); @@ -1445,8 +1450,6 @@ void VKGSRender::end() vkCmdClearAttachments(*m_current_command_buffer, static_cast(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect); } - std::optional > index_info = std::get<4>(upload_info); - bool primitive_emulated = false; vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); @@ -1461,12 +1464,11 @@ void VKGSRender::end() m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer; } - if (!index_info) + if (!upload_info.index_info) { if (single_draw) { - const auto vertex_count = std::get<1>(upload_info); - vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); + vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0); } else { @@ -1480,10 +1482,10 @@ void VKGSRender::end() else { VkIndexType index_type; - u32 index_count = std::get<1>(upload_info); + const u32 index_count = upload_info.vertex_draw_count; VkDeviceSize offset; - std::tie(offset, index_type) = index_info.value(); + std::tie(offset, index_type) = upload_info.index_info.value(); vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, 
index_type); if (single_draw) @@ -2160,7 +2162,7 @@ bool VKGSRender::check_program_status() return (rsx::method_registers.shader_program_address() != 0); } -void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) +void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; @@ -2343,11 +2345,13 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = vertex_base; + *(reinterpret_cast(buf + 132)) = vertex_info.vertex_index_base; *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); - fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 160)); + + fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast(buf + 160), + vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); //Vertex constants buf = buf + 512; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 66c64c02ee..693970c9c3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -23,6 +23,17 @@ namespace vk using null_vertex_cache = vertex_cache; using shader_cache = rsx::shaders_cache; + + struct vertex_upload_info + { + VkPrimitiveTopology primitive; + u32 vertex_draw_count; + u32 allocated_vertex_count; + u32 vertex_index_base; + u32 persistent_window_offset; + u32 volatile_window_offset; + std::optional> index_info; + }; } //Heap allocation sizes in MB @@ -262,8 +273,8 @@ private: std::array, rsx::limits::fragment_textures_count> fs_sampler_handles; std::array, rsx::limits::vertex_textures_count> vs_sampler_handles; - 
VkBufferView m_persistent_attribute_storage; - VkBufferView m_volatile_attribute_storage; + std::unique_ptr m_persistent_attribute_storage; + std::unique_ptr m_volatile_attribute_storage; public: //vk::fbo draw_fbo; @@ -379,11 +390,11 @@ private: void check_heap_status(); - /// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type) - std::tuple > > upload_vertex_data(); + vk::vertex_upload_info upload_vertex_data(); + public: bool check_program_status(); - void load_program(u32 vertex_count, u32 vertex_base); + void load_program(const vk::vertex_upload_info& vertex_info); void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); void read_buffers(); void write_buffers(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 7d07e78286..69a67d3f50 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -616,6 +616,25 @@ namespace vk buffer_view(const buffer_view&) = delete; buffer_view(buffer_view&&) = delete; + bool in_range(u32 address, u32 size, u32& offset) const + { + if (address < info.offset) + return false; + + const u32 _offset = address - (u32)info.offset; + if (info.range < _offset) + return false; + + const auto remaining = info.range - _offset; + if (size <= remaining) + { + offset = _offset; + return true; + } + + return false; + } + private: VkDevice m_device; }; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index f1b0c73142..277d3ffcad 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -253,8 +253,7 @@ namespace }; } -std::tuple > > -VKGSRender::upload_vertex_data() +vk::vertex_upload_info VKGSRender::upload_vertex_data() { m_vertex_layout = analyse_inputs_interleaved(); @@ -266,11 +265,9 @@ VKGSRender::upload_vertex_data() //Do actual vertex upload auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); + 
u32 persistent_range_base = UINT32_MAX, volatile_range_base = UINT32_MAX; size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX; - m_persistent_attribute_storage = VK_NULL_HANDLE; - m_volatile_attribute_storage = VK_NULL_HANDLE; - if (required.first > 0) { //Check if cacheable @@ -287,8 +284,7 @@ VKGSRender::upload_vertex_data() if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first)) { in_cache = true; - m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, - m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first)); + persistent_range_base = cached->offset_in_heap; } else { @@ -299,8 +295,7 @@ VKGSRender::upload_vertex_data() if (!in_cache) { persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first); - m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, - m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first)); + persistent_range_base = (u32)persistent_offset; if (to_store) { @@ -308,25 +303,12 @@ VKGSRender::upload_vertex_data() m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, (u32)persistent_offset); } } - - m_persistent_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value; - } - else - { - m_persistent_attribute_storage = null_buffer_view->value; } if (required.second > 0) { volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second); - m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, - m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second)); - - m_volatile_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value; - } - else - { - m_volatile_attribute_storage = null_buffer_view->value; + volatile_range_base = (u32)volatile_offset; } //Write all the data once if possible @@ -358,5 +340,32 @@ VKGSRender::upload_vertex_data() } } - return 
std::make_tuple(result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info); + if (persistent_range_base != UINT32_MAX) + { + if (!m_persistent_attribute_storage || !m_persistent_attribute_storage->in_range(persistent_range_base, required.first, persistent_range_base)) + { + if (m_persistent_attribute_storage) + m_current_frame->buffer_views_to_clean.push_back(std::move(m_persistent_attribute_storage)); + + //View 64M blocks at a time (different drivers will only allow a fixed viewable heap size, 64M should be safe) + const size_t view_size = (persistent_range_base + 0x4000000) > m_attrib_ring_info.size() ? m_attrib_ring_info.size() - persistent_range_base : 0x4000000; + m_persistent_attribute_storage = std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_range_base, view_size); + persistent_range_base = 0; + } + } + + if (volatile_range_base != UINT32_MAX) + { + if (!m_volatile_attribute_storage || !m_volatile_attribute_storage->in_range(volatile_range_base, required.second, volatile_range_base)) + { + if (m_volatile_attribute_storage) + m_current_frame->buffer_views_to_clean.push_back(std::move(m_volatile_attribute_storage)); + + const size_t view_size = (volatile_range_base + 0x4000000) > m_attrib_ring_info.size() ? m_attrib_ring_info.size() - volatile_range_base : 0x4000000; + m_volatile_attribute_storage = std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_range_base, view_size); + volatile_range_base = 0; + } + } + + return{ result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, persistent_range_base, volatile_range_base, result.index_info }; }