From 1da732bbf5841eca80a4c8a3958ae76e94203cb6 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 8 Aug 2017 00:54:40 +0300 Subject: [PATCH] rsx/gl/vk: Invalidate texture regions when memory is unmapped - Free GPU resources immediately if mappings change to avoid leaking VRAM --- rpcs3/Emu/Memory/vm.cpp | 5 ++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 ++ rpcs3/Emu/RSX/GL/GLGSRender.h | 1 + rpcs3/Emu/RSX/GL/GLTextureCache.h | 86 ++++++++++++++++------------ rpcs3/Emu/RSX/RSXThread.cpp | 2 +- rpcs3/Emu/RSX/RSXThread.h | 1 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 ++++ rpcs3/Emu/RSX/VK/VKGSRender.h | 4 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 54 +++++++++++++---- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 6 +- rpcs3/Emu/RSX/rsx_cache.h | 7 +++ 11 files changed, 128 insertions(+), 54 deletions(-) diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 2dae537d84..e1717118c7 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -6,6 +6,7 @@ #include "Utilities/VirtualMemory.h" #include "Emu/CPU/CPUThread.h" #include "Emu/Cell/lv2/sys_memory.h" +#include "Emu/RSX/GSRender.h" #ifdef _WIN32 #include @@ -656,6 +657,7 @@ namespace vm if (found != m_map.end()) { const u32 size = found->second; + const auto rsxthr = fxm::get(); // Remove entry m_map.erase(found); @@ -663,6 +665,9 @@ namespace vm // Unmap "real" memory pages _page_unmap(addr, size); + // Notify rsx to invalidate range + if (rsxthr != nullptr) rsxthr->on_notify_memory_unmapped(addr, size); + // Write supplementary info if necessary if (sup_out) *sup_out = m_sup[addr]; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index e90ec002cf..bb2dc3c7a9 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1133,6 +1133,12 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) return m_gl_texture_cache.flush_section(address); } +void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) +{ + if (m_gl_texture_cache.invalidate_range(address_base, size, false)) + m_gl_texture_cache.purge_dirty(); +} + void GLGSRender::do_local_task() { std::lock_guard lock(queue_guard); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 39d9033921..fdbb45aa60 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -426,6 +426,7 @@ protected: u32 get_zcull_stats(u32 type) override; bool on_access_violation(u32 address, bool is_writing) override; + void on_notify_memory_unmapped(u32 address_base, u32 size) override; virtual std::array, 4> copy_render_targets_to_memory() override; virtual std::array, 2> copy_depth_stencil_buffer_to_memory() override; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 1c55f0d102..87c90bfb1b 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -286,6 +286,10 @@ namespace gl void destroy() { + if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty()) + //Already destroyed + return; + if (locked) unprotect(); @@ -938,16 +942,21 @@ namespace gl } bool mark_as_dirty(u32 address) + { + return invalidate_range(address, 4096 - (address & 4095)); + } + + bool invalidate_range(u32 address, u32 size, bool unprotect=true) { bool response = false; - std::pair trampled_range = std::make_pair(0xffffffff, 0x0); + std::pair trampled_range = std::make_pair(address, address + size); //TODO: Optimize this function! //Multi-pass checking is slow. Pre-calculate dependency tree at section creation rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); - if (address >= read_only_range.first && - address < read_only_range.second) + if (trampled_range.second >= read_only_range.first && + trampled_range.first < read_only_range.second) { for (int i = 0; i < read_only_memory_sections.size(); ++i) { @@ -966,15 +975,24 @@ namespace gl i = 0; } - tex.unprotect(); - tex.set_dirty(true); + if (unprotect) + { + tex.unprotect(); + tex.set_dirty(true); + } + else + { + //abandon memory + tex.discard(); + } + response = true; } } } - if (address >= no_access_range.first && - address < no_access_range.second) + if (trampled_range.second >= no_access_range.first && + trampled_range.first < no_access_range.second) { rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); @@ -995,8 +1013,16 @@ namespace gl i = 0; } - tex.unprotect(); - tex.set_dirty(true); + if (unprotect) + { + tex.unprotect(); + tex.set_dirty(true); + } + else + { + LOG_WARNING(RSX, "Framebuffer region 0x%X -> 0x%X is being discarded", tex.get_section_base(), tex.get_section_base() + tex.get_section_size()); + tex.discard(); + } response = true; } @@ -1006,35 +1032,6 @@ namespace gl return response; } - void invalidate_range(u32 base, u32 size) - { - rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); - std::pair range = std::make_pair(base, size); - - if (base < read_only_range.second && - (base + size) >= read_only_range.first) - { - for (cached_texture_section &tex : read_only_memory_sections) - { - if (!tex.is_dirty() && tex.overlaps(range)) - tex.destroy(); - } - } - - if (base < no_access_range.second && - (base + size) >= no_access_range.first) - { - for (cached_texture_section &tex : no_access_memory_sections) - { - if (!tex.is_dirty() && tex.overlaps(range)) - { - tex.unprotect(); - tex.set_dirty(true); - } - } - } - } - bool flush_section(u32 address); void clear_temporary_surfaces() @@ -1047,6 +1044,19 @@ namespace gl m_temporary_surfaces.clear(); } + void purge_dirty() + { + reader_lock lock(m_section_mutex); + + for (cached_texture_section &tex : read_only_memory_sections) + { + if (tex.is_dirty()) + { + tex.destroy(); + } + } + } + bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts) { //Since we will have dst in vram, we can 'safely' ignore the swizzle flag diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fc0d4002c7..e7267e588e 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -973,7 +973,7 @@ namespace rsx if (vertex_push_buffers[index].size > 0) { - std::pair volatile_range_info = std::make_pair(index, vertex_push_buffers[index].data.size() * (u32)sizeof(u32)); + std::pair volatile_range_info = std::make_pair(index, static_cast(vertex_push_buffers[index].data.size() * sizeof(u32))); result.volatile_blocks.push_back(volatile_range_info); result.attribute_placement[index] = attribute_buffer_placement::transient; continue; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 9489e0deea..07beecee33 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -253,6 +253,7 @@ namespace rsx virtual void flip(int buffer) = 0; virtual u64 timestamp() const; virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; } + virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {} //zcull virtual void notify_zcull_info_changed() {} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index abb10b96b8..17aec9c3af 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -578,6 +578,9 @@ VKGSRender::VKGSRender() : GSRender() m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000); m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); + //Empty view to bind to buffer locations without data + m_null_buffer_view.reset(new vk::buffer_view(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0)); + m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); @@ -658,6 +661,7 @@ VKGSRender::~VKGSRender() vk::destroy_global_resources(); //Data heaps/buffers + m_null_buffer_view.reset(); m_index_buffer_ring_info.heap.reset(); m_uniform_buffer_ring_info.heap.reset(); m_attrib_ring_info.heap.reset(); @@ -791,6 +795,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) return false; } +void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) +{ + if (m_texture_cache.invalidate_range(address_base, size, false)) + m_texture_cache.flush(true); +} + void VKGSRender::begin() { rsx::thread::begin(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 53865c7880..64775db26f 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -129,11 +129,12 @@ private: vk::render_device *m_device; vk::swap_chain* m_swap_chain; - //buffer + //buffer vk::vk_data_heap m_uniform_buffer_ring_info; vk::vk_data_heap m_index_buffer_ring_info; vk::vk_data_heap m_texture_upload_buffer_ring_info; + std::unique_ptr m_null_buffer_view; //Vulkan internals vk::command_pool m_command_buffer_pool; @@ -263,4 +264,5 @@ protected: void do_local_task() override; bool on_access_violation(u32 address, bool is_writing) override; + void on_notify_memory_unmapped(u32 address_base, u32 size) override; }; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index a1387930fa..993da1a00d 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -837,17 +837,23 @@ namespace vk bool invalidate_address(u32 address) { - if (address < read_only_range.first || - address > read_only_range.second) + return invalidate_range(address, 4096 - (address & 4095)); + } + + bool invalidate_range(u32 address, u32 range, bool unprotect=true) + { + std::pair trampled_range = std::make_pair(address, address + range); + + if (trampled_range.second < read_only_range.first || + trampled_range.first > read_only_range.second) { //Doesnt fall in the read_only textures range; check render targets - if (address < no_access_range.first || - address > no_access_range.second) + if (trampled_range.second < no_access_range.first || + trampled_range.first > no_access_range.second) return false; } bool response = false; - std::pair trampled_range = std::make_pair(0xffffffff, 0x0); std::unordered_map processed_ranges; rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); @@ -865,8 +871,7 @@ namespace vk const u32 lock_base = base & ~0xfff; const u32 lock_limit = align(range_data.max_range + base, 4096); - if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) && - (lock_base > address || lock_limit <= address)) + if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second) { processed_ranges[base] = true; continue; @@ -892,8 +897,15 @@ namespace vk range_reset = true; } - tex.set_dirty(true); - tex.unprotect(); + if (unprotect) + { + tex.set_dirty(true); + tex.unprotect(); + } + else + { + tex.discard(); + } range_data.valid_count--; response = true; @@ -912,8 +924,30 @@ namespace vk return response; } - void flush() + void flush(bool purge_dirty=false) { + if (purge_dirty) + { + //Reclaims all graphics memory consumed by dirty textures + for (auto &address_range : m_cache) + { + auto &range_data = address_range.second; + for (auto &tex : range_data.data) + { + if (!tex.is_dirty()) + continue; + + if (tex.exists()) + { + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); + } + + tex.release_dma_resources(); + } + } + } + m_image_views_to_purge.clear(); m_images_to_purge.clear(); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 6895c2db8b..c91f3656a5 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -254,8 +254,7 @@ VKGSRender::upload_vertex_data() } else { - m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0)); - persistent_view = m_current_frame->buffer_views_to_clean.back()->value; + persistent_view = m_null_buffer_view->value; } if (required.second > 0) @@ -267,8 +266,7 @@ VKGSRender::upload_vertex_data() } else { - m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0)); - volatile_view = m_current_frame->buffer_views_to_clean.back()->value; + volatile_view = m_null_buffer_view->value; } m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 01952df834..0e296f0bac 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -115,6 +115,13 @@ namespace rsx locked = false; } + void discard() + { + protection = utils::protection::rw; + dirty = true; + locked = false; + } + bool overlaps(std::pair range) { return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);