rsx/gl/vk: Invalidate texture regions when memory is unmapped

- Free GPU resources immediately if mappings change to avoid leaking VRAM

Repository: https://github.com/RPCS3/rpcs3.git
Commit: 1da732bbf5 (parent: 00b0311c86)
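The commit wires a new notification path from the virtual-memory manager into whichever RSX back-end is active: vm::unmap reports which address range died, and each back-end invalidates any cached texture sections overlapping it. A condensed sketch of that shape, with simplified stand-in classes (in the diff the virtual actually lives on rsx::thread, and vm reaches the renderer via fxm::get<GSRender>()):

#include <cstdint>

using u32 = std::uint32_t;

namespace rsx
{
	struct thread
	{
		virtual ~thread() = default;

		// Default is a no-op, so back-ends opt in individually.
		virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {}
	};
}

struct gl_renderer_like : rsx::thread
{
	void on_notify_memory_unmapped(u32 address_base, u32 size) override
	{
		// Invalidate cached sections overlapping [address_base, address_base + size),
		// then free the GPU objects backing anything that went dirty.
		(void)address_base; (void)size;
	}
};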
@@ -6,6 +6,7 @@
 #include "Utilities/VirtualMemory.h"
 #include "Emu/CPU/CPUThread.h"
 #include "Emu/Cell/lv2/sys_memory.h"
+#include "Emu/RSX/GSRender.h"
 
 #ifdef _WIN32
 #include <Windows.h>
@@ -656,6 +657,7 @@ namespace vm
 	if (found != m_map.end())
 	{
 		const u32 size = found->second;
+		const auto rsxthr = fxm::get<GSRender>();
 
 		// Remove entry
 		m_map.erase(found);
@@ -663,6 +665,9 @@ namespace vm
 		// Unmap "real" memory pages
 		_page_unmap(addr, size);
 
+		// Notify rsx to invalidate range
+		if (rsxthr != nullptr) rsxthr->on_notify_memory_unmapped(addr, size);
+
 		// Write supplementary info if necessary
 		if (sup_out) *sup_out = m_sup[addr];
 
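fxm::get<GSRender>() can legitimately return an empty pointer (no renderer exists during early boot or teardown), hence the null check before notifying. A hypothetical, heavily condensed stand-in for that fetch-if-present lookup:

#include <memory>
#include <mutex>

// Not RPCS3's real fxm; just the observable contract the vm hunk relies on:
// get() hands back the current instance or null, and callers must check.
template <typename T>
struct fx_manager
{
	static inline std::shared_ptr<T> instance; // C++17 inline statics
	static inline std::mutex mutex;

	static std::shared_ptr<T> get()
	{
		std::lock_guard<std::mutex> lock(mutex);
		return instance; // may be null during boot or teardown
	}
};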
@@ -1133,6 +1133,12 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 	return m_gl_texture_cache.flush_section(address);
 }
 
+void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
+{
+	if (m_gl_texture_cache.invalidate_range(address_base, size, false))
+		m_gl_texture_cache.purge_dirty();
+}
+
 void GLGSRender::do_local_task()
 {
 	std::lock_guard<std::mutex> lock(queue_guard);
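The handler passes unprotect = false and then purges, presumably because the pages behind the affected sections are being unmapped: re-enabling access on them is pointless, and the mprotect-style call can fail outright. A minimal POSIX demonstration of that failure mode (Linux/BSD only; RPCS3 goes through its own utils wrappers, and the Windows path uses VirtualProtect instead):

#include <sys/mman.h>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdio>

int main()
{
	const std::size_t len = 4096;
	void* p = mmap(nullptr, len, PROT_READ | PROT_WRITE,
	               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(p != MAP_FAILED);

	// While the pages are mapped, changing protection works...
	assert(mprotect(p, len, PROT_READ) == 0);

	// ...but once they are gone, mprotect cannot restore access:
	assert(munmap(p, len) == 0);
	if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0)
		std::printf("mprotect after munmap failed: errno=%d (ENOMEM)\n", errno);
}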
@@ -426,6 +426,7 @@ protected:
 	u32 get_zcull_stats(u32 type) override;
 
 	bool on_access_violation(u32 address, bool is_writing) override;
+	void on_notify_memory_unmapped(u32 address_base, u32 size) override;
 
 	virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
 	virtual std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
@@ -286,6 +286,10 @@ namespace gl
 
 		void destroy()
 		{
+			if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty())
+				//Already destroyed
+				return;
+
 			if (locked)
 				unprotect();
 
@@ -938,16 +942,21 @@ namespace gl
 		}
 
 		bool mark_as_dirty(u32 address)
 		{
 			return invalidate_range(address, 4096 - (address & 4095));
 		}
 
-		bool invalidate_range(u32 address, u32 size)
+		bool invalidate_range(u32 address, u32 size, bool unprotect=true)
 		{
 			bool response = false;
-			std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+			std::pair<u32, u32> trampled_range = std::make_pair(address, address + size);
 
+			//TODO: Optimize this function!
+			//Multi-pass checking is slow. Pre-calculate dependency tree at section creation
 			rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
 
-			if (address >= read_only_range.first &&
-				address < read_only_range.second)
+			if (trampled_range.second >= read_only_range.first &&
+				trampled_range.first < read_only_range.second)
 			{
 				for (int i = 0; i < read_only_memory_sections.size(); ++i)
 				{
@@ -966,15 +975,24 @@ namespace gl
 						i = 0;
 					}
 
-					tex.unprotect();
-					tex.set_dirty(true);
+					if (unprotect)
+					{
+						tex.unprotect();
+						tex.set_dirty(true);
+					}
+					else
+					{
+						//abandon memory
+						tex.discard();
+					}
 
 					response = true;
 				}
 			}
 		}
 
-		if (address >= no_access_range.first &&
-			address < no_access_range.second)
+		if (trampled_range.second >= no_access_range.first &&
+			trampled_range.first < no_access_range.second)
 		{
 			rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
 
@@ -995,8 +1013,16 @@ namespace gl
 						i = 0;
 					}
 
-					tex.unprotect();
-					tex.set_dirty(true);
+					if (unprotect)
+					{
+						tex.unprotect();
+						tex.set_dirty(true);
+					}
+					else
+					{
+						LOG_WARNING(RSX, "Framebuffer region 0x%X -> 0x%X is being discarded", tex.get_section_base(), tex.get_section_base() + tex.get_section_size());
+						tex.discard();
+					}
 
 					response = true;
 				}
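Both blocks above also replace a point test (is the faulting address inside the range?) with an interval test, so sections that merely overlap the unmapped block are caught too. A small self-contained check of the predicate; the exact open/closed bounds in the cache are an assumption here, since the diff mixes >= and < on opposite ends:

#include <cassert>
#include <cstdint>
#include <utility>

using u32 = std::uint32_t;

// Overlap test for half-open intervals [first, second).
bool overlaps(const std::pair<u32, u32>& a, const std::pair<u32, u32>& b)
{
	return a.first < b.second && b.first < a.second;
}

int main()
{
	const std::pair<u32, u32> trampled{0x1000, 0x3000}; // unmapped block

	assert(overlaps(trampled, {0x2000, 0x4000}));  // partial overlap is caught
	assert(!overlaps(trampled, {0x3000, 0x5000})); // mere adjacency is not

	// The old point test would have missed this section entirely, because
	// only the base address 0x1000 was checked against each section:
	assert(overlaps(trampled, {0x2000, 0x2800}));
}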
@@ -1006,35 +1032,6 @@ namespace gl
 			return response;
 		}
 
-		void invalidate_range(u32 base, u32 size)
-		{
-			rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
-			std::pair<u32, u32> range = std::make_pair(base, size);
-
-			if (base < read_only_range.second &&
-				(base + size) >= read_only_range.first)
-			{
-				for (cached_texture_section &tex : read_only_memory_sections)
-				{
-					if (!tex.is_dirty() && tex.overlaps(range))
-						tex.destroy();
-				}
-			}
-
-			if (base < no_access_range.second &&
-				(base + size) >= no_access_range.first)
-			{
-				for (cached_texture_section &tex : no_access_memory_sections)
-				{
-					if (!tex.is_dirty() && tex.overlaps(range))
-					{
-						tex.unprotect();
-						tex.set_dirty(true);
-					}
-				}
-			}
-		}
-
 		bool flush_section(u32 address);
 
 		void clear_temporary_surfaces()
@@ -1047,6 +1044,19 @@ namespace gl
 			m_temporary_surfaces.clear();
 		}
 
+		void purge_dirty()
+		{
+			reader_lock lock(m_section_mutex);
+
+			for (cached_texture_section &tex : read_only_memory_sections)
+			{
+				if (tex.is_dirty())
+				{
+					tex.destroy();
+				}
+			}
+		}
+
 		bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts)
 		{
 			//Since we will have dst in vram, we can 'safely' ignore the swizzle flag
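purge_dirty() is the second half of a two-phase reclamation: invalidate_range(..., false) only flags sections, and this sweep performs the actual destruction. Note it walks only read_only_memory_sections; framebuffer-backed sections take the discard path instead. A generic sketch of the mark-and-sweep split, with a stand-in section type:

#include <vector>

// Stand-in for gl::cached_texture_section.
struct section
{
	bool dirty = false;
	void destroy() { /* delete GL texture/PBO, drop fences, clear flags */ }
};

// Sweep phase: reclaim storage only for entries the invalidate phase flagged.
void purge_dirty_sections(std::vector<section>& sections)
{
	for (section& tex : sections)
	{
		if (tex.dirty)
			tex.destroy();
	}
}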
@@ -973,7 +973,7 @@ namespace rsx
 
 			if (vertex_push_buffers[index].size > 0)
 			{
-				std::pair<u8, u32> volatile_range_info = std::make_pair(index, vertex_push_buffers[index].data.size() * (u32)sizeof(u32));
+				std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
 				result.volatile_blocks.push_back(volatile_range_info);
 				result.attribute_placement[index] = attribute_buffer_placement::transient;
 				continue;
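An incidental cleanup rides along here: the old expression cast sizeof to u32 but still multiplied in size_t, so the narrowing back to u32 happened implicitly at the pair boundary; the new code makes the narrowing explicit over the whole product. Illustration:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

using u32 = std::uint32_t;

int main()
{
	std::vector<u32> data(8);

	auto a = data.size() * (u32)sizeof(u32);              // still size_t: the cast
	                                                      // is swallowed by promotion
	auto b = static_cast<u32>(data.size() * sizeof(u32)); // u32: narrowing is visible

	static_assert(sizeof(a) == sizeof(std::size_t), "a kept the wide type");
	std::printf("%zu bytes vs %u bytes\n", a, b);
}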
@@ -253,6 +253,7 @@ namespace rsx
 		virtual void flip(int buffer) = 0;
 		virtual u64 timestamp() const;
 		virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
+		virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {}
 
 		//zcull
 		virtual void notify_zcull_info_changed() {}
@@ -578,6 +578,9 @@ VKGSRender::VKGSRender() : GSRender()
 	m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
 	m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
 
+	//Empty view to bind to buffer locations without data
+	m_null_buffer_view.reset(new vk::buffer_view(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
+
 	m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);
 
 	std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
@@ -658,6 +661,7 @@ VKGSRender::~VKGSRender()
 	vk::destroy_global_resources();
 
 	//Data heaps/buffers
+	m_null_buffer_view.reset();
 	m_index_buffer_ring_info.heap.reset();
 	m_uniform_buffer_ring_info.heap.reset();
 	m_attrib_ring_info.heap.reset();
@@ -791,6 +795,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 	return false;
 }
 
+void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
+{
+	if (m_texture_cache.invalidate_range(address_base, size, false))
+		m_texture_cache.flush(true);
+}
+
 void VKGSRender::begin()
 {
 	rsx::thread::begin();
@@ -129,11 +129,12 @@ private:
 
 	vk::render_device *m_device;
 	vk::swap_chain* m_swap_chain;
-	//buffer
 
+	//buffer
 	vk::vk_data_heap m_uniform_buffer_ring_info;
 	vk::vk_data_heap m_index_buffer_ring_info;
 	vk::vk_data_heap m_texture_upload_buffer_ring_info;
+	std::unique_ptr<vk::buffer_view> m_null_buffer_view;
 
 	//Vulkan internals
 	vk::command_pool m_command_buffer_pool;
@@ -263,4 +264,5 @@ protected:
 	void do_local_task() override;
 
 	bool on_access_violation(u32 address, bool is_writing) override;
+	void on_notify_memory_unmapped(u32 address_base, u32 size) override;
 };
@@ -837,17 +837,23 @@ namespace vk
 
 		bool invalidate_address(u32 address)
 		{
-			if (address < read_only_range.first ||
-				address > read_only_range.second)
+			return invalidate_range(address, 4096 - (address & 4095));
+		}
+
+		bool invalidate_range(u32 address, u32 range, bool unprotect=true)
+		{
+			std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);
+
+			if (trampled_range.second < read_only_range.first ||
+				trampled_range.first > read_only_range.second)
 			{
 				//Doesnt fall in the read_only textures range; check render targets
-				if (address < no_access_range.first ||
-					address > no_access_range.second)
+				if (trampled_range.second < no_access_range.first ||
+					trampled_range.first > no_access_range.second)
 					return false;
 			}
 
 			bool response = false;
-			std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
 			std::unordered_map<u32, bool> processed_ranges;
 
 			rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
@@ -865,8 +871,7 @@ namespace vk
 			const u32 lock_base = base & ~0xfff;
 			const u32 lock_limit = align(range_data.max_range + base, 4096);
 
-			if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
-				(lock_base > address || lock_limit <= address))
+			if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
 			{
 				processed_ranges[base] = true;
 				continue;
@@ -892,8 +897,15 @@ namespace vk
 					range_reset = true;
 				}
 
-				tex.set_dirty(true);
-				tex.unprotect();
+				if (unprotect)
+				{
+					tex.set_dirty(true);
+					tex.unprotect();
+				}
+				else
+				{
+					tex.discard();
+				}
 
 				range_data.valid_count--;
 				response = true;
@@ -912,8 +924,30 @@ namespace vk
 			return response;
 		}
 
-		void flush()
+		void flush(bool purge_dirty=false)
 		{
+			if (purge_dirty)
+			{
+				//Reclaims all graphics memory consumed by dirty textures
+				for (auto &address_range : m_cache)
+				{
+					auto &range_data = address_range.second;
+					for (auto &tex : range_data.data)
+					{
+						if (!tex.is_dirty())
+							continue;
+
+						if (tex.exists())
+						{
+							m_dirty_textures.push_back(std::move(tex.get_texture()));
+							m_temporary_image_view.push_back(std::move(tex.get_view()));
+						}
+
+						tex.release_dma_resources();
+					}
+				}
+			}
+
 			m_image_views_to_purge.clear();
 			m_images_to_purge.clear();
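Note that flush(true) does not destroy a dirty section's image and view inline: it moves them into holding lists (m_dirty_textures, m_temporary_image_view in the diff) so they outlive any command buffer that may still reference them, and releases only the CPU-side DMA staging immediately. A generic sketch of that deferred-release idiom, with stand-in types:

#include <memory>
#include <utility>
#include <vector>

struct image      { /* stand-in for a VkImage + VkDeviceMemory owner */ };
struct image_view { /* stand-in for a VkImageView owner */ };

struct cached_section
{
	bool dirty = false;
	std::unique_ptr<image> texture;
	std::unique_ptr<image_view> view;
};

// Deferred-release queues, drained only once the GPU is known to be idle
// (fence wait or vkQueueWaitIdle), never while work may be in flight.
std::vector<std::unique_ptr<image>> dirty_textures;
std::vector<std::unique_ptr<image_view>> dirty_views;

void purge(std::vector<cached_section>& sections)
{
	for (cached_section& tex : sections)
	{
		if (!tex.dirty || !tex.texture)
			continue;

		dirty_textures.push_back(std::move(tex.texture));
		dirty_views.push_back(std::move(tex.view));
	}
}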
@@ -254,8 +254,7 @@ VKGSRender::upload_vertex_data()
 		}
 		else
 		{
-			m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
-			persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
+			persistent_view = m_null_buffer_view->value;
 		}
 
 		if (required.second > 0)
@@ -267,8 +266,7 @@ VKGSRender::upload_vertex_data()
 		}
 		else
 		{
-			m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
-			volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
+			volatile_view = m_null_buffer_view->value;
 		}
 
 		m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
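Both fallback paths previously created a throwaway vk::buffer_view every frame just to bind "no data"; they now reuse the single zero-length view built in the constructor (and reset in the destructor before the heap buffers it points into, since a view should not outlive its buffer). An approximation in raw Vulkan; note the core spec requires a buffer-view range to be nonzero or VK_WHOLE_SIZE, so the 0 the diff passes presumably relies on the wrapper or driver tolerating it, and this sketch uses a minimal nonzero range instead:

#include <vulkan/vulkan.h>

// Hypothetical helper: one reusable "empty" view bound wherever a shader
// input has no backing data this frame.
VkBufferView make_null_view(VkDevice device, VkBuffer backing_buffer)
{
	VkBufferViewCreateInfo info = {};
	info.sType  = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
	info.buffer = backing_buffer; // any live buffer works as an anchor
	info.format = VK_FORMAT_R8_UINT;
	info.offset = 0;
	info.range  = 4;              // minimal nonzero range (see note above)

	VkBufferView view = VK_NULL_HANDLE;
	if (vkCreateBufferView(device, &info, nullptr, &view) != VK_SUCCESS)
		return VK_NULL_HANDLE;

	return view; // destroy once, with vkDestroyBufferView, before the buffer
}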
@@ -115,6 +115,13 @@ namespace rsx
 			locked = false;
 		}
 
+		void discard()
+		{
+			protection = utils::protection::rw;
+			dirty = true;
+			locked = false;
+		}
+
 		bool overlaps(std::pair<u32, u32> range)
 		{
 			return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
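discard() is what makes the unprotect = false paths above safe: unlike unprotect(), it resets the section's bookkeeping without calling into the memory-protection layer, which matters because the pages behind the section may no longer be mapped at all. A condensed hypothetical form of the two paths side by side:

#include <cstdint>

namespace utils { enum class protection { rw, ro, no }; } // stand-in enum

struct protected_section
{
	utils::protection protection = utils::protection::rw;
	bool dirty  = false;
	bool locked = false;

	void unprotect()
	{
		// the real version restores page access (utils::memory_protect) first
		protection = utils::protection::rw;
		locked = false;
	}

	void discard()
	{
		// no memory_protect call: the pages may already be unmapped
		protection = utils::protection::rw;
		dirty = true;
		locked = false;
	}
};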