rsx/gl/vk: Invalidate texture regions when memory is unmapped

- Free GPU resources immediately if mappings change to avoid leaking VRAM
kd-11 2017-08-08 00:54:40 +03:00
parent 00b0311c86
commit 1da732bbf5
11 changed files with 128 additions and 54 deletions
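
The change threads a single notification from the memory manager down to the active render backend: vm::unmap tears down the pages, then tells the RSX thread to drop any cached texture sections that covered them. Below is a minimal, compilable sketch of that path; the stub types and printed messages are illustrative stand-ins for the real RPCS3 classes, not code from this commit.

#include <cstdint>
#include <cstdio>

using u32 = std::uint32_t;

struct texture_cache_stub
{
    // Returns true if any cached section intersected [base, base + size).
    bool invalidate_range(u32 base, u32 size, bool /*unprotect*/)
    {
        std::printf("invalidating 0x%x..0x%x\n", base, base + size);
        return true;
    }

    // Frees the GPU resources of every section marked dirty.
    void purge_dirty() { std::printf("reclaiming dirty sections\n"); }
};

struct gs_render_stub
{
    texture_cache_stub m_texture_cache;

    // Mirrors GLGSRender/VKGSRender::on_notify_memory_unmapped: discard
    // (rather than flush) overlapping sections, then reclaim their VRAM.
    void on_notify_memory_unmapped(u32 address_base, u32 size)
    {
        if (m_texture_cache.invalidate_range(address_base, size, false))
            m_texture_cache.purge_dirty();
    }
};

int main()
{
    gs_render_stub rsx;
    // vm::unmap would invoke this right after _page_unmap(addr, size)
    rsx.on_notify_memory_unmapped(0x30100000, 0x100000);
}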


@@ -6,6 +6,7 @@
#include "Utilities/VirtualMemory.h"
#include "Emu/CPU/CPUThread.h"
#include "Emu/Cell/lv2/sys_memory.h"
#include "Emu/RSX/GSRender.h"
#ifdef _WIN32
#include <Windows.h>
@@ -656,6 +657,7 @@ namespace vm
if (found != m_map.end())
{
const u32 size = found->second;
const auto rsxthr = fxm::get<GSRender>();
// Remove entry
m_map.erase(found);
@@ -663,6 +665,9 @@
// Unmap "real" memory pages
_page_unmap(addr, size);
// Notify rsx to invalidate range
if (rsxthr != nullptr) rsxthr->on_notify_memory_unmapped(addr, size);
// Write supplementary info if necessary
if (sup_out) *sup_out = m_sup[addr];


@@ -1133,6 +1133,12 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
return m_gl_texture_cache.flush_section(address);
}
void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
if (m_gl_texture_cache.invalidate_range(address_base, size, false))
m_gl_texture_cache.purge_dirty();
}
void GLGSRender::do_local_task()
{
std::lock_guard<std::mutex> lock(queue_guard);


@@ -426,6 +426,7 @@ protected:
u32 get_zcull_stats(u32 type) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_notify_memory_unmapped(u32 address_base, u32 size) override;
virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
virtual std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;


@@ -286,6 +286,10 @@ namespace gl
void destroy()
{
if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty())
//Already destroyed
return;
if (locked)
unprotect();
@@ -938,16 +942,21 @@ namespace gl
}
bool mark_as_dirty(u32 address)
{
return invalidate_range(address, 4096 - (address & 4095));
}
bool invalidate_range(u32 address, u32 size, bool unprotect=true)
{
bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::pair<u32, u32> trampled_range = std::make_pair(address, address + size);
//TODO: Optimize this function!
//Multi-pass checking is slow. Pre-calculate dependency tree at section creation
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
if (address >= read_only_range.first &&
address < read_only_range.second)
if (trampled_range.second >= read_only_range.first &&
trampled_range.first < read_only_range.second)
{
for (int i = 0; i < read_only_memory_sections.size(); ++i)
{
@@ -966,15 +975,24 @@
i = 0;
}
tex.unprotect();
tex.set_dirty(true);
if (unprotect)
{
tex.unprotect();
tex.set_dirty(true);
}
else
{
//abandon memory
tex.discard();
}
response = true;
}
}
}
if (address >= no_access_range.first &&
address < no_access_range.second)
if (trampled_range.second >= no_access_range.first &&
trampled_range.first < no_access_range.second)
{
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
@@ -995,8 +1013,16 @@
i = 0;
}
tex.unprotect();
tex.set_dirty(true);
if (unprotect)
{
tex.unprotect();
tex.set_dirty(true);
}
else
{
LOG_WARNING(RSX, "Framebuffer region 0x%X -> 0x%X is being discarded", tex.get_section_base(), tex.get_section_base() + tex.get_section_size());
tex.discard();
}
response = true;
}
@@ -1006,35 +1032,6 @@
return response;
}
void invalidate_range(u32 base, u32 size)
{
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
std::pair<u32, u32> range = std::make_pair(base, size);
if (base < read_only_range.second &&
(base + size) >= read_only_range.first)
{
for (cached_texture_section &tex : read_only_memory_sections)
{
if (!tex.is_dirty() && tex.overlaps(range))
tex.destroy();
}
}
if (base < no_access_range.second &&
(base + size) >= no_access_range.first)
{
for (cached_texture_section &tex : no_access_memory_sections)
{
if (!tex.is_dirty() && tex.overlaps(range))
{
tex.unprotect();
tex.set_dirty(true);
}
}
}
}
bool flush_section(u32 address);
void clear_temporary_surfaces()
@@ -1047,6 +1044,19 @@
m_temporary_surfaces.clear();
}
void purge_dirty()
{
reader_lock lock(m_section_mutex);
for (cached_texture_section &tex : read_only_memory_sections)
{
if (tex.is_dirty())
{
tex.destroy();
}
}
}
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts)
{
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag

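The GL cache previously tested only the single faulting address against each protected range; invalidate_range() now builds trampled_range = [address, address + size) and tests interval overlap, so a multi-page unmap reaches every section it spans. A small standalone sketch of the half-open overlap predicate at work (region_overlaps mirrors the helper used by cached_texture_section::overlaps; the constants are arbitrary):

#include <cassert>
#include <cstdint>

using u32 = std::uint32_t;

// True if the half-open intervals [a_base, a_end) and [b_base, b_end) intersect.
static bool region_overlaps(u32 a_base, u32 a_end, u32 b_base, u32 b_end)
{
    return a_base < b_end && b_base < a_end;
}

int main()
{
    const u32 base = 0x30100000, size = 0x2000; // a two-page unmap
    const u32 t_first = base, t_second = base + size;

    // A section straddling the second page is trampled...
    assert(region_overlaps(t_first, t_second, 0x30101800, 0x30102800));
    // ...while one starting exactly at the end of the range is not.
    assert(!region_overlaps(t_first, t_second, 0x30102000, 0x30103000));
}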

@@ -973,7 +973,7 @@ namespace rsx
if (vertex_push_buffers[index].size > 0)
{
std::pair<u8, u32> volatile_range_info = std::make_pair(index, vertex_push_buffers[index].data.size() * (u32)sizeof(u32));
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
result.volatile_blocks.push_back(volatile_range_info);
result.attribute_placement[index] = attribute_buffer_placement::transient;
continue;

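The RSXThread.cpp change only moves the narrowing cast: data.size() is a size_t, so multiplying it by (u32)sizeof(u32) still yielded a size_t, and the conversion to the pair's u32 member happened implicitly. Casting the whole product makes the narrowing explicit and silences the conversion warning. An isolated illustration (not from the source):

#include <cstdint>
#include <vector>

int main()
{
    std::vector<std::uint32_t> data(3);

    // Old form: size_t * u32 promotes back to size_t, so storing the
    // result in a u32 still narrowed implicitly (a conversion warning).
    std::uint32_t bytes_old = data.size() * (std::uint32_t)sizeof(std::uint32_t);

    // New form: one explicit, visible narrowing of the whole product.
    std::uint32_t bytes_new = static_cast<std::uint32_t>(data.size() * sizeof(std::uint32_t));

    return bytes_old == bytes_new ? 0 : 1;
}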

@@ -253,6 +253,7 @@ namespace rsx
virtual void flip(int buffer) = 0;
virtual u64 timestamp() const;
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {}
//zcull
virtual void notify_zcull_info_changed() {}

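The hook is declared with an empty inline body, so backends that keep no texture cache (the null renderer, for instance) inherit a no-op and only the GL and Vulkan backends override it. A compilable sketch of the dispatch, with stub names standing in for rsx::thread and its subclasses:

#include <cstdint>
#include <cstdio>

using u32 = std::uint32_t;

struct thread_stub
{
    virtual ~thread_stub() = default;
    // Default is a no-op, mirroring rsx::thread::on_notify_memory_unmapped.
    virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {}
};

struct gl_backend_stub : thread_stub
{
    void on_notify_memory_unmapped(u32 base, u32 size) override
    {
        std::printf("GL: dropping sections in 0x%x..0x%x\n", base, base + size);
    }
};

int main()
{
    gl_backend_stub gl;
    thread_stub& rsx = gl;
    rsx.on_notify_memory_unmapped(0x30000000, 0x1000); // dispatches to the GL override

    thread_stub null_backend;
    null_backend.on_notify_memory_unmapped(0x30000000, 0x1000); // silently ignored
}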

@@ -578,6 +578,9 @@ VKGSRender::VKGSRender() : GSRender()
m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
//Empty view to bind to buffer locations without data
m_null_buffer_view.reset(new vk::buffer_view(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
@@ -658,6 +661,7 @@ VKGSRender::~VKGSRender()
vk::destroy_global_resources();
//Data heaps/buffers
m_null_buffer_view.reset();
m_index_buffer_ring_info.heap.reset();
m_uniform_buffer_ring_info.heap.reset();
m_attrib_ring_info.heap.reset();
@@ -791,6 +795,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
return false;
}
void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
{
if (m_texture_cache.invalidate_range(address_base, size, false))
m_texture_cache.flush(true);
}
void VKGSRender::begin()
{
rsx::thread::begin();


@@ -129,11 +129,12 @@ private:
vk::render_device *m_device;
vk::swap_chain* m_swap_chain;
//buffer
vk::vk_data_heap m_uniform_buffer_ring_info;
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
std::unique_ptr<vk::buffer_view> m_null_buffer_view;
//Vulkan internals
vk::command_pool m_command_buffer_pool;
@@ -263,4 +264,5 @@ protected:
void do_local_task() override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_notify_memory_unmapped(u32 address_base, u32 size) override;
};


@@ -837,17 +837,23 @@ namespace vk
bool invalidate_address(u32 address)
{
if (address < read_only_range.first ||
address > read_only_range.second)
return invalidate_range(address, 4096 - (address & 4095));
}
bool invalidate_range(u32 address, u32 range, bool unprotect=true)
{
std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);
if (trampled_range.second < read_only_range.first ||
trampled_range.first > read_only_range.second)
{
//Doesn't fall in the read_only textures range; check render targets
if (address < no_access_range.first ||
address > no_access_range.second)
if (trampled_range.second < no_access_range.first ||
trampled_range.first > no_access_range.second)
return false;
}
bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::unordered_map<u32, bool> processed_ranges;
rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
@@ -865,8 +871,7 @@
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
(lock_base > address || lock_limit <= address))
if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
{
processed_ranges[base] = true;
continue;
@@ -892,8 +897,15 @@
range_reset = true;
}
tex.set_dirty(true);
tex.unprotect();
if (unprotect)
{
tex.set_dirty(true);
tex.unprotect();
}
else
{
tex.discard();
}
range_data.valid_count--;
response = true;
@@ -912,8 +924,30 @@
return response;
}
void flush()
void flush(bool purge_dirty=false)
{
if (purge_dirty)
{
//Reclaims all graphics memory consumed by dirty textures
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (!tex.is_dirty())
continue;
if (tex.exists())
{
m_dirty_textures.push_back(std::move(tex.get_texture()));
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
tex.release_dma_resources();
}
}
}
m_image_views_to_purge.clear();
m_images_to_purge.clear();

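On the Vulkan side the reclaimed images cannot be destroyed on the spot, because in-flight command buffers may still reference them; flush() instead moves each dirty section's image and view into purge lists and only destroys what an earlier flush queued. A simplified, compilable sketch of that deferred-destruction pattern (std::unique_ptr and the stub types stand in for the real vk wrappers):

#include <memory>
#include <vector>

struct image_stub {};
struct view_stub {};

struct section_stub
{
    bool dirty = false;
    std::unique_ptr<image_stub> texture;
    std::unique_ptr<view_stub> view;
};

struct cache_stub
{
    std::vector<section_stub> sections;
    std::vector<std::unique_ptr<image_stub>> images_to_purge;
    std::vector<std::unique_ptr<view_stub>> views_to_purge;

    void flush(bool purge_dirty = false)
    {
        // Objects queued on a previous flush are no longer referenced by
        // in-flight work, so they can be destroyed now.
        views_to_purge.clear();
        images_to_purge.clear();

        if (!purge_dirty)
            return;

        // Donate every dirty section's GPU objects to the purge lists.
        for (auto& s : sections)
        {
            if (!s.dirty)
                continue;
            if (s.texture) images_to_purge.push_back(std::move(s.texture));
            if (s.view) views_to_purge.push_back(std::move(s.view));
        }
    }
};

int main()
{
    cache_stub cache;
    cache.sections.push_back({ true, std::make_unique<image_stub>(), std::make_unique<view_stub>() });
    cache.flush(true);  // queues the dirty section's image and view
    cache.flush();      // a later flush actually frees them
}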

@@ -254,8 +254,7 @@ VKGSRender::upload_vertex_data()
}
else
{
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
persistent_view = m_null_buffer_view->value;
}
if (required.second > 0)
@@ -267,8 +266,7 @@
}
else
{
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
volatile_view = m_null_buffer_view->value;
}
m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);

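This pairs with the new m_null_buffer_view member: previously every draw without persistent or volatile vertex data created a fresh zero-sized vk::buffer_view and queued it into buffer_views_to_clean; now one long-lived view built at initialization is bound instead. A sketch of the before/after pattern, with a plain struct in place of the Vulkan handle:

#include <memory>
#include <vector>

struct buffer_view_stub { int value = 0; };

struct renderer_stub
{
    std::unique_ptr<buffer_view_stub> m_null_buffer_view;
    std::vector<std::unique_ptr<buffer_view_stub>> buffer_views_to_clean;

    // One empty view, created once at init (as in the VKGSRender constructor).
    renderer_stub() : m_null_buffer_view(std::make_unique<buffer_view_stub>()) {}

    int bind_empty_stream_old()
    {
        // Old pattern: allocate per draw and defer the cleanup.
        buffer_views_to_clean.push_back(std::make_unique<buffer_view_stub>());
        return buffer_views_to_clean.back()->value;
    }

    int bind_empty_stream_new()
    {
        // New pattern: reuse the shared empty view; nothing to clean up.
        return m_null_buffer_view->value;
    }
};

int main()
{
    renderer_stub r;
    return r.bind_empty_stream_old() == r.bind_empty_stream_new() ? 0 : 1;
}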

@@ -115,6 +115,13 @@ namespace rsx
locked = false;
}
void discard()
{
protection = utils::protection::rw;
dirty = true;
locked = false;
}
bool overlaps(std::pair<u32, u32> range)
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
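
discard(), added to the shared cached section type, is what makes the unmap path safe: unlike unprotect() it must not touch page protection, because the backing pages are already gone, so it only resets the section's bookkeeping and marks it dirty for the next purge. A contrast of the two states, assuming unprotect() would normally reapply protection through the memory utilities:

#include <cstdint>

enum class protection { rw, ro, no_access };

struct section_stub
{
    protection prot = protection::no_access;
    bool dirty = false;
    bool locked = true;

    // Normal invalidation: the pages still exist, so restore CPU access
    // (the real code calls into the memory protection utilities here).
    void unprotect()
    {
        prot = protection::rw;
        locked = false;
    }

    // Unmap path: the pages are gone; reprotecting or flushing them would
    // fault, so only the section's state is reset.
    void discard()
    {
        prot = protection::rw;
        dirty = true;
        locked = false;
    }
};

int main()
{
    section_stub s;
    s.discard(); // safe even though the underlying pages were unmapped
    return s.dirty ? 0 : 1;
}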