Mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-02-06 00:40:11 +00:00)
vk: Implement VRAM spilling
- The idea is to shift memory to "shared graphics memory" when VRAM is running out
commit c18e5e07cc (parent 000414c47d)
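
For orientation before the hunks: the commit samples VRAM pressure once per frame, derives a severity level, and, when the surface cache is over its quota, tags the least-recently-used unbound surfaces for spilling; a tagged surface is copied into a host-visible buffer, its device-local allocation is freed, and it is transparently recreated on next GPU use. A minimal standalone sketch of that control flow (plain C++, no Vulkan; the thresholds and type names are illustrative, not the values RPCS3 uses):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    enum class severity { low, moderate, severe, fatal };

    struct surface
    {
        uint64_t size_bytes = 0;
        uint64_t last_rw_access_tag = 0;   // monotonic timestamp of last use
        bool is_bound = false;             // bound attachments are never spilled
        std::vector<std::byte> host_copy;  // stands in for the host-visible vk::buffer

        void spill() { host_copy.resize(size_bytes); } // VRAM -> system RAM
    };

    // Hypothetical thresholds; the real heuristic is vmm_determine_memory_load_severity().
    severity classify(uint64_t used, uint64_t total)
    {
        const double load = double(used) / double(total);
        if (load > 0.95) return severity::fatal;
        if (load > 0.85) return severity::severe;
        if (load > 0.70) return severity::moderate;
        return severity::low;
    }

    // Spill least-recently-used, unbound surfaces until usage drops back under quota.
    void relieve_pressure(std::vector<surface*>& surfaces, uint64_t used, uint64_t quota)
    {
        std::sort(surfaces.begin(), surfaces.end(), [](const surface* a, const surface* b)
        {
            return a->last_rw_access_tag < b->last_rw_access_tag; // oldest first
        });

        for (surface* s : surfaces)
        {
            if (used <= quota) break;
            if (s->is_bound) continue;
            s->spill();
            used -= s->size_bytes;
        }
    }
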
@@ -49,7 +49,10 @@ namespace rsx
 
 		// Arbitrary r/w flags, use with caution.
 		memory_write = 8,
-		memory_read = 16
+		memory_read = 16,
+
+		// Not r/w but signifies a GPU reference to this object.
+		gpu_reference = 32
 	};
 
 private:
@@ -112,7 +112,7 @@ void VKGSRender::advance_queued_frames()
 	vk::vmm_check_memory_usage();
 
 	// m_rtts storage is double buffered and should be safe to tag on frame boundary
-	m_rtts.free_invalidated(*m_current_command_buffer);
+	m_rtts.free_invalidated(*m_current_command_buffer, vk::vmm_determine_memory_load_severity());
 
 	// Texture cache is also double buffered to prevent use-after-free
 	m_texture_cache.on_frame_end();
@@ -52,7 +52,59 @@ namespace vk
 		if (severity >= rsx::problem_severity::fatal)
 		{
 			// TODO
 			// Drop MSAA resolve/unresolve caches. Only trigger when a hard sync is guaranteed to follow else it will cause even more problems!
+			auto relieve_memory_pressure = [&](const auto& list)
+			{
+				// 2-pass to ensure resources are available where they are most needed
+				std::vector<std::unique_ptr<vk::viewable_image>> resolve_target_cache;
+				std::vector<vk::render_target*> deferred_spills;
+				auto gc = vk::get_resource_manager();
+
+				// 1. Scan the list and spill resources that can be spilled immediately if requested. Also gather resources from those that don't need it.
+				for (auto& surface : list)
+				{
+					auto& rtt = surface.second;
+					if (!rtt->spill_request_tag || rtt->spill_request_tag < surface.second->last_rw_access_tag)
+					{
+						// We're not going to be spilling into system RAM. If a MSAA resolve target exists, remove it to save memory.
+						if (rtt->resolve_surface)
+						{
+							resolve_target_cache.emplace_back(std::move(rtt->resolve_surface));
+							rtt->msaa_flags |= rsx::surface_state_flags::require_resolve;
+							any_released |= true;
+						}
+
+						rtt->spill_request_tag = 0;
+						continue;
+					}
+
+					if (rtt->resolve_surface || rtt->samples() == 1)
+					{
+						// Can spill immediately. Do it.
+						rtt->spill(cmd, resolve_target_cache);
+						any_released |= true;
+						continue;
+					}
+
+					deferred_spills.push_back(rtt.get());
+				}
+
+				// 2. We should have enough discarded reusable memory for the second pass.
+				for (auto& surface : deferred_spills)
+				{
+					surface->spill(cmd, resolve_target_cache);
+					any_released |= true;
+				}
+
+				// 3. Discard the now-useless resolve cache memory
+				for (auto& data : resolve_target_cache)
+				{
+					gc->dispose(data);
+				}
+			};
+
+			relieve_memory_pressure(m_render_targets_storage);
+			relieve_memory_pressure(m_depth_stencil_storage);
 		}
 
 		return any_released;
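
The lambda above is deliberately two-pass: surfaces that are staying resident first surrender their MSAA resolve images into resolve_target_cache, so the deferred MSAA surfaces in pass 2 can resolve into a recycled image instead of allocating a fresh one while memory is already critical. A schematic of that harvest-then-reuse pattern (standalone C++; resolve_image and the other names are stand-ins for the vk types, not code from the commit):

    #include <memory>
    #include <vector>

    struct resolve_image {};                    // stand-in for vk::viewable_image

    struct surface
    {
        bool wants_spill = false;               // spill_request_tag set and still valid
        bool can_spill_now = false;             // single-sampled or already resolved
        std::unique_ptr<resolve_image> resolve; // optional MSAA resolve attachment

        void spill(std::vector<std::unique_ptr<resolve_image>>& cache)
        {
            // An MSAA surface would grab a recycled image from 'cache' here if it needs one.
            (void)cache;
        }
    };

    void relieve_memory_pressure(std::vector<surface*>& list)
    {
        std::vector<std::unique_ptr<resolve_image>> cache; // harvested scratch images
        std::vector<surface*> deferred;

        // Pass 1: spill the easy cases, harvest resolve images from surfaces staying put.
        for (auto* s : list)
        {
            if (!s->wants_spill)
            {
                if (s->resolve) cache.emplace_back(std::move(s->resolve));
                continue;
            }

            if (s->can_spill_now) { s->spill(cache); continue; }
            deferred.push_back(s); // MSAA, needs a resolve image first
        }

        // Pass 2: deferred MSAA surfaces can now reuse the harvested images.
        for (auto* s : deferred) s->spill(cache);

        // Pass 3: anything still in the cache is no longer needed.
        cache.clear();
    }
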
@@ -123,6 +175,63 @@ namespace vk
 		return (surface_cache_vram_load > surface_cache_allocation_quota);
 	}
 
+	bool surface_cache::spill_unused_memory()
+	{
+		// Determine how much memory we need to save to system RAM if any
+		const u64 current_surface_cache_memory = vk::vmm_get_application_pool_usage(VMM_ALLOCATION_POOL_SURFACE_CACHE);
+		const u64 total_device_memory = vk::get_current_renderer()->get_memory_mapping().device_local_total_bytes;
+		const u64 target_memory = get_surface_cache_memory_quota(total_device_memory);
+
+		rsx_log.warning("Surface cache memory usage is %lluM", current_surface_cache_memory / 0x100000);
+		if (current_surface_cache_memory < target_memory)
+		{
+			rsx_log.warning("Surface cache memory usage is very low. Will not spill contents to RAM");
+			return false;
+		}
+
+		// Very slow, but should only be called when the situation is dire
+		std::vector<render_target*> sorted_list;
+		sorted_list.reserve(m_render_targets_storage.size() + m_depth_stencil_storage.size());
+
+		auto process_list_function = [&](const auto& list)
+		{
+			for (auto& surface : list)
+			{
+				if (surface.second->value && !surface.second->is_bound)
+				{
+					sorted_list.push_back(surface.second.get());
+				}
+			}
+		};
+
+		process_list_function(m_render_targets_storage);
+		process_list_function(m_depth_stencil_storage);
+
+		std::sort(sorted_list.begin(), sorted_list.end(), [](const auto& a, const auto& b)
+		{
+			return a->last_rw_access_tag < b->last_rw_access_tag;
+		});
+
+		// Remove up to bytes_to_remove bytes from VRAM
+		u64 bytes_spilled = 0;
+		const u64 bytes_to_remove = current_surface_cache_memory - target_memory;
+		const u64 spill_time = rsx::get_shared_tag();
+
+		for (auto& surface : sorted_list)
+		{
+			bytes_spilled += surface->memory->size();
+			surface->spill_request_tag = spill_time;
+
+			if (bytes_spilled >= bytes_to_remove)
+			{
+				break;
+			}
+		}
+
+		rsx_log.warning("Surface cache will attempt to spill %llu bytes.", bytes_spilled);
+		return (bytes_spilled > 0);
+	}
+
 	// Get the linear resolve target bound to this surface. Initialize if none exists
 	vk::viewable_image* render_target::get_resolve_target_safe(vk::command_buffer& cmd)
 	{
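
Concretely, suppose get_surface_cache_memory_quota() returned 2 GiB on a card whose device-local heap totals 8 GiB (the 25% figure is an assumption for illustration; the quota function is not shown in this diff). With 2.5 GiB resident, bytes_to_remove comes out to 512 MiB, and the LRU walk above tags the oldest surfaces until bytes_spilled crosses that mark:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Illustrative numbers only; the real policy lives in get_surface_cache_memory_quota().
        const uint64_t total_device_memory = 8ull << 30;              // 8 GiB device-local
        const uint64_t target_memory       = total_device_memory / 4; // assumed 25% quota = 2 GiB
        const uint64_t cache_usage         = 2560ull << 20;           // 2.5 GiB currently resident

        if (cache_usage > target_memory)
        {
            const uint64_t bytes_to_remove = cache_usage - target_memory;
            std::printf("need to spill %llu MiB\n",
                        static_cast<unsigned long long>(bytes_to_remove >> 20)); // 512 MiB
        }
        return 0;
    }
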
@@ -334,6 +443,156 @@ namespace vk
 		}
 	}
 
+	std::vector<VkBufferImageCopy> render_target::build_spill_transfer_descriptors(vk::image* target)
+	{
+		std::vector<VkBufferImageCopy> result;
+		result.reserve(2);
+
+		result.push_back({});
+		auto& rgn = result.back();
+		rgn.imageExtent.width = target->width();
+		rgn.imageExtent.height = target->height();
+		rgn.imageExtent.depth = 1;
+		rgn.imageSubresource.aspectMask = target->aspect();
+		rgn.imageSubresource.layerCount = 1;
+
+		if (aspect() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
+		{
+			result.push_back(rgn);
+			rgn.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+			result.back().imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+			result.back().bufferOffset = target->width() * target->height() * 4;
+		}
+
+		return result;
+	}
+
+	void render_target::spill(vk::command_buffer& cmd, std::vector<std::unique_ptr<vk::viewable_image>>& resolve_cache)
+	{
+		ensure(value);
+
+		u64 element_size;
+		switch (const auto fmt = format())
+		{
+		case VK_FORMAT_D32_SFLOAT:
+			element_size = 4;
+			break;
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+		case VK_FORMAT_D24_UNORM_S8_UINT:
+			element_size = 5;
+			break;
+		default:
+			element_size = get_format_texel_width(fmt);
+		}
+
+		vk::image* src = nullptr;
+		if (samples() == 1) [[likely]]
+		{
+			src = this;
+		}
+		else if (resolve_surface)
+		{
+			src = resolve_surface.get();
+		}
+		else
+		{
+			const auto transfer_w = width() * samples_x;
+			const auto transfer_h = height() * samples_y;
+
+			for (auto& surface : resolve_cache)
+			{
+				if (surface->format() == format() &&
+					surface->width() == transfer_w &&
+					surface->height() == transfer_h)
+				{
+					src = surface.get();
+					break;
+				}
+			}
+
+			if (!src)
+			{
+				if (vmm_determine_memory_load_severity() <= rsx::problem_severity::moderate)
+				{
+					// We have some freedom to allocate something. Add to the shared cache
+					src = get_resolve_target_safe(cmd);
+				}
+				else
+				{
+					// TODO: Spill to DMA buf
+					// For now, just skip this one if we don't have the capacity for it
+					rsx_log.warning("Could not spill memory due to resolve failure. Will ignore spilling for the moment.");
+					return;
+				}
+			}
+
+			msaa_flags |= rsx::surface_state_flags::require_resolve;
+		}
+
+		// If a resolve is requested, move data to the target
+		if (msaa_flags & rsx::surface_state_flags::require_resolve)
+		{
+			ensure(samples() > 1);
+			resolve(cmd);
+		}
+
+		const auto pdev = vk::get_current_renderer();
+		const auto alloc_size = element_size * src->width() * src->height();
+
+		m_spilled_mem = std::make_unique<vk::buffer>(*pdev, alloc_size, pdev->get_memory_mapping().host_visible_coherent,
+			0, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0, VMM_ALLOCATION_POOL_UNDEFINED);
+
+		const auto regions = build_spill_transfer_descriptors(src);
+		src->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+		vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, m_spilled_mem->value, ::size32(regions), regions.data());
+
+		// Destroy this object through a cloned object
+		auto obj = std::unique_ptr<viewable_image>(clone());
+		vk::get_resource_manager()->dispose(obj);
+
+		if (resolve_surface)
+		{
+			// Just add to the resolve cache and move on
+			resolve_cache.emplace_back(std::move(resolve_surface));
+		}
+
+		ensure(!memory && !value && views.empty() && !resolve_surface);
+		spill_request_tag = 0ull;
+	}
+
+	void render_target::unspill(vk::command_buffer& cmd)
+	{
+		// Recreate the image
+		const auto pdev = vk::get_current_renderer();
+		create_impl(*pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, pdev->get_memory_mapping().device_local, VMM_ALLOCATION_POOL_SURFACE_CACHE);
+		change_layout(cmd, is_depth_surface() ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+		// Load image from host-visible buffer
+		ensure(m_spilled_mem);
+
+		// Data transfer can be skipped if an erase command is being served
+		if (!(state_flags & rsx::surface_state_flags::erase_bkgnd))
+		{
+			// Warn. Ideally this should never happen if you have enough resources
+			rsx_log.warning("[PERFORMANCE WARNING] Loading spilled memory back to the GPU. You may want to lower your resolution scaling.");
+
+			vk::image* dst = (samples() > 1) ? get_resolve_target_safe(cmd) : this;
+			const auto regions = build_spill_transfer_descriptors(dst);
+
+			dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+			vkCmdCopyBufferToImage(cmd, m_spilled_mem->value, dst->value, dst->current_layout, ::size32(regions), regions.data());
+
+			if (samples() > 1)
+			{
+				msaa_flags &= ~rsx::surface_state_flags::require_resolve;
+				msaa_flags |= rsx::surface_state_flags::require_unresolve;
+			}
+		}
+
+		// Delete host-visible buffer
+		vk::get_resource_manager()->dispose(m_spilled_mem);
+	}
+
 	// Load memory from cell and use to initialize the surface
 	void render_target::load_memory(vk::command_buffer& cmd)
 	{
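
One detail worth calling out in build_spill_transfer_descriptors() and spill() above: a combined depth-stencil surface is stored in the spill buffer as two tightly packed planes, depth first at offset 0 (4 bytes per texel, hence the second region's bufferOffset of width * height * 4) and stencil after it (1 byte per texel), which is why element_size is 5 for the D32S8/D24S8 formats. A small helper restating that layout (illustrative, not part of the commit):

    #include <cstdint>

    struct spill_layout
    {
        uint64_t depth_offset;   // depth plane always starts the buffer
        uint64_t stencil_offset; // width * height * 4, as in build_spill_transfer_descriptors()
        uint64_t total_size;     // width * height * 5, matching element_size = 5 in spill()
    };

    spill_layout layout_for_depth_stencil(uint32_t width, uint32_t height)
    {
        const uint64_t texels = uint64_t(width) * height;
        return { 0, texels * 4, texels * 5 };
    }
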
@@ -426,6 +685,8 @@ namespace vk
 
 	vk::viewable_image* render_target::get_surface(rsx::surface_access access_type)
 	{
+		last_rw_access_tag = rsx::get_shared_tag();
+
 		if (samples() == 1 || access_type == rsx::surface_access::shader_write)
 		{
 			return this;
@@ -491,6 +752,18 @@ namespace vk
 
 	void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access)
 	{
+		if (access == rsx::surface_access::gpu_reference)
+		{
+			// This barrier only requires that an object is made available for GPU usage.
+			if (!value)
+			{
+				unspill(cmd);
+			}
+
+			spill_request_tag = 0;
+			return;
+		}
+
 		const bool is_depth = is_depth_surface();
 		const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
 
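
The gpu_reference path added above is an existence guarantee rather than a data hazard barrier: it only promises a live VkImage, rehydrating (unspilling) the surface if needed and cancelling any pending spill request. The prepare_surface_for_drawing() hunk further down uses exactly this before binding an attachment; condensed into a standalone model (the types and gpu_reference_barrier name here are stand-ins, not code from the commit):

    #include <cstdint>

    struct command_buffer {};

    struct render_target
    {
        uint64_t spill_request_tag = 0;
        bool in_vram = false;
        bool is_bound = false;

        void unspill(command_buffer&) { in_vram = true; } // recreate image, upload data

        // gpu_reference barrier: only guarantees the image exists on the GPU.
        void gpu_reference_barrier(command_buffer& cmd)
        {
            if (!in_vram) unspill(cmd);
            spill_request_tag = 0; // any pending spill request is now stale
        }
    };

    // Caller side, mirroring prepare_surface_for_drawing() later in the diff:
    void bind_attachment(command_buffer& cmd, render_target& rtt)
    {
        rtt.gpu_reference_barrier(cmd);
        rtt.is_bound = true; // bound surfaces are excluded from spill candidates
    }
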
@@ -506,6 +779,12 @@ namespace vk
 			}
 		}
 
+		// Unspill here, because erase flag may have been set above.
+		if (!value)
+		{
+			unspill(cmd);
+		}
+
 		if (access == rsx::surface_access::shader_write && write_barrier_sync_tag != 0)
 		{
 			if (current_layout == VK_IMAGE_LAYOUT_GENERAL)
@@ -24,6 +24,9 @@ namespace vk
 		u64 cyclic_reference_sync_tag = 0;
 		u64 write_barrier_sync_tag = 0;
 
+		// Memory spilling support
+		std::unique_ptr<vk::buffer> m_spilled_mem;
+
 		// MSAA support:
 		// Get the linear resolve target bound to this surface. Initialize if none exists
 		vk::viewable_image* get_resolve_target_safe(vk::command_buffer& cmd);
@@ -40,8 +43,17 @@ namespace vk
 		// Generic - chooses whether to clear or load.
 		void initialize_memory(vk::command_buffer& cmd, rsx::surface_access access);
 
+		// Spill helpers
+		// Re-initialize using spilled memory
+		void unspill(vk::command_buffer& cmd);
+		// Build spill transfer descriptors
+		std::vector<VkBufferImageCopy> build_spill_transfer_descriptors(vk::image* target);
+
 	public:
-		u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid
+		u64 frame_tag = 0;          // frame id when invalidated, 0 if not invalid
+		u64 last_rw_access_tag = 0; // timestamp when this object was last used
+		u64 spill_request_tag = 0;  // timestamp when spilling was requested
+		bool is_bound = false;      // set when the surface is bound for rendering
 
 		using viewable_image::viewable_image;
 
@@ -54,6 +66,9 @@ namespace vk
 		image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
 			VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override;
 
+		// Memory management
+		void spill(vk::command_buffer& cmd, std::vector<std::unique_ptr<vk::viewable_image>>& resolve_cache);
+
 		// Synchronization
 		void texture_barrier(vk::command_buffer& cmd);
 		void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access);
@@ -270,13 +285,16 @@ namespace vk
 	static bool is_compatible_surface(const vk::render_target* surface, const vk::render_target* ref, u16 width, u16 height, u8 sample_count)
 	{
 		return (surface->format() == ref->format() &&
-			surface->get_spp() == sample_count &&
-			surface->get_surface_width() >= width &&
-			surface->get_surface_height() >= height);
+				surface->get_spp() == sample_count &&
+				surface->get_surface_width() >= width &&
+				surface->get_surface_height() >= height);
 	}
 
 	static void prepare_surface_for_drawing(vk::command_buffer& cmd, vk::render_target* surface)
 	{
+		// Special case barrier
+		surface->memory_barrier(cmd, rsx::surface_access::gpu_reference);
+
 		if (surface->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
 		{
 			surface->change_layout(cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
@@ -288,10 +306,13 @@ namespace vk
 
 		surface->reset_surface_counters();
 		surface->memory_usage_flags |= rsx::surface_usage_flags::attachment;
+		surface->is_bound = true;
 	}
 
-	static void prepare_surface_for_sampling(vk::command_buffer& /*cmd*/, vk::render_target* /*surface*/)
-	{}
+	static void prepare_surface_for_sampling(vk::command_buffer& /*cmd*/, vk::render_target* surface)
+	{
+		surface->is_bound = false;
+	}
 
 	static bool surface_is_pitch_compatible(const std::unique_ptr<vk::render_target>& surface, usz pitch)
 	{
@@ -385,9 +406,11 @@ namespace vk
 
 	public:
 		void destroy();
+		bool spill_unused_memory();
 		bool is_overallocated();
 		bool can_collapse_surface(const std::unique_ptr<vk::render_target>& surface) override;
 		bool handle_memory_pressure(vk::command_buffer& cmd, rsx::problem_severity severity) override;
+		void free_invalidated(vk::command_buffer& cmd, rsx::problem_severity memory_pressure);
 	};
 }