From f8a9b1fa30f72790fcf6dec4da9a4e0eb4ace659 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 1 Aug 2018 23:55:14 +0300 Subject: [PATCH] [WIP] rsx: Improve memory inheritance hierarchy - Cascade memory writes by invalidating 'downstream' subsurfaces - Fixup; always resolve for overlapping surfaces before sampling (force atlas gather test) --- rpcs3/Emu/RSX/Common/surface_store.h | 156 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/Common/texture_cache.h | 55 +++++----- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 8 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 8 +- 4 files changed, 186 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index ca9d12dfb0..259b52960c 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -58,6 +58,26 @@ namespace rsx u8 bpp; }; + template + struct surface_hierachy_info + { + struct memory_overlap_t + { + image_storage_type _ref; + u32 memory_address; + u32 x; + u32 y; + u32 w; + u32 h; + }; + + u32 memory_address; + u32 memory_range; + image_storage_type memory_contents; + + std::vector overlapping_set; + }; + template struct render_target_descriptor { @@ -155,12 +175,98 @@ namespace rsx std::tuple m_bound_depth_stencil = {}; std::list invalidated_resources; + std::vector> m_memory_tree; u64 cache_tag = 0ull; u64 write_tag = 0ull; + u64 memory_tag = 0ull; surface_store() = default; ~surface_store() = default; surface_store(const surface_store&) = delete; + + private: + void generate_render_target_memory_tree() + { + auto process_entry = [](surface_hierachy_info& block_info, + const surface_format_info& info, + u32 memory_address, u32 memory_end, + u32 address, surface_type surface) + { + if (address <= memory_address) // also intentionally fails on self-test + return; + + if (address >= memory_end) + return; + + surface_format_info info2; + Traits::get_surface_info(surface, &info2); + const auto offset = (address - memory_address); + const auto 
offset_y = (offset / info.rsx_pitch); + const auto offset_x = (offset % info.rsx_pitch) / info.bpp; + const auto pitch2 = info2.bpp * info2.surface_width; + + const bool fits_w = ((offset % info.rsx_pitch) + pitch2) <= info.rsx_pitch; + const bool fits_h = ((offset_y + info2.surface_height) * info.rsx_pitch) <= (memory_end - memory_address); + + if (fits_w && fits_h) + { + surface_hierachy_info::memory_overlap_t overlap; + overlap._ref = surface; + overlap.memory_address = address; + overlap.x = offset_x; + overlap.y = offset_y; + overlap.w = info2.surface_width; + overlap.h = info2.surface_height; + + block_info.overlapping_set.push_back(overlap); + } + else + { + // TODO + } + }; + + auto process_block = [this, process_entry](u32 memory_address, surface_type surface) + { + surface_hierachy_info block_info; + surface_format_info info; + Traits::get_surface_info(surface, &info); + const auto memory_end = memory_address + (info.rsx_pitch * info.surface_height); + + for (const auto &rtt : m_render_targets_storage) + { + process_entry(block_info, info, memory_address, memory_end, rtt.first, Traits::get(rtt.second)); + } + + for (const auto &ds : m_depth_stencil_storage) + { + process_entry(block_info, info, memory_address, memory_end, ds.first, Traits::get(ds.second)); + } + + if (!block_info.overlapping_set.empty()) + { + block_info.memory_address = memory_address; + block_info.memory_range = (memory_end - memory_address); + block_info.memory_contents = surface; + + m_memory_tree.push_back(block_info); + } + }; + + for (auto &rtt : m_bound_render_targets) + { + if (const auto address = std::get<0>(rtt)) + { + process_block(address, std::get<1>(rtt)); + } + } + + if (const auto address = std::get<0>(m_bound_depth_stencil)) + { + process_block(address, std::get<1>(m_bound_depth_stencil)); + } + } + protected: /** * If render target already exists at address, issue state change operation on cmdList. 
@@ -357,6 +463,7 @@ namespace rsx // u32 clip_y = clip_vertical_reg; cache_tag++; + m_memory_tree.clear(); // Make previous RTTs sampleable for (std::tuple &rtt : m_bound_render_targets) @@ -907,13 +1014,43 @@ namespace rsx return result; } - void on_write() + void on_write(u32 address = 0) { - if (write_tag == cache_tag) + if (!address && write_tag == cache_tag) + { + // Nothing to do return; + } + + if (memory_tag != cache_tag) + { + generate_render_target_memory_tree(); + memory_tag = cache_tag; + } + + if (!m_memory_tree.empty()) + { + for (auto &e : m_memory_tree) + { + if (address && e.memory_address != address) + { + continue; + } + + for (auto &entry : e.overlapping_set) + { + entry._ref->dirty = true; + } + } + } for (auto &rtt : m_bound_render_targets) { + if (address && std::get<0>(rtt) != address) + { + continue; + } + if (auto surface = std::get<1>(rtt)) { surface->on_write(); @@ -922,10 +1059,21 @@ namespace rsx if (auto ds = std::get<1>(m_bound_depth_stencil)) { - ds->on_write(); + if (!address || std::get<0>(m_bound_depth_stencil) == address) + { + ds->on_write(); + } } - write_tag = cache_tag; + if (!address) + { + write_tag = cache_tag; + } + } + + void notify_memory_structure_changed() + { + cache_tag++; } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 3e70490d94..9d90d8a2fd 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1600,39 +1600,36 @@ namespace rsx scale_y = 0.f; } - if (internal_width > surface_width || internal_height > surface_height) + auto bpp = get_format_block_size_in_bytes(format); + auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); + + if (overlapping.size() > 1) { - auto bpp = get_format_block_size_in_bytes(format); - auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); + const auto w = 
rsx::apply_resolution_scale(internal_width, true); + const auto h = rsx::apply_resolution_scale(internal_height, true); - if (overlapping.size() > 1) + sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather, + texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, + scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; + + result.external_subresource_desc.sections_to_copy.reserve(overlapping.size()); + + for (auto &section : overlapping) { - const auto w = rsx::apply_resolution_scale(internal_width, true); - const auto h = rsx::apply_resolution_scale(internal_height, true); - - sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather, - texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, - scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; - - result.external_subresource_desc.sections_to_copy.reserve(overlapping.size()); - - for (auto &section : overlapping) - { - result.external_subresource_desc.sections_to_copy.push_back - ({ - section.surface->get_surface(), - rsx::apply_resolution_scale(section.src_x, true), - rsx::apply_resolution_scale(section.src_y, true), - rsx::apply_resolution_scale(section.dst_x, true), - rsx::apply_resolution_scale(section.dst_y, true), - 0, - rsx::apply_resolution_scale(section.width, true), - rsx::apply_resolution_scale(section.height, true) - }); - } - - return result; + result.external_subresource_desc.sections_to_copy.push_back + ({ + section.surface->get_surface(), + rsx::apply_resolution_scale(section.src_x, true), + rsx::apply_resolution_scale(section.src_y, true), + rsx::apply_resolution_scale(section.dst_x, true), + rsx::apply_resolution_scale(section.dst_y, true), + 0, + rsx::apply_resolution_scale(section.width, true), + rsx::apply_resolution_scale(section.height, true) + }); } + + return result; } bool 
requires_processing = surface_width > internal_width || surface_height > internal_height; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 0243e4d8f6..2a1fe879dd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1029,9 +1029,9 @@ void GLGSRender::clear_surface(u32 arg) gl_state.clear_depth(f32(clear_depth) / max_depth_value); mask |= GLenum(gl::buffers::depth); - if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) + if (const auto address = std::get<0>(m_rtts.m_bound_depth_stencil)) { - ds->on_write(); + m_rtts.on_write(address); } } @@ -1075,9 +1075,9 @@ void GLGSRender::clear_surface(u32 arg) for (auto &rtt : m_rtts.m_bound_render_targets) { - if (auto surface = std::get<1>(rtt)) + if (const auto address = std::get<0>(rtt)) { - surface->on_write(); + m_rtts.on_write(address); } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index e226b584cd..1e28eb6d6e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1775,9 +1775,9 @@ void VKGSRender::clear_surface(u32 mask) for (auto &rtt : m_rtts.m_bound_render_targets) { - if (auto surface = std::get<1>(rtt)) + if (const auto address = std::get<0>(rtt)) { - surface->on_write(); + m_rtts.on_write(address); } } } @@ -1786,9 +1786,9 @@ void VKGSRender::clear_surface(u32 mask) if (mask & 0x3) { - if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) + if (const auto address = std::get<0>(m_rtts.m_bound_depth_stencil)) { - ds->on_write(); + m_rtts.on_write(address); clear_descriptors.push_back({ (VkImageAspectFlags)depth_stencil_mask, 0, depth_stencil_clear_values }); } }