From af360b78f248ee1ce1892b3af6c99a09123607b3 Mon Sep 17 00:00:00 2001
From: Rui Pinheiro
Date: Thu, 18 Oct 2018 23:22:00 +0100
Subject: [PATCH] Texture cache section management fixups

Fixes VRAM leaks and incorrect destruction of resources, which could
lead to driver crashes. Additionally, lock_memory_region is now able to
flush superseded sections. However, because this has a potential
performance impact for little gain, it is gated behind a new debug
setting in config.yaml ("Strict Texture Flushing").
---
 rpcs3/Emu/RSX/Common/texture_cache.h       | 226 ++++++++-------
 rpcs3/Emu/RSX/Common/texture_cache_utils.h |  91 ++++-----
 rpcs3/Emu/RSX/GL/GLRenderTargets.cpp       |   4 +-
 rpcs3/Emu/RSX/GL/GLTextureCache.h          |  32 +--
 rpcs3/Emu/RSX/VK/VKGSRender.cpp            |   4 +-
 rpcs3/Emu/RSX/VK/VKTextureCache.h          |  44 ++--
 rpcs3/Emu/System.h                         |   1 +
 7 files changed, 180 insertions(+), 222 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h
index 03a0bc004d..1a67af21b5 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache.h
@@ -74,7 +74,7 @@ namespace rsx
 //-------------------------
 // It is illegal to have only exclusions except when reading from a range with only RO sections
- ASSERT(flush_and_unprotect_count > 0 || exclude_count == 0 || !cause.is_write());
+ ASSERT(flush_and_unprotect_count > 0 || exclude_count == 0 || cause.is_read());
 if (flush_and_unprotect_count == 0 && exclude_count > 0)
 {
 // double-check that only RO sections exists
@@ -87,8 +87,8 @@
 const auto min_overlap_fault_no_ro = tex_cache_checker.get_minimum_number_of_sections(fault_range);
 const auto min_overlap_invalidate_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range);
- const u16 min_overlap_fault = min_overlap_fault_no_ro.first + (cause.is_write() ? min_overlap_fault_no_ro.second : 0);
- const u16 min_overlap_invalidate = min_overlap_invalidate_no_ro.first + (cause.is_write() ? min_overlap_invalidate_no_ro.second : 0);
+ const u16 min_overlap_fault = min_overlap_fault_no_ro.first + (cause.is_read() ? 0 : min_overlap_fault_no_ro.second);
+ const u16 min_overlap_invalidate = min_overlap_invalidate_no_ro.first + (cause.is_read() ? 0 : min_overlap_invalidate_no_ro.second);
 AUDIT(min_overlap_fault <= min_overlap_invalidate);
 const u16 min_flush_or_unprotect = min_overlap_fault;
@@ -326,7 +326,7 @@
 inline bool region_intersects_cache(const address_range &test_range, bool is_writing)
 {
- AUDIT( test_range.valid() );
+ AUDIT(test_range.valid());
 // Quick range overlaps with cache tests
 if (!is_writing)
@@ -411,7 +411,7 @@
 for (const auto &section : sections)
 {
 const auto &new_range = section->get_locked_range();
- AUDIT( new_range.is_page_range() );
+ AUDIT(new_range.is_page_range());
 result.merge(new_range);
 }
@@ -473,7 +473,7 @@
 exclusion_range.intersect(data.invalidate_range);
 // Sanity checks
- AUDIT( exclusion_range.is_page_range() );
+ AUDIT(exclusion_range.is_page_range());
 AUDIT(!exclusion_range.overlaps(data.fault_range));
 // Apply exclusion
@@ -508,10 +508,10 @@
 }
 }
 }
- AUDIT( !ranges_to_unprotect.empty() );
+ AUDIT(!ranges_to_unprotect.empty());
 // Exclude the fault range if told to do so (this means the fault_range got unmapped or is otherwise invalid)
- if (data.cause.exclude_fault_range())
+ if (data.cause.keep_fault_range_protection())
 {
 ranges_to_unprotect.exclude(data.fault_range);
 ranges_to_protect_ro.exclude(data.fault_range);
@@ -521,10 +521,10 @@
 }
 else
 {
- AUDIT( ranges_to_unprotect.inside(data.invalidate_range) );
- AUDIT( ranges_to_protect_ro.inside(data.invalidate_range) );
+ AUDIT(ranges_to_unprotect.inside(data.invalidate_range));
+ AUDIT(ranges_to_protect_ro.inside(data.invalidate_range));
 }
- AUDIT( !ranges_to_protect_ro.overlaps(ranges_to_unprotect) );
+ AUDIT(!ranges_to_protect_ro.overlaps(ranges_to_unprotect));
 // Unprotect and discard
 protect_ranges(ranges_to_unprotect, utils::protection::rw);
@@ -540,11 +540,9 @@
 // Return a set containing all sections that should be flushed/unprotected/reprotected
 std::atomic<u64> m_last_section_cache_tag = 0;
- intersecting_set get_intersecting_set(const address_range &fault_range, bool is_writing)
+ intersecting_set get_intersecting_set(const address_range &fault_range)
 {
- (void)is_writing; // silence unused formal parameter warning; used only for debug purposes if TEXTURE_CACHE_DEBUG is defined
-
- AUDIT( fault_range.is_page_range() );
+ AUDIT(fault_range.is_page_range());
 const u64 cache_tag = ++m_last_section_cache_tag;
@@ -623,7 +621,7 @@
 }
 }
- AUDIT( result.invalidate_range.is_page_range() );
+ AUDIT(result.invalidate_range.is_page_range());
 #ifdef TEXTURE_CACHE_DEBUG
 // naive check that sections are not duplicated in the results
@@ -636,12 +634,6 @@
 }
 verify(HERE), count == 1;
 }
-
- // Check that the number of sections we "found" matches the sections known to be in the invalidation range
- const u32 count = static_cast<u32>(result.sections.size());
- const auto inv_min_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range);
- const u16 inv_min = inv_min_no_ro.first + (is_writing ? inv_min_no_ro.second : 0);
- ASSERT(count >= inv_min);
 #endif //TEXTURE_CACHE_DEBUG
 return result;
 }
@@ -657,18 +649,19 @@
 tex_cache_checker.verify();
 #endif // TEXTURE_CACHE_DEBUG
- AUDIT( cause.valid() );
- AUDIT( fault_range_in.valid() );
+ AUDIT(cause.valid());
+ AUDIT(fault_range_in.valid());
 address_range fault_range = fault_range_in.to_page_range();
- auto trampled_set = std::move(get_intersecting_set(fault_range, cause.is_write()));
+ auto trampled_set = std::move(get_intersecting_set(fault_range));
 thrashed_set result = {};
 result.cause = cause;
 result.fault_range = fault_range;
 result.invalidate_range = trampled_set.invalidate_range;
- if (cause == invalidation_cause::unmap && !trampled_set.sections.empty())
+ // Fast code-path for keeping the fault range protection when not flushing anything
+ if (cause.keep_fault_range_protection() && cause.skip_flush() && !trampled_set.sections.empty())
 {
 // We discard all sections fully inside fault_range
 for (auto &obj : trampled_set.sections)
 {
 auto &tex = *obj;
 if (tex.inside(fault_range, section_bounds::locked_range))
 {
- // Discard and destroy - this section won't be needed any more
- tex.discard();
- tex.destroy();
+ // Discard - this section won't be needed any more
+ tex.discard(/* set_dirty */ true);
 }
- else
+ else if (tex.overlaps(fault_range, section_bounds::locked_range))
 {
- if (tex.is_flushable())
+ if (g_cfg.video.strict_texture_flushing && tex.is_flushable())
 {
 // TODO: Flush only the part outside the fault_range
- LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (invalidate_range_impl_base)", tex.get_section_base());
+ LOG_TODO(RSX, "Flushable section data may have been lost");
 }
 tex.set_dirty(true);
@@ -709,15 +701,6 @@
 return result;
 }
 AUDIT(fault_range.inside(invalidate_range));
-
- // There are textures that need to be invalidated, we now trigger another intersecting_set search on it
- // "invalidate_range" contains the range of sections that overlaps the unmap boundary
- // We set "fault_range = invalidate_range" to cause a forced invalidation over that whole range,
- // and proceed as normal.
- // NOTE: result.fault_range *must* stay the original fault_range otherwise everything breaks
- fault_range = invalidate_range;
- trampled_set = std::move(get_intersecting_set(fault_range, true));
- result.invalidate_range = trampled_set.invalidate_range;
 }
@@ -730,16 +713,19 @@
 {
 auto &tex = *obj;
+ if (!tex.is_locked())
+ continue;
+
 const rsx::section_bounds bounds = tex.get_overlap_test_bounds();
 if (
 // RO sections during a read invalidation can be ignored (unless there are flushables in trampled_set, since those could overwrite RO data)
 // TODO: Also exclude RO sections even if there are flushables
- (invalidation_keep_ro_during_read && !trampled_set.has_flushables && !cause.is_write() && tex.get_protection() == utils::protection::ro) ||
+ (invalidation_keep_ro_during_read && !trampled_set.has_flushables && cause.is_read() && tex.get_protection() == utils::protection::ro) ||
 // Sections that are not fully contained in invalidate_range can be ignored
 !tex.inside(trampled_set.invalidate_range, bounds) ||
- // Unsynchronized sections that do not overlap the fault range directly can also be ignored
- (invalidation_ignore_unsynchronized && tex.is_flushable() && !tex.is_synchronized() && !tex.overlaps(fault_range, bounds))
+ // Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
+ (invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !tex.overlaps(fault_range, bounds))
 )
 {
 // False positive
@@ -749,12 +735,12 @@
 if (tex.is_flushable())
 {
- //Write if and only if no one else has trashed section memory already
- //TODO: Proper section management should prevent this from happening
- //TODO: Blit engine section merge support and/or partial texture memory buffering
+ // Write if and only if no one else has trashed section memory already
+ // TODO: Proper section management should prevent this from happening
+ // TODO: Blit engine section merge support and/or partial texture memory buffering
 if (tex.is_dirty() || !tex.test_memory_head() || !tex.test_memory_tail())
 {
- //Contents clobbered, destroy this
+ // Contents clobbered, destroy this
 if (!tex.is_dirty())
 {
 tex.set_dirty(true);
@@ -771,10 +757,10 @@
 }
 else
 {
- //allow_flush = false and not synchronized
+ // deferred_flush = true and not synchronized
 if (!tex.is_dirty())
 {
- AUDIT( tex.get_memory_read_flags() != memory_read_flags::flush_always );
+ AUDIT(tex.get_memory_read_flags() != memory_read_flags::flush_always);
 tex.set_dirty(true);
 }
@@ -795,7 +781,7 @@
 const bool has_flushables = !result.sections_to_flush.empty();
 const bool has_unprotectables = !result.sections_to_unprotect.empty();
- if (cause.is_deferred() && has_flushables)
+ if (cause.deferred_flush() && has_flushables)
 {
 // There is something to flush, but we've been asked to defer it
 result.num_flushable = static_cast<u32>(result.sections_to_flush.size());
@@ -804,24 +790,24 @@
 }
 else if (has_flushables || has_unprotectables)
 {
- AUDIT(!has_flushables || cause.allow_flush());
+ AUDIT(!has_flushables || !cause.deferred_flush());
 // We have something to flush and are allowed to flush now
 // or there is nothing to flush but we have something to unprotect
- if (has_flushables)
+ if (has_flushables && !cause.skip_flush())
 {
 flush_set(result, std::forward<Args>(extras)...);
 }
 unprotect_set(result);
- //Everything has been handled
+ // Everything has been handled
 result.clear_sections();
 }
 else
 {
 // This is a read and all overlapping sections were RO and were excluded
- AUDIT(!cause.is_write() && !result.sections_to_exclude.empty());
+ AUDIT(cause.is_read() && !result.sections_to_exclude.empty());
 // We did not handle this violation
 result.clear_sections();
@@ -981,8 +967,7 @@
 auto &block = m_storage.block_for(range);
 section_storage_type *best_fit = nullptr;
- section_storage_type *first_dirty = nullptr;
- section_storage_type *mismatch = nullptr;
+ section_storage_type *reuse = nullptr;
 #ifdef TEXTURE_CACHE_DEBUG
 section_storage_type *res = nullptr;
 #endif
@@ -1003,10 +988,6 @@
 res = &tex;
 #endif
 }
- else
- {
- mismatch = &tex;
- }
 }
 else if (best_fit == nullptr && tex.can_be_reused())
 {
@@ -1014,9 +995,9 @@
 best_fit = &tex;
 }
 }
- else if (first_dirty == nullptr && tex.can_be_reused())
+ else if (reuse == nullptr && tex.can_be_reused())
 {
- first_dirty = &tex;
+ reuse = &tex;
 }
 }
@@ -1025,9 +1006,9 @@
 return res;
 #endif
- if (mismatch != nullptr)
+ if (best_fit != nullptr)
 {
- auto &tex = *mismatch;
+ auto &tex = *best_fit;
 LOG_WARNING(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)",
 range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps());
 }
@@ -1036,19 +1017,25 @@
 return nullptr;
 // If found, use the best fitting section
- if (best_fit)
+ if (best_fit != nullptr)
 {
- best_fit->destroy();
+ if (best_fit->exists())
+ {
+ best_fit->destroy();
+ }
 return best_fit;
 }
 // Return the first dirty section found, if any
- if (first_dirty != nullptr)
+ if (reuse != nullptr)
 {
- first_dirty->destroy();
+ if (reuse->exists())
+ {
+ reuse->destroy();
+ }
- return first_dirty;
+ return reuse;
 }
 // Create and return a new section
@@ -1072,30 +1059,42 @@
 return nullptr;
 }
- template <typename ...Args>
- void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
+ template <typename ...FlushArgs, typename ...Args>
+ void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, const std::tuple<FlushArgs...>& flush_extras, Args&&... extras)
 {
- AUDIT( g_cfg.video.write_color_buffers ); // this method is only called when WCB is enabled
+ AUDIT(g_cfg.video.write_color_buffers); // this method is only called when WCB is enabled
 std::lock_guard lock(m_cache_mutex);
+ // Find a cached section to use
 section_storage_type& region = *find_cached_texture(rsx_range, true, false);
- if (region.get_context() != texture_upload_context::framebuffer_storage && region.exists())
+ if (!region.is_locked())
 {
+ // Invalidate sections from surface cache occupying same address range
+ std::apply(&texture_cache::invalidate_range_impl_base, std::tuple_cat(std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo), flush_extras));
+ }
+
+ // Prepare and initialize fbo region
+ if (region.exists() && region.get_context() != texture_upload_context::framebuffer_storage)
+ {
+ AUDIT(region.matches(rsx_range));
+
 //This space was being used for other purposes other than framebuffer storage
 //Delete used resources before attaching it to framebuffer memory
 read_only_tex_invalidate = true;
 // We are going to reprotect this section in a second, so discard it here
 if (region.is_locked())
+ {
 region.discard();
+ }
 // Destroy the resources
 region.destroy();
 }
- if (!region.is_locked())
+ if (!region.is_locked() || region.can_be_reused())
 {
 // New region, we must prepare it
 region.reset(rsx_range);
@@ -1105,8 +1104,8 @@
 }
 else
 {
- // Re-using locked fbo region
- AUDIT(region.matches(rsx_range));
+ // Re-using clean fbo region
+ ASSERT(region.matches(rsx_range));
 ASSERT(region.get_context() == texture_upload_context::framebuffer_storage);
 ASSERT(region.get_image_type() == rsx::texture_dimension_extended::texture_dimension_2d);
 }
@@ -1126,63 +1125,6 @@
 AUDIT(m_flush_always_cache.find(region.get_section_range()) != m_flush_always_cache.end());
 }
- // Test for invalidated sections from surface cache occupying same address range
- const auto mem_range = rsx_range.to_page_range();
- const auto &overlapped = find_texture_from_range(mem_range);
-
- bool found_region = false;
- for (auto* surface : overlapped)
- {
- if (surface == &region)
- {
- found_region = true;
- continue;
- }
-
- if (surface->get_context() == rsx::texture_upload_context::framebuffer_storage)
- {
- if (surface->get_section_base() != rsx_range.start)
- // HACK: preserve other overlapped sections despite overlap unless new section is superseding
- // TODO: write memory to cell or redesign sections to preserve the data
- // TODO ruipin: can this be done now?
- continue;
- }
-
- // Memory is shared with another surface
- // Discard it - the backend should ensure memory contents are preserved if needed
- // TODO ruipin: This fails the protection checker. Refactor to use invalidate_range_impl_base
- surface->set_dirty(true);
-
- if (surface->is_locked())
- {
- AUDIT(surface->get_memory_read_flags() != memory_read_flags::flush_always);
-
- if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail())
- {
- if (!surface->inside(rsx_range, rsx::section_bounds::full_range))
- {
- // TODO: This section contains data that should be flushed
- LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (lock_memory_region)", surface->get_section_base());
- }
- }
-
- if (surface->inside(region, rsx::section_bounds::locked_range))
- {
- // Since this surface falls inside "region", we don't need to unlock and can simply discard
- surface->discard();
- }
- else
- {
- // TODO: Exclude other NA/RO regions overlapping this one but not "region".
- // Exclude region to avoid having the region's locked_range unprotected for a split second
- const auto &srfc_rng = surface->get_section_range();
- LOG_TODO(RSX, "Valid region data may have been incorrectly unprotected (0x%x-0x%x)", srfc_rng.start, srfc_rng.end);
- surface->unprotect();
- }
- }
- }
- ASSERT(found_region);
 // Delay protection until here in case the invalidation block above has unprotected pages in this range
 region.reprotect(utils::protection::no, { 0, rsx_range.length() });
 update_cache_tag();
@@ -1200,14 +1142,14 @@
 auto* region_ptr = find_cached_texture(memory_range, false, false);
 if (region_ptr == nullptr)
 {
- AUDIT( m_flush_always_cache.find(memory_range) == m_flush_always_cache.end() );
- LOG_ERROR(RSX, "set_memory_flags(0x%x, 0x%x, %d): region_ptr == nullptr");
+ AUDIT(m_flush_always_cache.find(memory_range) == m_flush_always_cache.end());
+ LOG_ERROR(RSX, "set_memory_flags(0x%x, 0x%x, %d): region_ptr == nullptr", memory_range.start, memory_range.end, static_cast<u32>(flags));
 return;
 }
 auto& region = *region_ptr;
- if (region.is_dirty() || !region.exists() || region.get_context() != texture_upload_context::framebuffer_storage)
+ if (!region.exists() || region.is_dirty() || region.get_context() != texture_upload_context::framebuffer_storage)
 {
 #ifdef TEXTURE_CACHE_DEBUG
 if (!region.is_dirty())
@@ -1309,7 +1251,7 @@
 {
 //Test before trying to acquire the lock
 const auto range = page_for(address);
- if (!region_intersects_cache(range, cause.is_write()))
+ if (!region_intersects_cache(range, !cause.is_read()))
 return{};
 std::lock_guard lock(m_cache_mutex);
@@ -1320,7 +1262,7 @@
 thrashed_set invalidate_range(const address_range &range, invalidation_cause cause, Args&&... extras)
 {
 //Test before trying to acquire the lock
- if (!region_intersects_cache(range, cause.is_write()))
+ if (!region_intersects_cache(range, !cause.is_read()))
 return {};
 std::lock_guard lock(m_cache_mutex);
@@ -1332,7 +1274,7 @@
 {
 std::lock_guard lock(m_cache_mutex);
- AUDIT(data.cause.is_deferred());
+ AUDIT(data.cause.deferred_flush());
 AUDIT(!data.flushed);
 if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
@@ -2306,7 +2248,7 @@
 //TODO: Check for other types of format mismatch
 const address_range dst_range = address_range::start_length(dst_address, dst.pitch * dst.height);
- AUDIT( cached_dest == nullptr || cached_dest->overlaps(dst_range, section_bounds::full_range) );
+ AUDIT(cached_dest == nullptr || cached_dest->overlaps(dst_range, section_bounds::full_range));
 if (format_mismatch)
 {
 lock.upgrade();
@@ -2528,7 +2470,7 @@
 return m_storage.m_unreleased_texture_objects;
 }
- virtual const u32 get_texture_memory_in_use() const
+ virtual const u64 get_texture_memory_in_use() const
 {
 return m_storage.m_texture_memory_in_use;
 }
diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h
index da26b6b082..7c22adc1a7 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h
@@ -47,57 +47,61 @@ namespace rsx
 deferred_read,
 write,
 deferred_write,
- unmap
+ unmap, // fault range is being unmapped
+ reprotect, // we are going to reprotect the fault range
+ superseded_by_fbo // used by texture_cache::lock_memory_region
 } cause;
- bool valid() const
+ constexpr bool valid() const
 {
 return cause != invalid;
 }
- bool is_read() const
+ constexpr bool is_read() const
 {
 AUDIT(valid());
 return (cause == read || cause == deferred_read);
 }
- bool is_write() const
- {
- AUDIT(valid());
- return (cause == write || cause == deferred_write || cause == unmap);
- }
-
- bool is_deferred() const
+ constexpr bool deferred_flush() const
 {
 AUDIT(valid());
 return (cause == deferred_read || cause == deferred_write);
 }
- bool allow_flush() const
- {
- return (cause == read || cause == write || cause == unmap);
- }
-
- bool exclude_fault_range() const
+ constexpr bool destroy_fault_range() const
 {
+ AUDIT(valid());
 return (cause == unmap);
 }
- invalidation_cause undefer() const
+ constexpr bool keep_fault_range_protection() const
 {
- AUDIT(is_deferred());
- if (is_read())
+ AUDIT(valid());
+ return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
+ }
+
+ bool skip_flush() const
+ {
+ AUDIT(valid());
+ return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
+ }
+
+ constexpr invalidation_cause undefer() const
+ {
+ AUDIT(deferred_flush());
+ if (cause == deferred_read)
 return read;
- else if (is_write())
+ else if (cause == deferred_write)
 return write;
 else
 fmt::throw_exception("Unreachable " HERE);
 }
- invalidation_cause() : cause(invalid) {}
- invalidation_cause(enum_type _cause) : cause(_cause) {}
+ constexpr invalidation_cause() : cause(invalid) {}
+ constexpr invalidation_cause(enum_type _cause) : cause(_cause) {}
 operator enum_type&() { return cause; }
- operator enum_type() const { return cause; }
+ constexpr operator enum_type() const { return cause; }
 };
 struct typeless_xfer
@@ -417,6 +421,9 @@
 {
 for (auto &section : *this)
 {
+ if (section.is_locked())
+ section.unprotect();
+
 section.destroy();
 }
@@ -580,7 +587,7 @@
 public:
 std::atomic<u32> m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory
- std::atomic<u32> m_texture_memory_in_use = { 0 };
+ std::atomic<u64> m_texture_memory_in_use = { 0 };
 // Constructor
 ranged_storage(texture_cache_type *tex_cache) :
@@ -716,8 +723,8 @@
 void on_section_resources_destroyed(const section_storage_type &section)
 {
- u32 size = section.get_section_size();
- u32 prev_size = m_texture_memory_in_use.fetch_sub(size);
+ u64 size = section.get_section_size();
+ u64 prev_size = m_texture_memory_in_use.fetch_sub(size);
 ASSERT(prev_size >= size);
 }
@@ -1036,8 +1043,8 @@
 AUDIT(memory_range.valid());
 AUDIT(!is_locked());
- // Invalidate if necessary
- invalidate_range();
+ // Destroy if necessary
+ destroy();
 // Superclass
 rsx::buffered_section::reset(memory_range);
@@ -1083,10 +1090,6 @@
 */
 inline bool is_destroyed() const { return !exists(); } // this section is currently destroyed
- inline bool can_destroy() const {
- return !is_destroyed() && is_tracked();
- } // This section may be destroyed
-
 protected:
 void on_section_resources_created()
 {
@@ -1107,16 +1110,12 @@
 triggered_exists_callbacks = false;
 AUDIT(valid_range());
+ ASSERT(!is_locked());
+ ASSERT(is_managed());
 // Set dirty
 set_dirty(true);
- // Unlock
- if (is_locked())
- {
- unprotect();
- }
-
 // Trigger callbacks
 m_block->on_section_resources_destroyed(*derived());
 m_storage->on_section_resources_destroyed(*derived());
@@ -1204,14 +1203,9 @@
 /**
 * Misc.
 */
- bool is_tracked() const
- {
- return !exists() || (get_context() != framebuffer_storage);
- }
-
 bool is_unreleased() const
 {
- return is_tracked() && exists() && is_dirty() && !is_locked();
+ return exists() && is_dirty() && !is_locked();
 }
 bool can_be_reused() const
@@ -1530,12 +1524,17 @@
 /**
 * Derived wrappers
 */
- inline void destroy()
+ void destroy()
 {
 derived()->destroy();
 }
- inline bool exists() const
+ bool is_managed() const
+ {
+ return derived()->is_managed();
+ }
+
+ bool exists() const
 {
 return derived()->exists();
 }
diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
index 854db96a1d..a4e8a13376 100644
--- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
+++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
@@ -390,7 +390,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
 m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
- color_format.format, color_format.type, color_format.swap_bytes);
+ std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
 }
 }
@@ -401,7 +401,7 @@
 const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
 const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
 m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
- depth_format_gl.format, depth_format_gl.type, true);
+ std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
 }
 }
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index dbdbc25e80..e29f713e1d 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -253,18 +253,12 @@ namespace gl
 public:
 using baseclass::cached_texture_section;
- void reset(const utils::address_range &memory_range)
- {
- vram_texture = nullptr;
- managed_texture.reset();
-
- baseclass::reset(memory_range);
- }
-
 void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only, gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes)
 {
- vram_texture = static_cast<gl::viewable_image*>(image);
+ auto new_texture = static_cast<gl::viewable_image*>(image);
+ ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
+ vram_texture = new_texture;
 if (read_only)
 {
@@ -277,6 +271,7 @@
 init_buffer();
 aa_mode = static_cast<gl::render_target*>(image)->read_aa_mode;
+ ASSERT(managed_texture.get() == nullptr);
 }
 flushed = false;
@@ -302,6 +297,8 @@
 void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps)
 {
+ ASSERT(!exists() || !is_managed() || vram_texture == image);
+
 //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle
 this->width = width;
 this->height = height;
@@ -353,6 +350,8 @@
 void copy_texture(bool=false)
 {
+ ASSERT(exists());
+
 if (!pbo_id)
 {
 init_buffer();
@@ -466,8 +465,10 @@
 bool flush()
 {
+ ASSERT(exists());
+
 if (flushed) return true; //Already written, ignore
- AUDIT( is_locked() );
+ AUDIT(is_locked());
 bool result = true;
 if (!synchronized)
@@ -493,7 +494,7 @@ namespace gl
 const auto valid_range = get_confirmed_range_delta();
 const u32 valid_offset = valid_range.first;
 const u32 valid_length = valid_range.second;
- AUDIT( valid_length > 0 );
+ AUDIT(valid_length > 0);
 void *dst = get_ptr(get_section_base() + valid_offset);
 glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
@@ -624,11 +625,16 @@
 baseclass::on_section_resources_destroyed();
 }
- inline bool exists() const
+ bool exists() const
 {
 return (vram_texture != nullptr);
 }
+ bool is_managed() const
+ {
+ return !exists() || managed_texture.get() != nullptr;
+ }
+
 texture::format get_format() const
 {
 return format;
@@ -951,7 +957,7 @@
 if (context != rsx::texture_upload_context::blit_engine_dst)
 {
- AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always );
+ AUDIT(cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always);
 read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. Ask kd-11
 cached.protect(utils::protection::ro);
 }
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 3dc4705474..5778e1cac0 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -2921,7 +2921,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
 m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
- m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
+ m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
 }
 }
@@ -2932,7 +2932,7 @@
 const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
 const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
 m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
- m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
+ m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, gcm_format, false);
 }
 }
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 06ccee4faf..76383c2431 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -28,16 +28,12 @@ namespace vk
 public:
 using baseclass::cached_texture_section;
- void reset(const utils::address_range &memory_range)
- {
- if (memory_range.length() > get_section_size())
- release_dma_resources();
-
- baseclass::reset(memory_range);
- }
-
 void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)
 {
+ auto new_texture = static_cast<vk::viewable_image*>(image);
+ ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
+ vram_texture = new_texture;
+
 width = w;
 height = h;
 this->depth = depth;
@@ -46,8 +42,6 @@
 this->gcm_format = gcm_format;
 this->pack_unpack_swap_bytes = pack_swap_bytes;
- vram_texture = static_cast<vk::viewable_image*>(image);
-
 if (managed)
 {
 managed_texture.reset(vram_texture);
@@ -85,18 +79,27 @@
 void destroy()
 {
+ if (!exists())
+ return;
+
 m_tex_cache->on_section_destroyed(*this);
 vram_texture = nullptr;
+ ASSERT(managed_texture.get() == nullptr);
 release_dma_resources();
 baseclass::on_section_resources_destroyed();
 }
- inline bool exists() const
+ bool exists() const
 {
 return (vram_texture != nullptr);
 }
+ bool is_managed() const
+ {
+ return !exists() || managed_texture.get() != nullptr;
+ }
+
 vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap)
 {
 return vram_texture->get_view(remap_encoding, remap);
 }
@@ -130,6 +133,8 @@
 void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, VkQueue submit_queue)
 {
+ ASSERT(exists());
+
 if (m_device == nullptr)
 {
 m_device = &cmd.get_command_pool().get_owner();
@@ -282,8 +287,10 @@
 bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
 {
+ ASSERT(exists());
+
 if (flushed) return true;
- AUDIT( is_locked() );
+ AUDIT(is_locked());
 if (m_device == nullptr)
 {
@@ -306,7 +313,7 @@
 const auto valid_range = get_confirmed_range_delta();
 const u32 valid_offset = valid_range.first;
 const u32 valid_length = valid_range.second;
- AUDIT( valid_length > 0 );
+ AUDIT(valid_length > 0);
 void* pixels_src = dma_buffer->map(valid_offset, valid_length);
 void* pixels_dst = get_ptr(get_section_base() + valid_offset);
@@ -398,7 +405,7 @@
 view = std::move(_view);
 }
- discarded_storage(cached_texture_section& tex)
+ discarded_storage(vk::cached_texture_section& tex)
 {
 combined_image = std::move(tex.get_texture());
 block_size = tex.get_section_size();
@@ -415,8 +422,11 @@
 public:
 virtual void on_section_destroyed(cached_texture_section& tex)
 {
- m_discarded_memory_size += tex.get_section_size();
- m_discardable_storage.push_back(tex);
+ if (tex.is_managed())
+ {
+ m_discarded_memory_size += tex.get_section_size();
+ m_discardable_storage.push_back(tex);
+ }
 }
 private:
@@ -1233,7 +1243,7 @@
 return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
 }
- const u32 get_texture_memory_in_use() const override
+ const u64 get_texture_memory_in_use() const override
 {
 return m_storage.m_texture_memory_in_use;
 }
diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h
index d2b162795c..ce15e929c2 100644
--- a/rpcs3/Emu/System.h
+++ b/rpcs3/Emu/System.h
@@ -437,6 +437,7 @@ struct cfg_root : cfg::node
 cfg::_bool disable_vulkan_mem_allocator{this, "Disable Vulkan Memory Allocator", false};
 cfg::_bool full_rgb_range_output{this, "Use full RGB output range", true}; // Video out dynamic range
 cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false};
+ cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false};
 cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1};
 cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1};
 cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100};
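
Usage notes (not part of the patch itself):

The new setting is declared above as cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false}, and the code reads it through g_cfg.video.strict_texture_flushing, so in config.yaml it is expected to appear under the "Video" node as "Strict Texture Flushing: false" (the node name is inferred from the g_cfg.video accessor; it is not shown in this diff).

Below is a minimal standalone sketch of the flush_extras forwarding pattern that the lock_memory_region signature above relies on: backend-specific flush arguments travel in a std::tuple and are spliced into the flush-capable invalidation call with std::tuple_cat + std::apply. All names here (Cache, invalidate, lock_region, CmdBuf, Queue) are illustrative, not rpcs3 API.

#include <cstdio>
#include <tuple>
#include <utility>

struct CmdBuf { int id; };
struct Queue  { int id; };

struct Cache
{
    // Plays the role of texture_cache::invalidate_range_impl_base in the patch:
    // the invalidation that may flush and therefore needs backend arguments.
    void invalidate(unsigned start, unsigned length, CmdBuf cb, Queue q)
    {
        std::printf("invalidate [0x%x, +0x%x) cb=%d q=%d\n", start, length, cb.id, q.id);
    }

    // Mirrors the lock_memory_region shape: flush arguments arrive packed in a
    // tuple so they can be re-applied later, after the fixed arguments.
    template <typename... FlushArgs>
    void lock_region(unsigned start, unsigned length, const std::tuple<FlushArgs...>& flush_extras)
    {
        // Prepend the fixed arguments, then splat the combined tuple.
        std::apply(
            [this](auto&&... args) { invalidate(std::forward<decltype(args)>(args)...); },
            std::tuple_cat(std::make_tuple(start, length), flush_extras));
    }
};

int main()
{
    Cache cache;
    // Vulkan-style call site: flushing needs a command buffer and a queue.
    cache.lock_region(0x1000, 0x2000, std::make_tuple(CmdBuf{1}, Queue{2}));
    // A GL-style call site would pass std::tuple<>{} to a target taking no extras.
}

This is why the GL call sites in the patch pass std::tuple<>{} while the Vulkan ones pass std::tuple{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }: the same template serves both backends without overloads.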