From fc486a1baca4c406c6e5e6501260654792eac454 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 19 Sep 2018 01:21:57 +0300 Subject: [PATCH] rsx: Preserve memory order when doing flush - Orders flushing to preserve memory at all cost - Avoids false positive where flushing overlapping sections can falsely invalidate another with head/tail test --- rpcs3/Emu/RSX/Common/texture_cache.h | 144 +++++++++++++++------------ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 28 +++++- rpcs3/Emu/RSX/GL/GLTextureCache.h | 9 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 22 +++- rpcs3/Emu/RSX/VK/VKTextureCache.h | 5 +- 5 files changed, 128 insertions(+), 80 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 54c1b09beb..ced3d8d3fb 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "../rsx_cache.h" #include "../rsx_utils.h" @@ -74,12 +74,28 @@ namespace rsx std::deque read_history; u64 cache_tag = 0; + u64 last_write_tag = 0; memory_read_flags readback_behaviour = memory_read_flags::flush_once; rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d; + void reset(u32 rsx_address, u32 rsx_size) + { + rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; + rsx::buffered_section::reset(rsx_address, rsx_size, policy); + + flushed = false; + synchronized = false; + + sync_timestamp = 0ull; + last_write_tag = 0ull; + cache_tag = 0ull; + + // TODO: Fix write tracking and reset stats + } + bool matches(u32 rsx_address, u32 rsx_size) { return rsx::buffered_section::matches(rsx_address, rsx_size); @@ -110,9 +126,10 @@ namespace rsx return false; } - void touch() + void touch(u64 tag) { num_writes++; + last_write_tag = tag; } void reset_write_statistics() @@ -645,6 +662,47 @@ namespace rsx return result; } + template + void flush_set(thrashed_set& data, Args&&... extras) + { + if (data.sections_to_flush.size() > 1) + { + // Sort with oldest data first + // Ensures that new data tramples older data + std::sort(data.sections_to_flush.begin(), data.sections_to_flush.end(), [](const auto& a, const auto& b) + { + return (a->last_write_tag < b->last_write_tag); + }); + } + + for (auto &surface : data.sections_to_flush) + { + if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always) + { + // This region is set to always read from itself (unavoidable hard sync) + const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; + if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp()) + { + m_num_cache_mispredictions++; + m_num_cache_misses++; + surface->copy_texture(true, std::forward(extras)...); + } + } + + if (!surface->flush(std::forward(extras)...)) + { + // Missed address, note this + // TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*surface); + } + + m_num_flush_requests++; + data.sections_to_unprotect.push_back(surface); + } + + data.sections_to_flush.clear(); + } + void unprotect_set(thrashed_set& data) { auto release_set = [this](std::vector& _set) @@ -844,33 +902,9 @@ namespace rsx result.sections_to_unprotect.push_back(obj.first); } - else if (!allow_flush) - { - result.sections_to_flush.push_back(obj.first); - } else { - if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always) - { - // This region is set to always read from itself (unavoidable hard sync) - const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; - if (obj.first->is_synchronized() && ROP_timestamp > obj.first->get_sync_timestamp()) - { - m_num_cache_mispredictions++; - m_num_cache_misses++; - obj.first->copy_texture(true, std::forward(extras)...); - } - } - - if (!obj.first->flush(std::forward(extras)...)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(*obj.first); - } - - m_num_flush_requests++; - result.sections_to_unprotect.push_back(obj.first); + result.sections_to_flush.push_back(obj.first); } continue; @@ -897,19 +931,25 @@ namespace rsx obj.second->remove_one(); } - if (deferred_flush && result.sections_to_flush.size()) + if (!result.sections_to_flush.empty()) { - result.num_flushable = static_cast(result.sections_to_flush.size()); - result.address_base = address; - result.address_range = range; - result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); - return result; - } - else - { - unprotect_set(result); + if (deferred_flush) + { + result.num_flushable = static_cast(result.sections_to_flush.size()); + result.address_base = address; + result.address_range = range; + result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); + return result; + } + else + { + verify(HERE), allow_flush; + flush_set(result, std::forward(extras)...); + } } + unprotect_set(result); + //Everything has been handled result = {}; result.violation_handled = true; @@ -1184,7 +1224,7 @@ namespace rsx region.set_context(texture_upload_context::framebuffer_storage); region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); region.set_memory_read_flags(memory_read_flags::flush_always); - region.touch(); + region.touch(m_cache_update_tag); m_flush_always_cache[memory_address] = memory_size; @@ -1376,30 +1416,7 @@ namespace rsx if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag) { //1. Write memory to cpu side - for (auto &tex : data.sections_to_flush) - { - if (tex->is_locked()) - { - if (tex->get_memory_read_flags() == rsx::memory_read_flags::flush_always) - { - // This region is set to always read from itself (unavoidable hard sync) - const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; - if (tex->is_synchronized() && ROP_timestamp > tex->get_sync_timestamp()) - { - m_num_cache_mispredictions++; - m_num_cache_misses++; - tex->copy_texture(true, std::forward(extras)...); - } - } - - if (!tex->flush(std::forward(extras)...)) - { - record_cache_miss(*tex); - } - - m_num_flush_requests++; - } - } + flush_set(data, std::forward(extras)...); //2. Release all obsolete sections unprotect_set(data); @@ -2521,7 +2538,8 @@ namespace rsx verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size(); cached_dest->reprotect(utils::protection::no, { mem_base, mem_length }); - cached_dest->touch(); + cached_dest->touch(m_cache_update_tag); + update_cache_tag(); } else { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 578fa7ce09..5e188d062e 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/Memory/vm.h" #include "Emu/System.h" #include "GLGSRender.h" @@ -1473,10 +1473,27 @@ void GLGSRender::flip(int buffer) if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) { - buffer_width = render_target_texture->width(); - buffer_height = render_target_texture->height(); + if (render_target_texture->last_use_tag == m_rtts.write_tag) + { + image = render_target_texture->raw_handle(); + } + else + { + const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + verify(HERE), !overlap_info.empty(); - image = render_target_texture->raw_handle(); + if (overlap_info.back().surface == render_target_texture) + { + // Confirmed to be the newest data source in that range + image = render_target_texture->raw_handle(); + } + } + + if (image) + { + buffer_width = render_target_texture->width(); + buffer_height = render_target_texture->height(); + } } else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height)) { @@ -1484,7 +1501,8 @@ void GLGSRender::flip(int buffer) //The render might have been done offscreen or in software and a blit used to display image = surface->get_raw_texture()->id(); } - else + + if (!image) { LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index a34775466e..df5dff0f72 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "stdafx.h" @@ -252,12 +252,7 @@ namespace gl void reset(u32 base, u32 size, bool /*flushable*/=false) { - rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; - rsx::buffered_section::reset(base, size, policy); - - flushed = false; - synchronized = false; - sync_timestamp = 0ull; + rsx::cached_texture_section::reset(base, size); vram_texture = nullptr; managed_texture.reset(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 3c972a7883..869aa770a0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2922,6 +2922,9 @@ void VKGSRender::flip(int buffer) u32 buffer_width = display_buffers[buffer].width; u32 buffer_height = display_buffers[buffer].height; + u32 buffer_pitch = display_buffers[buffer].pitch; + + if (!buffer_pitch) buffer_pitch = buffer_width * 4; // TODO: Check avconf coordi aspect_ratio; @@ -3005,7 +3008,21 @@ void VKGSRender::flip(int buffer) if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) { - image_to_flip = render_target_texture; + if (render_target_texture->last_use_tag == m_rtts.write_tag) + { + image_to_flip = render_target_texture; + } + else + { + const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + verify(HERE), !overlap_info.empty(); + + if (overlap_info.back().surface == render_target_texture) + { + // Confirmed to be the newest data source in that range + image_to_flip = render_target_texture; + } + } } else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height)) { @@ -3013,7 +3030,8 @@ void VKGSRender::flip(int buffer) //The render might have been done offscreen or in software and a blit used to display image_to_flip = surface->get_raw_texture(); } - else + + if (!image_to_flip) { //Read from cell image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 27bc84fd5e..382db74bcf 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "stdafx.h" #include "VKRenderTargets.h" #include "VKGSRender.h" @@ -32,8 +32,7 @@ namespace vk if (length > cpu_address_range) release_dma_resources(); - rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; - rsx::buffered_section::reset(base, length, policy); + rsx::cached_texture_section::reset(base, length); } void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)