rsx: Preserve memory order when doing flush

- Orders flushing to preserve memory contents at all costs
- Avoids false positives where flushing overlapping sections could falsely invalidate one another via the head/tail test
This commit is contained in:
kd-11 2018-09-19 01:21:57 +03:00 committed by kd-11
parent 23dc9d54e3
commit fc486a1bac
5 changed files with 128 additions and 80 deletions

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "../rsx_cache.h"
#include "../rsx_utils.h"
@ -74,12 +74,28 @@ namespace rsx
std::deque<u32> read_history;
u64 cache_tag = 0;
u64 last_write_tag = 0;
memory_read_flags readback_behaviour = memory_read_flags::flush_once;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;
rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d;
// Returns this section to a pristine, unflushed state and re-protects the
// given address range using the protection policy selected by the video config.
void reset(u32 rsx_address, u32 rsx_size)
{
	const auto policy = g_cfg.video.strict_rendering_mode ?
		rsx::protection_policy::protect_policy_full_range :
		rsx::protection_policy::protect_policy_conservative;

	rsx::buffered_section::reset(rsx_address, rsx_size, policy);

	// Clear all per-region bookkeeping for the fresh mapping
	synchronized = false;
	flushed = false;
	sync_timestamp = 0ull;
	cache_tag = 0ull;
	last_write_tag = 0ull;

	// TODO: Fix write tracking and reset stats
}
bool matches(u32 rsx_address, u32 rsx_size)
{
return rsx::buffered_section::matches(rsx_address, rsx_size);
@ -110,9 +126,10 @@ namespace rsx
return false;
}
void touch()
// Records a write to this section, stamping it with the supplied cache tag
// so later flushes can be ordered oldest-write-first.
void touch(u64 tag)
{
	last_write_tag = tag;
	++num_writes;
}
void reset_write_statistics()
@ -645,6 +662,47 @@ namespace rsx
return result;
}
// Flushes every section queued in 'data' back to CPU-visible memory,
// oldest write first, so that newer data always tramples older data where
// sections overlap. Each flushed section is appended to
// data.sections_to_unprotect; the flush queue is emptied on return.
//
// extras: backend-specific arguments forwarded to copy_texture()/flush().
// NOTE(review): 'extras' is forwarded more than once inside the loop; this
// is only safe while callers pass lvalues here — confirm before ever
// passing rvalue arguments through this path.
template <typename ...Args>
void flush_set(thrashed_set& data, Args&&... extras)
{
	if (data.sections_to_flush.size() > 1)
	{
		// Sort with oldest data first.
		// Ensures that new data tramples older data.
		// stable_sort (rather than sort) keeps the existing relative order
		// of sections that share a write tag, so the flush order stays
		// deterministic even for tag ties.
		std::stable_sort(data.sections_to_flush.begin(), data.sections_to_flush.end(), [](const auto& a, const auto& b)
		{
			return (a->last_write_tag < b->last_write_tag);
		});
	}

	for (auto &surface : data.sections_to_flush)
	{
		if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
		{
			// This region is set to always read from itself (unavoidable hard sync)
			const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
			if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
			{
				m_num_cache_mispredictions++;
				m_num_cache_misses++;
				surface->copy_texture(true, std::forward<Args>(extras)...);
			}
		}

		if (!surface->flush(std::forward<Args>(extras)...))
		{
			// Missed address, note this
			// TODO: Lower severity when successful to keep the cache from overworking
			record_cache_miss(*surface);
		}

		m_num_flush_requests++;
		data.sections_to_unprotect.push_back(surface);
	}

	data.sections_to_flush.clear();
}
void unprotect_set(thrashed_set& data)
{
auto release_set = [this](std::vector<section_storage_type*>& _set)
@ -844,33 +902,9 @@ namespace rsx
result.sections_to_unprotect.push_back(obj.first);
}
else if (!allow_flush)
{
result.sections_to_flush.push_back(obj.first);
}
else
{
if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (obj.first->is_synchronized() && ROP_timestamp > obj.first->get_sync_timestamp())
{
m_num_cache_mispredictions++;
m_num_cache_misses++;
obj.first->copy_texture(true, std::forward<Args>(extras)...);
}
}
if (!obj.first->flush(std::forward<Args>(extras)...))
{
//Missed address, note this
//TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(*obj.first);
}
m_num_flush_requests++;
result.sections_to_unprotect.push_back(obj.first);
result.sections_to_flush.push_back(obj.first);
}
continue;
@ -897,19 +931,25 @@ namespace rsx
obj.second->remove_one();
}
if (deferred_flush && result.sections_to_flush.size())
if (!result.sections_to_flush.empty())
{
result.num_flushable = static_cast<int>(result.sections_to_flush.size());
result.address_base = address;
result.address_range = range;
result.cache_tag = m_cache_update_tag.load(std::memory_order_consume);
return result;
}
else
{
unprotect_set(result);
if (deferred_flush)
{
result.num_flushable = static_cast<int>(result.sections_to_flush.size());
result.address_base = address;
result.address_range = range;
result.cache_tag = m_cache_update_tag.load(std::memory_order_consume);
return result;
}
else
{
verify(HERE), allow_flush;
flush_set(result, std::forward<Args>(extras)...);
}
}
unprotect_set(result);
//Everything has been handled
result = {};
result.violation_handled = true;
@ -1184,7 +1224,7 @@ namespace rsx
region.set_context(texture_upload_context::framebuffer_storage);
region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d);
region.set_memory_read_flags(memory_read_flags::flush_always);
region.touch();
region.touch(m_cache_update_tag);
m_flush_always_cache[memory_address] = memory_size;
@ -1376,30 +1416,7 @@ namespace rsx
if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
{
//1. Write memory to cpu side
for (auto &tex : data.sections_to_flush)
{
if (tex->is_locked())
{
if (tex->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (tex->is_synchronized() && ROP_timestamp > tex->get_sync_timestamp())
{
m_num_cache_mispredictions++;
m_num_cache_misses++;
tex->copy_texture(true, std::forward<Args>(extras)...);
}
}
if (!tex->flush(std::forward<Args>(extras)...))
{
record_cache_miss(*tex);
}
m_num_flush_requests++;
}
}
flush_set(data, std::forward<Args>(extras)...);
//2. Release all obsolete sections
unprotect_set(data);
@ -2521,7 +2538,8 @@ namespace rsx
verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size();
cached_dest->reprotect(utils::protection::no, { mem_base, mem_length });
cached_dest->touch();
cached_dest->touch(m_cache_update_tag);
update_cache_tag();
}
else
{

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
#include "GLGSRender.h"
@ -1473,10 +1473,27 @@ void GLGSRender::flip(int buffer)
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image = render_target_texture->raw_handle();
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
image = render_target_texture->raw_handle();
if (overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height))
{
@ -1484,7 +1501,8 @@ void GLGSRender::flip(int buffer)
//The render might have been done offscreen or in software and a blit used to display
image = surface->get_raw_texture()->id();
}
else
if (!image)
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "stdafx.h"
@ -252,12 +252,7 @@ namespace gl
void reset(u32 base, u32 size, bool /*flushable*/=false)
{
rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative;
rsx::buffered_section::reset(base, size, policy);
flushed = false;
synchronized = false;
sync_timestamp = 0ull;
rsx::cached_texture_section::reset(base, size);
vram_texture = nullptr;
managed_texture.reset();

View File

@ -2922,6 +2922,9 @@ void VKGSRender::flip(int buffer)
u32 buffer_width = display_buffers[buffer].width;
u32 buffer_height = display_buffers[buffer].height;
u32 buffer_pitch = display_buffers[buffer].pitch;
if (!buffer_pitch) buffer_pitch = buffer_width * 4; // TODO: Check avconf
coordi aspect_ratio;
@ -3005,7 +3008,21 @@ void VKGSRender::flip(int buffer)
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
image_to_flip = render_target_texture;
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address, buffer_width, buffer_height))
{
@ -3013,7 +3030,8 @@ void VKGSRender::flip(int buffer)
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
else
if (!image_to_flip)
{
//Read from cell
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "stdafx.h"
#include "VKRenderTargets.h"
#include "VKGSRender.h"
@ -32,8 +32,7 @@ namespace vk
if (length > cpu_address_range)
release_dma_resources();
rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative;
rsx::buffered_section::reset(base, length, policy);
rsx::cached_texture_section::reset(base, length);
}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)