mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-12-28 09:23:34 +00:00
rsx: improve memory coherency
- Avoid tagging and rely on read/write barriers and the dirty flag mechanism. Testing is done with a weak 8-byte memory test.
- Introducing new data when tagging breaks applications with race conditions, where tags can overwrite flushed data.
This commit is contained in:
parent
89c9c54743
commit
52ac0a901a
@ -91,11 +91,11 @@ namespace rsx
|
||||
template <typename image_storage_type>
|
||||
struct render_target_descriptor
|
||||
{
|
||||
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
|
||||
u32 tag_address = 0;
|
||||
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
|
||||
u32 memory_tag_address = 0u; // memory address of the start of the ROP block
|
||||
u64 memory_tag_sample = 0ull; // memory sample taken at the memory_tag_address for change testing
|
||||
|
||||
bool dirty = false;
|
||||
bool needs_tagging = false;
|
||||
image_storage_type old_contents = nullptr;
|
||||
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||
|
||||
@ -119,31 +119,26 @@ namespace rsx
|
||||
write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||
}
|
||||
|
||||
void tag()
|
||||
bool test() const
|
||||
{
|
||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
|
||||
*ptr = tag_address;
|
||||
|
||||
needs_tagging = false;
|
||||
}
|
||||
|
||||
bool test()
|
||||
{
|
||||
if (needs_tagging && dirty)
|
||||
if (dirty)
|
||||
{
|
||||
// TODO
|
||||
// Should RCB or mem-sync (inherit previous mem) to init memory
|
||||
LOG_TODO(RSX, "Resource used before memory initialization");
|
||||
}
|
||||
|
||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
|
||||
return (*ptr == tag_address);
|
||||
return (memory_tag_sample == *vm::get_super_ptr<u64>(memory_tag_address));
|
||||
}
|
||||
|
||||
void queue_tag(u32 address)
|
||||
{
|
||||
tag_address = address;
|
||||
needs_tagging = true;
|
||||
memory_tag_address = address;
|
||||
}
|
||||
|
||||
// Refreshes the stored memory sample: reads the u64 currently located at
// memory_tag_address and keeps it in memory_tag_sample. Nothing is written
// to the sampled memory; only the member variable changes.
void sync_tag()
|
||||
{
|
||||
memory_tag_sample = *vm::get_super_ptr<u64>(memory_tag_address);
|
||||
}
|
||||
|
||||
void on_write(u64 write_tag = 0)
|
||||
@ -154,10 +149,8 @@ namespace rsx
|
||||
last_use_tag = write_tag;
|
||||
}
|
||||
|
||||
if (needs_tagging)
|
||||
{
|
||||
tag();
|
||||
}
|
||||
// Tag unconditionally without introducing new data
|
||||
sync_tag();
|
||||
|
||||
read_aa_mode = write_aa_mode;
|
||||
dirty = false;
|
||||
|
@ -372,18 +372,6 @@ namespace rsx
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes the value of texaddr itself into the 32-bit location at texaddr,
// i.e. stamps the memory with its own address as a marker.
// NOTE(review): the hunk header (18 lines -> 6) suggests this helper is on the
// removed side of the diff - confirm before relying on it.
void tag_framebuffer(u32 texaddr)
|
||||
{
|
||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
|
||||
*ptr = texaddr;
|
||||
}
|
||||
|
||||
// Returns true when the 32-bit value stored at texaddr equals texaddr,
// i.e. when the marker written by tag_framebuffer() is still present.
// NOTE(review): the hunk header (18 lines -> 6) suggests this helper is on the
// removed side of the diff - confirm before relying on it.
bool test_framebuffer(u32 texaddr)
|
||||
{
|
||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
|
||||
return *ptr == texaddr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Section invalidation
|
||||
*/
|
||||
@ -1157,7 +1145,6 @@ namespace rsx
|
||||
|
||||
region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
|
||||
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
|
||||
tag_framebuffer(region.get_section_base());
|
||||
|
||||
region.set_dirty(false);
|
||||
region.touch(m_cache_update_tag);
|
||||
@ -1703,8 +1690,8 @@ namespace rsx
|
||||
// TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
|
||||
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
|
||||
{
|
||||
const bool is_active = m_rtts.address_is_bound(texaddr, false);
|
||||
if (texptr->test() || is_active)
|
||||
if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
|
||||
is_active || texptr->test())
|
||||
{
|
||||
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
||||
tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active,
|
||||
@ -1719,8 +1706,8 @@ namespace rsx
|
||||
|
||||
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
|
||||
{
|
||||
const bool is_active = m_rtts.address_is_bound(texaddr, true);
|
||||
if (texptr->test() || is_active)
|
||||
// Depth-stencil branch: the second argument must be true, as in the statement this
// one replaces, so the bound-check targets the same surface class that
// get_texture_from_depth_stencil_if_applicable() returned. Passing false (the value
// used by the render-target branch) was a copy-paste slip.
// is_active is evaluated first so texptr->test() is skipped for bound surfaces.
if (const bool is_active = m_rtts.address_is_bound(texaddr, true);
|
||||
is_active || texptr->test())
|
||||
{
|
||||
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
||||
tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active,
|
||||
@ -1949,6 +1936,8 @@ namespace rsx
|
||||
|
||||
if (src_is_render_target)
|
||||
{
|
||||
src_subres.surface->read_barrier(cmd);
|
||||
|
||||
const auto surf = src_subres.surface;
|
||||
auto src_bpp = surf->get_native_pitch() / surf->get_surface_width();
|
||||
auto expected_bpp = src_is_argb8 ? 4 : 2;
|
||||
@ -1972,6 +1961,9 @@ namespace rsx
|
||||
|
||||
if (dst_is_render_target)
|
||||
{
|
||||
// Full barrier is required in case of partial transfers
|
||||
dst_subres.surface->read_barrier(cmd);
|
||||
|
||||
auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
|
||||
auto expected_bpp = dst_is_argb8 ? 4 : 2;
|
||||
if (dst_bpp != expected_bpp)
|
||||
@ -2411,7 +2403,6 @@ namespace rsx
|
||||
AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always);
|
||||
|
||||
section.reprotect(utils::protection::no);
|
||||
tag_framebuffer(section.get_section_base());
|
||||
update_tag = true;
|
||||
}
|
||||
}
|
||||
|
@ -309,6 +309,7 @@ void GLGSRender::end()
|
||||
// Program is not ready, skip drawing this
|
||||
std::this_thread::yield();
|
||||
execute_nop_draw();
|
||||
m_rtts.on_write();
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
@ -261,6 +261,12 @@ namespace gl
|
||||
baseclass::on_speculative_flush();
|
||||
}
|
||||
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
auto as_rtt = static_cast<gl::render_target*>(vram_texture);
|
||||
if (as_rtt->dirty) as_rtt->read_barrier(cmd);
|
||||
}
|
||||
|
||||
if (!pbo_id)
|
||||
{
|
||||
init_buffer();
|
||||
@ -403,7 +409,6 @@ namespace gl
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
|
||||
// Shuffle
|
||||
bool require_manual_shuffle = false;
|
||||
if (pack_unpack_swap_bytes)
|
||||
@ -424,7 +429,6 @@ namespace gl
|
||||
}
|
||||
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
|
||||
{
|
||||
|
||||
//AMD driver bug - cannot use pack_swap_bytes
|
||||
//Manually byteswap texel data
|
||||
switch (type)
|
||||
@ -474,6 +478,12 @@ namespace gl
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
// Update memory tag
|
||||
static_cast<gl::render_target*>(vram_texture)->sync_tag();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1504,6 +1504,7 @@ void VKGSRender::end()
|
||||
// Program is not ready, skip drawing this
|
||||
std::this_thread::yield();
|
||||
execute_nop_draw();
|
||||
m_rtts.on_write();
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
@ -180,6 +180,12 @@ namespace vk
|
||||
cmd.begin();
|
||||
}
|
||||
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
auto as_rtt = static_cast<vk::render_target*>(vram_texture);
|
||||
if (as_rtt->dirty) as_rtt->read_barrier(cmd);
|
||||
}
|
||||
|
||||
vk::image *target = vram_texture;
|
||||
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
|
||||
|
||||
@ -333,6 +339,12 @@ namespace vk
|
||||
// Completes a flush: unmaps the DMA buffer and, for framebuffer-backed
// sections, re-samples the render target's memory tag.
void finish_flush()
|
||||
{
|
||||
dma_buffer->unmap();
|
||||
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
// Update memory tag
// vram_texture is treated as a vk::render_target in this context; sync_tag()
// is called only after the buffer has been unmapped.
// NOTE(review): presumably the flushed data is already in guest memory at
// this point, so the sample reflects it - confirm against the flush path.
|
||||
static_cast<vk::render_target*>(vram_texture)->sync_tag();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user