rsx: improve memory coherency

- Avoid tagging and rely on read/write barriers and the dirty flag mechanism; change detection is done with a weak 8-byte memory sample test
- Tagging introduces new data into guest memory, which breaks applications through race conditions where the tag can overwrite freshly flushed data
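As a rough illustration of the scheme described above (a minimal sketch only, using a plain pointer where the real code uses vm::get_super_ptr, with illustrative names): the descriptor remembers the first 8 bytes of the surface's guest memory whenever the GPU is known to have written it, and later compares that sample to detect CPU writes.

    #include <cstdint>

    // Minimal sketch of the weak 8-byte sample-and-compare test; not the actual rsx code.
    // 'base_ptr' stands in for the pointer returned by vm::get_super_ptr.
    struct coherency_sketch
    {
        const std::uint64_t* base_ptr = nullptr; // first 8 bytes of the surface in guest memory
        std::uint64_t sample = 0;

        void queue_tag(const std::uint64_t* base) { base_ptr = base; } // surface bound to an address
        void sync_tag() { sample = *base_ptr; }                        // GPU wrote the surface: re-sample
        bool test() const { return sample == *base_ptr; }              // false => CPU touched the memory
    };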
kd-11 2019-01-05 13:12:36 +03:00 committed by kd-11
parent 89c9c54743
commit 52ac0a901a
6 changed files with 49 additions and 41 deletions

View File

@@ -91,11 +91,11 @@ namespace rsx
template <typename image_storage_type>
struct render_target_descriptor
{
- u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
- u32 tag_address = 0;
+ u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
+ u32 memory_tag_address = 0u; // memory address of the start of the ROP block
+ u64 memory_tag_sample = 0ull; // memory sample taken at the memory_tag_address for change testing
bool dirty = false;
- bool needs_tagging = false;
image_storage_type old_contents = nullptr;
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
@@ -119,31 +119,26 @@ namespace rsx
write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
}
- void tag()
+ bool test() const
{
- auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
- *ptr = tag_address;
- needs_tagging = false;
- }
- bool test()
- {
- if (needs_tagging && dirty)
+ if (dirty)
{
// TODO
// Should RCB or mem-sync (inherit previous mem) to init memory
LOG_TODO(RSX, "Resource used before memory initialization");
}
- auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
- return (*ptr == tag_address);
+ return (memory_tag_sample == *vm::get_super_ptr<u64>(memory_tag_address));
}
void queue_tag(u32 address)
{
- tag_address = address;
- needs_tagging = true;
+ memory_tag_address = address;
}
+ void sync_tag()
+ {
+ memory_tag_sample = *vm::get_super_ptr<u64>(memory_tag_address);
+ }
void on_write(u64 write_tag = 0)
@@ -154,10 +149,8 @@ namespace rsx
last_use_tag = write_tag;
}
- if (needs_tagging)
- {
- tag();
- }
+ // Tag unconditionally without introducing new data
+ sync_tag();
read_aa_mode = write_aa_mode;
dirty = false;
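For contrast with the removed tag() above: the old path stamped a value into guest memory, while sync_tag() only reads it, so the coherency bookkeeping can no longer clobber data that was just flushed. A simplified side-by-side (plain pointers instead of vm::get_super_ptr, illustrative names):

    #include <cstdint>

    // Old scheme (removed): write the address value into guest memory itself.
    // Racy - the stamp can overwrite data the texture cache has just flushed there.
    void old_style_tag(std::uint32_t* guest_word, std::uint32_t address)
    {
        *guest_word = address;
    }

    // New scheme: purely observational - remember the current 8-byte sample.
    std::uint64_t new_style_sync_tag(const std::uint64_t* guest_qword)
    {
        return *guest_qword;
    }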

View File

@@ -372,18 +372,6 @@ namespace rsx
return true;
}
- void tag_framebuffer(u32 texaddr)
- {
- auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
- *ptr = texaddr;
- }
- bool test_framebuffer(u32 texaddr)
- {
- auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
- return *ptr == texaddr;
- }
/**
* Section invalidation
*/
@@ -1157,7 +1145,6 @@ namespace rsx
region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
- tag_framebuffer(region.get_section_base());
region.set_dirty(false);
region.touch(m_cache_update_tag);
@@ -1703,8 +1690,8 @@ namespace rsx
// TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
- const bool is_active = m_rtts.address_is_bound(texaddr, false);
- if (texptr->test() || is_active)
+ if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
+ is_active || texptr->test())
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active,
@@ -1719,8 +1706,8 @@ namespace rsx
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{
- const bool is_active = m_rtts.address_is_bound(texaddr, true);
- if (texptr->test() || is_active)
+ if (const bool is_active = m_rtts.address_is_bound(texaddr, true);
+ is_active || texptr->test())
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active,
@@ -1949,6 +1936,8 @@ namespace rsx
if (src_is_render_target)
{
+ src_subres.surface->read_barrier(cmd);
const auto surf = src_subres.surface;
auto src_bpp = surf->get_native_pitch() / surf->get_surface_width();
auto expected_bpp = src_is_argb8 ? 4 : 2;
@@ -1972,6 +1961,9 @@ namespace rsx
if (dst_is_render_target)
{
+ // Full barrier is required in case of partial transfers
+ dst_subres.surface->read_barrier(cmd);
auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
auto expected_bpp = dst_is_argb8 ? 4 : 2;
if (dst_bpp != expected_bpp)
@@ -2411,7 +2403,6 @@ namespace rsx
AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always);
section.reprotect(utils::protection::no);
- tag_framebuffer(section.get_section_base());
update_tag = true;
}
}
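The read_barrier() calls added above to the blit source and destination paths make sure a surface holds valid data before it is copied. The barrier itself is not part of this diff; a hedged sketch of what it is assumed to do, based on the old_contents field declared in the descriptor earlier on this page:

    #include <memory>

    // Assumed shape of a read barrier: if contents inherited from a previously
    // overlapping surface are still pending a merge, resolve the merge before the
    // surface is read. Illustrative only - the real merge copies/scales image data.
    struct barrier_sketch_surface
    {
        std::shared_ptr<barrier_sketch_surface> old_contents;

        void merge_from(const barrier_sketch_surface&) { /* copy old data into this surface */ }

        void read_barrier()
        {
            if (!old_contents)
                return;               // nothing pending, reads already see valid data
            merge_from(*old_contents);
            old_contents.reset();
        }
    };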

View File

@@ -309,6 +309,7 @@ void GLGSRender::end()
// Program is not ready, skip drawing this
std::this_thread::yield();
execute_nop_draw();
+ m_rtts.on_write();
rsx::thread::end();
return;
}
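Calling m_rtts.on_write() even for a skipped draw keeps the bound surfaces' memory samples in sync with what the draw would have touched. A rough sketch of the fan-out this is assumed to perform (member names are illustrative, not the actual surface store API):

    #include <array>
    #include <cstdint>

    // Illustrative only: forward the write notification to every bound surface so
    // each one refreshes its 8-byte memory sample via on_write()/sync_tag().
    struct write_notify_sketch
    {
        struct surface { void on_write(std::uint64_t) { /* sync_tag(); dirty = false; ... */ } };

        std::array<surface*, 4> bound_render_targets{};
        surface* bound_depth_stencil = nullptr;

        void on_write(std::uint64_t write_tag = 0)
        {
            for (auto* rtt : bound_render_targets)
                if (rtt) rtt->on_write(write_tag);

            if (bound_depth_stencil)
                bound_depth_stencil->on_write(write_tag);
        }
    };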

View File

@@ -261,6 +261,12 @@ namespace gl
baseclass::on_speculative_flush();
}
+ if (context == rsx::texture_upload_context::framebuffer_storage)
+ {
+ auto as_rtt = static_cast<gl::render_target*>(vram_texture);
+ if (as_rtt->dirty) as_rtt->read_barrier(cmd);
+ }
if (!pbo_id)
{
init_buffer();
@@ -403,7 +409,6 @@ namespace gl
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
// Shuffle
bool require_manual_shuffle = false;
if (pack_unpack_swap_bytes)
@@ -424,7 +429,6 @@ namespace gl
}
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
{
//AMD driver bug - cannot use pack_swap_bytes
//Manually byteswap texel data
switch (type)
@@ -474,6 +478,12 @@ namespace gl
}
}
}
+ if (context == rsx::texture_upload_context::framebuffer_storage)
+ {
+ // Update memory tag
+ static_cast<gl::render_target*>(vram_texture)->sync_tag();
+ }
}
/**
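The two framebuffer_storage blocks added to the GL flush path (mirrored for Vulkan below) bracket the readback: a dirty surface is first resolved with a read barrier, and after the data has been written back to guest memory the tag is re-sampled so the flush itself is not later mistaken for a CPU write. A condensed, self-contained sketch of that ordering with stand-in types:

    #include <cstdint>

    // Stand-in for a cached framebuffer surface; the real code uses
    // gl::render_target / vk::render_target and the emulator's guest memory.
    struct flush_sketch_surface
    {
        bool dirty = false;
        std::uint64_t* guest_base = nullptr; // first 8 bytes of the section in guest memory
        std::uint64_t sample = 0;

        void read_barrier() { dirty = false; }        // placeholder: merge pending old contents
        void sync_tag()     { sample = *guest_base; } // refresh the memory sample
    };

    void flush_section_sketch(flush_sketch_surface& surface, std::uint64_t readback)
    {
        if (surface.dirty)
            surface.read_barrier();     // surface must hold valid data before readback

        *surface.guest_base = readback; // stand-in for the PBO/DMA copy to guest memory

        surface.sync_tag();             // guest memory changed; re-sample so test() stays true
    }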

View File

@@ -1504,6 +1504,7 @@ void VKGSRender::end()
// Program is not ready, skip drawing this
std::this_thread::yield();
execute_nop_draw();
+ m_rtts.on_write();
rsx::thread::end();
return;
}

View File

@@ -180,6 +180,12 @@ namespace vk
cmd.begin();
}
+ if (context == rsx::texture_upload_context::framebuffer_storage)
+ {
+ auto as_rtt = static_cast<vk::render_target*>(vram_texture);
+ if (as_rtt->dirty) as_rtt->read_barrier(cmd);
+ }
vk::image *target = vram_texture;
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
@@ -333,6 +339,12 @@ namespace vk
void finish_flush()
{
dma_buffer->unmap();
+ if (context == rsx::texture_upload_context::framebuffer_storage)
+ {
+ // Update memory tag
+ static_cast<vk::render_target*>(vram_texture)->sync_tag();
+ }
}