rsx: Avoid unnecessarily touching framebuffer memory

- Do not bind companion framebuffer when clearing single aspect; let the
  contest mechanism sort it out instead
- Do not prematurely tag framebuffers; instead, only do so at
  write-confirmation time. This should avoid false tagging if the setup
  does not allow a render to occur.
This commit is contained in:
kd-11 2018-12-12 11:58:44 +03:00 committed by kd-11
parent a13986ec5c
commit 15488eb247
10 changed files with 182 additions and 113 deletions

View File

@ -1,6 +1,7 @@
#pragma once
#pragma once
#include "Utilities/GSL.h"
#include "Emu/Memory/vm.h"
#include "../GCM.h"
#include <list>
@ -91,8 +92,10 @@ namespace rsx
struct render_target_descriptor
{
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
u32 tag_address = 0;
bool dirty = false;
bool needs_tagging = false;
image_storage_type old_contents = nullptr;
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
@ -116,6 +119,33 @@ namespace rsx
write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
}
// Commits the queued tag: stores tag_address into the u32 that
// vm::get_super_ptr resolves for tag_address, then clears needs_tagging.
void tag()
{
	*vm::get_super_ptr<atomic_t<u32>>(tag_address) = tag_address;
	needs_tagging = false;
}
// Checks whether the u32 found at tag_address still equals tag_address,
// i.e. the value that tag() stores there.
// Returns false (and logs an error) without reading memory when a tag is
// still queued (needs_tagging) while the surface is flagged dirty.
bool test()
{
	const bool tag_still_pending = needs_tagging && dirty;
	if (!tag_still_pending)
	{
		return (*vm::get_super_ptr<atomic_t<u32>>(tag_address) == tag_address);
	}

	// TODO
	LOG_ERROR(RSX, "Resource used before memory initialization");
	return false;
}
// Defers tagging: remembers the address to tag and raises needs_tagging.
// Nothing is written to memory here; tag() performs the actual store.
void queue_tag(u32 address)
{
	needs_tagging = true;
	tag_address = address;
}
void on_write(u64 write_tag = 0)
{
if (write_tag)
@ -124,6 +154,11 @@ namespace rsx
last_use_tag = write_tag;
}
if (needs_tagging)
{
tag();
}
read_aa_mode = write_aa_mode;
dirty = false;
old_contents = nullptr;
@ -353,7 +388,7 @@ namespace rsx
invalidated_resources.erase(It);
new_surface = Traits::get(new_surface_storage);
Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy);
Traits::invalidate_surface_contents(address, command_list, new_surface, contents_to_copy);
Traits::prepare_rtt_for_drawing(command_list, new_surface);
break;
}
@ -438,7 +473,7 @@ namespace rsx
new_surface = Traits::get(new_surface_storage);
Traits::prepare_ds_for_drawing(command_list, new_surface);
Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy);
Traits::invalidate_surface_contents(address, command_list, new_surface, contents_to_copy);
break;
}
}
@ -1080,6 +1115,7 @@ namespace rsx
for (auto &entry : e.overlapping_set)
{
// GPU-side contents changed
entry._ref->dirty = true;
}
}

View File

@ -372,7 +372,17 @@ namespace rsx
return true;
}
// Stores texaddr into the u32 that vm::get_super_ptr resolves for texaddr.
void tag_framebuffer(u32 texaddr)
{
	*vm::get_super_ptr<atomic_t<u32>>(texaddr) = texaddr;
}
// Returns true when the u32 that vm::get_super_ptr resolves for texaddr
// holds texaddr itself, which is the value tag_framebuffer() stores there.
bool test_framebuffer(u32 texaddr)
{
	return (*vm::get_super_ptr<atomic_t<u32>>(texaddr) == texaddr);
}
/**
* Section invalidation
@ -1712,7 +1722,7 @@ namespace rsx
//TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
if (test_framebuffer(texaddr))
if (texptr->test())
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
tex_width, tex_height, depth, tex_pitch, extended_dimension, false, tex.remap(),
@ -1727,7 +1737,7 @@ namespace rsx
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{
if (test_framebuffer(texaddr))
if (texptr->test())
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
tex_width, tex_height, depth, tex_pitch, extended_dimension, true, tex.remap(),
@ -1755,7 +1765,7 @@ namespace rsx
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex_width, tex_height, tex_pitch);
if (rsc.surface)
{
if (!test_framebuffer(rsc.base_address))
if (!rsc.surface->test())
{
m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface);
invalidate_address(rsc.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
@ -1936,14 +1946,14 @@ namespace rsx
src_is_render_target = false;
}
if (src_is_render_target && !test_framebuffer(src_subres.base_address))
if (src_is_render_target && !src_subres.surface->test())
{
m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface);
invalidate_address(src_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
src_is_render_target = false;
}
if (dst_is_render_target && !test_framebuffer(dst_subres.base_address))
if (dst_is_render_target && !dst_subres.surface->test())
{
m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface);
invalidate_address(dst_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
@ -2451,18 +2461,6 @@ namespace rsx
return read_only_tex_invalidate;
}
// Stores texaddr into the u32 that vm::get_super_ptr resolves for texaddr.
void tag_framebuffer(u32 texaddr)
{
	*vm::get_super_ptr<atomic_t<u32>>(texaddr) = texaddr;
}
// Returns true when the u32 that vm::get_super_ptr resolves for texaddr
// holds texaddr itself, which is the value tag_framebuffer() stores there.
bool test_framebuffer(u32 texaddr)
{
	return (*vm::get_super_ptr<atomic_t<u32>>(texaddr) == texaddr);
}
/**
* Per-frame statistics

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include <utility>
#include <d3d12.h>
@ -130,6 +130,7 @@ struct render_target_traits
// Intentionally empty in this traits struct: all four arguments are ignored.
// Callers pass (address, command_list, new_surface, contents_to_copy).
static void invalidate_surface_contents(
	u32 /*address*/,
	ID3D12GraphicsCommandList* /*command_list*/,
	ID3D12Resource* /*new_surface*/,
	ID3D12Resource* /*contents_to_copy*/)
{
}

View File

@ -173,14 +173,10 @@ namespace
void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
{
if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context))
{
// Clear commands affect contested memory
m_rtts_dirty = true;
}
if (m_draw_fbo && !m_rtts_dirty)
if (m_current_framebuffer_context == context && !m_rtts_dirty && m_draw_fbo)
{
// Fast path
// Framebuffer usage has not changed, framebuffer exists and config regs have not changed
set_scissor();
return;
}
@ -257,7 +253,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(m_surface_info[i].address);
m_gl_texture_cache.tag_framebuffer(m_surface_info[i].address);
}
else
{
@ -286,7 +281,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
ds->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
m_gl_texture_cache.tag_framebuffer(layout.zeta_address);
}
else
{

View File

@ -147,7 +147,7 @@ struct gl_render_target_traits
static
std::unique_ptr<gl::render_target> create_new_surface(
u32 /*address*/,
u32 address,
rsx::surface_color_format surface_color_format,
size_t width,
size_t height,
@ -165,6 +165,7 @@ struct gl_render_target_traits
result->set_native_component_layout(native_layout);
result->old_contents = old_surface;
result->queue_tag(address);
result->set_cleared(false);
result->update_surface();
return result;
@ -172,7 +173,7 @@ struct gl_render_target_traits
static
std::unique_ptr<gl::render_target> create_new_surface(
u32 /*address*/,
u32 address,
rsx::surface_depth_format surface_depth_format,
size_t width,
size_t height,
@ -192,6 +193,7 @@ struct gl_render_target_traits
result->set_native_component_layout(native_layout);
result->old_contents = old_surface;
result->queue_tag(address);
result->set_cleared(false);
result->update_surface();
return result;
@ -214,11 +216,12 @@ struct gl_render_target_traits
static void prepare_ds_for_sampling(void *, gl::render_target*) {}
static
void invalidate_surface_contents(void *, gl::render_target *surface, gl::render_target* old_surface)
void invalidate_surface_contents(u32 address, void *, gl::render_target *surface, gl::render_target* old_surface)
{
surface->set_cleared(false);
surface->old_contents = old_surface;
surface->reset_aa_mode();
surface->queue_tag(address);
surface->set_cleared(false);
}
static

View File

@ -1124,7 +1124,7 @@ namespace rsx
framebuffer_status_valid = false;
m_framebuffer_state_contested = false;
m_framebuffer_contest_type = context;
m_current_framebuffer_context = context;
if (layout.width == 0 || layout.height == 0)
{
@ -1159,106 +1159,146 @@ namespace rsx
// Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries
const auto required_zeta_pitch = std::max<u32>((u32)(layout.depth_format == rsx::surface_depth_format::z16 ? layout.width * 2 : layout.width * 4) * aa_factor_u, 64u);
const auto required_color_pitch = std::max<u32>((u32)rsx::utility::get_packed_pitch(layout.color_format, layout.width) * aa_factor_u, 64u);
const bool color_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_color) ? true : rsx::method_registers.color_write_enabled();
const bool depth_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_depth) ? true : rsx::method_registers.depth_write_enabled();
const auto lg2w = rsx::method_registers.surface_log2_width();
const auto lg2h = rsx::method_registers.surface_log2_height();
const auto clipw_log2 = (u32)floor(log2(layout.width));
const auto cliph_log2 = (u32)floor(log2(layout.height));
const bool color_write_enabled = rsx::method_registers.color_write_enabled();
const bool depth_write_enabled = rsx::method_registers.depth_write_enabled();
const bool stencil_test_enabled = layout.depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled();
const bool depth_test_enabled = rsx::method_registers.depth_test_enabled();
const bool ignore_depth = (context == rsx::framebuffer_creation_context::context_clear_color);
const bool ignore_color = (context == rsx::framebuffer_creation_context::context_clear_depth);
if (layout.zeta_address)
bool depth_buffer_unused = false, color_buffer_unused = false;
switch (context)
{
if (!depth_test_enabled &&
!stencil_test_enabled &&
layout.target != rsx::surface_target::none)
case rsx::framebuffer_creation_context::context_clear_all:
break;
case rsx::framebuffer_creation_context::context_clear_depth:
color_buffer_unused = true;
break;
case rsx::framebuffer_creation_context::context_clear_color:
depth_buffer_unused = true;
break;
case rsx::framebuffer_creation_context::context_draw:
// NOTE: As with all other hw, depth/stencil writes involve the corresponding depth/stencil test, i.e No test = No write
color_buffer_unused = !color_write_enabled || layout.target == rsx::surface_target::none;
depth_buffer_unused = !depth_test_enabled && !stencil_test_enabled;
m_framebuffer_state_contested = color_buffer_unused || depth_buffer_unused;
break;
default:
fmt::throw_exception("Unknown framebuffer context 0x%x" HERE, (u32)context);
}
auto check_swizzled_render = [&]()
{
// Packed rasterization with optimal memory layout
// Pitch has to be packed for all active render targets, i.e 64
// Formats also seemingly need matching depth and color pitch if both are active
if (color_buffer_unused)
{
// Disable depth buffer if depth testing is not enabled, unless a clear command is targeting the depth buffer
const bool is_depth_clear = !!(context & rsx::framebuffer_creation_context::context_clear_depth);
if (!is_depth_clear)
{
layout.zeta_address = 0;
m_framebuffer_state_contested = true;
}
// Check only depth
return (layout.zeta_pitch == 64);
}
if (layout.zeta_address && layout.zeta_pitch < required_zeta_pitch)
else if (depth_buffer_unused)
{
if (lg2w < clipw_log2 || lg2h < cliph_log2)
// Check only color
for (const auto& index : rsx::utility::get_rtt_indexes(layout.target))
{
// Cannot fit
layout.zeta_address = 0;
if (lg2w > 0 || lg2h > 0)
if (layout.color_pitch[index] != 64)
{
// Something was actually declared for the swizzle context dimensions
LOG_WARNING(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height);
return false;
}
}
else
{
LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height);
}
return true;
}
if (layout.zeta_address)
if (required_color_pitch != required_zeta_pitch)
{
// Still exists? Unlikely to get discarded
layout.actual_zeta_pitch = std::max(layout.zeta_pitch, required_zeta_pitch);
// Both depth and color exist, but pixel size differs
return false;
}
else
{
// Qualifies, but only if all the pitch values are disabled (64)
// Both depth and color are assumed to exist in this case, unless proven otherwise
if (layout.zeta_pitch != 64)
{
return false;
}
for (const auto& index : rsx::utility::get_rtt_indexes(layout.target))
{
if (layout.color_pitch[index] != 64)
{
return false;
}
}
return true;
}
};
// Swizzled render does tight packing of bytes
const bool packed_render = check_swizzled_render();
if (depth_buffer_unused)
{
layout.zeta_address = 0;
}
else if (layout.zeta_pitch < required_zeta_pitch && !packed_render)
{
layout.zeta_address = 0;
}
else
{
// Still exists? Unlikely to get discarded
layout.actual_zeta_pitch = std::max(layout.zeta_pitch, required_zeta_pitch);
}
for (const auto &index : rsx::utility::get_rtt_indexes(layout.target))
{
if (layout.color_pitch[index] < required_color_pitch)
if (color_buffer_unused)
{
if (lg2w < clipw_log2 || lg2h < cliph_log2)
{
layout.color_addresses[index] = 0;
if (lg2w > 0 || lg2h > 0)
{
// Something was actually declared for the swizzle context dimensions
LOG_WARNING(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height);
}
}
else
{
LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height);
}
layout.color_addresses[index] = 0;
continue;
}
if (layout.zeta_address && (layout.color_addresses[index] == layout.zeta_address))
if (layout.color_pitch[index] < required_color_pitch && !packed_render)
{
LOG_TRACE(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d",
// Unlike the depth buffer, when given a color target we know it is intended to be rendered to
LOG_ERROR(RSX, "Framebuffer setup error: Color target failed pitch check, Pitch=[%d, %d, %d, %d] + %d, target=%d, context=%d",
layout.color_pitch[0], layout.color_pitch[1], layout.color_pitch[2], layout.color_pitch[3],
layout.zeta_pitch, (u32)layout.target, (u32)context);
// Do not remove this buffer for now as it implies something went horribly wrong anyway
break;
}
if (layout.color_addresses[index] == layout.zeta_address)
{
LOG_WARNING(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d",
layout.zeta_address, index, layout.zeta_pitch, layout.color_pitch[index], (u32)context);
m_framebuffer_state_contested = true;
// TODO: Research clearing both depth AND color
// TODO: If context is creation_draw, deal with possibility of a lost buffer clear
if (!ignore_depth &&
(ignore_color || depth_test_enabled || stencil_test_enabled ||
(!color_write_enabled && depth_write_enabled)))
if (depth_test_enabled || stencil_test_enabled || (!color_write_enabled && depth_write_enabled))
{
// Use address for depth data
layout.color_addresses[index] = 0;
continue;
}
else
{
// Use address for color data
layout.zeta_address = 0;
m_framebuffer_state_contested = true;
}
}
if (layout.color_addresses[index])
{
layout.actual_color_pitch[index] = std::max(layout.color_pitch[index], required_color_pitch);
framebuffer_status_valid = true;
}
verify(HERE), layout.color_addresses[index];
layout.actual_color_pitch[index] = std::max(layout.color_pitch[index], required_color_pitch);
framebuffer_status_valid = true;
}
if (!framebuffer_status_valid && !layout.zeta_address)

View File

@ -461,7 +461,7 @@ namespace rsx
bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4];
bool m_framebuffer_state_contested = false;
rsx::framebuffer_creation_context m_framebuffer_contest_type = rsx::framebuffer_creation_context::context_draw;
rsx::framebuffer_creation_context m_current_framebuffer_context = rsx::framebuffer_creation_context::context_draw;
u32 m_graphics_state = 0;
u64 ROP_sync_timestamp = 0;

View File

@ -2831,14 +2831,10 @@ void VKGSRender::open_command_buffer()
void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context))
{
// Clear commands affect contested memory
m_rtts_dirty = true;
}
if (m_draw_fbo && !m_rtts_dirty)
if (m_current_framebuffer_context == context && !m_rtts_dirty && m_draw_fbo)
{
// Fast path
// Framebuffer usage has not changed, framebuffer exists and config regs have not changed
set_scissor();
return;
}
@ -2939,7 +2935,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
surface->write_aa_mode = layout.aa_mode;
m_texture_cache.notify_surface_changed(layout.color_addresses[index]);
m_texture_cache.tag_framebuffer(layout.color_addresses[index]);
m_draw_buffers.push_back(index);
}
}
@ -2955,7 +2950,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
ds->write_aa_mode = layout.aa_mode;
m_texture_cache.notify_surface_changed(layout.zeta_address);
m_texture_cache.tag_framebuffer(layout.zeta_address);
}
if (g_cfg.video.write_color_buffers)

View File

@ -79,7 +79,7 @@ namespace rsx
using download_buffer_object = void*;
static std::unique_ptr<vk::render_target> create_new_surface(
u32 /*address*/,
u32 address,
surface_color_format format,
size_t width, size_t height,
vk::render_target* old_surface,
@ -106,6 +106,7 @@ namespace rsx
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format);
rtt->surface_width = (u16)width;
rtt->surface_height = (u16)height;
rtt->queue_tag(address);
rtt->dirty = true;
if (old_surface != nullptr && old_surface->info.format == requested_format)
@ -115,7 +116,7 @@ namespace rsx
}
static std::unique_ptr<vk::render_target> create_new_surface(
u32 /* address */,
u32 address,
surface_depth_format format,
size_t width, size_t height,
vk::render_target* old_surface,
@ -151,6 +152,7 @@ namespace rsx
ds->attachment_aspect_flag = range.aspectMask;
ds->surface_width = (u16)width;
ds->surface_height = (u16)height;
ds->queue_tag(address);
ds->dirty = true;
if (old_surface != nullptr && old_surface->info.format == requested_format)
@ -202,11 +204,12 @@ namespace rsx
}
static
void invalidate_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface)
void invalidate_surface_contents(u32 address, vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface)
{
surface->old_contents = old_surface;
surface->dirty = true;
surface->reset_aa_mode();
surface->queue_tag(address);
surface->dirty = true;
}
static

View File

@ -2255,7 +2255,7 @@ struct registers_decoder<NV3089_DS_DX>
if ((s32)val < 0)
{
return 1. / (((val & ~(1<<31)) / 1048576.f) - 2048.f);
return 1.f / (((val & ~(1<<31)) / 1048576.f) - 2048.f);
}
return 1048576.f / val;
@ -2293,7 +2293,7 @@ struct registers_decoder<NV3089_DT_DY>
if ((s32)val < 0)
{
return 1. / (((val & ~(1<<31)) / 1048576.f) - 2048.f);
return 1.f / (((val & ~(1<<31)) / 1048576.f) - 2048.f);
}
return 1048576.f / val;