rsx: Blit engine improvements

- Detect writes to the display output memory and handle it specially.
  It already defines a known 2D region.
- Try and detect situations where raw transfers would be of benefit.
This commit is contained in:
kd-11 2020-03-07 22:20:38 +03:00 committed by kd-11
parent ab4189998c
commit 84a542fbce
2 changed files with 109 additions and 22 deletions

View File

@ -2186,26 +2186,50 @@ namespace rsx
areai dst_area = { 0, 0, dst_w, dst_h };
size2i dst_dimensions = { dst.pitch / dst_bpp, dst.height };
position2i dst_offset = { dst.offset_x, dst.offset_y };
u32 dst_base_address = dst.rsx_address;
const auto src_payload_length = (src.pitch * (src_h - 1) + (src_w * src_bpp));
const auto dst_payload_length = (dst.pitch * (dst_h - 1) + (dst_w * dst_bpp));
const auto dst_range = address_range::start_length(dst_address, dst_payload_length);
if (src_is_render_target)
if (!use_null_region && !dst_is_render_target)
{
// Attempt to optimize...
if (dst_dimensions.width == src_subres.surface->get_surface_width(rsx::surface_metrics::samples))
size2u src_dimensions = { 0, 0 };
if (src_is_render_target)
{
dst_dimensions.height = std::max(src_subres.surface->get_surface_height(rsx::surface_metrics::samples), dst.height);
src_dimensions.width = src_subres.surface->get_surface_width(rsx::surface_metrics::samples);
src_dimensions.height = src_subres.surface->get_surface_height(rsx::surface_metrics::samples);
}
else if (dst_dimensions.width == 1280 || dst_dimensions.width == 2560) [[likely]]
const auto props = texture_cache_helpers::get_optimal_blit_target_properties(
src_is_render_target,
dst_range,
dst.pitch,
src_dimensions,
static_cast<size2u>(dst_dimensions)
);
if (props.use_dma_region)
{
// Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway
// TODO: Add more entries based on empirical data
dst_dimensions.height = std::max<s32>(dst.height, 720);
// Try to use a dma flush
use_null_region = (is_copy_op && !is_format_convert);
}
else
{
//rsx_log.trace("Blit transfer to surface with dims %dx%d", dst_dimensions.width, dst.height);
if (props.offset)
{
// Calculate new offsets
dst_base_address = props.offset;
const auto new_offset = (dst_address - dst_base_address);
// Generate new offsets
dst_offset.y = new_offset / dst.pitch;
dst_offset.x = (new_offset % dst.pitch) / dst_bpp;
}
dst_dimensions.width = static_cast<s32>(props.width);
dst_dimensions.height = static_cast<s32>(props.height);
}
}
@ -2226,9 +2250,7 @@ namespace rsx
if (skip_if_collision_exists) required_type_mask |= texture_upload_context::shader_read;
}
const auto dst_range = address_range::start_length(dst_address, dst_payload_length);
auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type_mask);
for (const auto &surface : overlapping_surfaces)
{
if (!surface->is_locked())
@ -2533,7 +2555,6 @@ namespace rsx
src_area.y2 += scaled_clip_offset_y;
}
const auto dst_range = utils::address_range::start_length(dst_address, dst_payload_length);
if (!cached_dest && !dst_is_render_target)
{
verify(HERE), !dest_texture;
@ -2541,12 +2562,12 @@ namespace rsx
// Need to calculate the minimum required size that will fit the data, anchored on the rsx_address
// If the application starts off with an 'inset' section, the guessed dimensions may not fit!
const u32 write_end = dst_address + dst_payload_length;
u32 block_end = dst.rsx_address + (dst.pitch * dst_dimensions.height);
u32 block_end = dst_base_address + (dst.pitch * dst_dimensions.height);
// Confirm if the pages actually exist in vm
// Only need to test the extra padding memory and only when it's on main memory
// NOTE: When src is not a render target, padding is not added speculatively
if (src_is_render_target && get_location(dst.rsx_address) != CELL_GCM_LOCATION_LOCAL)
if (src_is_render_target && get_location(dst_base_address) != CELL_GCM_LOCATION_LOCAL)
{
if (block_end > write_end)
{
@ -2558,11 +2579,11 @@ namespace rsx
}
}
const u32 usable_section_length = std::max(write_end, block_end) - dst.rsx_address;
const u32 usable_section_length = std::max(write_end, block_end) - dst_base_address;
dst_dimensions.height = align2(usable_section_length, dst.pitch) / dst.pitch;
const u32 full_section_length = ((dst_dimensions.height - 1) * dst.pitch) + (dst_dimensions.width * dst_bpp);
const auto rsx_range = address_range::start_length(dst.rsx_address, full_section_length);
const auto rsx_range = address_range::start_length(dst_base_address, full_section_length);
lock.upgrade();
@ -2591,10 +2612,10 @@ namespace rsx
rsx::texture_create_flags::swapped_native_component_order;
// Translate dst_area into the 'full' dst block based on dst_base_address as (0, 0)
dst_area.x1 += dst.offset_x;
dst_area.x2 += dst.offset_x;
dst_area.y1 += dst.offset_y;
dst_area.y2 += dst.offset_y;
dst_area.x1 += dst_offset.x;
dst_area.x2 += dst_offset.x;
dst_area.y1 += dst_offset.y;
dst_area.y2 += dst_offset.y;
if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height)
{
@ -2616,7 +2637,7 @@ namespace rsx
subres.height_in_block = subres.height_in_texel = dst_dimensions.height;
subres.pitch_in_block = pitch_in_block;
subres.depth = 1;
subres.data = { vm::get_super_ptr<const std::byte>(dst.rsx_address), static_cast<gsl::span<const std::byte>::index_type>(dst.pitch * dst_dimensions.height) };
subres.data = { vm::get_super_ptr<const std::byte>(dst_base_address), static_cast<gsl::span<const std::byte>::index_type>(dst.pitch * dst_dimensions.height) };
subresource_layout.push_back(subres);
cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
@ -2783,7 +2804,7 @@ namespace rsx
}
else
{
result.real_dst_address = dst.rsx_address;
result.real_dst_address = dst_base_address;
result.real_dst_size = dst.pitch * dst_dimensions.height;
}

View File

@ -73,6 +73,14 @@ namespace rsx
}
};
// Result of get_optimal_blit_target_properties: describes the destination
// section a blit should be directed at.
// Members carry default initializers so a default-constructed instance is
// fully defined; the type remains an aggregate, so brace-initialization
// such as { false, base, w, h } or { true } keeps working.
struct blit_target_properties
{
	// When set, the caller should try to service the blit with a DMA flush
	// instead of creating a 2D destination section.
	bool use_dma_region = false;

	// If non-zero, the base address the destination section should be anchored on.
	// Zero means "keep the address the caller already has".
	u32 offset = 0;

	// Dimensions (in texels) of the destination section.
	u32 width = 0;
	u32 height = 0;
};
struct texture_cache_search_options
{
u8 lookup_mask = 0xff;
@ -166,6 +174,64 @@ namespace rsx
}
}
// Picks the destination section layout for a blit transfer.
//
// src_is_render_target: true when the blit source is a render target surface.
// dst_range:            memory range that the blit will write to.
// dst_pitch:            pitch of the destination in bytes.
// src_dimensions:       source surface dimensions; only consulted when
//                       src_is_render_target is true.
// dst_dimensions:       destination dimensions as requested by the transfer.
//
// Returns:
//  - { false, buffer_base, buffer_w, buffer_h } when the write lands fully inside
//    a display output buffer with a matching pitch,
//  - { true, 0, 0, 0 } when the write overlaps such a buffer without fitting inside it,
//  - otherwise { false, 0, width, height } with a possibly enlarged height guess
//    when the source is a render target.
static blit_target_properties get_optimal_blit_target_properties(
	bool src_is_render_target,
	address_range dst_range,
	u32 dst_pitch,
	const sizeu src_dimensions,
	const sizeu dst_dimensions)
{
	if (get_location(dst_range.start) == CELL_GCM_LOCATION_LOCAL)
	{
		// Check if this is a blit to the output buffer
		// TODO: This can be used to implement reference tracking to possibly avoid downscaling
		const auto renderer = rsx::get_current_renderer();
		for (u32 i = 0; i < renderer->display_buffers_count; ++i)
		{
			const auto& buffer = renderer->display_buffers[i];

			// A zero pitch on the display buffer means it has to be derived from the output bpp and width
			const u32 pitch = buffer.pitch? static_cast<u32>(buffer.pitch) : g_fxo->get<rsx::avconf>()->get_bpp() * buffer.width;
			if (pitch != dst_pitch)
			{
				continue;
			}

			const auto buffer_range = address_range::start_length(rsx::constants::local_mem_base + buffer.offset, pitch * buffer.height);
			if (dst_range.inside(buffer_range))
			{
				// Match found
				return { false, buffer_range.start, buffer.width, buffer.height };
			}

			if (dst_range.overlaps(buffer_range)) [[unlikely]]
			{
				// The range clips the destination but does not fit inside it
				// Use DMA stream to optimize the flush that is likely to happen when flipping
				return { true, 0, 0, 0 };
			}
		}
	}

	if (src_is_render_target)
	{
		// Attempt to optimize...
		if (dst_dimensions.width == 1280 || dst_dimensions.width == 2560) [[likely]]
		{
			// Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway
			// TODO: Add more entries based on empirical data
			const auto optimal_height = std::max(dst_dimensions.height, 720u);
			return { false, 0, dst_dimensions.width, optimal_height };
		}

		if (dst_dimensions.width == src_dimensions.width)
		{
			// Same width as the source surface: grow the destination to cover the taller of the two.
			// The height must be derived from the heights, not the widths.
			const auto optimal_height = std::max(dst_dimensions.height, src_dimensions.height);
			return { false, 0, dst_dimensions.width, optimal_height };
		}
	}

	// No better guess available; use the dimensions as requested
	return { false, 0, dst_dimensions.width, dst_dimensions.height };
}
template<typename section_storage_type, typename copy_region_type, typename surface_store_list_type>
void gather_texture_slices(
std::vector<copy_region_type>& out,