rsx: Improve format validation for blit engine

- Check all cases where a format mismatch is possible.
- Warn if a slow path is going to be taken. This should help with future
optimizations.
This commit is contained in:
kd-11 2019-11-17 23:03:18 +03:00 committed by kd-11
parent c415578e79
commit 4a0e1c79ed
11 changed files with 188 additions and 157 deletions

View File

@ -15,7 +15,6 @@ namespace rsx
{ {
if (fcmp(src_scaling_hint, dst_scaling_hint) && !fcmp(src_scaling_hint, 1.f)) if (fcmp(src_scaling_hint, dst_scaling_hint) && !fcmp(src_scaling_hint, 1.f))
{ {
verify(HERE), src_is_depth == dst_is_depth;
src_is_typeless = dst_is_typeless = false; src_is_typeless = dst_is_typeless = false;
src_scaling_hint = dst_scaling_hint = 1.f; src_scaling_hint = dst_scaling_hint = 1.f;
} }

View File

@ -68,8 +68,6 @@ namespace rsx
{ {
bool src_is_typeless = false; bool src_is_typeless = false;
bool dst_is_typeless = false; bool dst_is_typeless = false;
bool src_is_depth = false;
bool dst_is_depth = false;
bool flip_vertical = false; bool flip_vertical = false;
bool flip_horizontal = false; bool flip_horizontal = false;

View File

@ -12,6 +12,8 @@ extern u64 get_system_time();
namespace rsx namespace rsx
{ {
namespace helpers = rsx::texture_cache_helpers;
template <typename derived_type, typename _traits> template <typename derived_type, typename _traits>
class texture_cache class texture_cache
{ {
@ -1487,11 +1489,11 @@ namespace rsx
if (UNLIKELY(m_rtts.address_is_bound(attr.address))) if (UNLIKELY(m_rtts.address_is_bound(attr.address)))
{ {
if (auto texptr = m_rtts.get_surface_at(attr.address); if (auto texptr = m_rtts.get_surface_at(attr.address);
texture_cache_helpers::check_framebuffer_resource(texptr, attr, extended_dimension)) helpers::check_framebuffer_resource(texptr, attr, extended_dimension))
{ {
const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format); const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format);
auto result = texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>( auto result = helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
cmd, texptr, attr, scale, extended_dimension, encoded_remap, remap, true, force_convert); cmd, texptr, attr, scale, extended_dimension, encoded_remap, remap, true, force_convert);
if (!options.skip_texture_barriers && result.is_cyclic_reference) if (!options.skip_texture_barriers && result.is_cyclic_reference)
@ -1515,7 +1517,7 @@ namespace rsx
{ {
const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format); const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format);
return texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>( return helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
cmd, last.surface, attr, scale, extended_dimension, encoded_remap, remap, false, force_convert); cmd, last.surface, attr, scale, extended_dimension, encoded_remap, remap, false, force_convert);
} }
@ -1611,23 +1613,23 @@ namespace rsx
normalized_width >= attr.width && last->get_height() >= attr.height) normalized_width >= attr.width && last->get_height() >= attr.height)
{ {
u32 gcm_format = attr.gcm_format; u32 gcm_format = attr.gcm_format;
const bool gcm_format_is_depth = texture_cache_helpers::is_gcm_depth_format(attr.gcm_format); const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format);
if (!gcm_format_is_depth && last->is_depth_texture()) if (!gcm_format_is_depth && last->is_depth_texture())
{ {
// While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible // While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible
gcm_format = texture_cache_helpers::get_compatible_depth_format(attr.gcm_format); gcm_format = helpers::get_compatible_depth_format(attr.gcm_format);
} }
auto new_attr = attr; auto new_attr = attr;
new_attr.gcm_format = gcm_format; new_attr.gcm_format = gcm_format;
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {}, return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
last->get_context(), texture_cache_helpers::get_format_class(gcm_format), scale, extended_dimension, remap }; last->get_context(), helpers::get_format_class(gcm_format), scale, extended_dimension, remap };
} }
} }
auto result = texture_cache_helpers::merge_cache_resources<sampled_image_descriptor>( auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool); overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool);
if (options.skip_texture_merge) if (options.skip_texture_merge)
@ -1692,7 +1694,7 @@ namespace rsx
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
auto extended_dimension = tex.get_extended_texture_dimension(); auto extended_dimension = tex.get_extended_texture_dimension();
options.is_compressed_format = texture_cache_helpers::is_compressed_gcm_format(attributes.gcm_format); options.is_compressed_format = helpers::is_compressed_gcm_format(attributes.gcm_format);
u32 tex_size = 0, required_surface_height; u32 tex_size = 0, required_surface_height;
u8 subsurface_count; u8 subsurface_count;
@ -1801,7 +1803,7 @@ namespace rsx
std::vector<copy_region_descriptor> sections; std::vector<copy_region_descriptor> sections;
const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100); const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100);
if (UNLIKELY(!texture_cache_helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes))) if (UNLIKELY(!helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes)))
{ {
// Abort if mip0 is not compatible // Abort if mip0 is not compatible
return result; return result;
@ -1831,7 +1833,7 @@ namespace rsx
options, range, extended_dimension, m_rtts, std::forward<Args>(extras)...); options, range, extended_dimension, m_rtts, std::forward<Args>(extras)...);
if (!ret.validate() || if (!ret.validate() ||
!texture_cache_helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes)) !helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes))
{ {
// Abort // Abort
break; break;
@ -1866,7 +1868,7 @@ namespace rsx
// Do direct upload from CPU as the last resort // Do direct upload from CPU as the last resort
const auto subresources_layout = get_subresources_layout(tex); const auto subresources_layout = get_subresources_layout(tex);
const auto format_class = texture_cache_helpers::get_format_class(attributes.gcm_format); const auto format_class = helpers::get_format_class(attributes.gcm_format);
if (!tex_size) if (!tex_size)
{ {
@ -2083,28 +2085,23 @@ namespace rsx
} }
} }
// Sanity and format compatibility checks
if (dst_is_render_target)
{
if (src_subres.is_depth != dst_subres.is_depth)
{
// Create a cache-local resource to resolve later
// TODO: Support depth->RGBA typeless transfer for vulkan
dst_is_render_target = false;
}
}
if (src_is_render_target) if (src_is_render_target)
{ {
const auto surf = src_subres.surface; const auto surf = src_subres.surface;
const auto bpp = surf->get_bpp(); const auto bpp = surf->get_bpp();
if (bpp != src_bpp) const bool typeless = (bpp != src_bpp || is_format_convert);
if (LIKELY(!typeless))
{ {
//Enable type scaling in src // Use format as-is
typeless_info.src_gcm_format = helpers::get_sized_blit_format(src_is_argb8, src_subres.is_depth);
}
else
{
// Enable type scaling in src
typeless_info.src_is_typeless = true; typeless_info.src_is_typeless = true;
typeless_info.src_is_depth = src_subres.is_depth;
typeless_info.src_scaling_hint = (f32)bpp / src_bpp; typeless_info.src_scaling_hint = (f32)bpp / src_bpp;
typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; typeless_info.src_gcm_format = helpers::get_sized_blit_format(src_is_argb8, false);
} }
if (surf->get_surface_width(rsx::surface_metrics::pixels) != surf->width() || if (surf->get_surface_width(rsx::surface_metrics::pixels) != surf->width() ||
@ -2118,18 +2115,24 @@ namespace rsx
if (dst_is_render_target) if (dst_is_render_target)
{ {
auto bpp = dst_subres.surface->get_bpp(); const auto bpp = dst_subres.surface->get_bpp();
if (bpp != dst_bpp) const bool typeless = (bpp != dst_bpp || is_format_convert);
if (LIKELY(!typeless))
{ {
//Enable type scaling in dst typeless_info.dst_gcm_format = helpers::get_sized_blit_format(dst_is_argb8, dst_subres.is_depth);
}
else
{
// Enable type scaling in dst
typeless_info.dst_is_typeless = true; typeless_info.dst_is_typeless = true;
typeless_info.dst_is_depth = dst_subres.is_depth;
typeless_info.dst_scaling_hint = (f32)bpp / dst_bpp; typeless_info.dst_scaling_hint = (f32)bpp / dst_bpp;
typeless_info.dst_gcm_format = dst_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; typeless_info.dst_gcm_format = helpers::get_sized_blit_format(dst_is_argb8, false);
} }
} }
section_storage_type* cached_dest = nullptr; section_storage_type* cached_dest = nullptr;
bool dst_is_depth_surface = false;
u16 max_dst_width = dst.width; u16 max_dst_width = dst.width;
u16 max_dst_height = dst.height; u16 max_dst_height = dst.height;
areai src_area = { 0, 0, src_w, src_h }; areai src_area = { 0, 0, src_w, src_h };
@ -2202,6 +2205,20 @@ namespace rsx
continue; continue;
} }
switch (surface->get_gcm_format())
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_DEPTH24_D8:
if (!dst_is_argb8) continue;
break;
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_DEPTH16:
if (dst_is_argb8) continue;
break;
default:
continue;
}
if (const u32 address_offset = dst_address - this_address) if (const u32 address_offset = dst_address - this_address)
{ {
const u16 offset_y = address_offset / dst.pitch; const u16 offset_y = address_offset / dst.pitch;
@ -2229,6 +2246,38 @@ namespace rsx
dst_area = old_dst_area; dst_area = old_dst_area;
} }
const bool format_check = (src_is_render_target || is_format_convert);
if (!use_null_region && cached_dest && format_check)
{
bool src_is_depth;
if (is_format_convert)
{
src_is_depth = false;
}
else
{
verify(HERE), src_is_render_target;
src_is_depth = (typeless_info.src_is_typeless)? false : src_subres.is_depth;
}
if (cached_dest->is_depth_texture() != src_is_depth)
{
// Opt to cancel the destination. Can also use typeless convert
LOG_WARNING(RSX, "Format mismatch on blit destination block. Performance warning.");
// The invalidate call before creating a new target will remove this section
cached_dest = nullptr;
dest_texture = 0;
dst_area = old_dst_area;
}
}
if (LIKELY(cached_dest))
{
typeless_info.dst_gcm_format = cached_dest->get_gcm_format();
dst_is_depth_surface = cached_dest->is_depth_texture();
}
} }
else else
{ {
@ -2237,59 +2286,26 @@ namespace rsx
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer); dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer);
typeless_info.dst_context = texture_upload_context::framebuffer_storage; typeless_info.dst_context = texture_upload_context::framebuffer_storage;
dst_is_depth_surface = typeless_info.dst_is_typeless ? false : dst_subres.is_depth;
max_dst_width = (u16)(dst_subres.surface->get_surface_width(rsx::surface_metrics::samples) * typeless_info.dst_scaling_hint); max_dst_width = (u16)(dst_subres.surface->get_surface_width(rsx::surface_metrics::samples) * typeless_info.dst_scaling_hint);
max_dst_height = dst_subres.surface->get_surface_height(rsx::surface_metrics::samples); max_dst_height = dst_subres.surface->get_surface_height(rsx::surface_metrics::samples);
} }
// Check if available target is acceptable
// TODO: Check for other types of format mismatch
if (cached_dest && !use_null_region)
{
bool format_mismatch = false;
if (cached_dest->is_depth_texture() != src_subres.is_depth)
{
// Dest surface has the wrong 'aspect'
format_mismatch = true;
}
else
{
// Check if it matches the transfer declaration
switch (cached_dest->get_gcm_format())
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_DEPTH24_D8:
format_mismatch = !dst_is_argb8;
break;
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_DEPTH16:
format_mismatch = dst_is_argb8;
break;
default:
format_mismatch = true;
break;
}
}
if (format_mismatch)
{
// The invalidate call before creating a new target will remove this section
cached_dest = nullptr;
dest_texture = 0;
dst_area = old_dst_area;
}
}
// Create source texture if does not exist // Create source texture if does not exist
// TODO: This can be greatly improved with DMA optimizations. Most transfer operations here are actually non-graphical (no transforms applied) // TODO: This can be greatly improved with DMA optimizations. Most transfer operations here are actually non-graphical (no transforms applied)
if (!src_is_render_target) if (!src_is_render_target)
{ {
// NOTE: Src address already takes into account the flipped nature of the overlap! // NOTE: Src address already takes into account the flipped nature of the overlap!
const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_src | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::shader_read; const u32 lookup_mask = rsx::texture_upload_context::blit_engine_src | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::shader_read;
auto overlapping_surfaces = find_texture_from_range<false>(address_range::start_length(src_address, src_payload_length), src.pitch, lookup_mask); auto overlapping_surfaces = find_texture_from_range<false>(address_range::start_length(src_address, src_payload_length), src.pitch, lookup_mask);
auto old_src_area = src_area; auto old_src_area = src_area;
section_storage_type *cached_src = nullptr;
// If no source exists, a dest texture must exist since format matching should always pass
verify(HERE), dest_texture;
for (const auto &surface : overlapping_surfaces) for (const auto &surface : overlapping_surfaces)
{ {
if (!surface->is_locked()) if (!surface->is_locked())
@ -2352,27 +2368,14 @@ namespace rsx
if (src_area.x2 <= surface->get_width() && if (src_area.x2 <= surface->get_width() &&
src_area.y2 <= surface->get_height()) src_area.y2 <= surface->get_height())
{ {
vram_texture = surface->get_raw_texture(); cached_src = surface;
typeless_info.src_context = surface->get_context();
typeless_info.src_is_depth = surface->is_depth_texture();
const bool dst_is_depth = cached_dest ? cached_dest->is_depth_texture() : dst_subres.is_depth;
if (dst_is_depth != typeless_info.src_is_depth && !typeless_info.dst_is_typeless)
{
// Transfer crosses the dreaded DEPTH_STENCIL<->COLOR barrier
// Transfer in a typeless context using this surface as the reference
typeless_info.dst_is_depth = dst_is_depth;
typeless_info.dst_is_typeless = true;
typeless_info.dst_gcm_format = surface->get_gcm_format();
}
break; break;
} }
src_area = old_src_area; src_area = old_src_area;
} }
if (!vram_texture) if (!cached_src)
{ {
const u16 full_width = src.pitch / src_bpp; const u16 full_width = src.pitch / src_bpp;
u32 image_base = src.rsx_address; u32 image_base = src.rsx_address;
@ -2394,11 +2397,6 @@ namespace rsx
image_height = src_h; image_height = src_h;
} }
lock.upgrade();
const auto rsx_range = address_range::start_length(image_base, src.pitch * image_height);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::read, std::forward<Args>(extras)...);
std::vector<rsx_subresource_layout> subresource_layout; std::vector<rsx_subresource_layout> subresource_layout;
rsx_subresource_layout subres = {}; rsx_subresource_layout subres = {};
subres.width_in_block = subres.width_in_texel = image_width; subres.width_in_block = subres.width_in_texel = image_width;
@ -2408,11 +2406,30 @@ namespace rsx
subres.data = { vm::_ptr<const std::byte>(image_base), static_cast<gsl::span<const std::byte>::index_type>(src.pitch * image_height) }; subres.data = { vm::_ptr<const std::byte>(image_base), static_cast<gsl::span<const std::byte>::index_type>(src.pitch * image_height) };
subresource_layout.push_back(subres); subresource_layout.push_back(subres);
vram_texture = upload_image_from_cpu(cmd, rsx_range, image_width, image_height, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src, const u32 gcm_format = helpers::get_sized_blit_format(src_is_argb8, dst_is_depth_surface);
subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled)->get_raw_texture(); const auto rsx_range = address_range::start_length(image_base, src.pitch * image_height);
typeless_info.src_context = texture_upload_context::blit_engine_src; lock.upgrade();
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::read, std::forward<Args>(extras)...);
cached_src = upload_image_from_cpu(cmd, rsx_range, image_width, image_height, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src,
subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled);
typeless_info.src_gcm_format = gcm_format;
} }
else if (cached_src->is_depth_texture() != dst_is_depth_surface)
{
typeless_info.src_is_typeless = true;
typeless_info.src_gcm_format = helpers::get_sized_blit_format(src_is_argb8, dst_is_depth_surface);
}
else
{
typeless_info.src_gcm_format = cached_src->get_gcm_format();
}
vram_texture = cached_src->get_raw_texture();
typeless_info.src_context = cached_src->get_context();
} }
else else
{ {
@ -2421,15 +2438,7 @@ namespace rsx
typeless_info.src_context = texture_upload_context::framebuffer_storage; typeless_info.src_context = texture_upload_context::framebuffer_storage;
} }
// Type of blit decided by the source, destination use should adapt on the fly const auto preferred_dst_format = helpers::get_sized_blit_format(dst_is_argb8, dst_is_depth_surface);
const bool is_depth_blit = src_subres.is_depth;
u32 gcm_format;
if (is_depth_blit)
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_DEPTH24_D8 : CELL_GCM_TEXTURE_DEPTH16;
else
gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
if (cached_dest && !use_null_region) if (cached_dest && !use_null_region)
{ {
// Prep surface // Prep surface
@ -2437,7 +2446,7 @@ namespace rsx
dst_is_argb8 ? rsx::texture_create_flags::default_component_order : dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
rsx::texture_create_flags::swapped_native_component_order; rsx::texture_create_flags::swapped_native_component_order;
enforce_surface_creation_type(*cached_dest, gcm_format, channel_order); enforce_surface_creation_type(*cached_dest, preferred_dst_format, channel_order);
} }
// Validate clipping region // Validate clipping region
@ -2520,7 +2529,7 @@ namespace rsx
if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height) if (!dst_area.x1 && !dst_area.y1 && dst_area.x2 == dst_dimensions.width && dst_area.y2 == dst_dimensions.height)
{ {
cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch, cached_dest = create_new_texture(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d, preferred_dst_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d,
channel_order); channel_order);
} }
else else
@ -2541,10 +2550,12 @@ namespace rsx
subresource_layout.push_back(subres); subresource_layout.push_back(subres);
cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch, cached_dest = upload_image_from_cpu(cmd, rsx_range, dst_dimensions.width, dst_dimensions.height, 1, 1, dst.pitch,
gcm_format, rsx::texture_upload_context::blit_engine_dst, subresource_layout, preferred_dst_format, rsx::texture_upload_context::blit_engine_dst, subresource_layout,
rsx::texture_dimension_extended::texture_dimension_2d, false); rsx::texture_dimension_extended::texture_dimension_2d, false);
enforce_surface_creation_type(*cached_dest, gcm_format, channel_order); enforce_surface_creation_type(*cached_dest, preferred_dst_format, channel_order);
typeless_info.dst_gcm_format = preferred_dst_format;
} }
dest_texture = cached_dest->get_raw_texture(); dest_texture = cached_dest->get_raw_texture();
@ -2626,7 +2637,7 @@ namespace rsx
if (!use_null_region) if (!use_null_region)
{ {
typeless_info.analyse(); typeless_info.analyse();
blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info); blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, typeless_info);
} }
else else
{ {
@ -2634,7 +2645,6 @@ namespace rsx
} }
blit_op_result result = true; blit_op_result result = true;
result.is_depth = is_depth_blit;
if (cached_dest) if (cached_dest)
{ {

View File

@ -61,7 +61,6 @@ namespace rsx
struct blit_op_result struct blit_op_result
{ {
bool succeeded = false; bool succeeded = false;
bool is_depth = false;
u32 real_dst_address = 0; u32 real_dst_address = 0;
u32 real_dst_size = 0; u32 real_dst_size = 0;
@ -123,6 +122,18 @@ namespace rsx
return gcm_format; return gcm_format;
} }
// Picks the GCM texture format for a blit transfer from its texel size and aspect.
//   _32_bit      - true selects the 32-bit formats (A8R8G8B8 / DEPTH24_D8),
//                  false selects the remaining pair (R5G6B5 / DEPTH16).
//   depth_format - true selects the depth variant, false the color variant.
// Returns the matching CELL_GCM_TEXTURE_* constant.
static inline u32 get_sized_blit_format(bool _32_bit, bool depth_format)
{
	// The 32-bit case is hinted as the common one.
	if (LIKELY(_32_bit))
	{
		return depth_format ? CELL_GCM_TEXTURE_DEPTH24_D8 : CELL_GCM_TEXTURE_A8R8G8B8;
	}

	return depth_format ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_R5G6B5;
}
static inline bool is_compressed_gcm_format(u32 format) static inline bool is_compressed_gcm_format(u32 format)
{ {
switch (format) switch (format)

View File

@ -374,8 +374,8 @@ namespace gl
return attrib_t(index); return attrib_t(index);
} }
void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info) bool linear_interpolation, const rsx::typeless_xfer& xfer_info)
{ {
std::unique_ptr<texture> typeless_src; std::unique_ptr<texture> typeless_src;
std::unique_ptr<texture> typeless_dst; std::unique_ptr<texture> typeless_dst;
@ -384,41 +384,49 @@ namespace gl
if (xfer_info.src_is_typeless) if (xfer_info.src_is_typeless)
{ {
const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint);
const auto internal_fmt = xfer_info.src_native_format_override ? const auto internal_fmt = xfer_info.src_native_format_override ?
GLenum(xfer_info.src_native_format_override) : GLenum(xfer_info.src_native_format_override) :
get_sized_internal_format(xfer_info.src_gcm_format); get_sized_internal_format(xfer_info.src_gcm_format);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt); if (static_cast<gl::texture::internal_format>(internal_fmt) != src->get_internal_format())
copy_typeless(typeless_src.get(), src); {
const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt);
copy_typeless(typeless_src.get(), src);
real_src = typeless_src.get(); real_src = typeless_src.get();
src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint); src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint);
src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint); src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint);
}
} }
if (xfer_info.dst_is_typeless) if (xfer_info.dst_is_typeless)
{ {
const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint);
const auto internal_fmt = xfer_info.dst_native_format_override ? const auto internal_fmt = xfer_info.dst_native_format_override ?
GLenum(xfer_info.dst_native_format_override) : GLenum(xfer_info.dst_native_format_override) :
get_sized_internal_format(xfer_info.dst_gcm_format); get_sized_internal_format(xfer_info.dst_gcm_format);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt); if (static_cast<gl::texture::internal_format>(internal_fmt) != dst->get_internal_format())
copy_typeless(typeless_dst.get(), dst); {
const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt);
copy_typeless(typeless_dst.get(), dst);
real_dst = typeless_dst.get(); real_dst = typeless_dst.get();
dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint); dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint);
dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint); dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint);
}
} }
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest; verify(HERE), real_src->aspect() == real_dst->aspect();
const bool is_depth_copy = (real_src->aspect() != image_aspect::color);
const filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
GLenum attachment; GLenum attachment;
gl::buffers target; gl::buffers target;
if (is_depth_copy) if (is_depth_copy)
{ {
verify(HERE), real_src->aspect() == real_dst->aspect();
if (real_dst->aspect() & gl::image_aspect::stencil) if (real_dst->aspect() & gl::image_aspect::stencil)
{ {
attachment = GL_DEPTH_STENCIL_ATTACHMENT; attachment = GL_DEPTH_STENCIL_ATTACHMENT;

View File

@ -2926,7 +2926,7 @@ public:
} }
void scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, void scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info); const rsx::typeless_xfer& xfer_info);
void fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color); void fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color);
void fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil); void fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil);

View File

@ -485,7 +485,6 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
typeless_info.src_is_typeless = true; typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;
typeless_info.src_native_format_override = (u32)get_internal_format(); typeless_info.src_native_format_override = (u32)get_internal_format();
typeless_info.src_is_depth = !!(src_texture->aspect() & gl::image_aspect::depth);
typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp;
} }
} }
@ -508,7 +507,7 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
gl::g_hw_blitter->scale_image(cmd, section.source, this, gl::g_hw_blitter->scale_image(cmd, section.source, this,
section.src_rect(), section.src_rect(),
section.dst_rect(), section.dst_rect(),
!dst_is_depth, dst_is_depth, typeless_info); !dst_is_depth, typeless_info);
newest_tag = src_texture->last_use_tag; newest_tag = src_texture->last_use_tag;
} }

View File

@ -258,9 +258,8 @@ namespace gl
scaled_texture = std::make_unique<gl::texture>(GL_TEXTURE_2D, real_width, real_height, 1, 1, (GLenum)ifmt); scaled_texture = std::make_unique<gl::texture>(GL_TEXTURE_2D, real_width, real_height, 1, 1, (GLenum)ifmt);
} }
const bool is_depth = is_depth_texture(); const bool linear_interp = is_depth_texture() ? false : true;
const bool linear_interp = is_depth? false : true; g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, {});
g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {});
target_texture = scaled_texture.get(); target_texture = scaled_texture.get();
} }
} }
@ -643,7 +642,7 @@ namespace gl
} }
_blitter->scale_image(cmd, src_image, _dst, _blitter->scale_image(cmd, src_image, _dst,
src_rect, dst_rect, false, false, {}); src_rect, dst_rect, false, {});
if (_dst != dst_image) if (_dst != dst_image)
{ {

View File

@ -3534,6 +3534,6 @@ public:
struct blitter struct blitter
{ {
void scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info); void scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, const rsx::typeless_xfer& xfer_info);
}; };
} }

View File

@ -457,7 +457,6 @@ namespace vk
typeless_info.src_is_typeless = true; typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;
typeless_info.src_native_format_override = (u32)info.format; typeless_info.src_native_format_override = (u32)info.format;
typeless_info.src_is_depth = is_depth;
typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp;
} }
} }
@ -502,7 +501,7 @@ namespace vk
this->get_surface(rsx::surface_access::transfer), this->get_surface(rsx::surface_access::transfer),
src_area, src_area,
dst_area, dst_area,
/*linear?*/false, /*depth?(unused)*/false, typeless_info); /*linear?*/false, typeless_info);
optimize_copy = optimize_copy && !memory_load; optimize_copy = optimize_copy && !memory_load;
newest_tag = src_texture->last_use_tag; newest_tag = src_texture->last_use_tag;

View File

@ -847,7 +847,7 @@ namespace vk
return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] }; return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] };
} }
void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info) void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, const rsx::typeless_xfer& xfer_info)
{ {
const auto src_aspect = vk::get_aspect_flags(src->info.format); const auto src_aspect = vk::get_aspect_flags(src->info.format);
const auto dst_aspect = vk::get_aspect_flags(dst->info.format); const auto dst_aspect = vk::get_aspect_flags(dst->info.format);
@ -857,40 +857,48 @@ namespace vk
if (xfer_info.src_is_typeless) if (xfer_info.src_is_typeless)
{ {
const auto internal_width = src->width() * xfer_info.src_scaling_hint;
const auto format = xfer_info.src_native_format_override ? const auto format = xfer_info.src_native_format_override ?
VkFormat(xfer_info.src_native_format_override) : VkFormat(xfer_info.src_native_format_override) :
vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format); vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format);
const auto aspect = vk::get_aspect_flags(format);
// Transfer bits from src to typeless src if (format != src->format())
real_src = vk::get_typeless_helper(format, (u32)internal_width, src->height()); {
vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 }); const auto internal_width = src->width() * xfer_info.src_scaling_hint;
const auto aspect = vk::get_aspect_flags(format);
vk::copy_image_typeless(cmd, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1, // Transfer bits from src to typeless src
vk::get_aspect_flags(src->info.format), aspect); real_src = vk::get_typeless_helper(format, (u32)internal_width, src->height());
vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 });
src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint); vk::copy_image_typeless(cmd, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1,
src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint); vk::get_aspect_flags(src->info.format), aspect);
src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint);
src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint);
}
} }
if (xfer_info.dst_is_typeless) if (xfer_info.dst_is_typeless)
{ {
const auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
const auto format = xfer_info.dst_native_format_override ? const auto format = xfer_info.dst_native_format_override ?
VkFormat(xfer_info.dst_native_format_override) : VkFormat(xfer_info.dst_native_format_override) :
vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format); vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format);
const auto aspect = vk::get_aspect_flags(format);
// Transfer bits from dst to typeless dst if (format != dst->format())
real_dst = vk::get_typeless_helper(format, (u32)internal_width, dst->height()); {
vk::change_image_layout(cmd, real_dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 }); const auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
const auto aspect = vk::get_aspect_flags(format);
vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, // Transfer bits from dst to typeless dst
vk::get_aspect_flags(dst->info.format), aspect); real_dst = vk::get_typeless_helper(format, (u32)internal_width, dst->height());
vk::change_image_layout(cmd, real_dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 });
dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint); vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1,
dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint); vk::get_aspect_flags(dst->info.format), aspect);
dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint);
dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint);
}
} }
// Checks // Checks