rsx: Rewrite memory inheritance transfers

- Implicitly invoke a memory barrier when actively reading from an unsynchronized texture (a minimal sketch of this barrier-on-read idea follows below)
- Simplify memory transfer operations
- Should allow more games to work without strict mode
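
The diff below implements the barrier-on-read behaviour per backend (GL and Vulkan). As orientation only, here is a minimal, hypothetical C++ sketch of the pattern: the surface itself pulls in the memory of its previous occupant the first time it is read, instead of relying on eager strict-mode copies at draw time. The type surface_stub and the helpers inherit_from/on_write are illustrative stand-ins, not RPCS3 APIs.

// Minimal illustrative sketch (hypothetical names, not RPCS3 code).
struct surface_stub
{
    surface_stub* old_contents = nullptr;   // previous occupant of this memory range, if any

    void memory_barrier()
    {
        if (!old_contents)
        {
            return;                          // no memory to inherit
        }

        // A backend-specific blit or typeless copy would run here; the commit
        // provides real implementations of this step for GL and Vulkan.
        inherit_from(old_contents);

        on_write();                          // memory transferred, drop the old reference
    }

    void inherit_from(surface_stub* /*src*/) {}   // placeholder for the real transfer
    void on_write() { old_contents = nullptr; }
};
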
Authored by kd-11 on 2018-12-16 14:57:22 +03:00; committed by kd-11
parent 97704d1396
commit 15d5507154
14 changed files with 434 additions and 335 deletions

View File

@@ -33,6 +33,38 @@ namespace rsx
virtual ~sampled_image_descriptor_base() {}
virtual u32 encoded_component_map() const = 0;
};
struct typeless_xfer
{
bool src_is_typeless = false;
bool dst_is_typeless = false;
bool src_is_depth = false;
bool dst_is_depth = false;
u32 src_gcm_format = 0;
u32 dst_gcm_format = 0;
u32 src_native_format_override = 0;
u32 dst_native_format_override = 0;
f32 src_scaling_hint = 1.f;
f32 dst_scaling_hint = 1.f;
texture_upload_context src_context = texture_upload_context::blit_engine_src;
texture_upload_context dst_context = texture_upload_context::blit_engine_dst;
void analyse()
{
if (src_is_typeless && dst_is_typeless)
{
if (src_scaling_hint == dst_scaling_hint &&
src_scaling_hint != 1.f)
{
if (src_is_depth == dst_is_depth)
{
src_is_typeless = dst_is_typeless = false;
src_scaling_hint = dst_scaling_hint = 1.f;
}
}
}
}
};
}
struct rsx_subresource_layout

View File

@@ -1444,7 +1444,8 @@ namespace rsx
}
template<typename surface_store_type>
std::vector<copy_region_descriptor> gather_texture_slices_from_framebuffers(u32 texaddr, u16 slice_w, u16 slice_h, u16 pitch, u16 count, u8 bpp, surface_store_type& m_rtts)
std::vector<copy_region_descriptor> gather_texture_slices_from_framebuffers(commandbuffer_type& cmd,
u32 texaddr, u16 slice_w, u16 slice_h, u16 pitch, u16 count, u8 bpp, surface_store_type& m_rtts)
{
std::vector<copy_region_descriptor> surfaces;
u32 current_address = texaddr;
@@ -1465,6 +1466,8 @@ namespace rsx
{
for (auto &section : overlapping)
{
section.surface->memory_barrier(cmd);
surfaces.push_back
({
section.surface->get_surface(),
@@ -1501,6 +1504,8 @@ namespace rsx
u32 internal_height = tex_height;
get_native_dimensions(internal_width, internal_height, texptr);
texptr->memory_barrier(cmd);
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d &&
extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d)
{
@@ -1521,7 +1526,7 @@
rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap };
auto bpp = get_format_block_size_in_bytes(format);
desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(texaddr, tex_width, tex_height, tex_pitch, 6, bpp, m_rtts));
desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, 6, bpp, m_rtts));
return desc;
}
else if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d && tex_depth > 1)
@@ -1543,7 +1548,7 @@
rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap };
const auto bpp = get_format_block_size_in_bytes(format);
desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(texaddr, tex_width, tex_height, tex_pitch, tex_depth, bpp, m_rtts));
desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, tex_depth, bpp, m_rtts));
return desc;
}
}
@@ -1562,8 +1567,7 @@
auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp);
bool requires_merging = false;
// TODO ruipin: This AUDIT fails due to a bug that kd will have to fix
//AUDIT( !overlapping.empty() );
AUDIT(!overlapping.empty());
if (overlapping.size() > 1)
{
// The returned values are sorted with oldest first and newest last
@@ -1593,6 +1597,8 @@
for (auto &section : overlapping)
{
section.surface->memory_barrier(cmd);
result.external_subresource_desc.sections_to_copy.push_back
({
section.surface->get_surface(),

View File

@@ -105,36 +105,6 @@ namespace rsx
constexpr operator enum_type() const { return cause; }
};
struct typeless_xfer
{
bool src_is_typeless = false;
bool dst_is_typeless = false;
bool src_is_depth = false;
bool dst_is_depth = false;
u32 src_gcm_format = 0;
u32 dst_gcm_format = 0;
f32 src_scaling_hint = 1.f;
f32 dst_scaling_hint = 1.f;
texture_upload_context src_context = texture_upload_context::blit_engine_src;
texture_upload_context dst_context = texture_upload_context::blit_engine_dst;
void analyse()
{
if (src_is_typeless && dst_is_typeless)
{
if (src_scaling_hint == dst_scaling_hint &&
src_scaling_hint != 1.f)
{
if (src_is_depth == dst_is_depth)
{
src_is_typeless = dst_is_typeless = false;
src_scaling_hint = dst_scaling_hint = 1.f;
}
}
}
}
};
/**

View File

@@ -210,26 +210,6 @@ void GLGSRender::end()
}
};
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth)
{
if (surface->get_internal_format() == surface->old_contents->get_internal_format())
{
// Disable stencil test to avoid switching off and back on later
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// Copy data from old contents onto this one
const auto region = rsx::get_transferable_region(surface);
gl::g_hw_blitter->scale_image(surface->old_contents, surface, { 0, 0, std::get<0>(region), std::get<1>(region) }, { 0, 0, std::get<2>(region) , std::get<3>(region) }, !is_depth, is_depth, {});
// Memory has been transferred, discard old contents and update memory flags
// TODO: Preserve memory outside surface clip region
surface->on_write();
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
};
//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
@@ -291,24 +271,6 @@ void GLGSRender::end()
ds->on_write();
}
if (g_cfg.video.strict_rendering_mode)
{
if (ds && ds->old_contents != nullptr)
copy_rtt_contents(ds, true);
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface, false);
}
}
}
// Unconditionally enable stencil test if it was disabled before
gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
// Load textures
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
@@ -473,9 +435,28 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
update_draw_state();
std::chrono::time_point<steady_clock> draw_start = textures_end;
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
// Optionally do memory synchronization if the texture stage has not yet triggered this
if (g_cfg.video.strict_rendering_mode)
{
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
if (ds) ds->memory_barrier();
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
surface->memory_barrier();
}
}
}
// Unconditionally re-enable scissor test if it was disabled before
gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
update_draw_state();
if (g_cfg.video.debug_output)
{

View File

@@ -1,5 +1,6 @@
#include "stdafx.h"
#include "GLHelpers.h"
#include "GLTexture.h"
#include "Utilities/Log.h"
namespace gl
@@ -360,4 +361,101 @@ namespace gl
{
return attrib_t(index);
}
void blitter::scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info)
{
std::unique_ptr<texture> typeless_src;
std::unique_ptr<texture> typeless_dst;
u32 src_id = src->id();
u32 dst_id = dst->id();
if (xfer_info.src_is_typeless)
{
const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint);
const auto internal_fmt = xfer_info.src_native_format_override ?
GLenum(xfer_info.src_native_format_override) :
get_sized_internal_format(xfer_info.src_gcm_format);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt);
copy_typeless(typeless_src.get(), src);
src_id = typeless_src->id();
src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint);
src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint);
}
if (xfer_info.dst_is_typeless)
{
const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint);
const auto internal_fmt = xfer_info.dst_native_format_override ?
GLenum(xfer_info.dst_native_format_override) :
get_sized_internal_format(xfer_info.dst_gcm_format);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt);
copy_typeless(typeless_dst.get(), dst);
dst_id = typeless_dst->id();
dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint);
dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint);
}
s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
GLenum attachment;
gl::buffers target;
if (is_depth_copy)
{
if (src->get_internal_format() == gl::texture::internal_format::depth16 ||
dst->get_internal_format() == gl::texture::internal_format::depth16)
{
attachment = GL_DEPTH_ATTACHMENT;
target = gl::buffers::depth;
}
else
{
attachment = GL_DEPTH_STENCIL_ATTACHMENT;
target = gl::buffers::depth_stencil;
}
}
else
{
attachment = GL_COLOR_ATTACHMENT0;
target = gl::buffers::color;
}
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0);
blit_src.check();
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0);
blit_dst.check();
GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST);
if (scissor_test_enabled)
glDisable(GL_SCISSOR_TEST);
blit_src.blit(blit_dst, src_rect, dst_rect, target, interp);
if (xfer_info.dst_is_typeless)
{
//Transfer contents from typeless dst back to original dst
copy_typeless(dst, typeless_dst.get());
}
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0);
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0);
if (scissor_test_enabled)
glEnable(GL_SCISSOR_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
}
}

View File

@@ -10,6 +10,7 @@
#include "OpenGL.h"
#include "../GCM.h"
#include "../Common/TextureUtils.h"
#include "Utilities/geometry.h"
@@ -2792,4 +2793,27 @@ public:
}
};
}
class blitter
{
fbo blit_src;
fbo blit_dst;
public:
void init()
{
blit_src.create();
blit_dst.create();
}
void destroy()
{
blit_dst.remove();
blit_src.remove();
}
void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info);
};
}

View File

@@ -553,3 +553,68 @@ void GLGSRender::read_buffers()
std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
}
}
void gl::render_target::memory_barrier(void*)
{
if (!old_contents)
{
// No memory to inherit
return;
}
auto src_texture = static_cast<gl::render_target*>(old_contents);
if (src_texture->get_rsx_pitch() != get_rsx_pitch())
{
LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory");
return;
}
auto is_depth = [](gl::texture::internal_format format)
{
// TODO: Change this to image aspect semantics
switch (format)
{
case gl::texture::internal_format::depth16:
case gl::texture::internal_format::depth24_stencil8:
case gl::texture::internal_format::depth32f_stencil8:
return true;
default:
return false;
}
};
auto src_bpp = src_texture->get_native_pitch() / src_texture->width();
auto dst_bpp = get_native_pitch() / width();
rsx::typeless_xfer typeless_info{};
const bool dst_is_depth = is_depth(get_internal_format());
const auto region = rsx::get_transferable_region(this);
if (get_internal_format() == src_texture->get_internal_format())
{
// Copy data from old contents onto this one
verify(HERE), src_bpp == dst_bpp;
}
else
{
// Mem cast, generate typeless xfer info
const bool src_is_depth = is_depth(src_texture->get_internal_format());
if (src_bpp != dst_bpp || dst_is_depth || src_is_depth)
{
typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;
typeless_info.src_native_format_override = (u32)get_internal_format();
typeless_info.src_is_depth = src_is_depth;
typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp;
}
}
gl::g_hw_blitter->scale_image(old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
!dst_is_depth, dst_is_depth, typeless_info);
// Memory has been transferred, discard old contents and update memory flags
// TODO: Preserve memory outside surface clip region
on_write();
}

View File

@@ -130,6 +130,8 @@ namespace gl
//Use forward scaling to account for rounding and clamping errors
return (rsx::apply_resolution_scale(_width, true) == internal_width) && (rsx::apply_resolution_scale(_height, true) == internal_height);
}
void memory_barrier(void* = nullptr);
};
struct framebuffer_holder : public gl::fbo, public rsx::ref_counted

View File

@@ -30,118 +30,6 @@ namespace gl
extern void copy_typeless(texture*, const texture*);
extern blitter *g_hw_blitter;
class blitter
{
fbo blit_src;
fbo blit_dst;
public:
void init()
{
blit_src.create();
blit_dst.create();
}
void destroy()
{
blit_dst.remove();
blit_src.remove();
}
void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info)
{
std::unique_ptr<texture> typeless_src;
std::unique_ptr<texture> typeless_dst;
u32 src_id = src->id();
u32 dst_id = dst->id();
if (xfer_info.src_is_typeless)
{
const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint);
const auto internal_fmt = get_sized_internal_format(xfer_info.src_gcm_format);
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt);
copy_typeless(typeless_src.get(), src);
src_id = typeless_src->id();
src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint);
src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint);
}
if (xfer_info.dst_is_typeless)
{
const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint);
const auto internal_fmt = get_sized_internal_format(xfer_info.dst_gcm_format);
typeless_dst = std::make_unique<texture>(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt);
copy_typeless(typeless_dst.get(), dst);
dst_id = typeless_dst->id();
dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint);
dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint);
}
s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
GLenum attachment;
gl::buffers target;
if (is_depth_copy)
{
if (src->get_internal_format() == gl::texture::internal_format::depth16 ||
dst->get_internal_format() == gl::texture::internal_format::depth16)
{
attachment = GL_DEPTH_ATTACHMENT;
target = gl::buffers::depth;
}
else
{
attachment = GL_DEPTH_STENCIL_ATTACHMENT;
target = gl::buffers::depth_stencil;
}
}
else
{
attachment = GL_COLOR_ATTACHMENT0;
target = gl::buffers::color;
}
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0);
blit_src.check();
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0);
blit_dst.check();
GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST);
if (scissor_test_enabled)
glDisable(GL_SCISSOR_TEST);
blit_src.blit(blit_dst, src_rect, dst_rect, target, interp);
if (xfer_info.dst_is_typeless)
{
//Transfer contents from typeless dst back to original dst
copy_typeless(dst, typeless_dst.get());
}
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0);
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0);
if (scissor_test_enabled)
glEnable(GL_SCISSOR_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
}
};
class cached_texture_section;
class texture_cache;

View File

@@ -1387,41 +1387,19 @@ void VKGSRender::end()
if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface, bool is_depth)
{
if (LIKELY(surface->info.format == surface->old_contents->info.format))
{
const auto region = rsx::get_transferable_region(surface);
const auto src_w = std::get<0>(region);
const auto src_h = std::get<1>(region);
const auto dst_w = std::get<2>(region);
const auto dst_h = std::get<3>(region);
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value,
surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h,
0, 0, dst_w, dst_h, 1, aspect, true, VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format);
// Memory has been transferred, discard old contents and update memory flags
// TODO: Preserve memory outside surface clip region
surface->on_write();
}
};
//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface, false);
surface->memory_barrier(*m_current_command_buffer);
}
}
if (ds && ds->old_contents)
{
copy_rtt_contents(ds, true);
ds->memory_barrier(*m_current_command_buffer);
}
}

View File

@@ -2980,4 +2980,14 @@ public:
}
}
};
class blitter
{
vk::command_buffer* commands;
public:
blitter(vk::command_buffer *c) : commands(c) {}
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info);
};
}

View File

@@ -5,6 +5,7 @@
#include "../GCM.h"
#include "../Common/surface_store.h"
#include "../Common/TextureUtils.h"
#include "../Common/texture_cache_utils.h"
#include "VKFormats.h"
#include "../rsx_utils.h"
@@ -55,6 +56,54 @@ namespace vk
//Use forward scaling to account for rounding and clamping errors
return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height());
}
void memory_barrier(vk::command_buffer& cmd)
{
if (!old_contents)
{
return;
}
auto src_texture = static_cast<vk::render_target*>(old_contents);
if (src_texture->get_rsx_pitch() != get_rsx_pitch())
{
LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory");
return;
}
auto src_bpp = src_texture->get_native_pitch() / src_texture->width();
auto dst_bpp = get_native_pitch() / width();
rsx::typeless_xfer typeless_info{};
const auto region = rsx::get_transferable_region(this);
if (src_texture->info.format == info.format)
{
verify(HERE), src_bpp == dst_bpp;
}
else
{
const bool src_is_depth = !!(vk::get_aspect_flags(src_texture->info.format) & VK_IMAGE_ASPECT_DEPTH_BIT);
const bool dst_is_depth = !!(vk::get_aspect_flags(info.format) & VK_IMAGE_ASPECT_DEPTH_BIT);
if (src_bpp != dst_bpp || src_is_depth || dst_is_depth)
{
typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;
typeless_info.src_native_format_override = (u32)info.format;
typeless_info.src_is_depth = src_is_depth;
typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp;
}
}
vk::blitter hw_blitter(&cmd);
hw_blitter.scale_image(old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
/*linear?*/false, /*depth?(unused)*/false, typeless_info);
on_write();
}
};
struct framebuffer_holder: public vk::framebuffer, public rsx::ref_counted

View File

@@ -529,4 +529,123 @@ namespace vk
return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] };
}
void blitter::scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info)
{
const auto src_aspect = vk::get_aspect_flags(src->info.format);
const auto dst_aspect = vk::get_aspect_flags(dst->info.format);
vk::image* real_src = src;
vk::image* real_dst = dst;
if (xfer_info.src_is_typeless)
{
auto internal_width = src->width() * xfer_info.src_scaling_hint;
auto format = xfer_info.src_native_format_override ?
VkFormat(xfer_info.src_native_format_override) :
vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format);
// Transfer bits from src to typeless src
real_src = vk::get_typeless_helper(format);
src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint);
src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint);
vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1,
vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format));
}
if (xfer_info.dst_is_typeless)
{
auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
auto format = xfer_info.dst_native_format_override ?
VkFormat(xfer_info.dst_native_format_override) :
vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format);
// Transfer bits from dst to typeless dst
real_dst = vk::get_typeless_helper(format);
dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint);
dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint);
vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1,
vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format));
}
else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage)
{
if (xfer_info.src_context != rsx::texture_upload_context::blit_engine_dst &&
xfer_info.src_context != rsx::texture_upload_context::framebuffer_storage)
{
// Data moving to rendertarget, where byte ordering has to be preserved
// NOTE: This is a workaround; true accuracy would require all RTT<->cache transfers to invoke this step, but that's too slow
// Sampling is ok; image view swizzle will work around it
if (dst->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
// For this specific format, channel ordering is faked via custom remap, undo this before transfer
VkBufferImageCopy copy{};
copy.imageExtent = src->info.extent;
copy.imageOffset = { 0, 0, 0 };
copy.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
const auto scratch_buf = vk::get_scratch_buffer();
const auto data_length = src->info.extent.width * src->info.extent.height * 4;
const auto current_layout = src->current_layout;
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy);
vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::get_compute_task<vk::cs_shuffle_32>()->run(*commands, scratch_buf, data_length);
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
real_src = vk::get_typeless_helper(src->info.format);
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy);
}
}
}
// Checks
if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1)
{
LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!");
return;
}
if (src_area.x1 < 0 || src_area.x2 >(s32)real_src->width() || src_area.y1 < 0 || src_area.y2 >(s32)real_src->height())
{
LOG_ERROR(RSX, "Blit request denied because the source region does not fit!");
return;
}
if (dst_area.x1 < 0 || dst_area.x2 >(s32)real_dst->width() || dst_area.y1 < 0 || dst_area.y2 >(s32)real_dst->height())
{
LOG_ERROR(RSX, "Blit request denied because the destination region does not fit!");
return;
}
const auto src_width = src_area.x2 - src_area.x1;
const auto src_height = src_area.y2 - src_area.y1;
const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1;
copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format,
interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format);
if (real_dst != dst)
{
auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1,
vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format));
}
change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers });
}
}

View File

@@ -1097,130 +1097,7 @@ namespace vk
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
struct blit_helper
{
vk::command_buffer* commands;
VkFormat format;
blit_helper(vk::command_buffer *c) : commands(c) {}
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info)
{
const auto src_aspect = vk::get_aspect_flags(src->info.format);
const auto dst_aspect = vk::get_aspect_flags(dst->info.format);
vk::image* real_src = src;
vk::image* real_dst = dst;
if (xfer_info.src_is_typeless)
{
auto internal_width = src->width() * xfer_info.src_scaling_hint;
auto format = vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format);
// Transfer bits from src to typeless src
real_src = vk::get_typeless_helper(format);
src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint);
src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint);
vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1,
vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format));
}
if (xfer_info.dst_is_typeless)
{
auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
auto format = vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format);
// Transfer bits from dst to typeless dst
real_dst = vk::get_typeless_helper(format);
dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint);
dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint);
vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1,
vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format));
}
else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage)
{
if (xfer_info.src_context != rsx::texture_upload_context::blit_engine_dst &&
xfer_info.src_context != rsx::texture_upload_context::framebuffer_storage)
{
// Data moving to rendertarget, where byte ordering has to be preserved
// NOTE: This is a workaround; true accuracy would require all RTT<->cache transfers to invoke this step, but that's too slow
// Sampling is ok; image view swizzle will work around it
if (dst->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
// For this specific format, channel ordering is faked via custom remap, undo this before transfer
VkBufferImageCopy copy{};
copy.imageExtent = src->info.extent;
copy.imageOffset = { 0, 0, 0 };
copy.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
const auto scratch_buf = vk::get_scratch_buffer();
const auto data_length = src->info.extent.width * src->info.extent.height * 4;
const auto current_layout = src->current_layout;
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy);
vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::get_compute_task<vk::cs_shuffle_32>()->run(*commands, scratch_buf, data_length);
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
real_src = vk::get_typeless_helper(src->info.format);
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1});
vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy);
}
}
}
// Checks
if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1)
{
LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!");
return;
}
if (src_area.x1 < 0 || src_area.x2 >(s32)real_src->width() || src_area.y1 < 0 || src_area.y2 >(s32)real_src->height())
{
LOG_ERROR(RSX, "Blit request denied because the source region does not fit!");
return;
}
if (dst_area.x1 < 0 || dst_area.x2 >(s32)real_dst->width() || dst_area.y1 < 0 || dst_area.y2 >(s32)real_dst->height())
{
LOG_ERROR(RSX, "Blit request denied because the destination region does not fit!");
return;
}
const auto src_width = src_area.x2 - src_area.x1;
const auto src_height = src_area.y2 - src_area.y1;
const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1;
copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format,
interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format);
if (real_dst != dst)
{
auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1,
vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format));
}
change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers});
format = dst->info.format;
}
}
helper(&cmd);
blitter helper(&cmd);
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast<const VkQueue>(m_submit_queue));
if (reply.succeeded)