rsx: Synchronize surface cache and texture cache data

- TODO: The whole upload_texture thing is a big hack, fix it properly
This commit is contained in:
kd-11 2019-02-02 22:44:18 +03:00 committed by kd-11
parent a43e7c172c
commit ef071ebb6b
6 changed files with 343 additions and 157 deletions

View File

@ -3,6 +3,7 @@
#include "Utilities/GSL.h"
#include "Emu/Memory/vm.h"
#include "../GCM.h"
#include "../rsx_utils.h"
#include <list>
namespace
@ -507,7 +508,7 @@ namespace rsx
// u32 clip_x = clip_horizontal_reg;
// u32 clip_y = clip_vertical_reg;
cache_tag++;
cache_tag = rsx::get_shared_tag();
m_memory_tree.clear();
// Make previous RTTs sampleable
@ -542,15 +543,11 @@ namespace rsx
}
/**
* Search for given address in stored color surface and returns it if size/format match.
* Search for given address in stored color surface
* Return an empty surface_type otherwise.
*/
surface_type get_texture_from_render_target_if_applicable(u32 address)
{
// TODO: Handle texture that overlaps one (or several) surface.
// Handle texture conversion
// FIXME: Disgaea 3 loading screen seems to use a subset of a surface. It's not properly handled here.
// Note: not const because conversions/resolve/... can happen
auto It = m_render_targets_storage.find(address);
if (It != m_render_targets_storage.end())
return Traits::get(It->second);
@ -558,12 +555,11 @@ namespace rsx
}
/**
* Search for given address in stored depth stencil surface and returns it if size/format match.
* Search for given address in stored depth stencil surface
* Return an empty surface_type otherwise.
*/
surface_type get_texture_from_depth_stencil_if_applicable(u32 address)
{
// TODO: Same as above although there wasn't any game using corner case for DS yet.
auto It = m_depth_stencil_storage.find(address);
if (It != m_depth_stencil_storage.end())
return Traits::get(It->second);
@ -723,7 +719,7 @@ namespace rsx
invalidated_resources.push_back(std::move(It->second));
m_render_targets_storage.erase(It);
cache_tag++;
cache_tag = rsx::get_shared_tag();
return;
}
}
@ -741,7 +737,7 @@ namespace rsx
invalidated_resources.push_back(std::move(It->second));
m_depth_stencil_storage.erase(It);
cache_tag++;
cache_tag = rsx::get_shared_tag();
return;
}
}
@ -768,7 +764,7 @@ namespace rsx
invalidated_resources.push_back(std::move(It->second));
m_render_targets_storage.erase(It);
cache_tag++;
cache_tag = rsx::get_shared_tag();
return;
}
}
@ -781,7 +777,7 @@ namespace rsx
invalidated_resources.push_back(std::move(It->second));
m_depth_stencil_storage.erase(It);
cache_tag++;
cache_tag = rsx::get_shared_tag();
return;
}
}
@ -1138,7 +1134,7 @@ namespace rsx
// Gives cache_tag a new value; the value left in cache_tag is the one returned by rsx::get_shared_tag().
// NOTE(review): this view shows both an increment and an assignment. Taken literally the
// assignment overwrites the result of the increment, so the increment is most likely the
// pre-change line of the diff - confirm against the committed file.
void notify_memory_structure_changed()
{
cache_tag++;
cache_tag = rsx::get_shared_tag();
}
};
}

View File

@ -131,16 +131,25 @@ namespace rsx
bool has_flushables = false;
};
// Transform requested for a copy_region_descriptor when its region is transferred.
enum surface_transform : u32
{
// The Vulkan transfer path copies (or scales) the region directly
identity = 0,
// The Vulkan transfer path routes the region through a scratch buffer and a cs_shuffle_32 compute pass
argb_to_bgra = 1
};
// Describes one rectangular region to transfer from a source image into a destination image.
struct copy_region_descriptor
{
// Source image; the GL and Vulkan copy helpers skip descriptors whose src is null
image_resource_type src;
// Transform requested for the transfer (see surface_transform)
surface_transform xform;
// Origin of the region inside src
u16 src_x;
u16 src_y;
// Origin of the region inside the destination
u16 dst_x;
u16 dst_y;
// Destination slice: passed as the destination z offset by the GL helper and as the array layer by the Vulkan helper
u16 dst_z;
// NOTE(review): w/h appear here next to src_w/src_h/dst_w/dst_h, but the initializers and copy helpers
// in this change only use the latter four - w/h are presumably the pre-change fields; confirm.
u16 w;
u16 h;
// Size of the region read from src
u16 src_w;
u16 src_h;
// Size of the region written to the destination; the copy helpers scale when it differs from src_w/src_h
u16 dst_w;
u16 dst_h;
};
enum deferred_request_command : u32
@ -311,7 +320,7 @@ namespace rsx
*/
inline void update_cache_tag()
{
// NOTE(review): taken literally, the assignment below overwrites the result of this increment;
// the increment is most likely the pre-change line of the diff - confirm against the committed file.
m_cache_update_tag++;
// The value left in m_cache_update_tag comes from the shared RSX tag counter
m_cache_update_tag = rsx::get_shared_tag();
}
template <typename... Args>
@ -965,7 +974,7 @@ namespace rsx
}
std::vector<section_storage_type*> find_texture_from_range(const address_range &test_range)
std::vector<section_storage_type*> find_texture_from_range(const address_range &test_range, u32 context_mask=0xFF)
{
std::vector<section_storage_type*> results;
@ -977,8 +986,10 @@ namespace rsx
//if (tex.get_section_base() > test_range.start)
// continue;
if (!tex.is_dirty())
if (!tex.is_dirty() && (context_mask & (u32)tex.get_context()))
{
results.push_back(&tex);
}
}
return results;
@ -1376,7 +1387,15 @@ namespace rsx
std::vector<copy_region_descriptor> sections(6);
for (u16 n = 0; n < 6; ++n)
{
sections[n] = { desc.external_handle, 0, (u16)(desc.height * n), 0, 0, n, desc.width, desc.height };
sections[n] =
{
desc.external_handle,
surface_transform::identity,
0, (u16)(desc.height * n),
0, 0, n,
desc.width, desc.height,
desc.width, desc.height
};
}
result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, sections, desc.remap);
@ -1393,7 +1412,15 @@ namespace rsx
sections.resize(desc.depth);
for (u16 n = 0; n < desc.depth; ++n)
{
sections[n] = { desc.external_handle, 0, (u16)(desc.height * n), 0, 0, n, desc.width, desc.height };
sections[n] =
{
desc.external_handle,
surface_transform::identity,
0, (u16)(desc.height * n),
0, 0, n,
desc.width, desc.height,
desc.width, desc.height
};
}
result = generate_3d_from_2d_images(cmd, desc.gcm_format, desc.width, desc.height, desc.depth, sections, desc.remap);
@ -1455,16 +1482,19 @@ namespace rsx
{
section.surface->read_barrier(cmd);
const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width;
const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height;
surfaces.push_back
({
section.surface->get_surface(),
surface_transform::identity,
rsx::apply_resolution_scale(section.src_x, true),
rsx::apply_resolution_scale(section.src_y, true),
rsx::apply_resolution_scale(section.dst_x, true),
rsx::apply_resolution_scale(section.dst_y, true),
slice,
rsx::apply_resolution_scale(section.width, true),
rsx::apply_resolution_scale(section.height, true)
src_width, src_height,
dst_width, dst_height
});
}
}
@ -1554,52 +1584,141 @@ namespace rsx
auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp);
bool requires_merging = false;
AUDIT(!overlapping.empty());
if (overlapping.size() > 1)
verify(HERE), !overlapping.empty();
if (LIKELY(overlapping.back().surface == texptr))
{
// The returned values are sorted with oldest first and newest last
// This allows newer data to overwrite older memory when merging the list
if (overlapping.back().surface == texptr)
{
// The texture 'proposed' by the previous lookup is the newest one
// If it occupies the entire requested region, just use it as-is
requires_merging = (internal_width > surface_width || internal_height > surface_height);
}
else
{
requires_merging = true;
}
// The texture 'proposed' by the previous lookup is the newest one
// If it occupies the entire requested region, just use it as-is
requires_merging = (internal_width > surface_width || internal_height > surface_height);
}
else
{
verify(HERE), overlapping.size() > 1;
requires_merging = true;
}
if (requires_merging)
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
// TODO: For now we're only testing against blit engine dst, should add other types as well
const auto range = rsx::address_range::start_length(texaddr, tex_pitch * tex_height);
auto local_resources = find_texture_from_range(range, rsx::texture_upload_context::blit_engine_dst);
sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather,
texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth,
scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap };
result.external_subresource_desc.sections_to_copy.reserve(overlapping.size());
for (auto &section : overlapping)
if (local_resources.empty() && overlapping.size() == 1)
{
section.surface->read_barrier(cmd);
result.external_subresource_desc.sections_to_copy.push_back
({
section.surface->get_surface(),
rsx::apply_resolution_scale(section.src_x, true),
rsx::apply_resolution_scale(section.src_y, true),
rsx::apply_resolution_scale(section.dst_x, true),
rsx::apply_resolution_scale(section.dst_y, true),
0,
rsx::apply_resolution_scale(section.width, true),
rsx::apply_resolution_scale(section.height, true)
});
// TODO: Fall back to full upload and merge
}
else
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
return result;
sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather,
texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth,
scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap };
result.external_subresource_desc.sections_to_copy.reserve(overlapping.size() + local_resources.size());
auto add_rtt_resource = [&](auto& section)
{
section.surface->read_barrier(cmd);
const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width;
const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height;
result.external_subresource_desc.sections_to_copy.push_back
({
section.surface->get_surface(),
surface_transform::identity,
rsx::apply_resolution_scale(section.src_x, true),
rsx::apply_resolution_scale(section.src_y, true),
rsx::apply_resolution_scale(section.dst_x, true),
rsx::apply_resolution_scale(section.dst_y, true),
0,
src_width, src_height,
dst_width, dst_height
});
};
auto add_local_resource = [&](auto& section)
{
// Intersect this resource with the original one
const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format());
const auto clipped = rsx::intersect_region(texaddr, tex_width, tex_height, bpp,
section->get_section_base(), section->get_width(), section->get_height(), section_bpp, tex_pitch);
// Since output is upscaled, also upscale on dst
result.external_subresource_desc.sections_to_copy.push_back
({
section->get_raw_texture(),
is_depth ? surface_transform::identity : surface_transform::argb_to_bgra,
(u16)std::get<0>(clipped).x,
(u16)std::get<0>(clipped).y,
rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true),
rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true),
0,
(u16)std::get<2>(clipped).width,
(u16)std::get<2>(clipped).height,
rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true),
rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true),
});
};
if (LIKELY(local_resources.empty()))
{
for (auto &section : overlapping)
{
add_rtt_resource(section);
}
}
else
{
// Need to preserve sorting order
struct sort_helper
{
u64 tag; // Timestamp
u32 list; // List source, 0 = fbo, 1 = local
u32 index; // Index in list
};
std::vector<sort_helper> sort_list;
sort_list.reserve(overlapping.size() + local_resources.size());
for (u32 index = 0; index < overlapping.size(); ++index)
{
sort_list.push_back({ overlapping[index].surface->last_use_tag, 0, index });
}
for (u32 index = 0; index < local_resources.size(); ++index)
{
if (local_resources[index]->get_rsx_pitch() != tex_pitch)
continue;
// TODO: Typeless transfers
if (local_resources[index]->is_depth_texture() != is_depth)
continue;
sort_list.push_back({ local_resources[index]->last_write_tag, 1, index });
}
std::sort(sort_list.begin(), sort_list.end(), [](const auto &a, const auto &b)
{
return (a.tag < b.tag);
});
for (const auto &e : sort_list)
{
if (e.list == 0)
{
add_rtt_resource(overlapping[e.index]);
}
else
{
add_local_resource(local_resources[e.index]);
}
}
}
return result;
}
}
bool requires_processing = surface_width > internal_width || surface_height > internal_height;
@ -1688,7 +1807,8 @@ namespace rsx
{
// Check for sampleable rtts from previous render passes
// TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
texptr && texptr->get_rsx_pitch() == tex_pitch)
{
if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
is_active || texptr->test())
@ -1704,7 +1824,8 @@ namespace rsx
}
}
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
texptr && texptr->get_rsx_pitch() == tex_pitch)
{
if (const bool is_active = m_rtts.address_is_bound(texaddr, true);
is_active || texptr->test())
@ -1797,13 +1918,12 @@ namespace rsx
if (is_hw_blit_engine_compatible(format))
{
//Find based on range instead
auto overlapping_surfaces = find_texture_from_range(tex_range);
auto overlapping_surfaces = find_texture_from_range(tex_range, rsx::texture_upload_context::blit_engine_dst);
if (!overlapping_surfaces.empty())
{
for (const auto &surface : overlapping_surfaces)
{
if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst ||
!surface->overlaps(tex_range, rsx::section_bounds::confirmed_range))
if (!surface->overlaps(tex_range, rsx::section_bounds::confirmed_range))
continue;
if (surface->get_width() >= tex_width && surface->get_height() >= tex_height)
@ -1893,7 +2013,7 @@ namespace rsx
u16 dst_h = dst.clip_height;
//Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, false, false);
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, false, false, false);
dst_is_render_target = dst_subres.surface != nullptr;
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
@ -2027,13 +2147,10 @@ namespace rsx
if (!dst_is_render_target)
{
// Check for any available region that will fit this one
auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height));
auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height), rsx::texture_upload_context::blit_engine_dst);
for (const auto &surface : overlapping_surfaces)
{
if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst)
continue;
if (surface->get_rsx_pitch() != dst.pitch)
continue;

View File

@ -733,6 +733,32 @@ namespace gl
}
}
// Transfers every descriptor in 'sources' into dst_image.
// Descriptors without a source image are skipped. Regions whose source and destination
// sizes match are copied with glCopyImageSubData; all others are scaled through the
// hardware blitter, which is only accepted for 2D destinations.
// NOTE(review): the descriptor's xform field is not consulted on this path.
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources)
{
    for (const auto &region : sources)
    {
        if (region.src)
        {
            const bool needs_scaling = (region.src_w != region.dst_w || region.src_h != region.dst_h);

            if (needs_scaling)
            {
                // Scaled transfers are only implemented for plain 2D targets
                verify(HERE), dst_image->get_target() == gl::texture::target::texture2D;

                const areai from = { region.src_x, region.src_y, region.src_x + region.src_w, region.src_y + region.src_h };
                const areai to = { region.dst_x, region.dst_y, region.dst_x + region.dst_w, region.dst_y + region.dst_h };

                gl::g_hw_blitter->scale_image(cmd, region.src, dst_image, from, to, false, false, {});
            }
            else
            {
                // Same-size transfer: direct image-to-image copy, dst_z selects the destination slice
                glCopyImageSubData(
                    region.src->id(), GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0,
                    dst_image->id(), (GLenum)dst_image->get_target(), 0, region.dst_x, region.dst_y, region.dst_z,
                    region.src_w, region.src_h, 1);
            }
        }
    }
}
protected:
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
@ -748,7 +774,7 @@ namespace gl
GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
}
gl::texture_view* generate_cubemap_from_images(gl::command_context&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
gl::texture_view* generate_cubemap_from_images(gl::command_context& cmd, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
{
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
std::unique_ptr<gl::texture> dst_image = std::make_unique<gl::viewable_image>(GL_TEXTURE_CUBE_MAP, size, size, 1, 1, ifmt);
@ -757,14 +783,7 @@ namespace gl
//Empty GL_ERROR
glGetError();
for (const auto &slice : sources)
{
if (slice.src)
{
glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0,
dst_image->id(), GL_TEXTURE_CUBE_MAP, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1);
}
}
copy_transfer_regions_impl(cmd, dst_image.get(), sources);
if (GLenum err = glGetError())
{
@ -777,7 +796,7 @@ namespace gl
return result;
}
gl::texture_view* generate_3d_from_2d_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
gl::texture_view* generate_3d_from_2d_images(gl::command_context& cmd, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
{
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
std::unique_ptr<gl::texture> dst_image = std::make_unique<gl::viewable_image>(GL_TEXTURE_3D, width, height, depth, 1, ifmt);
@ -786,14 +805,7 @@ namespace gl
//Empty GL_ERROR
glGetError();
for (const auto &slice : sources)
{
if (slice.src)
{
glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0,
dst_image->id(), GL_TEXTURE_3D, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1);
}
}
copy_transfer_regions_impl(cmd, dst_image.get(), sources);
if (GLenum err = glGetError())
{
@ -806,17 +818,12 @@ namespace gl
return result;
}
gl::texture_view* generate_atlas_from_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy,
gl::texture_view* generate_atlas_from_images(gl::command_context& cmd, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy,
const texture_channel_remap_t& remap_vector) override
{
auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
for (const auto &region : sections_to_copy)
{
glCopyImageSubData(region.src->id(), GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0,
result->image()->id(), GL_TEXTURE_2D, 0, region.dst_x, region.dst_y, 0, region.w, region.h, 1);
}
copy_transfer_regions_impl(cmd, result->image(), sections_to_copy);
return result;
}

View File

@ -496,6 +496,89 @@ namespace vk
return mapping;
}
// Records the transfer of every descriptor in sections_to_transfer into dst.
//
// Descriptors without a source image are skipped. For each remaining descriptor:
//  - same-size regions with an identity transform are copied with vkCmdCopyImage;
//  - identity regions whose sizes differ are scaled with vk::copy_scaled_image;
//  - argb_to_bgra regions are read into the scratch buffer, shuffled by cs_shuffle_32,
//    written to a typeless helper image and then scaled into dst.
// The source image layout is restored after each transfer that changes it.
// dst is used with its current layout; callers are expected to have prepared it for writing.
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer)
{
    for (const auto &section : sections_to_transfer)
    {
        if (!section.src)
            continue;

        VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst->info.format);
        VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format);
        VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 };

        if (section.src_w == section.dst_w && section.src_h == section.dst_h &&
            section.xform == surface_transform::identity)
        {
            VkImageLayout old_src_layout = section.src->current_layout;

            VkImageCopy copy_rgn;
            copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
            copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
            copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, section.dst_z, 1 };
            copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
            copy_rgn.extent = { section.src_w, section.src_h, 1 };

            if (dst->info.imageType == VK_IMAGE_TYPE_3D)
            {
                // 3D images have a single array layer; the slice is addressed through the z offset
                copy_rgn.dstOffset.z = section.dst_z;
                copy_rgn.dstSubresource.baseArrayLayer = 0;
            }

            vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
            vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
            vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
        }
        else
        {
            // Scaled and transformed transfers only target the first slice
            verify(HERE), section.dst_z == 0;

            if (section.xform == surface_transform::identity)
            {
                vk::copy_scaled_image(cmd, section.src->value, dst->value, section.src->current_layout, dst->current_layout,
                    section.src_x, section.src_y, section.src_w, section.src_h,
                    section.dst_x, section.dst_y, section.dst_w, section.dst_h,
                    1, src_aspect, section.src->info.format == dst->info.format,
                    VK_FILTER_NEAREST);
            }
            else if (section.xform == surface_transform::argb_to_bgra)
            {
                VkImageLayout old_src_layout = section.src->current_layout;

                VkBufferImageCopy copy{};
                copy.imageExtent = { section.src_w, section.src_h, 1 };
                copy.imageOffset = { section.src_x, section.src_y, 0 };
                copy.imageSubresource = { src_aspect, 0, 0, 1 };

                // Step 1: read the source region into the scratch buffer
                auto scratch_buf = vk::get_scratch_buffer();
                vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
                vkCmdCopyImageToBuffer(cmd, section.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, &copy);

                // Byte length handed to the barriers and the shuffle pass: the whole source image at 4 bytes per texel.
                // This bounds the copied region from above as long as the region lies inside the source image.
                const auto length = section.src->width() * section.src->height() * 4;

                // Step 2: shuffle the channels in place once the transfer write is visible to compute
                vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                    VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

                auto shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
                shuffle_kernel->run(cmd, scratch_buf, length);

                vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

                // Step 3: upload the shuffled data to a helper image at origin (0, 0) and scale it into dst
                auto tmp = vk::get_typeless_helper(section.src->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h);
                vk::change_image_layout(cmd, tmp, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { src_aspect, 0, 1, 0, 1 });

                copy.imageOffset = { 0, 0, 0 };
                vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);

                vk::copy_scaled_image(cmd, tmp->value, dst->value, tmp->current_layout, dst->current_layout,
                    0, 0, section.src_w, section.src_h,
                    section.dst_x, section.dst_y, section.dst_w, section.dst_h,
                    1, src_aspect, section.src->info.format == dst->info.format,
                    VK_FILTER_NEAREST);

                vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
            }
            else
            {
                fmt::throw_exception("Unreachable" HERE);
            }
        }
    }
}
protected:
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
@ -627,26 +710,7 @@ namespace vk
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
for (const auto &section : sections_to_copy)
{
if (section.src)
{
VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format);
VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 };
VkImageLayout old_src_layout = section.src->current_layout;
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, section.dst_z, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.extent = { section.w, section.h, 1 };
vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, image->value, image->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
}
}
copy_transfer_regions_impl(cmd, image.get(), sections_to_copy);
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
@ -690,26 +754,7 @@ namespace vk
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
for (const auto &section : sections_to_copy)
{
if (section.src)
{
VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format);
VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 };
VkImageLayout old_src_layout = section.src->current_layout;
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, section.dst_z };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.extent = { section.w, section.h, 1 };
vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, image->value, image->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
}
}
copy_transfer_regions_impl(cmd, image.get(), sections_to_copy);
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
@ -727,32 +772,13 @@ namespace vk
auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
VkImage dst = result->info.image;
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format);
VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 };
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
for (const auto &region : sections_to_copy)
{
VkImageAspectFlags src_aspect = vk::get_aspect_flags(region.src->info.format);
VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 };
VkImageLayout old_src_layout = region.src->current_layout;
vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
copy_transfer_regions_impl(cmd, result->image(), sections_to_copy);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { region.src_x, region.src_y, 0 };
copy_rgn.dstOffset = { region.dst_x, region.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.extent = { region.w, region.h, 1 };
vkCmdCopyImage(cmd, region.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1, &copy_rgn);
vk::change_image_layout(cmd, region.src, old_src_layout, src_range);
}
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
return result;
}

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "rsx_utils.h"
#include "rsx_methods.h"
#include "RSXThread.h"
@ -14,6 +14,8 @@ extern "C"
namespace rsx
{
atomic_t<u64> g_rsx_shared_tag{ 0 };
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear)
{

View File

@ -30,6 +30,7 @@ namespace rsx
// Definitions
class thread;
extern thread* g_current_renderer;
extern atomic_t<u64> g_rsx_shared_tag;
//Base for resources with reference counting
struct ref_counted
@ -163,6 +164,12 @@ namespace rsx
return static_cast<u32>((1ULL << 32) >> utils::cntlz32(x - 1, true));
}
// Returns an ever-increasing tag value
// Each call post-increments the global g_rsx_shared_tag counter and returns the result of that expression.
// NOTE(review): presumably atomic_t's post-increment yields the pre-increment value, as std::atomic does - confirm.
static inline u64 get_shared_tag()
{
return g_rsx_shared_tag++;
}
// Copy memory in inverse direction from source
// Used to scale negatively x axis while transfering image data
template <typename Ts = u8, typename Td = Ts>
@ -411,6 +418,37 @@ namespace rsx
return std::make_tuple(x, y, width, height);
}
// Computes the overlap between a source and a destination surface that share the same row pitch.
//
// dst_address/dst_w/dst_h/dst_bpp describe the destination surface, src_address/src_w/src_h/src_bpp
// the source surface, and pitch is the row pitch in bytes used to turn the address distance into
// a row (offset / pitch) and a column ((offset % pitch) / bpp).
//
// Returns { source origin, destination origin, overlap size }:
//  - when the source starts before the destination, the overlap starts inside the source and at
//    the destination's origin;
//  - otherwise the overlap starts at the source's origin and inside the destination.
//
// NOTE(review): the size computation assumes the computed origin lies inside the respective
// surface; if it does not, the unsigned subtraction wraps and the size is clamped to the
// other surface's extent.
static inline std::tuple<position2u, position2u, size2u> intersect_region(
    u32 dst_address, u16 dst_w, u16 dst_h, u16 dst_bpp,
    u32 src_address, u16 src_w, u16 src_h, u32 src_bpp,
    u32 pitch)
{
    if (src_address < dst_address)
    {
        // Destination begins somewhere inside the source
        const auto offset = dst_address - src_address;
        const auto src_y = (offset / pitch);
        const auto src_x = (offset % pitch) / src_bpp;
        const auto dst_x = 0u;
        const auto dst_y = 0u;
        const auto w = std::min<u32>(dst_w, src_w - src_x);
        const auto h = std::min<u32>(dst_h, src_h - src_y);

        return std::make_tuple<position2u, position2u, size2u>({ src_x, src_y }, { dst_x, dst_y }, { w, h });
    }
    else
    {
        // Source begins at or after the destination; measure the distance from the destination start.
        // (Subtracting in the other direction would wrap around in unsigned arithmetic.)
        const auto offset = src_address - dst_address;
        const auto src_x = 0u;
        const auto src_y = 0u;
        const auto dst_y = (offset / pitch);
        const auto dst_x = (offset % pitch) / dst_bpp;
        const auto w = std::min<u32>(src_w, dst_w - dst_x);
        const auto h = std::min<u32>(src_h, dst_h - dst_y);

        return std::make_tuple<position2u, position2u, size2u>({ src_x, src_y }, { dst_x, dst_y }, { w, h });
    }
}
static inline const f32 get_resolution_scale()
{
return g_cfg.video.strict_rendering_mode? 1.f : ((f32)g_cfg.video.resolution_scale_percent / 100.f);