rsx: Texture search rewrite

- Perform a full search across all resource types as needed without
taking too many shortcuts/hacks
This commit is contained in:
kd-11 2019-02-25 18:03:14 +03:00 committed by kd-11
parent 6ef9dcd62e
commit 3a071a9c07
12 changed files with 818 additions and 534 deletions

View File

@ -7,7 +7,7 @@
namespace rsx namespace rsx
{ {
enum texture_upload_context enum texture_upload_context : u32
{ {
shader_read = 1, shader_read = 1,
blit_engine_src = 2, blit_engine_src = 2,
@ -15,7 +15,7 @@ namespace rsx
framebuffer_storage = 8 framebuffer_storage = 8
}; };
enum texture_colorspace enum texture_colorspace : u32
{ {
rgb_linear = 0, rgb_linear = 0,
srgb_nonlinear = 1 srgb_nonlinear = 1

View File

@ -108,6 +108,7 @@ namespace rsx
virtual u16 get_surface_height() const = 0; virtual u16 get_surface_height() const = 0;
virtual u16 get_rsx_pitch() const = 0; virtual u16 get_rsx_pitch() const = 0;
virtual u16 get_native_pitch() const = 0; virtual u16 get_native_pitch() const = 0;
virtual bool is_depth_surface() const = 0;
void save_aa_mode() void save_aa_mode()
{ {
@ -216,6 +217,9 @@ namespace rsx
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {}; std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {}; std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
rsx::address_range m_render_targets_memory_range;
rsx::address_range m_depth_stencil_memory_range;
public: public:
std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {}; std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {};
std::tuple<u32, surface_type> m_bound_depth_stencil = {}; std::tuple<u32, surface_type> m_bound_depth_stencil = {};
@ -323,7 +327,8 @@ namespace rsx
surface_type bind_address_as_render_targets( surface_type bind_address_as_render_targets(
command_list_type command_list, command_list_type command_list,
u32 address, u32 address,
surface_color_format color_format, size_t width, size_t height, surface_color_format color_format,
size_t width, size_t height, size_t pitch,
Args&&... extra_params) Args&&... extra_params)
{ {
// TODO: Fix corner cases // TODO: Fix corner cases
@ -360,6 +365,10 @@ namespace rsx
m_render_targets_storage.erase(address); m_render_targets_storage.erase(address);
} }
// Range test
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height));
m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range);
// Select source of original data if any // Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface; auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
@ -410,7 +419,8 @@ namespace rsx
surface_type bind_address_as_depth_stencil( surface_type bind_address_as_depth_stencil(
command_list_type command_list, command_list_type command_list,
u32 address, u32 address,
surface_depth_format depth_format, size_t width, size_t height, surface_depth_format depth_format,
size_t width, size_t height, size_t pitch,
Args&&... extra_params) Args&&... extra_params)
{ {
surface_storage_type old_surface_storage; surface_storage_type old_surface_storage;
@ -445,6 +455,10 @@ namespace rsx
m_depth_stencil_storage.erase(address); m_depth_stencil_storage.erase(address);
} }
// Range test
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height));
m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range);
// Select source of original data if any // Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface; auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
@ -525,8 +539,9 @@ namespace rsx
if (surface_addresses[surface_index] == 0) if (surface_addresses[surface_index] == 0)
continue; continue;
const auto pitch = clip_width * 4; // TODO
m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index], m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index],
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, std::forward<Args>(extra_params)...)); bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, pitch, std::forward<Args>(extra_params)...));
} }
// Same for depth buffer // Same for depth buffer
@ -538,8 +553,10 @@ namespace rsx
if (!address_z) if (!address_z)
return; return;
// TODO
const auto pitch = (depth_format == rsx::surface_depth_format::z16) ? clip_width * 2 : clip_width * 4;
m_bound_depth_stencil = std::make_tuple(address_z, m_bound_depth_stencil = std::make_tuple(address_z,
bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, std::forward<Args>(extra_params)...)); bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, pitch, std::forward<Args>(extra_params)...));
} }
/** /**
@ -566,6 +583,19 @@ namespace rsx
return surface_type(); return surface_type();
} }
/**
 * Look up the surface stored at exactly `address`.
 * Render target (color) storage is searched first, then depth-stencil storage.
 * If neither map holds an entry for `address`, fmt::throw_exception is invoked,
 * so callers are expected to pass an address that already has a surface.
 */
surface_type get_surface_at(u32 address)
{
	// Iterator names avoid the `_It` form: identifiers that begin with an underscore
	// followed by an uppercase letter are reserved for the implementation.
	if (auto found = m_render_targets_storage.find(address); found != m_render_targets_storage.end())
		return Traits::get(found->second);

	if (auto found = m_depth_stencil_storage.find(address); found != m_depth_stencil_storage.end())
		return Traits::get(found->second);

	fmt::throw_exception("Unreachable" HERE);
}
/** /**
* Get bound color surface raw data. * Get bound color surface raw data.
*/ */
@ -749,7 +779,7 @@ namespace rsx
*/ */
void invalidate_surface_address(u32 addr, bool depth) void invalidate_surface_address(u32 addr, bool depth)
{ {
if (address_is_bound(addr, depth)) if (address_is_bound(addr))
{ {
LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!"); LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!");
return; return;
@ -862,14 +892,8 @@ namespace rsx
return (offset < range); return (offset < range);
} }
bool address_is_bound(u32 address, bool is_depth) const bool address_is_bound(u32 address) const
{ {
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface : m_bound_render_targets) for (auto &surface : m_bound_render_targets)
{ {
const u32 bound_address = std::get<0>(surface); const u32 bound_address = std::get<0>(surface);
@ -877,6 +901,9 @@ namespace rsx
return true; return true;
} }
if (std::get<0>(m_bound_depth_stencil) == address)
return true;
return false; return false;
} }
@ -966,7 +993,7 @@ namespace rsx
} }
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped }; return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), false, clipped };
} }
} }
@ -992,16 +1019,18 @@ namespace rsx
} }
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped }; return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), true, clipped };
} }
} }
return{}; return{};
} }
std::vector<surface_overlap_info> get_merged_texture_memory_region(u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp) template <typename commandbuffer_type>
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp)
{ {
std::vector<surface_overlap_info> result; std::vector<surface_overlap_info> result;
std::vector<std::pair<u32, bool>> dirty;
const u32 limit = texaddr + (required_pitch * required_height); const u32 limit = texaddr + (required_pitch * required_height);
auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data, bool is_depth) auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data, bool is_depth)
@ -1021,6 +1050,12 @@ namespace rsx
if ((this_address + texture_size) <= texaddr) if ((this_address + texture_size) <= texaddr)
continue; continue;
if (surface->read_barrier(cmd); !surface->test())
{
dirty.emplace_back(this_address, is_depth);
continue;
}
surface_overlap_info info; surface_overlap_info info;
info.surface = surface; info.surface = surface;
info.is_depth = is_depth; info.is_depth = is_depth;
@ -1050,8 +1085,27 @@ namespace rsx
} }
}; };
process_list_function(m_render_targets_storage, false); // Range test helper to quickly discard blocks
process_list_function(m_depth_stencil_storage, true); // Fortunately, render targets tend to be clustered anyway
rsx::address_range test = rsx::address_range::start_end(texaddr, limit-1);
if (test.overlaps(m_render_targets_memory_range))
{
process_list_function(m_render_targets_storage, false);
}
if (test.overlaps(m_depth_stencil_memory_range))
{
process_list_function(m_depth_stencil_storage, true);
}
if (!dirty.empty())
{
for (const auto& p : dirty)
{
invalidate_surface_address(p.first, p.second);
}
}
if (result.size() > 1) if (result.size() > 1)
{ {

File diff suppressed because it is too large Load Diff

View File

@ -99,6 +99,17 @@ namespace rsx
fmt::throw_exception("Unreachable " HERE); fmt::throw_exception("Unreachable " HERE);
} }
// Maps a non-deferred cause to its deferred counterpart:
// read -> deferred_read, write -> deferred_write.
// Any other cause is treated as unreachable and raises via fmt::throw_exception.
constexpr invalidation_cause defer() const
{
	// Only causes that are not already deferred may be converted
	AUDIT(!deferred_flush());

	switch (cause)
	{
	case read:
		return deferred_read;
	case write:
		return deferred_write;
	default:
		fmt::throw_exception("Unreachable " HERE);
	}
}
constexpr invalidation_cause() : cause(invalid) {} constexpr invalidation_cause() : cause(invalid) {}
constexpr invalidation_cause(enum_type _cause) : cause(_cause) {} constexpr invalidation_cause(enum_type _cause) : cause(_cause) {}
operator enum_type&() { return cause; } operator enum_type&() { return cause; }

View File

@ -330,12 +330,12 @@ void GLGSRender::end()
_SelectTexture(GL_FRAGMENT_TEXTURES_START + i); _SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
gl::texture_view* view = nullptr; gl::texture_view* view = nullptr;
if (rsx::method_registers.fragment_textures[i].enabled()) auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
view = sampler_state->image_handle;
if (!view && sampler_state->external_subresource_desc.external_handle) if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
if (view = sampler_state->image_handle; UNLIKELY(!view))
{ {
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
} }
@ -375,13 +375,17 @@ void GLGSRender::end()
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get()); auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
_SelectTexture(GL_VERTEX_TEXTURES_START + i); _SelectTexture(GL_VERTEX_TEXTURES_START + i);
if (sampler_state->image_handle) if (rsx::method_registers.vertex_textures[i].enabled() &&
sampler_state->validate())
{ {
sampler_state->image_handle->bind(); if (LIKELY(sampler_state->image_handle))
} {
else if (sampler_state->external_subresource_desc.external_handle) sampler_state->image_handle->bind();
{ }
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); else
{
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
}
} }
else else
{ {
@ -1640,7 +1644,8 @@ void GLGSRender::flip(int buffer)
} }
else else
{ {
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty(); verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture) if (overlap_info.back().surface == render_target_texture)
@ -1825,8 +1830,8 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
gl::command_context null_cmd; auto cmd = can_flush ? gl::command_context{ gl_state } : gl::command_context{};
auto result = m_gl_texture_cache.invalidate_address(null_cmd, address, cause); auto result = m_gl_texture_cache.invalidate_address(cmd, address, cause);
if (!result.violation_handled) if (!result.violation_handled)
return false; return false;

View File

@ -375,30 +375,37 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_gl_texture_cache.clear_ro_tex_invalidate_intr(); m_gl_texture_cache.clear_ro_tex_invalidate_intr();
//Mark buffer regions as NO_ACCESS on Cell visible side const auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format);
if (g_cfg.video.write_color_buffers) for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{ {
auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format); if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_color_buffers)
{ {
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; // Mark buffer regions as NO_ACCESS on Cell-visible side
const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes); std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
} }
else
{
m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range);
}
} }
if (g_cfg.video.write_depth_buffer) if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{ {
if (m_depth_surface_info.address && m_depth_surface_info.pitch) const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_depth_buffer)
{ {
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true); std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
} }
else
{
m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range);
}
} }
if (m_gl_texture_cache.get_ro_tex_invalidate_intr()) if (m_gl_texture_cache.get_ro_tex_invalidate_intr())
@ -585,6 +592,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
{ {
gl::g_hw_blitter->fast_clear_image(cmd, this, {}); gl::g_hw_blitter->fast_clear_image(cmd, this, {});
} }
on_write();
} }
return; return;

View File

@ -107,6 +107,19 @@ namespace gl
return surface_height; return surface_height;
} }
// Reports whether this surface's internal format is one of the depth formats
// (depth16, depth24_stencil8 or depth32f_stencil8).
bool is_depth_surface() const override
{
	const auto ifmt = get_internal_format();
	return ifmt == gl::texture::internal_format::depth16 ||
		ifmt == gl::texture::internal_format::depth24_stencil8 ||
		ifmt == gl::texture::internal_format::depth32f_stencil8;
}
texture* get_surface() override texture* get_surface() override
{ {
return (gl::texture*)this; return (gl::texture*)this;

View File

@ -387,6 +387,7 @@ namespace gl
if (synchronized) if (synchronized)
return; return;
verify(HERE), cmd.drv;
copy_texture(cmd, blocking); copy_texture(cmd, blocking);
if (blocking) if (blocking)
@ -700,7 +701,7 @@ namespace gl
return result; return result;
} }
std::array<GLenum, 4> get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) std::array<GLenum, 4> get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) const
{ {
switch (gcm_format) switch (gcm_format)
{ {
@ -733,7 +734,7 @@ namespace gl
} }
} }
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
{ {
for (const auto &slice : sources) for (const auto &slice : sources)
{ {
@ -759,6 +760,38 @@ namespace gl
} }
} }
// Picks a template texture for a set of transfer sections.
// The first section with a non-null source becomes the candidate; every later
// non-null source must report the same 4-entry native component layout.
// Returns the candidate, or nullptr if the list has no usable source or the
// layouts disagree.
gl::texture* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
	gl::texture* candidate = nullptr;

	for (const auto& region : sections_to_transfer)
	{
		// Sections without a source image cannot contribute a layout
		if (!region.src)
			continue;

		if (!candidate)
		{
			candidate = region.src;
			continue;
		}

		const auto expected = candidate->get_native_component_layout();
		const auto actual = region.src->get_native_component_layout();

		for (int channel = 0; channel < 4; ++channel)
		{
			if (expected[channel] != actual[channel])
			{
				// TODO
				// This requires a far more complex setup as it's not always possible to mix and match without compute assistance
				return nullptr;
			}
		}
	}

	return candidate;
}
protected: protected:
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
@ -909,6 +942,7 @@ namespace gl
gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps, gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps,
input_swizzled, type, subresource_layout); input_swizzled, type, subresource_layout);
section->last_write_tag = rsx::get_shared_tag();
return section; return section;
} }
@ -966,6 +1000,7 @@ namespace gl
return (ifmt == gl::texture::internal_format::depth24_stencil8 || return (ifmt == gl::texture::internal_format::depth24_stencil8 ||
ifmt == gl::texture::internal_format::depth32f_stencil8 || ifmt == gl::texture::internal_format::depth32f_stencil8 ||
ifmt == gl::texture::internal_format::depth_stencil); ifmt == gl::texture::internal_format::depth_stencil);
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return (ifmt == gl::texture::internal_format::depth16 || return (ifmt == gl::texture::internal_format::depth16 ||

View File

@ -1626,12 +1626,12 @@ void VKGSRender::end()
if (current_fp_metadata.referenced_textures_mask & (1 << i)) if (current_fp_metadata.referenced_textures_mask & (1 << i))
{ {
vk::image_view* view = nullptr; vk::image_view* view = nullptr;
if (rsx::method_registers.fragment_textures[i].enabled()) auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
{
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
view = sampler_state->image_handle;
if (!view && sampler_state->external_subresource_desc.external_handle) if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
if (view = sampler_state->image_handle; !view)
{ {
//Requires update, copy subresource //Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
@ -1705,7 +1705,7 @@ void VKGSRender::end()
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get()); auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
auto image_ptr = sampler_state->image_handle; auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->external_subresource_desc.external_handle) if (!image_ptr && sampler_state->validate())
{ {
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_vertex_textures_dirty[i] = true; m_vertex_textures_dirty[i] = true;
@ -2970,28 +2970,36 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_texture_cache.notify_surface_changed(layout.zeta_address); m_texture_cache.notify_surface_changed(layout.zeta_address);
} }
if (g_cfg.video.write_color_buffers) const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
for (u8 index : m_draw_buffers)
{ {
const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format); if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
for (u8 index : m_draw_buffers)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second); m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
} }
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
}
} }
if (g_cfg.video.write_depth_buffer) if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{ {
if (m_depth_surface_info.address && m_depth_surface_info.pitch) const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_depth_buffer)
{ {
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false); m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
} }
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
}
} }
auto vk_depth_format = (layout.zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), layout.depth_format); auto vk_depth_format = (layout.zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), layout.depth_format);
@ -3301,7 +3309,7 @@ void VKGSRender::flip(int buffer)
} }
else else
{ {
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty(); verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture) if (overlap_info.back().surface == render_target_texture)
@ -3341,16 +3349,14 @@ void VKGSRender::flip(int buffer)
{ {
// Read from cell // Read from cell
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height); const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const auto overlap = m_texture_cache.find_texture_from_range(range); const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask);
bool flush_queue = false; bool flush_queue = false;
for (const auto & section : overlap) for (const auto & section : overlap)
{ {
if (section->get_protection() == utils::protection::no) section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
{ flush_queue = true;
section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
flush_queue = true;
}
} }
if (flush_queue) if (flush_queue)

View File

@ -51,6 +51,11 @@ namespace vk
return native_pitch; return native_pitch;
} }
// True when the attachment aspect mask has the depth aspect bit set.
bool is_depth_surface() const override
{
	return (attachment_aspect_flag & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
}
bool matches_dimensions(u16 _width, u16 _height) const bool matches_dimensions(u16 _width, u16 _height) const
{ {
//Use forward scaling to account for rounding and clamping errors //Use forward scaling to account for rounding and clamping errors

View File

@ -123,7 +123,7 @@ namespace vk
if (src->current_layout != preferred_src_format) if (src->current_layout != preferred_src_format)
change_image_layout(cmd, src->value, src_layout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); change_image_layout(cmd, src->value, src_layout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dst->current_layout != preferred_dst_format) if (dst->current_layout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst->value, dst_layout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); change_image_layout(cmd, dst->value, dst_layout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
auto scratch_buf = vk::get_scratch_buffer(); auto scratch_buf = vk::get_scratch_buffer();
@ -196,7 +196,7 @@ namespace vk
if (src_layout != preferred_src_format) if (src_layout != preferred_src_format)
change_image_layout(cmd, src->value, preferred_src_format, src_layout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); change_image_layout(cmd, src->value, preferred_src_format, src_layout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dst_layout != preferred_dst_format) if (dst_layout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst->value, preferred_dst_format, dst_layout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); change_image_layout(cmd, dst->value, preferred_dst_format, dst_layout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
} }
@ -231,7 +231,7 @@ namespace vk
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != preferred_dst_format) if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
@ -245,7 +245,7 @@ namespace vk
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != preferred_dst_format) if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
} }
@ -272,7 +272,7 @@ namespace vk
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format) if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (compatible_formats && src_width == dst_width && src_height == dst_height) if (compatible_formats && src_width == dst_width && src_height == dst_height)
@ -296,7 +296,7 @@ namespace vk
} }
else else
{ {
auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless, auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless,
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
{ {
const u32 src_w = u32(src_rect.x2 - src_rect.x1); const u32 src_w = u32(src_rect.x2 - src_rect.x1);
@ -314,14 +314,14 @@ namespace vk
//2. Blit typeless surface to self //2. Blit typeless surface to self
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST); 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter);
//3. Copy back the aspect bits //3. Copy back the aspect bits
copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format, copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format,
{0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, VK_IMAGE_ASPECT_COLOR_BIT, aspect, 0xFF, transfer_flags); {0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, VK_IMAGE_ASPECT_COLOR_BIT, aspect, 0xFF, transfer_flags);
}; };
auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless, auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless,
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
{ {
const u32 src_w = u32(src_rect.x2 - src_rect.x1); const u32 src_w = u32(src_rect.x2 - src_rect.x1);
@ -345,7 +345,7 @@ namespace vk
//2. Blit typeless surface to self //2. Blit typeless surface to self
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST); 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter);
//3. Copy back the aspect bits //3. Copy back the aspect bits
info.imageExtent = { dst_w, dst_h, 1 }; info.imageExtent = { dst_w, dst_h, 1 };
@ -423,7 +423,7 @@ namespace vk
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format) if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
} }

View File

@ -457,7 +457,7 @@ namespace vk
m_discarded_memory_size = 0; m_discarded_memory_size = 0;
} }
VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) const
{ {
switch (gcm_format) switch (gcm_format)
{ {
@ -496,7 +496,7 @@ namespace vk
return mapping; return mapping;
} }
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const
{ {
for (const auto &section : sections_to_transfer) for (const auto &section : sections_to_transfer)
{ {
@ -515,10 +515,19 @@ namespace vk
copy_rgn.srcOffset = { section.src_x, section.src_y, 0 }; copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 }; copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, section.dst_z, 1 }; copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.extent = { section.src_w, section.src_h, 1 }; copy_rgn.extent = { section.src_w, section.src_h, 1 };
if (dst->info.imageType == VK_IMAGE_TYPE_3D)
{
copy_rgn.dstOffset.z = section.dst_z;
}
else
{
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
}
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn); vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range); vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
@ -579,6 +588,35 @@ namespace vk
} }
} }
// Selects the image to use as a template for a set of transfer sections.
//
// Sections without a source image are ignored. The first section that has a
// source supplies the candidate; every later source must carry the same
// native_component_map (r, g, b and a) as that candidate.
//
// Returns the candidate image, or nullptr when no section has a source image
// or when the sources disagree on their native component mapping.
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
	vk::image* candidate = nullptr;

	for (const auto& entry : sections_to_transfer)
	{
		vk::image* const current = entry.src;
		if (current == nullptr)
		{
			// No source image on this section, nothing to compare
			continue;
		}

		if (candidate == nullptr)
		{
			// First usable source becomes the reference for all that follow
			candidate = current;
			continue;
		}

		const auto& expected = candidate->native_component_map;
		const auto& actual = current->native_component_map;

		const bool same_mapping =
			actual.a == expected.a &&
			actual.r == expected.r &&
			actual.g == expected.g &&
			actual.b == expected.b;

		if (!same_mapping)
		{
			// TODO
			// Mixed mappings require a far more complex setup, as it is not always
			// possible to mix and match without compute assistance
			return nullptr;
		}
	}

	return candidate;
}
protected: protected:
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
@ -587,7 +625,7 @@ namespace vk
std::unique_ptr<vk::image_view> view; std::unique_ptr<vk::image_view> view;
VkImageAspectFlags aspect; VkImageAspectFlags aspect;
VkImageCreateFlags image_flags; VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
if (source) if (source)
@ -599,13 +637,10 @@ namespace vk
//HACK! Should use typeless transfer //HACK! Should use typeless transfer
dst_format = source->info.format; dst_format = source->info.format;
} }
image_flags = source->info.flags;
} }
else else
{ {
aspect = vk::get_aspect_flags(dst_format); aspect = vk::get_aspect_flags(dst_format);
image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE)? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
} }
image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
@ -772,13 +807,25 @@ namespace vk
auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D, auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false); VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
const auto image = result->image();
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format);
VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 };
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
copy_transfer_regions_impl(cmd, result->image(), sections_to_copy); if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT))
{
VkClearColorValue clear = {};
vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
else
{
VkClearDepthStencilValue clear = { 1.f, 0 };
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); copy_transfer_regions_impl(cmd, image, sections_to_copy);
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
return result; return result;
} }
@ -940,6 +987,7 @@ namespace vk
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subres_range); change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subres_range);
section->last_write_tag = rsx::get_shared_tag();
return section; return section;
} }
@ -988,6 +1036,7 @@ namespace vk
case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT);
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return (vk_format == VK_FORMAT_D16_UNORM); return (vk_format == VK_FORMAT_D16_UNORM);