From bd21930d1aa08988f69f59a4172e360f7e8ca267 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 1 Aug 2020 14:57:28 +0300 Subject: [PATCH] rsx: Decode swizzled GPU data on CPU readback - Currently this conversion is being done on the CPU to reuse as much code as possible. The expectation is that this almost never happens, so there is no point in increasing maintenance burden by adding compute paths --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 2 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 25 +++++++++++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 41 +++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 9dd312e630..f42122f0af 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -69,7 +69,7 @@ namespace gl case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5); case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8); case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT); - case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_SHORT); + case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT); case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_HALF_FLOAT); case CELL_GCM_TEXTURE_X16: return std::make_tuple(GL_RED, GL_UNSIGNED_SHORT); case CELL_GCM_TEXTURE_Y16_X16: return std::make_tuple(GL_RG, GL_UNSIGNED_SHORT); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index f825ffb660..d094e960ae 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -333,6 +333,31 @@ namespace gl } } + if (is_swizzled()) + { + // This format is completely worthless to CPU processing algorithms where cache lines on die are linear. 
+ // If this is happening, usually it means it was not a planned readback (e.g shared pages situation) + rsx_log.warning("[Performance warning] CPU readback of swizzled data"); + + // Read-modify-write to avoid corrupting already resident memory outside texture region + std::vector tmp_data(rsx_pitch * height); + std::memcpy(tmp_data.data(), dst, tmp_data.size()); + + switch (type) + { + case gl::texture::type::uint_8_8_8_8: + case gl::texture::type::uint_24_8: + rsx::convert_linear_swizzle(tmp_data.data(), dst, width, height, rsx_pitch); + break; + case gl::texture::type::ushort_5_6_5: + case gl::texture::type::ushort: + rsx::convert_linear_swizzle(tmp_data.data(), dst, width, height, rsx_pitch); + break; + default: + rsx_log.error("Unexpected swizzled texture format 0x%x", static_cast(format)); + } + } + if (context == rsx::texture_upload_context::framebuffer_storage) { // Update memory tag diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 88aefec7f8..123ffc8c12 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -292,6 +292,21 @@ namespace vk // Set cb flag for queued dma operations cmd.set_flag(vk::command_buffer::cb_has_dma_transfer); + if (get_context() == rsx::texture_upload_context::dma) + { + // Save readback hint in case transformation is required later + switch (internal_bpp) + { + case 2: + gcm_format = CELL_GCM_TEXTURE_R5G6B5; + break; + case 4: + default: + gcm_format = CELL_GCM_TEXTURE_A8R8G8B8; + break; + } + } + synchronized = true; sync_timestamp = get_system_time(); } @@ -391,6 +406,32 @@ namespace vk const auto range = get_confirmed_range(); vk::flush_dma(range.start, range.length()); + if (is_swizzled()) + { + // This format is completely worthless to CPU processing algorithms where cache lines on die are linear. 
+ // If this is happening, usually it means it was not a planned readback (e.g shared pages situation) + rsx_log.warning("[Performance warning] CPU readback of swizzled data"); + + // Read-modify-write to avoid corrupting already resident memory outside texture region + void* data = get_ptr(range.start); + std::vector tmp_data(rsx_pitch * height); + std::memcpy(tmp_data.data(), data, tmp_data.size()); + + switch (gcm_format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_DEPTH24_D8: + rsx::convert_linear_swizzle(tmp_data.data(), data, width, height, rsx_pitch); + break; + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_DEPTH16: + rsx::convert_linear_swizzle(tmp_data.data(), data, width, height, rsx_pitch); + break; + default: + rsx_log.error("Unexpected swizzled texture format 0x%x", gcm_format); + } + } + if (context == rsx::texture_upload_context::framebuffer_storage) { // Update memory tag