From a4495c35b734ef9d9421d469ada3eeb191044059 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 6 Apr 2019 17:59:58 +0300 Subject: [PATCH] rsx: Fixups for swizzled texture scanning - Revert to using block metrics, but with optional per-channel decode stage for the final transfer. Much cleaner than hacking in the width to be in channels instead of blocks. --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 106 ++++++++++++++++++-------- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 5cabc2fd49..582ad86e40 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -25,9 +25,16 @@ namespace template constexpr void copy(gsl::span dst, gsl::span src) { - static_assert(std::is_convertible::value, "Cannot convert source and destination span type."); - verify(HERE), (dst.size() == src.size()); - std::copy(src.begin(), src.end(), dst.begin()); + if (std::is_same::value) + { + std::memcpy(dst.data(), src.data(), src.size_bytes()); + } + else + { + static_assert(std::is_convertible::value, "Cannot convert source and destination span type."); + verify(HERE), (dst.size() == src.size()); + std::copy(src.begin(), src.end(), dst.begin()); + } } u16 convert_rgb655_to_rgb565(const u16 bits) @@ -40,30 +47,63 @@ namespace struct copy_unmodified_block { template - static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 words_per_block, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { static_assert(sizeof(T) == sizeof(U), "Type size doesn't match."); + + const u32 width_in_words = width_in_block * words_per_block; + const u32 src_pitch_in_words = src_pitch_in_block * words_per_block; + const u32 dst_pitch_in_words = dst_pitch_in_block * words_per_block; + + u32 src_offset = 0, dst_offset = 0; for (int row = 0; row < row_count * depth; ++row) - copy(dst.subspan(row * dst_pitch_in_block, width_in_block), src.subspan(row * src_pitch_in_block, width_in_block)); + { + copy(dst.subspan(dst_offset, width_in_words), src.subspan(src_offset, width_in_words)); + + src_offset += src_pitch_in_words; + dst_offset += dst_pitch_in_words; + } } }; struct copy_unmodified_block_swizzled { + // NOTE: Pixel channel types are T (out) and const U (in). V is the pixel block type that consumes one whole pixel. + // e.g 4x16-bit format can use u16, be_t, u64 as arguments template - static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block) + static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 words_per_block, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block) { - if (std::is_same::value && dst_pitch_in_block == width_in_block) + if (std::is_same::value && dst_pitch_in_block == width_in_block && words_per_block == 1) { rsx::convert_linear_swizzle_3d((void*)src.data(), (void*)dst.data(), width_in_block, row_count, depth); } else { - std::vector tmp(width_in_block * row_count * depth); - rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + std::vector tmp(width_in_block * 2 * words_per_block * row_count * depth); + if (LIKELY(words_per_block == 1)) + { + rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + } + else + { + switch (words_per_block * sizeof(T)) + { + case 4: + rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + break; + case 8: + rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + break; + case 16: + rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + break; + default: + fmt::throw_exception("Failed to decode swizzled format, words_per_block=%d, src_type_size=%d", words_per_block, sizeof(T)); + } + } gsl::span src_span = tmp; - copy_unmodified_block::copy_mipmap_level(dst, src_span, width_in_block, row_count, depth, dst_pitch_in_block, width_in_block); + copy_unmodified_block::copy_mipmap_level(dst, src_span, words_per_block, width_in_block, row_count, depth, dst_pitch_in_block, width_in_block); } } }; @@ -262,6 +302,12 @@ u32 get_row_pitch_in_block(u16 width_in_block, size_t multiple_constraints_in_by return static_cast(divided * multiple_constraints_in_byte / sizeof(T)); } +u32 get_row_pitch_in_block(u16 block_size_in_bytes, u16 width_in_block, size_t multiple_constraints_in_byte) +{ + size_t divided = (width_in_block * block_size_in_bytes + multiple_constraints_in_byte - 1) / multiple_constraints_in_byte; + return static_cast(divided * multiple_constraints_in_byte / block_size_in_bytes); +} + /** * Since rsx ignore unused dimensionnality some app set them to 0. * Use 1 value instead to be more general. @@ -363,9 +409,9 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre case CELL_GCM_TEXTURE_B8: { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -403,9 +449,9 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre case CELL_GCM_TEXTURE_G8B8: { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -413,9 +459,9 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -423,9 +469,9 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre case CELL_GCM_TEXTURE_D8R8G8B8: { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -437,28 +483,28 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: { - const auto block_size_in_words = get_format_block_size_in_bytes(format) / 2; - const auto words_per_row = (w * block_size_in_words); - const auto src_pitch_in_words = (src_layout.pitch_in_block * block_size_in_words); + const u16 block_size = get_format_block_size_in_bytes(format); + const u16 words_per_block = block_size / 2; + const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of); if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_row, h, depth, get_row_pitch_in_block(words_per_row, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, dst_pitch_in_block); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_row, h, depth, get_row_pitch_in_block(words_per_row, dst_row_pitch_multiple_of), src_pitch_in_words); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, dst_pitch_in_block, src_layout.pitch_in_block); break; } case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: { - const auto block_size_in_words = get_format_block_size_in_bytes(format) / 4; - const auto words_per_row = (w * block_size_in_words); - const auto src_pitch_in_words = (src_layout.pitch_in_block * block_size_in_words); + const u16 block_size = get_format_block_size_in_bytes(format); + const u16 words_per_block = block_size / 4; + const auto dst_pitch_in_block = get_row_pitch_in_block(block_size, w, dst_row_pitch_multiple_of); if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_row, h, depth, get_row_pitch_in_block(words_per_row, dst_row_pitch_multiple_of)); + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, dst_pitch_in_block); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_row, h, depth, get_row_pitch_in_block(words_per_row, dst_row_pitch_multiple_of), src_pitch_in_words); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, dst_pitch_in_block, src_layout.pitch_in_block); break; } @@ -473,7 +519,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; } @@ -490,7 +536,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; }