span: implement as_span workarounds as utils::bless

Minor cleanup.
This commit is contained in:
Nekotekina 2021-05-31 15:25:02 +03:00
parent d862817485
commit a1608b636f
3 changed files with 57 additions and 77 deletions

View File

@ -46,20 +46,12 @@ const bool s_use_ssse3 = utils::has_ssse3();
const bool s_use_sse4_1 = utils::has_sse41();
const bool s_use_avx2 = utils::has_avx2();
namespace
namespace utils
{
// FIXME: GSL as_span break build if template parameter is non const with current revision.
// Replace with true as_span when fixed.
template <typename T>
std::span<T> as_span_workaround(std::span<std::byte> unformated_span)
template <typename T, typename U>
[[nodiscard]] auto bless(const std::span<U>& span)
{
return{ reinterpret_cast<T*>(unformated_span.data()), unformated_span.size_bytes() / sizeof(T) };
}
template <typename T>
std::span<T> as_const_span(std::span<const std::byte> unformated_span)
{
return{ reinterpret_cast<T*>(unformated_span.data()), unformated_span.size_bytes() / sizeof(T) };
return std::span<T>(bless<T>(span.data()), sizeof(U) * span.size() / sizeof(T));
}
}
@ -665,7 +657,7 @@ void write_vertex_array_data_to_buffer(std::span<std::byte> raw_dst_span, std::s
}
case rsx::vertex_base_type::cmp:
{
std::span<u16> dst_span = as_span_workaround<u16>(raw_dst_span);
std::span<u16> dst_span = utils::bless<u16>(raw_dst_span);
for (u32 i = 0; i < count; ++i)
{
u32 src_value;
@ -1295,13 +1287,13 @@ std::tuple<u32, u32, u32> write_index_array_data_to_buffer(std::span<std::byte>
{
case rsx::index_array_type::u16:
{
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u16>(dst_ptr),
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
return write_index_array_data_to_buffer_impl<u16>(utils::bless<u16>(dst_ptr), utils::bless<const be_t<u16>>(src_ptr),
draw_mode, restart_index_enabled, restart_index, expands);
}
case rsx::index_array_type::u32:
{
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, expands);
return write_index_array_data_to_buffer_impl<u32>(utils::bless<u32>(dst_ptr), utils::bless<const be_t<u32>>(src_ptr),
draw_mode, restart_index_enabled, restart_index, expands);
}
default:
fmt::throw_exception("Unreachable");

View File

@ -6,37 +6,24 @@
#include "util/asm.hpp"
namespace utils
{
template <typename T, typename U>
[[nodiscard]] auto bless(const std::span<U>& span)
{
return std::span<T>(bless<T>(span.data()), sizeof(U) * span.size() / sizeof(T));
}
}
namespace
{
// FIXME: GSL as_span break build if template parameter is non const with current revision.
// Replace with true as_span when fixed.
template <typename T>
std::span<T> as_span_workaround(std::span<std::byte> unformated_span)
{
return{ reinterpret_cast<T*>(unformated_span.data()), unformated_span.size_bytes() / sizeof(T) };
}
template <typename T>
std::span<T> as_const_span(std::span<const std::byte> unformated_span)
{
return{ reinterpret_cast<T*>(unformated_span.data()), unformated_span.size_bytes() / sizeof(T) };
}
// TODO: Make this function part of GSL
// Note: Doesn't handle overlapping range detection.
template<typename T1, typename T2>
constexpr void copy(std::span<T1> dst, std::span<T2> src)
{
static_assert(std::is_convertible<T1, T2>::value, "Cannot convert source and destination span type.");
std::copy(src.begin(), src.end(), dst.begin());
}
u16 convert_rgb655_to_rgb565(const u16 bits)
{
// g6 = g5
// r5 = (((bits & 0xFC00) >> 1) & 0xFC00) << 1 is equivalent to truncating the least significant bit
return (bits & 0xF81F) | (bits & 0x3E0) << 1;
}
u16 convert_rgb655_to_rgb565(const u16 bits)
{
// g6 = g5
// r5 = (((bits & 0xFC00) >> 1) & 0xFC00) << 1 is equivalent to truncating the least significant bit
return (bits & 0xF81F) | (bits & 0x3E0) << 1;
}
struct copy_unmodified_block
{
@ -49,7 +36,7 @@ struct copy_unmodified_block
{
// Fast copy
const auto data_length = src_pitch_in_block * words_per_block * row_count * depth;
copy(dst, src.subspan(0, data_length));
std::copy_n(src.begin(), data_length, dst.begin());
return;
}
@ -69,7 +56,7 @@ struct copy_unmodified_block
for (int row = 0; row < row_count; ++row)
{
// NNOTE: src_offset is already shifted along the border at initialization
copy(dst.subspan(dst_offset, width_in_words), src.subspan(src_offset, width_in_words));
std::copy_n(src.begin() + src_offset, width_in_words, dst.begin() + dst_offset);
src_offset += src_pitch_in_words;
dst_offset += dst_pitch_in_words;
@ -164,7 +151,7 @@ struct copy_unmodified_block_vtc
for (u32 i = 0; i < row_element_count; i += 1)
{
// Copy one span (8 bytes for DXT1 or 16 bytes for DXT5)
copy(dst.subspan(dst_offset + i, 1), src.subspan(src_offset + i * 4, 1));
dst[dst_offset + i] = src[src_offset + i];
}
dst_offset += row_element_count;
@ -189,7 +176,7 @@ struct copy_unmodified_block_vtc
for (u32 i = 0; i < row_element_count; i += 1)
{
// Copy one span (8 bytes for DXT1 or 16 bytes for DXT5)
copy(dst.subspan(dst_offset + i, 1), src.subspan(src_offset + i * vtc_tile_count, 1));
dst[dst_offset + i] = src[src_offset + i * vtc_tile_count];
}
dst_offset += row_element_count;
@ -627,22 +614,22 @@ namespace rsx
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
{
copy_decoded_rb_rg_block::copy_mipmap_level<true>(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level<true>(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
{
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break;
}
case CELL_GCM_TEXTURE_R6G5B5:
{
if (is_swizzled)
copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment));
copy_rgb655_block_swizzled::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment));
else
copy_rgb655_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block);
copy_rgb655_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block);
break;
}
@ -707,7 +694,7 @@ namespace rsx
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
copy_unmodified_block_vtc::copy_mipmap_level(utils::bless<u64>(dst_buffer), utils::bless<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
else if (caps.supports_zero_copy)
{
@ -716,7 +703,7 @@ namespace rsx
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u64>(dst_buffer), utils::bless<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
}
break;
}
@ -729,7 +716,7 @@ namespace rsx
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
copy_unmodified_block_vtc::copy_mipmap_level(utils::bless<u128>(dst_buffer), utils::bless<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
else if (caps.supports_zero_copy)
{
@ -738,7 +725,7 @@ namespace rsx
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u128>(dst_buffer), utils::bless<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
}
break;
}
@ -753,7 +740,7 @@ namespace rsx
{
if (is_swizzled)
{
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u8>(dst_buffer), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
}
else if (caps.supports_zero_copy)
{
@ -762,7 +749,7 @@ namespace rsx
}
else
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u8>(dst_buffer), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
else
@ -796,11 +783,11 @@ namespace rsx
}
else if (word_size == 2)
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
else
@ -808,16 +795,16 @@ namespace rsx
if (word_size == 2)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
else if (word_size == 4)
{
if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
copy_unmodified_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
}
}
}

View File

@ -4,6 +4,7 @@
#include "table_item_delegate.h"
#include "Emu/RSX/RSXThread.h"
#include "Emu/RSX/gcm_printing.h"
#include "util/asm.hpp"
#include <QHBoxLayout>
#include <QHeaderView>
@ -23,12 +24,12 @@ enum GCMEnumTypes
constexpr auto qstr = QString::fromStdString;
namespace
namespace utils
{
template <typename T>
std::span<T> as_const_span(std::span<const std::byte> unformated_span)
template <typename T, typename U>
[[nodiscard]] auto bless(const std::span<U>& span)
{
return{ reinterpret_cast<T*>(unformated_span.data()), unformated_span.size_bytes() / sizeof(T) };
return std::span<T>(bless<T>(span.data()), sizeof(U) * span.size() / sizeof(T));
}
}
@ -420,12 +421,12 @@ namespace
{
case rsx::surface_color_format::b8:
{
const u8 value = as_const_span<const u8>(orig_buffer)[idx];
const u8 value = utils::bless<const u8>(orig_buffer)[idx];
return{ value, value, value };
}
case rsx::surface_color_format::x32:
{
const be_t<u32> stored_val = as_const_span<const be_t<u32>>(orig_buffer)[idx];
const be_t<u32> stored_val = utils::bless<const be_t<u32>>(orig_buffer)[idx];
const u32 swapped_val = stored_val;
const f32 float_val = std::bit_cast<f32>(swapped_val);
const u8 val = float_val * 255.f;
@ -435,19 +436,19 @@ namespace
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
{
const auto ptr = as_const_span<const u8>(orig_buffer);
const auto ptr = utils::bless<const u8>(orig_buffer);
return{ ptr[1 + idx * 4], ptr[2 + idx * 4], ptr[3 + idx * 4] };
}
case rsx::surface_color_format::a8r8g8b8:
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
{
const auto ptr = as_const_span<const u8>(orig_buffer);
const auto ptr = utils::bless<const u8>(orig_buffer);
return{ ptr[3 + idx * 4], ptr[2 + idx * 4], ptr[1 + idx * 4] };
}
case rsx::surface_color_format::w16z16y16x16:
{
const auto ptr = as_const_span<const u16>(orig_buffer);
const auto ptr = utils::bless<const u16>(orig_buffer);
const f16 h0 = static_cast<f16>(ptr[4 * idx]);
const f16 h1 = static_cast<f16>(ptr[4 * idx + 1]);
const f16 h2 = static_cast<f16>(ptr[4 * idx + 2]);
@ -532,7 +533,7 @@ void rsx_debugger::OnClickDrawCalls()
{
for (u32 col = 0; col < width; col++)
{
const u32 depth_val = as_const_span<const u32>(orig_buffer)[row * width + col];
const u32 depth_val = utils::bless<const u32>(orig_buffer)[row * width + col];
const u8 displayed_depth_val = 255 * depth_val / 0xFFFFFF;
buffer[4 * col + 0 + width * row * 4] = displayed_depth_val;
buffer[4 * col + 1 + width * row * 4] = displayed_depth_val;
@ -547,7 +548,7 @@ void rsx_debugger::OnClickDrawCalls()
{
for (u32 col = 0; col < width; col++)
{
const u16 depth_val = as_const_span<const u16>(orig_buffer)[row * width + col];
const u16 depth_val = utils::bless<const u16>(orig_buffer)[row * width + col];
const u8 displayed_depth_val = 255 * depth_val / 0xFFFF;
buffer[4 * col + 0 + width * row * 4] = displayed_depth_val;
buffer[4 * col + 1 + width * row * 4] = displayed_depth_val;
@ -571,7 +572,7 @@ void rsx_debugger::OnClickDrawCalls()
{
for (u32 col = 0; col < width; col++)
{
const u8 stencil_val = as_const_span<const u8>(orig_buffer)[row * width + col];
const u8 stencil_val = utils::bless<const u8>(orig_buffer)[row * width + col];
buffer[4 * col + 0 + width * row * 4] = stencil_val;
buffer[4 * col + 1 + width * row * 4] = stencil_val;
buffer[4 * col + 2 + width * row * 4] = stencil_val;