rsx: research native texel byte order on cpu readback (WCB) [WIP]

This commit is contained in:
kd-11 2017-11-09 19:47:38 +03:00
parent 59be9dc36e
commit be6b5922dd
5 changed files with 143 additions and 28 deletions

View File

@ -45,6 +45,8 @@ namespace rsx
u16 real_pitch;
u16 rsx_pitch;
u32 gcm_format = 0;
u64 cache_tag = 0;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
@ -96,6 +98,11 @@ namespace rsx
image_type = type;
}
// Stores the given format value in the gcm_format member.
// NOTE(review): presumably one of the CELL_GCM_TEXTURE_* codes - confirm against callers.
void set_gcm_format(u32 format)
{
gcm_format = format;
}
u16 get_width() const
{
return width;
@ -120,6 +127,11 @@ namespace rsx
{
return image_type;
}
// Returns the current value of the gcm_format member (initialised to 0 at declaration).
u32 get_gcm_format() const
{
return gcm_format;
}
};
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>

View File

@ -425,6 +425,16 @@ namespace gl
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples);
}
/* switch (gcm_format)
{
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
rsx::shuffle_texel_data_wzyx<u16>(dst, rsx_pitch, width, height);
break;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
rsx::shuffle_texel_data_wzyx<u32>(dst, rsx_pitch, width, height);
break;
}*/
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

View File

@ -98,6 +98,51 @@ namespace vk
}
}
/**
 * Picks the GCM texture format used to describe a color surface of the given format.
 * Returns { gcm texture format, flag }.
 * NOTE(review): the flag is presumably the "swap bytes on readback" hint - confirm against
 * the lock_memory_region call that receives it.
 * Any surface format not listed falls back to { CELL_GCM_TEXTURE_A8R8G8B8, false }.
 */
std::pair<u32, bool> get_compatible_gcm_format(rsx::surface_color_format color_format)
{
	switch (color_format)
	{
	//Surfaces reported as A8R8G8B8 with the flag set
	case rsx::surface_color_format::a8r8g8b8: //verified
	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
		return{ CELL_GCM_TEXTURE_A8R8G8B8, true };

	//Float and multi-byte component surfaces with the flag set
	case rsx::surface_color_format::w16z16y16x16:
		return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true };
	case rsx::surface_color_format::w32z32y32x32:
		return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true };
	case rsx::surface_color_format::x32:
		return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified
	case rsx::surface_color_format::g8b8:
		return{ CELL_GCM_TEXTURE_G8B8, true };

	//Surfaces reported with the flag cleared
	case rsx::surface_color_format::r5g6b5:
		return{ CELL_GCM_TEXTURE_R5G6B5, false };
	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
		return{ CELL_GCM_TEXTURE_A1R5G5B5, false };
	case rsx::surface_color_format::b8:
		return{ CELL_GCM_TEXTURE_B8, false };

	//Remaining 32-bit surfaces share the fallback result
	case rsx::surface_color_format::a8b8g8r8:
	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
	default:
		return{ CELL_GCM_TEXTURE_A8R8G8B8, false };
	}
}
/** Maps color_format, depth_stencil_format and color count to an int as below :
* idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx
* This should perform a 1:1 mapping
@ -2354,8 +2399,11 @@ void VKGSRender::prepare_rtts()
}
}
const auto color_fmt = rsx::method_registers.surface_color();
const auto depth_fmt = rsx::method_registers.surface_depth_fmt();
m_rtts.prepare_render_target(&*m_current_command_buffer,
rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(),
color_fmt, depth_fmt,
clip_width, clip_height,
rsx::method_registers.surface_color_target(),
surface_addresses, zeta_address,
@ -2378,13 +2426,13 @@ void VKGSRender::prepare_rtts()
m_surface_info[i].address = m_surface_info[i].pitch = 0;
m_surface_info[i].width = clip_width;
m_surface_info[i].height = clip_height;
m_surface_info[i].color_format = rsx::method_registers.surface_color();
m_surface_info[i].color_format = color_fmt;
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
m_depth_surface_info.width = clip_width;
m_depth_surface_info.height = clip_height;
m_depth_surface_info.depth_format = rsx::method_registers.surface_depth_fmt();
m_depth_surface_info.depth_format = depth_fmt;
//Bind created rtts as current fbo...
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
@ -2395,7 +2443,7 @@ void VKGSRender::prepare_rtts()
std::vector<vk::image*> bound_images;
bound_images.reserve(5);
const auto bpp = get_format_block_size_in_bytes(rsx::method_registers.surface_color());
const auto bpp = get_format_block_size_in_bytes(color_fmt);
for (u8 index : draw_buffers)
{
@ -2445,13 +2493,14 @@ void VKGSRender::prepare_rtts()
if (g_cfg.video.write_color_buffers)
{
const auto color_fmt_info = vk::get_compatible_gcm_format(color_fmt);
for (u8 index : draw_buffers)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch);
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second);
}
}
@ -2460,11 +2509,17 @@ void VKGSRender::prepare_rtts()
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
u32 gcm_format = CELL_GCM_TEXTURE_DEPTH16;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)
{
gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8;
pitch *= 2;
}
const u32 range = pitch * m_depth_surface_info.height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch);
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true);
}
}
@ -2512,7 +2567,7 @@ void VKGSRender::prepare_rtts()
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
}
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(color_fmt).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
VkRenderPass current_render_pass = m_render_passes[idx];
if (m_draw_fbo)

View File

@ -21,6 +21,7 @@ namespace vk
VkFence dma_fence = VK_NULL_HANDLE;
bool synchronized = false;
bool flushed = false;
bool pack_unpack_swap_bytes = false;
u64 sync_timestamp = 0;
u64 last_use_timestamp = 0;
vk::render_device* m_device = nullptr;
@ -40,13 +41,16 @@ namespace vk
rsx::buffered_section::reset(base, length, policy);
}
void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch=0, bool managed=true)
void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch, bool managed, const u32 gcm_format, bool pack_swap_bytes = false)
{
width = w;
height = h;
this->depth = depth;
this->mipmaps = mipmaps;
this->gcm_format = gcm_format;
this->pack_unpack_swap_bytes = pack_swap_bytes;
if (managed)
{
managed_texture.reset(image);
@ -265,20 +269,6 @@ namespace vk
const u8 bpp = real_pitch / width;
//We have to do our own byte swapping since the driver doesn't do it for us
bool swap_bytes = false;
switch (vram_texture->info.format)
{
case VK_FORMAT_D32_SFLOAT_S8_UINT:
case VK_FORMAT_D24_UNORM_S8_UINT:
//TODO: Hardware tests to determine correct memory layout
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32_SFLOAT:
swap_bytes = true;
break;
}
if (real_pitch == rsx_pitch)
{
switch (bpp)
@ -289,23 +279,29 @@ namespace vk
do_memory_transfer<u8, false>(pixels_dst, pixels_src);
break;
case 2:
if (swap_bytes)
if (pack_unpack_swap_bytes)
do_memory_transfer<u16, true>(pixels_dst, pixels_src);
else
do_memory_transfer<u16, false>(pixels_dst, pixels_src);
break;
case 4:
if (swap_bytes)
if (pack_unpack_swap_bytes)
do_memory_transfer<u32, true>(pixels_dst, pixels_src);
else
do_memory_transfer<u32, false>(pixels_dst, pixels_src);
break;
case 8:
if (swap_bytes)
if (pack_unpack_swap_bytes)
do_memory_transfer<u64, true>(pixels_dst, pixels_src);
else
do_memory_transfer<u64, false>(pixels_dst, pixels_src);
break;
case 16:
if (pack_unpack_swap_bytes)
do_memory_transfer<u128, true>(pixels_dst, pixels_src);
else
do_memory_transfer<u128, false>(pixels_dst, pixels_src);
break;
}
}
else
@ -314,12 +310,22 @@ namespace vk
//usually we can just get away with nearest filtering
const u8 samples = rsx_pitch / real_pitch;
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, swap_bytes);
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes);
}
dma_buffer->unmap();
//It's highly likely that this surface will be reused, so we just leave resources in place
switch (gcm_format)
{
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
rsx::shuffle_texel_data_wzyx<u16>(pixels_dst, rsx_pitch, width, height);
break;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
rsx::shuffle_texel_data_wzyx<u32>(pixels_dst, rsx_pitch, width, height);
break;
}
return result;
}
@ -692,7 +698,7 @@ namespace vk
cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
region.reset(rsx_address, rsx_size);
region.create(width, height, section_depth, mipmaps, view, image);
region.create(width, height, section_depth, mipmaps, view, image, 0, true, gcm_format);
region.set_dirty(false);
region.set_context(context);
region.set_image_type(type);

View File

@ -162,6 +162,38 @@ namespace rsx
std::array<float, 4> get_constant_blend_colors();
/**
 * Reverses, in place, the component order of every 4-component texel (xyzw -> wzyx).
 * T is the type of a single component. Consecutive rows start row_pitch_in_bytes apart;
 * only the first row_length_in_texels texels of each of the num_rows rows are touched,
 * so any padding at the end of a row is left as it was.
 * TODO: Variable src/dst and optional se conversion
 */
template <typename T>
void shuffle_texel_data_wzyx(void *data, u16 row_pitch_in_bytes, u16 row_length_in_texels, u16 num_rows)
{
	char *row_base = static_cast<char*>(data);

	for (u16 row = 0; row < num_rows; ++row, row_base += row_pitch_in_bytes)
	{
		T *texel = reinterpret_cast<T*>(row_base);
		T *const row_end = texel + (row_length_in_texels * 4);

		for (; texel != row_end; texel += 4)
		{
			//Save the two leading components, then mirror the texel around its centre
			const T first = texel[0];
			const T second = texel[1];

			texel[0] = texel[3];
			texel[1] = texel[2];
			texel[2] = second;
			texel[3] = first;
		}
	}
}
/**
* Clips a rect so that it never falls outside the parent region
* attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0)