// Patch summary (descriptive preamble; text before the first "diff --git" header is not part of any hunk).
//
// Purpose: carry the guest GCM texture format alongside each cached texture section, and use it (plus an
// explicit swap-bytes flag) when the Vulkan texture cache writes a surface back to guest memory.
//
// rpcs3/Emu/RSX/Common/texture_cache.h
//   - Adds the member `u32 gcm_format = 0` with `set_gcm_format(u32)` / `get_gcm_format() const` accessors.
//
// rpcs3/Emu/RSX/GL/GLTextureCache.h
//   - Adds only a commented-out switch on gcm_format (a wzyx shuffle for the two W*_Z*_Y*_X*_FLOAT formats)
//     ahead of glUnmapBuffer; being commented out, it adds no executable code to the GL path.
//
// rpcs3/Emu/RSX/VK/VKGSRender.cpp
//   - Adds vk::get_compatible_gcm_format(rsx::surface_color_format), returning a pair of
//     { CELL_GCM_TEXTURE_* constant, bool }. The bool is forwarded as the last argument of
//     lock_memory_region -- presumably the swap-bytes flag; confirm against that function's signature.
//     Formats not listed fall through to `default:` and yield { CELL_GCM_TEXTURE_A8R8G8B8, false }.
//   - prepare_rtts() reads surface_color() / surface_depth_fmt() once into color_fmt / depth_fmt and reuses them.
//   - Colour lock_memory_region calls now pass the pair's two values; the depth call passes
//     CELL_GCM_TEXTURE_DEPTH16, or CELL_GCM_TEXTURE_DEPTH24_D8 when depth_format != z16, and `true`.
//   - NOTE(review): the get_render_pass_location() call still reads rsx::method_registers.surface_depth_fmt()
//     directly instead of the new depth_fmt local -- likely an oversight, confirm.
//
// rpcs3/Emu/RSX/VK/VKTextureCache.h
//   - Adds the member `bool pack_unpack_swap_bytes = false`.
//   - create() drops the defaults on rsx_pitch and managed, and gains `gcm_format` (required) and
//     `pack_swap_bytes = false`; both new values are stored on the section.
//   - The VkFormat-based `swap_bytes` switch is removed; the transfer code tests pack_unpack_swap_bytes instead.
//   - Adds a `case 16:` to the bpp switch used when real_pitch == rsx_pitch.
//   - After dma_buffer->unmap(), pixels_dst is shuffled in place for CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT
//     and CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT.
//   - NOTE(review): the shuffle is given `width` texels per row even on the scale_image_nearest path
//     (real_pitch != rsx_pitch) -- confirm this covers the whole scaled row.
//   - The one create() call site in this diff passes (..., 0, true, gcm_format) and leaves pack_swap_bytes
//     at its default of false.
//
// rpcs3/Emu/RSX/rsx_utils.h
//   - Adds rsx::shuffle_texel_data_wzyx(void *data, u16 row_pitch_in_bytes, u16 row_length_in_texels, u16 num_rows):
//     walks num_rows rows spaced row_pitch_in_bytes apart and reverses the order of the four T components
//     of each of the row_length_in_texels texels, in place.
//
// NOTE(review): in this copy of the diff the original line breaks are collapsed and template argument lists
// appear to be missing (e.g. `std::pair get_compatible_gcm_format`, `template void shuffle_texel_data_wzyx`,
// `const_cast(data)`, and do_memory_transfer calls that read the same in both branches). Verify against the
// upstream commit before applying.
diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 4715c75194..d53b726c66 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -45,6 +45,8 @@ namespace rsx u16 real_pitch; u16 rsx_pitch; + u32 gcm_format = 0; + u64 cache_tag = 0; rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; @@ -96,6 +98,11 @@ namespace rsx image_type = type; } + void set_gcm_format(u32 format) + { + gcm_format = format; + } + u16 get_width() const { return width; @@ -120,6 +127,11 @@ namespace rsx { return image_type; } + + u32 get_gcm_format() const + { + return gcm_format; + } }; template diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 6e3cf9338f..c664e1dbd9 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -425,6 +425,16 @@ namespace gl rsx::scale_image_nearest(dst, const_cast(data), width, height, rsx_pitch, real_pitch, pixel_size, samples); } +/* switch (gcm_format) + { + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); + break; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); + break; + }*/ + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 726577084b..1776bc1d12 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -98,6 +98,51 @@ namespace vk } } + std::pair get_compatible_gcm_format(rsx::surface_color_format color_format) + { + switch (color_format) + { + case rsx::surface_color_format::r5g6b5: + return{ CELL_GCM_TEXTURE_R5G6B5, false }; + + case rsx::surface_color_format::a8r8g8b8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; //verified + + case rsx::surface_color_format::a8b8g8r8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, 
false }; + + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; + + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; + + case rsx::surface_color_format::w16z16y16x16: + return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true }; + + case rsx::surface_color_format::w32z32y32x32: + return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true }; + + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return{ CELL_GCM_TEXTURE_A1R5G5B5, false }; + + case rsx::surface_color_format::b8: + return{ CELL_GCM_TEXTURE_B8, false }; + + case rsx::surface_color_format::g8b8: + return{ CELL_GCM_TEXTURE_G8B8, true }; + + case rsx::surface_color_format::x32: + return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified + + default: + return{ CELL_GCM_TEXTURE_A8R8G8B8, false }; + } + } + /** Maps color_format, depth_stencil_format and color count to an int as below : * idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx * This should perform a 1:1 mapping @@ -2354,8 +2399,11 @@ void VKGSRender::prepare_rtts() } } + const auto color_fmt = rsx::method_registers.surface_color(); + const auto depth_fmt = rsx::method_registers.surface_depth_fmt(); + m_rtts.prepare_render_target(&*m_current_command_buffer, - rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), + color_fmt, depth_fmt, clip_width, clip_height, rsx::method_registers.surface_color_target(), surface_addresses, zeta_address, @@ -2378,13 +2426,13 @@ void VKGSRender::prepare_rtts() m_surface_info[i].address = m_surface_info[i].pitch = 0; m_surface_info[i].width = clip_width; m_surface_info[i].height = clip_height; - m_surface_info[i].color_format = rsx::method_registers.surface_color(); + m_surface_info[i].color_format = color_fmt; } 
m_depth_surface_info.address = m_depth_surface_info.pitch = 0; m_depth_surface_info.width = clip_width; m_depth_surface_info.height = clip_height; - m_depth_surface_info.depth_format = rsx::method_registers.surface_depth_fmt(); + m_depth_surface_info.depth_format = depth_fmt; //Bind created rtts as current fbo... std::vector draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); @@ -2395,7 +2443,7 @@ void VKGSRender::prepare_rtts() std::vector bound_images; bound_images.reserve(5); - const auto bpp = get_format_block_size_in_bytes(rsx::method_registers.surface_color()); + const auto bpp = get_format_block_size_in_bytes(color_fmt); for (u8 index : draw_buffers) { @@ -2445,13 +2493,14 @@ void VKGSRender::prepare_rtts() if (g_cfg.video.write_color_buffers) { + const auto color_fmt_info = vk::get_compatible_gcm_format(color_fmt); for (u8 index : draw_buffers) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; const u32 range = m_surface_info[index].pitch * m_surface_info[index].height; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, - m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch); + m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second); } } @@ -2460,11 +2509,17 @@ void VKGSRender::prepare_rtts() if (m_depth_surface_info.address && m_depth_surface_info.pitch) { u32 pitch = m_depth_surface_info.width * 2; - if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; + u32 gcm_format = CELL_GCM_TEXTURE_DEPTH16; + + if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) + { + gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8; + pitch *= 2; + } const u32 range = pitch * m_depth_surface_info.height; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), 
m_depth_surface_info.address, range, - m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch); + m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true); } } @@ -2512,7 +2567,7 @@ void VKGSRender::prepare_rtts() fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); } - size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size()); + size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(color_fmt).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size()); VkRenderPass current_render_pass = m_render_passes[idx]; if (m_draw_fbo) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 7cd01d42aa..84f812434b 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -21,6 +21,7 @@ namespace vk VkFence dma_fence = VK_NULL_HANDLE; bool synchronized = false; bool flushed = false; + bool pack_unpack_swap_bytes = false; u64 sync_timestamp = 0; u64 last_use_timestamp = 0; vk::render_device* m_device = nullptr; @@ -40,13 +41,16 @@ namespace vk rsx::buffered_section::reset(base, length, policy); } - void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch=0, bool managed=true) + void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch, bool managed, const u32 gcm_format, bool pack_swap_bytes = false) { width = w; height = h; this->depth = depth; this->mipmaps = mipmaps; + this->gcm_format 
= gcm_format; + this->pack_unpack_swap_bytes = pack_swap_bytes; + if (managed) { managed_texture.reset(image); @@ -265,20 +269,6 @@ namespace vk const u8 bpp = real_pitch / width; //We have to do our own byte swapping since the driver doesnt do it for us - bool swap_bytes = false; - switch (vram_texture->info.format) - { - case VK_FORMAT_D32_SFLOAT_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - //TODO: Hardware tests to determine correct memory layout - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_R16G16B16A16_SFLOAT: - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32_SFLOAT: - swap_bytes = true; - break; - } - if (real_pitch == rsx_pitch) { switch (bpp) @@ -289,23 +279,29 @@ namespace vk do_memory_transfer(pixels_dst, pixels_src); break; case 2: - if (swap_bytes) + if (pack_unpack_swap_bytes) do_memory_transfer(pixels_dst, pixels_src); else do_memory_transfer(pixels_dst, pixels_src); break; case 4: - if (swap_bytes) + if (pack_unpack_swap_bytes) do_memory_transfer(pixels_dst, pixels_src); else do_memory_transfer(pixels_dst, pixels_src); break; case 8: - if (swap_bytes) + if (pack_unpack_swap_bytes) do_memory_transfer(pixels_dst, pixels_src); else do_memory_transfer(pixels_dst, pixels_src); break; + case 16: + if (pack_unpack_swap_bytes) + do_memory_transfer(pixels_dst, pixels_src); + else + do_memory_transfer(pixels_dst, pixels_src); + break; } } else @@ -314,12 +310,22 @@ namespace vk //usually we can just get away with nearest filtering const u8 samples = rsx_pitch / real_pitch; - rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, swap_bytes); + rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes); } dma_buffer->unmap(); //Its highly likely that this surface will be reused, so we just leave resources in place + switch (gcm_format) + { + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + rsx::shuffle_texel_data_wzyx(pixels_dst, rsx_pitch, 
width, height); + break; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + rsx::shuffle_texel_data_wzyx(pixels_dst, rsx_pitch, width, height); + break; + } + return result; } @@ -692,7 +698,7 @@ namespace vk cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth); region.reset(rsx_address, rsx_size); - region.create(width, height, section_depth, mipmaps, view, image); + region.create(width, height, section_depth, mipmaps, view, image, 0, true, gcm_format); region.set_dirty(false); region.set_context(context); region.set_image_type(type); diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index f8d088c461..ccaeb5fa86 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -162,6 +162,38 @@ namespace rsx std::array get_constant_blend_colors(); + /** + * Shuffle texel layout from xyzw to wzyx + * TODO: Variable src/dst and optional se conversion + */ + template + void shuffle_texel_data_wzyx(void *data, u16 row_pitch_in_bytes, u16 row_length_in_texels, u16 num_rows) + { + char *raw_src = (char*)data; + T tmp[4]; + + for (u16 n = 0; n < num_rows; ++n) + { + T* src = (T*)raw_src; + raw_src += row_pitch_in_bytes; + + for (u16 m = 0; m < row_length_in_texels; ++m) + { + tmp[0] = src[3]; + tmp[1] = src[2]; + tmp[2] = src[1]; + tmp[3] = src[0]; + + src[0] = tmp[0]; + src[1] = tmp[1]; + src[2] = tmp[2]; + src[3] = tmp[3]; + + src += 4; + } + } + } + /** * Clips a rect so that it never falls outside the parent region * attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0)