diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 7c8efe1100..eddfd8d29f 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -167,7 +167,7 @@ namespace current_subresource_layout.depth = depth; // src_pitch in texture can uses 20 bits so fits on 32 bits int. u32 src_pitch_in_block = padded_row ? suggested_pitch_in_bytes / block_size_in_bytes : miplevel_width_in_block; - current_subresource_layout.pitch_in_bytes = src_pitch_in_block; + current_subresource_layout.pitch_in_block = src_pitch_in_block; current_subresource_layout.data = gsl::span(texture_data_pointer + offset_in_src, src_pitch_in_block * block_size_in_bytes * miplevel_height_in_block * depth); @@ -277,7 +277,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre u16 w = src_layout.width_in_block; u16 h = src_layout.height_in_block; u16 depth = src_layout.depth; - u32 pitch = src_layout.pitch_in_bytes; + u32 pitch = src_layout.pitch_in_block; // Ignore when texture width > pitch if (w > pitch) @@ -290,7 +290,7 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } @@ -312,34 +312,42 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), 
gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } - case CELL_GCM_TEXTURE_DEPTH24_D8: // Untested + case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); + break; + } + case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_D8R8G8B8: { if (is_swizzled) copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; } case CELL_GCM_TEXTURE_Y16_X16: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, 
get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); break; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: @@ -348,11 +356,11 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. 
- copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; @@ -363,11 +371,11 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. 
- copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } else { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes); + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_block); } break; diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index cf48823b75..704f544473 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -38,7 +38,7 @@ struct rsx_subresource_layout u16 width_in_block; u16 height_in_block; u16 depth; - u32 pitch_in_bytes; + u32 pitch_in_block; }; /** diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 1ef66f5743..351b599ac6 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2077,7 +2077,7 @@ namespace rsx rsx_subresource_layout subres = {}; subres.width_in_block = src.width; subres.height_in_block = src.slice_h; - subres.pitch_in_bytes = pitch_in_block; + subres.pitch_in_block = pitch_in_block; subres.depth = 1; subres.data = { (const gsl::byte*)src.pixels, src.pitch * src.slice_h }; subresource_layout.push_back(subres); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 4a89e7df4f..40bea9bdb5 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -67,7 +67,7 @@ namespace gl case 
CELL_GCM_TEXTURE_A8R8G8B8: return std::make_tuple(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8); case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE); case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5); - case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE); + case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8); case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT); case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_SHORT); case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_HALF_FLOAT); @@ -254,7 +254,8 @@ namespace gl glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter())); const u32 texture_format = tex.format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8) + if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 || + texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT) { //NOTE: The stored texture function is reversed wrt the textureProj compare function GLenum compare_mode = (GLenum)tex.zfunc() | GL_NEVER; @@ -321,10 +322,6 @@ namespace gl case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_A8R8G8B8: // TODO - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: @@ -332,6 +329,12 @@ namespace gl case ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; + case CELL_GCM_TEXTURE_DEPTH24_D8: + 
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return{ GL_RED, GL_RED, GL_RED, GL_RED }; + case CELL_GCM_TEXTURE_A4R4G4B4: return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; @@ -574,6 +577,7 @@ namespace gl glBindTexture(target, id); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 04cbd47f1a..e2c8e63c52 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -695,14 +695,19 @@ namespace gl if (sized_internal_fmt == GL_NONE) sized_internal_fmt = gl::get_sized_internal_format(gcm_format); - auto ifmt = src->get_internal_format(); - switch (ifmt) + gl::texture::internal_format ifmt = static_cast(sized_internal_fmt); + if (src) { - case gl::texture::internal_format::depth16: - case gl::texture::internal_format::depth24_stencil8: - case gl::texture::internal_format::depth32f_stencil8: - sized_internal_fmt = (GLenum)ifmt; - break; + ifmt = src->get_internal_format(); + switch (ifmt) + { + case gl::texture::internal_format::depth16: + case gl::texture::internal_format::depth24_stencil8: + case gl::texture::internal_format::depth32f_stencil8: + //HACK! Should use typeless transfer instead + sized_internal_fmt = (GLenum)ifmt; + break; + } } auto dst = std::make_unique(dst_type, width, height, 1, 1, sized_internal_fmt); @@ -723,13 +728,22 @@ namespace gl } } - std::array swizzle = src->get_native_component_layout(); - if ((GLenum)ifmt != sized_internal_fmt) + std::array swizzle; + if (!src || (GLenum)ifmt != sized_internal_fmt) { - err_once("GL format mismatch (data cast?). 
Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt); + if (src) + { + //Format mismatch + err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt); + } + //Apply base component map onto the new texture if a data cast has been done swizzle = get_component_mapping(gcm_format, rsx::texture_create_flags::default_component_order); } + else + { + swizzle = src->get_native_component_layout(); + } if (memcmp(remap.first.data(), rsx::default_remap_vector.first.data(), 4) || memcmp(remap.second.data(), rsx::default_remap_vector.second.data(), 4)) @@ -744,20 +758,6 @@ namespace gl std::array get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) { - //NOTE: Depth textures should always read RRRR - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - { - return{ GL_RED, GL_RED, GL_RED, GL_RED }; - } - default: - break; - } - switch (flags) { case rsx::texture_create_flags::default_component_order: @@ -858,7 +858,7 @@ namespace gl gl::texture_view* generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { - auto result = create_temporary_subresource_impl(sections_to_copy.front().src, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); + auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); for (const auto ®ion : sections_to_copy) { diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 39c82d478b..28383ac2bd 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1564,8 +1564,8 @@ namespace rsx { case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_A4R4G4B4: - case 
CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_A4R4G4B4: //TODO + case CELL_GCM_TEXTURE_R5G6B5: //TODO { u32 remap = tex.remap(); result.redirected_textures |= (1 << i); @@ -1573,8 +1573,9 @@ namespace rsx break; } case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { const auto compare_mode = (rsx::comparison_function)tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && @@ -1731,8 +1732,9 @@ namespace rsx break; } case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { const auto compare_mode = (rsx::comparison_function)tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index c15181c655..660b6fbbeb 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -3,185 +3,343 @@ namespace vk { - -gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device) -{ - gpu_formats_support result = {}; - - VkFormatProperties props; - vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_D24_UNORM_S8_UINT, &props); - - result.d24_unorm_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); - - vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_D32_SFLOAT_S8_UINT, &props); - result.d32_sfloat_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) - && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) - && !!(props.optimalTilingFeatures & 
VK_FORMAT_FEATURE_BLIT_SRC_BIT); - - //Hide d24_s8 if force high precision z buffer is enabled - if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8) - result.d24_unorm_s8 = false; - - return result; -} - -VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format) -{ - switch (format) + gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device) { - case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; - case rsx::surface_depth_format::z24s8: - { - if (support.d24_unorm_s8) return VK_FORMAT_D24_UNORM_S8_UINT; - if (support.d32_sfloat_s8) return VK_FORMAT_D32_SFLOAT_S8_UINT; - fmt::throw_exception("No hardware support for z24s8" HERE); - } - } - fmt::throw_exception("Invalid format (0x%x)" HERE, (u32)format); -} + gpu_formats_support result = {}; -std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter) -{ - switch (min_filter) - { - case rsx::texture_minify_filter::nearest: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::nearest_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::linear_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); - case rsx::texture_minify_filter::nearest_linear: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_LINEAR); - case rsx::texture_minify_filter::linear_linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); - case rsx::texture_minify_filter::convolution_min: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); - } - fmt::throw_exception("Invalid max filter" HERE); -} + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(physical_device, 
VK_FORMAT_D24_UNORM_S8_UINT, &props); -VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter) -{ - switch (mag_filter) - { - case rsx::texture_magnify_filter::nearest: return VK_FILTER_NEAREST; - case rsx::texture_magnify_filter::linear: return VK_FILTER_LINEAR; - case rsx::texture_magnify_filter::convolution_mag: return VK_FILTER_LINEAR; - } - fmt::throw_exception("Invalid mag filter (0x%x)" HERE, (u32)mag_filter); -} + result.d24_unorm_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); -VkBorderColor get_border_color(u8 color) -{ - // TODO: Handle simulated alpha tests and modify texture operations accordingly - if ((color / 0x10) >= 0x8) - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - else - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; -} + vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_D32_SFLOAT_S8_UINT, &props); + result.d32_sfloat_s8 = !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + && !!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT); -VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap) -{ - switch (gcm_wrap) - { - case rsx::texture_wrap_mode::wrap: return VK_SAMPLER_ADDRESS_MODE_REPEAT; - case rsx::texture_wrap_mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - case rsx::texture_wrap_mode::clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - case rsx::texture_wrap_mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_clamp_to_edge: return 
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_border: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - case rsx::texture_wrap_mode::mirror_once_clamp: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - } - fmt::throw_exception("unhandled texture clamp mode" HERE); -} + //Hide d24_s8 if force high precision z buffer is enabled + if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8) + result.d24_unorm_s8 = false; -float max_aniso(rsx::texture_max_anisotropy gcm_aniso) -{ - switch (gcm_aniso) - { - case rsx::texture_max_anisotropy::x1: return 1.0f; - case rsx::texture_max_anisotropy::x2: return 2.0f; - case rsx::texture_max_anisotropy::x4: return 4.0f; - case rsx::texture_max_anisotropy::x6: return 6.0f; - case rsx::texture_max_anisotropy::x8: return 8.0f; - case rsx::texture_max_anisotropy::x10: return 10.0f; - case rsx::texture_max_anisotropy::x12: return 12.0f; - case rsx::texture_max_anisotropy::x16: return 16.0f; + return result; } - fmt::throw_exception("Texture anisotropy error: bad max aniso (%d)" HERE, (u32)gcm_aniso); -} - - -std::array get_component_mapping(u32 format) -{ - //Component map in ARGB format - std::array mapping = {}; - - switch (format) + VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format) { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; + switch 
(format) + { + case rsx::surface_depth_format::z16: return VK_FORMAT_D16_UNORM; + case rsx::surface_depth_format::z24s8: + { + if (support.d24_unorm_s8) return VK_FORMAT_D24_UNORM_S8_UINT; + if (support.d32_sfloat_s8) return VK_FORMAT_D32_SFLOAT_S8_UINT; + fmt::throw_exception("No hardware support for z24s8" HERE); + } + } + fmt::throw_exception("Invalid format (0x%x)" HERE, (u32)format); + } - case CELL_GCM_TEXTURE_A4R4G4B4: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; break; + std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter) + { + switch (min_filter) + { + case rsx::texture_minify_filter::nearest: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::nearest_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::linear_nearest: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + case rsx::texture_minify_filter::nearest_linear: return std::make_tuple(VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_LINEAR); + case rsx::texture_minify_filter::linear_linear: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + case rsx::texture_minify_filter::convolution_min: return std::make_tuple(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + } + fmt::throw_exception("Invalid max filter" HERE); + } - case CELL_GCM_TEXTURE_G8B8: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter) + { + switch (mag_filter) + { + case rsx::texture_magnify_filter::nearest: return VK_FILTER_NEAREST; + case rsx::texture_magnify_filter::linear: return VK_FILTER_LINEAR; + case 
rsx::texture_magnify_filter::convolution_mag: return VK_FILTER_LINEAR; + } + fmt::throw_exception("Invalid mag filter (0x%x)" HERE, (u32)mag_filter); + } - case CELL_GCM_TEXTURE_B8: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + VkBorderColor get_border_color(u8 color) + { + // TODO: Handle simulated alpha tests and modify texture operations accordingly + if ((color / 0x10) >= 0x8) + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + else + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } - case CELL_GCM_TEXTURE_X16: - //Blue component is also R (Mass Effect 3) - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap) + { + switch (gcm_wrap) + { + case rsx::texture_wrap_mode::wrap: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case rsx::texture_wrap_mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case rsx::texture_wrap_mode::clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case rsx::texture_wrap_mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_clamp_to_edge: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_border: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + case rsx::texture_wrap_mode::mirror_once_clamp: return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + } + fmt::throw_exception("unhandled texture clamp mode" HERE); + } - case CELL_GCM_TEXTURE_X32_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + float max_aniso(rsx::texture_max_anisotropy gcm_aniso) + { + switch (gcm_aniso) + { + case rsx::texture_max_anisotropy::x1: return 1.0f; + case 
rsx::texture_max_anisotropy::x2: return 2.0f; + case rsx::texture_max_anisotropy::x4: return 4.0f; + case rsx::texture_max_anisotropy::x6: return 6.0f; + case rsx::texture_max_anisotropy::x8: return 8.0f; + case rsx::texture_max_anisotropy::x10: return 10.0f; + case rsx::texture_max_anisotropy::x12: return 12.0f; + case rsx::texture_max_anisotropy::x16: return 16.0f; + } - case CELL_GCM_TEXTURE_Y16_X16: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + fmt::throw_exception("Texture anisotropy error: bad max aniso (%d)" HERE, (u32)gcm_aniso); + } - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G }; break; + std::array get_component_mapping(u32 format) + { + //Component map in ARGB format + std::array mapping = {}; + + switch (format) + { + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + mapping = { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; + + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_A4R4G4B4: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, 
VK_COMPONENT_SWIZZLE_A }; break; + + case CELL_GCM_TEXTURE_G8B8: + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_B8: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_X16: + //Blue component is also R (Mass Effect 3) + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_X32_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_Y16_X16: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }; break; + + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G }; break; - case CELL_GCM_TEXTURE_D8R8G8B8: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; + case CELL_GCM_TEXTURE_D8R8G8B8: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; - case CELL_GCM_TEXTURE_D1R5G5B5: - mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; + case CELL_GCM_TEXTURE_D1R5G5B5: + mapping = { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B }; break; - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, 
VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }; break; - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - mapping = { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_R }; break; + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + mapping = { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_R }; break; - case CELL_GCM_TEXTURE_A8R8G8B8: - mapping = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; + case CELL_GCM_TEXTURE_A8R8G8B8: + mapping = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; break; - default: - fmt::throw_exception("Invalid or unsupported component mapping for texture format (0x%x)" HERE, format); - } + default: + fmt::throw_exception("Invalid or unsupported component mapping for texture format (0x%x)" HERE, format); + } - return mapping; -} + return mapping; + } + VkFormat get_compatible_sampler_format(u32 format) + { + switch (format) + { + case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM; + case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_R4G4B4A4_UNORM_PACK16; + case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + case 
CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK; + case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM; + case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; // Expand, discard high bit? + case CELL_GCM_TEXTURE_DEPTH24_D8: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO + case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_D16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_D16_UNORM; + case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_SFLOAT; + case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; + case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; + case CELL_GCM_TEXTURE_D1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; + case CELL_GCM_TEXTURE_D8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; // Expand + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; // Expand + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return VK_FORMAT_R8G8_UNORM; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return VK_FORMAT_R8G8_SNORM; + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_R8G8_UNORM; // Not right + case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8_UNORM; // Not right + } + fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format); + } + + VkFormat get_compatible_srgb_format(VkFormat 
rgb_format) + { + switch (rgb_format) + { + case VK_FORMAT_B8G8R8A8_UNORM: + return VK_FORMAT_B8G8R8A8_SRGB; + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; + case VK_FORMAT_BC2_UNORM_BLOCK: + return VK_FORMAT_BC2_SRGB_BLOCK; + case VK_FORMAT_BC3_UNORM_BLOCK: + return VK_FORMAT_BC3_SRGB_BLOCK; + default: + return rgb_format; + } + } + + u8 get_format_texel_width(VkFormat format) + { + switch (format) + { + case VK_FORMAT_R8_UNORM: + return 1; + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return 2; + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + return 4; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return 8; + case VK_FORMAT_R32G32B32A32_SFLOAT: + return 16; + case VK_FORMAT_D16_UNORM: + return 2; + case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8 + case VK_FORMAT_D24_UNORM_S8_UINT: + return 4; + } + + fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); + } + + std::pair get_format_element_size(VkFormat format) + { + // Return value is {ELEMENT_SIZE, NUM_ELEMENTS_PER_TEXEL} + // NOTE: Due to endianness issues, coalesced larger types are preferred + // e.g UINT1 to hold 4x1 bytes instead of UBYTE4 to hold 4x1 + + switch (format) + { + //8-bit + case VK_FORMAT_R8_UNORM: + return{ 1, 1 }; + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + 
return{ 2, 1 }; //UNSIGNED_SHORT_8_8 + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + return{ 4, 1 }; //UNSIGNED_INT_8_8_8_8 + //16-bit + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + return{ 2, 1 }; //UNSIGNED_SHORT and HALF_FLOAT + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SFLOAT: + return{ 2, 2 }; //HALF_FLOAT + case VK_FORMAT_R16G16B16A16_SFLOAT: + return{ 2, 4 }; //HALF_FLOAT + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return{ 2, 1 }; //UNSIGNED_SHORT_X_Y_Z_W + //32-bit + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + return{ 4, 1 }; //FLOAT + case VK_FORMAT_R32G32B32A32_SFLOAT: + return{ 4, 4 }; //FLOAT + //DXT + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + return{ 4, 1 }; + //Depth + case VK_FORMAT_D16_UNORM: + return{ 2, 1 }; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return{ 4, 1 }; + } + + fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); + } } diff --git a/rpcs3/Emu/RSX/VK/VKFormats.h b/rpcs3/Emu/RSX/VK/VKFormats.h index 63bad8a610..5fd3dbad59 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.h +++ b/rpcs3/Emu/RSX/VK/VKFormats.h @@ -11,13 +11,14 @@ namespace vk }; gpu_formats_support get_optimal_tiling_supported_formats(VkPhysicalDevice physical_device); - VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format); - VkStencilOp get_stencil_op(rsx::stencil_op op); - VkLogicOp get_logic_op(rsx::logic_op op); - VkFrontFace get_front_face_ccw(rsx::front_face ffv); - VkCullModeFlags get_cull_face(u32 cfv); VkBorderColor get_border_color(u8 
color); + VkFormat get_compatible_depth_surface_format(const gpu_formats_support &support, rsx::surface_depth_format format); + VkFormat get_compatible_sampler_format(u32 format); + VkFormat get_compatible_srgb_format(VkFormat rgb_format); + u8 get_format_texel_width(VkFormat format); + std::pair get_format_element_size(VkFormat format); + std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter); VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter); VkSamplerAddressMode vk_wrap_mode(rsx::texture_wrap_mode gcm_wrap); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 4dad64522a..03f7aabaa2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1190,7 +1190,8 @@ void VKGSRender::end() *sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8); + const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 || + texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT); VkCompareOp depth_compare_mode = compare_enabled ? 
vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER; bool replace = !fs_sampler_handles[i]; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 84d7aa2a8b..07a46e106f 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -108,105 +108,6 @@ namespace vk return result; } - VkFormat get_compatible_sampler_format(u32 format) - { - switch (format) - { - case CELL_GCM_TEXTURE_B8: return VK_FORMAT_R8_UNORM; - case CELL_GCM_TEXTURE_A1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_A4R4G4B4: return VK_FORMAT_R4G4B4A4_UNORM_PACK16; - case CELL_GCM_TEXTURE_R5G6B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_A8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return VK_FORMAT_BC2_UNORM_BLOCK; - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return VK_FORMAT_BC3_UNORM_BLOCK; - case CELL_GCM_TEXTURE_G8B8: return VK_FORMAT_R8G8_UNORM; - case CELL_GCM_TEXTURE_R6G5B5: return VK_FORMAT_R5G6B5_UNORM_PACK16; // Expand, discard high bit? 
- case CELL_GCM_TEXTURE_DEPTH24_D8: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return VK_FORMAT_D24_UNORM_S8_UINT; //TODO - case CELL_GCM_TEXTURE_DEPTH16: return VK_FORMAT_D16_UNORM; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return VK_FORMAT_D16_UNORM; - case CELL_GCM_TEXTURE_X16: return VK_FORMAT_R16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16: return VK_FORMAT_R16G16_UNORM; - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return VK_FORMAT_R16G16_SFLOAT; - case CELL_GCM_TEXTURE_R5G5B5A1: return VK_FORMAT_R5G5B5A1_UNORM_PACK16; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return VK_FORMAT_R16G16B16A16_SFLOAT; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return VK_FORMAT_R32G32B32A32_SFLOAT; - case CELL_GCM_TEXTURE_X32_FLOAT: return VK_FORMAT_R32_SFLOAT; - case CELL_GCM_TEXTURE_D1R5G5B5: return VK_FORMAT_A1R5G5B5_UNORM_PACK16; - case CELL_GCM_TEXTURE_D8R8G8B8: return VK_FORMAT_B8G8R8A8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; // Expand - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8B8A8_UNORM; // Expand - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return VK_FORMAT_R8G8_UNORM; - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return VK_FORMAT_R8G8_SNORM; - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return VK_FORMAT_R8G8_UNORM; // Not right - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return VK_FORMAT_R8G8_UNORM; // Not right - } - fmt::throw_exception("Invalid or unsupported sampler format for texture format (0x%x)" HERE, format); - } - - VkFormat get_compatible_srgb_format(VkFormat rgb_format) - { - switch (rgb_format) - { - case VK_FORMAT_B8G8R8A8_UNORM: - return VK_FORMAT_B8G8R8A8_SRGB; - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; - case VK_FORMAT_BC2_UNORM_BLOCK: - return VK_FORMAT_BC2_SRGB_BLOCK; - case VK_FORMAT_BC3_UNORM_BLOCK: - return 
VK_FORMAT_BC3_SRGB_BLOCK; - default: - return rgb_format; - } - } - - u8 get_format_texel_width(const VkFormat format) - { - switch (format) - { - case VK_FORMAT_R8_UNORM: - return 1; - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R16_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - return 2; - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: - case VK_FORMAT_BC2_SRGB_BLOCK: - case VK_FORMAT_BC3_SRGB_BLOCK: - return 4; - case VK_FORMAT_R16G16B16A16_SFLOAT: - return 8; - case VK_FORMAT_R32G32B32A32_SFLOAT: - return 16; - case VK_FORMAT_D16_UNORM: - return 2; - case VK_FORMAT_D32_SFLOAT_S8_UINT: //TODO: Translate to D24S8 - case VK_FORMAT_D24_UNORM_S8_UINT: - return 4; - } - - fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); - } - VkAllocationCallbacks default_callbacks() { VkAllocationCallbacks callbacks; @@ -449,18 +350,7 @@ namespace vk { if (image->current_layout == new_layout) return; - VkImageAspectFlags flags = VK_IMAGE_ASPECT_COLOR_BIT; - switch (image->info.format) - { - case VK_FORMAT_D16_UNORM: - flags = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - + VkImageAspectFlags flags = get_aspect_flags(image->info.format); change_image_layout(cmd, image->value, image->current_layout, new_layout, { flags, 0, 1, 0, 1 }); image->current_layout = new_layout; } diff 
--git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index dd439c39a7..4c824a6a56 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -62,6 +62,7 @@ namespace vk class physical_device; class command_buffer; struct image; + struct vk_data_heap; vk::context *get_current_thread_ctx(); void set_current_thread_ctx(const vk::context &ctx); @@ -79,6 +80,7 @@ namespace vk VkComponentMapping apply_swizzle_remap(const std::array& base_remap, const std::pair, std::array>& remap_vector); VkImageSubresource default_image_subresource(); VkImageSubresourceRange get_image_subresource_range(uint32_t base_layer, uint32_t base_mip, uint32_t layer_count, uint32_t level_count, VkImageAspectFlags aspect); + VkImageAspectFlagBits get_aspect_flags(VkFormat format); VkSampler null_sampler(); VkImageView null_image_view(vk::command_buffer&); @@ -89,15 +91,22 @@ namespace vk void destroy_global_resources(); + /** + * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. + * Then copy all layers into dst_image. + * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. 
+ */ + void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, + const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, + VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); + + //Other texture management helpers void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout); void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect, bool compatible_formats); - VkFormat get_compatible_sampler_format(u32 format); - VkFormat get_compatible_srgb_format(VkFormat rgb_format); - u8 get_format_texel_width(const VkFormat format); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); @@ -105,6 +114,7 @@ namespace vk void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range); void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image); + //Manage 'uninterruptible' state where secondary operations (e.g. violation handlers) will have to wait void enter_uninterruptible(); void leave_uninterruptible(); bool is_uninterruptible(); @@ -2184,7 +2194,7 @@ public: void enable_depth_bias(bool enable = true) { - 
rs.depthBiasEnable = VK_TRUE; + rs.depthBiasEnable = enable ? VK_TRUE : VK_FALSE; } void enable_depth_bounds_test(bool enable = true) @@ -2334,13 +2344,4 @@ public: heap->unmap(); } }; - - /** - * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. - * Then copy all layers into dst_image. - * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. - */ - void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, - const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); } diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index a83f0c9203..396b7e7fb5 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -41,6 +41,20 @@ namespace vk return subres; } + VkImageAspectFlagBits get_aspect_flags(VkFormat format) + { + switch (format) + { + default: + return VkImageAspectFlagBits(VK_IMAGE_ASPECT_COLOR_BIT); + case VK_FORMAT_D16_UNORM: + return VkImageAspectFlagBits(VK_IMAGE_ASPECT_DEPTH_BIT); + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return VkImageAspectFlagBits(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + } + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) { VkImageSubresourceLayers a_src = {}, a_dst = {}; @@ -147,22 +161,69 @@ namespace vk change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); } - void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, + void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, VkImageAspectFlags flags, 
vk::vk_data_heap &upload_heap) { u32 mipmap_level = 0; u32 block_in_pixel = get_format_block_size_in_texel(format); u8 block_size_in_bytes = get_format_block_size_in_bytes(format); + std::vector staging_buffer; + + //TODO: Depth and stencil transfer together + flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT); + for (const rsx_subresource_layout &layout : subresource_layout) { u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256); u32 image_linear_size = row_pitch * layout.height_in_block * layout.depth; - size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size); - void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size); - gsl::span mapped{ (gsl::byte*)mapped_buffer, ::narrow(image_linear_size) }; + //Map with extra padding bytes in case of realignment + size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8); + void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8); + void *dst = mapped_buffer; + + bool use_staging = false; + if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT || + dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Misalign intentionally to skip the first stencil byte in D24S8 data + //Ensures the real depth data is dword aligned + + if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Emulate D24x8 passthrough to D32 format + //Reads from GPU managed memory are slow at best and at worst unreliable + use_staging = true; + staging_buffer.resize(image_linear_size + 8); + dst = staging_buffer.data() + 4 - 1; + } + else + { + //Skip leading dword when writing to texture + offset_in_buffer += 4; + dst = (char*)(mapped_buffer) + 4 - 1; + } + } + + gsl::span mapped{ (gsl::byte*)dst, ::narrow(image_linear_size) }; upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256); + + if (use_staging) + { + if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT) + { + //Map depth component from D24x8 to a f32 depth value + //NOTE: 
One byte (contains first S8 value) is skipped + rsx::convert_le_d24x8_to_le_f32(mapped_buffer, (char*)dst + 1, image_linear_size >> 2, 1); + } + else //unused + { + //Copy emulated data back to the target buffer + memcpy(mapped_buffer, dst, image_linear_size); + } + } + upload_heap.unmap(); VkBufferImageCopy copy_info = {}; @@ -176,7 +237,7 @@ namespace vk copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count; copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes; - vkCmdCopyBufferToImage(cmd, upload_heap.heap->value, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info); + vkCmdCopyBufferToImage(cmd, upload_heap.heap->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info); mipmap_level++; } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index c88da3936c..045731e772 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -167,18 +167,7 @@ namespace vk const u16 internal_width = (context != rsx::texture_upload_context::framebuffer_storage? width : std::min(width, rsx::apply_resolution_scale(width, true))); const u16 internal_height = (context != rsx::texture_upload_context::framebuffer_storage? height : std::min(height, rsx::apply_resolution_scale(height, true))); - - VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; - switch (vram_texture->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } + VkImageAspectFlags aspect_flag = vk::get_aspect_flags(vram_texture->info.format); //TODO: Read back stencil values (is this really necessary?) 
VkBufferImageCopy copyRegion = {}; @@ -215,13 +204,13 @@ namespace vk } template - void do_memory_transfer(void *pixels_dst, const void *pixels_src) + void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 channels_count) { if (sizeof(T) == 1) memcpy(pixels_dst, pixels_src, cpu_address_range); else { - const u32 block_size = width * height; + const u32 block_size = width * height * channels_count; if (swapped) { @@ -262,7 +251,9 @@ namespace vk void* pixels_src = dma_buffer->map(0, cpu_address_range); void* pixels_dst = vm::base(cpu_address_base); - const u8 bpp = real_pitch / width; + const auto texel_layout = vk::get_format_element_size(vram_texture->info.format); + const auto elem_size = texel_layout.first; + const auto channel_count = texel_layout.second; //We have to do our own byte swapping since the driver doesnt do it for us if (real_pitch == rsx_pitch) @@ -283,36 +274,24 @@ namespace vk if (!is_depth_format) { - switch (bpp) + switch (elem_size) { default: - LOG_ERROR(RSX, "Invalid bpp %d", bpp); + LOG_ERROR(RSX, "Invalid element width %d", elem_size); case 1: - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); break; case 2: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); else - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); break; case 4: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); + do_memory_transfer(pixels_dst, pixels_src, channel_count); else - do_memory_transfer(pixels_dst, pixels_src); - break; - case 8: - if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); - else - do_memory_transfer(pixels_dst, pixels_src); - break; - case 16: - if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src); - else - do_memory_transfer(pixels_dst, pixels_src); + 
do_memory_transfer(pixels_dst, pixels_src, channel_count); break; } } @@ -334,7 +313,8 @@ namespace vk break; } - rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes); + u16 row_length = u16(width * channel_count); + rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes); switch (vram_texture->info.format) { @@ -491,20 +471,6 @@ namespace vk VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) { - //NOTE: Depth textures should always read RRRR - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - { - return{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - } - default: - break; - } - VkComponentMapping mapping = {}; switch (flags) { @@ -542,41 +508,41 @@ namespace vk vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) { - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; - - switch (source->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - - VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); - if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT || - vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format)) - { - dst_format = source->info.format; - } - - VkImageSubresourceRange subresource_range = { 
aspect, 0, 1, 0, 1 }; - std::unique_ptr image; std::unique_ptr view; + VkImageAspectFlags aspect; + VkImageCreateFlags image_flags; + VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); + + if (source) + { + aspect = vk::get_aspect_flags(source->info.format); + if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT || + vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format)) + { + //HACK! Should use typeless transfer + dst_format = source->info.format; + } + + image_flags = source->info.flags; + } + else + { + aspect = vk::get_aspect_flags(dst_format); + image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE)? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; + } + image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, dst_format, w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, source->info.flags)); + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags)); //This method is almost exclusively used to work on framebuffer resources //Keep the original swizzle layout unless there is data format conversion - VkComponentMapping view_swizzle = source->native_component_map; - if (dst_format != source->info.format) + VkComponentMapping view_swizzle; + if (!source || dst_format != source->info.format) { //This is a data cast operation //Use native mapping for the new type @@ -584,6 +550,10 @@ namespace vk const auto remap = get_component_mapping(gcm_format); view_swizzle = { remap[1], remap[2], remap[3], remap[0] }; } + else + { + view_swizzle = source->native_component_map; + } if (memcmp(remap_vector.first.data(), rsx::default_remap_vector.first.data(), 4) || memcmp(remap_vector.second.data(), rsx::default_remap_vector.second.data(), 4)) @@ -594,6 +564,7 @@ namespace vk if (copy) { + VkImageSubresourceRange 
subresource_range = { aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = source->current_layout; vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); @@ -638,8 +609,8 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - VkImageAspectFlags dst_aspect; VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst_format); image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, @@ -647,19 +618,6 @@ namespace vk size, size, 1, 1, 6, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)); - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH16: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - dst_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - VkImageSubresourceRange view_range = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 6 }; view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_CUBE, image->info.format, image->native_component_map, view_range)); @@ -681,21 +639,7 @@ namespace vk { if (section.src) { - VkImageAspectFlags src_aspect; - switch (section.src->info.format) - { - case VK_FORMAT_D16_UNORM: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - + VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = 
section.src->current_layout; vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); @@ -728,8 +672,8 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - VkImageAspectFlags dst_aspect; VkFormat dst_format = vk::get_compatible_sampler_format(gcm_format); + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst_format); image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_3D, @@ -737,19 +681,6 @@ namespace vk width, height, depth, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0)); - switch (gcm_format) - { - case CELL_GCM_TEXTURE_DEPTH16: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case CELL_GCM_TEXTURE_DEPTH24_D8: - dst_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - dst_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - VkImageSubresourceRange view_range = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_3D, image->info.format, image->native_component_map, view_range)); @@ -771,21 +702,7 @@ namespace vk { if (section.src) { - VkImageAspectFlags src_aspect; - switch (section.src->info.format) - { - case VK_FORMAT_D16_UNORM: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - default: - src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - break; - } - + VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = section.src->current_layout; vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); @@ 
-815,64 +732,42 @@ namespace vk vk::image_view* generate_atlas_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { - auto result = create_temporary_subresource_view_impl(cmd, sections_to_copy.front().src, VK_IMAGE_TYPE_2D, + auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false); VkImage dst = result->info.image; - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; - - switch (sections_to_copy.front().src->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + VkImageAspectFlags dst_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); for (const auto &region : sections_to_copy) { + VkImageAspectFlags src_aspect = vk::get_aspect_flags(region.src->info.format); + VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; VkImageLayout old_src_layout = region.src->current_layout; - vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); VkImageCopy copy_rgn; copy_rgn.srcOffset = { region.src_x, region.src_y, 0 }; copy_rgn.dstOffset = { region.dst_x, region.dst_y, 0 }; - copy_rgn.dstSubresource = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - 
copy_rgn.srcSubresource = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; copy_rgn.extent = { region.w, region.h, 1 }; vkCmdCopyImage(cmd, region.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn); - vk::change_image_layout(cmd, region.src, old_src_layout, subresource_range); + vk::change_image_layout(cmd, region.src, old_src_layout, src_range); } - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); return result; } void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override { VkImage dst = dst_view->info.image; - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; - - switch (src->info.format) - { - case VK_FORMAT_D16_UNORM: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - break; - } - + VkImageAspectFlags aspect = vk::get_aspect_flags(src->info.format); VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); @@ -937,11 +832,13 @@ namespace vk switch (gcm_format) { case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; usage_flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; vk_format = m_formats_support.d24_unorm_s8?
VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; break; case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT; usage_flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; vk_format = VK_FORMAT_D16_UNORM; @@ -1030,7 +927,7 @@ namespace vk section->set_sampler_status(rsx::texture_sampler_status::status_ready); } - vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, + vk::copy_mipmaped_image_using_buffer(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, subres_range.aspectMask, *m_texture_upload_heap); vk::leave_uninterruptible(); diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index c358c5420b..92aff462f0 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -82,7 +82,7 @@ namespace rsx * N - Sample count */ template - void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { u32 dst_offset = 0; u32 src_offset = 0; @@ -112,51 +112,39 @@ namespace rsx } } - void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { - switch (pixel_size) + switch (element_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); 
break; case 2: - scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 4: - scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 8: - scale_image_fallback_impl((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 16: - scale_image_fallback_impl((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported element size %d" HERE, element_size); } } - void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v) + void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v) { - switch (pixel_size) + switch (element_size) { case 1: - scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; case 2: - scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl>((u16*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, 
element_size, samples_u, samples_v); break; case 4: - scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 8: - scale_image_fallback_impl>((u64*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); - break; - case 16: - scale_image_fallback_impl>((u128*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_impl>((u32*)dst, (const be_t*)src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported element size %d" HERE, element_size); } } @@ -185,9 +173,9 @@ namespace rsx } template - void scale_image_fast(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding) + void scale_image_fast(void *dst, const void *src, u8 element_size, u16 src_width, u16 src_height, u16 padding) { - switch (pixel_size) + switch (element_size) { case 1: scale_image_impl((u8*)dst, (const u8*)src, src_width, src_height, padding); @@ -202,14 +190,14 @@ namespace rsx scale_image_impl((u64*)dst, (const u64*)src, src_width, src_height, padding); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported pixel size %d" HERE, element_size); } } template - void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding) + void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 element_size, u16 src_width, u16 src_height, u16 padding) { - switch (pixel_size) + switch (element_size) { case 1: scale_image_impl((u8*)dst, (const u8*)src, src_width, src_height, padding); @@ -224,17 +212,17 @@ namespace rsx scale_image_impl, N>((u64*)dst, (const be_t*)src, src_width, src_height, 
padding); break; default: - fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); + fmt::throw_exception("unsupported pixel size %d" HERE, element_size); } } - void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes) + void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 element_size, u8 samples_u, u8 samples_v, bool swap_bytes) { //Scale this image by repeating pixel data n times //n = expected_pitch / real_pitch //Use of fixed argument templates for performance reasons - const u16 dst_width = dst_pitch / pixel_size; + const u16 dst_width = dst_pitch / element_size; const u16 padding = dst_width - (src_width * samples_u); if (!swap_bytes) @@ -244,30 +232,30 @@ namespace rsx switch (samples_u) { case 1: - scale_image_fast<1>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<1>(dst, src, element_size, src_width, src_height, padding); break; case 2: - scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<2>(dst, src, element_size, src_width, src_height, padding); break; case 3: - scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<3>(dst, src, element_size, src_width, src_height, padding); break; case 4: - scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<4>(dst, src, element_size, src_width, src_height, padding); break; case 8: - scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<8>(dst, src, element_size, src_width, src_height, padding); break; case 16: - scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast<16>(dst, src, element_size, src_width, src_height, padding); break; default: - scale_image_fallback(dst, src, src_width, src_height, 
dst_pitch, src_pitch, pixel_size, samples_u, 1); + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, 1); } } else { - scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); } } else @@ -277,30 +265,30 @@ namespace rsx switch (samples_u) { case 1: - scale_image_fast_with_byte_swap<1>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<1>(dst, src, element_size, src_width, src_height, padding); break; case 2: - scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<2>(dst, src, element_size, src_width, src_height, padding); break; case 3: - scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<3>(dst, src, element_size, src_width, src_height, padding); break; case 4: - scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<4>(dst, src, element_size, src_width, src_height, padding); break; case 8: - scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<8>(dst, src, element_size, src_width, src_height, padding); break; case 16: - scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding); + scale_image_fast_with_byte_swap<16>(dst, src, element_size, src_width, src_height, padding); break; default: - scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1); + scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, 1); } } else { - scale_image_fallback_with_byte_swap(dst, src, src_width, 
src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v); + scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, element_size, samples_u, samples_v); } } } @@ -413,4 +401,26 @@ namespace rsx ++src_ptr; } } + + void convert_le_d24x8_to_le_f32(void *dst, void *src, u32 row_length_in_texels, u32 num_rows) + { + const u32 num_pixels = row_length_in_texels * num_rows; + verify(HERE), (num_pixels & 3) == 0; + + const auto num_iterations = (num_pixels >> 2); + + __m128i* dst_ptr = (__m128i*)dst; + __m128i* src_ptr = (__m128i*)src; + + const __m128 scale_vector = _mm_set1_ps(1.f / 16777214.f); + const __m128i mask = _mm_set1_epi32(0x00FFFFFF); + for (u32 n = 0; n < num_iterations; ++n) + { + const __m128 src_vector = _mm_cvtepi32_ps(_mm_and_si128(mask, _mm_loadu_si128(src_ptr))); + const __m128 normalized_vector = _mm_mul_ps(src_vector, scale_vector); + _mm_stream_si128(dst_ptr, (__m128i&)normalized_vector); + ++dst_ptr; + ++src_ptr; + } + } } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 183c0e6682..b2855e6067 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -278,6 +278,7 @@ namespace rsx void convert_le_f32_to_be_d24(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); void convert_le_d24x8_to_be_d24x8(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); + void convert_le_d24x8_to_le_f32(void *dst, void *src, u32 row_length_in_texels, u32 num_rows); void fill_scale_offset_matrix(void *dest_, bool transpose, float offset_x, float offset_y, float offset_z,