mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-23 19:21:43 +00:00
vk: Implement depth scaling using hardware blit/copy engines
- Removes the old depth scaling using an overlay. It was never going to work properly due to per-pixel stencil writes being unavailable
- TODO: Preserve stencil buffer during ARGB8->D32S8 shader conversion pass
This commit is contained in:
parent
3150619320
commit
fc18e17ba6
@ -1632,8 +1632,13 @@ void GLGSRender::synchronize_buffers()
|
||||
|
||||
// Forwards a blit request to the GL texture cache.
// Returns true when the texture cache reports that it handled the blit, false otherwise.
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
	if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts))
	{
		// Only flag the sampler state as dirty when the blit actually happened;
		// a rejected blit leaves nothing for the samplers to pick up.
		m_samplers_dirty.store(true);
		return true;
	}

	return false;
}
|
||||
|
||||
void GLGSRender::notify_tile_unbound(u32 tile)
|
||||
|
@ -84,8 +84,29 @@ namespace gl
|
||||
s32 old_fbo = 0;
|
||||
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
|
||||
|
||||
filter interp = linear_interpolation ? filter::linear : filter::nearest;
|
||||
GLenum attachment = is_depth_copy ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
|
||||
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
|
||||
GLenum attachment;
|
||||
gl::buffers target;
|
||||
|
||||
if (is_depth_copy)
|
||||
{
|
||||
if (src->get_internal_format() == gl::texture::internal_format::depth16 ||
|
||||
dst->get_internal_format() == gl::texture::internal_format::depth16)
|
||||
{
|
||||
attachment = GL_DEPTH_ATTACHMENT;
|
||||
target = gl::buffers::depth;
|
||||
}
|
||||
else
|
||||
{
|
||||
attachment = GL_DEPTH_STENCIL_ATTACHMENT;
|
||||
target = gl::buffers::depth_stencil;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
attachment = GL_COLOR_ATTACHMENT0;
|
||||
target = gl::buffers::color;
|
||||
}
|
||||
|
||||
blit_src.bind();
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0);
|
||||
@ -99,7 +120,7 @@ namespace gl
|
||||
if (scissor_test_enabled)
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
blit_src.blit(blit_dst, src_rect, dst_rect, is_depth_copy ? buffers::depth : buffers::color, interp);
|
||||
blit_src.blit(blit_dst, src_rect, dst_rect, target, interp);
|
||||
|
||||
if (xfer_info.dst_is_typeless)
|
||||
{
|
||||
|
@ -634,9 +634,6 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
m_depth_converter.reset(new vk::depth_convert_pass());
|
||||
m_depth_converter->create(*m_device);
|
||||
|
||||
m_depth_scaler.reset(new vk::depth_scaling_pass());
|
||||
m_depth_scaler->create(*m_device);
|
||||
|
||||
m_attachment_clear_pass.reset(new vk::attachment_clear_pass());
|
||||
m_attachment_clear_pass->create(*m_device);
|
||||
|
||||
@ -764,10 +761,6 @@ VKGSRender::~VKGSRender()
|
||||
m_depth_converter->destroy();
|
||||
m_depth_converter.reset();
|
||||
|
||||
//Depth surface blitter
|
||||
m_depth_scaler->destroy();
|
||||
m_depth_scaler.reset();
|
||||
|
||||
//Attachment clear helper
|
||||
m_attachment_clear_pass->destroy();
|
||||
m_attachment_clear_pass.reset();
|
||||
@ -1164,24 +1157,11 @@ void VKGSRender::end()
|
||||
const auto dst_w = std::get<2>(region);
|
||||
const auto dst_h = std::get<3>(region);
|
||||
|
||||
if (!is_depth || (src_w == dst_w && src_h == dst_h))
|
||||
{
|
||||
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
|
||||
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
|
||||
|
||||
vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value,
|
||||
surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h,
|
||||
0, 0, dst_w, dst_h, 1, aspect, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, surface->info.format, 0);
|
||||
auto render_pass = m_render_passes[rp];
|
||||
|
||||
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
|
||||
m_depth_scaler->run(*m_current_command_buffer, { 0, 0, (f32)src_w, (f32)src_h }, { 0, 0, (f32)dst_w, (f32)dst_h }, surface,
|
||||
surface->old_contents, static_cast<vk::render_target*>(surface->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
|
||||
}
|
||||
vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value,
|
||||
surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h,
|
||||
0, 0, dst_w, dst_h, 1, aspect, true, VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2048,7 +2028,6 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
|
||||
|
||||
m_attachment_clear_pass->free_resources();
|
||||
m_depth_converter->free_resources();
|
||||
m_depth_scaler->free_resources();
|
||||
m_ui_renderer->free_resources();
|
||||
|
||||
ctx->buffer_views_to_clean.clear();
|
||||
@ -3311,46 +3290,8 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
|
||||
//Stop all parallel operations until this is finished
|
||||
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
|
||||
|
||||
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
|
||||
m_current_command_buffer->begin();
|
||||
|
||||
if (result.succeeded)
|
||||
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
|
||||
{
|
||||
bool require_flush = false;
|
||||
if (result.deferred)
|
||||
{
|
||||
//Requires manual scaling; depth/stencil surface
|
||||
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0);
|
||||
auto render_pass = m_render_passes[rp];
|
||||
|
||||
auto old_src_layout = result.src_image->current_layout;
|
||||
auto old_dst_layout = result.dst_image->current_layout;
|
||||
|
||||
vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
|
||||
|
||||
// TODO: Insets
|
||||
const areaf src_area = { 0, 0, (f32)result.src_image->width(), (f32)result.src_image->height() };
|
||||
const areaf dst_area = { 0, 0, (f32)result.dst_image->width(), (f32)result.dst_image->height() };
|
||||
m_depth_scaler->run(*m_current_command_buffer, src_area, dst_area, result.dst_image, result.src_image,
|
||||
result.src_view, render_pass, m_framebuffers_to_clean);
|
||||
|
||||
vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout);
|
||||
vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout);
|
||||
|
||||
require_flush = true;
|
||||
}
|
||||
|
||||
if (result.dst_image)
|
||||
{
|
||||
if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size,
|
||||
*m_current_command_buffer, m_swapchain->get_graphics_queue()))
|
||||
require_flush = true;
|
||||
}
|
||||
|
||||
if (require_flush)
|
||||
flush_command_queue();
|
||||
|
||||
m_samplers_dirty.store(true);
|
||||
return true;
|
||||
}
|
||||
|
@ -261,7 +261,6 @@ private:
|
||||
|
||||
std::unique_ptr<vk::text_writer> m_text_writer;
|
||||
std::unique_ptr<vk::depth_convert_pass> m_depth_converter;
|
||||
std::unique_ptr<vk::depth_scaling_pass> m_depth_scaler;
|
||||
std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
|
||||
std::unique_ptr<vk::attachment_clear_pass> m_attachment_clear_pass;
|
||||
|
||||
|
@ -6,10 +6,12 @@ namespace vk
|
||||
{
|
||||
context* g_current_vulkan_ctx = nullptr;
|
||||
render_device g_current_renderer;
|
||||
driver_vendor g_driver_vendor = driver_vendor::unknown;
|
||||
std::shared_ptr<vk::mem_allocator_base> g_mem_allocator = nullptr;
|
||||
|
||||
std::unique_ptr<image> g_null_texture;
|
||||
std::unique_ptr<image_view> g_null_image_view;
|
||||
std::unordered_map<VkFormat, std::unique_ptr<image>> g_typeless_textures;
|
||||
|
||||
VkSampler g_null_sampler = nullptr;
|
||||
|
||||
@ -148,7 +150,7 @@ namespace vk
|
||||
if (g_null_image_view)
|
||||
return g_null_image_view->value;
|
||||
|
||||
g_null_texture.reset(new image(g_current_renderer, get_memory_mapping(g_current_renderer.gpu()).device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
g_null_texture.reset(new image(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0));
|
||||
|
||||
@ -167,6 +169,25 @@ namespace vk
|
||||
return g_null_image_view->value;
|
||||
}
|
||||
|
||||
// Returns the shared scratch image for the given format, creating it on first use.
// One image per VkFormat is kept in g_typeless_textures. Each is a device-local,
// optimally-tiled 4096x4096 2D image created with transfer source + destination usage only.
// The returned pointer is non-owning; the cache retains ownership until it is cleared.
vk::image* get_typeless_helper(VkFormat format)
{
	auto& cached = g_typeless_textures[format];
	if (!cached)
	{
		// Construct straight into the owning pointer instead of passing a raw `new` result around
		cached = std::make_unique<vk::image>(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			VK_IMAGE_TYPE_2D, format, 4096, 4096, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0);
	}

	return cached.get();
}
|
||||
|
||||
void acquire_global_submit_lock()
|
||||
{
|
||||
g_submit_mutex.lock();
|
||||
@ -180,7 +201,9 @@ namespace vk
|
||||
void destroy_global_resources()
|
||||
{
|
||||
g_null_texture.reset();
|
||||
g_null_image_view .reset();
|
||||
g_null_image_view.reset();
|
||||
|
||||
g_typeless_textures.clear();
|
||||
|
||||
if (g_null_sampler)
|
||||
vkDestroySampler(g_current_renderer, g_null_sampler, nullptr);
|
||||
@ -222,6 +245,7 @@ namespace vk
|
||||
g_drv_disable_fence_reset = false;
|
||||
g_num_processed_frames = 0;
|
||||
g_num_total_frames = 0;
|
||||
g_driver_vendor = driver_vendor::unknown;
|
||||
|
||||
const auto gpu_name = g_current_renderer.gpu().name();
|
||||
|
||||
@ -240,14 +264,33 @@ namespace vk
|
||||
//Disable fence reset for proprietary driver and delete+initialize a new fence instead
|
||||
if (gpu_name.find("Radeon") != std::string::npos)
|
||||
{
|
||||
g_driver_vendor = driver_vendor::AMD;
|
||||
g_drv_disable_fence_reset = true;
|
||||
}
|
||||
|
||||
//Nvidia cards are easily susceptible to NaN poisoning
|
||||
if (gpu_name.find("NVIDIA") != std::string::npos || gpu_name.find("GeForce") != std::string::npos)
|
||||
{
|
||||
g_driver_vendor = driver_vendor::NVIDIA;
|
||||
g_drv_sanitize_fp_values = true;
|
||||
}
|
||||
|
||||
if (g_driver_vendor == driver_vendor::unknown)
|
||||
{
|
||||
if (gpu_name.find("RADV") != std::string::npos)
|
||||
{
|
||||
g_driver_vendor = driver_vendor::RADV;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accessor for the driver vendor currently stored in g_driver_vendor.
driver_vendor get_driver_vendor()
{
	return g_driver_vendor;
}
|
||||
|
||||
bool emulate_primitive_restart(rsx::primitive_type type)
|
||||
|
@ -66,6 +66,7 @@ namespace vk
|
||||
struct image;
|
||||
struct vk_data_heap;
|
||||
class mem_allocator_base;
|
||||
enum driver_vendor;
|
||||
|
||||
vk::context *get_current_thread_ctx();
|
||||
void set_current_thread_ctx(const vk::context &ctx);
|
||||
@ -80,6 +81,7 @@ namespace vk
|
||||
bool emulate_primitive_restart(rsx::primitive_type type);
|
||||
bool sanitize_fp_values();
|
||||
bool fence_reset_disabled();
|
||||
driver_vendor get_driver_vendor();
|
||||
|
||||
VkComponentMapping default_component_map();
|
||||
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);
|
||||
@ -89,6 +91,7 @@ namespace vk
|
||||
|
||||
VkSampler null_sampler();
|
||||
VkImageView null_image_view(vk::command_buffer&);
|
||||
image* get_typeless_helper(VkFormat format);
|
||||
|
||||
//Sync helpers around vkQueueSubmit
|
||||
void acquire_global_submit_lock();
|
||||
@ -109,8 +112,14 @@ namespace vk
|
||||
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
|
||||
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range);
|
||||
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
|
||||
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect);
|
||||
void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats);
|
||||
|
||||
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
|
||||
const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
|
||||
VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);
|
||||
|
||||
void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
|
||||
u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps,
|
||||
VkImageAspectFlags aspect, bool compatible_formats, VkFilter filter = VK_FILTER_LINEAR, VkFormat src_format = VK_FORMAT_UNDEFINED, VkFormat dst_format = VK_FORMAT_UNDEFINED);
|
||||
|
||||
std::pair<VkFormat, VkComponentMapping> get_compatible_surface_format(rsx::surface_color_format color_format);
|
||||
size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count);
|
||||
@ -134,6 +143,14 @@ namespace vk
|
||||
|
||||
void die_with_error(const char* faulting_addr, VkResult error_code);
|
||||
|
||||
// Identifies the GPU driver vendor so callers can apply vendor-specific behaviour.
// Enumerator values are spelled out explicitly; they match the implicit numbering.
enum driver_vendor
{
	unknown = 0, // no vendor identified
	AMD = 1,
	NVIDIA = 2,
	RADV = 3
};
|
||||
|
||||
struct memory_type_mapping
|
||||
{
|
||||
uint32_t host_visible_coherent;
|
||||
@ -328,7 +345,7 @@ namespace vk
|
||||
public:
|
||||
using mem_handle_t = void *;
|
||||
|
||||
mem_allocator_base(VkDevice dev, VkPhysicalDevice pdev) : m_device(dev) {};
|
||||
mem_allocator_base(VkDevice dev, VkPhysicalDevice /*pdev*/) : m_device(dev) {};
|
||||
~mem_allocator_base() {};
|
||||
|
||||
virtual void destroy() = 0;
|
||||
@ -386,7 +403,7 @@ namespace vk
|
||||
vmaFreeMemory(m_allocator, static_cast<VmaAllocation>(mem_handle));
|
||||
}
|
||||
|
||||
void *map(mem_handle_t mem_handle, u64 offset, u64 size) override
|
||||
void *map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override
|
||||
{
|
||||
void *data = nullptr;
|
||||
|
||||
@ -432,7 +449,7 @@ namespace vk
|
||||
|
||||
void destroy() override {};
|
||||
|
||||
mem_handle_t alloc(u64 block_sz, u64 alignment, uint32_t memory_type_index) override
|
||||
mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, uint32_t memory_type_index) override
|
||||
{
|
||||
VkDeviceMemory memory;
|
||||
VkMemoryAllocateInfo info = {};
|
||||
@ -466,7 +483,7 @@ namespace vk
|
||||
return (VkDeviceMemory)mem_handle;
|
||||
}
|
||||
|
||||
u64 get_vk_device_memory_offset(mem_handle_t mem_handle)
|
||||
u64 get_vk_device_memory_offset(mem_handle_t /*mem_handle*/)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -76,22 +76,27 @@ namespace vk
|
||||
//Reserve descriptor pools
|
||||
m_descriptor_pool.create(*m_device, descriptor_pool_sizes, 2);
|
||||
|
||||
VkDescriptorSetLayoutBinding bindings[2] = {};
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[0].binding = 0;
|
||||
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
bindings[1].binding = 1;
|
||||
|
||||
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[2].descriptorCount = 1;
|
||||
bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
bindings[2].binding = 2;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo infos = {};
|
||||
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
||||
infos.pBindings = bindings;
|
||||
infos.bindingCount = 2;
|
||||
infos.bindingCount = 3;
|
||||
|
||||
CHECK_RESULT(vkCreateDescriptorSetLayout(*m_device, &infos, nullptr, &m_descriptor_layout));
|
||||
|
||||
@ -116,8 +121,9 @@ namespace vk
|
||||
virtual std::vector<vk::glsl::program_input> get_fragment_inputs()
|
||||
{
|
||||
std::vector<vk::glsl::program_input> fs_inputs;
|
||||
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 0, "fs0" });
|
||||
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 1, "static_data" });
|
||||
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, "static_data" });
|
||||
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 1, "fs0" });
|
||||
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 2, "fs1" });
|
||||
return fs_inputs;
|
||||
}
|
||||
|
||||
@ -209,7 +215,7 @@ namespace vk
|
||||
return result;
|
||||
}
|
||||
|
||||
void load_program(vk::command_buffer cmd, VkRenderPass pass, VkImageView src)
|
||||
void load_program(vk::command_buffer cmd, VkRenderPass pass, const std::vector<VkImageView>& src)
|
||||
{
|
||||
vk::glsl::program *program = nullptr;
|
||||
auto found = m_program_cache.find(pass);
|
||||
@ -233,14 +239,18 @@ namespace vk
|
||||
{
|
||||
m_sampler = std::make_unique<vk::sampler>(*m_device,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
|
||||
VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
|
||||
}
|
||||
|
||||
update_uniforms(program);
|
||||
|
||||
VkDescriptorImageInfo info = { m_sampler->value, src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
|
||||
program->bind_uniform(info, "fs0", m_descriptor_set);
|
||||
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 1, m_descriptor_set);
|
||||
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set);
|
||||
|
||||
for (int n = 0; n < src.size(); ++n)
|
||||
{
|
||||
VkDescriptorImageInfo info = { m_sampler->value, src[n], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
|
||||
program->bind_uniform(info, "fs" + std::to_string(n), m_descriptor_set);
|
||||
}
|
||||
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
|
||||
@ -343,7 +353,7 @@ namespace vk
|
||||
vkCmdSetScissor(cmd, 0, 1, &vs);
|
||||
}
|
||||
|
||||
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass)
|
||||
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, const std::vector<VkImageView>& src, VkRenderPass render_pass)
|
||||
{
|
||||
load_program(cmd, render_pass, src);
|
||||
set_up_viewport(cmd, w, h);
|
||||
@ -362,12 +372,18 @@ namespace vk
|
||||
vkCmdEndRenderPass(cmd);
|
||||
}
|
||||
|
||||
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
|
||||
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, const std::vector<VkImageView>& src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
|
||||
{
|
||||
vk::framebuffer *fbo = get_framebuffer(target, render_pass, framebuffer_resources);
|
||||
run(cmd, w, h, fbo, src, render_pass);
|
||||
}
|
||||
|
||||
// Convenience overload for a single source view: wraps it in a one-element
// list and forwards to the overload that accepts a list of image views.
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
	const std::vector<VkImageView> single_view{ src };
	run(cmd, w, h, target, single_view, render_pass, framebuffer_resources);
}
|
||||
|
||||
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, vk::image_view* src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
|
||||
{
|
||||
run(cmd, w, h, target, src->value, render_pass, framebuffer_resources);
|
||||
@ -397,7 +413,8 @@ namespace vk
|
||||
{
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
|
||||
"#extension GL_ARB_shader_stencil_export : enable\n\n"
|
||||
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
|
||||
"layout(location=0) in vec2 tc0;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
@ -409,6 +426,7 @@ namespace vk
|
||||
|
||||
renderpass_config.set_depth_mask(true);
|
||||
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
|
||||
renderpass_config.enable_stencil_test(VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_COMPARE_OP_ALWAYS, 0xFF, 0xFF);
|
||||
|
||||
m_vertex_shader.id = 100002;
|
||||
m_fragment_shader.id = 100003;
|
||||
@ -438,7 +456,7 @@ namespace vk
|
||||
"#version 450\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(location=0) in vec4 in_pos;\n"
|
||||
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
|
||||
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
|
||||
"layout(location=0) out vec2 tc0;\n"
|
||||
"layout(location=1) out vec4 color;\n"
|
||||
"layout(location=2) out vec4 parameters;\n"
|
||||
@ -459,7 +477,7 @@ namespace vk
|
||||
{
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
|
||||
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
|
||||
"layout(location=0) in vec2 tc0;\n"
|
||||
"layout(location=1) in vec4 color;\n"
|
||||
"layout(location=2) in vec4 parameters;\n"
|
||||
@ -704,95 +722,13 @@ namespace vk
|
||||
break;
|
||||
}
|
||||
|
||||
overlay_pass::run(cmd, w, h, target, src, render_pass);
|
||||
overlay_pass::run(cmd, w, h, target, { src }, render_pass);
|
||||
}
|
||||
|
||||
ui.update();
|
||||
}
|
||||
};
|
||||
|
||||
struct depth_scaling_pass : public overlay_pass
|
||||
{
|
||||
areaf src_area;
|
||||
areaf dst_area;
|
||||
u16 src_width;
|
||||
u16 src_height;
|
||||
|
||||
depth_scaling_pass()
|
||||
{
|
||||
vs_src =
|
||||
{
|
||||
"#version 450\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
|
||||
"layout(location=0) out vec2 tc0;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
|
||||
" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
|
||||
" gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
|
||||
" tc0 = coords[gl_VertexIndex % 4] * regs[0].xy + regs[0].zw;\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
fs_src =
|
||||
{
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
|
||||
"layout(location=0) in vec2 tc0;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_FragDepth = texture(fs0, tc0).x;\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
renderpass_config.set_depth_mask(true);
|
||||
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
|
||||
|
||||
m_vertex_shader.id = 100006;
|
||||
m_fragment_shader.id = 100007;
|
||||
}
|
||||
|
||||
void update_uniforms(vk::glsl::program* /*program*/) override
|
||||
{
|
||||
m_ubo_offset = (u32)m_ubo.alloc<256>(128);
|
||||
auto dst = (f32*)m_ubo.map(m_ubo_offset, 128);
|
||||
dst[0] = f32(src_area.x2 - src_area.x1) / src_width;
|
||||
dst[1] = f32(src_area.y2 - src_area.y1) / src_height;
|
||||
dst[2] = src_area.x1 / f32(src_area.x2 - src_area.x1);
|
||||
dst[3] = src_area.y1 / f32(src_area.y2 - src_area.y1);
|
||||
m_ubo.unmap();
|
||||
}
|
||||
|
||||
void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override
|
||||
{
|
||||
VkRect2D region = { { s32(dst_area.x1), s32(dst_area.y1) },{ u32(dst_area.x2 - dst_area.x1), u32(dst_area.y2 - dst_area.y1) } };
|
||||
vkCmdSetScissor(cmd, 0, 1, ®ion);
|
||||
|
||||
VkViewport vp{};
|
||||
vp.x = dst_area.x1;
|
||||
vp.y = dst_area.y1;
|
||||
vp.width = f32(region.extent.width);
|
||||
vp.height = f32(region.extent.height);
|
||||
vp.minDepth = 0.f;
|
||||
vp.maxDepth = 1.f;
|
||||
vkCmdSetViewport(cmd, 0, 1, &vp);
|
||||
}
|
||||
|
||||
void run(vk::command_buffer &cmd, const areaf& src_rect, const areaf& dst_rect, vk::image* target, vk::image* src, vk::image_view* src_view, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
|
||||
{
|
||||
src_area = src_rect;
|
||||
dst_area = dst_rect;
|
||||
src_width = src->width();
|
||||
src_height = src->height();
|
||||
|
||||
overlay_pass::run(cmd, target->width(), target->height(), target, src_view, render_pass, framebuffer_resources);
|
||||
}
|
||||
};
|
||||
|
||||
struct attachment_clear_pass : public overlay_pass
|
||||
{
|
||||
color4f clear_color = { 0.f, 0.f, 0.f, 0.f };
|
||||
@ -805,7 +741,7 @@ namespace vk
|
||||
{
|
||||
"#version 450\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
|
||||
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
|
||||
"layout(location=0) out vec2 tc0;\n"
|
||||
"layout(location=1) out vec4 color;\n"
|
||||
"layout(location=2) out vec4 mask;\n"
|
||||
@ -825,7 +761,7 @@ namespace vk
|
||||
{
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n"
|
||||
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
|
||||
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
|
||||
"layout(location=0) in vec2 tc0;\n"
|
||||
"layout(location=1) in vec4 color;\n"
|
||||
"layout(location=2) in vec4 mask;\n"
|
||||
|
@ -19,7 +19,7 @@ namespace vk
|
||||
u16 surface_height = 0;
|
||||
|
||||
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
std::unordered_map<u32, std::unique_ptr<vk::image_view>> views;
|
||||
std::unordered_multimap<u32, std::unique_ptr<vk::image_view>> views;
|
||||
|
||||
u64 frame_tag = 0; //frame id when invalidated, 0 if not invalid
|
||||
|
||||
@ -40,12 +40,16 @@ namespace vk
|
||||
mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
|
||||
{}
|
||||
|
||||
vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap)
|
||||
vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
|
||||
VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
{
|
||||
auto found = views.find(remap_encoding);
|
||||
if (found != views.end())
|
||||
auto found = views.equal_range(remap_encoding);
|
||||
for (auto It = found.first; It != found.second; ++It)
|
||||
{
|
||||
return found->second.get();
|
||||
if (It->second->info.subresourceRange.aspectMask & mask)
|
||||
{
|
||||
return It->second.get();
|
||||
}
|
||||
}
|
||||
|
||||
VkComponentMapping real_mapping = vk::apply_swizzle_remap
|
||||
@ -55,10 +59,10 @@ namespace vk
|
||||
);
|
||||
|
||||
auto view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format,
|
||||
real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT)));
|
||||
real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & mask));
|
||||
|
||||
auto result = view.get();
|
||||
views[remap_encoding] = std::move(view);
|
||||
views.emplace(remap_encoding, std::move(view));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -55,30 +55,39 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect)
|
||||
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
|
||||
const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
|
||||
VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
|
||||
{
|
||||
// NOTE: src_aspect should match dst_aspect according to spec but drivers seem to work just fine with the mismatch
|
||||
// TODO: Implement separate pixel transfer for drivers that refuse this workaround
|
||||
|
||||
VkImageSubresourceLayers a_src = {}, a_dst = {};
|
||||
a_src.aspectMask = aspect;
|
||||
a_src.aspectMask = src_aspect & src_transfer_mask;
|
||||
a_src.baseArrayLayer = 0;
|
||||
a_src.layerCount = 1;
|
||||
a_src.mipLevel = 0;
|
||||
|
||||
a_dst = a_src;
|
||||
a_dst.aspectMask = dst_aspect & dst_transfer_mask;
|
||||
|
||||
VkImageCopy rgn = {};
|
||||
rgn.extent.depth = 1;
|
||||
rgn.extent.width = width;
|
||||
rgn.extent.height = height;
|
||||
rgn.dstOffset = { 0, 0, 0 };
|
||||
rgn.srcOffset = { 0, 0, 0 };
|
||||
rgn.extent.width = u32(src_rect.x2 - src_rect.x1);
|
||||
rgn.extent.height = u32(src_rect.y2 - src_rect.y1);
|
||||
rgn.dstOffset = { dst_rect.x1, dst_rect.y1, 0 };
|
||||
rgn.srcOffset = { src_rect.x1, src_rect.y1, 0 };
|
||||
rgn.srcSubresource = a_src;
|
||||
rgn.dstSubresource = a_dst;
|
||||
|
||||
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
||||
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
|
||||
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (srcLayout != preferred_src_format)
|
||||
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
|
||||
|
||||
if (dstLayout != preferred_dst_format)
|
||||
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
|
||||
|
||||
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
|
||||
{
|
||||
@ -88,11 +97,11 @@ namespace vk
|
||||
rgn.dstSubresource.mipLevel++;
|
||||
}
|
||||
|
||||
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (srcLayout != preferred_src_format)
|
||||
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
|
||||
|
||||
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (dstLayout != preferred_dst_format)
|
||||
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
|
||||
}
|
||||
|
||||
void copy_scaled_image(VkCommandBuffer cmd,
|
||||
@ -100,7 +109,8 @@ namespace vk
|
||||
VkImageLayout srcLayout, VkImageLayout dstLayout,
|
||||
u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height,
|
||||
u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height,
|
||||
u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats)
|
||||
u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats,
|
||||
VkFilter filter, VkFormat src_format, VkFormat dst_format)
|
||||
{
|
||||
VkImageSubresourceLayers a_src = {}, a_dst = {};
|
||||
a_src.aspectMask = aspect;
|
||||
@ -110,22 +120,129 @@ namespace vk
|
||||
|
||||
a_dst = a_src;
|
||||
|
||||
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
||||
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
|
||||
//TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions
|
||||
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (srcLayout != preferred_src_format)
|
||||
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
|
||||
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (dstLayout != preferred_dst_format)
|
||||
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
|
||||
if (src_width != dst_width || src_height != dst_height || mipmaps > 1 || !compatible_formats)
|
||||
if (compatible_formats && src_width == dst_width && src_height != dst_height)
|
||||
{
|
||||
if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
|
||||
{
|
||||
//Most depth/stencil formats cannot be scaled using hw blit
|
||||
LOG_ERROR(RSX, "Cannot perform scaled blit for depth/stencil images");
|
||||
return;
|
||||
}
|
||||
VkImageCopy copy_rgn;
|
||||
copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
|
||||
copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
|
||||
copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
|
||||
copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
|
||||
copy_rgn.extent = { src_width, src_height, 1 };
|
||||
|
||||
vkCmdCopyImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &copy_rgn);
|
||||
}
|
||||
else if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
|
||||
{
|
||||
//Most depth/stencil formats cannot be scaled using hw blit
|
||||
if (src_format == VK_FORMAT_UNDEFINED || dst_width > 4096 || (src_height + dst_height) > 4096)
|
||||
{
|
||||
LOG_ERROR(RSX, "Could not blit depth/stencil image. src_fmt=0x%x, src=%dx%d, dst=%dx%d",
|
||||
(u32)src_format, src_width, src_height, dst_width, dst_height);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto stretch_image_typeless = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless,
|
||||
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
|
||||
{
|
||||
const u32 src_w = u32(src_rect.x2 - src_rect.x1);
|
||||
const u32 src_h = u32(src_rect.y2 - src_rect.y1);
|
||||
const u32 dst_w = u32(dst_rect.x2 - dst_rect.x1);
|
||||
const u32 dst_h = u32(dst_rect.y2 - dst_rect.y1);
|
||||
|
||||
// Drivers are not very accepting of aspect COLOR -> aspect DEPTH or aspect STENCIL separately
|
||||
// However, this works okay for D24S8 (nvidia-only format)
|
||||
// To work around the problem we use the non-existent DEPTH/STENCIL/DEPTH_STENCIL aspect of the color texture instead
|
||||
VkImageAspectFlags typeless_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
if (transfer_flags == VK_IMAGE_ASPECT_DEPTH_BIT || transfer_flags == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
{
|
||||
// NOTE: This path is only taken for VK_FORMAT_D32_SFLOAT_S8_UINT as there is no 36-bit format available
|
||||
// On Nvidia, the default format is VK_FORMAT_D24_UNORM_S8_UINT which does not require this workaround
|
||||
switch (vk::get_driver_vendor())
|
||||
{
|
||||
case driver_vendor::AMD:
|
||||
// Quirks: This workaround allows proper transfer of stencil data
|
||||
case driver_vendor::NVIDIA:
|
||||
// Quirks: This workaround allows only transfer of depth data, stencil is ignored
|
||||
typeless_aspect = aspect;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//1. Copy unscaled to typeless surface
|
||||
copy_image(cmd, src, typeless, preferred_src_format, VK_IMAGE_LAYOUT_GENERAL,
|
||||
src_rect, { 0, 0, (s32)src_w, (s32)src_h }, 1, aspect, typeless_aspect, transfer_flags, 0xFF);
|
||||
|
||||
//2. Blit typeless surface to self
|
||||
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
|
||||
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST);
|
||||
|
||||
//3. Copy back the aspect bits
|
||||
copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format,
|
||||
{0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, typeless_aspect, aspect, 0xFF, transfer_flags);
|
||||
};
|
||||
|
||||
areai src_rect = { (s32)src_x_offset, (s32)src_y_offset, s32(src_x_offset + src_width), s32(src_y_offset + src_height) };
|
||||
areai dst_rect = { (s32)dst_x_offset, (s32)dst_y_offset, s32(dst_x_offset + dst_width), s32(dst_y_offset + dst_height) };
|
||||
|
||||
switch (src_format)
|
||||
{
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
{
|
||||
auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM);
|
||||
change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL);
|
||||
stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
break;
|
||||
}
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
{
|
||||
auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM);
|
||||
change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL);
|
||||
stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
break;
|
||||
}
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
{
|
||||
// NOTE: Typeless transfer (Depth/Stencil->Equivalent Color->Depth/Stencil) of single aspects does not work on AMD when done from a non-depth texture
|
||||
// Since the typeless transfer itself violates spec, the only way to make it work is to use a D32S8 intermediate
|
||||
// Copy from src->intermediate then intermediate->dst for each aspect separately
|
||||
|
||||
auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT);
|
||||
auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UINT);
|
||||
change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL);
|
||||
change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL);
|
||||
|
||||
auto intermediate = vk::get_typeless_helper(VK_FORMAT_D32_SFLOAT_S8_UINT);
|
||||
change_image_layout(cmd, intermediate, preferred_dst_format);
|
||||
|
||||
const areai intermediate_rect = { 0, 0, (s32)dst_width, (s32)dst_height };
|
||||
const VkImageAspectFlags depth_stencil = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
// Blit DEPTH aspect
|
||||
stretch_image_typeless(src, intermediate->value, typeless_depth->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
|
||||
// Blit STENCIL aspect
|
||||
stretch_image_typeless(src, intermediate->value, typeless_stencil->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VkImageBlit rgn = {};
|
||||
rgn.srcOffsets[0] = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
|
||||
rgn.srcOffsets[1] = { (int32_t)(src_width + src_x_offset), (int32_t)(src_height + src_y_offset), 1 };
|
||||
@ -136,29 +253,18 @@ namespace vk
|
||||
|
||||
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
|
||||
{
|
||||
vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR);
|
||||
vkCmdBlitImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &rgn, filter);
|
||||
|
||||
rgn.srcSubresource.mipLevel++;
|
||||
rgn.dstSubresource.mipLevel++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VkImageCopy copy_rgn;
|
||||
copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
|
||||
copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
|
||||
copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
|
||||
copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
|
||||
copy_rgn.extent = { src_width, src_height, 1 };
|
||||
|
||||
vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn);
|
||||
}
|
||||
if (srcLayout != preferred_src_format)
|
||||
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
|
||||
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
|
||||
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
if (dstLayout != preferred_dst_format)
|
||||
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
|
||||
}
|
||||
|
||||
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
|
||||
|
@ -1039,18 +1039,6 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
struct vk_blit_op_result : public blit_op_result
|
||||
{
|
||||
bool deferred = false;
|
||||
vk::image *src_image = nullptr;
|
||||
vk::image *dst_image = nullptr;
|
||||
vk::image_view *src_view = nullptr;
|
||||
|
||||
using blit_op_result::blit_op_result;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
|
||||
@ -1133,14 +1121,14 @@ namespace vk
|
||||
return upload_texture(cmd, tex, m_rtts, cmd, const_cast<const VkQueue>(m_submit_queue));
|
||||
}
|
||||
|
||||
vk::image *upload_image_simple(vk::command_buffer& /*cmd*/, u32 address, u32 width, u32 height)
|
||||
vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
|
||||
{
|
||||
//Uploads a linear memory range as a BGRA8 texture
|
||||
auto image = std::make_unique<vk::image>(*m_device, m_memory_types.host_visible_coherent,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_IMAGE_TYPE_2D,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED,
|
||||
VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 0);
|
||||
|
||||
VkImageSubresource subresource{};
|
||||
@ -1170,6 +1158,8 @@ namespace vk
|
||||
|
||||
image->memory->unmap();
|
||||
|
||||
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
|
||||
auto result = image.get();
|
||||
const u32 resource_memory = width * height * 4; //Rough approximate
|
||||
m_discardable_storage.push_back(image);
|
||||
@ -1179,22 +1169,16 @@ namespace vk
|
||||
return result;
|
||||
}
|
||||
|
||||
vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
|
||||
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
|
||||
{
|
||||
struct blit_helper
|
||||
{
|
||||
vk::command_buffer* commands;
|
||||
VkFormat format;
|
||||
blit_helper(vk::command_buffer *c) : commands(c) {}
|
||||
|
||||
bool deferred = false;
|
||||
vk::image* deferred_op_src = nullptr;
|
||||
vk::image* deferred_op_dst = nullptr;
|
||||
|
||||
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth, const rsx::typeless_xfer& /*typeless*/)
|
||||
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& /*typeless*/)
|
||||
{
|
||||
VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
|
||||
//Checks
|
||||
if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1)
|
||||
{
|
||||
@ -1214,54 +1198,35 @@ namespace vk
|
||||
return;
|
||||
}
|
||||
|
||||
const auto aspect = vk::get_aspect_flags(src->info.format);
|
||||
const auto src_width = src_area.x2 - src_area.x1;
|
||||
const auto src_height = src_area.y2 - src_area.y1;
|
||||
const auto dst_width = dst_area.x2 - dst_area.x1;
|
||||
const auto dst_height = dst_area.y2 - dst_area.y1;
|
||||
|
||||
deferred_op_src = src;
|
||||
deferred_op_dst = dst;
|
||||
|
||||
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
|
||||
{
|
||||
if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format)
|
||||
{
|
||||
//Scaled depth scaling
|
||||
deferred = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!deferred)
|
||||
{
|
||||
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
|
||||
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format);
|
||||
}
|
||||
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
|
||||
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format,
|
||||
interpolate? VK_FILTER_LINEAR : VK_FILTER_NEAREST, src->info.format, dst->info.format);
|
||||
|
||||
change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers});
|
||||
format = dst->info.format;
|
||||
}
|
||||
}
|
||||
helper(&cmd);
|
||||
|
||||
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast<const VkQueue>(m_submit_queue));
|
||||
|
||||
vk_blit_op_result result = reply.succeeded;
|
||||
result.real_dst_address = reply.real_dst_address;
|
||||
result.real_dst_size = reply.real_dst_size;
|
||||
result.is_depth = reply.is_depth;
|
||||
result.deferred = helper.deferred;
|
||||
result.dst_image = helper.deferred_op_dst;
|
||||
result.src_image = helper.deferred_op_src;
|
||||
if (reply.succeeded)
|
||||
{
|
||||
if (reply.real_dst_size)
|
||||
{
|
||||
flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue);
|
||||
}
|
||||
|
||||
if (!helper.deferred)
|
||||
return result;
|
||||
return true;
|
||||
}
|
||||
|
||||
VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 };
|
||||
auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D,
|
||||
helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range);
|
||||
|
||||
result.src_view = tmp_view.get();
|
||||
m_discardable_storage.push_back(tmp_view);
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
const u32 get_unreleased_textures_count() const override
|
||||
|
@ -502,7 +502,7 @@ namespace rsx
|
||||
entries.push_back(tmp);
|
||||
}
|
||||
|
||||
if ((entry_count = entries.size()) <= 2)
|
||||
if ((entry_count = (u32)entries.size()) <= 2)
|
||||
return;
|
||||
|
||||
root.rewind();
|
||||
|
Loading…
x
Reference in New Issue
Block a user