rsx/vk: Bug fixes

- Make each frame context own its own memory
- Fix GPU blit
- Fix image layout transitions in flip

vk: Improve frame-local memory usage tracking to prevent overwrites
- Also slightly bumps VRAM requirements for the stream buffers to reduce the chance of running out of storage
- Fixes missing graphics and some flickering; residual flickering remains and needs more work
vk: Up vertex attribute heap size and increase the guard size on it
vulkan: Reorganize memory management
vulkan: blit cleanup
vulkan: blit engine improvements
- Override the existing image mapping when a conflict is detected
- Allow blitting of depth/stencil surfaces
kd-11 2017-08-27 16:22:59 +03:00
parent 2033f3f7dc
commit deb590cb05
9 changed files with 297 additions and 97 deletions
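
A minimal sketch of the frame-tagged ring-heap scheme the notes above describe, using hypothetical names (ring_heap, frame_tag; not RPCS3's actual API): the producer advances a put pointer per allocation, each frame snapshots the put position at submission time, and retiring a frame releases its region by advancing the get pointer. The guard band makes is_critical() trip before put can overrun in-flight data.

#include <cstddef>
#include <stdexcept>

struct ring_heap
{
    size_t size = 0, guard = 0; //total capacity and reserved guard band
    size_t put = 0, get = 0;    //producer/consumer offsets into the ring

    void init(size_t heap_size, size_t guard_size) { size = heap_size; guard = guard_size; }

    //Bytes the producer can still take before touching live (unretired) data
    size_t free_space() const { return (get + size - put - 1) % size; }
    bool is_critical() const { return free_space() <= guard; }

    size_t alloc(size_t bytes)
    {
        if (free_space() < bytes)
            throw std::runtime_error("heap overflow"); //the real renderer flushes and waits instead
        if (put + bytes > size)
            put = 0; //simplified wrap; a real allocator re-checks free space here
        const size_t offset = put;
        put = (put + bytes) % size;
        return offset;
    }
};

//Each frame context records where the heap ended when its work was submitted;
//once the GPU retires that frame, everything up to the tag is reclaimable.
struct frame_tag { size_t attrib_end = 0; };

void tag_frame_end(frame_tag& tag, const ring_heap& heap) { tag.attrib_end = heap.put; }
void on_frame_retired(const frame_tag& tag, ring_heap& heap) { heap.get = tag.attrib_end; }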

View File

@ -59,10 +59,10 @@ struct render_target_traits
{
//TODO
auto desc = surface->GetDesc();
info->rsx_pitch = desc.Width;
info->native_pitch = desc.Width;
info->surface_width = desc.Width;
info->surface_height = desc.Height;
info->rsx_pitch = static_cast<u16>(desc.Width);
info->native_pitch = static_cast<u16>(desc.Width);
info->surface_width = static_cast<u32>(desc.Width);
info->surface_height = static_cast<u32>(desc.Height);
info->bpp = 1;
}

View File

@ -1290,7 +1290,7 @@ namespace gl
//These textures are completely GPU resident, so we don't watch for CPU access
//There's no data to be fetched from the CPU
//It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases
cached.protect(utils::protection::rw);
cached.protect(utils::protection::ro);
cached.set_dirty(false);
return true;
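
The switch from rw to ro matters because the cache relies on page protection to observe CPU writes: a read-only mapping faults on writes, letting the access-violation handler mark the section dirty, whereas rw lets writes through silently and the cached copy goes stale. An illustration using POSIX mprotect as a stand-in for the utils::protection wrapper (the platform specifics here are assumptions, not taken from this commit):

#include <cstddef>
#include <sys/mman.h>

void protect_readonly(void* page_aligned_base, size_t length)
{
    //Subsequent CPU writes to this range raise a fault, which the emulator's
    //access-violation handler turns into a cache invalidation for the region
    mprotect(page_aligned_base, length, PROT_READ);
}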

View File

@ -568,21 +568,8 @@ VKGSRender::VKGSRender() : GSRender()
m_secondary_command_buffer_pool.create((*m_device));
m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
//VRAM allocation
m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000);
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000);
m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
//Empty view to bind to buffer locations without data
m_null_buffer_view.reset(new vk::buffer_view(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
//Precalculated stuff
m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
//Generate frame contexts
@ -595,15 +582,24 @@ VKGSRender::VKGSRender() : GSRender()
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
//VRAM allocation
m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, 0x400000);
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000);
m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
for (auto &ctx : frame_context_storage)
{
ctx = {};
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()));
}
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);
vk::initialize_compiler_context();
@ -664,8 +660,7 @@ VKGSRender::~VKGSRender()
//Global resources
vk::destroy_global_resources();
//Data heaps/buffers
m_null_buffer_view.reset();
//Heaps
m_index_buffer_ring_info.heap.reset();
m_uniform_buffer_ring_info.heap.reset();
m_attrib_ring_info.heap.reset();
@ -677,6 +672,15 @@ VKGSRender::~VKGSRender()
//Frame context
m_framebuffers_to_clean.clear();
if (m_current_frame == &m_aux_frame_context)
{
//Return resources to the owner
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
m_aux_frame_context.buffer_views_to_clean.clear();
m_aux_frame_context.samplers_to_clean.clear();
@ -858,11 +862,6 @@ void VKGSRender::begin()
CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0));
m_current_frame->used_descriptors = 0;
m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
}
if (m_attrib_ring_info.is_critical() ||
@ -875,6 +874,12 @@ void VKGSRender::begin()
flush_command_queue(true);
m_vertex_cache->purge();
m_index_buffer_ring_info.reset_allocation_stats();
m_uniform_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs();
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
}
@ -1031,7 +1036,8 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts, m_memory_type_mapping, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts, m_memory_type_mapping,
m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
if (!texture0)
{
@ -1085,7 +1091,8 @@ void VKGSRender::end()
continue;
}
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts, m_memory_type_mapping, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts, m_memory_type_mapping,
m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
if (!texture0)
{
@ -1502,6 +1509,10 @@ void VKGSRender::advance_queued_frames()
});
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_uniform_buffer_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context_storage[m_current_queue_index];
@ -1574,6 +1585,17 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
ctx->buffer_views_to_clean.clear();
ctx->samplers_to_clean.clear();
if (ctx->last_frame_sync_time > m_last_heap_sync_time)
{
m_last_heap_sync_time = ctx->last_frame_sync_time;
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
}
}
ctx->swap_command_buffer = nullptr;
@ -1629,11 +1651,17 @@ bool VKGSRender::check_program_status()
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
if (!is_depth)
surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
else
{
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface && m_texture_cache.is_depth_texture(texaddr, m_rtts))
return std::make_tuple(true, 0);
}
if (!surface) return std::make_tuple(false, 0);
return std::make_tuple(true, surface->native_pitch);
@ -2366,7 +2394,7 @@ void VKGSRender::flip(int buffer)
barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
barrier.image = target_image;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = subres;
@ -2496,11 +2524,6 @@ void VKGSRender::flip(int buffer)
std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
//NOTE: Resource destruction is handled within the real swap handler
m_frame->flip(m_context);
@ -2526,6 +2549,8 @@ void VKGSRender::flip(int buffer)
bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
close_render_pass();
return m_texture_cache.upload_scaled_image(src, dst, interpolate, (*m_device), *m_current_command_buffer, m_memory_type_mapping,
m_swap_chain->get_present_queue(), m_rtts, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get());
}

View File

@ -25,11 +25,11 @@ namespace vk
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>;
}
//Heap allocation sizes in MB
//Heap allocation sizes in MB - each 'frame' owns a private heap, one of each kind
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 64
#define VK_MAX_ASYNC_CB_COUNT 64
#define VK_MAX_ASYNC_FRAMES 2
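
For scale, a back-of-envelope total from the sizes above. As far as this diff shows, the heaps are created once in the VKGSRender constructor and partitioned between frames by the heap tags, not physically duplicated per frame context:

#include <cstddef>

constexpr size_t MB = 0x100000;
constexpr size_t stream_heap_bytes =
    (256 /*attrib*/ + 64 /*ubo*/ + 64 /*index*/ + 64 /*texture upload*/) * MB;
static_assert(stream_heap_bytes == 448 * MB, "stream heaps total 448 MiB");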
@ -117,8 +117,6 @@ private:
vk::glsl::program *m_program;
vk::context m_thread_context;
vk::vk_data_heap m_attrib_ring_info;
vk::texture_cache m_texture_cache;
rsx::vk_render_targets m_rtts;
@ -141,12 +139,6 @@ private:
vk::render_device *m_device;
vk::swap_chain* m_swap_chain;
//buffer
vk::vk_data_heap m_uniform_buffer_ring_info;
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
std::unique_ptr<vk::buffer_view> m_null_buffer_view;
//Vulkan internals
vk::command_pool m_command_buffer_pool;
@ -165,6 +157,12 @@ private:
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;
u64 m_last_heap_sync_time = 0;
vk::vk_data_heap m_attrib_ring_info;
vk::vk_data_heap m_uniform_buffer_ring_info;
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
@ -178,6 +176,14 @@ private:
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
@ -185,6 +191,11 @@ private:
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.ubo_heap_ptr;
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
@ -193,6 +204,21 @@ private:
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;

View File

@ -233,7 +233,7 @@ namespace vk
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
@ -258,7 +258,7 @@ namespace vk
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
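
The hunk above narrows the access masks so stage and access stay paired (TRANSFER stage with TRANSFER_READ); extra bits such as VK_ACCESS_MEMORY_READ_BIT only widen the synchronization scope without adding correctness. A generic sketch of the kind of transition helper this feeds, written against the stock Vulkan API:

#include <vulkan/vulkan.h>

void transition(VkCommandBuffer cmd, VkImage image, VkImageLayout from, VkImageLayout to,
    VkAccessFlags src_access, VkAccessFlags dst_access,
    VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage)
{
    VkImageMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.oldLayout = from;
    barrier.newLayout = to;
    barrier.srcAccessMask = src_access;
    barrier.dstAccessMask = dst_access;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = image;
    barrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}

//e.g. preparing a swapchain image for the frame blit:
//transition(cmd, swap_image, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
//    VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
//    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);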

View File

@ -458,6 +458,12 @@ namespace vk
CHECK_RESULT(vkCreateImageView(m_device, &info, nullptr, &value));
}
image_view(VkDevice dev, VkImageViewCreateInfo create_info)
: m_device(dev), info(create_info)
{
CHECK_RESULT(vkCreateImageView(m_device, &info, nullptr, &value));
}
~image_view()
{
vkDestroyImageView(m_device, value, nullptr);

View File

@ -97,27 +97,47 @@ namespace vk
a_dst = a_src;
//TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions
VkImageBlit rgn = {};
rgn.srcOffsets[0] = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
rgn.srcOffsets[1] = { (int32_t)src_width, (int32_t)src_height, 1 };
rgn.dstOffsets[0] = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
rgn.dstOffsets[1] = { (int32_t)(dst_width + dst_x_offset), (int32_t)(dst_height + dst_y_offset), 1 };
rgn.dstSubresource = a_dst;
rgn.srcSubresource = a_src;
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
if (src_width != dst_width || src_height != dst_height || mipmaps > 1)
{
vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR);
if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
{
//Most depth/stencil formats cannot be scaled using hw blit
LOG_ERROR(RSX, "Cannot perform scaled blit for depth/stencil images");
return;
}
rgn.srcSubresource.mipLevel++;
rgn.dstSubresource.mipLevel++;
VkImageBlit rgn = {};
rgn.srcOffsets[0] = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
rgn.srcOffsets[1] = { (int32_t)(src_width + src_x_offset), (int32_t)(src_height + src_y_offset), 1 };
rgn.dstOffsets[0] = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
rgn.dstOffsets[1] = { (int32_t)(dst_width + dst_x_offset), (int32_t)(dst_height + dst_y_offset), 1 };
rgn.dstSubresource = a_dst;
rgn.srcSubresource = a_src;
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR);
rgn.srcSubresource.mipLevel++;
rgn.dstSubresource.mipLevel++;
}
}
else
{
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.extent = { src_width, src_height, 1 };
vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn);
}
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
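
A compact statement of the routing rule above, as a sketch: only a 1:1, single-mip transfer can take the vkCmdCopyImage path (which is valid for depth/stencil aspects), while anything scaled or mipmapped must go through vkCmdBlitImage, which most depth/stencil formats do not support:

inline bool needs_blit(u32 src_w, u32 src_h, u32 dst_w, u32 dst_h, u32 mipmaps)
{
    return src_w != dst_w || src_h != dst_h || mipmaps > 1;
}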

View File

@ -607,6 +607,40 @@ namespace vk
purge_cache();
}
bool is_depth_texture(const u32 texaddr, rsx::vk_render_targets &m_rtts)
{
if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
return true;
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(texaddr);
if (found == m_cache.end())
return false;
if (found->second.valid_count == 0)
return false;
for (auto& tex : found->second.data)
{
if (tex.is_dirty())
continue;
switch (tex.get_format())
{
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
case VK_FORMAT_D24_UNORM_S8_UINT:
return true;
default:
return false;
}
}
//Unreachable; silence compiler warning anyway
return false;
}
template <typename RsxTextureType>
vk::image_view* upload_texture(command_buffer &cmd, RsxTextureType &tex, rsx::vk_render_targets &m_rtts, const vk::memory_type_mapping &memory_type_mapping, vk_data_heap& upload_heap, vk::buffer* upload_buffer)
{
@ -652,12 +686,6 @@ namespace vk
return rtt_texture->get_view();
}
u32 raw_format = tex.format();
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
VkComponentMapping mapping = get_component_map(tex, format);
VkFormat vk_format = get_compatible_sampler_format(format);
VkImageType image_type;
VkImageViewType image_view_type;
u16 height = 0;
@ -696,7 +724,8 @@ namespace vk
break;
}
cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, tex.get_exact_mipmap_count());
//Ignoring the mipmap count is intentional; it's common for games to pass in incorrect values as the mipmap count
cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, 0);
if (region.exists() && !region.is_dirty())
{
return region.get_view().get();
@ -712,6 +741,12 @@ namespace vk
return nullptr;
}
u32 raw_format = tex.format();
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
VkComponentMapping mapping = get_component_map(tex, format);
VkFormat vk_format = get_compatible_sampler_format(format);
vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
vk_format,
@ -1104,16 +1139,20 @@ namespace vk
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8);
VkFormat src_vk_format = src_is_argb8 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
const VkComponentMapping rgba_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
const VkComponentMapping bgra_map = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A };
auto dest_mapping = (!dst_is_argb8 || dst.swizzled) ? bgra_map : rgba_map;
vk::image* vram_texture = nullptr;
vk::image* dest_texture = nullptr;
cached_texture_section* cached_dest = nullptr;
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
//Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, true);
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false);
dst_is_render_target = dst_subres.surface != nullptr;
u16 max_dst_width = dst.width;
@ -1165,7 +1204,7 @@ namespace vk
{
//First check if this surface exists in VRAM with exact dimensions
//Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible
auto cached_dest = find_texture_from_dimensions(dst.rsx_address, dst.pitch * dst.clip_height, dst_dimensions.width, dst_dimensions.height);
cached_dest = find_texture_from_dimensions(dst.rsx_address, dst.pitch * dst.clip_height, dst_dimensions.width, dst_dimensions.height);
//Check for any available region that will fit this one
if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height);
@ -1187,6 +1226,21 @@ namespace vk
max_dst_width = cached_dest->get_width();
max_dst_height = cached_dest->get_height();
//If dest has a component swizzle (usually caused by ARGB->BGRA compatibility when uploading from the CPU), remove it
auto& image_view = cached_dest->get_view();
if (image_view->info.components.a != dest_mapping.a ||
image_view->info.components.r != dest_mapping.r ||
image_view->info.components.g != dest_mapping.g ||
image_view->info.components.b != dest_mapping.b)
{
auto create_info = image_view->info;
create_info.components = dest_mapping;
m_temporary_image_view.push_back(std::move(image_view));
image_view.reset(new vk::image_view(dev, create_info));
}
}
else if (is_memcpy)
{
@ -1220,7 +1274,7 @@ namespace vk
}
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, true);
auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false);
src_is_render_target = src_subres.surface != nullptr;
//Create the source texture if it does not exist
@ -1235,7 +1289,9 @@ namespace vk
else
{
flush_address(src_address, dev, cmd, memory_types, submit_queue);
writer_lock lock(m_cache_mutex);
const VkFormat src_vk_format = src_is_argb8 ? VK_FORMAT_R8G8B8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
const VkComponentMapping component_mapping = (!src_is_argb8 || dst.swizzled) ? bgra_map : rgba_map;
//Upload texture from CPU
vk::image *image = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
@ -1245,12 +1301,15 @@ namespace vk
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_2D, src_vk_format,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
component_mapping, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
cached_texture_section& region = find_cached_texture(dst.rsx_address, src.pitch * src.slice_h, true, src.width, src.slice_h, 1);
region.reset(src.rsx_address, src.pitch * src.slice_h);
region.create(src.width, src.slice_h, 1, 1, view, dest_texture);
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
cached_texture_section& region = find_cached_texture(src_address, src.pitch * src.slice_h, true, src.width, src.slice_h, 1);
writer_lock lock(m_cache_mutex);
region.reset(src_address, src.pitch * src.slice_h);
region.create(src.width, src.slice_h, 1, 1, view, image);
region.protect(utils::protection::ro);
region.set_dirty(false);
@ -1262,14 +1321,16 @@ namespace vk
auto &subres = layout.back();
subres.width_in_block = src.width;
subres.height_in_block = src.slice_h;
subres.pitch_in_bytes = src.pitch;
subres.pitch_in_bytes = src.width; //Seems to be a typo - should be pitch_in_block
subres.depth = 1;
subres.data = {(const gsl::byte*)src.pixels, src.pitch * src.slice_h};
subres.data = {(const gsl::byte*)src.pixels, align(src.pitch, 256) * src.slice_h};
copy_mipmaped_image_using_buffer(cmd, image->value, layout, src_vk_format, false, 1,
upload_heap, upload_buffer);
copy_mipmaped_image_using_buffer(cmd, image->value, layout, src_is_argb8? CELL_GCM_TEXTURE_A8R8G8B8: CELL_GCM_TEXTURE_R5G6B5,
false, 1, upload_heap, upload_buffer);
vk::leave_uninterruptible();
vram_texture = image;
}
}
else
@ -1294,6 +1355,54 @@ namespace vk
vram_texture = src_subres.surface;
}
VkImageAspectFlags aspect_to_copy = VK_IMAGE_ASPECT_COLOR_BIT;
bool dest_exists = dest_texture != nullptr;
VkFormat dst_vk_format = dst_is_argb8 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
const u8 bpp = dst_is_argb8 ? 4 : 2;
const u32 real_width = dst.pitch / bpp;
//If src is depth, dest has to be depth as well
if (src_subres.is_depth_surface)
{
if (dest_exists)
{
if (dst_is_render_target && !dst_subres.is_depth_surface)
{
LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported");
return true;
}
if (!dst_is_render_target)
{
if (dest_texture->info.format != src_subres.surface->info.format)
{
cached_dest->unprotect();
cached_dest->set_dirty(true);
dest_exists = false;
cached_dest = nullptr;
}
}
else
{
if (dst_subres.surface->info.format != src_subres.surface->info.format)
{
LOG_ERROR(RSX, "Depth blit requested, but formats do not match (0x%X vs 0x%X)",
(u32)dst_subres.surface->info.format, (u32)src_subres.surface->info.format);
return true;
}
}
}
dst_vk_format = src_subres.surface->info.format;
dest_mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R };
if (dst_vk_format == VK_FORMAT_D16_UNORM)
aspect_to_copy = VK_IMAGE_ASPECT_DEPTH_BIT;
else
aspect_to_copy = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
//Validate clip offsets (Persona 4 Arena at 720p)
//Check if can fit
//NOTE: It is possible that the check is simpler (if (clip_x >= clip_width))
@ -1313,11 +1422,6 @@ namespace vk
src_area.y2 += scaled_clip_offset_y;
}
bool dest_exists = dest_texture != nullptr;
const VkFormat dst_vk_format = dst_is_argb8 ? VK_FORMAT_R8G8B8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16;
const u8 bpp = dst_is_argb8 ? 4 : 2;
const u32 real_width = dst.pitch / bpp;
if (!dest_exists)
{
dest_texture = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
@ -1325,15 +1429,32 @@ namespace vk
dst_vk_format,
real_width, dst.clip_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
change_image_layout(cmd, dest_texture, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_to_copy, 0, 1, 0, 1});
}
//Copy data
u32 src_width = src_area.x2 - src_area.x1;
u32 src_height = src_area.y2 - src_area.y1;
u32 dst_width = dst_area.x2 - dst_area.x1;
u32 dst_height = dst_area.y2 - dst_area.y1;
if (dst.clip_width != dst_width ||
dst.clip_height != dst_height)
{
//clip reproject
src_width = (src_width * dst.clip_width) / dst_width;
src_height = (src_height * dst.clip_height) / dst_height;
}
copy_scaled_image(cmd, vram_texture->value, dest_texture->value, vram_texture->current_layout, dest_texture->current_layout,
src_area.x1, src_area.y1, src_w, src_h, dst_area.x1, dst_area.y1, dst.clip_width, dst.clip_height, 1, VK_IMAGE_ASPECT_COLOR_BIT);
src_area.x1, src_area.y1, src_width, src_height, dst_offset.x, dst_offset.y, dst.clip_width, dst.clip_height, 1, (VkImageAspectFlagBits)aspect_to_copy);
if (dest_exists)
return true;
change_image_layout(cmd, dest_texture, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { aspect_to_copy, 0, 1, 0, 1 });
//TODO: Verify if any titles ever scale into CPU memory. It defeats the purpose of uploading data to the GPU, but it could happen
//If so, add this texture to the no_access queue not the read_only queue
cached_texture_section& region = find_cached_texture(dst.rsx_address, dst.pitch * dst.clip_height, true, real_width, dst.clip_height, 1);
@ -1344,12 +1465,11 @@ namespace vk
//It is possible for a title to attempt to read from the region, but the CPU path should be used in such cases
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), dest_texture->value, VK_IMAGE_VIEW_TYPE_2D, dst_vk_format,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
dest_mapping, { aspect_to_copy & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 });
region.reset(dst.rsx_address, dst.pitch * dst.clip_height);
region.create(real_width, dst.clip_height, 1, 1, view, dest_texture);
region.protect(utils::protection::rw);
region.protect(utils::protection::ro);
region.set_dirty(false);
read_only_range = region.get_min_max(read_only_range);
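
The swizzle-override step earlier in this hunk reduces to a small helper, sketched here against the stock vkCreateImageView call; the old view is kept alive (pushed to the frame's cleanup list) until the commands that referenced it have retired:

#include <vulkan/vulkan.h>

VkImageView remap_view(VkDevice dev, VkImageViewCreateInfo info, const VkComponentMapping& mapping)
{
    info.components = mapping; //only the component swizzle changes
    VkImageView view = VK_NULL_HANDLE;
    vkCreateImageView(dev, &info, nullptr, &view);
    return view; //caller owns the handle and destroys it once unreferenced
}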

View File

@ -230,7 +230,8 @@ VKGSRender::upload_vertex_data()
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
{
in_cache = true;
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first));
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first));
}
else
{
@ -241,7 +242,8 @@ VKGSRender::upload_vertex_data()
if (!in_cache)
{
persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first);
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first));
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first));
if (to_store)
{
@ -254,19 +256,20 @@ VKGSRender::upload_vertex_data()
}
else
{
persistent_view = m_null_buffer_view->value;
persistent_view = null_buffer_view->value;
}
if (required.second > 0)
{
volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second);
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
}
else
{
volatile_view = m_null_buffer_view->value;
volatile_view = null_buffer_view->value;
}
m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);