vk: Buffer sync timing tweaks

vulkan: more sync timing fixes
kd-11 2017-04-23 15:00:38 +03:00
parent e1a75deb25
commit 3b27b3c182
2 changed files with 73 additions and 37 deletions


@ -572,6 +572,8 @@ VKGSRender::~VKGSRender()
return;
}
m_current_command_buffer->reset();
//Wait for queue
vkQueueWaitIdle(m_swap_chain->get_present_queue());
@ -642,29 +644,46 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
return m_texture_cache.invalidate_address(address);
else
{
if (!m_texture_cache.address_is_flushable(address))
bool flushable, synchronized;
std::tie(flushable, synchronized) = m_texture_cache.address_is_flushable(address);
if (!flushable)
return false;
if (synchronized)
{
if (m_last_flushable_cb >= 0)
{
if (m_primary_cb_list[m_last_flushable_cb].pending)
m_primary_cb_list[m_last_flushable_cb].wait();
}
m_last_flushable_cb = -1;
}
else
{
//This region is buffered, but no previous sync point has been put in place to start sync efforts
//Just stall and get what we have at this point
if (std::this_thread::get_id() != rsx_thread)
{
//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
m_flush_commands = true;
m_queued_threads++;
//This is awful!
while (m_flush_commands);
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
m_queued_threads--;
return status;
}
else
{
//NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU
flush_command_queue();
}
}
std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
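
The branch above distinguishes two cases: a section that already has a DMA copy in flight (synchronized) only needs its command buffer waited on, while an unsynchronized section forces the renderer to flush on the spot. A minimal caller-side sketch of that decision, using a hypothetical stub in place of the real texture cache:

#include <cstdint>
#include <tuple>

// Hypothetical stand-in for the cache interface used above; the real class
// lives in the texture cache header changed later in this commit.
struct texture_cache_stub
{
    std::tuple<bool, bool> address_is_flushable(std::uint32_t /*address*/) const
    {
        return std::make_tuple(true, false); // flushable, but no sync point yet
    }
};

// Sketch of the fault-handler decision: returns true if the write was handled.
bool handle_access_violation(texture_cache_stub &cache, std::uint32_t address)
{
    bool flushable, synchronized;
    std::tie(flushable, synchronized) = cache.address_is_flushable(address);

    if (!flushable)
        return false;               // not ours to handle

    if (synchronized)
    {
        // A transfer was already recorded: wait for its command buffer,
        // then read the DMA buffer back (cheap path).
    }
    else
    {
        // No sync point exists yet: the renderer must flush the command queue
        // before the data can be read back (expensive path).
    }
    return true;
}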
@ -721,7 +740,6 @@ void VKGSRender::begin()
std::chrono::time_point<steady_clock> stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
m_draw_calls++;
m_used_descriptors++;
}
@ -826,6 +844,13 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//upload_vertex_data is a memory op and can trigger an access violation
//render passes are supposed to be uninterruptible, so we have to finish everything first before we start the render pass
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass;
@ -836,12 +861,6 @@ void VKGSRender::end()
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
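
The comments above upload_vertex_data() in the earlier hunk of end() are the reason for this move: vertex upload touches guest memory and can raise an access violation, and the resulting flush cannot be serviced while a render pass is being recorded. A sketch of the resulting ordering, with hypothetical upload helpers standing in for the RPCS3 calls:

#include <vulkan/vulkan.h>
#include <cstdint>

// Hypothetical stand-ins for the operations that read guest memory and may
// fault; the real RPCS3 functions fill host-visible buffers here.
static void upload_textures(VkCommandBuffer /*cmd*/) {}
static void upload_vertex_data(VkCommandBuffer /*cmd*/) {}

void record_draw(VkCommandBuffer cmd, const VkRenderPassBeginInfo &rp_begin,
                 VkPipeline pipeline, std::uint32_t vertex_count)
{
    // Everything that can fault runs before the render pass begins...
    upload_textures(cmd);
    upload_vertex_data(cmd);

    // ...so the span between vkCmdBeginRenderPass and vkCmdEndRenderPass holds
    // only state binding and draws and never has to be interrupted.
    vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
    vkCmdDraw(cmd, vertex_count, 1, 0, 0);
    vkCmdEndRenderPass(cmd);
}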
@ -867,6 +886,7 @@ void VKGSRender::end()
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
copy_render_targets_to_dma_location();
m_draw_calls++;
rsx::thread::end();
}
@ -928,6 +948,7 @@ void VKGSRender::clear_surface(u32 mask)
if (m_current_present_image == 0xFFFF) return;
init_buffers();
copy_render_targets_to_dma_location();
float depth_clear = 1.f;
u32 stencil_clear = 0;
@ -1070,15 +1091,19 @@ void VKGSRender::flush_command_queue(bool hard_sync)
if (hard_sync)
{
//swap handler checks the pending flag, so call it here
process_swap_request();
//wait for the latest instruction to execute
m_current_command_buffer->pending = true;
m_current_command_buffer->wait();
//Clear all command buffer statuses
for (auto &cb : m_primary_cb_list)
cb.poke();
m_last_flushable_cb = -1;
m_flush_commands = false;
}
else
{
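
The poke() helper itself is not part of this diff; presumably it clears a command buffer's pending flag once its fence has signalled, without blocking. A minimal sketch of such a check, assuming a fence member and a pending flag like the ones used here (hypothetical type, not the actual RPCS3 implementation):

#include <vulkan/vulkan.h>

struct cb_status_sketch
{
    VkDevice device = VK_NULL_HANDLE;
    VkFence  submit_fence = VK_NULL_HANDLE;
    bool     pending = false;

    // Non-blocking: vkGetFenceStatus returns VK_SUCCESS once the fence has
    // signalled and VK_NOT_READY otherwise, so the caller never stalls.
    void poke()
    {
        if (!pending)
            return;

        if (vkGetFenceStatus(device, submit_fence) == VK_SUCCESS)
        {
            vkResetFences(device, 1, &submit_fence);
            pending = false;
        }
    }
};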
@ -1135,8 +1160,6 @@ void VKGSRender::process_swap_request()
present.pImageIndices = &m_current_present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
}
else
fmt::throw_exception("How can a process be set without a pending flag?");
//Clean up all the resources from the last frame
@ -1162,14 +1185,11 @@ void VKGSRender::do_local_task()
{
if (m_flush_commands)
{
//WARNING: This is a hard sync, expect horrendous performance
//Need to process this a little better!
//TODO: Link cb with draw buffer and wait for that specific cb based on address
LOG_ERROR(RSX, "Hard sync point is to be processed. Performance warning");
flush_command_queue(true);
//TODO: Determine if a hard sync is necessary
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
flush_command_queue();
m_flush_commands = false;
m_flush_draw_buffers = false;
while (m_queued_threads);
}
}
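
do_local_task() is the RSX-thread half of the handshake started in on_access_violation(): a faulting thread raises m_flush_commands and spins, the RSX thread performs the flush, drops the flag, and then waits until every queued thread has finished reading back. A compact sketch of that protocol, assuming atomic flags (hypothetical names; the real members are declared in the renderer header):

#include <atomic>

std::atomic<bool> flush_requested{false};
std::atomic<int>  queued_readers{0};

// Non-RSX thread (fault handler): request a flush, wait for it, then read back.
void reader_thread_flush()
{
    flush_requested = true;
    queued_readers++;

    while (flush_requested)
        ;                       // busy-wait, as in the diff ("This is awful!")

    // ...flush_address() on the secondary command buffer happens here...

    queued_readers--;
}

// RSX thread, polled from do_local_task(): service the request, then drain readers.
void rsx_thread_service()
{
    if (!flush_requested)
        return;

    // flush_command_queue();   // submit and synchronize outstanding work

    flush_requested = false;

    while (queued_readers)
        ;                       // wait until every reader has finished
}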


@ -21,6 +21,7 @@ namespace vk
//DMA relevant data
u16 native_pitch;
VkFence dma_fence = VK_NULL_HANDLE;
bool synchronized = false;
vk::render_device* m_device = nullptr;
vk::image *vram_texture = nullptr;
std::unique_ptr<vk::buffer> dma_buffer;
@ -52,6 +53,10 @@ namespace vk
//TODO: Properly compute these values
this->native_pitch = native_pitch;
pitch = cpu_address_range / height;
//Even if we are managing the same vram section, we cannot guarantee contents are static
//The create method is only invoked when a new managed session is required
synchronized = false;
}
void release_dma_resources()
@ -193,6 +198,8 @@ namespace vk
CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
}
synchronized = true;
}
template<typename T>
@ -217,7 +224,7 @@ namespace vk
if (m_device == nullptr)
m_device = &dev;
if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
@ -263,6 +270,11 @@ namespace vk
dma_buffer->unmap();
//It's highly likely that this surface will be reused, so we just leave resources in place
}
bool is_synchronized() const
{
return synchronized;
}
};
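
Taken together, the header changes give each cached section a small synchronization lifecycle: it starts unsynchronized when (re)created, becomes synchronized once copy_texture() has recorded the DMA transfer, and the flush path uses that state to pick the cheap or the blocking route. A stripped-down sketch of that flow (hypothetical type, not the real cached_texture_section):

struct cached_section_sketch
{
    bool synchronized = false;

    // create(): a new managed session, so previous contents cannot be trusted.
    void create() { synchronized = false; }

    // copy_texture(): a DMA copy (with its fence) has been recorded.
    void copy_texture() { synchronized = true; }

    bool is_synchronized() const { return synchronized; }

    // flush(): if nothing was recorded yet, fall back to a blocking copy first,
    // then read the DMA buffer back to guest memory.
    void flush()
    {
        if (!synchronized)
            copy_texture();

        // ...map dma_buffer and write the pixels back...
    }
};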
class texture_cache
@ -538,18 +550,22 @@ namespace vk
region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
}
bool address_is_flushable(u32 address)
std::tuple<bool, bool> address_is_flushable(u32 address)
{
if (address < texture_cache_range.first ||
address > texture_cache_range.second)
return std::make_tuple(false, false);
for (auto &tex : m_cache)
{
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
if (tex.overlaps(address))
return true;
return std::make_tuple(true, tex.is_synchronized());
}
return false;
return std::make_tuple(false, false);
}
bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)