vk: Reimplement events using synchronization2 extension

kd-11 2023-06-22 00:19:03 +03:00 committed by kd-11
parent 00cca7be69
commit 850166eca1
6 changed files with 183 additions and 69 deletions

View File

@@ -478,7 +478,7 @@ void VKGSRender::load_texture_env()
 		// Sync any async scheduler tasks
 		if (auto ev = async_task_scheduler.get_primary_sync_label())
 		{
-			ev->gpu_wait(*m_current_command_buffer);
+			ev->gpu_wait(*m_current_command_buffer, { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR });
 		}
 	}
 }
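The empty dependency info above requests a plain execution wait with no memory barriers. For completeness, a minimal sketch of what a populated dependency would look like, with illustrative stage/access masks that are not part of this commit; note that synchronization2 requires the dependency supplied at wait time to match the one recorded when the event was signaled:

```cpp
// Hypothetical: a compute producer made visible to a fragment-shader consumer.
VkMemoryBarrier2KHR barrier = {
	.sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
	.srcStageMask  = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
	.srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
	.dstStageMask  = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
	.dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR
};

VkDependencyInfoKHR dependency = {
	.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
	.memoryBarrierCount = 1,
	.pMemoryBarriers = &barrier
};
ev->gpu_wait(*m_current_command_buffer, dependency);
```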

View File

@@ -191,9 +191,23 @@ namespace vk
 	src->pop_layout(cmd);
 
+	VkMemoryBarrier2KHR copy_memory_barrier = {
+		.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
+		.pNext = nullptr,
+		.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
+		.srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
+		.dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
+		.dstAccessMask = 0
+	};
+
 	// Create event object for this transfer and queue signal op
 	dma_fence = std::make_unique<vk::event>(*m_device, sync_domain::any);
-	dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+	dma_fence->signal(cmd,
+		{
+			.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
+			.memoryBarrierCount = 1,
+			.pMemoryBarriers = &copy_memory_barrier
+		});
 
 	// Set cb flag for queued dma operations
 	cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
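Under synchronization2 the barrier travels with the event itself: the signal above records the copy flush as the event's first scope, so no separate pipeline barrier is needed. A sketch of the raw calls the wrapper pair reduces to, assuming the dependency from this hunk were bound to a hypothetical `dependency_info` variable and the entry points were fetched via vkGetDeviceProcAddr as this commit does (shown as direct calls for brevity):

```cpp
// Producer side: make the copy results available and signal the event.
vkCmdSetEvent2KHR(cb, evt, &dependency_info);

// Consumer side: wait on the event; the spec requires this dependency to be
// identical to the one supplied at signal time.
vkCmdWaitEvents2KHR(cb, 1, &evt, &dependency_info);
```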

View File

@@ -113,6 +113,7 @@ namespace vk
 		optional_features_support.conditional_rendering = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
 		optional_features_support.external_memory_host = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
 		optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
+		optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
 		optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
 		optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
@@ -486,6 +487,11 @@ namespace vk
 			requested_extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
 		}
 
+		if (pgpu->optional_features_support.synchronization_2)
+		{
+			requested_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
+		}
+
 		enabled_features.robustBufferAccess = VK_TRUE;
 		enabled_features.fullDrawIndexUint32 = VK_TRUE;
 		enabled_features.independentBlend = VK_TRUE;
@@ -670,6 +676,14 @@ namespace vk
 			device.pNext = &custom_border_color_features;
 		}
 
+		VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2_info{};
+		if (pgpu->optional_features_support.synchronization_2)
+		{
+			synchronization2_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES;
+			synchronization2_info.pNext = const_cast<void*>(device.pNext);
+			device.pNext = &synchronization2_info;
+		}
+
 		CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);
 
 		// Initialize queues
@@ -695,6 +709,12 @@ namespace vk
 			_vkCmdInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkCmdInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkCmdInsertDebugUtilsLabelEXT"));
 		}
 
+		if (pgpu->optional_features_support.synchronization_2)
+		{
+			_vkCmdSetEvent2KHR = reinterpret_cast<PFN_vkCmdSetEvent2KHR>(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR"));
+			_vkCmdWaitEvents2KHR = reinterpret_cast<PFN_vkCmdWaitEvents2KHR>(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR"));
+		}
+
 		memory_map = vk::get_memory_mapping(pdev);
 		m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
 		m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
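One detail the feature chain does not show explicitly: VkPhysicalDeviceSynchronization2FeaturesKHR carries a `synchronization2` member, and the spec requires it to be VK_TRUE for the vkCmd*2KHR entry points to be valid. A self-contained sketch of the enablement pattern, with hypothetical local names:

```cpp
VkPhysicalDeviceSynchronization2FeaturesKHR sync2_features = {
	.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
	.pNext = nullptr,
	.synchronization2 = VK_TRUE  // the feature bit itself, not just the struct
};

VkDeviceCreateInfo create_info = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
create_info.pNext = &sync2_features;  // prepend to any existing feature chain
// ... fill in queue create infos and enabled extensions, then:
// vkCreateDevice(physical_device, &create_info, nullptr, &device);
```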

View File

@@ -79,6 +79,7 @@ namespace vk
 			bool sampler_mirror_clamped = false;
 			bool shader_stencil_export = false;
 			bool surface_capabilities_2 = false;
+			bool synchronization_2 = false;
 			bool unrestricted_depth_range = false;
 		} optional_features_support;
@@ -135,6 +136,8 @@ namespace vk
 		PFN_vkSetDebugUtilsObjectNameEXT _vkSetDebugUtilsObjectNameEXT = nullptr;
 		PFN_vkQueueInsertDebugUtilsLabelEXT _vkQueueInsertDebugUtilsLabelEXT = nullptr;
 		PFN_vkCmdInsertDebugUtilsLabelEXT _vkCmdInsertDebugUtilsLabelEXT = nullptr;
+		PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr;
+		PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr;
 
 	public:
 		render_device() = default;
@@ -168,6 +171,7 @@ namespace vk
 		bool get_framebuffer_loops_support() const { return pgpu->optional_features_support.framebuffer_loops; }
 		bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
 		bool get_custom_border_color_support() const { return pgpu->optional_features_support.custom_border_color; }
+		bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
 		u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
 		u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }

View File

@@ -15,6 +15,106 @@
 namespace vk
 {
+	// Util
+	namespace v1_utils
+	{
+		VkPipelineStageFlags gather_src_stages(const VkDependencyInfoKHR& dependency)
+		{
+			VkPipelineStageFlags stages = VK_PIPELINE_STAGE_NONE;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pBufferMemoryBarriers[i].srcStageMask;
+			}
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pImageMemoryBarriers[i].srcStageMask;
+			}
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				stages |= dependency.pMemoryBarriers[i].srcStageMask;
+			}
+			return stages;
+		}
+
+		VkPipelineStageFlags gather_dst_stages(const VkDependencyInfoKHR& dependency)
+		{
+			VkPipelineStageFlags stages = VK_PIPELINE_STAGE_NONE;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pBufferMemoryBarriers[i].dstStageMask;
+			}
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pImageMemoryBarriers[i].dstStageMask;
+			}
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				stages |= dependency.pMemoryBarriers[i].dstStageMask;
+			}
+			return stages;
+		}
+
+		auto get_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].dstAccessMask)
+				);
+			}
+			return result;
+		}
+
+		auto get_image_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkImageMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].dstAccessMask),
+					dependency.pImageMemoryBarriers[i].oldLayout,
+					dependency.pImageMemoryBarriers[i].newLayout,
+					dependency.pImageMemoryBarriers[i].srcQueueFamilyIndex,
+					dependency.pImageMemoryBarriers[i].dstQueueFamilyIndex,
+					dependency.pImageMemoryBarriers[i].image,
+					dependency.pImageMemoryBarriers[i].subresourceRange
+				);
+			}
+			return result;
+		}
+
+		auto get_buffer_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkBufferMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].dstAccessMask),
+					dependency.pBufferMemoryBarriers[i].srcQueueFamilyIndex,
+					dependency.pBufferMemoryBarriers[i].dstQueueFamilyIndex,
+					dependency.pBufferMemoryBarriers[i].buffer,
+					dependency.pBufferMemoryBarriers[i].offset,
+					dependency.pBufferMemoryBarriers[i].size
+				);
+			}
+			return result;
+		}
+	}
+
+	// Objects
 	fence::fence(VkDevice dev)
 	{
 		owner = dev;
@@ -75,101 +175,78 @@ namespace vk
 	}
 
 	event::event(const render_device& dev, sync_domain domain)
-		: m_device(dev)
+		: m_device(&dev), v2(dev.get_synchronization2_support())
 	{
-		const auto vendor = dev.gpu().get_driver_vendor();
-		if (domain != sync_domain::gpu &&
-			(vendor == vk::driver_vendor::AMD || vendor == vk::driver_vendor::INTEL))
-		{
-			// Work around AMD and INTEL broken event signal synchronization scope
-			// Will be dropped after transitioning to VK1.3
-			m_buffer = std::make_unique<buffer>
-			(
-				dev,
-				4,
-				dev.get_memory_mapping().host_visible_coherent,
-				VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-				VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-				0,
-				VMM_ALLOCATION_POOL_SYSTEM
-			);
-
-			m_value = reinterpret_cast<u32*>(m_buffer->map(0, 4));
-			*m_value = 0xCAFEBABE;
-		}
-		else
-		{
-			VkEventCreateInfo info
-			{
-				.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
-				.pNext = nullptr,
-				.flags = 0
-			};
-			vkCreateEvent(dev, &info, nullptr, &m_vk_event);
-		}
+		VkEventCreateInfo info
+		{
+			.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
+			.pNext = nullptr,
+			.flags = 0
+		};
+		CHECK_RESULT(vkCreateEvent(dev, &info, nullptr, &m_vk_event));
 	}
 
 	event::~event()
 	{
 		if (m_vk_event) [[likely]]
 		{
-			vkDestroyEvent(m_device, m_vk_event, nullptr);
+			vkDestroyEvent(*m_device, m_vk_event, nullptr);
 		}
-		else
-		{
-			m_buffer->unmap();
-			m_buffer.reset();
-			m_value = nullptr;
-		}
 	}
 
-	void event::signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access)
+	void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
 	{
-		if (m_vk_event) [[likely]]
+		if (v2) [[ likely ]]
 		{
-			vkCmdSetEvent(cmd, m_vk_event, stages);
+			m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &dependency);
 		}
 		else
 		{
-			insert_global_memory_barrier(cmd, stages, VK_PIPELINE_STAGE_TRANSFER_BIT, access, VK_ACCESS_TRANSFER_WRITE_BIT);
-			vkCmdFillBuffer(cmd, m_buffer->value, 0, 4, 0xDEADBEEF);
+			// Legacy fallback. Should be practically unused with the exception of in-development drivers.
+			const auto stages = v1_utils::gather_src_stages(dependency);
+			vkCmdSetEvent(cmd, m_vk_event, stages);
 		}
 	}
 
 	void event::host_signal() const
 	{
 		ensure(m_vk_event);
-		vkSetEvent(m_device, m_vk_event);
+		vkSetEvent(*m_device, m_vk_event);
 	}
 
-	void event::gpu_wait(const command_buffer& cmd) const
+	void event::gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const
 	{
 		ensure(m_vk_event);
-		vkCmdWaitEvents(cmd, 1, &m_vk_event, 0, 0, 0, nullptr, 0, nullptr, 0, nullptr);
+
+		if (v2) [[ likely ]]
+		{
+			m_device->_vkCmdWaitEvents2KHR(cmd, 1, &m_vk_event, &dependency);
+		}
+		else
+		{
+			const auto src_stages = v1_utils::gather_src_stages(dependency);
+			const auto dst_stages = v1_utils::gather_dst_stages(dependency);
+			const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
+			const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
+			const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
+
+			vkCmdWaitEvents(cmd,
+				1, &m_vk_event,
+				src_stages, dst_stages,
+				::size32(memory_barriers), memory_barriers.data(),
+				::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
+				::size32(image_memory_barriers), image_memory_barriers.data());
+		}
 	}
 
 	void event::reset() const
 	{
-		if (m_vk_event) [[likely]]
-		{
-			vkResetEvent(m_device, m_vk_event);
-		}
-		else
-		{
-			*m_value = 0xCAFEBABE;
-		}
+		vkResetEvent(*m_device, m_vk_event);
 	}
 
 	VkResult event::status() const
 	{
-		if (m_vk_event) [[likely]]
-		{
-			return vkGetEventStatus(m_device, m_vk_event);
-		}
-		else
-		{
-			return (*m_value == 0xCAFEBABE) ? VK_EVENT_RESET : VK_EVENT_SET;
-		}
+		return vkGetEventStatus(*m_device, m_vk_event);
 	}
 
 	gpu_debug_marker_pool::gpu_debug_marker_pool(const vk::render_device& dev, u32 count)
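The static_casts in the v1_utils fallback truncate the 64-bit VkAccessFlags2KHR masks down to 32-bit VkAccessFlags. This is sound for legacy-compatible masks because synchronization2 deliberately assigns the pre-existing stage and access bits the same values in the lower 32 bits; bits at position 32 and above (e.g. VK_PIPELINE_STAGE_2_COPY_BIT_KHR) have no direct Vulkan 1.0 counterpart and are dropped by the narrowing. A build-time check documenting the assumption:

```cpp
// Sanity checks: sync2 re-uses the legacy bit positions for the original flags.
static_assert(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR == VK_PIPELINE_STAGE_TRANSFER_BIT);
static_assert(VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR == VK_ACCESS_TRANSFER_WRITE_BIT);
static_assert(VK_ACCESS_2_SHADER_READ_BIT_KHR == VK_ACCESS_SHADER_READ_BIT);
```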

View File

@@ -9,6 +9,7 @@
 namespace vk
 {
 	class command_buffer;
+	class image;
 
 	enum class sync_domain
 	{
@@ -54,20 +55,18 @@ namespace vk
 	class event
 	{
-		VkDevice m_device = VK_NULL_HANDLE;
+		const vk::render_device* m_device = nullptr;
 		VkEvent m_vk_event = VK_NULL_HANDLE;
+		bool v2 = true;
 
-		std::unique_ptr<buffer> m_buffer;
-		volatile u32* m_value = nullptr;
-
 	public:
 		event(const render_device& dev, sync_domain domain);
 		~event();
 		event(const event&) = delete;
 
-		void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
+		void signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency);
 		void host_signal() const;
-		void gpu_wait(const command_buffer& cmd) const;
+		void gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const;
 		VkResult status() const;
 		void reset() const;
 	};
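Taken together, a hedged usage sketch of the reworked interface, assuming a vk::render_device `dev` and a recording vk::command_buffer `cmd` are in scope:

```cpp
vk::event ev(dev, vk::sync_domain::any);

VkDependencyInfoKHR dep = { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR };
ev.signal(cmd, dep);    // vkCmdSetEvent2KHR when v2 is available, vkCmdSetEvent otherwise
ev.gpu_wait(cmd, dep);  // vkCmdWaitEvents2KHR, or the reconstructed vkCmdWaitEvents call

if (ev.status() == VK_EVENT_SET)
{
	ev.reset();
}
```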