Vulkan: Add path which uses WSI acquire semaphores.

The fence wait path has issues on certain platforms. For now, enable the
WSI acquire semaphore path only when the GPU is an integrated GPU.
This commit is contained in:
Themaister 2020-06-06 14:24:13 +02:00
parent 15df55d011
commit cd4f22c275
3 changed files with 168 additions and 22 deletions

View File

@ -1746,6 +1746,22 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk)
vk->emulate_mailbox = vk->fullscreen;
#endif
/* If we're emulating mailbox, stick to using fences rather than semaphores.
* Avoids some really weird driver bugs. */
if (!vk->emulate_mailbox)
{
if (vk->context.gpu_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)
{
vk->use_wsi_semaphore = true;
RARCH_LOG("[Vulkan]: Using semaphores for WSI acquire.\n");
}
else
{
vk->use_wsi_semaphore = false;
RARCH_LOG("[Vulkan]: Using fences for WSI acquire.\n");
}
}
RARCH_LOG("[Vulkan]: Using GPU: %s\n", vk->context.gpu_properties.deviceName);
{
@ -2555,10 +2571,28 @@ static void vulkan_destroy_swapchain(gfx_ctx_vulkan_data_t *vk)
if (vk->context.swapchain_fences[i] != VK_NULL_HANDLE)
vkDestroyFence(vk->context.device,
vk->context.swapchain_fences[i], NULL);
if (vk->context.swapchain_recycled_semaphores[i] != VK_NULL_HANDLE)
vkDestroySemaphore(vk->context.device,
vk->context.swapchain_recycled_semaphores[i], NULL);
if (vk->context.swapchain_wait_semaphores[i] != VK_NULL_HANDLE)
vkDestroySemaphore(vk->context.device,
vk->context.swapchain_wait_semaphores[i], NULL);
}
memset(vk->context.swapchain_semaphores, 0, sizeof(vk->context.swapchain_semaphores));
memset(vk->context.swapchain_fences, 0, sizeof(vk->context.swapchain_fences));
if (vk->context.swapchain_acquire_semaphore != VK_NULL_HANDLE)
vkDestroySemaphore(vk->context.device,
vk->context.swapchain_acquire_semaphore, NULL);
vk->context.swapchain_acquire_semaphore = VK_NULL_HANDLE;
memset(vk->context.swapchain_semaphores, 0,
sizeof(vk->context.swapchain_semaphores));
memset(vk->context.swapchain_recycled_semaphores, 0,
sizeof(vk->context.swapchain_recycled_semaphores));
memset(vk->context.swapchain_wait_semaphores, 0,
sizeof(vk->context.swapchain_wait_semaphores));
memset(vk->context.swapchain_fences, 0,
sizeof(vk->context.swapchain_fences));
vk->context.num_recycled_acquire_semaphores = 0;
}
void vulkan_present(gfx_ctx_vulkan_data_t *vk, unsigned index)
@ -2673,6 +2707,12 @@ void vulkan_context_destroy(gfx_ctx_vulkan_data_t *vk,
}
}
/* Pushes `sem` onto the top of the context's recycled acquire-semaphore
 * stack (swapchain_recycled_semaphores) and bumps the stack count.
 *
 * ctx : Vulkan context holding the recycle stack.
 * sem : semaphore handle to store for later reuse.
 *
 * The stack must have a free slot; this is only enforced by assert(). */
static void vulkan_recycle_acquire_semaphore(struct vulkan_context *ctx, VkSemaphore sem)
{
   unsigned slot = ctx->num_recycled_acquire_semaphores;

   assert(slot < VULKAN_MAX_SWAPCHAIN_IMAGES);

   ctx->swapchain_recycled_semaphores[slot] = sem;
   ctx->num_recycled_acquire_semaphores     = slot + 1;
}
static void vulkan_acquire_clear_fences(gfx_ctx_vulkan_data_t *vk)
{
unsigned i;
@ -2685,11 +2725,31 @@ static void vulkan_acquire_clear_fences(gfx_ctx_vulkan_data_t *vk)
vk->context.swapchain_fences[i] = VK_NULL_HANDLE;
}
vk->context.swapchain_fences_signalled[i] = false;
if (vk->context.swapchain_wait_semaphores[i])
vulkan_recycle_acquire_semaphore(&vk->context, vk->context.swapchain_wait_semaphores[i]);
vk->context.swapchain_wait_semaphores[i] = VK_NULL_HANDLE;
}
vk->context.current_frame_index = 0;
}
static VkSemaphore vulkan_get_wsi_acquire_semaphore(struct vulkan_context *ctx)
{
if (ctx->num_recycled_acquire_semaphores == 0)
{
VkSemaphoreCreateInfo sem_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
vkCreateSemaphore(ctx->device, &sem_info, NULL,
&ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores++]);
}
VkSemaphore sem =
ctx->swapchain_recycled_semaphores[--ctx->num_recycled_acquire_semaphores];
ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores] =
VK_NULL_HANDLE;
return sem;
}
static void vulkan_acquire_wait_fences(gfx_ctx_vulkan_data_t *vk)
{
VkFenceCreateInfo fence_info =
@ -2712,6 +2772,10 @@ static void vulkan_acquire_wait_fences(gfx_ctx_vulkan_data_t *vk)
else
vkCreateFence(vk->context.device, &fence_info, NULL, next_fence);
vk->context.swapchain_fences_signalled[index] = false;
if (vk->context.swapchain_wait_semaphores[index] != VK_NULL_HANDLE)
vulkan_recycle_acquire_semaphore(&vk->context, vk->context.swapchain_wait_semaphores[index]);
vk->context.swapchain_wait_semaphores[index] = VK_NULL_HANDLE;
}
static void vulkan_create_wait_fences(gfx_ctx_vulkan_data_t *vk)
@ -2734,7 +2798,8 @@ void vulkan_acquire_next_image(gfx_ctx_vulkan_data_t *vk)
{
unsigned index;
VkResult err;
VkFence fence;
VkFence fence = VK_NULL_HANDLE;
VkSemaphore semaphore = VK_NULL_HANDLE;
VkFenceCreateInfo fence_info =
{ VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
VkSemaphoreCreateInfo sem_info =
@ -2756,6 +2821,7 @@ retry:
{
/* We still don't have a swapchain, so just fake it ... */
vk->context.current_swapchain_index = 0;
vk->context.current_frame_index = 0;
vulkan_acquire_clear_fences(vk);
vulkan_acquire_wait_fences(vk);
vk->context.invalid_swapchain = true;
@ -2772,14 +2838,17 @@ retry:
* MAILBOX would do. */
err = vulkan_emulated_mailbox_acquire_next_image(
&vk->mailbox, &vk->context.current_swapchain_index);
fence = VK_NULL_HANDLE;
}
else
{
vkCreateFence(vk->context.device, &fence_info, NULL, &fence);
if (vk->use_wsi_semaphore)
semaphore = vulkan_get_wsi_acquire_semaphore(&vk->context);
else
vkCreateFence(vk->context.device, &fence_info, NULL, &fence);
err = vkAcquireNextImageKHR(vk->context.device,
vk->swapchain, UINT64_MAX,
VK_NULL_HANDLE, fence, &vk->context.current_swapchain_index);
semaphore, fence, &vk->context.current_swapchain_index);
#ifdef ANDROID
/* VK_SUBOPTIMAL_KHR can be returned on Android 10
@ -2796,9 +2865,27 @@ retry:
if (fence != VK_NULL_HANDLE)
vkWaitForFences(vk->context.device, 1, &fence, true, UINT64_MAX);
vk->context.has_acquired_swapchain = true;
if (vk->context.swapchain_acquire_semaphore)
{
#ifdef HAVE_THREADS
slock_lock(vk->context.queue_lock);
#endif
RARCH_LOG("[Vulkan]: Destroying stale acquire semaphore.\n");
vkDeviceWaitIdle(vk->context.device);
vkDestroySemaphore(vk->context.device, vk->context.swapchain_acquire_semaphore, NULL);
#ifdef HAVE_THREADS
slock_unlock(vk->context.queue_lock);
#endif
}
vk->context.swapchain_acquire_semaphore = semaphore;
}
else
{
vk->context.has_acquired_swapchain = false;
if (semaphore)
vulkan_recycle_acquire_semaphore(&vk->context, semaphore);
}
#ifdef WSI_HARDENING_TEST
trigger_spurious_error_vkresult(&err);
@ -2809,9 +2896,7 @@ retry:
if (err == VK_NOT_READY || err == VK_TIMEOUT)
{
/* Just pretend we have a swapchain index, round-robin style. */
vk->context.current_swapchain_index =
(vk->context.current_swapchain_index + 1) % vk->context.num_swapchain_images;
/* Do nothing. */
}
else if (err == VK_ERROR_OUT_OF_DATE_KHR || err == VK_SUBOPTIMAL_KHR)
{

View File

@ -118,6 +118,11 @@ typedef struct vulkan_context
VkSemaphore swapchain_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES];
VkFormat swapchain_format;
VkSemaphore swapchain_acquire_semaphore;
unsigned num_recycled_acquire_semaphores;
VkSemaphore swapchain_recycled_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES];
VkSemaphore swapchain_wait_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES];
slock_t *queue_lock;
retro_vulkan_destroy_device_t destroy_device;
@ -154,6 +159,10 @@ typedef struct gfx_ctx_vulkan_data
bool created_new_swapchain;
bool emulate_mailbox;
bool emulating_mailbox;
/* If set, prefer a path where we use
 * semaphores instead of fences for vkAcquireNextImageKHR.
 * Helps work around certain performance issues on some drivers. */
bool use_wsi_semaphore;
vulkan_context_t context;
VkSurfaceKHR vk_surface;
VkSwapchainKHR swapchain;
@ -421,7 +430,7 @@ typedef struct vk
struct retro_hw_render_interface_vulkan iface;
const struct retro_vulkan_image *image;
const VkSemaphore *semaphores;
VkSemaphore *semaphores;
VkSemaphore signal_semaphore;
VkPipelineStageFlags *wait_dst_stages;
VkCommandBuffer *cmd;

View File

@ -909,6 +909,7 @@ static void vulkan_deinit_static_resources(vk_t *vk)
vk->staging_pool, NULL);
free(vk->hw.cmd);
free(vk->hw.wait_dst_stages);
free(vk->hw.semaphores);
for (i = 0; i < VULKAN_MAX_SWAPCHAIN_IMAGES; i++)
if (vk->readback.staging[i].memory != VK_NULL_HANDLE)
@ -1002,21 +1003,27 @@ static void vulkan_set_image(void *handle,
vk->hw.image = image;
vk->hw.num_semaphores = num_semaphores;
vk->hw.semaphores = semaphores;
if (num_semaphores > 0)
{
VkPipelineStageFlags *stage_flags = (VkPipelineStageFlags*)
realloc(vk->hw.wait_dst_stages,
sizeof(VkPipelineStageFlags) * vk->hw.num_semaphores);
/* Allocate one extra in case we need to use WSI acquire semaphores. */
VkPipelineStageFlags *stage_flags = (VkPipelineStageFlags*)realloc(vk->hw.wait_dst_stages,
sizeof(VkPipelineStageFlags) * (vk->hw.num_semaphores + 1));
VkSemaphore *new_semaphores = (VkSemaphore*)realloc(vk->hw.semaphores,
sizeof(VkSemaphore) * (vk->hw.num_semaphores + 1));
/* If this fails, we're screwed anyways. */
retro_assert(stage_flags);
retro_assert(stage_flags && new_semaphores);
vk->hw.wait_dst_stages = stage_flags;
vk->hw.semaphores = new_semaphores;
for (i = 0; i < vk->hw.num_semaphores; i++)
{
vk->hw.wait_dst_stages[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
vk->hw.semaphores[i] = semaphores[i];
}
vk->hw.valid_semaphore = true;
vk->hw.src_queue_family = src_queue_family;
@ -1629,7 +1636,7 @@ static void vulkan_inject_black_frame(vk_t *vk, video_frame_info_t *video_info,
vulkan_image_layout_transition(vk, vk->cmd, backbuffer->image,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
0, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
vkCmdClearColorImage(vk->cmd, backbuffer->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@ -1645,12 +1652,28 @@ static void vulkan_inject_black_frame(vk_t *vk, video_frame_info_t *video_info,
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &vk->cmd;
if (vk->context->swapchain_semaphores[swapchain_index] != VK_NULL_HANDLE)
if (vk->context->has_acquired_swapchain &&
vk->context->swapchain_semaphores[swapchain_index] != VK_NULL_HANDLE)
{
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &vk->context->swapchain_semaphores[swapchain_index];
}
if (vk->context->has_acquired_swapchain &&
vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE)
{
static const VkPipelineStageFlags wait_stage =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
assert(!vk->context->swapchain_wait_semaphores[frame_index]);
vk->context->swapchain_wait_semaphores[frame_index] =
vk->context->swapchain_acquire_semaphore;
vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE;
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &vk->context->swapchain_wait_semaphores[frame_index];
submit_info.pWaitDstStageMask = &wait_stage;
}
#ifdef HAVE_THREADS
slock_lock(vk->context->queue_lock);
#endif
@ -1922,11 +1945,11 @@ static bool vulkan_frame(void *data, const void *frame,
clear_color.color.float32[2] = 0.0f;
clear_color.color.float32[3] = 0.0f;
/* Prepare backbuffer for rendering. We don't use WSI semaphores here. */
/* Prepare backbuffer for rendering. */
vulkan_image_layout_transition(vk, vk->cmd, backbuffer->image,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
0, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
/* Begin render pass and set up viewport */
@ -2023,7 +2046,7 @@ static bool vulkan_frame(void *data, const void *frame,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
vulkan_readback(vk);
@ -2049,8 +2072,8 @@ static bool vulkan_frame(void *data, const void *frame,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_ACCESS_MEMORY_READ_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
0,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
@ -2097,6 +2120,35 @@ static bool vulkan_frame(void *data, const void *frame,
/* Consume the semaphores. */
vk->hw.valid_semaphore = false;
/* We allocated space for this. */
if (vk->context->has_acquired_swapchain &&
vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE)
{
assert(!vk->context->swapchain_wait_semaphores[frame_index]);
vk->context->swapchain_wait_semaphores[frame_index] =
vk->context->swapchain_acquire_semaphore;
vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE;
vk->hw.semaphores[submit_info.waitSemaphoreCount] = vk->context->swapchain_wait_semaphores[frame_index];
vk->hw.wait_dst_stages[submit_info.waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
submit_info.waitSemaphoreCount++;
}
}
else if (vk->context->has_acquired_swapchain &&
vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE)
{
static const VkPipelineStageFlags wait_stage =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
assert(!vk->context->swapchain_wait_semaphores[frame_index]);
vk->context->swapchain_wait_semaphores[frame_index] =
vk->context->swapchain_acquire_semaphore;
vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE;
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &vk->context->swapchain_wait_semaphores[frame_index];
submit_info.pWaitDstStageMask = &wait_stage;
}
submit_info.signalSemaphoreCount = 0;