From c63ec27abdf33335c05fd8b536a94efdb648099b Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 09:32:08 -0400 Subject: [PATCH 01/17] vk/queue: move VkPerformanceQuerySubmitInfoKHR handling up this is only supported for actual submits --- src/vulkan/runtime/vk_queue.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 9e38299ed05fe..d95b694ef8b50 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -594,7 +594,8 @@ struct vulkan_submit_info { static VkResult vk_queue_submit(struct vk_queue *queue, - const struct vulkan_submit_info *info) + const struct vulkan_submit_info *info, + uint32_t perf_pass_index) { struct vk_device *device = queue->base.device; VkResult result; @@ -633,14 +634,7 @@ vk_queue_submit(struct vk_queue *queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - /* From the Vulkan 1.2.194 spec: - * - * "If the VkSubmitInfo::pNext chain does not include this structure, - * the batch defaults to use counter pass index 0." - */ - const VkPerformanceQuerySubmitInfoKHR *perf_info = - vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); - submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; + submit->perf_pass_index = perf_pass_index; bool has_binary_permanent_semaphore_wait = false; for (uint32_t i = 0; i < info->wait_count; i++) { @@ -1172,7 +1166,17 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, .signals = pSubmits[i].pSignalSemaphoreInfos, .fence = i == submitCount - 1 ? fence : NULL }; - VkResult result = vk_queue_submit(queue, &info); + + /* From the Vulkan 1.2.194 spec: + * + * "If the VkSubmitInfo::pNext chain does not include this structure, + * the batch defaults to use counter pass index 0." 
+ */ + const VkPerformanceQuerySubmitInfoKHR *perf_info = + vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); + uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; + + VkResult result = vk_queue_submit(queue, &info, perf_pass_index); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1274,7 +1278,7 @@ vk_common_QueueBindSparse(VkQueue _queue, .image_binds = pBindInfo[i].pImageBinds, .fence = i == bindInfoCount - 1 ? fence : NULL }; - VkResult result = vk_queue_submit(queue, &info); + VkResult result = vk_queue_submit(queue, &info, 0); STACK_ARRAY_FINISH(wait_semaphore_infos); STACK_ARRAY_FINISH(signal_semaphore_infos); -- GitLab From d1936c0f4b72fbfb6f904e52cbd95c0ecb103aeb Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 09:46:41 -0400 Subject: [PATCH 02/17] vk/queue: move WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA handling up this (should) only come from mesa wsi, which does a single queue submit per present, so handling it up front is both more logically consistent and more performant --- src/vulkan/runtime/vk_queue.c | 43 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index d95b694ef8b50..8758b94d4a98d 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -595,7 +595,8 @@ struct vulkan_submit_info { static VkResult vk_queue_submit(struct vk_queue *queue, const struct vulkan_submit_info *info, - uint32_t perf_pass_index) + uint32_t perf_pass_index, + struct vk_sync *mem_sync) { struct vk_device *device = queue->base.device; VkResult result; @@ -613,12 +614,6 @@ vk_queue_submit(struct vk_queue *queue, for (uint32_t i = 0; i < info->image_bind_count; ++i) sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount; - const struct wsi_memory_signal_submit_info *mem_signal = - vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); - 
bool signal_mem_sync = mem_signal != NULL && - mem_signal->memory != VK_NULL_HANDLE && - queue->base.device->create_sync_for_memory != NULL; - struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, info->wait_count, info->command_buffer_count, @@ -628,7 +623,7 @@ vk_queue_submit(struct vk_queue *queue, sparse_memory_bind_entry_count, sparse_memory_image_bind_entry_count, info->signal_count + - signal_mem_sync + (info->fence != NULL), + (mem_sync != NULL) + (info->fence != NULL), &sparse_memory_bind_entries, &sparse_memory_image_bind_entries); if (unlikely(submit == NULL)) @@ -792,14 +787,7 @@ vk_queue_submit(struct vk_queue *queue, } uint32_t signal_count = info->signal_count; - if (signal_mem_sync) { - struct vk_sync *mem_sync; - result = queue->base.device->create_sync_for_memory(queue->base.device, - mem_signal->memory, - true, &mem_sync); - if (unlikely(result != VK_SUCCESS)) - goto fail; - + if (mem_sync) { submit->_mem_signal_temp = mem_sync; assert(submit->signals[signal_count].sync == NULL); @@ -976,7 +964,7 @@ vk_queue_submit(struct vk_queue *queue, vk_queue_push_submit(queue, submit); - if (signal_mem_sync) { + if (mem_sync) { /* If we're signaling a memory object, we have to ensure that * vkQueueSubmit does not return until the kernel submission has * happened. 
Otherwise, we may get a race between this process @@ -1155,6 +1143,23 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, } } + /* WSI signal info comes from WSI, which does 1 submit */ + struct vk_sync *mem_sync = NULL; + if (submitCount == 1) { + const struct wsi_memory_signal_submit_info *mem_signal = + vk_find_struct_const(pSubmits->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); + bool signal_mem_sync = mem_signal != NULL && + mem_signal->memory != VK_NULL_HANDLE && + queue->base.device->create_sync_for_memory != NULL; + if (signal_mem_sync) { + VkResult result = queue->base.device->create_sync_for_memory(queue->base.device, + mem_signal->memory, + true, &mem_sync); + if (unlikely(result != VK_SUCCESS)) + return result; + } + } + for (uint32_t i = 0; i < submitCount; i++) { struct vulkan_submit_info info = { .pNext = pSubmits[i].pNext, @@ -1176,7 +1181,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; - VkResult result = vk_queue_submit(queue, &info, perf_pass_index); + VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1278,7 +1283,7 @@ vk_common_QueueBindSparse(VkQueue _queue, .image_binds = pBindInfo[i].pImageBinds, .fence = i == bindInfoCount - 1 ? 
fence : NULL }; - VkResult result = vk_queue_submit(queue, &info, 0); + VkResult result = vk_queue_submit(queue, &info, 0, NULL); STACK_ARRAY_FINISH(wait_semaphore_infos); STACK_ARRAY_FINISH(signal_semaphore_infos); -- GitLab From 78130ed25e0ddf5d44a87cce38afa4e1e0501c35 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 10:33:23 -0400 Subject: [PATCH 03/17] vk/queue: precalc sparse bind counts before calling submit no functional changes --- src/vulkan/runtime/vk_queue.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 8758b94d4a98d..45a2de1dcaf38 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -596,24 +596,15 @@ static VkResult vk_queue_submit(struct vk_queue *queue, const struct vulkan_submit_info *info, uint32_t perf_pass_index, - struct vk_sync *mem_sync) + struct vk_sync *mem_sync, + uint32_t sparse_memory_bind_entry_count, + uint32_t sparse_memory_image_bind_entry_count) { struct vk_device *device = queue->base.device; VkResult result; - uint32_t sparse_memory_bind_entry_count = 0; - uint32_t sparse_memory_image_bind_entry_count = 0; VkSparseMemoryBind *sparse_memory_bind_entries = NULL; VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL; - for (uint32_t i = 0; i < info->buffer_bind_count; ++i) - sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount; - - for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) - sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount; - - for (uint32_t i = 0; i < info->image_bind_count; ++i) - sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount; - struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, info->wait_count, info->command_buffer_count, @@ -1181,7 +1172,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, 
PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; - VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync); + VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync, 0, 0); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1283,7 +1274,21 @@ vk_common_QueueBindSparse(VkQueue _queue, .image_binds = pBindInfo[i].pImageBinds, .fence = i == bindInfoCount - 1 ? fence : NULL }; - VkResult result = vk_queue_submit(queue, &info, 0, NULL); + uint32_t sparse_memory_bind_entry_count = 0; + uint32_t sparse_memory_image_bind_entry_count = 0; + + for (uint32_t i = 0; i < info.buffer_bind_count; ++i) + sparse_memory_bind_entry_count += info.buffer_binds[i].bindCount; + + for (uint32_t i = 0; i < info.image_opaque_bind_count; ++i) + sparse_memory_bind_entry_count += info.image_opaque_binds[i].bindCount; + + for (uint32_t i = 0; i < info.image_bind_count; ++i) + sparse_memory_image_bind_entry_count += info.image_binds[i].bindCount; + + VkResult result = vk_queue_submit(queue, &info, 0, NULL, + sparse_memory_bind_entry_count, + sparse_memory_image_bind_entry_count); STACK_ARRAY_FINISH(wait_semaphore_infos); STACK_ARRAY_FINISH(signal_semaphore_infos); -- GitLab From 005352375cc7e6ef7539c57df56cb537fd13d320 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 10:38:13 -0400 Subject: [PATCH 04/17] vk/queue: move vk_queue_submit allocation up no functional changes --- src/vulkan/runtime/vk_queue.c | 68 +++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 45a2de1dcaf38..02b98e83ca032 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -595,30 +595,16 @@ struct vulkan_submit_info { static VkResult vk_queue_submit(struct vk_queue *queue, const struct vulkan_submit_info *info, + struct vk_queue_submit *submit, uint32_t 
perf_pass_index, struct vk_sync *mem_sync, - uint32_t sparse_memory_bind_entry_count, - uint32_t sparse_memory_image_bind_entry_count) + VkSparseMemoryBind *sparse_memory_bind_entries, + VkSparseImageMemoryBind *sparse_memory_image_bind_entries) { struct vk_device *device = queue->base.device; VkResult result; - VkSparseMemoryBind *sparse_memory_bind_entries = NULL; - VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL; - - struct vk_queue_submit *submit = - vk_queue_submit_alloc(queue, info->wait_count, - info->command_buffer_count, - info->buffer_bind_count, - info->image_opaque_bind_count, - info->image_bind_count, - sparse_memory_bind_entry_count, - sparse_memory_image_bind_entry_count, - info->signal_count + - (mem_sync != NULL) + (info->fence != NULL), - &sparse_memory_bind_entries, - &sparse_memory_image_bind_entries); - if (unlikely(submit == NULL)) - return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + uint32_t sparse_memory_bind_entry_count = 0; + uint32_t sparse_memory_image_bind_entry_count = 0; submit->perf_pass_index = perf_pass_index; @@ -689,9 +675,6 @@ vk_queue_submit(struct vk_queue *queue, submit->command_buffers[i] = cmd_buffer; } - sparse_memory_bind_entry_count = 0; - sparse_memory_image_bind_entry_count = 0; - if (info->buffer_binds) typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count); @@ -1172,7 +1155,22 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? 
perf_info->counterPassIndex : 0; - VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync, 0, 0); + struct vk_queue_submit *submit = + vk_queue_submit_alloc(queue, info.wait_count, + info.command_buffer_count, + info.buffer_bind_count, + info.image_opaque_bind_count, + info.image_bind_count, + 0, + 0, + info.signal_count + + (mem_sync != NULL) + (info.fence != NULL), + NULL, + NULL); + if (unlikely(submit == NULL)) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 0, 0); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1286,9 +1284,27 @@ vk_common_QueueBindSparse(VkQueue _queue, for (uint32_t i = 0; i < info.image_bind_count; ++i) sparse_memory_image_bind_entry_count += info.image_binds[i].bindCount; - VkResult result = vk_queue_submit(queue, &info, 0, NULL, - sparse_memory_bind_entry_count, - sparse_memory_image_bind_entry_count); + VkSparseMemoryBind *sparse_memory_bind_entries = NULL; + VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL; + + struct vk_queue_submit *submit = + vk_queue_submit_alloc(queue, info.wait_count, + info.command_buffer_count, + info.buffer_bind_count, + info.image_opaque_bind_count, + info.image_bind_count, + sparse_memory_bind_entry_count, + sparse_memory_image_bind_entry_count, + info.signal_count + + (info.fence != NULL), + &sparse_memory_bind_entries, + &sparse_memory_image_bind_entries); + if (unlikely(submit == NULL)) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL, + sparse_memory_bind_entries, + sparse_memory_image_bind_entries); STACK_ARRAY_FINISH(wait_semaphore_infos); STACK_ARRAY_FINISH(signal_semaphore_infos); -- GitLab From 390c5a8f7bf383a197d8e9fa8c3b93496dc8bb1e Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:09:16 -0400 Subject: [PATCH 05/17] vk/queue: break out submit wait parsing no 
functional changes --- src/vulkan/runtime/vk_queue.c | 39 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 02b98e83ca032..7df848c75b5f3 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -592,22 +592,11 @@ struct vulkan_submit_info { struct vk_fence *fence; }; -static VkResult -vk_queue_submit(struct vk_queue *queue, - const struct vulkan_submit_info *info, - struct vk_queue_submit *submit, - uint32_t perf_pass_index, - struct vk_sync *mem_sync, - VkSparseMemoryBind *sparse_memory_bind_entries, - VkSparseImageMemoryBind *sparse_memory_image_bind_entries) +static bool +vk_queue_parse_waits(struct vk_device *device, + const struct vulkan_submit_info *info, + struct vk_queue_submit *submit) { - struct vk_device *device = queue->base.device; - VkResult result; - uint32_t sparse_memory_bind_entry_count = 0; - uint32_t sparse_memory_image_bind_entry_count = 0; - - submit->perf_pass_index = perf_pass_index; - bool has_binary_permanent_semaphore_wait = false; for (uint32_t i = 0; i < info->wait_count; i++) { VK_FROM_HANDLE(vk_semaphore, semaphore, @@ -655,6 +644,26 @@ vk_queue_submit(struct vk_queue *queue, .wait_value = wait_value, }; } + return has_binary_permanent_semaphore_wait; +} + +static VkResult +vk_queue_submit(struct vk_queue *queue, + const struct vulkan_submit_info *info, + struct vk_queue_submit *submit, + uint32_t perf_pass_index, + struct vk_sync *mem_sync, + VkSparseMemoryBind *sparse_memory_bind_entries, + VkSparseImageMemoryBind *sparse_memory_image_bind_entries) +{ + struct vk_device *device = queue->base.device; + VkResult result; + uint32_t sparse_memory_bind_entry_count = 0; + uint32_t sparse_memory_image_bind_entry_count = 0; + + submit->perf_pass_index = perf_pass_index; + + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit); for (uint32_t i = 0; i < 
info->command_buffer_count; i++) { VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, -- GitLab From 443864152ac4977d72d3268ef1b3ade36f32f1ad Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:12:45 -0400 Subject: [PATCH 06/17] vk/queue: break out cmdbuf parsing no functional changes --- src/vulkan/runtime/vk_queue.c | 42 +++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 7df848c75b5f3..0bd8942dd0586 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -647,24 +647,11 @@ vk_queue_parse_waits(struct vk_device *device, return has_binary_permanent_semaphore_wait; } -static VkResult -vk_queue_submit(struct vk_queue *queue, - const struct vulkan_submit_info *info, - struct vk_queue_submit *submit, - uint32_t perf_pass_index, - struct vk_sync *mem_sync, - VkSparseMemoryBind *sparse_memory_bind_entries, - VkSparseImageMemoryBind *sparse_memory_image_bind_entries) +static void +vk_queue_parse_cmdbufs(struct vk_queue *queue, + const struct vulkan_submit_info *info, + struct vk_queue_submit *submit) { - struct vk_device *device = queue->base.device; - VkResult result; - uint32_t sparse_memory_bind_entry_count = 0; - uint32_t sparse_memory_image_bind_entry_count = 0; - - submit->perf_pass_index = perf_pass_index; - - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit); - for (uint32_t i = 0; i < info->command_buffer_count; i++) { VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, info->command_buffers[i].commandBuffer); @@ -683,6 +670,27 @@ vk_queue_submit(struct vk_queue *queue, submit->command_buffers[i] = cmd_buffer; } +} + +static VkResult +vk_queue_submit(struct vk_queue *queue, + const struct vulkan_submit_info *info, + struct vk_queue_submit *submit, + uint32_t perf_pass_index, + struct vk_sync *mem_sync, + VkSparseMemoryBind *sparse_memory_bind_entries, + VkSparseImageMemoryBind 
*sparse_memory_image_bind_entries) +{ + struct vk_device *device = queue->base.device; + VkResult result; + uint32_t sparse_memory_bind_entry_count = 0; + uint32_t sparse_memory_image_bind_entry_count = 0; + + submit->perf_pass_index = perf_pass_index; + + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit); + + vk_queue_parse_cmdbufs(queue, info, submit); if (info->buffer_binds) typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count); -- GitLab From 11d5e2d0c1493d07ad48f61fcdf5b4093708f005 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:16:32 -0400 Subject: [PATCH 07/17] vk/queue: move wait parsing up no functional changes --- src/vulkan/runtime/vk_queue.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 0bd8942dd0586..f5190e1c0aff4 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -678,6 +678,7 @@ vk_queue_submit(struct vk_queue *queue, struct vk_queue_submit *submit, uint32_t perf_pass_index, struct vk_sync *mem_sync, + bool has_binary_permanent_semaphore_wait, VkSparseMemoryBind *sparse_memory_bind_entries, VkSparseImageMemoryBind *sparse_memory_image_bind_entries) { @@ -688,8 +689,6 @@ vk_queue_submit(struct vk_queue *queue, submit->perf_pass_index = perf_pass_index; - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit); - vk_queue_parse_cmdbufs(queue, info, submit); if (info->buffer_binds) @@ -1187,7 +1186,9 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 0, 0); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit); + + VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 
has_binary_permanent_semaphore_wait, 0, 0); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1319,7 +1320,10 @@ vk_common_QueueBindSparse(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit); + VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL, + has_binary_permanent_semaphore_wait, sparse_memory_bind_entries, sparse_memory_image_bind_entries); -- GitLab From 681dd67e6fecf820258fe1872a13f6339e4c6fff Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:17:14 -0400 Subject: [PATCH 08/17] vk/queue: move cmdbuf parsing up no functional changes --- src/vulkan/runtime/vk_queue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index f5190e1c0aff4..82c9b3e843696 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -689,8 +689,6 @@ vk_queue_submit(struct vk_queue *queue, submit->perf_pass_index = perf_pass_index; - vk_queue_parse_cmdbufs(queue, info, submit); - if (info->buffer_binds) typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count); @@ -1187,6 +1185,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit); + vk_queue_parse_cmdbufs(queue, &info, submit); VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); if (unlikely(result != VK_SUCCESS)) -- GitLab From 2c3bdfe4effa72c7aadad05875ec3da6231bec50 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:46:59 -0400 Subject: [PATCH 09/17] vk/queue: break out binary semaphore waiting for threaded queues no functional changes --- src/vulkan/runtime/vk_queue.c | 178 
++++++++++++++++++---------------------- 1 file changed, 95 insertions(+), 83 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 82c9b3e843696..04974e340db60 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -672,6 +672,98 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue, } } +static VkResult +vk_queue_handle_threaded_waits(struct vk_queue *queue, + const struct vulkan_submit_info *info, + struct vk_queue_submit *submit) +{ + assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED); + for (uint32_t i = 0; i < info->wait_count; i++) { + VK_FROM_HANDLE(vk_semaphore, semaphore, + info->waits[i].semaphore); + + if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY) + continue; + + /* From the Vulkan 1.2.194 spec: + * + * "When a batch is submitted to a queue via a queue + * submission, and it includes semaphores to be waited on, + * it defines a memory dependency between prior semaphore + * signal operations and the batch, and defines semaphore + * wait operations. + * + * Such semaphore wait operations set the semaphores + * created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_BINARY to the unsignaled state." + * + * For threaded submit, we depend on tracking the unsignaled + * state of binary semaphores to determine when we can safely + * submit. The VK_SYNC_WAIT_PENDING check above as well as the + * one in the submit thread depend on all binary semaphores + * being reset when they're not in active use from the point + * of view of the client's CPU timeline. This means we need to + * reset them inside vkQueueSubmit and cannot wait until the + * actual submit which happens later in the thread. + * + * We've already stolen temporary semaphore payloads above as + * part of basic semaphore processing. We steal permanent + * semaphore payloads here by way of vk_sync_move. 
For shared + * semaphores, this can be a bit expensive (sync file import + * and export) but, for non-shared semaphores, it can be made + * fairly cheap. Also, we only do this semaphore swapping in + * the case where you have real timelines AND the client is + * using timeline semaphores with wait-before-signal (that's + * the only way to get a submit thread) AND mixing those with + * waits on binary semaphores AND said binary semaphore is + * using its permanent payload. In other words, this code + * should basically only ever get executed in CTS tests. + */ + if (submit->_wait_temps[i] != NULL) + continue; + + assert(submit->waits[i].sync == &semaphore->permanent); + + /* From the Vulkan 1.2.194 spec: + * + * VUID-vkQueueSubmit-pWaitSemaphores-03238 + * + * "All elements of the pWaitSemaphores member of all + * elements of pSubmits created with a VkSemaphoreType of + * VK_SEMAPHORE_TYPE_BINARY must reference a semaphore + * signal operation that has been submitted for execution + * and any semaphore signal operations on which it depends + * (if any) must have also been submitted for execution." + * + * Therefore, we can safely do a blocking wait here and it + * won't actually block for long. This ensures that the + * vk_sync_move below will succeed. 
+ */ + VkResult result = vk_sync_wait(queue->base.device, + submit->waits[i].sync, 0, + VK_SYNC_WAIT_PENDING, UINT64_MAX); + if (unlikely(result != VK_SUCCESS)) + return result; + + result = vk_sync_create(queue->base.device, + semaphore->permanent.type, + 0 /* flags */, + 0 /* initial value */, + &submit->_wait_temps[i]); + if (unlikely(result != VK_SUCCESS)) + return result; + + result = vk_sync_move(queue->base.device, + submit->_wait_temps[i], + &semaphore->permanent); + if (unlikely(result != VK_SUCCESS)) + return result; + + submit->waits[i].sync = submit->_wait_temps[i]; + } + return VK_SUCCESS; +} + static VkResult vk_queue_submit(struct vk_queue *queue, const struct vulkan_submit_info *info, @@ -865,89 +957,9 @@ vk_queue_submit(struct vk_queue *queue, case VK_QUEUE_SUBMIT_MODE_THREADED: if (has_binary_permanent_semaphore_wait) { - for (uint32_t i = 0; i < info->wait_count; i++) { - VK_FROM_HANDLE(vk_semaphore, semaphore, - info->waits[i].semaphore); - - if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY) - continue; - - /* From the Vulkan 1.2.194 spec: - * - * "When a batch is submitted to a queue via a queue - * submission, and it includes semaphores to be waited on, - * it defines a memory dependency between prior semaphore - * signal operations and the batch, and defines semaphore - * wait operations. - * - * Such semaphore wait operations set the semaphores - * created with a VkSemaphoreType of - * VK_SEMAPHORE_TYPE_BINARY to the unsignaled state." - * - * For threaded submit, we depend on tracking the unsignaled - * state of binary semaphores to determine when we can safely - * submit. The VK_SYNC_WAIT_PENDING check above as well as the - * one in the sumbit thread depend on all binary semaphores - * being reset when they're not in active use from the point - * of view of the client's CPU timeline. This means we need to - * reset them inside vkQueueSubmit and cannot wait until the - * actual submit which happens later in the thread. 
- * - * We've already stolen temporary semaphore payloads above as - * part of basic semaphore processing. We steal permanent - * semaphore payloads here by way of vk_sync_move. For shared - * semaphores, this can be a bit expensive (sync file import - * and export) but, for non-shared semaphores, it can be made - * fairly cheap. Also, we only do this semaphore swapping in - * the case where you have real timelines AND the client is - * using timeline semaphores with wait-before-signal (that's - * the only way to get a submit thread) AND mixing those with - * waits on binary semaphores AND said binary semaphore is - * using its permanent payload. In other words, this code - * should basically only ever get executed in CTS tests. - */ - if (submit->_wait_temps[i] != NULL) - continue; - - assert(submit->waits[i].sync == &semaphore->permanent); - - /* From the Vulkan 1.2.194 spec: - * - * VUID-vkQueueSubmit-pWaitSemaphores-03238 - * - * "All elements of the pWaitSemaphores member of all - * elements of pSubmits created with a VkSemaphoreType of - * VK_SEMAPHORE_TYPE_BINARY must reference a semaphore - * signal operation that has been submitted for execution - * and any semaphore signal operations on which it depends - * (if any) must have also been submitted for execution." - * - * Therefore, we can safely do a blocking wait here and it - * won't actually block for long. This ensures that the - * vk_sync_move below will succeed. 
- */ - result = vk_sync_wait(queue->base.device, - submit->waits[i].sync, 0, - VK_SYNC_WAIT_PENDING, UINT64_MAX); - if (unlikely(result != VK_SUCCESS)) - goto fail; - - result = vk_sync_create(queue->base.device, - semaphore->permanent.type, - 0 /* flags */, - 0 /* initial value */, - &submit->_wait_temps[i]); - if (unlikely(result != VK_SUCCESS)) - goto fail; - - result = vk_sync_move(queue->base.device, - submit->_wait_temps[i], - &semaphore->permanent); - if (unlikely(result != VK_SUCCESS)) - goto fail; - - submit->waits[i].sync = submit->_wait_temps[i]; - } + result = vk_queue_handle_threaded_waits(queue, info, submit); + if (unlikely(result != VK_SUCCESS)) + goto fail; } vk_queue_push_submit(queue, submit); -- GitLab From c79eba5ce9f94c5d7f00a294d78e412169ec961d Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:51:23 -0400 Subject: [PATCH 10/17] vk/queue: pass waits directly to vk_queue_handle_threaded_waits() no functional changes --- src/vulkan/runtime/vk_queue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 04974e340db60..c9278a814f85e 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -674,13 +674,14 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue, static VkResult vk_queue_handle_threaded_waits(struct vk_queue *queue, - const struct vulkan_submit_info *info, + uint32_t wait_count, + const VkSemaphoreSubmitInfo *waits, struct vk_queue_submit *submit) { assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED); - for (uint32_t i = 0; i < info->wait_count; i++) { + for (uint32_t i = 0; i < wait_count; i++) { VK_FROM_HANDLE(vk_semaphore, semaphore, - info->waits[i].semaphore); + waits[i].semaphore); if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY) continue; @@ -957,7 +958,7 @@ vk_queue_submit(struct vk_queue *queue, case VK_QUEUE_SUBMIT_MODE_THREADED: if (has_binary_permanent_semaphore_wait) { - result = 
vk_queue_handle_threaded_waits(queue, info, submit); + result = vk_queue_handle_threaded_waits(queue, info->wait_count, info->waits, submit); if (unlikely(result != VK_SUCCESS)) goto fail; } -- GitLab From ae75a3cb85500f73b07e7d466760abe8217c2b0b Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:58:10 -0400 Subject: [PATCH 11/17] vk/queue: pass wait info directly to vk_queue_parse_waits() no functional changes --- src/vulkan/runtime/vk_queue.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index c9278a814f85e..3b3f331ab0c24 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -594,13 +594,14 @@ struct vulkan_submit_info { static bool vk_queue_parse_waits(struct vk_device *device, - const struct vulkan_submit_info *info, + uint32_t wait_count, + const VkSemaphoreSubmitInfo *waits, struct vk_queue_submit *submit) { bool has_binary_permanent_semaphore_wait = false; - for (uint32_t i = 0; i < info->wait_count; i++) { + for (uint32_t i = 0; i < wait_count; i++) { VK_FROM_HANDLE(vk_semaphore, semaphore, - info->waits[i].semaphore); + waits[i].semaphore); /* From the Vulkan 1.2.194 spec: * @@ -636,11 +637,11 @@ vk_queue_parse_waits(struct vk_device *device, } uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ? 
- info->waits[i].value : 0; + waits[i].value : 0; submit->waits[i] = (struct vk_sync_wait) { .sync = sync, - .stage_mask = info->waits[i].stageMask, + .stage_mask = waits[i].stageMask, .wait_value = wait_value, }; } @@ -1197,7 +1198,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); vk_queue_parse_cmdbufs(queue, &info, submit); VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); @@ -1332,7 +1333,7 @@ vk_common_QueueBindSparse(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit); VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL, has_binary_permanent_semaphore_wait, -- GitLab From 1e517e9e81478cc63f841d4cf4eceb2150f22804 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 11:58:40 -0400 Subject: [PATCH 12/17] vk/queue: pass cmdbuf info directly to vk_queue_parse_cmdbufs() no functional changes --- src/vulkan/runtime/vk_queue.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 3b3f331ab0c24..54f9c41820bfd 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -650,14 +650,15 @@ vk_queue_parse_waits(struct vk_device *device, static void vk_queue_parse_cmdbufs(struct vk_queue *queue, - const struct 
vulkan_submit_info *info, + uint32_t command_buffer_count, + const VkCommandBufferSubmitInfo *command_buffers, struct vk_queue_submit *submit) { - for (uint32_t i = 0; i < info->command_buffer_count; i++) { + for (uint32_t i = 0; i < command_buffer_count; i++) { VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, - info->command_buffers[i].commandBuffer); - assert(info->command_buffers[i].deviceMask == 0 || - info->command_buffers[i].deviceMask == 1); + command_buffers[i].commandBuffer); + assert(command_buffers[i].deviceMask == 0 || + command_buffers[i].deviceMask == 1); assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index); /* Some drivers don't call vk_command_buffer_begin/end() yet and, for @@ -1199,7 +1200,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); - vk_queue_parse_cmdbufs(queue, &info, submit); + vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit); VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); if (unlikely(result != VK_SUCCESS)) -- GitLab From 968f0c66b7f1dd29941626516d2bd0b74a8b5aa9 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 12:01:08 -0400 Subject: [PATCH 13/17] vk/queue: move vk_queue_handle_threaded_waits() calls up no functional changes --- src/vulkan/runtime/vk_queue.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 54f9c41820bfd..1e49cdbbf216a 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -959,12 +959,6 @@ vk_queue_submit(struct vk_queue *queue, return vk_device_flush(queue->base.device); case 
VK_QUEUE_SUBMIT_MODE_THREADED: - if (has_binary_permanent_semaphore_wait) { - result = vk_queue_handle_threaded_waits(queue, info->wait_count, info->waits, submit); - if (unlikely(result != VK_SUCCESS)) - goto fail; - } - vk_queue_push_submit(queue, submit); if (mem_sync) { @@ -1164,6 +1158,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, } for (uint32_t i = 0; i < submitCount; i++) { + VkResult result = VK_SUCCESS; struct vulkan_submit_info info = { .pNext = pSubmits[i].pNext, .command_buffer_count = pSubmits[i].commandBufferInfoCount, @@ -1202,7 +1197,15 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit); - VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); + if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { + result = vk_queue_handle_threaded_waits(queue, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_submit_destroy(queue, submit); + return result; + } + } + + result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); if (unlikely(result != VK_SUCCESS)) return result; } @@ -1231,6 +1234,7 @@ vk_common_QueueBindSparse(VkQueue _queue, } for (uint32_t i = 0; i < bindInfoCount; i++) { + VkResult result = VK_SUCCESS; const VkTimelineSemaphoreSubmitInfo *timeline_info = vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO); const uint64_t *wait_values = NULL; @@ -1336,11 +1340,19 @@ vk_common_QueueBindSparse(VkQueue _queue, bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, 
pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit); - VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL, - has_binary_permanent_semaphore_wait, - sparse_memory_bind_entries, - sparse_memory_image_bind_entries); + if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { + result = vk_queue_handle_threaded_waits(queue, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_submit_destroy(queue, submit); + goto fail; + } + } + result = vk_queue_submit(queue, &info, submit, 0, NULL, + has_binary_permanent_semaphore_wait, + sparse_memory_bind_entries, + sparse_memory_image_bind_entries); +fail: STACK_ARRAY_FINISH(wait_semaphore_infos); STACK_ARRAY_FINISH(signal_semaphore_infos); -- GitLab From 0f428de58525cad8e1ebd3c75c1503b10cbaf389 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 12:01:44 -0400 Subject: [PATCH 14/17] vk/queue: remove wait and cmdbuf info from vulkan_submit_info these are no longer used --- src/vulkan/runtime/vk_queue.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 1e49cdbbf216a..328b9abf44386 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -571,12 +571,6 @@ vk_queue_enable_submit_thread(struct vk_queue *queue) struct vulkan_submit_info { const void *pNext; - uint32_t command_buffer_count; - const VkCommandBufferSubmitInfo *command_buffers; - - uint32_t wait_count; - const VkSemaphoreSubmitInfo *waits; - uint32_t signal_count; const VkSemaphoreSubmitInfo *signals; @@ -1159,12 +1153,12 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, for (uint32_t i = 0; i < submitCount; i++) { VkResult result = VK_SUCCESS; + uint32_t wait_count = pSubmits[i].waitSemaphoreInfoCount; + const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos; + 
uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount; + const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos; struct vulkan_submit_info info = { .pNext = pSubmits[i].pNext, - .command_buffer_count = pSubmits[i].commandBufferInfoCount, - .command_buffers = pSubmits[i].pCommandBufferInfos, - .wait_count = pSubmits[i].waitSemaphoreInfoCount, - .waits = pSubmits[i].pWaitSemaphoreInfos, .signal_count = pSubmits[i].signalSemaphoreInfoCount, .signals = pSubmits[i].pSignalSemaphoreInfos, .fence = i == submitCount - 1 ? fence : NULL @@ -1180,8 +1174,8 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; struct vk_queue_submit *submit = - vk_queue_submit_alloc(queue, info.wait_count, - info.command_buffer_count, + vk_queue_submit_alloc(queue, wait_count, + cmdbuf_count, info.buffer_bind_count, info.image_opaque_bind_count, info.image_bind_count, @@ -1194,11 +1188,11 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); - vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); + vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit); if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { - result = vk_queue_handle_threaded_waits(queue, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit); + result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); if (unlikely(result != VK_SUCCESS)) { vk_queue_submit_destroy(queue, submit); return result; @@ -1268,6 +1262,7 @@ 
vk_common_QueueBindSparse(VkQueue _queue, signal_values = timeline_info->pSignalSemaphoreValues; } + uint32_t wait_count = pBindInfo[i].waitSemaphoreCount; STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphore_infos, pBindInfo[i].waitSemaphoreCount); STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos, @@ -1296,8 +1291,6 @@ vk_common_QueueBindSparse(VkQueue _queue, } struct vulkan_submit_info info = { .pNext = pBindInfo[i].pNext, - .wait_count = pBindInfo[i].waitSemaphoreCount, - .waits = wait_semaphore_infos, .signal_count = pBindInfo[i].signalSemaphoreCount, .signals = signal_semaphore_infos, .buffer_bind_count = pBindInfo[i].bufferBindCount, @@ -1324,8 +1317,8 @@ vk_common_QueueBindSparse(VkQueue _queue, VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL; struct vk_queue_submit *submit = - vk_queue_submit_alloc(queue, info.wait_count, - info.command_buffer_count, + vk_queue_submit_alloc(queue, pBindInfo[i].waitSemaphoreCount, + 0, info.buffer_bind_count, info.image_opaque_bind_count, info.image_bind_count, @@ -1338,10 +1331,10 @@ vk_common_QueueBindSparse(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { - result = vk_queue_handle_threaded_waits(queue, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit); + result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); if (unlikely(result != VK_SUCCESS)) { vk_queue_submit_destroy(queue, submit); goto fail; -- GitLab From 683694ee2d0beb22191e779b5e7eea1e58fc016f Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 12:40:00 
-0400 Subject: [PATCH 15/17] vk/queue: split out allocation+submission for QueueSubmit2KHR no functional changes --- src/vulkan/runtime/vk_queue.c | 83 +++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 328b9abf44386..d2a7fbd76b742 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -1114,6 +1114,52 @@ vk_queue_finish(struct vk_queue *queue) vk_object_base_finish(&queue->base); } +static VkResult +vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit, + uint32_t wait_count, const VkSemaphoreSubmitInfo *wait_semaphore_infos, + uint32_t cmdbuf_count, const VkCommandBufferSubmitInfo *cmdbufs, + uint32_t perf_pass_index, struct vk_sync *mem_sync, struct vk_fence *fence) +{ + VkResult result = VK_SUCCESS; + struct vulkan_submit_info info = { + .pNext = pSubmit->pNext, + .signal_count = pSubmit->signalSemaphoreInfoCount, + .signals = pSubmit->pSignalSemaphoreInfos, + .fence = fence + }; + + struct vk_queue_submit *submit = + vk_queue_submit_alloc(queue, wait_count, + cmdbuf_count, + info.buffer_bind_count, + info.image_opaque_bind_count, + info.image_bind_count, + 0, + 0, + info.signal_count + + (mem_sync != NULL) + (info.fence != NULL), + NULL, + NULL); + if (unlikely(submit == NULL)) + return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); + + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); + vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit); + + if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { + result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_submit_destroy(queue, submit); + goto fail; + } + } + + result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 
has_binary_permanent_semaphore_wait, 0, 0); +fail: + vk_queue_submit_destroy(queue, submit); + return result; +} + VKAPI_ATTR VkResult VKAPI_CALL vk_common_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, @@ -1157,12 +1203,6 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos; uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount; const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos; - struct vulkan_submit_info info = { - .pNext = pSubmits[i].pNext, - .signal_count = pSubmits[i].signalSemaphoreInfoCount, - .signals = pSubmits[i].pSignalSemaphoreInfos, - .fence = i == submitCount - 1 ? fence : NULL - }; /* From the Vulkan 1.2.194 spec: * @@ -1173,33 +1213,10 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; - struct vk_queue_submit *submit = - vk_queue_submit_alloc(queue, wait_count, - cmdbuf_count, - info.buffer_bind_count, - info.image_opaque_bind_count, - info.image_bind_count, - 0, - 0, - info.signal_count + - (mem_sync != NULL) + (info.fence != NULL), - NULL, - NULL); - if (unlikely(submit == NULL)) - return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); - vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit); - - if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { - result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); - if (unlikely(result != VK_SUCCESS)) { - vk_queue_submit_destroy(queue, submit); - return result; - } - } - - result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); + result = vk_queue_submit_flush(queue, &pSubmits[i], + 
wait_count, wait_semaphore_infos, + cmdbuf_count, cmdbufs, + perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL); if (unlikely(result != VK_SUCCESS)) return result; } -- GitLab From cd84beeccd14986b30faf154a0d371a93c94242f Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 12:47:12 -0400 Subject: [PATCH 16/17] vk/queue: add a count param to vk_queue_submit_flush not currently used --- src/vulkan/runtime/vk_queue.c | 74 +++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index d2a7fbd76b742..20d28dfaba821 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -590,6 +590,7 @@ static bool vk_queue_parse_waits(struct vk_device *device, uint32_t wait_count, const VkSemaphoreSubmitInfo *waits, + uint32_t offset, struct vk_queue_submit *submit) { bool has_binary_permanent_semaphore_wait = false; @@ -618,7 +619,7 @@ vk_queue_parse_waits(struct vk_device *device, struct vk_sync *sync; if (semaphore->temporary) { assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY); - sync = submit->_wait_temps[i] = semaphore->temporary; + sync = submit->_wait_temps[i + offset] = semaphore->temporary; semaphore->temporary = NULL; } else { if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) { @@ -633,7 +634,7 @@ vk_queue_parse_waits(struct vk_device *device, uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ? 
waits[i].value : 0; - submit->waits[i] = (struct vk_sync_wait) { + submit->waits[i + offset] = (struct vk_sync_wait) { .sync = sync, .stage_mask = waits[i].stageMask, .wait_value = wait_value, @@ -646,6 +647,7 @@ static void vk_queue_parse_cmdbufs(struct vk_queue *queue, uint32_t command_buffer_count, const VkCommandBufferSubmitInfo *command_buffers, + uint32_t offset, struct vk_queue_submit *submit) { for (uint32_t i = 0; i < command_buffer_count; i++) { @@ -664,7 +666,7 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue, cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING); cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING; - submit->command_buffers[i] = cmd_buffer; + submit->command_buffers[i + offset] = cmd_buffer; } } @@ -672,6 +674,7 @@ static VkResult vk_queue_handle_threaded_waits(struct vk_queue *queue, uint32_t wait_count, const VkSemaphoreSubmitInfo *waits, + unsigned offset, struct vk_queue_submit *submit) { assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED); @@ -716,10 +719,10 @@ vk_queue_handle_threaded_waits(struct vk_queue *queue, * using its permanent payload. In other words, this code * should basically only ever get executed in CTS tests. 
*/ - if (submit->_wait_temps[i] != NULL) + if (submit->_wait_temps[i + offset] != NULL) continue; - assert(submit->waits[i].sync == &semaphore->permanent); + assert(submit->waits[i + offset].sync == &semaphore->permanent); /* From the Vulkan 1.2.194 spec: * @@ -746,17 +749,17 @@ vk_queue_handle_threaded_waits(struct vk_queue *queue, semaphore->permanent.type, 0 /* flags */, 0 /* initial value */, - &submit->_wait_temps[i]); + &submit->_wait_temps[i + offset]); if (unlikely(result != VK_SUCCESS)) return result; result = vk_sync_move(queue->base.device, - submit->_wait_temps[i], + submit->_wait_temps[i + offset], &semaphore->permanent); if (unlikely(result != VK_SUCCESS)) return result; - submit->waits[i].sync = submit->_wait_temps[i]; + submit->waits[i + offset].sync = submit->_wait_temps[i + offset]; } return VK_SUCCESS; } @@ -1115,16 +1118,16 @@ vk_queue_finish(struct vk_queue *queue) } static VkResult -vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit, - uint32_t wait_count, const VkSemaphoreSubmitInfo *wait_semaphore_infos, - uint32_t cmdbuf_count, const VkCommandBufferSubmitInfo *cmdbufs, +vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmits, unsigned submit_count, + uint32_t wait_count, + uint32_t cmdbuf_count, uint32_t perf_pass_index, struct vk_sync *mem_sync, struct vk_fence *fence) { VkResult result = VK_SUCCESS; struct vulkan_submit_info info = { - .pNext = pSubmit->pNext, - .signal_count = pSubmit->signalSemaphoreInfoCount, - .signals = pSubmit->pSignalSemaphoreInfos, + .pNext = pSubmits->pNext, + .signal_count = pSubmits[submit_count - 1].signalSemaphoreInfoCount, + .signals = pSubmits[submit_count - 1].pSignalSemaphoreInfos, .fence = fence }; @@ -1143,21 +1146,28 @@ vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = 
vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); - vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit); - - if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { - result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); - if (unlikely(result != VK_SUCCESS)) { - vk_queue_submit_destroy(queue, submit); - goto fail; + uint32_t wait_counter = 0; + uint32_t cmdbuf_counter = 0; + bool has_binary_permanent_semaphore_wait = false; + for (unsigned i = 0; i < submit_count; i++) { + uint32_t cur_wait_count = pSubmits[i].waitSemaphoreInfoCount; + const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos; + uint32_t cur_cmdbuf_count = pSubmits[i].commandBufferInfoCount; + const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos; + has_binary_permanent_semaphore_wait |= vk_queue_parse_waits(queue->base.device, cur_wait_count, wait_semaphore_infos, wait_counter, submit); + vk_queue_parse_cmdbufs(queue, cur_cmdbuf_count, cmdbufs, cmdbuf_counter, submit); + if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { + result = vk_queue_handle_threaded_waits(queue, cur_wait_count, wait_semaphore_infos, wait_counter, submit); + if (unlikely(result != VK_SUCCESS)) { + vk_queue_submit_destroy(queue, submit); + return result; + } } + wait_counter += cur_wait_count; + cmdbuf_counter += cur_cmdbuf_count; } - result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); -fail: - vk_queue_submit_destroy(queue, submit); - return result; + return vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0); } VKAPI_ATTR VkResult VKAPI_CALL @@ -1200,9 +1210,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, for (uint32_t i = 0; i < submitCount; i++) { VkResult result = VK_SUCCESS; uint32_t 
wait_count = pSubmits[i].waitSemaphoreInfoCount; - const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos; uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount; - const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos; /* From the Vulkan 1.2.194 spec: * @@ -1213,9 +1221,9 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; - result = vk_queue_submit_flush(queue, &pSubmits[i], - wait_count, wait_semaphore_infos, - cmdbuf_count, cmdbufs, + result = vk_queue_submit_flush(queue, &pSubmits[i], 1, + wait_count, + cmdbuf_count, perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL); if (unlikely(result != VK_SUCCESS)) return result; @@ -1348,10 +1356,10 @@ vk_common_QueueBindSparse(VkQueue _queue, if (unlikely(submit == NULL)) return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit); + bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, 0, submit); if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) { - result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit); + result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, 0, submit); if (unlikely(result != VK_SUCCESS)) { vk_queue_submit_destroy(queue, submit); goto fail; -- GitLab From e2c84bbbfec5cd618909ad0ec98d3304ab7f05f2 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 21 Sep 2023 13:12:14 -0400 Subject: [PATCH 17/17] vk/queue: merge queue submissions when possible given various conditions, it's possible to merge queue submits into a single submission to reduce synchronization overhead --- src/vulkan/runtime/vk_queue.c | 89 
+++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c index 20d28dfaba821..ebe412a0024fe 100644 --- a/src/vulkan/runtime/vk_queue.c +++ b/src/vulkan/runtime/vk_queue.c @@ -1117,6 +1117,21 @@ vk_queue_finish(struct vk_queue *queue) vk_object_base_finish(&queue->base); } +static bool +filter_pnexts(const void *pNext) +{ + vk_foreach_struct_const(s, pNext) { + switch (s->sType) { + /* can possibly be merged */ + case VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR: + break; + default: + return false; + } + } + return true; +} + static VkResult vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmits, unsigned submit_count, uint32_t wait_count, @@ -1207,11 +1222,14 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, } } + uint32_t prev_perf_pass_index = 0; + bool iterate = true; + bool has_perf_info = false; + bool has_signals = false; + bool needs_last = false; + uint32_t first = 0, last = 0; + uint32_t wait_count = 0, cmdbuf_count = 0; for (uint32_t i = 0; i < submitCount; i++) { - VkResult result = VK_SUCCESS; - uint32_t wait_count = pSubmits[i].waitSemaphoreInfoCount; - uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount; - /* From the Vulkan 1.2.194 spec: * * "If the VkSubmitInfo::pNext chain does not include this structure, @@ -1221,12 +1239,63 @@ vk_common_QueueSubmit2KHR(VkQueue _queue, vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR); uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0; - result = vk_queue_submit_flush(queue, &pSubmits[i], 1, - wait_count, - cmdbuf_count, - perf_pass_index, mem_sync, i == submitCount - 1 ? 
fence : NULL); - if (unlikely(result != VK_SUCCESS)) - return result; + /* determine when to split the submits + * - split if unhandled pNext is in chain + * - split if perf counterPassIndex changes or is added/omitted + * - split if signal ordering would be disrupted + */ + if (!filter_pnexts(pSubmits[i].pNext)) + iterate = false; + if (i && (!!perf_info != has_perf_info || (has_perf_info && perf_pass_index != prev_perf_pass_index))) + iterate = false; + if (has_signals) + iterate = false; + if (i == submitCount - 1) { + /* always flush on last submit */ + if (iterate || !i) { + /* include last submit for flush if it can be included */ + wait_count += pSubmits[i].waitSemaphoreInfoCount; + cmdbuf_count += pSubmits[i].commandBufferInfoCount; + last = i; + } else { + needs_last = true; + } + iterate = false; + } + + if (!iterate) { + /* submits must split: flush pending but NOT current (unless last submit) */ + VkResult result = vk_queue_submit_flush(queue, &pSubmits[first], last - first + 1, + wait_count, + cmdbuf_count, + perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL); + if (unlikely(result != VK_SUCCESS)) + return result; + wait_count = 0; + cmdbuf_count = 0; + first = last = i; + iterate = true; + } + + /* always keep accumulating */ + wait_count += pSubmits[i].waitSemaphoreInfoCount; + cmdbuf_count += pSubmits[i].commandBufferInfoCount; + last = i; + + has_perf_info = perf_info != NULL; + prev_perf_pass_index = perf_pass_index; + has_signals = pSubmits[i].signalSemaphoreInfoCount > 0; + if (needs_last) { + /* catch the last submit if it couldn't be merged above */ + assert(first == last); + assert(first == submitCount - 1); + VkResult result = vk_queue_submit_flush(queue, &pSubmits[first], last - first + 1, + wait_count, + cmdbuf_count, + perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL); + if (unlikely(result != VK_SUCCESS)) + return result; + } } return VK_SUCCESS; -- GitLab