bazzite/spec_files/mesa/25576.patch
2024-07-06 21:02:03 -07:00

1121 lines
43 KiB
Diff

From cc598b60cac90742e3a714748d13d418efa81016 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 16:36:10 -0500
Subject: [PATCH 1/7] vulkan/queue: Don't use vk_semaphore in threaded payload
stealing
---
src/vulkan/runtime/vk_queue.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index c8b55b58b0a3a..64d4003184d85 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -895,11 +895,8 @@ vk_queue_submit(struct vk_queue *queue,
case VK_QUEUE_SUBMIT_MODE_THREADED:
if (has_binary_permanent_semaphore_wait) {
- for (uint32_t i = 0; i < info->wait_count; i++) {
- VK_FROM_HANDLE(vk_semaphore, semaphore,
- info->waits[i].semaphore);
-
- if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY)
+ for (uint32_t i = 0; i < submit->wait_count; i++) {
+ if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE)
continue;
/* From the Vulkan 1.2.194 spec:
@@ -939,8 +936,6 @@ vk_queue_submit(struct vk_queue *queue,
if (submit->_wait_temps[i] != NULL)
continue;
- assert(submit->waits[i].sync == &semaphore->permanent);
-
/* From the Vulkan 1.2.194 spec:
*
* VUID-vkQueueSubmit-pWaitSemaphores-03238
@@ -963,7 +958,7 @@ vk_queue_submit(struct vk_queue *queue,
goto fail;
result = vk_sync_create(queue->base.device,
- semaphore->permanent.type,
+ submit->waits[i].sync->type,
0 /* flags */,
0 /* initial value */,
&submit->_wait_temps[i]);
@@ -972,7 +967,7 @@ vk_queue_submit(struct vk_queue *queue,
result = vk_sync_move(queue->base.device,
submit->_wait_temps[i],
- &semaphore->permanent);
+ submit->waits[i].sync);
if (unlikely(result != VK_SUCCESS))
goto fail;
--
GitLab
From e6c990c780650b911f1945ad364776285a350c44 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 16:38:32 -0500
Subject: [PATCH 2/7] vulkan/queue: Move has_binary_permanent_semaphore_wait
 into the submit struct
---
src/vulkan/runtime/vk_queue.c | 7 +++----
src/vulkan/runtime/vk_queue.h | 1 +
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index 64d4003184d85..91c9968fdbdbf 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -642,7 +642,6 @@ vk_queue_submit(struct vk_queue *queue,
vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
- bool has_binary_permanent_semaphore_wait = false;
for (uint32_t i = 0; i < info->wait_count; i++) {
VK_FROM_HANDLE(vk_semaphore, semaphore,
info->waits[i].semaphore);
@@ -674,7 +673,7 @@ vk_queue_submit(struct vk_queue *queue,
if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
if (vk_device_supports_threaded_submit(device))
assert(semaphore->permanent.type->move);
- has_binary_permanent_semaphore_wait = true;
+ submit->_has_binary_permanent_semaphore_wait = true;
}
sync = &semaphore->permanent;
@@ -863,7 +862,7 @@ vk_queue_submit(struct vk_queue *queue,
* operation. If we don't signal the vk_sync, then we need to reset it.
*/
if (vk_device_supports_threaded_submit(device) &&
- has_binary_permanent_semaphore_wait) {
+ submit->_has_binary_permanent_semaphore_wait) {
for (uint32_t i = 0; i < submit->wait_count; i++) {
if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
submit->_wait_temps[i] != NULL)
@@ -894,7 +893,7 @@ vk_queue_submit(struct vk_queue *queue,
return vk_device_flush(queue->base.device);
case VK_QUEUE_SUBMIT_MODE_THREADED:
- if (has_binary_permanent_semaphore_wait) {
+ if (submit->_has_binary_permanent_semaphore_wait) {
for (uint32_t i = 0; i < submit->wait_count; i++) {
if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE)
continue;
diff --git a/src/vulkan/runtime/vk_queue.h b/src/vulkan/runtime/vk_queue.h
index 814f9fefcdd17..23e10d9365925 100644
--- a/src/vulkan/runtime/vk_queue.h
+++ b/src/vulkan/runtime/vk_queue.h
@@ -237,6 +237,7 @@ struct vk_queue_submit {
uint32_t perf_pass_index;
/* Used internally; should be ignored by drivers */
+ bool _has_binary_permanent_semaphore_wait;
struct vk_sync **_wait_temps;
struct vk_sync *_mem_signal_temp;
struct vk_sync_timeline_point **_wait_points;
--
GitLab
From 35127c3b88267952db73f8ec395e578043b72d1a Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 16:45:01 -0500
Subject: [PATCH 3/7] vulkan/queue: Use _mem_signal_temp instead of
signal_mem_sync
The two checks should be equivalent. This just lets us use data in
struct vk_queue_submit rather than a local boolean.
---
src/vulkan/runtime/vk_queue.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index 91c9968fdbdbf..19bf55965a694 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -976,7 +976,7 @@ vk_queue_submit(struct vk_queue *queue,
vk_queue_push_submit(queue, submit);
- if (signal_mem_sync) {
+ if (submit->_mem_signal_temp != NULL) {
/* If we're signaling a memory object, we have to ensure that
* vkQueueSubmit does not return until the kernel submission has
* happened. Otherwise, we may get a race between this process
--
GitLab
From 491f788180e331c0de493d7047c973b3fc60c788 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 17:02:00 -0500
Subject: [PATCH 4/7] vulkan,nvk: Add a vk_queue_submit_has_bind() helper
---
src/nouveau/vulkan/nvk_queue_drm_nouveau.c | 4 +---
src/vulkan/runtime/vk_queue.h | 8 ++++++++
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/nouveau/vulkan/nvk_queue_drm_nouveau.c b/src/nouveau/vulkan/nvk_queue_drm_nouveau.c
index a0087df592a49..17c1323fc4068 100644
--- a/src/nouveau/vulkan/nvk_queue_drm_nouveau.c
+++ b/src/nouveau/vulkan/nvk_queue_drm_nouveau.c
@@ -594,9 +594,7 @@ nvk_queue_submit_drm_nouveau(struct nvk_queue *queue,
if (result != VK_SUCCESS)
return result;
- const bool is_vmbind = submit->buffer_bind_count > 0 ||
- submit->image_bind_count > 0 ||
- submit->image_opaque_bind_count > 0;
+ const bool is_vmbind = vk_queue_submit_has_bind(submit);
push_builder_init(queue, &pb, is_vmbind);
if (!is_vmbind && upload_time_point > 0)
diff --git a/src/vulkan/runtime/vk_queue.h b/src/vulkan/runtime/vk_queue.h
index 23e10d9365925..7d96a4c559d35 100644
--- a/src/vulkan/runtime/vk_queue.h
+++ b/src/vulkan/runtime/vk_queue.h
@@ -244,6 +244,14 @@ struct vk_queue_submit {
struct vk_sync_timeline_point **_signal_points;
};
+static inline bool
+vk_queue_submit_has_bind(const struct vk_queue_submit *submit)
+{
+ return submit->buffer_bind_count > 0 ||
+ submit->image_opaque_bind_count > 0 ||
+ submit->image_bind_count > 0;
+}
+
#ifdef __cplusplus
}
#endif
--
GitLab
From bd5c69333175224fa3605b5a7b4a92443cd2f052 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 18:48:03 -0500
Subject: [PATCH 5/7] vulkan/queue: Use a builder pattern for vk_queue_submit
---
src/vulkan/runtime/vk_queue.c | 445 +++++++++++++++++++---------------
src/vulkan/runtime/vk_queue.h | 5 +
2 files changed, 258 insertions(+), 192 deletions(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index 19bf55965a694..229c68a1abd83 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -147,9 +147,7 @@ vk_queue_submit_alloc(struct vk_queue *queue,
uint32_t image_bind_count,
uint32_t bind_entry_count,
uint32_t image_bind_entry_count,
- uint32_t signal_count,
- VkSparseMemoryBind **bind_entries,
- VkSparseImageMemoryBind **image_bind_entries)
+ uint32_t signal_count)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct vk_queue_submit, submit, 1);
@@ -163,8 +161,8 @@ vk_queue_submit_alloc(struct vk_queue *queue,
VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBindInfo, image_binds,
image_bind_count);
VK_MULTIALLOC_DECL(&ma, VkSparseMemoryBind,
- bind_entries_local, bind_entry_count);
- VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBind, image_bind_entries_local,
+ bind_entries, bind_entry_count);
+ VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBind, image_bind_entries,
image_bind_entry_count);
VK_MULTIALLOC_DECL(&ma, struct vk_sync_signal, signals, signal_count);
VK_MULTIALLOC_DECL(&ma, struct vk_sync *, wait_temps, wait_count);
@@ -181,29 +179,19 @@ vk_queue_submit_alloc(struct vk_queue *queue,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
- submit->wait_count = wait_count;
- submit->command_buffer_count = command_buffer_count;
- submit->signal_count = signal_count;
- submit->buffer_bind_count = buffer_bind_count;
- submit->image_opaque_bind_count = image_opaque_bind_count;
- submit->image_bind_count = image_bind_count;
-
submit->waits = waits;
submit->command_buffers = command_buffers;
submit->signals = signals;
submit->buffer_binds = buffer_binds;
submit->image_opaque_binds = image_opaque_binds;
submit->image_binds = image_binds;
+
+ submit->_bind_entries = bind_entries;
+ submit->_image_bind_entries = image_bind_entries;
submit->_wait_temps = wait_temps;
submit->_wait_points = wait_points;
submit->_signal_points = signal_points;
- if (bind_entries)
- *bind_entries = bind_entries_local;
-
- if (image_bind_entries)
- *image_bind_entries = image_bind_entries_local;
-
return submit;
}
@@ -253,6 +241,228 @@ vk_queue_submit_destroy(struct vk_queue *queue,
vk_queue_submit_free(queue, submit);
}
+static void
+vk_queue_submit_add_semaphore_wait(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkSemaphoreSubmitInfo *wait_info)
+{
+ VK_FROM_HANDLE(vk_semaphore, semaphore, wait_info->semaphore);
+
+ /* From the Vulkan 1.2.194 spec:
+ *
+ * "Applications can import a semaphore payload into an existing
+ * semaphore using an external semaphore handle. The effects of the
+ * import operation will be either temporary or permanent, as
+ * specified by the application. If the import is temporary, the
+ * implementation must restore the semaphore to its prior permanent
+ * state after submitting the next semaphore wait operation."
+ *
+ * and
+ *
+ * VUID-VkImportSemaphoreFdInfoKHR-flags-03323
+ *
+ * "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the
+ * VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore
+ * from which handle or name was exported must not be
+ * VK_SEMAPHORE_TYPE_TIMELINE"
+ */
+ struct vk_sync *sync;
+ if (semaphore->temporary) {
+ assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
+ sync = submit->_wait_temps[submit->wait_count] = semaphore->temporary;
+ semaphore->temporary = NULL;
+ } else {
+ if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
+ if (vk_device_supports_threaded_submit(queue->base.device))
+ assert(semaphore->permanent.type->move);
+ submit->_has_binary_permanent_semaphore_wait = true;
+ }
+
+ sync = &semaphore->permanent;
+ }
+
+ uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
+ wait_info->value : 0;
+
+ submit->waits[submit->wait_count] = (struct vk_sync_wait) {
+ .sync = sync,
+ .stage_mask = wait_info->stageMask,
+ .wait_value = wait_value,
+ };
+
+ submit->wait_count++;
+}
+
+static VkResult MUST_CHECK
+vk_queue_submit_add_semaphore_signal(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkSemaphoreSubmitInfo *signal_info)
+{
+ VK_FROM_HANDLE(vk_semaphore, semaphore, signal_info->semaphore);
+ VkResult result;
+
+ struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
+ uint64_t signal_value = signal_info->value;
+ if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
+ if (signal_value == 0) {
+ return vk_queue_set_lost(queue,
+ "Tried to signal a timeline with value 0");
+ }
+ } else {
+ signal_value = 0;
+ }
+
+ /* For emulated timelines, we need to associate a binary vk_sync with
+ * each time point and pass the binary vk_sync to the driver. We could
+ * do this in vk_queue_submit_final but it might require doing memory
+    * allocation and we don't want to add extra failure paths there.
+ * Instead, allocate and replace the driver-visible vk_sync now and
+ * we'll insert it into the timeline in vk_queue_submit_final. The
+ * insert step is guaranteed to not fail.
+ */
+ struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync);
+ if (timeline) {
+ assert(queue->base.device->timeline_mode ==
+ VK_DEVICE_TIMELINE_MODE_EMULATED);
+ struct vk_sync_timeline_point **signal_point =
+ &submit->_signal_points[submit->signal_count];
+ result = vk_sync_timeline_alloc_point(queue->base.device, timeline,
+ signal_value, signal_point);
+ if (unlikely(result != VK_SUCCESS))
+ return result;
+
+ sync = &(*signal_point)->sync;
+ signal_value = 0;
+ }
+
+ submit->signals[submit->signal_count] = (struct vk_sync_signal) {
+ .sync = sync,
+ .stage_mask = signal_info->stageMask,
+ .signal_value = signal_value,
+ };
+
+ submit->signal_count++;
+
+ return VK_SUCCESS;
+}
+
+static void
+vk_queue_submit_add_sync_signal(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ struct vk_sync *sync,
+ uint64_t signal_value)
+{
+ submit->signals[submit->signal_count++] = (struct vk_sync_signal) {
+ .sync = sync,
+ .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ .signal_value = signal_value,
+ };
+}
+
+static VkResult MUST_CHECK
+vk_queue_submit_add_mem_signal(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ VkDeviceMemory memory)
+{
+ assert(submit->_mem_signal_temp == NULL);
+ VkResult result;
+
+ struct vk_sync *mem_sync;
+ result = queue->base.device->create_sync_for_memory(queue->base.device,
+ memory, true,
+ &mem_sync);
+ if (unlikely(result != VK_SUCCESS))
+ return result;
+
+ submit->_mem_signal_temp = mem_sync;
+
+ vk_queue_submit_add_sync_signal(queue, submit, mem_sync, 0);
+
+ return VK_SUCCESS;
+}
+
+static void
+vk_queue_submit_add_fence_signal(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ struct vk_fence *fence)
+{
+ vk_queue_submit_add_sync_signal(queue, submit,
+ vk_fence_get_active_sync(fence), 0);
+}
+
+static void
+vk_queue_submit_add_command_buffer(struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkCommandBufferSubmitInfo *info)
+{
+ VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, info->commandBuffer);
+
+ assert(info->deviceMask == 0 || info->deviceMask == 1);
+ assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index);
+
+ /* Some drivers don't call vk_command_buffer_begin/end() yet and, for
+ * those, we'll see initial layout. However, this is enough to catch
+ * command buffers which get submitted without calling EndCommandBuffer.
+ */
+ assert(cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_INITIAL ||
+ cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE ||
+ cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING);
+ cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING;
+
+ submit->command_buffers[submit->command_buffer_count++] = cmd_buffer;
+}
+
+static void
+vk_queue_submit_add_buffer_bind(
+ struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkSparseBufferMemoryBindInfo *info)
+{
+ VkSparseMemoryBind *entries = submit->_bind_entries +
+ submit->_bind_entry_count;
+ submit->_bind_entry_count += info->bindCount;
+
+ typed_memcpy(entries, info->pBinds, info->bindCount);
+
+ VkSparseBufferMemoryBindInfo info_tmp = *info;
+ info_tmp.pBinds = entries;
+ submit->buffer_binds[submit->buffer_bind_count++] = info_tmp;
+}
+
+static void
+vk_queue_submit_add_image_opaque_bind(
+ struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkSparseImageOpaqueMemoryBindInfo *info)
+{
+ VkSparseMemoryBind *entries = submit->_bind_entries +
+ submit->_bind_entry_count;
+ submit->_bind_entry_count += info->bindCount;
+
+ typed_memcpy(entries, info->pBinds, info->bindCount);
+
+ VkSparseImageOpaqueMemoryBindInfo info_tmp = *info;
+ info_tmp.pBinds = entries;
+ submit->image_opaque_binds[submit->image_opaque_bind_count++] = info_tmp;
+}
+
+static void
+vk_queue_submit_add_image_bind(
+ struct vk_queue *queue,
+ struct vk_queue_submit *submit,
+ const VkSparseImageMemoryBindInfo *info)
+{
+ VkSparseImageMemoryBind *entries = submit->_image_bind_entries +
+ submit->_image_bind_entry_count;
+ submit->_image_bind_entry_count += info->bindCount;
+
+ typed_memcpy(entries, info->pBinds, info->bindCount);
+
+ VkSparseImageMemoryBindInfo info_tmp = *info;
+ info_tmp.pBinds = entries;
+ submit->image_binds[submit->image_bind_count++] = info_tmp;
+}
+
static void
vk_queue_push_submit(struct vk_queue *queue,
struct vk_queue_submit *submit)
@@ -600,8 +810,6 @@ vk_queue_submit(struct vk_queue *queue,
VkResult result;
uint32_t sparse_memory_bind_entry_count = 0;
uint32_t sparse_memory_image_bind_entry_count = 0;
- VkSparseMemoryBind *sparse_memory_bind_entries = NULL;
- VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL;
for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount;
@@ -618,6 +826,10 @@ vk_queue_submit(struct vk_queue *queue,
mem_signal->memory != VK_NULL_HANDLE &&
queue->base.device->create_sync_for_memory != NULL;
+ uint32_t signal_count = info->signal_count +
+ signal_mem_sync +
+ (info->fence != NULL);
+
struct vk_queue_submit *submit =
vk_queue_submit_alloc(queue, info->wait_count,
info->command_buffer_count,
@@ -626,10 +838,7 @@ vk_queue_submit(struct vk_queue *queue,
info->image_bind_count,
sparse_memory_bind_entry_count,
sparse_memory_image_bind_entry_count,
- info->signal_count +
- signal_mem_sync + (info->fence != NULL),
- &sparse_memory_bind_entries,
- &sparse_memory_image_bind_entries);
+ signal_count);
if (unlikely(submit == NULL))
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -642,185 +851,41 @@ vk_queue_submit(struct vk_queue *queue,
vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
- for (uint32_t i = 0; i < info->wait_count; i++) {
- VK_FROM_HANDLE(vk_semaphore, semaphore,
- info->waits[i].semaphore);
-
- /* From the Vulkan 1.2.194 spec:
- *
- * "Applications can import a semaphore payload into an existing
- * semaphore using an external semaphore handle. The effects of the
- * import operation will be either temporary or permanent, as
- * specified by the application. If the import is temporary, the
- * implementation must restore the semaphore to its prior permanent
- * state after submitting the next semaphore wait operation."
- *
- * and
- *
- * VUID-VkImportSemaphoreFdInfoKHR-flags-03323
- *
- * "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the
- * VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore
- * from which handle or name was exported must not be
- * VK_SEMAPHORE_TYPE_TIMELINE"
- */
- struct vk_sync *sync;
- if (semaphore->temporary) {
- assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
- sync = submit->_wait_temps[i] = semaphore->temporary;
- semaphore->temporary = NULL;
- } else {
- if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
- if (vk_device_supports_threaded_submit(device))
- assert(semaphore->permanent.type->move);
- submit->_has_binary_permanent_semaphore_wait = true;
- }
-
- sync = &semaphore->permanent;
- }
-
- uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
- info->waits[i].value : 0;
-
- submit->waits[i] = (struct vk_sync_wait) {
- .sync = sync,
- .stage_mask = info->waits[i].stageMask,
- .wait_value = wait_value,
- };
- }
+ for (uint32_t i = 0; i < info->wait_count; i++)
+ vk_queue_submit_add_semaphore_wait(queue, submit, &info->waits[i]);
for (uint32_t i = 0; i < info->command_buffer_count; i++) {
- VK_FROM_HANDLE(vk_command_buffer, cmd_buffer,
- info->command_buffers[i].commandBuffer);
- assert(info->command_buffers[i].deviceMask == 0 ||
- info->command_buffers[i].deviceMask == 1);
- assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index);
-
- /* Some drivers don't call vk_command_buffer_begin/end() yet and, for
- * those, we'll see initial layout. However, this is enough to catch
- * command buffers which get submitted without calling EndCommandBuffer.
- */
- assert(cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_INITIAL ||
- cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE ||
- cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING);
- cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING;
-
- submit->command_buffers[i] = cmd_buffer;
+ vk_queue_submit_add_command_buffer(queue, submit,
+ &info->command_buffers[i]);
}
- sparse_memory_bind_entry_count = 0;
- sparse_memory_image_bind_entry_count = 0;
-
- if (info->buffer_binds)
- typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count);
-
- for (uint32_t i = 0; i < info->buffer_bind_count; ++i) {
- VkSparseMemoryBind *binds = sparse_memory_bind_entries +
- sparse_memory_bind_entry_count;
- submit->buffer_binds[i].pBinds = binds;
- typed_memcpy(binds, info->buffer_binds[i].pBinds,
- info->buffer_binds[i].bindCount);
-
- sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount;
- }
-
- if (info->image_opaque_binds)
- typed_memcpy(submit->image_opaque_binds, info->image_opaque_binds,
- info->image_opaque_bind_count);
+ for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
+ vk_queue_submit_add_buffer_bind(queue, submit, &info->buffer_binds[i]);
for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) {
- VkSparseMemoryBind *binds = sparse_memory_bind_entries +
- sparse_memory_bind_entry_count;
- submit->image_opaque_binds[i].pBinds = binds;
- typed_memcpy(binds, info->image_opaque_binds[i].pBinds,
- info->image_opaque_binds[i].bindCount);
-
- sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount;
+ vk_queue_submit_add_image_opaque_bind(queue, submit,
+ &info->image_opaque_binds[i]);
}
- if (info->image_binds)
- typed_memcpy(submit->image_binds, info->image_binds, info->image_bind_count);
-
- for (uint32_t i = 0; i < info->image_bind_count; ++i) {
- VkSparseImageMemoryBind *binds = sparse_memory_image_bind_entries +
- sparse_memory_image_bind_entry_count;
- submit->image_binds[i].pBinds = binds;
- typed_memcpy(binds, info->image_binds[i].pBinds,
- info->image_binds[i].bindCount);
-
- sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount;
- }
+ for (uint32_t i = 0; i < info->image_bind_count; ++i)
+ vk_queue_submit_add_image_bind(queue, submit, &info->image_binds[i]);
for (uint32_t i = 0; i < info->signal_count; i++) {
- VK_FROM_HANDLE(vk_semaphore, semaphore,
- info->signals[i].semaphore);
-
- struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
- uint64_t signal_value = info->signals[i].value;
- if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
- if (signal_value == 0) {
- result = vk_queue_set_lost(queue,
- "Tried to signal a timeline with value 0");
- goto fail;
- }
- } else {
- signal_value = 0;
- }
-
- /* For emulated timelines, we need to associate a binary vk_sync with
- * each time point and pass the binary vk_sync to the driver. We could
- * do this in vk_queue_submit_final but it might require doing memory
- * allocation and we don't want to to add extra failure paths there.
- * Instead, allocate and replace the driver-visible vk_sync now and
- * we'll insert it into the timeline in vk_queue_submit_final. The
- * insert step is guaranteed to not fail.
- */
- struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync);
- if (timeline) {
- assert(queue->base.device->timeline_mode ==
- VK_DEVICE_TIMELINE_MODE_EMULATED);
- result = vk_sync_timeline_alloc_point(queue->base.device, timeline,
- signal_value,
- &submit->_signal_points[i]);
- if (unlikely(result != VK_SUCCESS))
- goto fail;
-
- sync = &submit->_signal_points[i]->sync;
- signal_value = 0;
- }
-
- submit->signals[i] = (struct vk_sync_signal) {
- .sync = sync,
- .stage_mask = info->signals[i].stageMask,
- .signal_value = signal_value,
- };
+ result = vk_queue_submit_add_semaphore_signal(queue, submit,
+ &info->signals[i]);
+ if (unlikely(result != VK_SUCCESS))
+ goto fail;
}
- uint32_t signal_count = info->signal_count;
if (signal_mem_sync) {
- struct vk_sync *mem_sync;
- result = queue->base.device->create_sync_for_memory(queue->base.device,
- mem_signal->memory,
- true, &mem_sync);
+ result = vk_queue_submit_add_mem_signal(queue, submit,
+ mem_signal->memory);
if (unlikely(result != VK_SUCCESS))
goto fail;
-
- submit->_mem_signal_temp = mem_sync;
-
- assert(submit->signals[signal_count].sync == NULL);
- submit->signals[signal_count++] = (struct vk_sync_signal) {
- .sync = mem_sync,
- .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- };
}
- if (info->fence != NULL) {
- assert(submit->signals[signal_count].sync == NULL);
- submit->signals[signal_count++] = (struct vk_sync_signal) {
- .sync = vk_fence_get_active_sync(info->fence),
- .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- };
- }
+ if (info->fence != NULL)
+ vk_queue_submit_add_fence_signal(queue, submit, info->fence);
assert(signal_count == submit->signal_count);
@@ -1074,15 +1139,11 @@ vk_queue_signal_sync(struct vk_queue *queue,
uint32_t signal_value)
{
struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, 0, 0, 0, 0, 0,
- 0, 0, 1, NULL, NULL);
+ 0, 0, 1);
if (unlikely(submit == NULL))
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
- submit->signals[0] = (struct vk_sync_signal) {
- .sync = sync,
- .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- .signal_value = signal_value,
- };
+ vk_queue_submit_add_sync_signal(queue, submit, sync, signal_value);
VkResult result;
switch (queue->submit.mode) {
diff --git a/src/vulkan/runtime/vk_queue.h b/src/vulkan/runtime/vk_queue.h
index 7d96a4c559d35..f6efd19c5a464 100644
--- a/src/vulkan/runtime/vk_queue.h
+++ b/src/vulkan/runtime/vk_queue.h
@@ -237,6 +237,11 @@ struct vk_queue_submit {
uint32_t perf_pass_index;
/* Used internally; should be ignored by drivers */
+ uint32_t _bind_entry_count;
+ uint32_t _image_bind_entry_count;
+ VkSparseMemoryBind *_bind_entries;
+ VkSparseImageMemoryBind *_image_bind_entries;
+
bool _has_binary_permanent_semaphore_wait;
struct vk_sync **_wait_temps;
struct vk_sync *_mem_signal_temp;
--
GitLab
From a78e07d371e82fe2f2b14bfad596eb59bc70c0a1 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 18:59:35 -0500
Subject: [PATCH 6/7] vulkan/queue: Split vk_queue_submit into create and
submit
---
src/vulkan/runtime/vk_queue.c | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index 229c68a1abd83..14423601baf32 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -803,10 +803,10 @@ struct vulkan_submit_info {
};
static VkResult
-vk_queue_submit(struct vk_queue *queue,
- const struct vulkan_submit_info *info)
+vk_queue_submit_create(struct vk_queue *queue,
+ const struct vulkan_submit_info *info,
+ struct vk_queue_submit **submit_out)
{
- struct vk_device *device = queue->base.device;
VkResult result;
uint32_t sparse_memory_bind_entry_count = 0;
uint32_t sparse_memory_image_bind_entry_count = 0;
@@ -889,6 +889,22 @@ vk_queue_submit(struct vk_queue *queue,
assert(signal_count == submit->signal_count);
+ *submit_out = submit;
+
+ return VK_SUCCESS;
+
+fail:
+ vk_queue_submit_destroy(queue, submit);
+ return result;
+}
+
+static VkResult
+vk_queue_submit(struct vk_queue *queue,
+ struct vk_queue_submit *submit)
+{
+ struct vk_device *device = queue->base.device;
+ VkResult result;
+
/* If this device supports threaded submit, we can't rely on the client
* ordering requirements to ensure submits happen in the right order. Even
* if this queue doesn't have a submit thread, another queue (possibly in a
@@ -1231,7 +1247,12 @@ vk_common_QueueSubmit2(VkQueue _queue,
.signals = pSubmits[i].pSignalSemaphoreInfos,
.fence = i == submitCount - 1 ? fence : NULL
};
- VkResult result = vk_queue_submit(queue, &info);
+ struct vk_queue_submit *submit;
+ VkResult result = vk_queue_submit_create(queue, &info, &submit);
+ if (unlikely(result != VK_SUCCESS))
+ return result;
+
+ result = vk_queue_submit(queue, submit);
if (unlikely(result != VK_SUCCESS))
return result;
}
@@ -1333,7 +1354,10 @@ vk_common_QueueBindSparse(VkQueue _queue,
.image_binds = pBindInfo[i].pImageBinds,
.fence = i == bindInfoCount - 1 ? fence : NULL
};
- VkResult result = vk_queue_submit(queue, &info);
+ struct vk_queue_submit *submit;
+ VkResult result = vk_queue_submit_create(queue, &info, &submit);
+ if (likely(result == VK_SUCCESS))
+ result = vk_queue_submit(queue, submit);
STACK_ARRAY_FINISH(wait_semaphore_infos);
STACK_ARRAY_FINISH(signal_semaphore_infos);
--
GitLab
From cad7e59ecd7fa0ebdda08ae6cb8e9a2806927837 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Thu, 5 Oct 2023 19:04:10 -0500
Subject: [PATCH 7/7] vulkan/queue: Merge submits when possible
---
src/vulkan/runtime/vk_queue.c | 182 +++++++++++++++++++++++++++++++++-
1 file changed, 178 insertions(+), 4 deletions(-)
diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
index 14423601baf32..f1d0332f0624f 100644
--- a/src/vulkan/runtime/vk_queue.c
+++ b/src/vulkan/runtime/vk_queue.c
@@ -463,6 +463,135 @@ vk_queue_submit_add_image_bind(
submit->image_binds[submit->image_bind_count++] = info_tmp;
}
+/* Attempts to merge two submits into one. If the merge succeeds, the merged
+ * submit is returned and the two submits passed in are destroyed.
+ */
+static struct vk_queue_submit *
+vk_queue_submits_merge(struct vk_queue *queue,
+ struct vk_queue_submit *first,
+ struct vk_queue_submit *second)
+{
+ /* Don't merge if there are signals in between: see 'Signal operation order' */
+ if (first->signal_count > 0 &&
+ (second->command_buffer_count ||
+ second->buffer_bind_count ||
+ second->image_opaque_bind_count ||
+ second->image_bind_count ||
+ second->wait_count))
+ return NULL;
+
+ if (vk_queue_submit_has_bind(first) != vk_queue_submit_has_bind(second))
+ return NULL;
+
+ if (first->_mem_signal_temp)
+ return NULL;
+
+ if (first->perf_pass_index != second->perf_pass_index)
+ return NULL;
+
+ /* noop submits can always do a no-op merge */
+ if (!second->command_buffer_count &&
+ !second->buffer_bind_count &&
+ !second->image_opaque_bind_count &&
+ !second->image_bind_count &&
+ !second->wait_count &&
+ !second->signal_count) {
+ vk_queue_submit_destroy(queue, second);
+ return first;
+ }
+ if (!first->command_buffer_count &&
+ !first->buffer_bind_count &&
+ !first->image_opaque_bind_count &&
+ !first->image_bind_count &&
+ !first->wait_count &&
+ !first->signal_count) {
+ vk_queue_submit_destroy(queue, first);
+ return second;
+ }
+
+ struct vk_queue_submit *merged = vk_queue_submit_alloc(queue,
+ first->wait_count + second->wait_count,
+ first->command_buffer_count + second->command_buffer_count,
+ first->buffer_bind_count + second->buffer_bind_count,
+ first->image_opaque_bind_count + second->image_opaque_bind_count,
+ first->image_bind_count + second->image_bind_count,
+ first->_bind_entry_count + second->_bind_entry_count,
+ first->_image_bind_entry_count + second->_image_bind_entry_count,
+ first->signal_count + second->signal_count);
+ if (merged == NULL)
+ return NULL;
+
+ merged->wait_count = first->wait_count + second->wait_count;
+ typed_memcpy(merged->waits, first->waits, first->wait_count);
+ typed_memcpy(&merged->waits[first->wait_count], second->waits, second->wait_count);
+
+ merged->command_buffer_count = first->command_buffer_count +
+ second->command_buffer_count;
+ typed_memcpy(merged->command_buffers,
+ first->command_buffers, first->command_buffer_count);
+ typed_memcpy(&merged->command_buffers[first->command_buffer_count],
+ second->command_buffers, second->command_buffer_count);
+
+ merged->signal_count = first->signal_count + second->signal_count;
+ typed_memcpy(merged->signals, first->signals, first->signal_count);
+ typed_memcpy(&merged->signals[first->signal_count], second->signals, second->signal_count);
+
+   for (uint32_t i = 0; i < first->buffer_bind_count; i++)
+      vk_queue_submit_add_buffer_bind(queue, merged, &first->buffer_binds[i]);
+   for (uint32_t i = 0; i < second->buffer_bind_count; i++)
+      vk_queue_submit_add_buffer_bind(queue, merged, &second->buffer_binds[i]);
+
+   for (uint32_t i = 0; i < first->image_opaque_bind_count; i++) {
+      vk_queue_submit_add_image_opaque_bind(queue, merged,
+                                            &first->image_opaque_binds[i]);
+   }
+   for (uint32_t i = 0; i < second->image_opaque_bind_count; i++) {
+      vk_queue_submit_add_image_opaque_bind(queue, merged,
+                                            &second->image_opaque_binds[i]);
+   }
+
+   for (uint32_t i = 0; i < first->image_bind_count; i++)
+      vk_queue_submit_add_image_bind(queue, merged, &first->image_binds[i]);
+   for (uint32_t i = 0; i < second->image_bind_count; i++)
+      vk_queue_submit_add_image_bind(queue, merged, &second->image_binds[i]);
+
+ merged->perf_pass_index = first->perf_pass_index;
+ assert(second->perf_pass_index == merged->perf_pass_index);
+
+ assert(merged->_bind_entry_count ==
+ first->_bind_entry_count + second->_bind_entry_count);
+ assert(merged->_image_bind_entry_count ==
+ first->_image_bind_entry_count + second->_image_bind_entry_count);
+
+ merged->_has_binary_permanent_semaphore_wait =
+ first->_has_binary_permanent_semaphore_wait;
+
+ typed_memcpy(merged->_wait_temps, first->_wait_temps, first->wait_count);
+ typed_memcpy(&merged->_wait_temps[first->wait_count], second->_wait_temps, second->wait_count);
+
+ assert(first->_mem_signal_temp == NULL);
+ merged->_mem_signal_temp = second->_mem_signal_temp;
+
+ if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
+ typed_memcpy(merged->_wait_points,
+ first->_wait_points, first->wait_count);
+ typed_memcpy(&merged->_wait_points[first->wait_count],
+ second->_wait_points, second->wait_count);
+
+ typed_memcpy(merged->_signal_points,
+ first->_signal_points, first->signal_count);
+ typed_memcpy(&merged->_signal_points[first->signal_count],
+ second->_signal_points, second->signal_count);
+ } else {
+ assert(first->_wait_points == NULL && second->_wait_points == NULL);
+ assert(first->_signal_points == NULL && second->_signal_points == NULL);
+ }
+ vk_queue_submit_free(queue, first);
+ vk_queue_submit_free(queue, second);
+
+ return merged;
+}
+
static void
vk_queue_push_submit(struct vk_queue *queue,
struct vk_queue_submit *submit)
@@ -1084,6 +1213,35 @@ fail:
return result;
}
+static VkResult
+vk_queue_merge_submit(struct vk_queue *queue,
+ struct vk_queue_submit **last_submit,
+ struct vk_queue_submit *submit)
+{
+ if (*last_submit == NULL) {
+ *last_submit = submit;
+ return VK_SUCCESS;
+ }
+
+ struct vk_queue_submit *merged =
+ vk_queue_submits_merge(queue, *last_submit, submit);
+ if (merged != NULL) {
+ *last_submit = merged;
+ return VK_SUCCESS;
+ }
+
+ VkResult result = vk_queue_submit(queue, *last_submit);
+ *last_submit = NULL;
+
+ if (likely(result == VK_SUCCESS)) {
+ *last_submit = submit;
+ } else {
+ vk_queue_submit_destroy(queue, submit);
+ }
+
+ return result;
+}
+
VkResult
vk_queue_wait_before_present(struct vk_queue *queue,
const VkPresentInfoKHR *pPresentInfo)
@@ -1224,6 +1382,7 @@ vk_common_QueueSubmit2(VkQueue _queue,
{
VK_FROM_HANDLE(vk_queue, queue, _queue);
VK_FROM_HANDLE(vk_fence, fence, _fence);
+ VkResult result;
if (vk_device_is_lost(queue->base.device))
return VK_ERROR_DEVICE_LOST;
@@ -1236,6 +1395,7 @@ vk_common_QueueSubmit2(VkQueue _queue,
}
}
+ struct vk_queue_submit *last_submit = NULL;
for (uint32_t i = 0; i < submitCount; i++) {
struct vulkan_submit_info info = {
.pNext = pSubmits[i].pNext,
@@ -1248,11 +1408,17 @@ vk_common_QueueSubmit2(VkQueue _queue,
.fence = i == submitCount - 1 ? fence : NULL
};
struct vk_queue_submit *submit;
- VkResult result = vk_queue_submit_create(queue, &info, &submit);
+ result = vk_queue_submit_create(queue, &info, &submit);
+ if (unlikely(result != VK_SUCCESS))
+ return result;
+
+ result = vk_queue_merge_submit(queue, &last_submit, submit);
if (unlikely(result != VK_SUCCESS))
return result;
+ }
- result = vk_queue_submit(queue, submit);
+ if (last_submit != NULL) {
+ result = vk_queue_submit(queue, last_submit);
if (unlikely(result != VK_SUCCESS))
return result;
}
@@ -1268,6 +1434,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
{
VK_FROM_HANDLE(vk_queue, queue, _queue);
VK_FROM_HANDLE(vk_fence, fence, _fence);
+ VkResult result;
if (vk_device_is_lost(queue->base.device))
return VK_ERROR_DEVICE_LOST;
@@ -1280,6 +1447,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
}
}
+ struct vk_queue_submit *last_submit = NULL;
for (uint32_t i = 0; i < bindInfoCount; i++) {
const VkTimelineSemaphoreSubmitInfo *timeline_info =
vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
@@ -1355,9 +1523,9 @@ vk_common_QueueBindSparse(VkQueue _queue,
.fence = i == bindInfoCount - 1 ? fence : NULL
};
struct vk_queue_submit *submit;
- VkResult result = vk_queue_submit_create(queue, &info, &submit);
+ result = vk_queue_submit_create(queue, &info, &submit);
if (likely(result == VK_SUCCESS))
- result = vk_queue_submit(queue, submit);
+ result = vk_queue_merge_submit(queue, &last_submit, submit);
STACK_ARRAY_FINISH(wait_semaphore_infos);
STACK_ARRAY_FINISH(signal_semaphore_infos);
@@ -1366,6 +1534,12 @@ vk_common_QueueBindSparse(VkQueue _queue,
return result;
}
+ if (last_submit != NULL) {
+ result = vk_queue_submit(queue, last_submit);
+ if (unlikely(result != VK_SUCCESS))
+ return result;
+ }
+
return VK_SUCCESS;
}
--
GitLab