diff --git a/spec_files/mesa/0001-broadcom-compiler-needs_quad_helper_invocation-enabl.patch b/spec_files/mesa/0001-broadcom-compiler-needs_quad_helper_invocation-enabl.patch
new file mode 100644
index 00000000..a2971a09
--- /dev/null
+++ b/spec_files/mesa/0001-broadcom-compiler-needs_quad_helper_invocation-enabl.patch
@@ -0,0 +1,41 @@
+From 97f5721bfc4bbbce5c3a39cf48eeb6ad1fb9cf97 Mon Sep 17 00:00:00 2001
+From: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
+Date: Mon, 15 Apr 2024 12:22:31 +0200
+Subject: [PATCH] broadcom/compiler: needs_quad_helper_invocation enable
+ PER_QUAD TMU access
+
+We take advantage of the needs_quad_helper_invocation information to
+only enable the PER_QUAD TMU access on Fragment Shaders when it is needed.
+
+PER_QUAD access is also disabled on stages different to fragment shader.
+Being enabled was causing MMU errors when TMU was doing indexed by vertexid
+reads on disabled lanes on vertex stage. This problem was exercised by some
+shaders from the GTK new GSK_RENDERER=ngl that were accessing a constant buffer
+offset[6], but having PER_QUAD enabled on the TMU access by VertexID was
+doing hidden incorrect access to not existing vertex 6 and 7 as TMU was
+accessing the full quad.
+
+cc: mesa-stable
+
+Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28740>
+---
+ src/broadcom/compiler/nir_to_vir.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
+index ff98e4b5961..0303ca96103 100644
+--- a/src/broadcom/compiler/nir_to_vir.c
++++ b/src/broadcom/compiler/nir_to_vir.c
+@@ -656,6 +656,8 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
+                          */
+                         uint32_t perquad =
+                                 is_load && !vir_in_nonuniform_control_flow(c) &&
++                                c->s->info.stage == MESA_SHADER_FRAGMENT &&
++                                c->s->info.fs.needs_quad_helper_invocations &&
+                                 !c->emitted_discard ?
+                                 GENERAL_TMU_LOOKUP_PER_QUAD :
+                                 GENERAL_TMU_LOOKUP_PER_PIXEL;
+-- 
+2.44.0
+
diff --git a/spec_files/mesa/25352.patch b/spec_files/mesa/25352.patch
new file mode 100644
index 00000000..373f455e
--- /dev/null
+++ b/spec_files/mesa/25352.patch
@@ -0,0 +1,1666 @@
+From c63ec27abdf33335c05fd8b536a94efdb648099b Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 09:32:08 -0400
+Subject: [PATCH 01/17] vk/queue: move VkPerformanceQuerySubmitInfoKHR handling
+ up
+
+this is only supported for actual submits
+---
+ src/vulkan/runtime/vk_queue.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 9e38299ed05fe..d95b694ef8b50 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -594,7 +594,8 @@ struct vulkan_submit_info {
+ 
+ static VkResult
+ vk_queue_submit(struct vk_queue *queue,
+-                const struct vulkan_submit_info *info)
++                const struct vulkan_submit_info *info,
++                uint32_t perf_pass_index)
+ {
+    struct vk_device *device = queue->base.device;
+    VkResult result;
+@@ -633,14 +634,7 @@ vk_queue_submit(struct vk_queue *queue,
+    if (unlikely(submit == NULL))
+       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-   /* From the Vulkan 1.2.194 spec:
+-    *
+-    *    "If the VkSubmitInfo::pNext chain does not include this structure,
+-    *    the batch defaults to use counter pass index 0."
+-    */
+-   const VkPerformanceQuerySubmitInfoKHR *perf_info =
+-      vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+-   submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
++   submit->perf_pass_index = perf_pass_index;
+ 
+    bool has_binary_permanent_semaphore_wait = false;
+    for (uint32_t i = 0; i < info->wait_count; i++) {
+@@ -1172,7 +1166,17 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          .signals = pSubmits[i].pSignalSemaphoreInfos,
+          .fence = i == submitCount - 1 ? fence : NULL
+       };
+-      VkResult result = vk_queue_submit(queue, &info);
++
++      /* From the Vulkan 1.2.194 spec:
++      *
++      *    "If the VkSubmitInfo::pNext chain does not include this structure,
++      *    the batch defaults to use counter pass index 0."
++      */
++      const VkPerformanceQuerySubmitInfoKHR *perf_info =
++         vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
++      uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
++
++      VkResult result = vk_queue_submit(queue, &info, perf_pass_index);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1274,7 +1278,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
+          .image_binds = pBindInfo[i].pImageBinds,
+          .fence = i == bindInfoCount - 1 ? fence : NULL
+       };
+-      VkResult result = vk_queue_submit(queue, &info);
++      VkResult result = vk_queue_submit(queue, &info, 0);
+ 
+       STACK_ARRAY_FINISH(wait_semaphore_infos);
+       STACK_ARRAY_FINISH(signal_semaphore_infos);
+-- 
+GitLab
+
+
+From d1936c0f4b72fbfb6f904e52cbd95c0ecb103aeb Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 09:46:41 -0400
+Subject: [PATCH 02/17] vk/queue: move WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA
+ handling up
+
+this (should) only come from mesa wsi, which does a single queue submit
+per present, so handling it up front is both more logically consistent
+and more performant
+---
+ src/vulkan/runtime/vk_queue.c | 43 +++++++++++++++++++----------------
+ 1 file changed, 24 insertions(+), 19 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index d95b694ef8b50..8758b94d4a98d 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -595,7 +595,8 @@ struct vulkan_submit_info {
+ static VkResult
+ vk_queue_submit(struct vk_queue *queue,
+                 const struct vulkan_submit_info *info,
+-                uint32_t perf_pass_index)
++                uint32_t perf_pass_index,
++                struct vk_sync *mem_sync)
+ {
+    struct vk_device *device = queue->base.device;
+    VkResult result;
+@@ -613,12 +614,6 @@ vk_queue_submit(struct vk_queue *queue,
+    for (uint32_t i = 0; i < info->image_bind_count; ++i)
+       sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount;
+ 
+-   const struct wsi_memory_signal_submit_info *mem_signal =
+-      vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
+-   bool signal_mem_sync = mem_signal != NULL &&
+-                          mem_signal->memory != VK_NULL_HANDLE &&
+-                          queue->base.device->create_sync_for_memory != NULL;
+-
+    struct vk_queue_submit *submit =
+       vk_queue_submit_alloc(queue, info->wait_count,
+                             info->command_buffer_count,
+@@ -628,7 +623,7 @@ vk_queue_submit(struct vk_queue *queue,
+                             sparse_memory_bind_entry_count,
+                             sparse_memory_image_bind_entry_count,
+                             info->signal_count +
+-                            signal_mem_sync + (info->fence != NULL),
++                            (mem_sync != NULL) + (info->fence != NULL),
+                             &sparse_memory_bind_entries,
+                             &sparse_memory_image_bind_entries);
+    if (unlikely(submit == NULL))
+@@ -792,14 +787,7 @@ vk_queue_submit(struct vk_queue *queue,
+    }
+ 
+    uint32_t signal_count = info->signal_count;
+-   if (signal_mem_sync) {
+-      struct vk_sync *mem_sync;
+-      result = queue->base.device->create_sync_for_memory(queue->base.device,
+-                                                          mem_signal->memory,
+-                                                          true, &mem_sync);
+-      if (unlikely(result != VK_SUCCESS))
+-         goto fail;
+-
++   if (mem_sync) {
+       submit->_mem_signal_temp = mem_sync;
+ 
+       assert(submit->signals[signal_count].sync == NULL);
+@@ -976,7 +964,7 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+       vk_queue_push_submit(queue, submit);
+ 
+-      if (signal_mem_sync) {
++      if (mem_sync) {
+          /* If we're signaling a memory object, we have to ensure that
+           * vkQueueSubmit does not return until the kernel submission has
+           * happened.  Otherwise, we may get a race between this process
+@@ -1155,6 +1143,23 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       }
+    }
+ 
++   /* WSI signal info comes from WSI, which does 1 submit */
++   struct vk_sync *mem_sync = NULL;
++   if (submitCount == 1) {
++      const struct wsi_memory_signal_submit_info *mem_signal =
++         vk_find_struct_const(pSubmits->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
++      bool signal_mem_sync = mem_signal != NULL &&
++                             mem_signal->memory != VK_NULL_HANDLE &&
++                             queue->base.device->create_sync_for_memory != NULL;
++      if (signal_mem_sync) {
++         VkResult result = queue->base.device->create_sync_for_memory(queue->base.device,
++                                                                      mem_signal->memory,
++                                                                      true, &mem_sync);
++         if (unlikely(result != VK_SUCCESS))
++            return result;
++      }
++   }
++
+    for (uint32_t i = 0; i < submitCount; i++) {
+       struct vulkan_submit_info info = {
+          .pNext = pSubmits[i].pNext,
+@@ -1176,7 +1181,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      VkResult result = vk_queue_submit(queue, &info, perf_pass_index);
++      VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1278,7 +1283,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
+          .image_binds = pBindInfo[i].pImageBinds,
+          .fence = i == bindInfoCount - 1 ? fence : NULL
+       };
+-      VkResult result = vk_queue_submit(queue, &info, 0);
++      VkResult result = vk_queue_submit(queue, &info, 0, NULL);
+ 
+       STACK_ARRAY_FINISH(wait_semaphore_infos);
+       STACK_ARRAY_FINISH(signal_semaphore_infos);
+-- 
+GitLab
+
+
+From 78130ed25e0ddf5d44a87cce38afa4e1e0501c35 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 10:33:23 -0400
+Subject: [PATCH 03/17] vk/queue: precalc sparse bind counts before calling
+ submit
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 33 +++++++++++++++++++--------------
+ 1 file changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 8758b94d4a98d..45a2de1dcaf38 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -596,24 +596,15 @@ static VkResult
+ vk_queue_submit(struct vk_queue *queue,
+                 const struct vulkan_submit_info *info,
+                 uint32_t perf_pass_index,
+-                struct vk_sync *mem_sync)
++                struct vk_sync *mem_sync,
++                uint32_t sparse_memory_bind_entry_count,
++                uint32_t sparse_memory_image_bind_entry_count)
+ {
+    struct vk_device *device = queue->base.device;
+    VkResult result;
+-   uint32_t sparse_memory_bind_entry_count = 0;
+-   uint32_t sparse_memory_image_bind_entry_count = 0;
+    VkSparseMemoryBind *sparse_memory_bind_entries = NULL;
+    VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL;
+ 
+-   for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
+-      sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount;
+-
+-   for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i)
+-      sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount;
+-
+-   for (uint32_t i = 0; i < info->image_bind_count; ++i)
+-      sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount;
+-
+    struct vk_queue_submit *submit =
+       vk_queue_submit_alloc(queue, info->wait_count,
+                             info->command_buffer_count,
+@@ -1181,7 +1172,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync);
++      VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1283,7 +1274,21 @@ vk_common_QueueBindSparse(VkQueue _queue,
+          .image_binds = pBindInfo[i].pImageBinds,
+          .fence = i == bindInfoCount - 1 ? fence : NULL
+       };
+-      VkResult result = vk_queue_submit(queue, &info, 0, NULL);
++      uint32_t sparse_memory_bind_entry_count = 0;
++      uint32_t sparse_memory_image_bind_entry_count = 0;
++
++      for (uint32_t i = 0; i < info.buffer_bind_count; ++i)
++         sparse_memory_bind_entry_count += info.buffer_binds[i].bindCount;
++
++      for (uint32_t i = 0; i < info.image_opaque_bind_count; ++i)
++         sparse_memory_bind_entry_count += info.image_opaque_binds[i].bindCount;
++
++      for (uint32_t i = 0; i < info.image_bind_count; ++i)
++         sparse_memory_image_bind_entry_count += info.image_binds[i].bindCount;
++
++      VkResult result = vk_queue_submit(queue, &info, 0, NULL,
++                                        sparse_memory_bind_entry_count,
++                                        sparse_memory_image_bind_entry_count);
+ 
+       STACK_ARRAY_FINISH(wait_semaphore_infos);
+       STACK_ARRAY_FINISH(signal_semaphore_infos);
+-- 
+GitLab
+
+
+From 005352375cc7e6ef7539c57df56cb537fd13d320 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 10:38:13 -0400
+Subject: [PATCH 04/17] vk/queue: move vk_queue_submit allocation up
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 68 +++++++++++++++++++++--------------
+ 1 file changed, 42 insertions(+), 26 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 45a2de1dcaf38..02b98e83ca032 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -595,30 +595,16 @@ struct vulkan_submit_info {
+ static VkResult
+ vk_queue_submit(struct vk_queue *queue,
+                 const struct vulkan_submit_info *info,
++                struct vk_queue_submit *submit,
+                 uint32_t perf_pass_index,
+                 struct vk_sync *mem_sync,
+-                uint32_t sparse_memory_bind_entry_count,
+-                uint32_t sparse_memory_image_bind_entry_count)
++                VkSparseMemoryBind *sparse_memory_bind_entries,
++                VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
+ {
+    struct vk_device *device = queue->base.device;
+    VkResult result;
+-   VkSparseMemoryBind *sparse_memory_bind_entries = NULL;
+-   VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL;
+-
+-   struct vk_queue_submit *submit =
+-      vk_queue_submit_alloc(queue, info->wait_count,
+-                            info->command_buffer_count,
+-                            info->buffer_bind_count,
+-                            info->image_opaque_bind_count,
+-                            info->image_bind_count,
+-                            sparse_memory_bind_entry_count,
+-                            sparse_memory_image_bind_entry_count,
+-                            info->signal_count +
+-                            (mem_sync != NULL) + (info->fence != NULL),
+-                            &sparse_memory_bind_entries,
+-                            &sparse_memory_image_bind_entries);
+-   if (unlikely(submit == NULL))
+-      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
++   uint32_t sparse_memory_bind_entry_count = 0;
++   uint32_t sparse_memory_image_bind_entry_count = 0;
+ 
+    submit->perf_pass_index = perf_pass_index;
+ 
+@@ -689,9 +675,6 @@ vk_queue_submit(struct vk_queue *queue,
+       submit->command_buffers[i] = cmd_buffer;
+    }
+ 
+-   sparse_memory_bind_entry_count = 0;
+-   sparse_memory_image_bind_entry_count = 0;
+-
+    if (info->buffer_binds)
+       typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count);
+ 
+@@ -1172,7 +1155,22 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      VkResult result = vk_queue_submit(queue, &info, perf_pass_index, mem_sync, 0, 0);
++      struct vk_queue_submit *submit =
++         vk_queue_submit_alloc(queue, info.wait_count,
++                               info.command_buffer_count,
++                               info.buffer_bind_count,
++                               info.image_opaque_bind_count,
++                               info.image_bind_count,
++                               0,
++                               0,
++                               info.signal_count +
++                               (mem_sync != NULL) + (info.fence != NULL),
++                               NULL,
++                               NULL);
++      if (unlikely(submit == NULL))
++         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
++
++      VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1286,9 +1284,27 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       for (uint32_t i = 0; i < info.image_bind_count; ++i)
+          sparse_memory_image_bind_entry_count += info.image_binds[i].bindCount;
+ 
+-      VkResult result = vk_queue_submit(queue, &info, 0, NULL,
+-                                        sparse_memory_bind_entry_count,
+-                                        sparse_memory_image_bind_entry_count);
++      VkSparseMemoryBind *sparse_memory_bind_entries = NULL;
++      VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL;
++
++      struct vk_queue_submit *submit =
++         vk_queue_submit_alloc(queue, info.wait_count,
++                               info.command_buffer_count,
++                               info.buffer_bind_count,
++                               info.image_opaque_bind_count,
++                               info.image_bind_count,
++                               sparse_memory_bind_entry_count,
++                               sparse_memory_image_bind_entry_count,
++                               info.signal_count +
++                               (info.fence != NULL),
++                               &sparse_memory_bind_entries,
++                               &sparse_memory_image_bind_entries);
++      if (unlikely(submit == NULL))
++         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
++
++      VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL,
++                                        sparse_memory_bind_entries,
++                                        sparse_memory_image_bind_entries);
+ 
+       STACK_ARRAY_FINISH(wait_semaphore_infos);
+       STACK_ARRAY_FINISH(signal_semaphore_infos);
+-- 
+GitLab
+
+
+From 390c5a8f7bf383a197d8e9fa8c3b93496dc8bb1e Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:09:16 -0400
+Subject: [PATCH 05/17] vk/queue: break out submit wait parsing
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 39 +++++++++++++++++++++--------------
+ 1 file changed, 24 insertions(+), 15 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 02b98e83ca032..7df848c75b5f3 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -592,22 +592,11 @@ struct vulkan_submit_info {
+    struct vk_fence *fence;
+ };
+ 
+-static VkResult
+-vk_queue_submit(struct vk_queue *queue,
+-                const struct vulkan_submit_info *info,
+-                struct vk_queue_submit *submit,
+-                uint32_t perf_pass_index,
+-                struct vk_sync *mem_sync,
+-                VkSparseMemoryBind *sparse_memory_bind_entries,
+-                VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
++static bool
++vk_queue_parse_waits(struct vk_device *device,
++                     const struct vulkan_submit_info *info,
++                     struct vk_queue_submit *submit)
+ {
+-   struct vk_device *device = queue->base.device;
+-   VkResult result;
+-   uint32_t sparse_memory_bind_entry_count = 0;
+-   uint32_t sparse_memory_image_bind_entry_count = 0;
+-
+-   submit->perf_pass_index = perf_pass_index;
+-
+    bool has_binary_permanent_semaphore_wait = false;
+    for (uint32_t i = 0; i < info->wait_count; i++) {
+       VK_FROM_HANDLE(vk_semaphore, semaphore,
+@@ -655,6 +644,26 @@ vk_queue_submit(struct vk_queue *queue,
+          .wait_value = wait_value,
+       };
+    }
++   return has_binary_permanent_semaphore_wait;
++}
++
++static VkResult
++vk_queue_submit(struct vk_queue *queue,
++                const struct vulkan_submit_info *info,
++                struct vk_queue_submit *submit,
++                uint32_t perf_pass_index,
++                struct vk_sync *mem_sync,
++                VkSparseMemoryBind *sparse_memory_bind_entries,
++                VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
++{
++   struct vk_device *device = queue->base.device;
++   VkResult result;
++   uint32_t sparse_memory_bind_entry_count = 0;
++   uint32_t sparse_memory_image_bind_entry_count = 0;
++
++   submit->perf_pass_index = perf_pass_index;
++
++   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit);
+ 
+    for (uint32_t i = 0; i < info->command_buffer_count; i++) {
+       VK_FROM_HANDLE(vk_command_buffer, cmd_buffer,
+-- 
+GitLab
+
+
+From 443864152ac4977d72d3268ef1b3ade36f32f1ad Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:12:45 -0400
+Subject: [PATCH 06/17] vk/queue: break out cmdbuf parsing
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 42 +++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 17 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 7df848c75b5f3..0bd8942dd0586 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -647,24 +647,11 @@ vk_queue_parse_waits(struct vk_device *device,
+    return has_binary_permanent_semaphore_wait;
+ }
+ 
+-static VkResult
+-vk_queue_submit(struct vk_queue *queue,
+-                const struct vulkan_submit_info *info,
+-                struct vk_queue_submit *submit,
+-                uint32_t perf_pass_index,
+-                struct vk_sync *mem_sync,
+-                VkSparseMemoryBind *sparse_memory_bind_entries,
+-                VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
++static void
++vk_queue_parse_cmdbufs(struct vk_queue *queue,
++                       const struct vulkan_submit_info *info,
++                       struct vk_queue_submit *submit)
+ {
+-   struct vk_device *device = queue->base.device;
+-   VkResult result;
+-   uint32_t sparse_memory_bind_entry_count = 0;
+-   uint32_t sparse_memory_image_bind_entry_count = 0;
+-
+-   submit->perf_pass_index = perf_pass_index;
+-
+-   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit);
+-
+    for (uint32_t i = 0; i < info->command_buffer_count; i++) {
+       VK_FROM_HANDLE(vk_command_buffer, cmd_buffer,
+                      info->command_buffers[i].commandBuffer);
+@@ -683,6 +670,27 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+       submit->command_buffers[i] = cmd_buffer;
+    }
++}
++
++static VkResult
++vk_queue_submit(struct vk_queue *queue,
++                const struct vulkan_submit_info *info,
++                struct vk_queue_submit *submit,
++                uint32_t perf_pass_index,
++                struct vk_sync *mem_sync,
++                VkSparseMemoryBind *sparse_memory_bind_entries,
++                VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
++{
++   struct vk_device *device = queue->base.device;
++   VkResult result;
++   uint32_t sparse_memory_bind_entry_count = 0;
++   uint32_t sparse_memory_image_bind_entry_count = 0;
++
++   submit->perf_pass_index = perf_pass_index;
++
++   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit);
++
++   vk_queue_parse_cmdbufs(queue, info, submit);
+ 
+    if (info->buffer_binds)
+       typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count);
+-- 
+GitLab
+
+
+From 11d5e2d0c1493d07ad48f61fcdf5b4093708f005 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:16:32 -0400
+Subject: [PATCH 07/17] vk/queue: move wait parsing up
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 0bd8942dd0586..f5190e1c0aff4 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -678,6 +678,7 @@ vk_queue_submit(struct vk_queue *queue,
+                 struct vk_queue_submit *submit,
+                 uint32_t perf_pass_index,
+                 struct vk_sync *mem_sync,
++                bool has_binary_permanent_semaphore_wait,
+                 VkSparseMemoryBind *sparse_memory_bind_entries,
+                 VkSparseImageMemoryBind *sparse_memory_image_bind_entries)
+ {
+@@ -688,8 +689,6 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+    submit->perf_pass_index = perf_pass_index;
+ 
+-   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(device, info, submit);
+-
+    vk_queue_parse_cmdbufs(queue, info, submit);
+ 
+    if (info->buffer_binds)
+@@ -1187,7 +1186,9 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, 0, 0);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit);
++
++      VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1319,7 +1320,10 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit);
++
+       VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL,
++                                        has_binary_permanent_semaphore_wait,
+                                         sparse_memory_bind_entries,
+                                         sparse_memory_image_bind_entries);
+ 
+-- 
+GitLab
+
+
+From 681dd67e6fecf820258fe1872a13f6339e4c6fff Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:17:14 -0400
+Subject: [PATCH 08/17] vk/queue: move cmdbuf parsing up
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index f5190e1c0aff4..82c9b3e843696 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -689,8 +689,6 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+    submit->perf_pass_index = perf_pass_index;
+ 
+-   vk_queue_parse_cmdbufs(queue, info, submit);
+-
+    if (info->buffer_binds)
+       typed_memcpy(submit->buffer_binds, info->buffer_binds, info->buffer_bind_count);
+ 
+@@ -1187,6 +1185,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+       bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit);
++      vk_queue_parse_cmdbufs(queue, &info, submit);
+ 
+       VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+-- 
+GitLab
+
+
+From 2c3bdfe4effa72c7aadad05875ec3da6231bec50 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:46:59 -0400
+Subject: [PATCH 09/17] vk/queue: break out binary semaphore waiting for
+ threaded queues
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 178 ++++++++++++++++++----------------
+ 1 file changed, 95 insertions(+), 83 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 82c9b3e843696..04974e340db60 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -672,6 +672,98 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue,
+    }
+ }
+ 
++static VkResult
++vk_queue_handle_threaded_waits(struct vk_queue *queue,
++                               const struct vulkan_submit_info *info,
++                               struct vk_queue_submit *submit)
++{
++   assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED);
++   for (uint32_t i = 0; i < info->wait_count; i++) {
++      VK_FROM_HANDLE(vk_semaphore, semaphore,
++                     info->waits[i].semaphore);
++
++      if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY)
++         continue;
++
++      /* From the Vulkan 1.2.194 spec:
++         *
++         *    "When a batch is submitted to a queue via a queue
++         *    submission, and it includes semaphores to be waited on,
++         *    it defines a memory dependency between prior semaphore
++         *    signal operations and the batch, and defines semaphore
++         *    wait operations.
++         *
++         *    Such semaphore wait operations set the semaphores
++         *    created with a VkSemaphoreType of
++         *    VK_SEMAPHORE_TYPE_BINARY to the unsignaled state."
++         *
++         * For threaded submit, we depend on tracking the unsignaled
++         * state of binary semaphores to determine when we can safely
++         * submit.  The VK_SYNC_WAIT_PENDING check above as well as the
++         * one in the sumbit thread depend on all binary semaphores
++         * being reset when they're not in active use from the point
++         * of view of the client's CPU timeline.  This means we need to
++         * reset them inside vkQueueSubmit and cannot wait until the
++         * actual submit which happens later in the thread.
++         *
++         * We've already stolen temporary semaphore payloads above as
++         * part of basic semaphore processing.  We steal permanent
++         * semaphore payloads here by way of vk_sync_move.  For shared
++         * semaphores, this can be a bit expensive (sync file import
++         * and export) but, for non-shared semaphores, it can be made
++         * fairly cheap.  Also, we only do this semaphore swapping in
++         * the case where you have real timelines AND the client is
++         * using timeline semaphores with wait-before-signal (that's
++         * the only way to get a submit thread) AND mixing those with
++         * waits on binary semaphores AND said binary semaphore is
++         * using its permanent payload.  In other words, this code
++         * should basically only ever get executed in CTS tests.
++         */
++      if (submit->_wait_temps[i] != NULL)
++         continue;
++
++      assert(submit->waits[i].sync == &semaphore->permanent);
++
++      /* From the Vulkan 1.2.194 spec:
++         *
++         *    VUID-vkQueueSubmit-pWaitSemaphores-03238
++         *
++         *    "All elements of the pWaitSemaphores member of all
++         *    elements of pSubmits created with a VkSemaphoreType of
++         *    VK_SEMAPHORE_TYPE_BINARY must reference a semaphore
++         *    signal operation that has been submitted for execution
++         *    and any semaphore signal operations on which it depends
++         *    (if any) must have also been submitted for execution."
++         *
++         * Therefore, we can safely do a blocking wait here and it
++         * won't actually block for long.  This ensures that the
++         * vk_sync_move below will succeed.
++         */
++      VkResult result = vk_sync_wait(queue->base.device,
++                                     submit->waits[i].sync, 0,
++                                     VK_SYNC_WAIT_PENDING, UINT64_MAX);
++      if (unlikely(result != VK_SUCCESS))
++         return result;
++
++      result = vk_sync_create(queue->base.device,
++                              semaphore->permanent.type,
++                              0 /* flags */,
++                              0 /* initial value */,
++                              &submit->_wait_temps[i]);
++      if (unlikely(result != VK_SUCCESS))
++         return result;
++
++      result = vk_sync_move(queue->base.device,
++                              submit->_wait_temps[i],
++                              &semaphore->permanent);
++      if (unlikely(result != VK_SUCCESS))
++         return result;
++
++      submit->waits[i].sync = submit->_wait_temps[i];
++   }
++   return VK_SUCCESS;
++}
++
+ static VkResult
+ vk_queue_submit(struct vk_queue *queue,
+                 const struct vulkan_submit_info *info,
+@@ -865,89 +957,9 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+    case VK_QUEUE_SUBMIT_MODE_THREADED:
+       if (has_binary_permanent_semaphore_wait) {
+-         for (uint32_t i = 0; i < info->wait_count; i++) {
+-            VK_FROM_HANDLE(vk_semaphore, semaphore,
+-                           info->waits[i].semaphore);
+-
+-            if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY)
+-               continue;
+-
+-            /* From the Vulkan 1.2.194 spec:
+-             *
+-             *    "When a batch is submitted to a queue via a queue
+-             *    submission, and it includes semaphores to be waited on,
+-             *    it defines a memory dependency between prior semaphore
+-             *    signal operations and the batch, and defines semaphore
+-             *    wait operations.
+-             *
+-             *    Such semaphore wait operations set the semaphores
+-             *    created with a VkSemaphoreType of
+-             *    VK_SEMAPHORE_TYPE_BINARY to the unsignaled state."
+-             *
+-             * For threaded submit, we depend on tracking the unsignaled
+-             * state of binary semaphores to determine when we can safely
+-             * submit.  The VK_SYNC_WAIT_PENDING check above as well as the
+-             * one in the sumbit thread depend on all binary semaphores
+-             * being reset when they're not in active use from the point
+-             * of view of the client's CPU timeline.  This means we need to
+-             * reset them inside vkQueueSubmit and cannot wait until the
+-             * actual submit which happens later in the thread.
+-             *
+-             * We've already stolen temporary semaphore payloads above as
+-             * part of basic semaphore processing.  We steal permanent
+-             * semaphore payloads here by way of vk_sync_move.  For shared
+-             * semaphores, this can be a bit expensive (sync file import
+-             * and export) but, for non-shared semaphores, it can be made
+-             * fairly cheap.  Also, we only do this semaphore swapping in
+-             * the case where you have real timelines AND the client is
+-             * using timeline semaphores with wait-before-signal (that's
+-             * the only way to get a submit thread) AND mixing those with
+-             * waits on binary semaphores AND said binary semaphore is
+-             * using its permanent payload.  In other words, this code
+-             * should basically only ever get executed in CTS tests.
+-             */
+-            if (submit->_wait_temps[i] != NULL)
+-               continue;
+-
+-            assert(submit->waits[i].sync == &semaphore->permanent);
+-
+-            /* From the Vulkan 1.2.194 spec:
+-             *
+-             *    VUID-vkQueueSubmit-pWaitSemaphores-03238
+-             *
+-             *    "All elements of the pWaitSemaphores member of all
+-             *    elements of pSubmits created with a VkSemaphoreType of
+-             *    VK_SEMAPHORE_TYPE_BINARY must reference a semaphore
+-             *    signal operation that has been submitted for execution
+-             *    and any semaphore signal operations on which it depends
+-             *    (if any) must have also been submitted for execution."
+-             *
+-             * Therefore, we can safely do a blocking wait here and it
+-             * won't actually block for long.  This ensures that the
+-             * vk_sync_move below will succeed.
+-             */
+-            result = vk_sync_wait(queue->base.device,
+-                                  submit->waits[i].sync, 0,
+-                                  VK_SYNC_WAIT_PENDING, UINT64_MAX);
+-            if (unlikely(result != VK_SUCCESS))
+-               goto fail;
+-
+-            result = vk_sync_create(queue->base.device,
+-                                    semaphore->permanent.type,
+-                                    0 /* flags */,
+-                                    0 /* initial value */,
+-                                    &submit->_wait_temps[i]);
+-            if (unlikely(result != VK_SUCCESS))
+-               goto fail;
+-
+-            result = vk_sync_move(queue->base.device,
+-                                  submit->_wait_temps[i],
+-                                  &semaphore->permanent);
+-            if (unlikely(result != VK_SUCCESS))
+-               goto fail;
+-
+-            submit->waits[i].sync = submit->_wait_temps[i];
+-         }
++         result = vk_queue_handle_threaded_waits(queue, info, submit);
++         if (unlikely(result != VK_SUCCESS))
++            goto fail;
+       }
+ 
+       vk_queue_push_submit(queue, submit);
+-- 
+GitLab
+
+
+From c79eba5ce9f94c5d7f00a294d78e412169ec961d Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:51:23 -0400
+Subject: [PATCH 10/17] vk/queue: pass waits directly to
+ vk_queue_handle_threaded_waits()
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 04974e340db60..c9278a814f85e 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -674,13 +674,14 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue,
+ 
+ static VkResult
+ vk_queue_handle_threaded_waits(struct vk_queue *queue,
+-                               const struct vulkan_submit_info *info,
++                               uint32_t wait_count,
++                               const VkSemaphoreSubmitInfo *waits,
+                                struct vk_queue_submit *submit)
+ {
+    assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED);
+-   for (uint32_t i = 0; i < info->wait_count; i++) {
++   for (uint32_t i = 0; i < wait_count; i++) {
+       VK_FROM_HANDLE(vk_semaphore, semaphore,
+-                     info->waits[i].semaphore);
++                     waits[i].semaphore);
+ 
+       if (semaphore->type != VK_SEMAPHORE_TYPE_BINARY)
+          continue;
+@@ -957,7 +958,7 @@ vk_queue_submit(struct vk_queue *queue,
+ 
+    case VK_QUEUE_SUBMIT_MODE_THREADED:
+       if (has_binary_permanent_semaphore_wait) {
+-         result = vk_queue_handle_threaded_waits(queue, info, submit);
++         result = vk_queue_handle_threaded_waits(queue, info->wait_count, info->waits, submit);
+          if (unlikely(result != VK_SUCCESS))
+             goto fail;
+       }
+-- 
+GitLab
+
+
+From ae75a3cb85500f73b07e7d466760abe8217c2b0b Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:58:10 -0400
+Subject: [PATCH 11/17] vk/queue: pass wait info directly to
+ vk_queue_parse_waits()
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index c9278a814f85e..3b3f331ab0c24 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -594,13 +594,14 @@ struct vulkan_submit_info {
+ 
+ static bool
+ vk_queue_parse_waits(struct vk_device *device,
+-                     const struct vulkan_submit_info *info,
++                     uint32_t wait_count,
++                     const VkSemaphoreSubmitInfo *waits,
+                      struct vk_queue_submit *submit)
+ {
+    bool has_binary_permanent_semaphore_wait = false;
+-   for (uint32_t i = 0; i < info->wait_count; i++) {
++   for (uint32_t i = 0; i < wait_count; i++) {
+       VK_FROM_HANDLE(vk_semaphore, semaphore,
+-                     info->waits[i].semaphore);
++                     waits[i].semaphore);
+ 
+       /* From the Vulkan 1.2.194 spec:
+        *
+@@ -636,11 +637,11 @@ vk_queue_parse_waits(struct vk_device *device,
+       }
+ 
+       uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
+-                            info->waits[i].value : 0;
++                            waits[i].value : 0;
+ 
+       submit->waits[i] = (struct vk_sync_wait) {
+          .sync = sync,
+-         .stage_mask = info->waits[i].stageMask,
++         .stage_mask = waits[i].stageMask,
+          .wait_value = wait_value,
+       };
+    }
+@@ -1197,7 +1198,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
+       vk_queue_parse_cmdbufs(queue, &info, submit);
+ 
+       VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+@@ -1332,7 +1333,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, &info, submit);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit);
+ 
+       VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL,
+                                         has_binary_permanent_semaphore_wait,
+-- 
+GitLab
+
+
+From 1e517e9e81478cc63f841d4cf4eceb2150f22804 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 11:58:40 -0400
+Subject: [PATCH 12/17] vk/queue: pass cmdbuf info directly to
+ vk_queue_parse_cmdbufs()
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 3b3f331ab0c24..54f9c41820bfd 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -650,14 +650,15 @@ vk_queue_parse_waits(struct vk_device *device,
+ 
+ static void
+ vk_queue_parse_cmdbufs(struct vk_queue *queue,
+-                       const struct vulkan_submit_info *info,
++                       uint32_t command_buffer_count,
++                       const VkCommandBufferSubmitInfo *command_buffers,
+                        struct vk_queue_submit *submit)
+ {
+-   for (uint32_t i = 0; i < info->command_buffer_count; i++) {
++   for (uint32_t i = 0; i < command_buffer_count; i++) {
+       VK_FROM_HANDLE(vk_command_buffer, cmd_buffer,
+-                     info->command_buffers[i].commandBuffer);
+-      assert(info->command_buffers[i].deviceMask == 0 ||
+-             info->command_buffers[i].deviceMask == 1);
++                     command_buffers[i].commandBuffer);
++      assert(command_buffers[i].deviceMask == 0 ||
++             command_buffers[i].deviceMask == 1);
+       assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index);
+ 
+       /* Some drivers don't call vk_command_buffer_begin/end() yet and, for
+@@ -1199,7 +1200,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+       bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
+-      vk_queue_parse_cmdbufs(queue, &info, submit);
++      vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit);
+ 
+       VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+-- 
+GitLab
+
+
+From 968f0c66b7f1dd29941626516d2bd0b74a8b5aa9 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 12:01:08 -0400
+Subject: [PATCH 13/17] vk/queue: move vk_queue_handle_threaded_waits() calls
+ up
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 34 +++++++++++++++++++++++-----------
+ 1 file changed, 23 insertions(+), 11 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 54f9c41820bfd..1e49cdbbf216a 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -959,12 +959,6 @@ vk_queue_submit(struct vk_queue *queue,
+       return vk_device_flush(queue->base.device);
+ 
+    case VK_QUEUE_SUBMIT_MODE_THREADED:
+-      if (has_binary_permanent_semaphore_wait) {
+-         result = vk_queue_handle_threaded_waits(queue, info->wait_count, info->waits, submit);
+-         if (unlikely(result != VK_SUCCESS))
+-            goto fail;
+-      }
+-
+       vk_queue_push_submit(queue, submit);
+ 
+       if (mem_sync) {
+@@ -1164,6 +1158,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+    }
+ 
+    for (uint32_t i = 0; i < submitCount; i++) {
++      VkResult result = VK_SUCCESS;
+       struct vulkan_submit_info info = {
+          .pNext = pSubmits[i].pNext,
+          .command_buffer_count = pSubmits[i].commandBufferInfoCount,
+@@ -1202,7 +1197,15 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
+       vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit);
+ 
+-      VkResult result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
++      if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
++         result = vk_queue_handle_threaded_waits(queue, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
++         if (unlikely(result != VK_SUCCESS)) {
++            vk_queue_submit_destroy(queue, submit);
++            return result;
++         }
++      }
++
++      result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+@@ -1231,6 +1234,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
+    }
+ 
+    for (uint32_t i = 0; i < bindInfoCount; i++) {
++      VkResult result = VK_SUCCESS;
+       const VkTimelineSemaphoreSubmitInfo *timeline_info =
+          vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+       const uint64_t *wait_values = NULL;
+@@ -1336,11 +1340,19 @@ vk_common_QueueBindSparse(VkQueue _queue,
+ 
+       bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit);
+ 
+-      VkResult result = vk_queue_submit(queue, &info, submit, 0, NULL,
+-                                        has_binary_permanent_semaphore_wait,
+-                                        sparse_memory_bind_entries,
+-                                        sparse_memory_image_bind_entries);
++      if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
++         result = vk_queue_handle_threaded_waits(queue, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit);
++         if (unlikely(result != VK_SUCCESS)) {
++            vk_queue_submit_destroy(queue, submit);
++            goto fail;
++         }
++      }
+ 
++      result = vk_queue_submit(queue, &info, submit, 0, NULL,
++                               has_binary_permanent_semaphore_wait,
++                               sparse_memory_bind_entries,
++                               sparse_memory_image_bind_entries);
++fail:
+       STACK_ARRAY_FINISH(wait_semaphore_infos);
+       STACK_ARRAY_FINISH(signal_semaphore_infos);
+ 
+-- 
+GitLab
+
+
+From 0f428de58525cad8e1ebd3c75c1503b10cbaf389 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 12:01:44 -0400
+Subject: [PATCH 14/17] vk/queue: remove wait and cmdbuf info from
+ vulkan_submit_info
+
+these are no longer used
+---
+ src/vulkan/runtime/vk_queue.c | 35 ++++++++++++++---------------------
+ 1 file changed, 14 insertions(+), 21 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 1e49cdbbf216a..328b9abf44386 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -571,12 +571,6 @@ vk_queue_enable_submit_thread(struct vk_queue *queue)
+ struct vulkan_submit_info {
+    const void *pNext;
+ 
+-   uint32_t command_buffer_count;
+-   const VkCommandBufferSubmitInfo *command_buffers;
+-
+-   uint32_t wait_count;
+-   const VkSemaphoreSubmitInfo *waits;
+-
+    uint32_t signal_count;
+    const VkSemaphoreSubmitInfo *signals;
+ 
+@@ -1159,12 +1153,12 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+ 
+    for (uint32_t i = 0; i < submitCount; i++) {
+       VkResult result = VK_SUCCESS;
++      uint32_t wait_count = pSubmits[i].waitSemaphoreInfoCount;
++      const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos;
++      uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount;
++      const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos;
+       struct vulkan_submit_info info = {
+          .pNext = pSubmits[i].pNext,
+-         .command_buffer_count = pSubmits[i].commandBufferInfoCount,
+-         .command_buffers = pSubmits[i].pCommandBufferInfos,
+-         .wait_count = pSubmits[i].waitSemaphoreInfoCount,
+-         .waits = pSubmits[i].pWaitSemaphoreInfos,
+          .signal_count = pSubmits[i].signalSemaphoreInfoCount,
+          .signals = pSubmits[i].pSignalSemaphoreInfos,
+          .fence = i == submitCount - 1 ? fence : NULL
+@@ -1180,8 +1174,8 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+       struct vk_queue_submit *submit =
+-         vk_queue_submit_alloc(queue, info.wait_count,
+-                               info.command_buffer_count,
++         vk_queue_submit_alloc(queue, wait_count,
++                               cmdbuf_count,
+                                info.buffer_bind_count,
+                                info.image_opaque_bind_count,
+                                info.image_bind_count,
+@@ -1194,11 +1188,11 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
+-      vk_queue_parse_cmdbufs(queue, pSubmits[i].commandBufferInfoCount, pSubmits[i].pCommandBufferInfos, submit);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
++      vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit);
+ 
+       if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
+-         result = vk_queue_handle_threaded_waits(queue, pSubmits[i].waitSemaphoreInfoCount, pSubmits[i].pWaitSemaphoreInfos, submit);
++         result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
+          if (unlikely(result != VK_SUCCESS)) {
+             vk_queue_submit_destroy(queue, submit);
+             return result;
+@@ -1268,6 +1262,7 @@ vk_common_QueueBindSparse(VkQueue _queue,
+          signal_values = timeline_info->pSignalSemaphoreValues;
+       }
+ 
++      uint32_t wait_count = pBindInfo[i].waitSemaphoreCount;
+       STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphore_infos,
+                   pBindInfo[i].waitSemaphoreCount);
+       STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos,
+@@ -1296,8 +1291,6 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       }
+       struct vulkan_submit_info info = {
+          .pNext = pBindInfo[i].pNext,
+-         .wait_count = pBindInfo[i].waitSemaphoreCount,
+-         .waits = wait_semaphore_infos,
+          .signal_count = pBindInfo[i].signalSemaphoreCount,
+          .signals = signal_semaphore_infos,
+          .buffer_bind_count = pBindInfo[i].bufferBindCount,
+@@ -1324,8 +1317,8 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       VkSparseImageMemoryBind *sparse_memory_image_bind_entries = NULL;
+ 
+       struct vk_queue_submit *submit =
+-         vk_queue_submit_alloc(queue, info.wait_count,
+-                               info.command_buffer_count,
++         vk_queue_submit_alloc(queue, pBindInfo[i].waitSemaphoreCount,
++                               0,
+                                info.buffer_bind_count,
+                                info.image_opaque_bind_count,
+                                info.image_bind_count,
+@@ -1338,10 +1331,10 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
+ 
+       if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
+-         result = vk_queue_handle_threaded_waits(queue, pBindInfo[i].waitSemaphoreCount, wait_semaphore_infos, submit);
++         result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
+          if (unlikely(result != VK_SUCCESS)) {
+             vk_queue_submit_destroy(queue, submit);
+             goto fail;
+-- 
+GitLab
+
+
+From 683694ee2d0beb22191e779b5e7eea1e58fc016f Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 12:40:00 -0400
+Subject: [PATCH 15/17] vk/queue: split out allocation+submission for
+ QueueSubmit2KHR
+
+no functional changes
+---
+ src/vulkan/runtime/vk_queue.c | 83 +++++++++++++++++++++--------------
+ 1 file changed, 50 insertions(+), 33 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 328b9abf44386..d2a7fbd76b742 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -1114,6 +1114,52 @@ vk_queue_finish(struct vk_queue *queue)
+    vk_object_base_finish(&queue->base);
+ }
+ 
++static VkResult
++vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit,
++                      uint32_t wait_count, const VkSemaphoreSubmitInfo *wait_semaphore_infos,
++                      uint32_t cmdbuf_count, const VkCommandBufferSubmitInfo *cmdbufs,
++                      uint32_t perf_pass_index, struct vk_sync *mem_sync, struct vk_fence *fence)
++{
++   VkResult result = VK_SUCCESS;
++   struct vulkan_submit_info info = {
++      .pNext = pSubmit->pNext,
++      .signal_count = pSubmit->signalSemaphoreInfoCount,
++      .signals = pSubmit->pSignalSemaphoreInfos,
++      .fence = fence
++   };
++
++   struct vk_queue_submit *submit =
++      vk_queue_submit_alloc(queue, wait_count,
++                              cmdbuf_count,
++                              info.buffer_bind_count,
++                              info.image_opaque_bind_count,
++                              info.image_bind_count,
++                              0,
++                              0,
++                              info.signal_count +
++                              (mem_sync != NULL) + (info.fence != NULL),
++                              NULL,
++                              NULL);
++   if (unlikely(submit == NULL))
++      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
++
++   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
++   vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit);
++
++   if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
++      result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
++      if (unlikely(result != VK_SUCCESS)) {
++         vk_queue_submit_destroy(queue, submit);
++         goto fail;
++      }
++   }
++
++   result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
++fail:
++   vk_queue_submit_destroy(queue, submit);
++   return result;
++}
++
+ VKAPI_ATTR VkResult VKAPI_CALL
+ vk_common_QueueSubmit2KHR(VkQueue _queue,
+                           uint32_t submitCount,
+@@ -1157,12 +1203,6 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos;
+       uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount;
+       const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos;
+-      struct vulkan_submit_info info = {
+-         .pNext = pSubmits[i].pNext,
+-         .signal_count = pSubmits[i].signalSemaphoreInfoCount,
+-         .signals = pSubmits[i].pSignalSemaphoreInfos,
+-         .fence = i == submitCount - 1 ? fence : NULL
+-      };
+ 
+       /* From the Vulkan 1.2.194 spec:
+       *
+@@ -1173,33 +1213,10 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      struct vk_queue_submit *submit =
+-         vk_queue_submit_alloc(queue, wait_count,
+-                               cmdbuf_count,
+-                               info.buffer_bind_count,
+-                               info.image_opaque_bind_count,
+-                               info.image_bind_count,
+-                               0,
+-                               0,
+-                               info.signal_count +
+-                               (mem_sync != NULL) + (info.fence != NULL),
+-                               NULL,
+-                               NULL);
+-      if (unlikely(submit == NULL))
+-         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+-
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
+-      vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit);
+-
+-      if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
+-         result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
+-         if (unlikely(result != VK_SUCCESS)) {
+-            vk_queue_submit_destroy(queue, submit);
+-            return result;
+-         }
+-      }
+-
+-      result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
++      result = vk_queue_submit_flush(queue, &pSubmits[i],
++                                     wait_count, wait_semaphore_infos,
++                                     cmdbuf_count, cmdbufs,
++                                     perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+    }
+-- 
+GitLab
+
+
+From cd84beeccd14986b30faf154a0d371a93c94242f Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 12:47:12 -0400
+Subject: [PATCH 16/17] vk/queue: add a count param to vk_queue_submit_flush
+
+not currently used
+---
+ src/vulkan/runtime/vk_queue.c | 74 +++++++++++++++++++----------------
+ 1 file changed, 41 insertions(+), 33 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index d2a7fbd76b742..20d28dfaba821 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -590,6 +590,7 @@ static bool
+ vk_queue_parse_waits(struct vk_device *device,
+                      uint32_t wait_count,
+                      const VkSemaphoreSubmitInfo *waits,
++                     uint32_t offset,
+                      struct vk_queue_submit *submit)
+ {
+    bool has_binary_permanent_semaphore_wait = false;
+@@ -618,7 +619,7 @@ vk_queue_parse_waits(struct vk_device *device,
+       struct vk_sync *sync;
+       if (semaphore->temporary) {
+          assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
+-         sync = submit->_wait_temps[i] = semaphore->temporary;
++         sync = submit->_wait_temps[i + offset] = semaphore->temporary;
+          semaphore->temporary = NULL;
+       } else {
+          if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
+@@ -633,7 +634,7 @@ vk_queue_parse_waits(struct vk_device *device,
+       uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
+                             waits[i].value : 0;
+ 
+-      submit->waits[i] = (struct vk_sync_wait) {
++      submit->waits[i + offset] = (struct vk_sync_wait) {
+          .sync = sync,
+          .stage_mask = waits[i].stageMask,
+          .wait_value = wait_value,
+@@ -646,6 +647,7 @@ static void
+ vk_queue_parse_cmdbufs(struct vk_queue *queue,
+                        uint32_t command_buffer_count,
+                        const VkCommandBufferSubmitInfo *command_buffers,
++                       uint32_t offset,
+                        struct vk_queue_submit *submit)
+ {
+    for (uint32_t i = 0; i < command_buffer_count; i++) {
+@@ -664,7 +666,7 @@ vk_queue_parse_cmdbufs(struct vk_queue *queue,
+              cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING);
+       cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING;
+ 
+-      submit->command_buffers[i] = cmd_buffer;
++      submit->command_buffers[i + offset] = cmd_buffer;
+    }
+ }
+ 
+@@ -672,6 +674,7 @@ static VkResult
+ vk_queue_handle_threaded_waits(struct vk_queue *queue,
+                                uint32_t wait_count,
+                                const VkSemaphoreSubmitInfo *waits,
++                               unsigned offset,
+                                struct vk_queue_submit *submit)
+ {
+    assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED);
+@@ -716,10 +719,10 @@ vk_queue_handle_threaded_waits(struct vk_queue *queue,
+          * using its permanent payload.  In other words, this code
+          * should basically only ever get executed in CTS tests.
+          */
+-      if (submit->_wait_temps[i] != NULL)
++      if (submit->_wait_temps[i + offset] != NULL)
+          continue;
+ 
+-      assert(submit->waits[i].sync == &semaphore->permanent);
++      assert(submit->waits[i + offset].sync == &semaphore->permanent);
+ 
+       /* From the Vulkan 1.2.194 spec:
+          *
+@@ -746,17 +749,17 @@ vk_queue_handle_threaded_waits(struct vk_queue *queue,
+                               semaphore->permanent.type,
+                               0 /* flags */,
+                               0 /* initial value */,
+-                              &submit->_wait_temps[i]);
++                              &submit->_wait_temps[i + offset]);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+ 
+       result = vk_sync_move(queue->base.device,
+-                              submit->_wait_temps[i],
++                              submit->_wait_temps[i + offset],
+                               &semaphore->permanent);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+ 
+-      submit->waits[i].sync = submit->_wait_temps[i];
++      submit->waits[i + offset].sync = submit->_wait_temps[i + offset];
+    }
+    return VK_SUCCESS;
+ }
+@@ -1115,16 +1118,16 @@ vk_queue_finish(struct vk_queue *queue)
+ }
+ 
+ static VkResult
+-vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit,
+-                      uint32_t wait_count, const VkSemaphoreSubmitInfo *wait_semaphore_infos,
+-                      uint32_t cmdbuf_count, const VkCommandBufferSubmitInfo *cmdbufs,
++vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmits, unsigned submit_count,
++                      uint32_t wait_count,
++                      uint32_t cmdbuf_count,
+                       uint32_t perf_pass_index, struct vk_sync *mem_sync, struct vk_fence *fence)
+ {
+    VkResult result = VK_SUCCESS;
+    struct vulkan_submit_info info = {
+-      .pNext = pSubmit->pNext,
+-      .signal_count = pSubmit->signalSemaphoreInfoCount,
+-      .signals = pSubmit->pSignalSemaphoreInfos,
++      .pNext = pSubmits->pNext,
++      .signal_count = pSubmits[submit_count - 1].signalSemaphoreInfoCount,
++      .signals = pSubmits[submit_count - 1].pSignalSemaphoreInfos,
+       .fence = fence
+    };
+ 
+@@ -1143,21 +1146,28 @@ vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmit,
+    if (unlikely(submit == NULL))
+       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-   bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
+-   vk_queue_parse_cmdbufs(queue, cmdbuf_count, cmdbufs, submit);
+-
+-   if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
+-      result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
+-      if (unlikely(result != VK_SUCCESS)) {
+-         vk_queue_submit_destroy(queue, submit);
+-         goto fail;
++   uint32_t wait_counter = 0;
++   uint32_t cmdbuf_counter = 0;
++   bool has_binary_permanent_semaphore_wait = false;
++   for (unsigned i = 0; i < submit_count; i++) {
++      uint32_t cur_wait_count = pSubmits[i].waitSemaphoreInfoCount;
++      const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos;
++      uint32_t cur_cmdbuf_count = pSubmits[i].commandBufferInfoCount;
++      const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos;
++      has_binary_permanent_semaphore_wait |= vk_queue_parse_waits(queue->base.device, cur_wait_count, wait_semaphore_infos, wait_counter, submit);
++      vk_queue_parse_cmdbufs(queue, cur_cmdbuf_count, cmdbufs, cmdbuf_counter, submit);
++      if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
++         result = vk_queue_handle_threaded_waits(queue, cur_wait_count, wait_semaphore_infos, wait_counter, submit);
++         if (unlikely(result != VK_SUCCESS)) {
++            vk_queue_submit_destroy(queue, submit);
++            return result;
++         }
+       }
++      wait_counter += cur_wait_count;
++      cmdbuf_counter += cur_cmdbuf_count;
+    }
+ 
+-   result = vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+-fail:
+-   vk_queue_submit_destroy(queue, submit);
+-   return result;
++   return vk_queue_submit(queue, &info, submit, perf_pass_index, mem_sync, has_binary_permanent_semaphore_wait, 0, 0);
+ }
+ 
+ VKAPI_ATTR VkResult VKAPI_CALL
+@@ -1200,9 +1210,7 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+    for (uint32_t i = 0; i < submitCount; i++) {
+       VkResult result = VK_SUCCESS;
+       uint32_t wait_count = pSubmits[i].waitSemaphoreInfoCount;
+-      const VkSemaphoreSubmitInfo *wait_semaphore_infos = pSubmits[i].pWaitSemaphoreInfos;
+       uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount;
+-      const VkCommandBufferSubmitInfo *cmdbufs = pSubmits[i].pCommandBufferInfos;
+ 
+       /* From the Vulkan 1.2.194 spec:
+       *
+@@ -1213,9 +1221,9 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      result = vk_queue_submit_flush(queue, &pSubmits[i],
+-                                     wait_count, wait_semaphore_infos,
+-                                     cmdbuf_count, cmdbufs,
++      result = vk_queue_submit_flush(queue, &pSubmits[i], 1,
++                                     wait_count,
++                                     cmdbuf_count,
+                                      perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL);
+       if (unlikely(result != VK_SUCCESS))
+          return result;
+@@ -1348,10 +1356,10 @@ vk_common_QueueBindSparse(VkQueue _queue,
+       if (unlikely(submit == NULL))
+          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, submit);
++      bool has_binary_permanent_semaphore_wait = vk_queue_parse_waits(queue->base.device, wait_count, wait_semaphore_infos, 0, submit);
+ 
+       if (has_binary_permanent_semaphore_wait && queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
+-         result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, submit);
++         result = vk_queue_handle_threaded_waits(queue, wait_count, wait_semaphore_infos, 0, submit);
+          if (unlikely(result != VK_SUCCESS)) {
+             vk_queue_submit_destroy(queue, submit);
+             goto fail;
+-- 
+GitLab
+
+
+From e2c84bbbfec5cd618909ad0ec98d3304ab7f05f2 Mon Sep 17 00:00:00 2001
+From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
+Date: Thu, 21 Sep 2023 13:12:14 -0400
+Subject: [PATCH 17/17] vk/queue: merge queue submissions when possible
+
+given various conditions, it's possible to merge queue submits into a
+single submission to reduce synchronization overhead
+---
+ src/vulkan/runtime/vk_queue.c | 89 +++++++++++++++++++++++++++++++----
+ 1 file changed, 79 insertions(+), 10 deletions(-)
+
+diff --git a/src/vulkan/runtime/vk_queue.c b/src/vulkan/runtime/vk_queue.c
+index 20d28dfaba821..ebe412a0024fe 100644
+--- a/src/vulkan/runtime/vk_queue.c
++++ b/src/vulkan/runtime/vk_queue.c
+@@ -1117,6 +1117,21 @@ vk_queue_finish(struct vk_queue *queue)
+    vk_object_base_finish(&queue->base);
+ }
+ 
++static bool
++filter_pnexts(const void *pNext)
++{
++   vk_foreach_struct_const(s, pNext) {
++      switch (s->sType) {
++      /* can possibly be merged */
++      case VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR:
++         break;
++      default:
++         return false;
++      }
++   }
++   return true;
++}
++
+ static VkResult
+ vk_queue_submit_flush(struct vk_queue *queue, const VkSubmitInfo2 *pSubmits, unsigned submit_count,
+                       uint32_t wait_count,
+@@ -1207,11 +1222,14 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+       }
+    }
+ 
++   uint32_t prev_perf_pass_index = 0;
++   bool iterate = true;
++   bool has_perf_info = false;
++   bool has_signals = false;
++   bool needs_last = false;
++   uint32_t first = 0, last = 0;
++   uint32_t wait_count = 0, cmdbuf_count = 0;
+    for (uint32_t i = 0; i < submitCount; i++) {
+-      VkResult result = VK_SUCCESS;
+-      uint32_t wait_count = pSubmits[i].waitSemaphoreInfoCount;
+-      uint32_t cmdbuf_count = pSubmits[i].commandBufferInfoCount;
+-
+       /* From the Vulkan 1.2.194 spec:
+       *
+       *    "If the VkSubmitInfo::pNext chain does not include this structure,
+@@ -1221,12 +1239,63 @@ vk_common_QueueSubmit2KHR(VkQueue _queue,
+          vk_find_struct_const(pSubmits[i].pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+       uint32_t perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
+ 
+-      result = vk_queue_submit_flush(queue, &pSubmits[i], 1,
+-                                     wait_count,
+-                                     cmdbuf_count,
+-                                     perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL);
+-      if (unlikely(result != VK_SUCCESS))
+-         return result;
++      /* determine when to split the submits
++       * - split if unhandled pNext is in chain
++       * - split if perf counterPassIndex changes or is added/omitted
++       * - split if signal ordering would be disrupted
++       */
++      if (!filter_pnexts(pSubmits[i].pNext))
++         iterate = false;
++      if (i && (!!perf_info != has_perf_info || (has_perf_info && perf_pass_index != prev_perf_pass_index)))
++         iterate = false;
++      if (has_signals)
++         iterate = false;
++      if (i == submitCount - 1) {
++         /* always flush on last submit*/
++         if (iterate || !i) {
++            /* include last submit for flush if it can be included */
++            wait_count += pSubmits[i].waitSemaphoreInfoCount;
++            cmdbuf_count += pSubmits[i].commandBufferInfoCount;
++            last = i;
++         } else {
++            needs_last = true;
++         }
++         iterate = false;
++      }
++
++      if (!iterate) {
++         /* submits must split: flush pending but NOT current (unless last submit) */
++         VkResult result = vk_queue_submit_flush(queue, &pSubmits[first], last - first + 1,
++                                                 wait_count,
++                                                 cmdbuf_count,
++                                                 perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL);
++         if (unlikely(result != VK_SUCCESS))
++            return result;
++         wait_count = 0;
++         cmdbuf_count = 0;
++         first = last = i;
++         iterate = true;
++      }
++
++      /* always keep accumulating */
++      wait_count += pSubmits[i].waitSemaphoreInfoCount;
++      cmdbuf_count += pSubmits[i].commandBufferInfoCount;
++      last = i;
++
++      has_perf_info = perf_info != NULL;
++      prev_perf_pass_index = perf_pass_index;
++      has_signals = pSubmits[i].signalSemaphoreInfoCount > 0;
++      if (needs_last) {
++         /* catch the last submit if it couldn't be merged above */
++         assert(first == last);
++         assert(first == submitCount - 1);
++         VkResult result = vk_queue_submit_flush(queue, &pSubmits[first], last - first + 1,
++                                                 wait_count,
++                                                 cmdbuf_count,
++                                                 perf_pass_index, mem_sync, i == submitCount - 1 ? fence : NULL);
++         if (unlikely(result != VK_SUCCESS))
++            return result;
++      }
+    }
+ 
+    return VK_SUCCESS;
+-- 
+GitLab
+
+
diff --git a/spec_files/mesa/26105.patch b/spec_files/mesa/26105.patch
deleted file mode 100644
index 6af0603d..00000000
--- a/spec_files/mesa/26105.patch
+++ /dev/null
@@ -1,204 +0,0 @@
-diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
-index 561f3cc02e0..ebc54a900e3 100644
---- a/src/amd/compiler/aco_instruction_selection.cpp
-+++ b/src/amd/compiler/aco_instruction_selection.cpp
-@@ -12526,7 +12526,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
-     */
-    PhysReg out_uniform_shader_addr = get_arg_reg(out_args, out_args->rt.uniform_shader_addr);
-    PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size);
--   PhysReg out_launch_size_z = out_launch_size_x.advance(8);
-+   PhysReg out_launch_size_y = out_launch_size_x.advance(4);
-+   PhysReg out_launch_size_z = out_launch_size_y.advance(4);
-    PhysReg out_launch_ids[3];
-    for (unsigned i = 0; i < 3; i++)
-       out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_id).advance(i * 4);
-@@ -12534,9 +12535,13 @@ select_rt_prolog(Program* program, ac_shader_config* config,
-    PhysReg out_record_ptr = get_arg_reg(out_args, out_args->rt.shader_record);
- 
-    /* Temporaries: */
--   num_sgprs = align(num_sgprs, 2) + 4;
--   PhysReg tmp_raygen_sbt = PhysReg{num_sgprs - 4};
--   PhysReg tmp_ring_offsets = PhysReg{num_sgprs - 2};
-+   num_sgprs = align(num_sgprs, 2);
-+   PhysReg tmp_raygen_sbt = PhysReg{num_sgprs};
-+   num_sgprs += 2;
-+   PhysReg tmp_ring_offsets = PhysReg{num_sgprs};
-+   num_sgprs += 2;
-+
-+   PhysReg tmp_invocation_idx = PhysReg{256 + num_vgprs++};
- 
-    /* Confirm some assumptions about register aliasing */
-    assert(in_ring_offsets == out_uniform_shader_addr);
-@@ -12610,6 +12615,36 @@ select_rt_prolog(Program* program, ac_shader_config* config,
-    bld.vop1(aco_opcode::v_mov_b32, Definition(out_record_ptr.advance(4), v1),
-             Operand(tmp_raygen_sbt.advance(4), s1));
- 
-+   /* For 1D dispatches converted into 2D ones, we need to fix up the launch IDs.
-+    * Calculating the 1D launch ID is: id = local_invocation_index + (wg_id.x * wg_size).
-+    * in_wg_id_x now holds wg_id.x * wg_size.
-+    */
-+   bld.sop2(aco_opcode::s_lshl_b32, Definition(in_wg_id_x, s1), Definition(scc, s1),
-+            Operand(in_wg_id_x, s1), Operand::c32(program->workgroup_size == 32 ? 5 : 6));
-+
-+   /* Calculate and add local_invocation_index */
-+   bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(tmp_invocation_idx, v1), Operand::c32(-1u),
-+            Operand(in_wg_id_x, s1));
-+   if (program->wave_size == 64) {
-+      if (program->gfx_level <= GFX7)
-+         bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, Definition(tmp_invocation_idx, v1),
-+                  Operand::c32(-1u), Operand(tmp_invocation_idx, v1));
-+      else
-+         bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(tmp_invocation_idx, v1),
-+                  Operand::c32(-1u), Operand(tmp_invocation_idx, v1));
-+   }
-+
-+   /* Make fixup operations a no-op if this is not a converted 2D dispatch. */
-+   bld.sopc(aco_opcode::s_cmp_lg_u32, Definition(scc, s1),
-+            Operand::c32(ACO_RT_CONVERTED_2D_LAUNCH_SIZE), Operand(out_launch_size_y, s1));
-+   bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm),
-+            Operand::c32_or_c64(-1u, program->wave_size == 64),
-+            Operand::c32_or_c64(0, program->wave_size == 64), Operand(scc, s1));
-+   bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[0], v1),
-+            Operand(tmp_invocation_idx, v1), Operand(out_launch_ids[0], v1), Operand(vcc, bld.lm));
-+   bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
-+            Operand(out_launch_ids[1], v1), Operand(vcc, bld.lm));
-+
-    /* jump to raygen */
-    bld.sop1(aco_opcode::s_setpc_b64, Operand(out_uniform_shader_addr, s2));
- 
-diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h
-index 8f35e18b5b0..9d2c1dbb2af 100644
---- a/src/amd/compiler/aco_interface.h
-+++ b/src/amd/compiler/aco_interface.h
-@@ -32,6 +32,9 @@
- extern "C" {
- #endif
- 
-+/* Special launch size to indicate this dispatch is a 1D dispatch converted into a 2D one */
-+#define ACO_RT_CONVERTED_2D_LAUNCH_SIZE -1u
-+
- typedef struct nir_shader nir_shader;
- struct ac_shader_config;
- struct aco_shader_info;
-diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
-index cdede679552..c7dd4b216d4 100644
---- a/src/amd/vulkan/radv_cmd_buffer.c
-+++ b/src/amd/vulkan/radv_cmd_buffer.c
-@@ -42,6 +42,8 @@
- #include "ac_debug.h"
- #include "ac_shader_args.h"
- 
-+#include "aco_interface.h"
-+
- #include "util/fast_idiv_by_const.h"
- 
- enum {
-@@ -10003,7 +10005,26 @@ enum radv_rt_mode {
- };
- 
- static void
--radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables, uint64_t indirect_va,
-+radv_upload_trace_rays_params(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2KHR *tables,
-+                              enum radv_rt_mode mode, uint64_t *launch_size_va, uint64_t *sbt_va)
-+{
-+   uint32_t upload_size = mode == radv_rt_mode_direct ? sizeof(VkTraceRaysIndirectCommand2KHR)
-+                                                      : offsetof(VkTraceRaysIndirectCommand2KHR, width);
-+
-+   uint32_t offset;
-+   if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset))
-+      return;
-+
-+   uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
-+
-+   if (mode == radv_rt_mode_direct)
-+      *launch_size_va = upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width);
-+   if (sbt_va)
-+      *sbt_va = upload_va;
-+}
-+
-+static void
-+radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2KHR *tables, uint64_t indirect_va,
-                 enum radv_rt_mode mode)
- {
-    if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_RT)
-@@ -10024,34 +10045,43 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom
-    cmd_buffer->compute_scratch_size_per_wave_needed =
-       MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave);
- 
-+   /* Since the workgroup size is 8x4 (or 8x8), 1D dispatches can only fill 8 threads per wave at most. To increase
-+    * occupancy, it's beneficial to convert to a 2D dispatch in these cases. */
-+   if (tables && tables->height == 1 && tables->width >= cmd_buffer->state.rt_prolog->info.cs.block_size[0])
-+      tables->height = ACO_RT_CONVERTED_2D_LAUNCH_SIZE;
-+
-    struct radv_dispatch_info info = {0};
-    info.unaligned = true;
- 
--   uint64_t launch_size_va;
--   uint64_t sbt_va;
-+   uint64_t launch_size_va = 0;
-+   uint64_t sbt_va = 0;
- 
-    if (mode != radv_rt_mode_indirect2) {
--      uint32_t upload_size = mode == radv_rt_mode_direct ? sizeof(VkTraceRaysIndirectCommand2KHR)
--                                                         : offsetof(VkTraceRaysIndirectCommand2KHR, width);
--
--      uint32_t offset;
--      if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset))
--         return;
--
--      uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
--
--      launch_size_va =
--         (mode == radv_rt_mode_direct) ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width) : indirect_va;
--      sbt_va = upload_va;
-+      launch_size_va = indirect_va;
-+      radv_upload_trace_rays_params(cmd_buffer, tables, mode, &launch_size_va, &sbt_va);
-    } else {
-       launch_size_va = indirect_va + offsetof(VkTraceRaysIndirectCommand2KHR, width);
-       sbt_va = indirect_va;
-    }
- 
-+   uint32_t remaining_ray_count = 0;
-+
-    if (mode == radv_rt_mode_direct) {
-       info.blocks[0] = tables->width;
-       info.blocks[1] = tables->height;
-       info.blocks[2] = tables->depth;
-+
-+      if (tables->height == ACO_RT_CONVERTED_2D_LAUNCH_SIZE) {
-+         /* We need the ray count for the 2D dispatch to be a multiple of the y block size for the division to work, and
-+          * a multiple of the x block size because the invocation offset must be a multiple of the block size when
-+          * dispatching the remaining rays. Fortunately, the x block size is itself a multiple of the y block size, so
-+          * we only need to ensure that the ray count is a multiple of the x block size. */
-+         remaining_ray_count = tables->width % rt_prolog->info.cs.block_size[0];
-+
-+         uint32_t ray_count = tables->width - remaining_ray_count;
-+         info.blocks[0] = ray_count / rt_prolog->info.cs.block_size[1];
-+         info.blocks[1] = rt_prolog->info.cs.block_size[1];
-+      }
-    } else
-       info.va = launch_size_va;
- 
-@@ -10085,6 +10115,22 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom
-    assert(cmd_buffer->cs->cdw <= cdw_max);
- 
-    radv_dispatch(cmd_buffer, &info, pipeline, rt_prolog, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
-+
-+   if (remaining_ray_count) {
-+      info.blocks[0] = remaining_ray_count;
-+      info.blocks[1] = 1;
-+      info.offsets[0] = tables->width - remaining_ray_count;
-+
-+      /* Reset the ray launch size so the prolog doesn't think this is a converted dispatch */
-+      tables->height = 1;
-+      radv_upload_trace_rays_params(cmd_buffer, tables, mode, &launch_size_va, NULL);
-+      if (size_loc->sgpr_idx != -1) {
-+         radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + size_loc->sgpr_idx * 4, launch_size_va,
-+                                  true);
-+      }
-+
-+      radv_dispatch(cmd_buffer, &info, pipeline, rt_prolog, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
-+   }
- }
- 
- VKAPI_ATTR void VKAPI_CALL
diff --git a/spec_files/mesa/mesa-meson-c99.patch b/spec_files/mesa/mesa-meson-c99.patch
deleted file mode 100644
index 6cbb0536..00000000
--- a/spec_files/mesa/mesa-meson-c99.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-meson: C type error in strtod_l/strtof_l probe
-
-Future compilers will fail compilation due to the C type error:
-
-…/testfile.c: In function 'main':
-…/testfile.c:12:30: error: passing argument 2 of 'strtod_l' from incompatible pointer type
-   12 |       double d = strtod_l(s, end, loc);
-      |                              ^~~
-      |                              |
-      |                              char *
-/usr/include/stdlib.h:416:43: note: expected 'char ** restrict' but argument is of type 'char *'
-  416 |                         char **__restrict __endptr, locale_t __loc)
-      |                         ~~~~~~~~~~~~~~~~~~^~~~~~~~
-…/testfile.c:13:29: error: passing argument 2 of 'strtof_l' from incompatible pointer type
-   13 |       float f = strtof_l(s, end, loc);
-      |                             ^~~
-      |                             |
-      |                             char *
-/usr/include/stdlib.h:420:42: note: expected 'char ** restrict' but argument is of type 'char *'
-  420 |                        char **__restrict __endptr, locale_t __loc)
-      |                        ~~~~~~~~~~~~~~~~~~^~~~~~~~
-
-This means that the probe no longer tests is objective and always
-fails.
-
-Submitted upstream: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26927>
-
-diff --git a/meson.build b/meson.build
-index 35cc5f1cd5fd9079..1a5d2ba492be0b31 100644
---- a/meson.build
-+++ b/meson.build
-@@ -1425,8 +1425,8 @@ if cc.links('''
-       locale_t loc = newlocale(LC_CTYPE_MASK, "C", NULL);
-       const char *s = "1.0";
-       char *end;
--      double d = strtod_l(s, end, loc);
--      float f = strtof_l(s, end, loc);
-+      double d = strtod_l(s, &end, loc);
-+      float f = strtof_l(s, &end, loc);
-       freelocale(loc);
-       return 0;
-     }''',
diff --git a/spec_files/mesa/mesa.spec b/spec_files/mesa/mesa.spec
index 438024b5..7949d32f 100644
--- a/spec_files/mesa/mesa.spec
+++ b/spec_files/mesa/mesa.spec
@@ -5,10 +5,11 @@
 %global with_va 1
 %if !0%{?rhel}
 %global with_nine 1
+%global with_nvk %{with vulkan_hw}
 %global with_omx 1
 %global with_opencl 1
 %endif
-%global base_vulkan ,amd,nouveau-experimental
+%global base_vulkan ,amd
 %endif
 
 %ifarch %{ix86} x86_64
@@ -34,7 +35,7 @@
 %global with_tegra     1
 %global with_v3d       1
 %global with_xa        1
-%global extra_platform_vulkan ,broadcom,freedreno,panfrost
+%global extra_platform_vulkan ,broadcom,freedreno,panfrost,imagination-experimental
 %endif
 
 %ifnarch s390x
@@ -57,13 +58,13 @@
 %bcond_with valgrind
 %endif
 
-%global vulkan_drivers swrast%{?base_vulkan}%{?intel_platform_vulkan}%{?extra_platform_vulkan}
+%global vulkan_drivers swrast%{?base_vulkan}%{?intel_platform_vulkan}%{?extra_platform_vulkan}%{?with_nvk:,nouveau-experimental}
 
 Name:           mesa
 Summary:        Mesa graphics libraries
-%global ver 23.3.6
+%global ver 24.0.5
 Version:        %{lua:ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(ver)}
-Release:        100.bazzite.{{{ git_dir_version }}}
+Release:        %autorelease
 License:        MIT AND BSD-3-Clause AND SGI-B-2.0
 URL:            http://www.mesa3d.org
 
@@ -74,15 +75,18 @@ Source0:        https://archive.mesa3d.org/mesa-%{ver}.tar.xz
 Source1:        Mesa-MLAA-License-Clarification-Email.txt
 
 Patch10:        gnome-shell-glthread-disable.patch
-Patch12:        mesa-meson-c99.patch
+# Backport of https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28740
+# to fix rendering issues using GTK's GSK_RENDERER=ngl on Raspberry Pi:
+# https://bugzilla.redhat.com/show_bug.cgi?id=2269412
+Patch11:        0001-broadcom-compiler-needs_quad_helper_invocation-enabl.patch
 
-# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26105/
-Patch30:        26105.patch
+# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25352
+Patch20:        25352.patch
 
 # https://gitlab.com/evlaV/mesa/
-Patch40:        valve.patch
+Patch21:        valve.patch
 
-BuildRequires:  meson >= 1.2.0
+BuildRequires:  meson >= 1.3.0
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
 BuildRequires:  gettext
@@ -140,7 +144,7 @@ BuildRequires:  pkgconfig(libomxil-bellagio)
 BuildRequires:  pkgconfig(libelf)
 BuildRequires:  pkgconfig(libglvnd) >= 1.3.2
 BuildRequires:  llvm-devel >= 7.0.0
-%if 0%{?with_opencl}
+%if 0%{?with_opencl} || 0%{?with_nvk}
 BuildRequires:  clang-devel
 BuildRequires:  bindgen
 BuildRequires:  rust-packaging
@@ -148,6 +152,12 @@ BuildRequires:  pkgconfig(libclc)
 BuildRequires:  pkgconfig(SPIRV-Tools)
 BuildRequires:  pkgconfig(LLVMSPIRVLib)
 %endif
+%if 0%{?with_nvk}
+BuildRequires:  (crate(proc-macro2) >= 1.0.56 with crate(proc-macro2) < 2)
+BuildRequires:  (crate(quote) >= 1.0.25 with crate(quote) < 2)
+BuildRequires:  (crate(syn/clone-impls) >= 2.0.15 with crate(syn/clone-impls) < 3)
+BuildRequires:  (crate(unicode-ident) >= 1.0.6 with crate(unicode-ident) < 2)
+%endif
 %if %{with valgrind}
 BuildRequires:  pkgconfig(valgrind)
 %endif
@@ -371,6 +381,18 @@ cp %{SOURCE1} docs/
 # ensure standard Rust compiler flags are set
 export RUSTFLAGS="%build_rustflags"
 
+%if 0%{?with_nvk}
+export MESON_PACKAGE_CACHE_DIR="%{cargo_registry}/"
+# So... Meson can't actually find them without tweaks
+%define inst_crate_nameversion() %(basename %{cargo_registry}/%{1}-*)
+%define rewrite_wrap_file() sed -e "/source.*/d" -e "s/%{1}-.*/%{inst_crate_nameversion %{1}}/" -i subprojects/%{1}.wrap
+
+%rewrite_wrap_file proc-macro2
+%rewrite_wrap_file quote
+%rewrite_wrap_file syn
+%rewrite_wrap_file unicode-ident
+%endif
+
 # We've gotten a report that enabling LTO for mesa breaks some games. See
 # https://bugzilla.redhat.com/show_bug.cgi?id=1862771 for details.
 # Disable LTO for now
@@ -411,7 +433,6 @@ export RUSTFLAGS="%build_rustflags"
   -Dllvm=enabled \
   -Dshared-llvm=enabled \
   -Dvalgrind=%{?with_valgrind:enabled}%{!?with_valgrind:disabled} \
-  -Dxlib-lease=enabled \
   -Dbuild-tests=false \
   -Dselinux=true \
 %if !0%{?with_libunwind}
@@ -449,11 +470,6 @@ for i in libOSMesa*.so libGL.so ; do
 done
 popd
 
-%ifarch %{ix86}
-rm -Rf %{buildroot}%{_datadir}/drirc.d/00-radv-defaults.conf
-rm -Rf %{buildroot}%{_datadir}/drirc.d/00-mesa-defaults.conf
-%endif
-
 %files filesystem
 %doc docs/Mesa-MLAA-License-Clarification-Email.txt
 %dir %{_libdir}/dri
@@ -541,9 +557,7 @@ rm -Rf %{buildroot}%{_datadir}/drirc.d/00-mesa-defaults.conf
 
 %files dri-drivers
 %dir %{_datadir}/drirc.d
-%ifarch aarch64 x86_64
 %{_datadir}/drirc.d/00-mesa-defaults.conf
-%endif
 %{_libdir}/dri/kms_swrast_dri.so
 %{_libdir}/dri/swrast_dri.so
 %{_libdir}/dri/virtio_gpu_dri.so
@@ -609,20 +623,26 @@ rm -Rf %{buildroot}%{_datadir}/drirc.d/00-mesa-defaults.conf
 %if 0%{?with_kmsro}
 %{_libdir}/dri/armada-drm_dri.so
 %{_libdir}/dri/exynos_dri.so
+%{_libdir}/dri/gm12u320_dri.so
 %{_libdir}/dri/hdlcd_dri.so
 %{_libdir}/dri/hx8357d_dri.so
+%{_libdir}/dri/ili9163_dri.so
 %{_libdir}/dri/ili9225_dri.so
 %{_libdir}/dri/ili9341_dri.so
+%{_libdir}/dri/ili9486_dri.so
 %{_libdir}/dri/imx-dcss_dri.so
 %{_libdir}/dri/mediatek_dri.so
 %{_libdir}/dri/meson_dri.so
 %{_libdir}/dri/mi0283qt_dri.so
+%{_libdir}/dri/panel-mipi-dbi_dri.so
 %{_libdir}/dri/pl111_dri.so
 %{_libdir}/dri/repaper_dri.so
 %{_libdir}/dri/rockchip_dri.so
 %{_libdir}/dri/st7586_dri.so
 %{_libdir}/dri/st7735r_dri.so
+%{_libdir}/dri/sti_dri.so
 %{_libdir}/dri/sun4i-drm_dri.so
+%{_libdir}/dri/udl_dri.so
 %endif
 %if 0%{?with_vulkan_hw}
 %{_libdir}/dri/zink_dri.so
@@ -664,12 +684,12 @@ rm -Rf %{buildroot}%{_datadir}/drirc.d/00-mesa-defaults.conf
 %{_datadir}/vulkan/implicit_layer.d/VkLayer_MESA_device_select.json
 %if 0%{?with_vulkan_hw}
 %{_libdir}/libvulkan_radeon.so
-%ifarch aarch64 x86_64
 %{_datadir}/drirc.d/00-radv-defaults.conf
-%endif
 %{_datadir}/vulkan/icd.d/radeon_icd.*.json
+%if 0%{?with_nvk}
 %{_libdir}/libvulkan_nouveau.so
 %{_datadir}/vulkan/icd.d/nouveau_icd.*.json
+%endif
 %ifarch %{ix86} x86_64
 %{_libdir}/libvulkan_intel.so
 %{_datadir}/vulkan/icd.d/intel_icd.*.json
@@ -683,6 +703,9 @@ rm -Rf %{buildroot}%{_datadir}/drirc.d/00-mesa-defaults.conf
 %{_datadir}/vulkan/icd.d/freedreno_icd.*.json
 %{_libdir}/libvulkan_panfrost.so
 %{_datadir}/vulkan/icd.d/panfrost_icd.*.json
+%{_libdir}/libpowervr_rogue.so
+%{_libdir}/libvulkan_powervr_mesa.so
+%{_datadir}/vulkan/icd.d/powervr_mesa_icd.*.json
 %endif
 %endif
 
diff --git a/spec_files/mesa/valve.patch b/spec_files/mesa/valve.patch
index 1dd50155..2e9788c4 100644
--- a/spec_files/mesa/valve.patch
+++ b/spec_files/mesa/valve.patch
@@ -140,90 +140,6 @@ index 35f9991ba2f..154cf809a69 100644
      ],
      build_by_default : false,
    )
-diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
-index 72ff193d30a..996fc230673 100644
---- a/src/vulkan/wsi/wsi_common_x11.c
-+++ b/src/vulkan/wsi/wsi_common_x11.c
-@@ -48,6 +48,7 @@
- #include "util/hash_table.h"
- #include "util/os_file.h"
- #include "util/os_time.h"
-+#include "util/simple_mtx.h"
- #include "util/u_debug.h"
- #include "util/u_thread.h"
- #include "util/xmlconfig.h"
-@@ -219,6 +220,30 @@ wsi_x11_detect_xwayland(xcb_connection_t *conn,
-    return is_xwayland;
- }
- 
-+static unsigned
-+gamescope_swapchain_override()
-+{
-+   const char *path = getenv("GAMESCOPE_LIMITER_FILE");
-+   if (!path)
-+      return 0;
-+
-+   static simple_mtx_t mtx = SIMPLE_MTX_INITIALIZER;
-+   static int fd = -1;
-+
-+   simple_mtx_lock(&mtx);
-+   if (fd < 0) {
-+      fd = open(path, O_RDONLY);
-+   }
-+   simple_mtx_unlock(&mtx);
-+
-+   if (fd < 0)
-+      return 0;
-+
-+   uint32_t override_value = 0;
-+   pread(fd, &override_value, sizeof(override_value), 0);
-+   return override_value;
-+}
-+
- static struct wsi_x11_connection *
- wsi_x11_connection_create(struct wsi_device *wsi_dev,
-                           xcb_connection_t *conn)
-@@ -1103,6 +1128,8 @@ struct x11_swapchain {
-    /* Total number of images returned to application in AcquireNextImage. */
-    uint64_t                                     present_poll_acquire_count;
- 
-+   VkPresentModeKHR                             orig_present_mode;
-+
-    struct x11_image                             images[0];
- };
- VK_DEFINE_NONDISP_HANDLE_CASTS(x11_swapchain, base.base, VkSwapchainKHR,
-@@ -1852,6 +1879,12 @@ x11_queue_present(struct wsi_swapchain *anv_chain,
-    if (chain->status < 0)
-       return chain->status;
- 
-+   unsigned gamescope_override = gamescope_swapchain_override();
-+   if ((gamescope_override == 1 && chain->base.present_mode != VK_PRESENT_MODE_FIFO_KHR) ||
-+       (gamescope_override != 1 && chain->base.present_mode != chain->orig_present_mode)) {
-+      return x11_swapchain_result(chain, VK_ERROR_OUT_OF_DATE_KHR);
-+   }
-+
-    if (damage && damage->pRectangles && damage->rectangleCount > 0 &&
-       damage->rectangleCount <= MAX_DAMAGE_RECTS) {
-       xcb_rectangle_t rects[MAX_DAMAGE_RECTS];
-@@ -2610,6 +2643,10 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
-    xcb_void_cookie_t cookie;
-    VkResult result;
-    VkPresentModeKHR present_mode = wsi_swapchain_get_present_mode(wsi_device, pCreateInfo);
-+   VkPresentModeKHR orig_present_mode = present_mode;
-+
-+   if (gamescope_swapchain_override() == 1)
-+      present_mode = VK_PRESENT_MODE_FIFO_KHR;
- 
-    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);
- 
-@@ -2722,6 +2759,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
-    chain->base.wait_for_present = x11_wait_for_present;
-    chain->base.release_images = x11_release_images;
-    chain->base.present_mode = present_mode;
-+   chain->orig_present_mode = orig_present_mode;
-    chain->base.image_count = num_images;
-    chain->conn = conn;
-    chain->window = window;
 -- 
 2.42.0