diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
index 8ab4767b21..7a0189b193 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
@@ -204,22 +204,16 @@ namespace vk
 
 		src->pop_layout(cmd);
 
-		// Inserting a buffer memory barrier into the pipe fixes sync on RADV. Adding the same barrier as an event dep does not work.
-		// The dependencies in the CmdSetEvents2 command are handled in a different path from regular pipeline deps.
-		vk::insert_buffer_memory_barrier(cmd,
-			dma_mapping.second->value, dma_mapping.first, valid_range.length(),
-			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-			VK_ACCESS_TRANSFER_WRITE_BIT, 0);
-
-		// Not providing any deps to the signal makes AMDVLK hang (event never gets signaled)
-		// We also need to set this up correctly as the buffer barrier above by itself does not work for AMD/AMDVLK.
-		VkMemoryBarrier2KHR mem_barrier =
+		VkBufferMemoryBarrier2KHR mem_barrier =
 		{
-			.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
-			.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
-			.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
-			.dstStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
-			.dstAccessMask = 0
+			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR,
+			.srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, // Finish all transfer...
+			.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
+			.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR, // ...before proceeding with any command
+			.dstAccessMask = 0,
+			.buffer = dma_mapping.second->value,
+			.offset = dma_mapping.first,
+			.size = valid_range.length()
 		};
 
 		// Create event object for this transfer and queue signal op
@@ -227,8 +221,8 @@ namespace vk
 		dma_fence->signal(cmd,
 		{
 			.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-			.memoryBarrierCount = 1,
-			.pMemoryBarriers = &mem_barrier
+			.bufferMemoryBarrierCount = 1,
+			.pBufferMemoryBarriers = &mem_barrier
 		});
 
 		// Set cb flag for queued dma operations
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
index 729e3f73b6..2a0d1a2d3e 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
@@ -721,6 +721,7 @@ namespace vk
 		{
 			_vkCmdSetEvent2KHR = reinterpret_cast<PFN_vkCmdSetEvent2KHR>(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR"));
 			_vkCmdWaitEvents2KHR = reinterpret_cast<PFN_vkCmdWaitEvents2KHR>(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR"));
+			_vkCmdPipelineBarrier2KHR = reinterpret_cast<PFN_vkCmdPipelineBarrier2KHR>(vkGetDeviceProcAddr(dev, "vkCmdPipelineBarrier2KHR"));
 		}
 
 		memory_map = vk::get_memory_mapping(pdev);
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h
index 8c98c94dbb..eba24278b6 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.h
@@ -138,6 +138,7 @@ namespace vk
 		PFN_vkCmdInsertDebugUtilsLabelEXT _vkCmdInsertDebugUtilsLabelEXT = nullptr;
 		PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr;
 		PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr;
+		PFN_vkCmdPipelineBarrier2KHR _vkCmdPipelineBarrier2KHR = nullptr;
 
 	public:
 		render_device() = default;
diff --git a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
index 3646e68be5..bb7e77e8cb 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
@@ -200,17 +200,56 @@ namespace vk
 		}
 	}
 
-	void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
+	// Records 'dependency' on 'cmd' as a pipeline barrier: vkCmdPipelineBarrier2KHR when v2 is set, vkCmdPipelineBarrier otherwise.
+	void event::resolve_dependencies(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
 	{
-		if (v2) [[ likely ]]
+		if (v2)
 		{
-			m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &dependency);
+			m_device->_vkCmdPipelineBarrier2KHR(cmd, &dependency);
 		}
 		else
 		{
 			// Legacy fallback. Should be practically unused with the exception of in-development drivers.
-			const auto stages = v1_utils::gather_src_stages(dependency);
-			vkCmdSetEvent(cmd, m_vk_event, stages);
+			const auto src_stages = v1_utils::gather_src_stages(dependency);
+			const auto dst_stages = v1_utils::gather_dst_stages(dependency);
+			const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
+			const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
+			const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
+
+			vkCmdPipelineBarrier(cmd, src_stages, dst_stages, dependency.dependencyFlags,
+				::size32(memory_barriers), memory_barriers.data(),
+				::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
+				::size32(image_memory_barriers), image_memory_barriers.data());
+		}
+	}
+
+	void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
+	{
+		// Resolve the actual dependencies on a pipeline barrier
+		resolve_dependencies(cmd, dependency);
+
+		// Signalling won't wait. The caller is responsible for setting up the dependencies correctly.
+		if (v2) [[ likely ]]
+		{
+			// We need a memory barrier to keep AMDVLK from hanging
+			VkMemoryBarrier2KHR mem_barrier =
+			{
+				.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
+				.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
+				.srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR
+			};
+			// Placeholder dependency: carries only the barrier above, the caller's dependencies were already resolved by the pipeline barrier
+			VkDependencyInfoKHR empty_dependency
+			{
+				.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
+				.memoryBarrierCount = 1,
+				.pMemoryBarriers = &mem_barrier
+			};
+			m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &empty_dependency);
+		}
+		else
+		{
+			vkCmdSetEvent(cmd, m_vk_event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 		}
 	}
 
diff --git a/rpcs3/Emu/RSX/VK/vkutils/sync.h b/rpcs3/Emu/RSX/VK/vkutils/sync.h
index 37c75af826..e2e75bcbf7 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/sync.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/sync.h
@@ -59,6 +59,9 @@ namespace vk
 		VkEvent m_vk_event = VK_NULL_HANDLE;
 		bool v2 = true;
 
+		// Emits 'dependency' as a pipeline barrier on 'cmd'; signal() calls this before setting the event.
+		void resolve_dependencies(const command_buffer& cmd, const VkDependencyInfoKHR& dependency);
+
 	public:
 		event(const render_device& dev, sync_domain domain);
 		~event();