diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 40bc8a1eee..8330b98aa5 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -518,7 +518,7 @@ VKGSRender::VKGSRender() : GSRender() m_current_frame = &frame_context_storage[0]; - m_texture_cache.initialize((*m_device), m_swapchain->get_graphics_queue(), + m_texture_cache.initialize((*m_device), m_device->get_graphics_queue(), m_texture_upload_buffer_ring_info); vk::get_overlay_pass()->init(*m_current_command_buffer, m_texture_upload_buffer_ring_info); @@ -1960,7 +1960,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_secondary_command_buffer.end(); - m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), + m_secondary_command_buffer.submit(m_device->get_graphics_queue(), VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, force_flush); } @@ -1992,7 +1992,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore m_current_command_buffer->end(); m_current_command_buffer->tag(); - m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), + m_current_command_buffer->submit(m_device->get_graphics_queue(), wait_semaphore, signal_semaphore, pFence, pipeline_stage_flags, force_flush); if (force_flush) diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 14eaf2d710..d055d3cb6c 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -228,18 +228,41 @@ namespace vk } // Render Device - The actual usable device - void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx) + void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx, u32 present_queue_idx, u32 transfer_queue_idx) { std::string message_on_error; float queue_priorities[1] = { 0.f }; pgpu = &pdev; - VkDeviceQueueCreateInfo queue = {}; - queue.sType = 
VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queue.pNext = NULL; - queue.queueFamilyIndex = graphics_queue_idx; - queue.queueCount = 1; - queue.pQueuePriorities = queue_priorities; + ensure(graphics_queue_idx == present_queue_idx || present_queue_idx == UINT32_MAX); // TODO: A present family distinct from the graphics family is not handled yet + if (transfer_queue_idx == UINT32_MAX) transfer_queue_idx = graphics_queue_idx; // No dedicated transfer family was found, share the graphics family + m_graphics_queue_family = graphics_queue_idx; + m_present_queue_family = present_queue_idx; + m_transfer_queue_family = transfer_queue_idx; + std::vector<VkDeviceQueueCreateInfo> device_queues; + device_queues.push_back({}); + + auto & graphics_queue = device_queues.back(); + graphics_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + graphics_queue.pNext = NULL; + graphics_queue.flags = 0; + graphics_queue.queueFamilyIndex = graphics_queue_idx; + graphics_queue.queueCount = 1; + graphics_queue.pQueuePriorities = queue_priorities; + + if (graphics_queue_idx != transfer_queue_idx) + { + ensure(transfer_queue_idx != UINT32_MAX); + + device_queues.push_back({}); + auto & transfer_queue = device_queues.back(); + transfer_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + transfer_queue.pNext = NULL; + transfer_queue.flags = 0; + transfer_queue.queueFamilyIndex = transfer_queue_idx; + transfer_queue.queueCount = 1; + transfer_queue.pQueuePriorities = queue_priorities; + } // Set up instance information std::vector requested_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME }; @@ -361,8 +384,8 @@ namespace vk VkDeviceCreateInfo device = {}; device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device.pNext = nullptr; - device.queueCreateInfoCount = 1; - device.pQueueCreateInfos = &queue; + device.queueCreateInfoCount = ::size32(device_queues); + device.pQueueCreateInfos = device_queues.data(); device.enabledLayerCount = 0; device.ppEnabledLayerNames = nullptr; // Deprecated device.enabledExtensionCount = ::size32(requested_extensions); @@ -386,6 +409,15 @@ namespace vk CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error); + // Initialize queues + vkGetDeviceQueue(dev, 
graphics_queue_idx, 0, &m_graphics_queue); + vkGetDeviceQueue(dev, transfer_queue_idx, 0, &m_transfer_queue); + + if (present_queue_idx != UINT32_MAX) + { + vkGetDeviceQueue(dev, present_queue_idx, 0, &m_present_queue); + } + // Import optional function endpoints if (pgpu->conditional_render_support) { @@ -425,6 +457,36 @@ namespace vk } } + VkQueue render_device::get_present_queue() const + { + return m_present_queue; + } + + VkQueue render_device::get_graphics_queue() const + { + return m_graphics_queue; + } + + VkQueue render_device::get_transfer_queue() const + { + return m_transfer_queue; + } + + u32 render_device::get_graphics_queue_family() const + { + return m_graphics_queue_family; + } + + u32 render_device::get_present_queue_family() const + { + return m_present_queue_family; // UINT32_MAX when create() was given no present queue + } + + u32 render_device::get_transfer_queue_family() const + { + return m_transfer_queue_family; + } + const VkFormatProperties render_device::get_format_properties(VkFormat format) { auto found = pgpu->format_properties.find(format); diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index 040ae78290..286c772079 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -89,6 +89,14 @@ namespace vk std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; + VkQueue m_graphics_queue = VK_NULL_HANDLE; + VkQueue m_present_queue = VK_NULL_HANDLE; + VkQueue m_transfer_queue = VK_NULL_HANDLE; + + u32 m_graphics_queue_family = 0; + u32 m_present_queue_family = 0; + u32 m_transfer_queue_family = 0; + public: // Exported device endpoints PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr; @@ -98,7 +106,7 @@ namespace vk render_device() = default; ~render_device() = default; - void create(vk::physical_device& pdev, u32 graphics_queue_idx); + void create(vk::physical_device& pdev, u32 graphics_queue_idx, u32 present_queue_idx, u32 transfer_queue_idx); void destroy(); const VkFormatProperties 
get_format_properties(VkFormat format); @@ -119,6 +127,13 @@ namespace vk bool get_external_memory_host_support() const; bool get_surface_capabilities_2_support() const; + VkQueue get_present_queue() const; + VkQueue get_graphics_queue() const; + VkQueue get_transfer_queue() const; + u32 get_graphics_queue_family() const; + u32 get_present_queue_family() const; + u32 get_transfer_queue_family() const; + mem_allocator_base* get_allocator() const; operator VkDevice() const; diff --git a/rpcs3/Emu/RSX/VK/vkutils/instance.hpp b/rpcs3/Emu/RSX/VK/vkutils/instance.hpp index ce1d6818b8..75992ae298 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/instance.hpp +++ b/rpcs3/Emu/RSX/VK/vkutils/instance.hpp @@ -307,62 +307,54 @@ namespace vk #endif u32 device_queues = dev.get_queue_count(); - std::vector supportsPresent(device_queues, VK_FALSE); - bool present_possible = false; + std::vector supports_present(device_queues, VK_FALSE); + bool present_possible = true; for (u32 index = 0; index < device_queues; index++) { - vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, m_surface, &supportsPresent[index]); + vkGetPhysicalDeviceSurfaceSupportKHR(dev, index, m_surface, &supports_present[index]); } - for (const auto& value : supportsPresent) - { - if (value) - { - present_possible = true; - break; - } - } - - if (!present_possible) - { - rsx_log.error("It is not possible for the currently selected GPU to present to the window (Likely caused by NVIDIA driver running the current display)"); - } - - // Search for a graphics and a present queue in the array of queue - // families, try to find one that supports both u32 graphicsQueueNodeIndex = UINT32_MAX; u32 presentQueueNodeIndex = UINT32_MAX; + u32 transferQueueNodeIndex = UINT32_MAX; - for (u32 i = 0; i < device_queues; i++) + auto test_queue_family = [&](u32 index, u32 desired_flags) { - if ((dev.get_queue_properties(i).queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) + if (const auto flags = dev.get_queue_properties(index).queueFlags; + (flags & 
desired_flags) == desired_flags) { - if (graphicsQueueNodeIndex == UINT32_MAX) - graphicsQueueNodeIndex = i; + return true; + } - if (supportsPresent[i] == VK_TRUE) + return false; + }; + + for (u32 i = 0; i < device_queues; ++i) + { + // 1. Test for a present queue, possibly one that also supports graphics + compute + if (presentQueueNodeIndex == UINT32_MAX && supports_present[i]) + { + presentQueueNodeIndex = i; + if (test_queue_family(i, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) { graphicsQueueNodeIndex = i; - presentQueueNodeIndex = i; - - break; } } - } - - if (presentQueueNodeIndex == UINT32_MAX) - { - // If didn't find a queue that supports both graphics and present, then - // find a separate present queue. - for (u32 i = 0; i < device_queues; ++i) + // 2. Check for graphics + compute support + else if (graphicsQueueNodeIndex == UINT32_MAX && test_queue_family(i, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) { - if (supportsPresent[i] == VK_TRUE) + graphicsQueueNodeIndex = i; + if (supports_present[i]) { presentQueueNodeIndex = i; - break; } } + // 3. 
Check if transfer + compute is available + else if (transferQueueNodeIndex == UINT32_MAX && test_queue_family(i, VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT)) + { + transferQueueNodeIndex = i; + } } if (graphicsQueueNodeIndex == UINT32_MAX) @@ -373,15 +365,16 @@ namespace vk if (graphicsQueueNodeIndex != presentQueueNodeIndex) { - //Separate graphics and present, use headless fallback + // Separate graphics and present, use headless fallback present_possible = false; } if (!present_possible) { //Native(sw) swapchain + rsx_log.error("It is not possible for the currently selected GPU to present to the window (Likely caused by NVIDIA driver running the current display)"); rsx_log.warning("Falling back to software present support (native windowing API)"); - auto swapchain = new swapchain_NATIVE(dev, UINT32_MAX, graphicsQueueNodeIndex); + auto swapchain = new swapchain_NATIVE(dev, UINT32_MAX, graphicsQueueNodeIndex, transferQueueNodeIndex); swapchain->create(window_handle); return swapchain; } @@ -418,7 +411,7 @@ namespace vk color_space = surfFormats[0].colorSpace; - return new swapchain_WSI(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, format, m_surface, color_space, force_wm_reporting_off); + return new swapchain_WSI(dev, presentQueueNodeIndex, graphicsQueueNodeIndex, transferQueueNodeIndex, format, m_surface, color_space, force_wm_reporting_off); } }; } diff --git a/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp b/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp index 4f731b50aa..948dd8bd0e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp +++ b/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp @@ -73,11 +73,6 @@ namespace vk protected: render_device dev; - u32 m_present_queue = UINT32_MAX; - u32 m_graphics_queue = UINT32_MAX; - VkQueue vk_graphics_queue = VK_NULL_HANDLE; - VkQueue vk_present_queue = VK_NULL_HANDLE; - display_handle_t window_handle{}; u32 m_width = 0; u32 m_height = 0; @@ -86,15 +81,9 @@ namespace vk virtual void init_swapchain_images(render_device& dev, u32 count) = 
0; public: - swapchain_base(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + swapchain_base(physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) { - dev.create(gpu, _graphics_queue); - - if (_graphics_queue < UINT32_MAX) vkGetDeviceQueue(dev, _graphics_queue, 0, &vk_graphics_queue); - if (_present_queue < UINT32_MAX) vkGetDeviceQueue(dev, _present_queue, 0, &vk_present_queue); - - m_present_queue = _present_queue; - m_graphics_queue = _graphics_queue; + dev.create(gpu, graphics_queue, present_queue, transfer_queue); m_surface_format = format; } @@ -128,16 +117,6 @@ namespace vk return dev; } - const VkQueue& get_present_queue() - { - return vk_present_queue; - } - - const VkQueue& get_graphics_queue() - { - return vk_graphics_queue; - } - const VkFormat get_surface_format() { return m_surface_format; @@ -145,7 +124,7 @@ namespace vk const bool is_headless() const { - return (vk_present_queue == VK_NULL_HANDLE); + return (dev.get_present_queue() == VK_NULL_HANDLE); } }; @@ -156,8 +135,8 @@ namespace vk std::vector swapchain_images; public: - abstract_swapchain_impl(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) - : swapchain_base(gpu, _present_queue, _graphics_queue, format) + abstract_swapchain_impl(physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + : swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) {} ~abstract_swapchain_impl() override = default; @@ -183,8 +162,8 @@ namespace vk LPVOID hPtr = NULL; public: - swapchain_WIN32(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) - : native_swapchain_base(gpu, _present_queue, _graphics_queue, format) + swapchain_WIN32(physical_device& gpu, u32 present_queue, u32 
graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + : native_swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) {} ~swapchain_WIN32() {} @@ -266,8 +245,8 @@ namespace vk void* nsView = nullptr; public: - swapchain_MacOS(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) - : native_swapchain_base(gpu, _present_queue, _graphics_queue, format) + swapchain_MacOS(physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + : native_swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) {} ~swapchain_MacOS() {} @@ -316,8 +295,8 @@ namespace vk int bit_depth = 24; public: - swapchain_X11(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) - : native_swapchain_base(gpu, _present_queue, _graphics_queue, format) + swapchain_X11(physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + : native_swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) {} ~swapchain_X11() override = default; @@ -418,8 +397,8 @@ namespace vk { public: - swapchain_Wayland(physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) - : native_swapchain_base(gpu, _present_queue, _graphics_queue, format) + swapchain_Wayland(physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format = VK_FORMAT_B8G8R8A8_UNORM) + : native_swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) {} ~swapchain_Wayland() {} @@ -524,8 +503,8 @@ namespace vk } public: - swapchain_WSI(vk::physical_device& gpu, u32 _present_queue, u32 _graphics_queue, VkFormat format, VkSurfaceKHR surface, VkColorSpaceKHR color_space, bool force_wm_reporting_off) - : WSI_swapchain_base(gpu, _present_queue, 
_graphics_queue, format) + swapchain_WSI(vk::physical_device& gpu, u32 present_queue, u32 graphics_queue, u32 transfer_queue, VkFormat format, VkSurfaceKHR surface, VkColorSpaceKHR color_space, bool force_wm_reporting_off) + : WSI_swapchain_base(gpu, present_queue, graphics_queue, transfer_queue, format) { createSwapchainKHR = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkCreateSwapchainKHR")); destroySwapchainKHR = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkDestroySwapchainKHR")); @@ -622,7 +601,7 @@ namespace vk using WSI_swapchain_base::init; bool init() override { - if (vk_present_queue == VK_NULL_HANDLE) + if (dev.get_present_queue() == VK_NULL_HANDLE) { rsx_log.error("Cannot create WSI swapchain without a present queue"); return false; @@ -791,7 +770,7 @@ namespace vk present.waitSemaphoreCount = 1; present.pWaitSemaphores = &semaphore; - return queuePresentKHR(vk_present_queue, &present); + return queuePresentKHR(dev.get_present_queue(), &present); } VkImage get_image(u32 index) override