Vulkan: Add support for queue transfers in HW rendering.

This commit is contained in:
Hans-Kristian Arntzen 2016-06-25 11:39:52 +02:00
parent 82c7aa0e6f
commit d043123f71
6 changed files with 149 additions and 6 deletions

View File

@ -967,7 +967,7 @@ void retro_run(void)
vk.index = vulkan->get_sync_index(vulkan->handle);
vulkan_test_render();
vulkan->set_image(vulkan->handle, &vk.images[vk.index], 0, NULL);
vulkan->set_image(vulkan->handle, &vk.images[vk.index], 0, NULL, VK_QUEUE_FAMILY_IGNORED);
vulkan->set_command_buffers(vulkan->handle, 1, &vk.cmd[vk.index]);
video_cb(RETRO_HW_FRAME_BUFFER_VALID, BASE_WIDTH, BASE_HEIGHT, 0);
}

View File

@ -104,6 +104,31 @@ uint32_t vulkan_find_memory_type_fallback(
device_reqs, host_reqs_second, 0);
}
/**
 * vulkan_transfer_image_ownership:
 * @cmd              : Command buffer the barrier is recorded into.
 * @image            : Image whose queue family ownership is transferred.
 * @layout           : Layout of @image. It is used as both old and new
 *                     layout, so no layout transition is performed.
 * @src_stages       : Pipeline stages forming the source scope
 *                     of the barrier.
 * @dst_stages       : Pipeline stages forming the destination scope
 *                     of the barrier.
 * @src_queue_family : Queue family which currently owns @image.
 * @dst_queue_family : Queue family which will own @image.
 *
 * Records a single image memory barrier which transfers ownership
 * of the color aspect of @image (every mip level and array layer)
 * from @src_queue_family to @dst_queue_family.
 *
 * Only one barrier is recorded per call; callers use this once
 * to acquire an image and once to release it again.
 **/
void vulkan_transfer_image_ownership(VkCommandBuffer cmd,
      VkImage image, VkImageLayout layout,
      VkPipelineStageFlags src_stages,
      VkPipelineStageFlags dst_stages,
      uint32_t src_queue_family,
      uint32_t dst_queue_family)
{
   /* Members which are not assigned below (pNext, baseMipLevel,
    * baseArrayLayer) are zero-initialized by the aggregate initializer. */
   VkImageMemoryBarrier barrier =
   { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };

   barrier.srcAccessMask               = 0;
   barrier.dstAccessMask               = 0;

   /* Same layout on both sides: ownership changes, the layout does not. */
   barrier.oldLayout                   = layout;
   barrier.newLayout                   = layout;

   barrier.srcQueueFamilyIndex         = src_queue_family;
   barrier.dstQueueFamilyIndex         = dst_queue_family;
   barrier.image                       = image;

   /* Cover the whole color image. */
   barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
   barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
   barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;

   /* The fourth argument is a VkDependencyFlags bitmask, not a boolean,
    * so pass 0 for "no flags". */
   VKFUNC(vkCmdPipelineBarrier)(cmd, src_stages, dst_stages,
         0, 0, NULL, 0, NULL, 1, &barrier);
}
void vulkan_map_persistent_texture(
VkDevice device,
struct vk_texture *texture)

View File

@ -358,8 +358,10 @@ typedef struct vk
unsigned last_width;
unsigned last_height;
uint32_t src_queue_family;
bool enable;
bool valid_semaphore;
} hw;
struct
@ -393,6 +395,13 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
void vulkan_transition_texture(vk_t *vk, struct vk_texture *texture);
void vulkan_transfer_image_ownership(VkCommandBuffer cmd,
VkImage image, VkImageLayout layout,
VkPipelineStageFlags src_stages,
VkPipelineStageFlags dst_stages,
uint32_t src_queue_family,
uint32_t dst_queue_family);
void vulkan_map_persistent_texture(
VkDevice device,
struct vk_texture *texture);

View File

@ -833,7 +833,8 @@ static uint32_t vulkan_get_sync_index_mask(void *handle)
static void vulkan_set_image(void *handle,
const struct retro_vulkan_image *image,
uint32_t num_semaphores,
const VkSemaphore *semaphores)
const VkSemaphore *semaphores,
uint32_t src_queue_family)
{
unsigned i;
vk_t *vk = (vk_t*)handle;
@ -853,6 +854,9 @@ static void vulkan_set_image(void *handle,
for (i = 0; i < vk->hw.num_semaphores; i++)
vk->hw.wait_dst_stages[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
vk->hw.valid_semaphore = true;
vk->hw.src_queue_family = src_queue_family;
}
}
@ -1446,6 +1450,7 @@ static bool vulkan_frame(void *data, const void *frame,
static struct retro_perf_counter copy_frame = {0};
static struct retro_perf_counter swapbuffers = {0};
static struct retro_perf_counter queue_submit = {0};
bool waits_for_semaphores = false;
VkCommandBufferBeginInfo begin_info = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
@ -1488,6 +1493,26 @@ static bool vulkan_frame(void *data, const void *frame,
vulkan_flush_caches(vk);
waits_for_semaphores = vk->hw.enable && frame &&
!vk->hw.num_cmd && vk->hw.valid_semaphore;
if (waits_for_semaphores &&
vk->hw.src_queue_family != VK_QUEUE_FAMILY_IGNORED &&
vk->hw.src_queue_family != vk->context->graphics_queue_index)
{
retro_assert(vk->hw.image);
/* Acquire ownership of image from other queue family. */
vulkan_transfer_image_ownership(vk->cmd,
vk->hw.image->create_info.image,
vk->hw.image->image_layout,
/* Create a dependency chain from semaphore wait. */
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_TRANSFER_BIT,
vk->hw.src_queue_family, vk->context->graphics_queue_index);
}
/* Upload texture */
performance_counter_start(&copy_frame);
if (frame && !vk->hw.enable)
@ -1717,6 +1742,21 @@ static bool vulkan_frame(void *data, const void *frame,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
if (waits_for_semaphores &&
vk->hw.src_queue_family != VK_QUEUE_FAMILY_IGNORED &&
vk->hw.src_queue_family != vk->context->graphics_queue_index)
{
retro_assert(vk->hw.image);
/* Release ownership of image back to other queue family. */
vulkan_transfer_image_ownership(vk->cmd,
vk->hw.image->create_info.image,
vk->hw.image->image_layout,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
vk->context->graphics_queue_index, vk->hw.src_queue_family);
}
performance_counter_start(&end_cmd);
VKFUNC(vkEndCommandBuffer)(vk->cmd);
performance_counter_stop(&end_cmd);
@ -1739,11 +1779,14 @@ static bool vulkan_frame(void *data, const void *frame,
submit_info.pCommandBuffers = &vk->cmd;
}
if (vk->hw.enable && frame && !vk->hw.num_cmd)
if (waits_for_semaphores)
{
submit_info.waitSemaphoreCount = vk->hw.num_semaphores;
submit_info.pWaitSemaphores = vk->hw.semaphores;
submit_info.pWaitDstStageMask = vk->hw.wait_dst_stages;
/* Consume the semaphores. */
vk->hw.valid_semaphore = false;
}
submit_info.signalSemaphoreCount =

View File

@ -955,6 +955,27 @@ struct retro_hw_render_interface
* This must be called before the first call to retro_run.
*/
/* Identifies which API-specific context negotiation interface a
 * struct retro_hw_render_context_negotiation_interface describes. */
enum retro_hw_render_context_negotiation_interface_type
{
/* Context negotiation interface for Vulkan. */
RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN = 0,
/* NOTE(review): presumably a sentinel which only forces the enum to be
 * int-sized and is not a usable interface type -- confirm. */
RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_DUMMY = INT_MAX
};
/* Base struct. All retro_hw_render_context_negotiation_interface_* types
 * contain at least these fields.
 *
 * A pointer to this struct is what is passed with
 * RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE. */
struct retro_hw_render_context_negotiation_interface
{
/* Selects which API-specific negotiation interface this struct is. */
enum retro_hw_render_context_negotiation_interface_type interface_type;
/* Version number of the interface.
 * NOTE(review): presumably versioned per interface_type -- confirm. */
unsigned interface_version;
};
#define RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE (43 | RETRO_ENVIRONMENT_EXPERIMENTAL)
/* const struct retro_hw_render_context_negotiation_interface * --
* Sets an interface which lets the libretro core negotiate with frontend how a context is created.
* The semantics of this interface depend on which API is used in SET_HW_RENDER earlier.
* This interface will be used when the frontend is trying to create a HW rendering context,
* so it will be used after SET_HW_RENDER, but before the context_reset callback.
*/
#define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */
#define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */
#define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. */

View File

@ -26,7 +26,7 @@
#include <libretro.h>
#include <vulkan/vulkan.h>
#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 2
#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 3
struct retro_vulkan_image
{
@ -38,7 +38,9 @@ struct retro_vulkan_image
typedef void (*retro_vulkan_set_image_t)(void *handle,
const struct retro_vulkan_image *image,
uint32_t num_semaphores,
const VkSemaphore *semaphores);
const VkSemaphore *semaphores,
uint32_t src_queue_family);
typedef uint32_t (*retro_vulkan_get_sync_index_t)(void *handle);
typedef uint32_t (*retro_vulkan_get_sync_index_mask_t)(void *handle);
typedef void (*retro_vulkan_set_command_buffers_t)(void *handle,
@ -105,6 +107,15 @@ struct retro_hw_render_interface_vulkan
* semaphores provided to be signaled before using the results further
* in the pipeline.
*
* Semaphores provided by a single call to set_image will only be
* waited for once (waiting for a semaphore resets it).
* E.g. calling set_image, video_refresh, and then video_refresh again
* with the same image but without a new set_image call
* will only wait for the semaphores once.
*
* For this reason, ownership transfer will only occur if semaphores
* are waited on for a particular frame in the frontend.
*
* Using semaphores is optional for synchronization purposes,
* but if not using
* semaphores, an image memory barrier in vkCmdPipelineBarrier
@ -163,7 +174,41 @@ struct retro_hw_render_interface_vulkan
* retro_video_refresh_t should be extended if frame duping is used
* so that the frontend can reuse the older pointer.
*
* If frame duping is used, the frontend will not wait for any semaphores.
* The image itself however, must not be touched by the core until
* wait_sync_index has been completed later. The frontend may perform
* layout transitions on the image, so even read-only access is not defined.
* The exception to the read-only rule is if GENERAL layout is used for the image.
* In this case, the frontend is not allowed to perform any layout transitions,
* so concurrent reads from core and frontend are allowed.
*
* If frame duping is used, or if set_command_buffers is used,
* the frontend will not wait for any semaphores.
*
* The src_queue_family is used to specify which queue family
* the image is currently owned by. If using multiple queue families
* (e.g. async compute), the frontend will need to acquire ownership of the
* image before rendering with it and release the image afterwards.
*
* If src_queue_family is equal to the queue family (queue_index),
* no ownership transfer will occur.
* Similarly, if src_queue_family is VK_QUEUE_FAMILY_IGNORED,
* no ownership transfer will occur.
*
* The frontend will always release ownership back to src_queue_family.
* Waiting for frontend to complete with wait_sync_index() ensures that
* the frontend has released ownership back to the application.
* Note that in Vulkan, transferring ownership is a two-part process.
*
* Example frame:
* - core releases ownership from src_queue_family to queue_index with VkImageMemoryBarrier.
* - core calls set_image with src_queue_family.
* - Frontend will acquire the image with src_queue_family -> queue_index as well, completing the ownership transfer.
* - Frontend renders the frame.
* - Frontend releases ownership with queue_index -> src_queue_family.
* - Next time image is used, core must acquire ownership from queue_index ...
*
* Since the frontend releases ownership, we cannot necessarily dupe the frame because
* the core needs to make the roundtrip of ownership transfer.
*/
retro_vulkan_set_image_t set_image;