From 36eb07e067549bc9c250d9ae1e0288ef6188b8d0 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen
Date: Sun, 21 Feb 2016 12:33:16 +0100
Subject: [PATCH] Vulkan: Use copy paths if needed.

---
 gfx/common/vulkan_common.c | 159 +++++++++++++++++++++++++++++--------
 gfx/common/vulkan_common.h |  23 ++++++
 gfx/drivers/vulkan.c       |  89 ++++++++++++++++++---
 3 files changed, 230 insertions(+), 41 deletions(-)

diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c
index dad24c7442..64042d2636 100644
--- a/gfx/common/vulkan_common.c
+++ b/gfx/common/vulkan_common.c
@@ -86,6 +86,48 @@ void vulkan_map_persistent_texture(VkDevice device, struct vk_texture *texture)
    vkMapMemory(device, texture->memory, texture->offset,
          texture->size, 0, &texture->mapped);
 }
 
+void vulkan_copy_staging_to_dynamic(vk_t *vk, VkCommandBuffer cmd,
+      struct vk_texture *dynamic,
+      struct vk_texture *staging)
+{
+   VkImageCopy region;
+   retro_assert(dynamic->type == VULKAN_TEXTURE_DYNAMIC);
+   retro_assert(staging->type == VULKAN_TEXTURE_STAGING);
+
+   vulkan_transition_texture(vk, staging);
+
+   /* We don't have to sync against previous TRANSFER, since we observed the completion
+    * by fences. If we have a single texture_optimal, we would need to sync against
+    * previous transfers to avoid races.
+    *
+    * We would also need to optionally maintain extra textures due to changes in resolution,
+    * so this seems like the sanest and simplest solution. */
+   vulkan_image_layout_transition(vk, vk->cmd, dynamic->image,
+         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+         0, VK_ACCESS_TRANSFER_WRITE_BIT,
+         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+
+   memset(&region, 0, sizeof(region));
+   region.extent.width = dynamic->width;
+   region.extent.height = dynamic->height;
+   region.extent.depth = 1;
+   region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+   region.srcSubresource.layerCount = 1;
+   region.dstSubresource = region.srcSubresource;
+
+   vkCmdCopyImage(vk->cmd,
+         staging->image, VK_IMAGE_LAYOUT_GENERAL,
+         dynamic->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+         1, &region);
+
+   vulkan_image_layout_transition(vk, vk->cmd, dynamic->image,
+         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
+         VK_PIPELINE_STAGE_TRANSFER_BIT,
+         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+}
+
 #ifdef VULKAN_DEBUG_TEXTURE_ALLOC
 static VkImage vk_images[4 * 1024];
 static unsigned vk_count;
@@ -131,13 +173,6 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
       VkFormat format,
       const void *initial, const VkComponentMapping *swizzle, enum vk_texture_type type)
 {
-   /* TODO: Evaluate how we should do texture uploads on discrete cards optimally.
-    * For integrated GPUs, using linear texture is highly desirable to avoid extra copies, but
-    * we might need to take a DMA transfer with block interleave on desktop GPUs.
-    *
-    * Also, Vulkan drivers are not required to support sampling from linear textures
-    * (only TRANSFER), but seems to work fine on GPUs I've tested so far. */
-
    VkDevice device = vk->context->device;
    struct vk_texture tex;
    VkImageCreateInfo info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
@@ -148,9 +183,6 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
    VkMemoryRequirements mem_reqs;
    VkSubresourceLayout layout;
 
-   if (type == VULKAN_TEXTURE_STATIC && !initial)
-      retro_assert(0 && "Static textures must have initial data.\n");
-
    memset(&tex, 0, sizeof(tex));
 
    info.imageType = VK_IMAGE_TYPE_2D;
@@ -161,40 +193,104 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
    info.mipLevels = 1;
    info.arrayLayers = 1;
    info.samples = VK_SAMPLE_COUNT_1_BIT;
-   info.tiling = type != VULKAN_TEXTURE_STATIC ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
-   info.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
-   if (type == VULKAN_TEXTURE_STATIC)
-      info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-   if (type == VULKAN_TEXTURE_READBACK)
-      info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-
    info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-   /* We'll transition this on first use for streamed textures. */
-   info.initialLayout = type == VULKAN_TEXTURE_STREAMED ?
-      VK_IMAGE_LAYOUT_PREINITIALIZED :
-      VK_IMAGE_LAYOUT_UNDEFINED;
+
+   if (type == VULKAN_TEXTURE_STREAMED)
+   {
+      VkFormatProperties format_properties;
+      VkFormatFeatureFlags required = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+         VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+      vkGetPhysicalDeviceFormatProperties(vk->context->gpu, format, &format_properties);
+
+      if ((format_properties.linearTilingFeatures & required) != required)
+      {
+         RARCH_LOG("[Vulkan]: GPU does not support using linear images as textures. Falling back to copy path.\n");
+         type = VULKAN_TEXTURE_STAGING;
+      }
+   }
+
+   switch (type)
+   {
+      case VULKAN_TEXTURE_STATIC:
+         retro_assert(initial && "Static textures must have initial data.\n");
+         info.tiling = VK_IMAGE_TILING_OPTIMAL;
+         info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+         info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+         break;
+
+      case VULKAN_TEXTURE_DYNAMIC:
+         retro_assert(!initial && "Dynamic textures must not have initial data.\n");
+         info.tiling = VK_IMAGE_TILING_OPTIMAL;
+         info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+         info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+         break;
+
+      case VULKAN_TEXTURE_STREAMED:
+         info.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
+         info.tiling = VK_IMAGE_TILING_LINEAR;
+         info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
+         break;
+
+      case VULKAN_TEXTURE_STAGING:
+         info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+         info.tiling = VK_IMAGE_TILING_LINEAR;
+         info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
+         break;
+
+      case VULKAN_TEXTURE_READBACK:
+         info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+         info.tiling = VK_IMAGE_TILING_LINEAR;
+         info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+         break;
+   }
 
    vkCreateImage(device, &info, NULL, &tex.image);
 #if 0
    vulkan_track_alloc(tex.image);
 #endif
-
    vkGetImageMemoryRequirements(device, tex.image, &mem_reqs);
-
    alloc.allocationSize = mem_reqs.size;
 
-   if (type == VULKAN_TEXTURE_STATIC)
+   switch (type)
    {
-      alloc.memoryTypeIndex = vulkan_find_memory_type_fallback(&vk->context->memory_properties,
-            mem_reqs.memoryTypeBits,
-            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
+      case VULKAN_TEXTURE_STATIC:
+      case VULKAN_TEXTURE_DYNAMIC:
+         alloc.memoryTypeIndex = vulkan_find_memory_type_fallback(&vk->context->memory_properties,
+               mem_reqs.memoryTypeBits,
+               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
+         break;
+
+      default:
+         alloc.memoryTypeIndex = vulkan_find_memory_type_fallback(&vk->context->memory_properties,
+               mem_reqs.memoryTypeBits,
+               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+               VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+         break;
   }
-   else
+
+   /* If the texture is STREAMED and it's not DEVICE_LOCAL, we expect to hit a slower path,
+    * so fallback to copy path. */
+   if (type == VULKAN_TEXTURE_STREAMED &&
+         (vk->context->memory_properties.memoryTypes[alloc.memoryTypeIndex].propertyFlags &
+          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0)
    {
+      /* Recreate texture but for STAGING this time ... */
+      RARCH_LOG("[Vulkan]: GPU supports linear images as textures, but not DEVICE_LOCAL. Falling back to copy path.\n");
+      type = VULKAN_TEXTURE_STAGING;
+      vkDestroyImage(device, tex.image, NULL);
+      info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+      vkCreateImage(device, &info, NULL, &tex.image);
+      vkGetImageMemoryRequirements(device, tex.image, &mem_reqs);
+      alloc.allocationSize = mem_reqs.size;
       alloc.memoryTypeIndex = vulkan_find_memory_type_fallback(&vk->context->memory_properties,
             mem_reqs.memoryTypeBits,
-            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+            VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    }
 
    /* We're not reusing the objects themselves. */
@@ -264,6 +360,7 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
    tex.width = width;
    tex.height = height;
    tex.format = format;
+   tex.type = type;
 
    if (initial && type == VULKAN_TEXTURE_STREAMED)
    {
@@ -293,7 +390,7 @@ struct vk_texture vulkan_create_texture(vk_t *vk,
       VkCommandBuffer staging;
       unsigned bpp = vulkan_format_to_bpp(tex.format);
       struct vk_texture tmp = vulkan_create_texture(vk, NULL,
-            width, height, format, initial, NULL, VULKAN_TEXTURE_STREAMED);
+            width, height, format, initial, NULL, VULKAN_TEXTURE_STAGING);
 
       info.commandPool = vk->staging_pool;
       info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
diff --git a/gfx/common/vulkan_common.h b/gfx/common/vulkan_common.h
index 2288fc96be..92a0b75ac3 100644
--- a/gfx/common/vulkan_common.h
+++ b/gfx/common/vulkan_common.h
@@ -71,8 +71,22 @@
 enum vk_texture_type
 {
+   /* We will use the texture as a sampled linear texture. */
    VULKAN_TEXTURE_STREAMED = 0,
+
+   /* We will use the texture as a linear texture, but only
+    * for copying to a DYNAMIC texture. */
+   VULKAN_TEXTURE_STAGING,
+
+   /* We will use the texture as an optimally tiled texture,
+    * and we will update the texture by copying from STAGING
+    * textures. */
+   VULKAN_TEXTURE_DYNAMIC,
+
+   /* We will upload content once. */
    VULKAN_TEXTURE_STATIC,
+
+   /* We will use the texture for reading back transfers from GPU. */
    VULKAN_TEXTURE_READBACK
 };
 
@@ -188,6 +202,7 @@ struct vk_texture
    uint32_t memory_type;
    VkImageLayout layout;
+   enum vk_texture_type type;
    bool default_smooth;
 };
 
@@ -259,6 +274,7 @@ struct vk_per_frame
 {
    struct vk_image backbuffer;
    struct vk_texture texture;
+   struct vk_texture texture_optimal;
    struct vk_buffer_chain vbo;
    struct vk_buffer_chain ubo;
    struct vk_descriptor_manager descriptor_manager;
@@ -351,6 +367,9 @@ typedef struct vk
    struct
    {
       struct vk_texture textures[VULKAN_MAX_SWAPCHAIN_IMAGES];
+      struct vk_texture textures_optimal[VULKAN_MAX_SWAPCHAIN_IMAGES];
+      bool dirty[VULKAN_MAX_SWAPCHAIN_IMAGES];
+
       float alpha;
       unsigned last_index;
       bool enable;
@@ -420,6 +439,10 @@ void vulkan_transition_texture(vk_t *vk, struct vk_texture *texture);
 void vulkan_map_persistent_texture(VkDevice device, struct vk_texture *tex);
 void vulkan_destroy_texture(VkDevice device, struct vk_texture *tex);
 
+void vulkan_copy_staging_to_dynamic(vk_t *vk, VkCommandBuffer cmd,
+      struct vk_texture *dynamic,
+      struct vk_texture *staging);
+
 /* VBO will be written to here. */
 void vulkan_draw_quad(vk_t *vk, const struct vk_draw_quad *quad);
 
diff --git a/gfx/drivers/vulkan.c b/gfx/drivers/vulkan.c
index 53e80dc42f..6df9c8f597 100644
--- a/gfx/drivers/vulkan.c
+++ b/gfx/drivers/vulkan.c
@@ -429,6 +429,13 @@ static void vulkan_init_textures(vk_t *vk)
             vk->tex_w, vk->tex_h, vk->tex_fmt,
             NULL, NULL, VULKAN_TEXTURE_STREAMED);
       vulkan_map_persistent_texture(vk->context->device, &vk->swapchain[i].texture);
+
+      if (vk->swapchain[i].texture.type == VULKAN_TEXTURE_STAGING)
+      {
+         vk->swapchain[i].texture_optimal = vulkan_create_texture(vk, NULL,
+               vk->tex_w, vk->tex_h, vk->tex_fmt,
+               NULL, NULL, VULKAN_TEXTURE_DYNAMIC);
+      }
       }
    }
 }
 
@@ -439,8 +446,12 @@ static void vulkan_deinit_textures(vk_t *vk)
    unsigned i;
 
    for (i = 0; i < vk->num_swapchain_images; i++)
+   {
       if (vk->swapchain[i].texture.memory != VK_NULL_HANDLE)
         vulkan_destroy_texture(vk->context->device, &vk->swapchain[i].texture);
+      if (vk->swapchain[i].texture_optimal.memory != VK_NULL_HANDLE)
+         vulkan_destroy_texture(vk->context->device, &vk->swapchain[i].texture_optimal);
+   }
 }
 
 static void vulkan_deinit_command_buffers(vk_t *vk)
@@ -624,8 +635,12 @@ static void vulkan_deinit_menu(vk_t *vk)
 {
    unsigned i;
    for (i = 0; i < VULKAN_MAX_SWAPCHAIN_IMAGES; i++)
+   {
       if (vk->menu.textures[i].memory)
         vulkan_destroy_texture(vk->context->device, &vk->menu.textures[i]);
+      if (vk->menu.textures_optimal[i].memory)
+         vulkan_destroy_texture(vk->context->device, &vk->menu.textures_optimal[i]);
+   }
 }
 
 static void vulkan_free(void *data)
@@ -1253,6 +1268,13 @@ static bool vulkan_frame(void *data, const void *frame,
    vulkan_buffer_chain_discard(&chain->vbo);
    vulkan_buffer_chain_discard(&chain->ubo);
 
+   /* Start recording the command buffer. */
+   vk->cmd = chain->cmd;
+   begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+   vkResetCommandBuffer(vk->cmd, 0);
+   vkBeginCommandBuffer(vk->cmd, &begin_info);
+   memset(&vk->tracker, 0, sizeof(vk->tracker));
+
    /* Upload texture */
    retro_perf_start(&copy_frame);
    if (frame && !vk->hw.enable)
@@ -1265,8 +1287,16 @@ static bool vulkan_frame(void *data, const void *frame,
       if (chain->texture.width != frame_width || chain->texture.height != frame_height)
       {
          chain->texture = vulkan_create_texture(vk, &chain->texture,
-               frame_width, frame_height, chain->texture.format, NULL, NULL, VULKAN_TEXTURE_STREAMED);
+               frame_width, frame_height, chain->texture.format, NULL, NULL,
+               chain->texture_optimal.memory ? VULKAN_TEXTURE_STAGING : VULKAN_TEXTURE_STREAMED);
          vulkan_map_persistent_texture(vk->context->device, &chain->texture);
+
+         if (chain->texture.type == VULKAN_TEXTURE_STAGING)
+         {
+            chain->texture_optimal = vulkan_create_texture(vk, &chain->texture_optimal,
+                  frame_width, frame_height, chain->texture_optimal.format,
+                  NULL, NULL, VULKAN_TEXTURE_DYNAMIC);
+         }
       }
 
       if (frame != chain->texture.mapped)
@@ -1279,17 +1309,17 @@ static bool vulkan_frame(void *data, const void *frame,
             memcpy(dst, src, frame_width * bpp);
       }
 
+      /* If we have an optimal texture, copy to that now. */
+      if (chain->texture_optimal.image)
+      {
+         vulkan_copy_staging_to_dynamic(vk, vk->cmd,
+               &chain->texture_optimal, &chain->texture);
+      }
+
       vk->last_valid_index = frame_index;
    }
    retro_perf_stop(&copy_frame);
 
-   /* Start recording the command buffer. */
-   vk->cmd = chain->cmd;
-   begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-   vkResetCommandBuffer(vk->cmd, 0);
-   vkBeginCommandBuffer(vk->cmd, &begin_info);
-   memset(&vk->tracker, 0, sizeof(vk->tracker));
-
    /* Notify filter chain about the new sync index. */
    vulkan_filter_chain_notify_sync_index(vk->filter_chain, frame_index);
 
@@ -1327,7 +1357,10 @@ static bool vulkan_frame(void *data, const void *frame,
    else
    {
       struct vk_texture *tex = &vk->swapchain[vk->last_valid_index].texture;
-      vulkan_transition_texture(vk, tex);
+      if (vk->swapchain[vk->last_valid_index].texture_optimal.memory != VK_NULL_HANDLE)
+         tex = &vk->swapchain[vk->last_valid_index].texture_optimal;
+      else
+         vulkan_transition_texture(vk, tex);
 
       input.view = tex->view;
       input.layout = tex->layout;
@@ -1375,10 +1408,24 @@ static bool vulkan_frame(void *data, const void *frame,
    if (vk->menu.textures[vk->menu.last_index].image != VK_NULL_HANDLE)
    {
       struct vk_draw_quad quad;
+      struct vk_texture *optimal = &vk->menu.textures_optimal[vk->menu.last_index];
       vulkan_set_viewport(vk, width, height, vk->menu.full_screen, false);
 
       quad.pipeline = vk->pipelines.alpha_blend;
       quad.texture = &vk->menu.textures[vk->menu.last_index];
+
+      if (optimal->memory != VK_NULL_HANDLE)
+      {
+         if (vk->menu.dirty[vk->menu.last_index])
+         {
+            vulkan_copy_staging_to_dynamic(vk, vk->cmd,
+                  optimal,
+                  quad.texture);
+            vk->menu.dirty[vk->menu.last_index] = false;
+         }
+         quad.texture = optimal;
+      }
+
       quad.sampler = vk->samplers.linear;
       quad.mvp = &vk->mvp_no_rot;
       quad.color.r = 1.0f;
@@ -1562,6 +1609,13 @@ static bool vulkan_get_current_sw_framebuffer(void *data, struct retro_framebuff
             framebuffer->width, framebuffer->height, chain->texture.format,
             NULL, NULL, VULKAN_TEXTURE_STREAMED);
       vulkan_map_persistent_texture(vk->context->device, &chain->texture);
+
+      if (chain->texture.type == VULKAN_TEXTURE_STAGING)
+      {
+         chain->texture_optimal = vulkan_create_texture(vk, &chain->texture_optimal,
+               framebuffer->width, framebuffer->height, chain->texture_optimal.format,
+               NULL, NULL, VULKAN_TEXTURE_DYNAMIC);
+      }
    }
 
    framebuffer->data = chain->texture.mapped;
@@ -1593,6 +1647,8 @@ static void vulkan_set_texture_frame(void *data,
    vk_t *vk = (vk_t*)data;
    unsigned index = vk->context->current_swapchain_index;
    struct vk_texture *texture = &vk->menu.textures[index];
+   struct vk_texture *texture_optimal = &vk->menu.textures_optimal[index];
+
    uint8_t *ptr;
    unsigned x, y;
    const VkComponentMapping br_swizzle = {
@@ -1611,7 +1667,8 @@ static void vulkan_set_texture_frame(void *data,
          texture->memory ? texture : NULL,
         width, height,
         rgb32 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_B4G4R4A4_UNORM_PACK16,
-        NULL, rgb32 ? NULL : &br_swizzle, VULKAN_TEXTURE_STREAMED);
+        NULL, rgb32 ? NULL : &br_swizzle,
+        texture_optimal->memory ? VULKAN_TEXTURE_STAGING : VULKAN_TEXTURE_STREAMED);
 
    vkMapMemory(vk->context->device, texture->memory,
          texture->offset, texture->size, 0, (void**)&ptr);
@@ -1625,6 +1682,18 @@ static void vulkan_set_texture_frame(void *data,
    vkUnmapMemory(vk->context->device, texture->memory);
    vk->menu.alpha = alpha;
    vk->menu.last_index = index;
+
+   if (texture->type == VULKAN_TEXTURE_STAGING)
+   {
+      *texture_optimal = vulkan_create_texture(vk,
+            texture_optimal->memory ? texture_optimal : NULL,
+            width, height,
+            rgb32 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_B4G4R4A4_UNORM_PACK16,
+            NULL, rgb32 ? NULL : &br_swizzle,
+            VULKAN_TEXTURE_DYNAMIC);
+   }
+
+   vk->menu.dirty[index] = true;
 }
 
 static void vulkan_set_texture_enable(void *data, bool state, bool full_screen)
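
For reference, this is roughly how the copy path above is driven from the frame upload
code once vulkan_create_texture() has demoted a STREAMED texture to STAGING. The helper
below is only an illustrative sketch, not part of the patch: the function name
upload_frame_copy_path and its parameters are made up, it omits the frame != mapped
guard from vulkan_frame(), and it assumes the declarations from
gfx/common/vulkan_common.h are in scope.

   static void upload_frame_copy_path(vk_t *vk, struct vk_per_frame *chain,
         const void *frame, unsigned width, unsigned height, size_t pitch)
   {
      unsigned y;
      unsigned bpp = vulkan_format_to_bpp(chain->texture.format);

      /* vulkan_create_texture() silently falls back from STREAMED to STAGING when the
       * GPU cannot sample linear images, or when the linear image is not DEVICE_LOCAL.
       * The caller checks texture.type afterwards and creates the optimally tiled
       * DYNAMIC target on demand. */
      if (chain->texture.width != width || chain->texture.height != height)
      {
         chain->texture = vulkan_create_texture(vk, &chain->texture,
               width, height, chain->texture.format, NULL, NULL,
               chain->texture_optimal.memory ?
               VULKAN_TEXTURE_STAGING : VULKAN_TEXTURE_STREAMED);
         vulkan_map_persistent_texture(vk->context->device, &chain->texture);

         if (chain->texture.type == VULKAN_TEXTURE_STAGING)
            chain->texture_optimal = vulkan_create_texture(vk, &chain->texture_optimal,
                  width, height, chain->texture.format,
                  NULL, NULL, VULKAN_TEXTURE_DYNAMIC);
      }

      /* CPU copy into the persistently mapped linear image (STREAMED or STAGING). */
      for (y = 0; y < height; y++)
         memcpy((uint8_t*)chain->texture.mapped + y * chain->texture.stride,
               (const uint8_t*)frame + y * pitch, width * bpp);

      /* Only on the copy path: record the STAGING -> DYNAMIC image copy into the
       * frame's command buffer; drawing then samples the DYNAMIC texture instead. */
      if (chain->texture_optimal.image)
         vulkan_copy_staging_to_dynamic(vk, vk->cmd,
               &chain->texture_optimal, &chain->texture);
   }

The menu textures follow the same pattern, except that vk->menu.dirty[] defers the
STAGING -> DYNAMIC copy until the menu quad is actually drawn in vulkan_frame().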