From 5fbc63fbcf15e385f5ff9c4cde16e8c4bdb816c2 Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Fri, 9 Dec 2016 22:23:04 +1000
Subject: [PATCH] Vulkan: Compute shader support

---
 .../Vulkan/CommandBufferManager.cpp           |   3 +-
 Source/Core/VideoBackends/Vulkan/Constants.h  |   8 +
 .../Core/VideoBackends/Vulkan/ObjectCache.cpp |  91 +++++++++-
 .../Core/VideoBackends/Vulkan/ObjectCache.h   |  24 +++
 .../VideoBackends/Vulkan/ShaderCompiler.cpp   |  46 ++++-
 .../VideoBackends/Vulkan/ShaderCompiler.h     |   4 +
 .../Core/VideoBackends/Vulkan/Texture2D.cpp   | 123 +++++++++++++
 Source/Core/VideoBackends/Vulkan/Texture2D.h  |  11 ++
 Source/Core/VideoBackends/Vulkan/Util.cpp     | 165 ++++++++++++++++++
 Source/Core/VideoBackends/Vulkan/Util.h       |  41 +++++
 .../VideoBackends/Vulkan/VulkanContext.cpp    |   4 +-
 11 files changed, 507 insertions(+), 13 deletions(-)

diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
index 27a3976b83..460ec919aa 100644
--- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
+++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
@@ -91,7 +91,8 @@ bool CommandBufferManager::CreateCommandBuffers()
   VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
                                        {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
                                        {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
-                                       {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}};
+                                       {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
+                                       {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}};
 
   VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
                                                  nullptr,
diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h
index f65aad6cfc..8507d23342 100644
--- a/Source/Core/VideoBackends/Vulkan/Constants.h
+++ b/Source/Core/VideoBackends/Vulkan/Constants.h
@@ -30,6 +30,7 @@ enum DESCRIPTOR_SET_LAYOUT
   DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
   DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
   DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
+  DESCRIPTOR_SET_LAYOUT_COMPUTE,
   NUM_DESCRIPTOR_SET_LAYOUTS
 };
 
@@ -52,6 +53,12 @@ enum DESCRIPTOR_SET_BIND_POINT
 //     - Same as standard, plus 128 bytes of push constants, accessible from all stages.
 //   - Texture Decoding
 //     - Same as push constant, plus a single texel buffer accessible from PS.
+//   - Compute
+//     - 1 uniform buffer [set=0, binding=0]
+//     - 4 combined image samplers [set=0, binding=1-4]
+//     - 2 texel buffers [set=0, binding=5-6]
+//     - 1 storage image [set=0, binding=7]
+//     - 128 bytes of push constants
 //
 // All four pipeline layouts share the first two descriptor sets (uniform buffers, PS samplers).
 // The third descriptor set (see bind points above) is used for storage or texel buffers.
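For illustration only (not part of the patch): a compute shader written against this layout, using the binding macros that COMPUTE_SHADER_HEADER introduces in ShaderCompiler.cpp further down, would declare its resources roughly as below. The shader body and all names here are hypothetical.

// Hypothetical example; binding indices follow the compute set described above.
static const char EXAMPLE_COMPUTE_SHADER[] = R"(
  SAMPLER_BINDING(0) uniform sampler2DArray samp0;                      // set=0, binding=1
  TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer buf0;                  // set=0, binding=5
  IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;  // set=0, binding=7

  layout(local_size_x = 8, local_size_y = 8) in;

  void main()
  {
    int3 coords = int3(gl_GlobalInvocationID.xyz);
    float4 color = texelFetch(samp0, coords, 0);
    imageStore(output_image, coords, color);
  }
)";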
@@ -62,6 +69,7 @@ enum PIPELINE_LAYOUT
   PIPELINE_LAYOUT_BBOX,
   PIPELINE_LAYOUT_PUSH_CONSTANT,
   PIPELINE_LAYOUT_TEXTURE_CONVERSION,
+  PIPELINE_LAYOUT_COMPUTE,
   NUM_PIPELINE_LAYOUTS
 };
 
diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
index 9c903b0065..1fb083be0f 100644
--- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
@@ -324,6 +324,41 @@ std::pair<VkPipeline, bool> ObjectCache::GetPipelineWithCacheResult(const Pipeli
   return {pipeline, false};
 }
 
+VkPipeline ObjectCache::CreateComputePipeline(const ComputePipelineInfo& info)
+{
+  VkComputePipelineCreateInfo pipeline_info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+                                               nullptr,
+                                               0,
+                                               {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                                                nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, info.cs,
+                                                "main", nullptr},
+                                               info.pipeline_layout,
+                                               VK_NULL_HANDLE,
+                                               -1};
+
+  VkPipeline pipeline;
+  VkResult res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, 1,
+                                          &pipeline_info, nullptr, &pipeline);
+  if (res != VK_SUCCESS)
+  {
+    LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
+    return VK_NULL_HANDLE;
+  }
+
+  return pipeline;
+}
+
+VkPipeline ObjectCache::GetComputePipeline(const ComputePipelineInfo& info)
+{
+  auto iter = m_compute_pipeline_objects.find(info);
+  if (iter != m_compute_pipeline_objects.end())
+    return iter->second;
+
+  VkPipeline pipeline = CreateComputePipeline(info);
+  m_compute_pipeline_objects.emplace(info, pipeline);
+  return pipeline;
+}
+
 std::string ObjectCache::GetDiskCacheFileName(const char* type)
 {
   return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
@@ -477,6 +512,13 @@ void ObjectCache::DestroyPipelineCache()
   }
   m_pipeline_objects.clear();
 
+  for (const auto& it : m_compute_pipeline_objects)
+  {
+    if (it.second != VK_NULL_HANDLE)
+      vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
+  }
+  m_compute_pipeline_objects.clear();
+
   vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr);
   m_pipeline_cache = VK_NULL_HANDLE;
 }
@@ -725,6 +767,17 @@ bool ObjectCache::CreateDescriptorSetLayouts()
       {0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
   };
 
+  static const VkDescriptorSetLayoutBinding compute_set_bindings[] = {
+      {0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+      {7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+  };
+
   static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
       {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
        static_cast<u32>(ArraySize(ubo_set_bindings)), ubo_set_bindings},
@@ -733,7 +786,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
       {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
        static_cast<u32>(ArraySize(ssbo_set_bindings)), ssbo_set_bindings},
       {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
-       static_cast<u32>(ArraySize(texel_buffer_set_bindings)),
-       texel_buffer_set_bindings}};
+       static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings},
+      {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
+       static_cast<u32>(ArraySize(compute_set_bindings)), compute_set_bindings}};
 
   for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
   {
@@ -774,8 +829,11 @@ bool ObjectCache::CreatePipelineLayouts()
       m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS],
       m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
       m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]};
+  VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]};
   VkPushConstantRange push_constant_range = {
       VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE};
+  VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                                                     PUSH_CONSTANT_BUFFER_SIZE};
 
   // Info for each pipeline layout
   VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = {
@@ -794,7 +852,11 @@ bool ObjectCache::CreatePipelineLayouts()
       // Texture Conversion
       {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
        static_cast<u32>(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1,
-       &push_constant_range}};
+       &push_constant_range},
+
+      // Compute
+      {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
+       static_cast<u32>(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};
 
   for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
   {
@@ -1007,6 +1069,31 @@ bool operator<(const SamplerState& lhs, const SamplerState& rhs)
   return lhs.bits < rhs.bits;
 }
 
+std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const
+{
+  return static_cast<std::size_t>(XXH64(&key, sizeof(key), 0));
+}
+
+bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+}
+
+bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  return !operator==(lhs, rhs);
+}
+
+bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0;
+}
+
+bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0;
+}
+
 bool ObjectCache::CompileSharedShaders()
 {
   static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"(
diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h
index 546d1439a5..11d436fc35 100644
--- a/Source/Core/VideoBackends/Vulkan/ObjectCache.h
+++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h
@@ -56,6 +56,22 @@ bool operator!=(const SamplerState& lhs, const SamplerState& rhs);
 bool operator>(const SamplerState& lhs, const SamplerState& rhs);
 bool operator<(const SamplerState& lhs, const SamplerState& rhs);
 
+struct ComputePipelineInfo
+{
+  VkPipelineLayout pipeline_layout;
+  VkShaderModule cs;
+};
+
+struct ComputePipelineInfoHash
+{
+  std::size_t operator()(const ComputePipelineInfo& key) const;
+};
+
+bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+
 class ObjectCache
 {
 public:
@@ -114,6 +130,12 @@ public:
   // otherwise for a cache hit it will be true.
   std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
 
+  // Creates a compute pipeline; the returned handle is not tracked by the cache.
+  VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
+
+  // Finds a compute pipeline matching the description; if not found, attempts to create it.
+  VkPipeline GetComputePipeline(const ComputePipelineInfo& info);
+
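+  // Usage sketch (illustrative only, not part of this change; the pipeline-layout
+  // accessor name is an assumption):
+  //   ComputePipelineInfo info = {};
+  //   info.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE);
+  //   info.cs = compute_shader_module;
+  //   VkPipeline pipeline = g_object_cache->GetComputePipeline(info);
+  // A VK_NULL_HANDLE result means pipeline creation failed.
+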
   // Saves the pipeline cache to disk. Call when shutting down.
   void SavePipelineCache();
 
@@ -166,6 +188,8 @@ private:
   ShaderCache<PixelShaderUid> m_ps_cache;
 
   std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
+  std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
+      m_compute_pipeline_objects;
   VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
   std::string m_pipeline_cache_filename;
 
diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
index 2265a34364..d4d095bb78 100644
--- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
+++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
@@ -35,7 +35,7 @@ static const TBuiltInResource* GetCompilerResourceLimits();
 // Compile a shader to SPIR-V via glslang
 static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage,
                                const char* stage_filename, const char* source_code,
-                               size_t source_code_length, bool prepend_header);
+                               size_t source_code_length, const char* header, size_t header_length);
 
 // Regarding the UBO bind points, we subtract one from the binding index because
 // the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
@@ -73,9 +73,32 @@ static const char SHADER_HEADER[] = R"(
   #define gl_VertexID gl_VertexIndex
   #define gl_InstanceID gl_InstanceIndex
 )";
+static const char COMPUTE_SHADER_HEADER[] = R"(
+  // Target GLSL 4.5.
+  #version 450 core
+  // All resources are packed into one descriptor set for compute.
+  #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x))
+  #define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x))
+  #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x))
+  #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x))
+
+  // hlsl to glsl function translation
+  #define float2 vec2
+  #define float3 vec3
+  #define float4 vec4
+  #define uint2 uvec2
+  #define uint3 uvec3
+  #define uint4 uvec4
+  #define int2 ivec2
+  #define int3 ivec3
+  #define int4 ivec4
+  #define frac fract
+  #define lerp mix
+)";
 
 bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
-                        const char* source_code, size_t source_code_length, bool prepend_header)
+                        const char* source_code, size_t source_code_length, const char* header,
+                        size_t header_length)
 {
   if (!InitializeGlslang())
     return false;
@@ -91,10 +114,10 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
   std::string full_source_code;
   const char* pass_source_code = source_code;
   int pass_source_code_length = static_cast<int>(source_code_length);
-  if (prepend_header)
+  if (header_length > 0)
   {
-    full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length);
-    full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
+    full_source_code.reserve(header_length + source_code_length);
+    full_source_code.append(header, header_length);
     full_source_code.append(source_code, source_code_length);
     pass_source_code = full_source_code.c_str();
     pass_source_code_length = static_cast<int>(full_source_code.length());
   }
@@ -318,21 +341,28 @@ bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code,
                          size_t source_code_length, bool prepend_header)
 {
   return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length,
-                            prepend_header);
+                            SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
 }
 
 bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
                            size_t source_code_length, bool prepend_header)
 {
   return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length,
-                            prepend_header);
+                            SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
 }
 
 bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
                            size_t source_code_length, bool prepend_header)
 {
   return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length,
-                            prepend_header);
+                            SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
+}
+
+bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
+                          size_t source_code_length, bool prepend_header)
+{
+  return CompileShaderToSPV(out_code, EShLangCompute, "cs", source_code, source_code_length,
+                            COMPUTE_SHADER_HEADER, sizeof(COMPUTE_SHADER_HEADER) - 1);
 }
 
 }  // namespace ShaderCompiler
diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
index 96bd9081bf..197dc1787c 100644
--- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
+++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
@@ -29,5 +29,9 @@ bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
 bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
                            size_t source_code_length, bool prepend_header = true);
 
+// Compile a compute shader to SPIR-V.
+bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
+                          size_t source_code_length, bool prepend_header = true);
+
 }  // namespace ShaderCompiler
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
index 9dda089b21..9b8111aa94 100644
--- a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
+++ b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
@@ -4,6 +4,7 @@
 
 #include
 
+#include "Common/Assert.h"
 #include "VideoBackends/Vulkan/CommandBufferManager.h"
 #include "VideoBackends/Vulkan/Texture2D.h"
 #include "VideoBackends/Vulkan/VulkanContext.h"
@@ -273,10 +274,132 @@ void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout
     break;
   }
 
+  // If we were using a compute layout, the stages need to reflect that
+  switch (m_compute_layout)
+  {
+  case ComputeImageLayout::Undefined:
+    break;
+  case ComputeImageLayout::ReadOnly:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::WriteOnly:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::ReadWrite:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  }
+  m_compute_layout = ComputeImageLayout::Undefined;
+
   vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
                        &barrier);
 
   m_layout = new_layout;
 }
 
+void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout)
+{
+  _assert_(new_layout != ComputeImageLayout::Undefined);
+  if (m_compute_layout == new_layout)
+    return;
+
+  VkImageMemoryBarrier barrier = {
+      VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,  // VkStructureType            sType
+      nullptr,                                 // const void*                pNext
+      0,                                       // VkAccessFlags              srcAccessMask
+      0,                                       // VkAccessFlags              dstAccessMask
+      m_layout,                                // VkImageLayout              oldLayout
+      VK_IMAGE_LAYOUT_GENERAL,                 // VkImageLayout              newLayout
+      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   srcQueueFamilyIndex
+      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   dstQueueFamilyIndex
+      m_image,                                 // VkImage                    image
+      {static_cast<VkImageAspectFlags>(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT :
+                                                                       VK_IMAGE_ASPECT_COLOR_BIT),
+       0, m_levels, 0, m_layers}               // VkImageSubresourceRange    subresourceRange
+  };
+
+  VkPipelineStageFlags srcStageMask, dstStageMask;
+  switch (m_layout)
+  {
+  case VK_IMAGE_LAYOUT_UNDEFINED:
+    // Layout undefined therefore contents undefined, and we don't care what happens to it.
+    barrier.srcAccessMask = 0;
+    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_PREINITIALIZED:
+    // Image has been pre-initialized by the host, so ensure all writes have completed.
+    barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_HOST_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+    // Image was being used as a color attachment, so ensure all writes have completed.
+    barrier.srcAccessMask =
+        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+    // Image was being used as a depth-stencil attachment, so ensure all writes have completed.
+    barrier.srcAccessMask =
+        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    srcStageMask =
+        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+    // Image was being used as a shader resource, make sure all reads have finished.
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+    // Image was being used as a copy source, ensure all reads have finished.
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+    // Image was being used as a copy destination, ensure all writes have finished.
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    break;
+
+  default:
+    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    break;
+  }
+
+  switch (new_layout)
+  {
+  case ComputeImageLayout::ReadOnly:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::WriteOnly:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::ReadWrite:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  default:
+    // Unreachable: new_layout was asserted above to not be Undefined.
+    dstStageMask = 0;
+    break;
+  }
+
+  m_layout = barrier.newLayout;
+  m_compute_layout = new_layout;
+
+  vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
+                       &barrier);
+}
+
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.h b/Source/Core/VideoBackends/Vulkan/Texture2D.h
index bf0a8a70ab..3fce48d758 100644
--- a/Source/Core/VideoBackends/Vulkan/Texture2D.h
+++ b/Source/Core/VideoBackends/Vulkan/Texture2D.h
@@ -17,6 +17,15 @@ class ObjectCache;
 class Texture2D
 {
 public:
+  // Custom image layouts, mainly used for switching to/from compute
+  enum class ComputeImageLayout
+  {
+    Undefined,
+    ReadOnly,
+    WriteOnly,
+    ReadWrite
+  };
+
   Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format,
             VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image,
             VkDeviceMemory device_memory, VkImageView view);
@@ -50,6 +59,7 @@ public:
   void OverrideImageLayout(VkImageLayout new_layout);
 
   void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout);
+  void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout);
 
 private:
   u32 m_width;
@@ -60,6 +70,7 @@ private:
   VkSampleCountFlagBits m_samples;
   VkImageViewType m_view_type;
   VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+  ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined;
 
   VkImage m_image;
   VkDeviceMemory m_device_memory;
diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp
index f49ca90580..02ad129972 100644
--- a/Source/Core/VideoBackends/Vulkan/Util.cpp
+++ b/Source/Core/VideoBackends/Vulkan/Util.cpp
@@ -250,6 +250,18 @@ VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, bo
   return CreateShaderModule(code.data(), code.size());
 }
 
+VkShaderModule CompileAndCreateComputeShader(const std::string& source_code, bool prepend_header)
+{
+  ShaderCompiler::SPIRVCodeVector code;
+  if (!ShaderCompiler::CompileComputeShader(&code, source_code.c_str(), source_code.length(),
+                                            prepend_header))
+  {
+    return VK_NULL_HANDLE;
+  }
+
+  return CreateShaderModule(code.data(), code.size());
+}
+
 }  // namespace Util
 
 UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer,
@@ -670,4 +682,157 @@ bool UtilityShaderDraw::BindPipeline()
   return true;
 }
 
+ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer,
+                                                 VkPipelineLayout pipeline_layout,
+                                                 VkShaderModule compute_shader)
+    : m_command_buffer(command_buffer)
+{
+  // Populate minimal pipeline state
+  m_pipeline_info.pipeline_layout = pipeline_layout;
+  m_pipeline_info.cs = compute_shader;
+}
+
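+// Usage sketch (illustrative only, not part of this change): callers allocate,
+// fill, then commit uniforms before calling Dispatch(), e.g.:
+//   u8* ptr = dispatcher.AllocateUniformBuffer(sizeof(uniforms));
+//   std::memcpy(ptr, &uniforms, sizeof(uniforms));
+//   dispatcher.CommitUniformBuffer(sizeof(uniforms));
+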
+u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size)
+{
+  if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory(
+          size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true))
+    PanicAlert("Failed to allocate util uniforms");
+
+  return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer();
+}
+
+void ComputeShaderDispatcher::CommitUniformBuffer(size_t size)
+{
+  m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer();
+  m_uniform_buffer.offset = 0;
+  m_uniform_buffer.range = size;
+  m_uniform_buffer_offset =
+      static_cast<u32>(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset());
+
+  g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size);
+}
+
+void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size)
+{
+  _assert_(static_cast<u32>(data_size) < PUSH_CONSTANT_BUFFER_SIZE);
+
+  vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+                     0, static_cast<u32>(data_size), data);
+}
+
+void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler)
+{
+  m_samplers[index].sampler = sampler;
+  m_samplers[index].imageView = view;
+  m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+}
+
+void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout)
+{
+  m_storage_image.sampler = VK_NULL_HANDLE;
+  m_storage_image.imageView = view;
+  m_storage_image.imageLayout = image_layout;
+}
+
+void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view)
+{
+  m_texel_buffers[index] = view;
+}
+
+void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z)
+{
+  BindDescriptors();
+  if (!BindPipeline())
+    return;
+
+  vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z);
+}
+
+void ComputeShaderDispatcher::BindDescriptors()
+{
+  VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
+      g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE));
+  if (set == VK_NULL_HANDLE)
+  {
+    PanicAlert("Failed to allocate descriptor set for compute dispatch");
+    return;
+  }
+
+  // Reserve enough descriptors to write every binding.
+  std::array<VkWriteDescriptorSet, 8> set_writes = {};
+  u32 num_set_writes = 0;
+
+  if (m_uniform_buffer.buffer != VK_NULL_HANDLE)
+  {
+    set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                    nullptr,
+                                    set,
+                                    0,
+                                    0,
+                                    1,
+                                    VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
+                                    nullptr,
+                                    &m_uniform_buffer,
+                                    nullptr};
+  }
+
+  // Samplers
+  for (size_t i = 0; i < m_samplers.size(); i++)
+  {
+    const VkDescriptorImageInfo& info = m_samplers[i];
+    if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
+    {
+      set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                      nullptr,
+                                      set,
+                                      static_cast<u32>(1 + i),
+                                      0,
+                                      1,
+                                      VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                                      &info,
+                                      nullptr,
+                                      nullptr};
+    }
+  }
+
+  for (size_t i = 0; i < m_texel_buffers.size(); i++)
+  {
+    if (m_texel_buffers[i] != VK_NULL_HANDLE)
+    {
+      set_writes[num_set_writes++] = {
+          VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,   nullptr, set,     5 + static_cast<u32>(i), 0, 1,
+          VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,  nullptr, nullptr, &m_texel_buffers[i]};
+    }
+  }
+
+  if (m_storage_image.imageView != VK_NULL_HANDLE)
+  {
+    set_writes[num_set_writes++] = {
+        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr,          set,     7, 0, 1,
+        VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,       &m_storage_image, nullptr, nullptr};
+  }
+
+  if (num_set_writes > 0)
+  {
+    vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0,
+                           nullptr);
+  }
+
+  vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+                          m_pipeline_info.pipeline_layout, 0, 1, &set, 1,
+                          &m_uniform_buffer_offset);
+}
+
+bool ComputeShaderDispatcher::BindPipeline()
+{
+  VkPipeline pipeline = g_object_cache->GetComputePipeline(m_pipeline_info);
+  if (pipeline == VK_NULL_HANDLE)
+  {
+    PanicAlert("Failed to get pipeline for backend compute dispatch");
+    return false;
+  }
+
+  vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+  return true;
+}
+
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h
index 7ee8e82356..f5385932bd 100644
--- a/Source/Core/VideoBackends/Vulkan/Util.h
+++ b/Source/Core/VideoBackends/Vulkan/Util.h
@@ -63,6 +63,10 @@ VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code,
 // Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V.
 VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code,
                                               bool prepend_header = true);
+
+// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V.
+VkShaderModule CompileAndCreateComputeShader(const std::string& source_code,
+                                             bool prepend_header = true);
 }
 
 // Utility shader vertex format
@@ -188,4 +192,41 @@ private:
   PipelineInfo m_pipeline_info = {};
 };
 
+class ComputeShaderDispatcher
+{
+public:
+  ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout,
+                          VkShaderModule compute_shader);
+
+  u8* AllocateUniformBuffer(size_t size);
+  void CommitUniformBuffer(size_t size);
+
+  void SetPushConstants(const void* data, size_t data_size);
+
+  void SetSampler(size_t index, VkImageView view, VkSampler sampler);
+
+  void SetTexelBuffer(size_t index, VkBufferView view);
+
+  void SetStorageImage(VkImageView view, VkImageLayout image_layout);
+
+  void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z);
+
+private:
+  void BindDescriptors();
+  bool BindPipeline();
+
+  VkCommandBuffer m_command_buffer = VK_NULL_HANDLE;
+
+  VkDescriptorBufferInfo m_uniform_buffer = {};
+  u32 m_uniform_buffer_offset = 0;
+
+  std::array<VkDescriptorImageInfo, 4> m_samplers = {};
+
+  std::array<VkBufferView, 2> m_texel_buffers = {};
+
+  VkDescriptorImageInfo m_storage_image = {};
+
+  ComputePipelineInfo m_pipeline_info = {};
+};
+
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
index ae5a6f2bda..25f133fc3b 100644
--- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
@@ -234,6 +234,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
   config->backend_info.bSupportsPaletteConversion = true;  // Assumed support.
   config->backend_info.bSupportsClipControl = true;        // Assumed support.
   config->backend_info.bSupportsMultithreading = true;     // Assumed support.
+  config->backend_info.bSupportsComputeShaders = true;     // Assumed support.
   config->backend_info.bSupportsInternalResolutionFrameDumps = true;  // Assumed support.
   config->backend_info.bSupportsPostProcessing = false;    // No support yet.
   config->backend_info.bSupportsDualSourceBlend = false;   // Dependent on features.
@@ -244,8 +245,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
   config->backend_info.bSupportsSSAA = false;              // Dependent on features.
   config->backend_info.bSupportsDepthClamp = false;        // Dependent on features.
   config->backend_info.bSupportsReversedDepthRange = false;  // No support yet due to driver bugs.
-  config->backend_info.bSupportsComputeShaders = false;    // No support yet.
-  config->backend_info.bSupportsGPUTextureDecoding = false;  // No support yet.
+  config->backend_info.bSupportsGPUTextureDecoding = false;  // No support yet.
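Taken together, a dispatch through the new path looks roughly like the sketch below. This is illustrative only and not part of the patch: the shader source, the texture object, the Texture2D accessors used here (GetView(), GetLayout(), GetWidth(), GetHeight()), and the g_object_cache->GetPipelineLayout() accessor are assumptions for the example.

// Illustrative sketch (not from this patch). Assumes `cs_source` holds GLSL
// compute source written against COMPUTE_SHADER_HEADER, and `texture` is a
// Texture2D* whose image was created with VK_IMAGE_USAGE_STORAGE_BIT.
VkShaderModule cs = Util::CompileAndCreateComputeShader(cs_source);
if (cs == VK_NULL_HANDLE)
  return;

VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();

// Storage-image writes require the compute WriteOnly layout (VK_IMAGE_LAYOUT_GENERAL).
texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly);

ComputeShaderDispatcher dispatcher(command_buffer,
                                   g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), cs);
dispatcher.SetStorageImage(texture->GetView(), texture->GetLayout());

// Group counts match the 8x8 local size used by the hypothetical shader.
dispatcher.Dispatch((texture->GetWidth() + 7) / 8, (texture->GetHeight() + 7) / 8, 1);

// Transition back before the result is sampled by a fragment shader.
texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);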