mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-01 03:32:58 +00:00
Merge pull request #7904 from stenzek/do-the-atomic-shuffle
PixelShaderGen: Use subgroup reduction for bounding box
This commit is contained in:
commit
a50a34b94b
@ -719,6 +719,29 @@ void ProgramShaderCache::CreateHeader()
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string shader_shuffle_string;
|
||||||
|
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
|
||||||
|
{
|
||||||
|
shader_shuffle_string = R"(
|
||||||
|
#extension GL_NV_shader_thread_group : enable
|
||||||
|
#extension GL_NV_shader_thread_shuffle : enable
|
||||||
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
|
|
||||||
|
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
|
||||||
|
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
|
||||||
|
|
||||||
|
#define IS_HELPER_INVOCATION gl_HelperThreadNV
|
||||||
|
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
|
||||||
|
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 8, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 4, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 2, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 1, 32));
|
||||||
|
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
|
||||||
|
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
|
||||||
|
)";
|
||||||
|
}
|
||||||
|
|
||||||
s_glsl_header = StringFromFormat(
|
s_glsl_header = StringFromFormat(
|
||||||
"%s\n"
|
"%s\n"
|
||||||
"%s\n" // ubo
|
"%s\n" // ubo
|
||||||
@ -737,6 +760,7 @@ void ProgramShaderCache::CreateHeader()
|
|||||||
"%s\n" // ES dual source blend
|
"%s\n" // ES dual source blend
|
||||||
"%s\n" // shader image load store
|
"%s\n" // shader image load store
|
||||||
"%s\n" // shader framebuffer fetch
|
"%s\n" // shader framebuffer fetch
|
||||||
|
"%s\n" // shader thread shuffle
|
||||||
|
|
||||||
// Precision defines for GLSL ES
|
// Precision defines for GLSL ES
|
||||||
"%s\n"
|
"%s\n"
|
||||||
@ -815,8 +839,9 @@ void ProgramShaderCache::CreateHeader()
|
|||||||
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
|
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
|
||||||
"#extension GL_ARB_shader_image_load_store : enable" :
|
"#extension GL_ARB_shader_image_load_store : enable" :
|
||||||
"",
|
"",
|
||||||
framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "",
|
framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(),
|
||||||
is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "",
|
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
|
||||||
|
is_glsles ? "precision highp sampler2DArray;" : "",
|
||||||
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
||||||
"precision highp usamplerBuffer;" :
|
"precision highp usamplerBuffer;" :
|
||||||
"",
|
"",
|
||||||
|
@ -661,6 +661,9 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
|
|||||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||||
g_ogl_config.max_samples = 1;
|
g_ogl_config.max_samples = 1;
|
||||||
|
|
||||||
|
g_ogl_config.bSupportsShaderThreadShuffleNV =
|
||||||
|
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
|
||||||
|
|
||||||
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||||
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||||
// enabled in the version check below.
|
// enabled in the version check below.
|
||||||
|
@ -70,6 +70,7 @@ struct VideoConfig
|
|||||||
bool bSupportsBitfield;
|
bool bSupportsBitfield;
|
||||||
bool bSupportsTextureSubImage;
|
bool bSupportsTextureSubImage;
|
||||||
EsFbFetchType SupportedFramebufferFetch;
|
EsFbFetchType SupportedFramebufferFetch;
|
||||||
|
bool bSupportsShaderThreadShuffleNV;
|
||||||
|
|
||||||
const char* gl_vendor;
|
const char* gl_vendor;
|
||||||
const char* gl_renderer;
|
const char* gl_renderer;
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "ShaderLang.h"
|
#include "ShaderLang.h"
|
||||||
#include "disassemble.h"
|
#include "disassemble.h"
|
||||||
|
|
||||||
|
#include "Common/CommonFuncs.h"
|
||||||
#include "Common/FileUtil.h"
|
#include "Common/FileUtil.h"
|
||||||
#include "Common/Logging/Log.h"
|
#include "Common/Logging/Log.h"
|
||||||
#include "Common/MsgHandler.h"
|
#include "Common/MsgHandler.h"
|
||||||
@ -99,6 +100,18 @@ static const char COMPUTE_SHADER_HEADER[] = R"(
|
|||||||
#define frac fract
|
#define frac fract
|
||||||
#define lerp mix
|
#define lerp mix
|
||||||
)";
|
)";
|
||||||
|
static const char SUBGROUP_HELPER_HEADER[] = R"(
|
||||||
|
#extension GL_KHR_shader_subgroup_basic : enable
|
||||||
|
#extension GL_KHR_shader_subgroup_arithmetic : enable
|
||||||
|
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||||
|
|
||||||
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
|
#define CAN_USE_SUBGROUP_REDUCTION true
|
||||||
|
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||||
|
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(true)))
|
||||||
|
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||||
|
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
||||||
|
)";
|
||||||
|
|
||||||
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
|
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
|
||||||
const char* source_code, size_t source_code_length, const char* header,
|
const char* source_code, size_t source_code_length, const char* header,
|
||||||
@ -120,13 +133,20 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
|
|||||||
int pass_source_code_length = static_cast<int>(source_code_length);
|
int pass_source_code_length = static_cast<int>(source_code_length);
|
||||||
if (header_length > 0)
|
if (header_length > 0)
|
||||||
{
|
{
|
||||||
full_source_code.reserve(header_length + source_code_length);
|
constexpr size_t subgroup_helper_header_length = ArraySize(SUBGROUP_HELPER_HEADER) - 1;
|
||||||
|
full_source_code.reserve(header_length + subgroup_helper_header_length + source_code_length);
|
||||||
full_source_code.append(header, header_length);
|
full_source_code.append(header, header_length);
|
||||||
|
if (g_vulkan_context->SupportsShaderSubgroupOperations())
|
||||||
|
full_source_code.append(SUBGROUP_HELPER_HEADER, subgroup_helper_header_length);
|
||||||
full_source_code.append(source_code, source_code_length);
|
full_source_code.append(source_code, source_code_length);
|
||||||
pass_source_code = full_source_code.c_str();
|
pass_source_code = full_source_code.c_str();
|
||||||
pass_source_code_length = static_cast<int>(full_source_code.length());
|
pass_source_code_length = static_cast<int>(full_source_code.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sub-group operations require Vulkan 1.1 and SPIR-V 1.3.
|
||||||
|
if (g_vulkan_context->SupportsShaderSubgroupOperations())
|
||||||
|
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
|
||||||
|
|
||||||
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
|
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
|
||||||
|
|
||||||
auto DumpBadShader = [&](const char* msg) {
|
auto DumpBadShader = [&](const char* msg) {
|
||||||
|
@ -384,6 +384,7 @@ std::unique_ptr<VulkanContext> VulkanContext::Create(VkInstance instance, VkPhys
|
|||||||
|
|
||||||
// Initialize DriverDetails so that we can check for bugs to disable features if needed.
|
// Initialize DriverDetails so that we can check for bugs to disable features if needed.
|
||||||
context->InitDriverDetails();
|
context->InitDriverDetails();
|
||||||
|
context->PopulateShaderSubgroupSupport();
|
||||||
|
|
||||||
// Enable debug reports if the "Host GPU" log category is enabled.
|
// Enable debug reports if the "Host GPU" log category is enabled.
|
||||||
if (enable_debug_reports)
|
if (enable_debug_reports)
|
||||||
@ -864,4 +865,31 @@ void VulkanContext::InitDriverDetails()
|
|||||||
static_cast<double>(m_device_properties.driverVersion),
|
static_cast<double>(m_device_properties.driverVersion),
|
||||||
DriverDetails::Family::UNKNOWN);
|
DriverDetails::Family::UNKNOWN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VulkanContext::PopulateShaderSubgroupSupport()
|
||||||
|
{
|
||||||
|
// If this function isn't available, we don't support Vulkan 1.1.
|
||||||
|
if (!vkGetPhysicalDeviceProperties2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
VkPhysicalDeviceProperties2 device_properties_2 = {};
|
||||||
|
device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||||
|
|
||||||
|
VkPhysicalDeviceSubgroupProperties subgroup_properties = {};
|
||||||
|
subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
|
||||||
|
device_properties_2.pNext = &subgroup_properties;
|
||||||
|
|
||||||
|
vkGetPhysicalDeviceProperties2(m_physical_device, &device_properties_2);
|
||||||
|
|
||||||
|
m_shader_subgroup_size = subgroup_properties.subgroupSize;
|
||||||
|
|
||||||
|
// We require basic ops (for gl_SubgroupInvocationID), ballot (for subgroupBallot,
|
||||||
|
// subgroupBallotFindLSB), and arithmetic (for subgroupMin/subgroupMax).
|
||||||
|
constexpr VkSubgroupFeatureFlags required_operations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||||
|
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
|
||||||
|
VK_SUBGROUP_FEATURE_BALLOT_BIT;
|
||||||
|
m_supports_shader_subgroup_operations =
|
||||||
|
(subgroup_properties.supportedOperations & required_operations) == required_operations &&
|
||||||
|
subgroup_properties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
}
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@ -80,6 +80,8 @@ public:
|
|||||||
{
|
{
|
||||||
return m_device_features.occlusionQueryPrecise == VK_TRUE;
|
return m_device_features.occlusionQueryPrecise == VK_TRUE;
|
||||||
}
|
}
|
||||||
|
u32 GetShaderSubgroupSize() const { return m_shader_subgroup_size; }
|
||||||
|
bool SupportsShaderSubgroupOperations() const { return m_supports_shader_subgroup_operations; }
|
||||||
|
|
||||||
// Helpers for getting constants
|
// Helpers for getting constants
|
||||||
VkDeviceSize GetUniformBufferAlignment() const
|
VkDeviceSize GetUniformBufferAlignment() const
|
||||||
@ -112,6 +114,7 @@ private:
|
|||||||
bool SelectDeviceFeatures();
|
bool SelectDeviceFeatures();
|
||||||
bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer);
|
bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer);
|
||||||
void InitDriverDetails();
|
void InitDriverDetails();
|
||||||
|
void PopulateShaderSubgroupSupport();
|
||||||
|
|
||||||
VkInstance m_instance = VK_NULL_HANDLE;
|
VkInstance m_instance = VK_NULL_HANDLE;
|
||||||
VkPhysicalDevice m_physical_device = VK_NULL_HANDLE;
|
VkPhysicalDevice m_physical_device = VK_NULL_HANDLE;
|
||||||
@ -128,6 +131,9 @@ private:
|
|||||||
VkPhysicalDeviceFeatures m_device_features = {};
|
VkPhysicalDeviceFeatures m_device_features = {};
|
||||||
VkPhysicalDeviceProperties m_device_properties = {};
|
VkPhysicalDeviceProperties m_device_properties = {};
|
||||||
VkPhysicalDeviceMemoryProperties m_device_memory_properties = {};
|
VkPhysicalDeviceMemoryProperties m_device_memory_properties = {};
|
||||||
|
|
||||||
|
u32 m_shader_subgroup_size = 1;
|
||||||
|
bool m_supports_shader_subgroup_operations = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::unique_ptr<VulkanContext> g_vulkan_context;
|
extern std::unique_ptr<VulkanContext> g_vulkan_context;
|
||||||
|
@ -59,6 +59,7 @@ VULKAN_INSTANCE_ENTRY_POINT(vkCreateMacOSSurfaceMVK, false)
|
|||||||
VULKAN_INSTANCE_ENTRY_POINT(vkCreateDebugReportCallbackEXT, false)
|
VULKAN_INSTANCE_ENTRY_POINT(vkCreateDebugReportCallbackEXT, false)
|
||||||
VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, false)
|
VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, false)
|
||||||
VULKAN_INSTANCE_ENTRY_POINT(vkDebugReportMessageEXT, false)
|
VULKAN_INSTANCE_ENTRY_POINT(vkDebugReportMessageEXT, false)
|
||||||
|
VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceProperties2, false)
|
||||||
|
|
||||||
#endif // VULKAN_INSTANCE_ENTRY_POINT
|
#endif // VULKAN_INSTANCE_ENTRY_POINT
|
||||||
|
|
||||||
|
@ -459,6 +459,17 @@ SSBO_BINDING(0) buffer BBox {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void UpdateBoundingBoxBuffer(float2 min_pos, float2 max_pos) {
|
||||||
|
if (bbox_left > int(min_pos.x))
|
||||||
|
atomicMin(bbox_left, int(min_pos.x));
|
||||||
|
if (bbox_right < int(max_pos.x))
|
||||||
|
atomicMax(bbox_right, int(max_pos.x));
|
||||||
|
if (bbox_top > int(min_pos.y))
|
||||||
|
atomicMin(bbox_top, int(min_pos.y));
|
||||||
|
if (bbox_bottom < int(max_pos.y))
|
||||||
|
atomicMax(bbox_bottom, int(max_pos.y));
|
||||||
|
}
|
||||||
|
|
||||||
void UpdateBoundingBox(float2 rawpos) {
|
void UpdateBoundingBox(float2 rawpos) {
|
||||||
// The pixel center in the GameCube GPU is 7/12, not 0.5 (see VertexShaderGen.cpp)
|
// The pixel center in the GameCube GPU is 7/12, not 0.5 (see VertexShaderGen.cpp)
|
||||||
// Adjust for this by unapplying the offset we added in the vertex shader.
|
// Adjust for this by unapplying the offset we added in the vertex shader.
|
||||||
@ -471,18 +482,25 @@ void UpdateBoundingBox(float2 rawpos) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The bounding box register is exclusive of the right coordinate, hence the +1.
|
// The bounding box register is exclusive of the right coordinate, hence the +1.
|
||||||
int2 pos = iround(rawpos * cefbscale + offset);
|
int2 min_pos = iround(rawpos * cefbscale + offset);
|
||||||
int2 pos_offset = pos + int2(1, 1);
|
int2 max_pos = min_pos + int2(1, 1);
|
||||||
|
|
||||||
if (bbox_left > pos.x)
|
#ifdef SUPPORTS_SUBGROUP_REDUCTION
|
||||||
atomicMin(bbox_left, pos.x);
|
if (CAN_USE_SUBGROUP_REDUCTION) {
|
||||||
if (bbox_right < pos_offset.x)
|
min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : min_pos;
|
||||||
atomicMax(bbox_right, pos_offset.x);
|
max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : max_pos;
|
||||||
if (bbox_top > pos.y)
|
SUBGROUP_MIN(min_pos);
|
||||||
atomicMin(bbox_top, pos.y);
|
SUBGROUP_MAX(max_pos);
|
||||||
if (bbox_bottom < pos_offset.y)
|
if (IS_FIRST_ACTIVE_INVOCATION)
|
||||||
atomicMax(bbox_bottom, pos_offset.y);
|
UpdateBoundingBoxBuffer(min_pos, max_pos);
|
||||||
|
} else {
|
||||||
|
UpdateBoundingBoxBuffer(min_pos, max_pos);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
UpdateBoundingBoxBuffer(min_pos, max_pos);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
)");
|
)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1332,7 +1350,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
|
|||||||
if (!uid_data->alpha_test_use_zcomploc_hack)
|
if (!uid_data->alpha_test_use_zcomploc_hack)
|
||||||
{
|
{
|
||||||
out.Write("\t\tdiscard;\n");
|
out.Write("\t\tdiscard;\n");
|
||||||
if (ApiType != APIType::D3D)
|
if (ApiType == APIType::D3D)
|
||||||
out.Write("\t\treturn;\n");
|
out.Write("\t\treturn;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user