mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-17 16:20:38 +00:00
Merge pull request #11523 from degasus/OGL_KHR_subgroup
VideoBackend/OGL: Prefer KHR_shader_subgroup over NV_shader_thread.
This commit is contained in:
commit
258151fe5a
@ -37,6 +37,7 @@
|
||||
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
|
||||
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
|
||||
#include "Common/GL/GLExtensions/KHR_debug.h"
|
||||
#include "Common/GL/GLExtensions/KHR_shader_subgroup.h"
|
||||
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
|
||||
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
|
||||
#include "Common/GL/GLExtensions/NV_primitive_restart.h"
|
||||
|
19
Source/Core/Common/GL/GLExtensions/KHR_shader_subgroup.h
Normal file
19
Source/Core/Common/GL/GLExtensions/KHR_shader_subgroup.h
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
** Copyright (c) 2013-2015 The Khronos Group Inc.
|
||||
** SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "Common/GL/GLExtensions/gl_common.h"
|
||||
|
||||
// Tokens for the GL_KHR_shader_subgroup extension (the extension this header is named after).
// GL_SUBGROUP_SUPPORTED_FEATURES_KHR is read with glGetIntegerv in PopulateConfig; the other
// 0x953x tokens are presumably queried the same way -- TODO confirm against the extension spec.
#define GL_SUBGROUP_SIZE_KHR 0x9532
|
||||
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
|
||||
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
|
||||
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535
|
||||
// Feature flags, one distinct bit each. PopulateConfig masks the value queried via
// GL_SUBGROUP_SUPPORTED_FEATURES_KHR against BASIC | ARITHMETIC | BALLOT and clears
// bSupportsKHRShaderSubgroup if any of those three bits is missing.
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
|
||||
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
|
||||
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
|
||||
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
|
||||
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
|
||||
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
|
||||
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
|
||||
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080
|
@ -386,7 +386,6 @@ static const std::string_view SUBGROUP_HELPER_HEADER = R"(
|
||||
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||
|
||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||
#define CAN_USE_SUBGROUP_REDUCTION true
|
||||
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||
|
@ -489,7 +489,14 @@ bool PopulateConfig(GLContext* m_main_gl_context)
|
||||
else if (GLExtensions::Version() >= 430)
|
||||
{
|
||||
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
|
||||
g_ogl_config.eSupportedGLSLVersion = Glsl430;
|
||||
if (GLExtensions::Version() >= 450)
|
||||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = Glsl450;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = Glsl430;
|
||||
}
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupportsImageLoadStore = true;
|
||||
g_Config.backend_info.bSupportsSSAA = true;
|
||||
@ -531,8 +538,23 @@ bool PopulateConfig(GLContext* m_main_gl_context)
|
||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||
g_ogl_config.max_samples = 1;
|
||||
|
||||
g_ogl_config.bSupportsShaderThreadShuffleNV =
|
||||
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
|
||||
const bool bSupportsIsHelperInvocation = g_ogl_config.bIsES ?
|
||||
g_ogl_config.eSupportedGLSLVersion >= GlslEs320 :
|
||||
g_ogl_config.eSupportedGLSLVersion >= Glsl450;
|
||||
g_ogl_config.bSupportsKHRShaderSubgroup =
|
||||
GLExtensions::Supports("GL_KHR_shader_subgroup") && bSupportsIsHelperInvocation;
|
||||
if (g_ogl_config.bSupportsKHRShaderSubgroup)
|
||||
{
|
||||
// Check for the features: basic + arithmetic + ballot
|
||||
GLint supported_features = 0;
|
||||
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
|
||||
if (~supported_features &
|
||||
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
|
||||
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
|
||||
{
|
||||
g_ogl_config.bSupportsKHRShaderSubgroup = false;
|
||||
}
|
||||
}
|
||||
|
||||
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||
|
@ -15,8 +15,9 @@ enum GlslVersion
|
||||
Glsl140,
|
||||
Glsl150,
|
||||
Glsl330,
|
||||
Glsl400, // and above
|
||||
Glsl430,
|
||||
Glsl400, // and above
|
||||
Glsl430, // 430 - 440
|
||||
Glsl450, // 450 - xxx
|
||||
GlslEs300, // GLES 3.0
|
||||
GlslEs310, // GLES 3.1
|
||||
GlslEs320, // GLES 3.2
|
||||
@ -61,7 +62,7 @@ struct VideoConfig
|
||||
bool bSupportsBitfield;
|
||||
bool bSupportsTextureSubImage;
|
||||
EsFbFetchType SupportedFramebufferFetch;
|
||||
bool bSupportsShaderThreadShuffleNV;
|
||||
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot
|
||||
|
||||
const char* gl_vendor;
|
||||
const char* gl_renderer;
|
||||
|
@ -78,6 +78,8 @@ static std::string GetGLSLVersionString()
|
||||
return "#version 400";
|
||||
case Glsl430:
|
||||
return "#version 430";
|
||||
case Glsl450:
|
||||
return "#version 450";
|
||||
default:
|
||||
// Shouldn't ever hit this
|
||||
return "#version ERROR";
|
||||
@ -720,25 +722,18 @@ void ProgramShaderCache::CreateHeader()
|
||||
}
|
||||
|
||||
std::string shader_shuffle_string;
|
||||
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
|
||||
if (g_ogl_config.bSupportsKHRShaderSubgroup)
|
||||
{
|
||||
shader_shuffle_string = R"(
|
||||
#extension GL_NV_shader_thread_group : enable
|
||||
#extension GL_NV_shader_thread_shuffle : enable
|
||||
#extension GL_KHR_shader_subgroup_basic : enable
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : enable
|
||||
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||
|
||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||
|
||||
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
|
||||
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
|
||||
|
||||
#define IS_HELPER_INVOCATION gl_HelperThreadNV
|
||||
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
|
||||
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
|
||||
value = func(value, shuffleXorNV(value, 8, 32)); \
|
||||
value = func(value, shuffleXorNV(value, 4, 32)); \
|
||||
value = func(value, shuffleXorNV(value, 2, 32)); \
|
||||
value = func(value, shuffleXorNV(value, 1, 32));
|
||||
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
|
||||
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
|
||||
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
||||
)";
|
||||
}
|
||||
|
||||
|
@ -81,9 +81,8 @@ static const char SUBGROUP_HELPER_HEADER[] = R"(
|
||||
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||
|
||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||
#define CAN_USE_SUBGROUP_REDUCTION true
|
||||
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(!gl_HelperInvocation)))
|
||||
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
||||
)";
|
||||
|
@ -457,15 +457,12 @@ void UpdateBoundingBox(float2 rawpos) {{
|
||||
int2 pos_br = pos | 1; // round up to odd
|
||||
|
||||
#ifdef SUPPORTS_SUBGROUP_REDUCTION
|
||||
if (CAN_USE_SUBGROUP_REDUCTION) {{
|
||||
int2 min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : pos_tl;
|
||||
int2 max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : pos_br;
|
||||
SUBGROUP_MIN(min_pos);
|
||||
SUBGROUP_MAX(max_pos);
|
||||
if (!IS_HELPER_INVOCATION)
|
||||
{{
|
||||
SUBGROUP_MIN(pos_tl);
|
||||
SUBGROUP_MAX(pos_br);
|
||||
if (IS_FIRST_ACTIVE_INVOCATION)
|
||||
UpdateBoundingBoxBuffer(min_pos, max_pos);
|
||||
}} else {{
|
||||
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
||||
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
||||
}}
|
||||
#else
|
||||
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
||||
|
Loading…
Reference in New Issue
Block a user