From 61cfd8696ee010758c48d02eac13d3dffc3501a7 Mon Sep 17 00:00:00 2001 From: DevJPM Date: Sat, 2 Jan 2021 13:47:18 +0100 Subject: [PATCH 1/3] Fix CPU Core Count detection and Enable Parallel Shader Compilation This does the following things: - Defaults to the runtime automatic number of threads for pre-compiling shaders - Adds a distinct automatic thread count computation for pre-compilation (which has fewer other things going on and should scale better beyond 4 cores) - Removes the unused logical_cpu_count field from the CPU detection - Changes the semantics of num_cores from the maximum addressable number of cores to the number of actually available CPU cores (which is also how it was actually used) - Updates the computation of the HTT flag now that AMD no longer lies about it for its Zen processors - Background shader compilation is *not* enabled by default --- Source/Core/Common/CPUDetect.h | 1 - Source/Core/Common/x64CPUDetect.cpp | 41 +++++--------------- Source/Core/Core/Config/GraphicsSettings.cpp | 2 +- Source/Core/VideoCommon/VideoConfig.cpp | 10 ++++- 4 files changed, 20 insertions(+), 34 deletions(-) diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index be807e6fef..db377a2006 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -26,7 +26,6 @@ struct CPUInfo bool HTT = false; int num_cores = 0; - int logical_cpu_count = 0; bool bSSE = false; bool bSSE2 = false; diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 87c3d50acd..76ce1af7e5 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "Common/CPUDetect.h" #include "Common/CommonTypes.h" @@ -106,7 +107,6 @@ void CPUInfo::Detect() // Detect family and other misc stuff. 
bool ht = false; HTT = ht; - logical_cpu_count = 1; if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); @@ -120,9 +120,13 @@ void CPUInfo::Detect() // Detect AMD Zen1, Zen1+ and Zen2 if (family == 23) bZen1p2 = true; - logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; ht = (cpu_id[3] >> 28) & 1; + // AMD CPUs before Zen faked this flag and didn't actually + // implement simultaneous multithreading (SMT; Intel calls it HTT) + // but rather some weird middle-ground between 1-2 cores + HTT = ht && (vendor == CPUVendor::Intel || family >= 23); + if ((cpu_id[3] >> 25) & 1) bSSE = true; if ((cpu_id[3] >> 26) & 1) @@ -200,35 +204,10 @@ void CPUInfo::Detect() bLongMode = true; } - num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count; - - if (max_ex_fn >= 0x80000008) - { - // Get number of cores. This is a bit complicated. Following AMD manual here. - __cpuid(cpu_id, 0x80000008); - int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; - if (apic_id_core_id_size == 0) - { - if (ht) - { - // New mechanism for modern Intel CPUs. - if (vendor == CPUVendor::Intel) - { - __cpuidex(cpu_id, 0x00000004, 0x00000000); - int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; - HTT = (cores_x_package < logical_cpu_count); - cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; - num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; - logical_cpu_count /= cores_x_package; - } - } - } - else - { - // Use AMD's new method. 
- num_cores = (cpu_id[2] & 0xFF) + 1; - } - } + // this should be much more reliable and easier + // than trying to get the number of cores out of the CPUID data + // ourselves + num_cores = std::max(std::thread::hardware_concurrency(), 1u); } // Turn the CPU info into a string we can show diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 4ce7c1b3a8..7eb5645f30 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -85,7 +85,7 @@ const Info GFX_SHADER_COMPILATION_MODE{ {System::GFX, "Settings", "ShaderCompilationMode"}, ShaderCompilationMode::Synchronous}; const Info GFX_SHADER_COMPILER_THREADS{{System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; const Info GFX_SHADER_PRECOMPILER_THREADS{ - {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; + {System::GFX, "Settings", "ShaderPrecompilerThreads"}, -1}; const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 88ec126c7c..9cdf1256f7 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -176,6 +176,14 @@ static u32 GetNumAutoShaderCompilerThreads() return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); } +static u32 GetNumAutoShaderPreCompilerThreads() +{ + // Automatic number. We use clamp(cpus - 2, 1, infty) here. + // We chose this because we don't want to limit our speed-up + // and at the same time leave two logical cores for the dolphin UI and the rest of the OS. 
+ return static_cast(std::max(cpu_info.num_cores - 2, 1)); +} + u32 VideoConfig::GetShaderCompilerThreads() const { if (!backend_info.bSupportsBackgroundCompiling) @@ -199,5 +207,5 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const if (iShaderPrecompilerThreads >= 0) return static_cast(iShaderPrecompilerThreads); else - return GetNumAutoShaderCompilerThreads(); + return GetNumAutoShaderPreCompilerThreads(); } From 613c4563c288b5b73b2f1defb23ad645cef97bee Mon Sep 17 00:00:00 2001 From: DevJPM Date: Thu, 3 Jun 2021 16:03:37 +0200 Subject: [PATCH 2/3] VideoCommon: Gate Multi-Threaded Shader Pre-Compilation behind a bug entry --- Source/Core/VideoCommon/DriverDetails.cpp | 15 +++++++++++++++ Source/Core/VideoCommon/DriverDetails.h | 8 ++++++++ Source/Core/VideoCommon/VideoConfig.cpp | 12 +++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 6029654e57..7f1d188ff8 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -131,6 +131,21 @@ constexpr BugInfo m_known_bugs[] = { -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, + // Default cases for broken MT precompilation + // Default cases get replaced by known-good places during init + {API_OPENGL, OS_ALL, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, + {API_VULKAN, OS_ALL, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, + // known good cases for broken MT precompilation + {API_OPENGL, OS_OSX, VENDOR_NVIDIA, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + {API_VULKAN, OS_OSX, VENDOR_NVIDIA, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + 
{API_OPENGL, OS_WINDOWS, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + {API_VULKAN, OS_WINDOWS, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 5e3f2ce161..a81ed3d6ff 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -314,6 +314,14 @@ enum Bug // Started version: -1 // Ended version: -1 BUG_BROKEN_SUBGROUP_INVOCATION_ID, + + // BUG: Multi-threaded shader pre-compilation sometimes crashes + // Used primarily in Videoconfig.cpp's GetNumAutoShaderPreCompilerThreads() + // refer to https://github.com/dolphin-emu/dolphin/pull/9414 for initial validation coverage + // All untested platforms will report as having this bug as to avoid crashes + // Note that things should highly likely work out fine on D3D + // so we didn't extend the Bug API to also support D3D + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION }; // Initializes our internal vendor, device family, and driver version diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 9cdf1256f7..5e833a4244 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -10,6 +10,7 @@ #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/Movie.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -204,8 +205,17 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const if (!backend_info.bSupportsBackgroundCompiling) return 0; + const bool bugDatabaseSupported = + backend_info.api_type == APIType::OpenGL || backend_info.api_type == APIType::Vulkan; + // DirectX has always worked in our tests in PR#9414 + const bool 
multiThreadingWorking = + !bugDatabaseSupported || + !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION); + if (iShaderPrecompilerThreads >= 0) return static_cast(iShaderPrecompilerThreads); - else + else if (multiThreadingWorking) return GetNumAutoShaderPreCompilerThreads(); + else + return 1; } From f43122cd8abbd74c00ec7701bf21892c77f0f967 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 22 Nov 2021 15:32:28 +0100 Subject: [PATCH 3/3] Trust MacOS not to crash during shader pre-compilation --- Source/Core/VideoCommon/DriverDetails.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 7f1d188ff8..6d9ac464d3 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -138,9 +138,9 @@ constexpr BugInfo m_known_bugs[] = { {API_VULKAN, OS_ALL, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, // known good cases for broken MT precompilation - {API_OPENGL, OS_OSX, VENDOR_NVIDIA, DRIVER_ALL, Family::UNKNOWN, + {API_OPENGL, OS_OSX, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, - {API_VULKAN, OS_OSX, VENDOR_NVIDIA, DRIVER_ALL, Family::UNKNOWN, + {API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, {API_OPENGL, OS_WINDOWS, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false},