From 61cfd8696ee010758c48d02eac13d3dffc3501a7 Mon Sep 17 00:00:00 2001 From: DevJPM Date: Sat, 2 Jan 2021 13:47:18 +0100 Subject: [PATCH] Fix CPU Core Count detection and Enable Parallel Shader Compilation This does the following things: - Defaults to the runtime automatic number of threads for pre-compiling shaders - Adds a distinct automatic thread count computation for pre-compilation (which has fewer other things going on and should scale better beyond 4 cores) - Removes the unused logical_cpu_count field from the CPU detection - Changes the semantics of num_cores from the maximum addressable number of cores to the actually available CPU cores (which is also how it was actually used) - Updates the computation of the HTT flag now that AMD no longer lies about it for its Zen processors - Background shader compilation is *not* enabled by default --- Source/Core/Common/CPUDetect.h | 1 - Source/Core/Common/x64CPUDetect.cpp | 41 +++++--------------- Source/Core/Core/Config/GraphicsSettings.cpp | 2 +- Source/Core/VideoCommon/VideoConfig.cpp | 10 ++++- 4 files changed, 20 insertions(+), 34 deletions(-) diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index be807e6fef..db377a2006 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -26,7 +26,6 @@ struct CPUInfo bool HTT = false; int num_cores = 0; - int logical_cpu_count = 0; bool bSSE = false; bool bSSE2 = false; diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 87c3d50acd..76ce1af7e5 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "Common/CPUDetect.h" #include "Common/CommonTypes.h" @@ -106,7 +107,6 @@ void CPUInfo::Detect() // Detect family and other misc stuff. 
bool ht = false; HTT = ht; - logical_cpu_count = 1; if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); @@ -120,9 +120,13 @@ void CPUInfo::Detect() // Detect AMD Zen1, Zen1+ and Zen2 if (family == 23) bZen1p2 = true; - logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; ht = (cpu_id[3] >> 28) & 1; + // AMD CPUs before Zen faked this flag and didn't actually + // implement simultaneous multithreading (SMT; Intel calls it HTT) + // but rather some weird middle-ground between 1-2 cores + HTT = ht && (vendor == CPUVendor::Intel || family >= 23); + if ((cpu_id[3] >> 25) & 1) bSSE = true; if ((cpu_id[3] >> 26) & 1) @@ -200,35 +204,10 @@ void CPUInfo::Detect() bLongMode = true; } - num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count; - - if (max_ex_fn >= 0x80000008) - { - // Get number of cores. This is a bit complicated. Following AMD manual here. - __cpuid(cpu_id, 0x80000008); - int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; - if (apic_id_core_id_size == 0) - { - if (ht) - { - // New mechanism for modern Intel CPUs. - if (vendor == CPUVendor::Intel) - { - __cpuidex(cpu_id, 0x00000004, 0x00000000); - int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; - HTT = (cores_x_package < logical_cpu_count); - cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; - num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; - logical_cpu_count /= cores_x_package; - } - } - } - else - { - // Use AMD's new method. 
- num_cores = (cpu_id[2] & 0xFF) + 1; - } - } + // this should be much more reliable and easier + // than trying to get the number of cores out of the CPUID data + // ourselves + num_cores = std::max(std::thread::hardware_concurrency(), 1u); } // Turn the CPU info into a string we can show diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 4ce7c1b3a8..7eb5645f30 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -85,7 +85,7 @@ const Info GFX_SHADER_COMPILATION_MODE{ {System::GFX, "Settings", "ShaderCompilationMode"}, ShaderCompilationMode::Synchronous}; const Info GFX_SHADER_COMPILER_THREADS{{System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; const Info GFX_SHADER_PRECOMPILER_THREADS{ - {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; + {System::GFX, "Settings", "ShaderPrecompilerThreads"}, -1}; const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 88ec126c7c..9cdf1256f7 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -176,6 +176,14 @@ static u32 GetNumAutoShaderCompilerThreads() return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); } +static u32 GetNumAutoShaderPreCompilerThreads() +{ + // Automatic number. We use clamp(cpus - 2, 1, infty) here. + // We chose this because we don't want to limit our speed-up + // and at the same time leave two logical cores for the dolphin UI and the rest of the OS. 
+ return static_cast(std::max(cpu_info.num_cores - 2, 1)); +} + u32 VideoConfig::GetShaderCompilerThreads() const { if (!backend_info.bSupportsBackgroundCompiling) @@ -199,5 +207,5 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const if (iShaderPrecompilerThreads >= 0) return static_cast(iShaderPrecompilerThreads); else - return GetNumAutoShaderCompilerThreads(); + return GetNumAutoShaderPreCompilerThreads(); }