From 49fc618682b51ed52622eab2f98c1239ebd130bd Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Tue, 17 Jan 2023 23:55:44 -0600
Subject: [PATCH] Add support for hybrid graphics systems (NVIDIA Optimus)
 (#782)

---
 CMakeLists.txt                        |   3 +
 src/platform/windows/display_base.cpp | 110 +++++++++++++++++-
 tools/CMakeLists.txt                  |   9 ++
 tools/ddprobe.cpp                     | 160 ++++++++++++++++++++++++++
 4 files changed, 281 insertions(+), 1 deletion(-)
 create mode 100644 tools/ddprobe.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index db58471d..68aa994c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -595,6 +595,9 @@ if(WIN32) # see options at: https://cmake.org/cmake/help/latest/cpack_gen/nsis.h
     install(TARGETS audio-info RUNTIME DESTINATION "tools" COMPONENT audio)
     install(TARGETS sunshinesvc RUNTIME DESTINATION "tools" COMPONENT sunshinesvc)
 
+    # Mandatory tools
+    install(TARGETS ddprobe RUNTIME DESTINATION "tools" COMPONENT application)
+
     # scripts
     install(DIRECTORY "${SUNSHINE_SOURCE_ASSETS_DIR}/windows/misc/firewall/"
             DESTINATION "scripts"
diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp
index 6f518f7c..6fe9f9f7 100644
--- a/src/platform/windows/display_base.cpp
+++ b/src/platform/windows/display_base.cpp
@@ -6,6 +6,12 @@
 #include <codecvt>
 #include <initguid.h>
 
+#include <boost/process.hpp>
+
+// We have to include boost/process.hpp before display.h due to WinSock.h,
+// but that prevents the definition of NTSTATUS so we must define it ourselves.
+typedef long NTSTATUS;
+
 #include "display.h"
 #include "misc.h"
 #include "src/config.h"
@@ -16,6 +22,8 @@ namespace platf {
 using namespace std::literals;
 }
 namespace platf::dxgi {
+namespace bp = boost::process;
+
 capture_e duplication_t::next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p) {
   auto capture_status = release_frame();
   if(capture_status != capture_e::ok) {
@@ -136,6 +144,96 @@ capture_e display_base_t::capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr<:
   return capture_e::ok;
 }
 
+// Stores "GpuPreference=<preference>;" for this executable in HKCU. Returns false on failure.
+bool set_gpu_preference_on_self(int preference) {
+  // The GPU preferences key uses app path as the value name.
+  WCHAR sunshine_path[MAX_PATH];
+  auto path_len = GetModuleFileNameW(NULL, sunshine_path, ARRAYSIZE(sunshine_path));
+  if(path_len == 0 || path_len >= ARRAYSIZE(sunshine_path)) { // failed or truncated
+    BOOST_LOG(error) << "Failed to get the path of the running executable"sv;
+    return false;
+  }
+
+  WCHAR value_data[128];
+  swprintf_s(value_data, L"GpuPreference=%d;", preference);
+  auto status = RegSetKeyValueW(HKEY_CURRENT_USER, L"Software\\Microsoft\\DirectX\\UserGpuPreferences",
+    sunshine_path, REG_SZ, value_data, (wcslen(value_data) + 1) * sizeof(WCHAR));
+  if(status != ERROR_SUCCESS) {
+    BOOST_LOG(error) << "Failed to set GPU preference: "sv << status;
+    return false;
+  }
+
+  BOOST_LOG(info) << "Set GPU preference: "sv << preference;
+  return true;
+}
+
+// On hybrid graphics systems, Windows will change the order of GPUs reported by
+// DXGI in accordance with the user's GPU preference. If the selected GPU is a
+// render-only device with no displays, DXGI will add virtual outputs to
+// that device to avoid confusing applications. While this works properly for most
+// applications, it breaks the Desktop Duplication API because DXGI doesn't proxy
+// the virtual DXGIOutput to the real GPU it is attached to. When trying to call
+// DuplicateOutput() on one of these virtual outputs, it fails with DXGI_ERROR_UNSUPPORTED
+// (even if you try sneaky stuff like passing the ID3D11Device for the iGPU and the
+// virtual DXGIOutput from the dGPU). Because the GPU preference is once-per-process,
+// we spawn a helper tool to probe for us before we set our own GPU preference.
+// Returns true once a working preference has been applied to this process, false otherwise.
+bool probe_for_gpu_preference(const std::string &display_name) {
+  // Once a working preference has been stored there is nothing left to do.
+  static bool preference_applied = false;
+  if(preference_applied) {
+    return true;
+  }
+
+  std::string probe_tool = "tools\\ddprobe.exe";
+
+  // We start at 1 because 0 is automatic selection which can be overridden by
+  // the GPU driver control panel options. Since ddprobe.exe can have different
+  // GPU driver overrides than Sunshine.exe, we want to avoid a scenario where
+  // autoselection might work for ddprobe.exe but not for us.
+  for(int preference = 1; preference < 5; preference++) {
+    // Run the probe tool
+    //
+    // Arg format: [GPU preference] [Display name]
+    //
+    // Exit codes:
+    // < 0 -> Error performing the probe
+    // 0   -> Probe failed (DD API doesn't work with that GPU preference)
+    // 1   -> Probe successful (DD API works)
+    int exit_code;
+    try {
+      exit_code = bp::system(probe_tool, std::to_string(preference), display_name, bp::std_out > bp::null, bp::std_err > bp::null);
+    }
+    catch(bp::process_error &e) {
+      BOOST_LOG(error) << "Failed to start ddprobe.exe: "sv << e.what();
+      return false;
+    }
+
+    BOOST_LOG(debug) << "ddprobe.exe ["sv << preference << "] ["sv << display_name << "] returned: "sv << exit_code;
+
+    if(exit_code < 0) {
+      // The probe itself broke; report it and move on to the next preference.
+      BOOST_LOG(error) << "ddprobe.exe ["sv << preference << "] ["sv << display_name << "] failed: "sv << exit_code;
+      continue;
+    }
+
+    if(exit_code == 0) {
+      // This configuration didn't work, so continue testing others
+      continue;
+    }
+
+    // We found a working GPU preference, so set ourselves to use that. The
+    // flag is only latched when the registry write succeeded, so a failed
+    // write is retried on the next call.
+    preference_applied = set_gpu_preference_on_self(preference);
+    return preference_applied;
+  }
+
+  // If none of the manual options worked, we'll try autoselection as a last-ditch effort
+  set_gpu_preference_on_self(0);
+  return false;
+}
+
 int display_base_t::init(int framerate, const std::string &display_name) {
   std::once_flag windows_cpp_once_flag;
 
@@ -164,6 +262,11 @@ int display_base_t::init(int framerate, const std::string &display_name) {
 
   HRESULT status;
 
+  // We must set the GPU preference before calling any DXGI APIs!
+  if(!probe_for_gpu_preference(display_name)) {
+    BOOST_LOG(warning) << "Failed to set GPU preference. Capture may not work!"sv;
+  }
+
   status = CreateDXGIFactory1(IID_IDXGIFactory1, (void **)&factory);
   if(FAILED(status)) {
     BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']';
@@ -577,10 +680,15 @@ std::vector<std::string> display_names(mem_type_e) {
 
   std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
 
+  // We must set the GPU preference before calling any DXGI APIs!
+  if(!dxgi::probe_for_gpu_preference("")) {
+    BOOST_LOG(warning) << "Failed to set GPU preference. Capture may not work!"sv;
+  }
+
   dxgi::factory1_t factory;
   status = CreateDXGIFactory1(IID_IDXGIFactory1, (void **)&factory);
   if(FAILED(status)) {
-    BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']' << std::endl;
+    BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']';
     return {};
   }
 
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index a66bbb2c..c2121a7d 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -27,3 +27,12 @@ target_link_libraries(sunshinesvc
         wtsapi32
         ${PLATFORM_LIBRARIES})
 target_compile_options(sunshinesvc PRIVATE ${SUNSHINE_COMPILE_OPTIONS})
+
+add_executable(ddprobe ddprobe.cpp)
+set_target_properties(ddprobe PROPERTIES CXX_STANDARD 17)
+target_link_libraries(ddprobe
+        ${CMAKE_THREAD_LIBS_INIT}
+        dxgi
+        d3d11
+        ${PLATFORM_LIBRARIES})
+target_compile_options(ddprobe PRIVATE ${SUNSHINE_COMPILE_OPTIONS})
diff --git a/tools/ddprobe.cpp b/tools/ddprobe.cpp
new file mode 100644
index 00000000..90628c20
--- /dev/null
+++ b/tools/ddprobe.cpp
@@ -0,0 +1,160 @@
+#include <d3d11.h>
+#include <dxgi1_2.h>
+
+#include <codecvt>
+#include <iostream>
+#include <locale>
+#include <string>
+
+#include "src/utility.h"
+
+using namespace std::literals;
+namespace dxgi {
+// Passed to util::safe_ptr below (presumably as its deleter): calls Release() on the interface.
+template<class T>
+void Release(T *iface) { iface->Release(); }
+
+// util::safe_ptr aliases for the DXGI/D3D11 interfaces this tool touches.
+using factory1_t = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory1>>;
+using adapter_t  = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
+using output_t   = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
+using output1_t  = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
+using device_t   = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
+using dup_t      = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
+
+} // namespace dxgi
+
+// Stores "GpuPreference=<preference>;" for this executable in HKCU. Returns false on failure.
+bool set_gpu_preference(int preference) {
+  // The GPU preferences key uses app path as the value name.
+  WCHAR executable_path[MAX_PATH];
+  auto path_len = GetModuleFileNameW(NULL, executable_path, ARRAYSIZE(executable_path));
+  if(path_len == 0 || path_len >= ARRAYSIZE(executable_path)) { // failed or truncated
+    std::cout << "Failed to get the path of the running executable"sv << std::endl;
+    return false;
+  }
+
+  WCHAR value_data[128];
+  swprintf_s(value_data, L"GpuPreference=%d;", preference);
+  auto status = RegSetKeyValueW(HKEY_CURRENT_USER, L"Software\\Microsoft\\DirectX\\UserGpuPreferences",
+    executable_path, REG_SZ, value_data, (wcslen(value_data) + 1) * sizeof(WCHAR));
+  if(status != ERROR_SUCCESS) {
+    std::cout << "Failed to set GPU preference: "sv << status << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+// Returns true if DuplicateOutput() succeeds for `output` with a D3D11 device made on `adapter`.
+bool test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output) {
+  // Feature levels offered to D3D11CreateDevice(), highest first.
+  D3D_FEATURE_LEVEL levels[] {
+    D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0,
+    D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0,
+    D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1
+  };
+
+  dxgi::device_t device;
+  HRESULT status = D3D11CreateDevice(
+    adapter.get(),
+    D3D_DRIVER_TYPE_UNKNOWN,
+    nullptr,
+    D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
+    levels, sizeof(levels) / sizeof(levels[0]),
+    D3D11_SDK_VERSION,
+    &device,
+    nullptr,
+    nullptr);
+  if(FAILED(status)) {
+    std::cout << "Failed to create D3D11 device for DD test [0x"sv << util::hex(status).to_string_view() << ']' << std::endl;
+    return false;
+  }
+
+  // DuplicateOutput() is reached through IDXGIOutput1, so ask the output for it.
+  dxgi::output1_t output1;
+  status = output->QueryInterface(IID_IDXGIOutput1, (void **)&output1);
+  if(FAILED(status)) {
+    std::cout << "Failed to query IDXGIOutput1 from the output"sv << std::endl;
+    return false;
+  }
+
+  // Check if we can use the Desktop Duplication API on this output. Up to two
+  // attempts are made, and every failed attempt is followed by a 200 ms sleep.
+  int attempts_left = 2;
+  while(attempts_left-- > 0) {
+    dxgi::dup_t dup;
+    if(SUCCEEDED(output1->DuplicateOutput((IUnknown *)device.get(), &dup))) {
+      return true;
+    }
+    Sleep(200);
+  }
+
+  return false;
+}
+
+// Usage: ddprobe.exe [GPU preference value] [display name]
+// Exit codes:
+//    1 -> the first matching output that is attached to the desktop can be duplicated
+//    0 -> duplication failed on that output, or no such output was found
+//   -1 -> wrong number of arguments
+//   -2 -> the GPU preference could not be set
+//   -3 -> the DXGI factory could not be created
+int main(int argc, char *argv[]) {
+  // Display name may be omitted
+  if(argc != 2 && argc != 3) {
+    std::cout << "ddprobe.exe [GPU preference value] [display name]"sv << std::endl;
+    return -1;
+  }
+
+  // Stays empty when no display name was passed, which disables the name filter below.
+  std::wstring wanted_name;
+  if(argc == 3) {
+    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf8_to_utf16;
+    wanted_name = utf8_to_utf16.from_bytes(argv[2]);
+  }
+
+  // We must set the GPU preference before making any DXGI/D3D calls
+  if(!set_gpu_preference(atoi(argv[1]))) {
+    return -2;
+  }
+
+  // Remove the GPU preference when we're done
+  auto reset_gpu = util::fail_guard([]() {
+    WCHAR tool_path[MAX_PATH];
+    GetModuleFileNameW(NULL, tool_path, ARRAYSIZE(tool_path));
+    RegDeleteKeyValueW(HKEY_CURRENT_USER, L"Software\\Microsoft\\DirectX\\UserGpuPreferences", tool_path);
+  });
+
+  dxgi::factory1_t factory;
+  HRESULT status = CreateDXGIFactory1(IID_IDXGIFactory1, (void **)&factory);
+  if(FAILED(status)) {
+    std::cout << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']' << std::endl;
+    return -3;
+  }
+
+  dxgi::adapter_t::pointer adapter_p {};
+  for(int adapter_idx = 0; factory->EnumAdapters1(adapter_idx, &adapter_p) != DXGI_ERROR_NOT_FOUND; ++adapter_idx) {
+    dxgi::adapter_t adapter { adapter_p };
+
+    dxgi::output_t::pointer output_p {};
+    for(int output_idx = 0; adapter->EnumOutputs(output_idx, &output_p) != DXGI_ERROR_NOT_FOUND; ++output_idx) {
+      dxgi::output_t output { output_p };
+
+      DXGI_OUTPUT_DESC desc;
+      output->GetDesc(&desc);
+
+      // Skip outputs whose name differs from the requested one (if any), and
+      // outputs that are not part of the desktop since those can't be captured.
+      const bool name_mismatch = !wanted_name.empty() && desc.DeviceName != wanted_name;
+      if(name_mismatch || !desc.AttachedToDesktop) {
+        continue;
+      }
+
+      // We found the matching output. Test it and return the result.
+      return test_dxgi_duplication(adapter, output) ? 1 : 0;
+    }
+  }
+
+  return 0;
+}