Support audio loopback on Windows using WASAPI

2025-03-15 19:20:59 +00:00 · 2020-01-15 18:31:28 +01:00 · 2020-01-15 18:31:28 +01:00 · 0eebcf7836
commit 0eebcf7836
parent 05fc550c0e
13 changed files with 383 additions and 44 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,15 +13,17 @@ find_package(OpenSSL REQUIRED)
 find_package(FFmpeg REQUIRED)
 find_package(Boost COMPONENTS log filesystem REQUIRED)

-
 if(WIN32)
 	set(PLATFORM_TARGET_FILES
 		sunshine/platform/windows.cpp
-		sunshine/platform/windows_dxgi.cpp)
+		sunshine/platform/windows_dxgi.cpp
+		sunshine/platform/windows_wasapi.cpp)
 	set(PLATFORM_LIBRARIES
 		winmm
+		ksuser
 		wsock32
 		ws2_32
+		windowsapp
 		d3d11 dxgi)
 else()
 	find_package(X11 REQUIRED)
--- a/sunshine/audio.cpp
+++ b/sunshine/audio.cpp
@ -82,26 +82,51 @@ void encodeThread(std::shared_ptr<safe::queue_t<packet_t>> packets, sample_queue

 void capture(std::shared_ptr<safe::queue_t<packet_t>> packets, config_t config) {
  auto samples = std::make_shared<sample_queue_t::element_type>();
+  std::thread thread { encodeThread, packets, samples, config };

-  auto mic = platf::microphone();
-  if(!mic) {
-    BOOST_LOG(error) << "Couldn't create audio input"sv ;
-  }
+  // Whatever path leaves this function, stop both queues and join the encoder thread
+  auto fg = util::fail_guard([&]() {
+    packets->stop();
+    samples->stop();
+    thread.join();
+  });

  //FIXME: Pick correct opus_stream_config_t based on config.channels
  auto stream = &stereo;

+  auto mic = platf::microphone(stream->sampleRate);
+  if(!mic) {
+    BOOST_LOG(error) << "Couldn't create audio input"sv ;
+
+    return;
+  }
+
  auto frame_size = config.packetDuration * stream->sampleRate / 1000;
  int samples_per_frame = frame_size * stream->channelCount;

-  std::thread thread { encodeThread, packets, samples, config };
  while(packets->running()) {
-    auto sample = mic->sample(samples_per_frame);
+    std::vector<std::int16_t> sample_buffer;
+    sample_buffer.resize(samples_per_frame);

-    samples->raise(std::move(sample));
+    auto status = mic->sample(sample_buffer);
+    switch(status) {
+      case platf::capture_e::ok:
+        break;
+      case platf::capture_e::timeout:
+        continue;
+      case platf::capture_e::reinit:
+        mic.reset();
+        mic = platf::microphone(stream->sampleRate);
+        if(!mic) {
+          BOOST_LOG(error) << "Couldn't re-initialize audio input"sv ;
+
+          return;
+        }
+        // The audio input was re-created: keep capturing with it.
+        // sample_buffer was not filled during this iteration, so nothing is raised.
+        continue;
+      default:
+        return;
+    }
+
+    samples->raise(std::move(sample_buffer));
  }
-
-  samples->stop();
-  thread.join();
 }
 }
--- a/sunshine/crypto.cpp
+++ b/sunshine/crypto.cpp
@ -23,7 +23,7 @@ void cert_chain_t::add(x509_t &&cert) {
 const char *cert_chain_t::verify(x509_t::element_type *cert) {
  int err_code = 0;
  for(auto &[_,x509_store] : _certs) {
-    util::fail_guard([this]() {
+    auto fg = util::fail_guard([this]() {
      X509_STORE_CTX_cleanup(_cert_ctx.get());
    });

--- a/sunshine/main.cpp
+++ b/sunshine/main.cpp
@ -18,6 +18,7 @@
 #include "config.h"
 #include "thread_pool.h"

+#include "platform/common.h"
 extern "C" {
 #include <rs.h>
 }
--- a/sunshine/nvhttp.cpp
+++ b/sunshine/nvhttp.cpp
@ -744,6 +744,7 @@ std::string read_file(const char *path) {
  std::string input;
  std::string base64_cert;

+  //FIXME:  Being unable to read file could result in infinite loop
  while(!in.eof()) {
    std::getline(in, input);
    base64_cert += input + '\n';
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@ -32,7 +32,7 @@ enum class capture_e : int {

 class display_t {
 public:
-  virtual capture_e snapshot(std::unique_ptr<img_t> &img, bool cursor) = 0;
+  virtual capture_e snapshot(img_t *img, bool cursor) = 0;
  virtual int reinit() = 0;
  virtual std::unique_ptr<img_t> alloc_img() = 0;

@ -41,7 +41,7 @@ public:

 class mic_t {
 public:
-  virtual std::vector<std::int16_t> sample(std::size_t sample_size) = 0;
+  virtual capture_e sample(std::vector<std::int16_t> &frame_buffer) = 0;

  virtual ~mic_t() = default;
 };
@ -52,7 +52,7 @@ using input_t = util::safe_ptr<void, freeInput>;

 std::string get_local_ip();

-std::unique_ptr<mic_t> microphone();
+std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate);
 std::shared_ptr<display_t> display();

 input_t input();
--- a/sunshine/platform/linux.cpp
+++ b/sunshine/platform/linux.cpp
@ -141,7 +141,7 @@ struct x11_attr_t : public display_t {
    XGetWindowAttributes(xdisplay.get(), xwindow, &xattr);
  }

-  capture_e snapshot(std::unique_ptr<img_t> &img_out_base, bool cursor) override {
+  capture_e snapshot(img_t *img_out_base, bool cursor) override {
    refresh();
    XImage *img { XGetImage(
      xdisplay.get(),
@ -151,7 +151,7 @@ struct x11_attr_t : public display_t {
      AllPlanes, ZPixmap)
    };

-    auto img_out = (x11_img_t*)img_out_base.get();
+    auto img_out = (x11_img_t*)img_out_base;
    img_out->width = img->width;
    img_out->height = img->height;
    img_out->data = (uint8_t*)img->data;
@ -204,7 +204,7 @@ struct shm_attr_t : public x11_attr_t {
    while(!task_pool.cancel(refresh_task_id));
  }

-  capture_e snapshot(std::unique_ptr<img_t> &img, bool cursor) override {
+  capture_e snapshot(img_t *img, bool cursor) override {
    if(display->width_in_pixels != xattr.width || display->height_in_pixels != xattr.height) {
      return capture_e::reinit;
    }
@ -300,17 +300,18 @@ struct mic_attr_t : public mic_t {
  util::safe_ptr<pa_simple, pa_simple_free> mic;

  explicit mic_attr_t(const pa_sample_spec& ss) : ss(ss), mic {} {}
-  std::vector<std::int16_t> sample(std::size_t sample_size) override {
-    std::vector<std::int16_t> sample_buf;
-    sample_buf.resize(sample_size);
+  capture_e sample(std::vector<std::int16_t> &sample_buf) override {
+    auto sample_size = sample_buf.size();

    auto buf = sample_buf.data();
    int status;
    if(pa_simple_read(mic.get(), buf, sample_size * 2, &status)) {
      BOOST_LOG(error) << "pa_simple_read() failed: "sv << pa_strerror(status);
+
+      return capture_e::error;
    }

-    return sample_buf;
+    return capture_e::ok;
  }
 };

@ -335,10 +336,10 @@ std::shared_ptr<display_t> display() {
 }

 //FIXME: Pass frame_rate instead of hard coding it
-std::unique_ptr<mic_t> microphone() {
+std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate) {
  std::unique_ptr<mic_attr_t> mic {
    new mic_attr_t {
-      { PA_SAMPLE_S16LE, 48000, 2 }
+      { PA_SAMPLE_S16LE, sample_rate, 2 }
    }
  };

--- a/sunshine/platform/windows.cpp
+++ b/sunshine/platform/windows.cpp
@ -6,20 +6,6 @@ namespace platf {
 using namespace std::literals;
 std::string get_local_ip() { return "192.168.0.119"s; }

-class dummy_mic_t : public mic_t {
-public:
-  std::vector<std::int16_t> sample(std::size_t sample_size) override {
-    std::vector<std::int16_t> sample_buf;
-    sample_buf.resize(sample_size);
-
-    return sample_buf;
-  }
-};
-
-std::unique_ptr<mic_t> microphone() {
-  return std::unique_ptr<mic_t> { new dummy_mic_t {} };
-}
-
 input_t input() {
  return nullptr;
 }
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@ -231,8 +231,8 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {

 class display_t : public ::platf::display_t {
 public:
-  capture_e snapshot(std::unique_ptr<::platf::img_t> &img_base, bool cursor_visible) override {
-    auto img = (img_t *) img_base.get();
+  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
+    auto img = (img_t *) img_base;
    HRESULT status;

    DXGI_OUTDUPL_FRAME_INFO frame_info;
--- a/sunshine/platform/windows_wasapi.cpp
+++ b/sunshine/platform/windows_wasapi.cpp
@ -0,0 +1,323 @@
+//
+// Created by loki on 1/12/20.
+//
+
+// Acquire definition of CreateEventEx
+//#if _WIN32_WINNT < 0x0600
+//#undef _WIN32_WINNT
+//#define _WIN32_WINNT 0x0600
+//#endif
+
+#include <roapi.h>
+#include <mmdeviceapi.h>
+#include <audioclient.h>
+
+#include <synchapi.h>
+
+#include "sunshine/main.h"
+#include "common.h"
+
+const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
+const IID IID_IMMDeviceEnumerator    = __uuidof(IMMDeviceEnumerator);
+const IID IID_IAudioClient           = __uuidof(IAudioClient);
+const IID IID_IAudioCaptureClient    = __uuidof(IAudioCaptureClient);
+
+using namespace std::literals;
+namespace platf::audio {
+/// Deleter helper: forwards to the object's own Release() member.
+template<typename T>
+void Release(T *p) {
+  p->Release();
+}
+
+/// Deleter helper: hands the pointer back to CoTaskMemFree().
+template<typename T>
+void co_task_free(T *p) {
+  LPVOID raw = (LPVOID)p;
+  CoTaskMemFree(raw);
+}
+
+using device_enum_t   = util::safe_ptr<IMMDeviceEnumerator, Release<IMMDeviceEnumerator>>;
+using device_t        = util::safe_ptr<IMMDevice, Release<IMMDevice>>;
+using audio_client_t  = util::safe_ptr<IAudioClient, Release<IAudioClient>>;
+using audio_capture_t = util::safe_ptr<IAudioCaptureClient, Release<IAudioCaptureClient>>;
+using wave_format_t   = util::safe_ptr<WAVEFORMATEX, co_task_free<WAVEFORMATEX>>;
+using handle_t = util::safe_ptr_v2<void, BOOL, CloseHandle>;
+
+class mic_wasapi_t : public mic_t {
+public:
+  /**
+   * Fill sample_in with exactly sample_in.size() captured 16-bit samples.
+   *
+   * Captured data is staged in sample_buf until enough samples are available;
+   * samples beyond the request stay at the front of sample_buf for the next call.
+   *
+   * @param sample_in destination; its current size is the number of samples requested
+   * @return capture_e::ok once sample_in is filled, capture_e::error when the request
+   *         exceeds the staging buffer, otherwise the status reported by _fill_buffer()
+   */
+  capture_e sample(std::vector<std::int16_t> &sample_in) override {
+    const std::size_t wanted = sample_in.size();
+
+    // sample_buf can never hold more than sample_buf.size() samples, so a larger
+    // request could never be satisfied by the loop below.
+    if(wanted > sample_buf.size()) {
+      BOOST_LOG(error) << "Requested more audio samples than the capture buffer can hold"sv;
+
+      return capture_e::error;
+    }
+
+    while(static_cast<std::size_t>(sample_buf_pos - std::begin(sample_buf)) < wanted) {
+      //FIXME: Use IAudioClient3 instead of IAudioClient, that would allow for adjusting the latency of the audio samples
+      auto capture_result = _fill_buffer();
+
+      if(capture_result != capture_e::ok) {
+        return capture_result;
+      }
+    }
+
+    std::int16_t *buf_begin = std::begin(sample_buf);
+    std::copy_n(buf_begin, wanted, std::begin(sample_in));
+
+    // The excess samples should be in front of the queue
+    std::move(buf_begin + wanted, sample_buf_pos, buf_begin);
+    sample_buf_pos -= wanted;
+
+    return capture_e::ok;
+  }
+
+
+  /**
+   * Set up WASAPI loopback capture on the default render endpoint.
+   *
+   * Requests 16-bit, 2-channel PCM at sample_rate in shared, event-driven
+   * loopback mode, allocates the staging buffer and starts the stream.
+   *
+   * @param sample_rate samples per second requested from the audio client
+   * @return 0 on success, -1 when any step fails (the failure is logged)
+   */
+  int init(std::uint32_t sample_rate) {
+    audio_event.reset(CreateEventA(nullptr, FALSE, FALSE, nullptr));
+    if(!audio_event) {
+      BOOST_LOG(error) << "Couldn't create Event handle"sv;
+
+      return -1;
+    }
+
+    HRESULT status;
+
+    device_enum_t::pointer device_enum_p{};
+    status = CoCreateInstance(
+      CLSID_MMDeviceEnumerator,
+      nullptr,
+      CLSCTX_ALL,
+      IID_IMMDeviceEnumerator,
+      (void **) &device_enum_p);
+    device_enum.reset(device_enum_p);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't create Device Enumerator [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    // Loopback capture records what is being rendered, hence eRender
+    device_t::pointer device_p{};
+    status = device_enum->GetDefaultAudioEndpoint(
+      eRender,
+      eConsole,
+      &device_p);
+    device.reset(device_p);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't create Device [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    audio_client_t::pointer audio_client_p{};
+    status = device->Activate(
+      IID_IAudioClient,
+      CLSCTX_ALL,
+      nullptr,
+      (void **) &audio_client_p);
+    audio_client.reset(audio_client_p);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't activate Device [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    wave_format_t::pointer wave_format_p{};
+    status = audio_client->GetMixFormat(&wave_format_p);
+    wave_format.reset(wave_format_p);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't acquire Wave Format [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    // Rewrite the mix format into the format the rest of the pipeline consumes
+    wave_format->nChannels = 2;
+    wave_format->wBitsPerSample = 16;
+    wave_format->nSamplesPerSec = sample_rate;
+    wave_format->nBlockAlign = wave_format->nChannels * wave_format->wBitsPerSample / 8;
+    wave_format->nAvgBytesPerSec = wave_format->nSamplesPerSec * wave_format->nBlockAlign;
+
+    switch(wave_format->wFormatTag) {
+      case WAVE_FORMAT_PCM:
+        break;
+      case WAVE_FORMAT_IEEE_FLOAT:
+        wave_format->wFormatTag = WAVE_FORMAT_PCM;
+        break;
+      case WAVE_FORMAT_EXTENSIBLE: {
+        auto wave_ex = (PWAVEFORMATEXTENSIBLE) wave_format.get();
+        if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_PCM, wave_ex->SubFormat)) {
+          // Already integer PCM: only the valid bit count has to follow wBitsPerSample
+          wave_ex->Samples.wValidBitsPerSample = 16;
+          break;
+        }
+
+        if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, wave_ex->SubFormat)) {
+          wave_ex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+          wave_ex->Samples.wValidBitsPerSample = 16;
+          break;
+        }
+
+        BOOST_LOG(error) << "Unsupported Sub Format for WAVE_FORMAT_EXTENSIBLE: [0x"sv << util::hex(wave_ex->SubFormat).to_string_view() << ']';
+        return -1;
+      }
+      default:
+        BOOST_LOG(error) << "Unsupported Wave Format: [0x"sv << util::hex(wave_format->wFormatTag).to_string_view() << ']';
+        return -1;
+    };
+
+    REFERENCE_TIME default_latency {};
+    status = audio_client->GetDevicePeriod(&default_latency, nullptr);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't acquire the device period [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    // REFERENCE_TIME counts 100-nanosecond units, i.e. 10000 per millisecond (rounded up)
+    default_latency_ms = (default_latency + 9999) / 10000;
+
+    status = audio_client->Initialize(
+      AUDCLNT_SHAREMODE_SHARED,
+      AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+      0, 0,
+      wave_format.get(),
+      nullptr);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't initialize audio client [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    std::uint32_t frames;
+    status = audio_client->GetBufferSize(&frames);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't acquire the number of frames [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    // GetBufferSize() reports audio frames; sample_buf stores interleaved samples,
+    // so it needs one slot per channel for every frame.
+    std::uint32_t total_samples = frames * wave_format->nChannels;
+    sample_buf = util::buffer_t<std::int16_t> { total_samples };
+    sample_buf_pos = std::begin(sample_buf);
+
+    audio_capture_t::pointer audio_capture_p {};
+    status = audio_client->GetService(IID_IAudioCaptureClient, (void**)&audio_capture_p);
+    audio_capture.reset(audio_capture_p);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't initialize audio capture client [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    status = audio_client->SetEventHandle(audio_event.get());
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't set event handle [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    status = audio_client->Start();
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't start recording [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return -1;
+    }
+
+    return 0;
+  }
+
+  /// Stops the audio client, provided one was created.
+  ~mic_wasapi_t() override {
+    if(!audio_client) {
+      return;
+    }
+
+    audio_client->Stop();
+  }
+private:
+  /**
+   * Wait for the capture event, then append every pending packet to sample_buf.
+   *
+   * Samples that do not fit into the remaining space of sample_buf are dropped.
+   *
+   * @return capture_e::ok when the pending packets were drained,
+   *         capture_e::timeout when the event was not signalled within default_latency_ms,
+   *         capture_e::reinit when the device was invalidated,
+   *         capture_e::error on any other failure
+   */
+  capture_e _fill_buffer() {
+    // The wait functions report a DWORD wait code, not an HRESULT
+    DWORD wait_result = WaitForSingleObjectEx(audio_event.get(), default_latency_ms, FALSE);
+    switch (wait_result) {
+      case WAIT_OBJECT_0:
+        break;
+      case WAIT_TIMEOUT:
+        // NOTE(review): this zeroes the whole staging buffer but leaves sample_buf_pos
+        // untouched — confirm that is the intended handling of partially buffered data
+        std::fill_n(std::begin(sample_buf), sample_buf.size(), 0);
+        return capture_e::timeout;
+      default:
+        BOOST_LOG(error) << "Couldn't wait for audio event: [0x"sv << util::hex(wait_result).to_string_view() << ']';
+        return capture_e::error;
+    }
+
+    HRESULT status;
+    std::uint32_t packet_size{};
+    for (
+      status = audio_capture->GetNextPacketSize(&packet_size);
+      SUCCEEDED(status) && packet_size > 0;
+      status = audio_capture->GetNextPacketSize(&packet_size)
+      ) {
+      std::int16_t *packet_samples {};
+      // Counted in frames: number of samples / number of channels
+      std::uint32_t packet_frames {};
+      DWORD buffer_flags {};
+      status = audio_capture->GetBuffer(
+        (BYTE **) &packet_samples,
+        &packet_frames,
+        &buffer_flags,
+        nullptr, nullptr);
+
+      switch (status) {
+        case S_OK:
+          break;
+        case AUDCLNT_E_DEVICE_INVALIDATED:
+          return capture_e::reinit;
+        default:
+          BOOST_LOG(error) << "Couldn't capture audio [0x"sv << util::hex(status).to_string_view() << ']';
+          return capture_e::error;
+      }
+
+      // Never write past the end of the staging buffer
+      std::uint32_t free_samples = std::end(sample_buf) - sample_buf_pos;
+      std::uint32_t available    = packet_frames * wave_format->nChannels;
+      auto n = std::min(free_samples, available);
+
+      if (buffer_flags & AUDCLNT_BUFFERFLAGS_SILENT) {
+        std::fill_n(sample_buf_pos, n, 0);
+      } else {
+        std::copy_n(packet_samples, n, sample_buf_pos);
+      }
+
+      sample_buf_pos += n;
+
+      status = audio_capture->ReleaseBuffer(packet_frames);
+      if (FAILED(status)) {
+        // Leave before the next GetNextPacketSize() call overwrites the failure code
+        break;
+      }
+    }
+
+    if (status == AUDCLNT_E_DEVICE_INVALIDATED) {
+      return capture_e::reinit;
+    }
+
+    if (FAILED(status)) {
+      return capture_e::error;
+    }
+
+    return capture_e::ok;
+  }
+public:
+  handle_t audio_event;
+
+  device_enum_t device_enum;
+  device_t device;
+  audio_client_t audio_client;
+  wave_format_t wave_format;
+  audio_capture_t audio_capture;
+
+  REFERENCE_TIME default_latency_ms;
+
+  util::buffer_t<std::int16_t> sample_buf;
+  std::int16_t *sample_buf_pos;
+};
+}
+
+namespace platf {
+/// Placeholder audio source: never touches the buffer it is given.
+class dummy_mic_t : public mic_t {
+public:
+  /// Reports success right away and leaves sample_buf exactly as the caller passed it.
+  capture_e sample(std::vector<std::int16_t> &sample_buf) override {
+    constexpr auto result = capture_e::ok;
+
+    return result;
+  }
+};
+
+/// Creates the WASAPI-backed audio source; when its init() reports failure a dummy_mic_t is handed out instead.
+std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate) {
+  Windows::Foundation::Initialize(RO_INIT_MULTITHREADED);
+
+  auto wasapi_mic = std::make_unique<audio::mic_wasapi_t>();
+  const bool init_failed = wasapi_mic->init(sample_rate) != 0;
+  if(!init_failed) {
+    return wasapi_mic;
+  }
+
+  return std::make_unique<dummy_mic_t>();
+}
+}
--- a/sunshine/thread_safe.h
+++ b/sunshine/thread_safe.h
@ -132,7 +132,7 @@ public:
    _cv.notify_all();
  }

-  bool running() const {
+  [[nodiscard]] bool running() const {
    return _continue;
  }

--- a/sunshine/utility.h
+++ b/sunshine/utility.h
@ -134,7 +134,7 @@ private:
 };

 template<class T>
-auto fail_guard(T && f) {
+[[nodiscard]] auto fail_guard(T && f) {
  return FailGuard<T> { std::forward<T>(f) };
 }

--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@ -200,7 +200,7 @@ void capture_display(packet_queue_t packets, idr_event_t idr_events, config_t co
    auto next_snapshot = std::chrono::steady_clock::now() + time_span;

    auto img = disp->alloc_img();
-    auto status = disp->snapshot(img, display_cursor);
+    auto status = disp->snapshot(img.get(), display_cursor);

    switch(status) {
      case platf::capture_e::reinit: