From afbca0f6cded67b9d63527658d8f7f02a76136fa Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Mon, 6 Apr 2020 23:15:03 +0300
Subject: [PATCH] initialize nvenc

---
 pre-compiled                       |   2 +-
 sunshine/platform/common.h         |   4 +-
 sunshine/platform/windows_dxgi.cpp |  44 ++++----
 sunshine/video.cpp                 | 165 ++++++++++++++++++-----------
 4 files changed, 129 insertions(+), 86 deletions(-)
diff --git a/pre-compiled b/pre-compiled
index 51f776db..afd9a9bb 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
+Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416
diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index 4ec9923c..2028a554 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -64,7 +64,7 @@ public:
 };
 
 struct hwdevice_ctx_t {
-  std::shared_ptr<void> hwdevice;
+  void *hwdevice {};
 
   virtual const platf::img_t*const convert(platf::img_t &img) {
     return nullptr;
@@ -96,7 +96,7 @@ public:
   }
 
   virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
-    return nullptr;
+    return std::make_shared<hwdevice_ctx_t>();
   }
 
   virtual ~display_t() = default;
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 2fed8e13..4729c72e 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -124,6 +124,8 @@ struct img_t : public ::platf::img_t  {
 struct img_d3d_t : public ::platf::img_t {
   std::shared_ptr<platf::display_t> display;
   texture2d_t texture;
+
+  ~img_d3d_t() override = default;
 };
 
 struct cursor_t {
@@ -308,11 +310,11 @@ public:
     D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
     auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
       return nullptr;
     }
 
-    return &img;
+    return &this->img;
   }
 
   int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
@@ -364,7 +366,8 @@ public:
     t.ArraySize = 1;
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = DXGI_FORMAT_420_OPAQUE;
+    t.Format = DXGI_FORMAT_NV12;
+    t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
 
     dxgi::texture2d_t::pointer tex_p {};
     status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
@@ -377,19 +380,30 @@ public:
     img.display = std::move(display);
     img.width = out_width;
     img.height = out_height;
+    img.data = (std::uint8_t*)tex_p;
+    img.row_pitch = out_width;
+    img.pixel_pitch = 1;
 
-    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D };
+    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
     video::processor_out_t::pointer processor_out_p;
-    device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
+    status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
     processor_out.reset(processor_out_p);
 
+    device_p->AddRef();
+    hwdevice = device_p;
     return 0;
   }
 
+  ~hwdevice_ctx_t() override {
+    if(hwdevice) { // non-null only while we still hold the reference taken by AddRef() when hwdevice was assigned
+      ((ID3D11Device*)hwdevice)->Release();
+    }
+  }
+
   img_d3d_t img;
   video::device_t device;
   video::ctx_t ctx;
@@ -837,25 +851,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     dxgi::texture2d_t::pointer tex_p {};
     auto status = device->CreateTexture2D(&t, &data, &tex_p);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
     img->texture.reset(tex_p);
 
-    D3D11_MAPPED_SUBRESOURCE img_info {};
-    // map the texture simply to get the pitch and stride
-    status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
+    img->height      = 1;
+    img->width       = 1;
+    img->data        = (std::uint8_t*)tex_p;
+    img->row_pitch   = 4;
+    img->pixel_pitch = 4;
 
-    img->row_pitch = img_info.RowPitch;
-    img->height    = 1;
-    img->width     = 1;
-    img->data      = (std::uint8_t*)img->texture.get();
-
-    device_ctx->Unmap(img->texture.get(), 0);
     return 0;
   }
 
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index bf0ff6c9..680b3d61 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -7,6 +7,7 @@
 
 extern "C" {
 #include <libswscale/swscale.h>
+#include <libavutil/hwcontext_d3d11va.h>
 }
 
 #include "platform/common.h"
@@ -43,7 +44,9 @@ using sws_t       = util::safe_ptr<SwsContext, sws_freeContext>;
 using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
 
 void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+
 void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
 
 struct encoder_t {
   struct option_t {
@@ -58,8 +61,10 @@ struct encoder_t {
   } profile;
 
   AVHWDeviceType dev_type;
+  AVPixelFormat dev_pix_fmt;
 
-  AVPixelFormat pix_fmt;
+  AVPixelFormat static_pix_fmt;
+  AVPixelFormat dynamic_pix_fmt;
 
   struct {
     std::vector<option_t> options;
@@ -69,6 +74,7 @@ struct encoder_t {
   bool system_memory;
 
   std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
+  std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
 };
 
 struct session_t {
@@ -87,23 +93,24 @@ static encoder_t nvenc {
   { 2, 0, 1 },
   AV_HWDEVICE_TYPE_D3D11VA,
   AV_PIX_FMT_D3D11,
+  AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
   {
-    { {"force-idr"s, 1} }, "nvenc_hevc"s
+    { {"force-idr"s, 1} }, "hevc_nvenc"s
   },
   {
-    { {"force-idr"s, 1} }, "nvenc_h264"s
+    { {"force-idr"s, 1} }, "h264_nvenc"s
   },
   false,
 
-  nv_d3d_img_to_frame
-
-  // D3D11Device
+  nv_d3d_img_to_frame,
+  nv_d3d_make_hwdevice_ctx
 };
 
 static encoder_t software {
   { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
   AV_HWDEVICE_TYPE_NONE,
   AV_PIX_FMT_NONE,
+  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
   {
     // x265's Info SEI is so long that it causes the IDR picture data to be
     // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
@@ -123,9 +130,8 @@ static encoder_t software {
   },
   true,
 
-  sw_img_to_frame
-
-  // nullptr
+  sw_img_to_frame,
+  nullptr
 };
 
 static std::vector<encoder_t> encoders {
@@ -192,7 +198,9 @@ void captureThread(
     }
   }
   auto &dummy_img = imgs.front();
-  disp->dummy_img(dummy_img.get(), dummy_data);
+  if(disp->dummy_img(dummy_img.get(), dummy_data)) {
+    return;
+  }
 
   auto next_frame = std::chrono::steady_clock::now();
   while(capture_ctx_queue->running()) {
@@ -257,7 +265,9 @@ void captureThread(
             return;
           }
         }
-        disp->dummy_img(dummy_img.get(), dummy_data);
+        if(disp->dummy_img(dummy_img.get(), dummy_data)) {
+          return;
+        }
 
         reinit_event.reset();
         continue;
@@ -292,13 +302,22 @@ void captureThread(
   }
 }
 
-util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type) {
+util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
   buffer_t ctx;
 
-  AVBufferRef *ref;
-  auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
+  int err;
+  if(hwdevice_ctx) {
+    ctx.reset(av_hwdevice_ctx_alloc(type));
+    ((AVHWDeviceContext*)ctx.get())->hwctx = hwdevice_ctx;
+
+    err = av_hwdevice_ctx_init(ctx.get());
+  }
+  else {
+    AVBufferRef *ref  {};
+    err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
+    ctx.reset(ref);
+  }
 
-  ctx.reset(ref);
   if(err < 0) {
     return err;
   }
@@ -314,7 +333,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
   frame_ctx->sw_format = format;
   frame_ctx->height    = ctx->height;
   frame_ctx->width     = ctx->width;
-  frame_ctx->initial_pool_size = 20;
+  frame_ctx->initial_pool_size = 0;
 
   if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
     return err;
@@ -331,7 +350,9 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
   /* send the frame to the encoder */
   auto ret = avcodec_send_frame(ctx.get(), frame.get());
   if (ret < 0) {
-    BOOST_LOG(error) << "Could not send a frame for encoding"sv;
+    char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
+    BOOST_LOG(error) << "Could not send a frame for encoding: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, ret);
+
     return -1;
   }
 
@@ -375,7 +396,7 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
   capture_thread_ctx.capture_thread.join();
 }
 
-std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) {
+std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
   auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
@@ -387,21 +408,6 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     return std::nullopt;
   }
 
-  buffer_t hwdevice;
-  if(hardware) {
-    auto buf_or_error = hwdevice_ctx(encoder.dev_type);
-    if(buf_or_error.has_right()) {
-      auto err = buf_or_error.right();
-
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
-
-      return std::nullopt;;
-    }
-
-    hwdevice = std::move(buf_or_error.left());
-  }
-
   ctx_t ctx {avcodec_alloc_context3(codec) };
   ctx->width = config.width;
   ctx->height = config.height;
@@ -463,21 +469,23 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
 
   AVPixelFormat sw_fmt;
   if(config.dynamicRange == 0) {
-    sw_fmt = AV_PIX_FMT_YUV420P;
+    sw_fmt = encoder.static_pix_fmt;
   }
   else {
-    sw_fmt = AV_PIX_FMT_YUV420P10;
+    sw_fmt = encoder.dynamic_pix_fmt;
   }
 
+  buffer_t hwdevice;
   if(hardware) {
-    ctx->pix_fmt = encoder.pix_fmt;
+    ctx->pix_fmt = encoder.dev_pix_fmt;
 
-    ((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx;
-
-    if(auto err = hwframe_ctx(ctx, hwdevice, sw_fmt); err < 0) {
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
+    auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx);
+    if(buf_or_error.has_right()) {
+      return std::nullopt;
+    }
 
+    hwdevice = std::move(buf_or_error.left());
+    if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
       return std::nullopt;
     }
   }
@@ -516,9 +524,6 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     av_dict_set_int(&options, "qp", config::video.qp, 0);
   }
 
-  av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
-  av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
-
   auto codec_handle = open_codec(ctx, codec, &options);
 
   frame_t frame {av_frame_alloc() };
@@ -528,15 +533,9 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
 
 
   if(hardware) {
-    auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0);
-    if(err < 0) {
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Coudn't create hardware frame: "sv <<  av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
-
-      return std::nullopt;
-    }
+    frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
   }
-  else {
+  else /* software */ {
     av_frame_get_buffer(frame.get(), 0);
   }
 
@@ -562,9 +561,7 @@ void encode_run(
   const encoder_t &encoder,
   void *channel_data) {
 
-  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
-
-  auto session = make_session(encoder, config, hwdevice);
+  auto session = make_session(encoder, config, hwdevice_ctx);
   if(!session) {
     return;
   }
@@ -626,6 +623,9 @@ void encode_run(
         }
         else {
           auto converted_img = hwdevice_ctx->convert(*img);
+          if(!converted_img) {
+            return;
+          }
  
           encoder.img_to_frame(sws, *converted_img, session->frame);
 
@@ -657,6 +657,10 @@ void capture(
   void *channel_data) {
 
   auto images = std::make_shared<img_event_t::element_type>();
+  auto lg = util::fail_guard([&]() {
+    images->stop();
+    shutdown_event->raise(true);
+  });
 
   // Keep a reference counter to ensure the Fcapture thread only runs when other threads have a reference to the capture thread
   static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
@@ -689,12 +693,13 @@ void capture(
 
       auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
       hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+      if(!hwdevice_ctx) {
+        return;
+      }
     }
 
     encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
   }
-
-  images->stop();
 }
 
 bool validate_config(const encoder_t &encoder, const config_t &config) {
@@ -706,17 +711,20 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
 
   auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
   auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+  if(!hwdevice_ctx) {
+    return false;
+  }
 
-  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
-
-  auto session = make_session(encoder, config, hwdevice);
+  auto session = make_session(encoder, config, hwdevice_ctx.get());
   if(!session) {
     return false;
   }
 
   int dummy_data;
   auto img = disp->alloc_img();
-  disp->dummy_img(img.get(), dummy_data);
+  if(disp->dummy_img(img.get(), dummy_data)) {
+    return false;
+  }
 
   sws_t sws;
   if(encoder.system_memory) {
@@ -734,6 +742,9 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
   }
   else {
     auto converted_img = hwdevice_ctx->convert(*img);
+    if(!converted_img) {
+      return false;
+    }
 
     encoder.img_to_frame(sws, *converted_img, session->frame);
   }
@@ -754,7 +765,7 @@ bool validate_encoder(const encoder_t &encoder) {
     60,
     1000,
     1,
-    1,
+    0,
     1,
     0,
     0
@@ -765,7 +776,7 @@ bool validate_encoder(const encoder_t &encoder) {
     60,
     1000,
     1,
-    1,
+    0,
     1,
     1,
     0
@@ -809,13 +820,39 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
 }
 
 void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
+  // The frame needs a refcounted buffer; buf[0] is allocated once and holds the AVD3D11FrameDescriptor
+  if(!frame->buf[0]) {
+    frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor)); // size of the descriptor itself, not of a pointer to it
+  }
+
+  auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data;
+  desc->texture = (ID3D11Texture2D*)img.data; // img.data carries the ID3D11Texture2D pointer, not pixel bytes
+  desc->index = 0;
+
   frame->data[0] = img.data;
   frame->data[1] = 0;
 
   frame->linesize[0] = img.row_pitch;
-  frame->linesize[1] = 0;
 
   frame->height = img.height;
   frame->width = img.width;
 }
+
+// Builds an FFmpeg D3D11VA device context around hwdevice_ctx->hwdevice; returns the buffer, or a negative AVERROR code on failure.
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
+  buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
+  if(!ctx_buf.get()) { // allocation failed; ctx_buf->data below would dereference null
+    return AVERROR(ENOMEM);
+  }
+  auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
+  std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
+  std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice); // device pointer moves into ctx; hwdevice_ctx->hwdevice is left null
+  auto err = av_hwdevice_ctx_init(ctx_buf.get());
+  if(err) {
+    char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
+    BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
+    return err;
+  }
+  return ctx_buf;
+}
 }