initialize nvenc

loki 2020-04-06 23:15:03 +03:00
parent f2636b163e
commit afbca0f6cd
4 changed files with 129 additions and 86 deletions

@@ -1 +1 @@
Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416


@@ -64,7 +64,7 @@ public:
};
struct hwdevice_ctx_t {
std::shared_ptr<void> hwdevice;
void *hwdevice {};
virtual const platf::img_t*const convert(platf::img_t &img) {
return nullptr;
@@ -96,7 +96,7 @@ public:
}
virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
return nullptr;
return std::make_shared<hwdevice_ctx_t>();
}
virtual ~display_t() = default;
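Note: the base `hwdevice_ctx_t` now stores a raw `void *hwdevice` instead of a `std::shared_ptr<void>`, so each derived context owns that pointer's lifetime explicitly, and the default `make_hwdevice_ctx` hands back an empty context instead of `nullptr`. A minimal sketch of what a derived context managing a COM pointer might look like (the type name and `IUnknown` cast are illustrative, not from this commit):

```cpp
// Illustrative sketch: a derived context that owns whatever init() stored in
// the base-class `hwdevice` pointer and releases it exactly once.
struct example_hwdevice_ctx_t : platf::hwdevice_ctx_t {
  ~example_hwdevice_ctx_t() override {
    if(hwdevice) {
      // assumes init() AddRef'd a COM object before storing it
      ((IUnknown *)hwdevice)->Release();
    }
  }
};
```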


@@ -124,6 +124,8 @@ struct img_t : public ::platf::img_t {
struct img_d3d_t : public ::platf::img_t {
std::shared_ptr<platf::display_t> display;
texture2d_t texture;
~img_d3d_t() override = default;
};
struct cursor_t {
@@ -308,11 +310,11 @@ public:
D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']';
BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
return nullptr;
}
return &img;
return &this->img;
}
int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
@@ -364,7 +366,8 @@ public:
t.ArraySize = 1;
t.SampleDesc.Count = 1;
t.Usage = D3D11_USAGE_DEFAULT;
t.Format = DXGI_FORMAT_420_OPAQUE;
t.Format = DXGI_FORMAT_NV12;
t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
dxgi::texture2d_t::pointer tex_p {};
status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
@@ -377,19 +380,30 @@ public:
img.display = std::move(display);
img.width = out_width;
img.height = out_height;
img.data = (std::uint8_t*)tex_p;
img.row_pitch = out_width;
img.pixel_pitch = 1;
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D };
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
video::processor_out_t::pointer processor_out_p;
device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
processor_out.reset(processor_out_p);
device_p->AddRef();
hwdevice = device_p;
return 0;
}
~hwdevice_ctx_t() override {
if(hwdevice) {
((ID3D11Device*)hwdevice)->Release();
}
}
img_d3d_t img;
video::device_t device;
video::ctx_t ctx;
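`init()` now takes a strong reference on the `ID3D11Device` with `AddRef()` before storing it in the base-class `hwdevice` pointer, and the new destructor balances it with `Release()`. A hypothetical RAII wrapper for the same pairing (not part of this codebase) would look like:

```cpp
#include <d3d11.h>

// Hypothetical RAII wrapper mirroring the manual AddRef()/Release() pairing above.
class device_ref_t {
public:
  explicit device_ref_t(ID3D11Device *p) : p_ { p } {
    if(p_) p_->AddRef();  // take a strong reference on construction
  }
  ~device_ref_t() {
    if(p_) p_->Release(); // drop it exactly once on destruction
  }
  device_ref_t(const device_ref_t &) = delete;
  device_ref_t &operator=(const device_ref_t &) = delete;

  ID3D11Device *get() const { return p_; }

private:
  ID3D11Device *p_ {};
};
```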
@@ -837,25 +851,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
dxgi::texture2d_t::pointer tex_p {};
auto status = device->CreateTexture2D(&t, &data, &tex_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->texture.reset(tex_p);
D3D11_MAPPED_SUBRESOURCE img_info {};
// map the texture simply to get the pitch and stride
status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->height = 1;
img->width = 1;
img->data = (std::uint8_t*)tex_p;
img->row_pitch = 4;
img->pixel_pitch = 4;
img->row_pitch = img_info.RowPitch;
img->height = 1;
img->width = 1;
img->data = (std::uint8_t*)img->texture.get();
device_ctx->Unmap(img->texture.get(), 0);
return 0;
}
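`dummy_img` no longer hardcodes `row_pitch = 4`; it maps the texture once and reads the pitch the driver actually chose, since D3D11 may pad rows beyond `width * bytes_per_pixel`. A standalone sketch of the same query (assuming a staging texture with CPU read access, which `D3D11_MAP_READ` requires):

```cpp
#include <d3d11.h>

// Sketch: read the driver-chosen row pitch of a CPU-readable texture.
int query_row_pitch(ID3D11DeviceContext *ctx, ID3D11Texture2D *tex, UINT &pitch) {
  D3D11_MAPPED_SUBRESOURCE info {};

  HRESULT status = ctx->Map(tex, 0, D3D11_MAP_READ, 0, &info);
  if(FAILED(status)) {
    return -1;
  }

  pitch = info.RowPitch; // may exceed width * bytes-per-pixel due to padding
  ctx->Unmap(tex, 0);
  return 0;
}
```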


@@ -7,6 +7,7 @@
extern "C" {
#include <libswscale/swscale.h>
#include <libavutil/hwcontext_d3d11va.h>
}
#include "platform/common.h"
@@ -43,7 +44,9 @@ using sws_t = util::safe_ptr<SwsContext, sws_freeContext>;
using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
struct encoder_t {
struct option_t {
@@ -58,8 +61,10 @@ struct encoder_t {
} profile;
AVHWDeviceType dev_type;
AVPixelFormat dev_pix_fmt;
AVPixelFormat pix_fmt;
AVPixelFormat static_pix_fmt;
AVPixelFormat dynamic_pix_fmt;
struct {
std::vector<option_t> options;
@@ -69,6 +74,7 @@ struct encoder_t {
bool system_memory;
std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
};
struct session_t {
@@ -87,23 +93,24 @@ static encoder_t nvenc {
{ 2, 0, 1 },
AV_HWDEVICE_TYPE_D3D11VA,
AV_PIX_FMT_D3D11,
AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
{
{ {"force-idr"s, 1} }, "nvenc_hevc"s
{ {"force-idr"s, 1} }, "hevc_nvenc"s
},
{
{ {"force-idr"s, 1} }, "nvenc_h264"s
{ {"force-idr"s, 1} }, "h264_nvenc"s
},
false,
nv_d3d_img_to_frame
// D3D11Device
nv_d3d_img_to_frame,
nv_d3d_make_hwdevice_ctx
};
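The renames here matter: FFmpeg registers its NVENC encoders as `h264_nvenc` and `hevc_nvenc`; the old `nvenc_h264`/`nvenc_hevc` names would never resolve. A quick sketch for probing them at runtime (assumes the FFmpeg 4.x avcodec API):

```cpp
extern "C" {
#include <libavcodec/avcodec.h>
}

// Sketch: check that the linked FFmpeg build actually ships the NVENC encoders.
bool has_nvenc() {
  return avcodec_find_encoder_by_name("h264_nvenc") &&
         avcodec_find_encoder_by_name("hevc_nvenc");
}
```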
static encoder_t software {
{ FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
AV_HWDEVICE_TYPE_NONE,
AV_PIX_FMT_NONE,
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
{
// x265's Info SEI is so long that it causes the IDR picture data to be
// kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
@@ -123,9 +130,8 @@ static encoder_t software {
},
true,
sw_img_to_frame
// nullptr
sw_img_to_frame,
nullptr
};
static std::vector<encoder_t> encoders {
@@ -192,7 +198,9 @@ void captureThread(
}
}
auto &dummy_img = imgs.front();
disp->dummy_img(dummy_img.get(), dummy_data);
if(disp->dummy_img(dummy_img.get(), dummy_data)) {
return;
}
auto next_frame = std::chrono::steady_clock::now();
while(capture_ctx_queue->running()) {
@@ -257,7 +265,9 @@ void captureThread(
return;
}
}
disp->dummy_img(dummy_img.get(), dummy_data);
if(disp->dummy_img(dummy_img.get(), dummy_data)) {
return;
}
reinit_event.reset();
continue;
@@ -292,13 +302,22 @@ void captureThread(
}
}
util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type) {
util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
buffer_t ctx;
AVBufferRef *ref;
auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
int err;
if(hwdevice_ctx) {
ctx.reset(av_hwdevice_ctx_alloc(type));
((AVHWDeviceContext*)ctx->data)->hwctx = hwdevice_ctx;
err = av_hwdevice_ctx_init(ctx.get());
}
else {
AVBufferRef *ref {};
err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
ctx.reset(ref);
}
ctx.reset(ref);
if(err < 0) {
return err;
}
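`hwdevice_ctx()` now takes two paths: adopt a caller-supplied platform context via `av_hwdevice_ctx_alloc` + `av_hwdevice_ctx_init`, or let FFmpeg create one with `av_hwdevice_ctx_create`. A self-contained sketch of the same logic, using a raw `AVBufferRef *` instead of `buffer_t` (note that for most device types `hwctx` must point at the type-specific struct, e.g. `AVD3D11VADeviceContext` for D3D11VA):

```cpp
extern "C" {
#include <libavutil/hwcontext.h>
}

// Sketch: either wrap an existing platform device context or create a fresh one.
AVBufferRef *make_device_ref(AVHWDeviceType type, void *native_ctx) {
  if(native_ctx) {
    AVBufferRef *ref = av_hwdevice_ctx_alloc(type);
    if(!ref) {
      return nullptr;
    }

    // point the generic context at the caller-provided platform context
    ((AVHWDeviceContext *)ref->data)->hwctx = native_ctx;
    if(av_hwdevice_ctx_init(ref) < 0) {
      av_buffer_unref(&ref);
      return nullptr;
    }
    return ref;
  }

  AVBufferRef *ref {};
  if(av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0) < 0) {
    return nullptr;
  }
  return ref;
}
```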
@@ -314,7 +333,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
frame_ctx->sw_format = format;
frame_ctx->height = ctx->height;
frame_ctx->width = ctx->width;
frame_ctx->initial_pool_size = 20;
frame_ctx->initial_pool_size = 0;
if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
return err;
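`initial_pool_size = 0` tells FFmpeg not to preallocate a fixed frame pool; for D3D11VA each frame's texture is then allocated on demand rather than carved out of one big texture array up front. A sketch of the surrounding `hwframe_ctx()` setup with raw pointers:

```cpp
extern "C" {
#include <libavutil/hwcontext.h>
}

// Sketch: bind a frames context to a device ref, with on-demand allocation.
AVBufferRef *make_frames_ref(AVBufferRef *device_ref, AVPixelFormat hw_fmt,
                             AVPixelFormat sw_fmt, int width, int height) {
  AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
  if(!frames_ref) {
    return nullptr;
  }

  auto frame_ctx = (AVHWFramesContext *)frames_ref->data;
  frame_ctx->format            = hw_fmt; // e.g. AV_PIX_FMT_D3D11
  frame_ctx->sw_format         = sw_fmt; // e.g. AV_PIX_FMT_NV12
  frame_ctx->width             = width;
  frame_ctx->height            = height;
  frame_ctx->initial_pool_size = 0;      // no preallocated pool

  if(av_hwframe_ctx_init(frames_ref) < 0) {
    av_buffer_unref(&frames_ref);
    return nullptr;
  }
  return frames_ref;
}
```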
@@ -331,7 +350,9 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
/* send the frame to the encoder */
auto ret = avcodec_send_frame(ctx.get(), frame.get());
if (ret < 0) {
BOOST_LOG(error) << "Could not send a frame for encoding"sv;
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Could not send a frame for encoding: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, ret);
return -1;
}
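`encode()` now logs the actual FFmpeg error rather than a bare message. The `err_str` buffer dance recurs throughout this file; a tiny helper (hypothetical name, not in this commit) keeps it readable:

```cpp
extern "C" {
#include <libavutil/error.h>
}
#include <string>

// Hypothetical helper wrapping av_make_error_string for log output.
std::string av_err_str(int err) {
  char buf[AV_ERROR_MAX_STRING_SIZE] {};
  return av_make_error_string(buf, AV_ERROR_MAX_STRING_SIZE, err);
}
```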
@@ -375,7 +396,7 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
capture_thread_ctx.capture_thread.join();
}
std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) {
std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
@@ -387,21 +408,6 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
return std::nullopt;
}
buffer_t hwdevice;
if(hardware) {
auto buf_or_error = hwdevice_ctx(encoder.dev_type);
if(buf_or_error.has_right()) {
auto err = buf_or_error.right();
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return std::nullopt;
}
hwdevice = std::move(buf_or_error.left());
}
ctx_t ctx {avcodec_alloc_context3(codec) };
ctx->width = config.width;
ctx->height = config.height;
@@ -463,21 +469,23 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
AVPixelFormat sw_fmt;
if(config.dynamicRange == 0) {
sw_fmt = AV_PIX_FMT_YUV420P;
sw_fmt = encoder.static_pix_fmt;
}
else {
sw_fmt = AV_PIX_FMT_YUV420P10;
sw_fmt = encoder.dynamic_pix_fmt;
}
buffer_t hwdevice;
if(hardware) {
ctx->pix_fmt = encoder.pix_fmt;
ctx->pix_fmt = encoder.dev_pix_fmt;
((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx;
if(auto err = hwframe_ctx(ctx, hwdevice, sw_fmt); err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx);
if(buf_or_error.has_right()) {
return std::nullopt;
}
hwdevice = std::move(buf_or_error.left());
if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
return std::nullopt;
}
}
@@ -516,9 +524,6 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
av_dict_set_int(&options, "qp", config::video.qp, 0);
}
av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
auto codec_handle = open_codec(ctx, codec, &options);
frame_t frame {av_frame_alloc() };
@@ -528,15 +533,9 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
if(hardware) {
auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0);
if(err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Coudn't create hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
return std::nullopt;
}
frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
}
else {
else /* software */ {
av_frame_get_buffer(frame.get(), 0);
}
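For hardware sessions the reusable `AVFrame` is pulled from the frames context with `av_hwframe_get_buffer`; the software path allocates plain memory with `av_frame_get_buffer`. A condensed sketch of the two paths (note `av_hwframe_get_buffer` attaches a `hw_frames_ctx` reference to the frame by itself):

```cpp
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
#include <libavutil/hwcontext.h>
}

// Sketch: allocate the session's reusable frame for either encode path.
int alloc_session_frame(AVCodecContext *ctx, AVFrame *frame, bool hardware) {
  frame->format = ctx->pix_fmt;
  frame->width  = ctx->width;
  frame->height = ctx->height;

  if(hardware) {
    // binds the frame to a hardware surface and refs ctx->hw_frames_ctx internally
    return av_hwframe_get_buffer(ctx->hw_frames_ctx, frame, 0);
  }
  return av_frame_get_buffer(frame, 0);
}
```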
@@ -562,9 +561,7 @@ void encode_run(
const encoder_t &encoder,
void *channel_data) {
void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
auto session = make_session(encoder, config, hwdevice);
auto session = make_session(encoder, config, hwdevice_ctx);
if(!session) {
return;
}
@@ -626,6 +623,9 @@ void encode_run(
}
else {
auto converted_img = hwdevice_ctx->convert(*img);
if(!converted_img) {
return;
}
encoder.img_to_frame(sws, *converted_img, session->frame);
@@ -657,6 +657,10 @@ void capture(
void *channel_data) {
auto images = std::make_shared<img_event_t::element_type>();
auto lg = util::fail_guard([&]() {
images->stop();
shutdown_event->raise(true);
});
// Keep a reference count to ensure the capture thread only runs while other threads hold a reference to it
static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
@@ -689,12 +693,13 @@ void capture(
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
if(!hwdevice_ctx) {
return;
}
}
encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
}
images->stop();
}
bool validate_config(const encoder_t &encoder, const config_t &config) {
@@ -706,17 +711,20 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
if(!hwdevice_ctx) {
return false;
}
void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
auto session = make_session(encoder, config, hwdevice);
auto session = make_session(encoder, config, hwdevice_ctx.get());
if(!session) {
return false;
}
int dummy_data;
auto img = disp->alloc_img();
disp->dummy_img(img.get(), dummy_data);
if(disp->dummy_img(img.get(), dummy_data)) {
return false;
}
sws_t sws;
if(encoder.system_memory) {
@@ -734,6 +742,9 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
}
else {
auto converted_img = hwdevice_ctx->convert(*img);
if(!converted_img) {
return false;
}
encoder.img_to_frame(sws, *converted_img, session->frame);
}
@@ -754,7 +765,7 @@ bool validate_encoder(const encoder_t &encoder) {
60,
1000,
1,
1,
0,
1,
0,
0
@@ -765,7 +776,7 @@ bool validate_encoder(const encoder_t &encoder) {
60,
1000,
1,
1,
0,
1,
1,
0
@@ -809,13 +820,39 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
}
void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
// Need to have something refcounted
if(!frame->buf[0]) {
frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor));
}
auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data;
desc->texture = (ID3D11Texture2D*)img.data;
desc->index = 0;
frame->data[0] = img.data;
frame->data[1] = 0;
frame->linesize[0] = img.row_pitch;
frame->linesize[1] = 0;
frame->height = img.height;
frame->width = img.width;
}
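For `AV_PIX_FMT_D3D11` frames, `data[0]` carries the `ID3D11Texture2D *` and `data[1]` the texture-array slice index cast through `intptr_t`; the refcounted `buf[0]` exists only because FFmpeg expects frame data to have an owner. A minimal sketch of wrapping an existing texture this way (assumes the texture outlives the frame):

```cpp
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/hwcontext_d3d11va.h>
}

// Sketch: point an AVFrame at an existing D3D11 texture without copying.
void wrap_d3d11_texture(AVFrame *frame, ID3D11Texture2D *tex, intptr_t index) {
  frame->format  = AV_PIX_FMT_D3D11;
  frame->data[0] = (uint8_t *)tex;   // the texture itself
  frame->data[1] = (uint8_t *)index; // slice within a texture array
}
```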
util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
auto err = av_hwdevice_ctx_init(ctx_buf.get());
if(err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return err;
}
return ctx_buf;
}
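The `std::swap` above is an ownership transfer: the AddRef'd `ID3D11Device` moves into the `AVD3D11VADeviceContext`, so unreferencing the buffer eventually releases it, while `hwdevice_ctx->hwdevice` becomes null and the platform destructor's `Release()` cannot double-free. A hypothetical call site (the helper name is illustrative):

```cpp
// Hypothetical call site: convert the platform context into an FFmpeg device ref.
buffer_t init_device_ref(platf::hwdevice_ctx_t *platform_ctx) {
  auto buf_or_error = nv_d3d_make_hwdevice_ctx(platform_ctx);
  if(buf_or_error.has_right()) {
    return {}; // buf_or_error.right() holds the AVERROR code
  }

  // platform_ctx->hwdevice is now nullptr: FFmpeg owns the device reference
  return std::move(buf_or_error.left());
}
```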
}