From afbca0f6cded67b9d63527658d8f7f02a76136fa Mon Sep 17 00:00:00 2001 From: loki Date: Mon, 6 Apr 2020 23:15:03 +0300 Subject: [PATCH] initialize nvenc --- pre-compiled | 2 +- sunshine/platform/common.h | 4 +- sunshine/platform/windows_dxgi.cpp | 44 ++++---- sunshine/video.cpp | 165 ++++++++++++++++++----------- 4 files changed, 129 insertions(+), 86 deletions(-) diff --git a/pre-compiled b/pre-compiled index 51f776db..afd9a9bb 160000 --- a/pre-compiled +++ b/pre-compiled @@ -1 +1 @@ -Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636 +Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416 diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 4ec9923c..2028a554 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -64,7 +64,7 @@ public: }; struct hwdevice_ctx_t { - std::shared_ptr hwdevice; + void *hwdevice {}; virtual const platf::img_t*const convert(platf::img_t &img) { return nullptr; @@ -96,7 +96,7 @@ public: } virtual std::shared_ptr make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) { - return nullptr; + return std::make_shared(); } virtual ~display_t() = default; diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp index 2fed8e13..4729c72e 100644 --- a/sunshine/platform/windows_dxgi.cpp +++ b/sunshine/platform/windows_dxgi.cpp @@ -124,6 +124,8 @@ struct img_t : public ::platf::img_t { struct img_d3d_t : public ::platf::img_t { std::shared_ptr display; texture2d_t texture; + + ~img_d3d_t() override = default; }; struct cursor_t { @@ -308,11 +310,11 @@ public: D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr }; auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream); if(FAILED(status)) { - BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']'; return nullptr; } - return &img; + return &this->img; } int init(std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) { @@ -364,7 +366,8 @@ public: t.ArraySize = 1; t.SampleDesc.Count = 1; t.Usage = D3D11_USAGE_DEFAULT; - t.Format = DXGI_FORMAT_420_OPAQUE; + t.Format = DXGI_FORMAT_NV12; + t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER; dxgi::texture2d_t::pointer tex_p {}; status = device_p->CreateTexture2D(&t, nullptr, &tex_p); @@ -377,19 +380,30 @@ public: img.display = std::move(display); img.width = out_width; img.height = out_height; + img.data = (std::uint8_t*)tex_p; + img.row_pitch = out_width; + img.pixel_pitch = 1; - D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D }; + D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 }; video::processor_out_t::pointer processor_out_p; - device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p); + status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } processor_out.reset(processor_out_p); + device_p->AddRef(); + hwdevice = device_p; return 0; } + ~hwdevice_ctx_t() override { + if(hwdevice) { + ((ID3D11Device*)hwdevice)->Release(); + } + } + img_d3d_t img; video::device_t device; video::ctx_t ctx; @@ -837,25 +851,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this dxgi::texture2d_t::pointer tex_p {}; auto status = device->CreateTexture2D(&t, &data, &tex_p); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } img->texture.reset(tex_p); - D3D11_MAPPED_SUBRESOURCE img_info {}; - // map the texture simply to get the pitch and stride - status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } + img->height = 1; + img->width = 1; + img->data = (std::uint8_t*)tex_p; + img->row_pitch = 4; + img->pixel_pitch = 4; - img->row_pitch = img_info.RowPitch; - img->height = 1; - img->width = 1; - img->data = (std::uint8_t*)img->texture.get(); - - device_ctx->Unmap(img->texture.get(), 0); return 0; } diff --git a/sunshine/video.cpp b/sunshine/video.cpp index bf0ff6c9..680b3d61 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -7,6 +7,7 @@ extern "C" { #include +#include } #include "platform/common.h" @@ -43,7 +44,9 @@ using sws_t = util::safe_ptr; using img_event_t = std::shared_ptr>>; void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame); + void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame); +util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx); struct encoder_t { struct option_t { @@ -58,8 +61,10 @@ struct encoder_t { } profile; AVHWDeviceType dev_type; + AVPixelFormat dev_pix_fmt; - AVPixelFormat pix_fmt; + AVPixelFormat static_pix_fmt; + AVPixelFormat dynamic_pix_fmt; struct { std::vector options; @@ -69,6 +74,7 @@ struct encoder_t { bool system_memory; std::function img_to_frame; + std::function(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx; }; struct session_t { @@ -87,23 +93,24 @@ static encoder_t nvenc { { 2, 0, 1 }, AV_HWDEVICE_TYPE_D3D11VA, AV_PIX_FMT_D3D11, + AV_PIX_FMT_NV12, AV_PIX_FMT_NV12, { - { {"force-idr"s, 1} }, "nvenc_hevc"s + { {"force-idr"s, 1} }, "hevc_nvenc"s }, { - { {"force-idr"s, 1} }, "nvenc_h264"s + { {"force-idr"s, 1} }, "h264_nvenc"s }, false, - nv_d3d_img_to_frame - - // D3D11Device + nv_d3d_img_to_frame, + nv_d3d_make_hwdevice_ctx }; static encoder_t software { { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 }, AV_HWDEVICE_TYPE_NONE, AV_PIX_FMT_NONE, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, { // x265's Info SEI is so long that it causes the IDR picture data to be // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic. @@ -123,9 +130,8 @@ static encoder_t software { }, true, - sw_img_to_frame - - // nullptr + sw_img_to_frame, + nullptr }; static std::vector encoders { @@ -192,7 +198,9 @@ void captureThread( } } auto &dummy_img = imgs.front(); - disp->dummy_img(dummy_img.get(), dummy_data); + if(disp->dummy_img(dummy_img.get(), dummy_data)) { + return; + } auto next_frame = std::chrono::steady_clock::now(); while(capture_ctx_queue->running()) { @@ -257,7 +265,9 @@ void captureThread( return; } } - disp->dummy_img(dummy_img.get(), dummy_data); + if(disp->dummy_img(dummy_img.get(), dummy_data)) { + return; + } reinit_event.reset(); continue; @@ -292,13 +302,22 @@ void captureThread( } } -util::Either hwdevice_ctx(AVHWDeviceType type) { +util::Either hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) { buffer_t ctx; - AVBufferRef *ref; - auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0); + int err; + if(hwdevice_ctx) { + ctx.reset(av_hwdevice_ctx_alloc(type)); + ((AVHWDeviceContext*)ctx.get())->hwctx = hwdevice_ctx; + + err = av_hwdevice_ctx_init(ctx.get()); + } + else { + AVBufferRef *ref {}; + err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0); + ctx.reset(ref); + } - ctx.reset(ref); if(err < 0) { return err; } @@ -314,7 +333,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { frame_ctx->sw_format = format; frame_ctx->height = ctx->height; frame_ctx->width = ctx->width; - frame_ctx->initial_pool_size = 20; + frame_ctx->initial_pool_size = 0; if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) { return err; @@ -331,7 +350,9 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets /* send the frame to the encoder */ auto ret = avcodec_send_frame(ctx.get(), frame.get()); if (ret < 0) { - BOOST_LOG(error) << "Could not send a frame for encoding"sv; + char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; + BOOST_LOG(error) << "Could not send a frame for encoding: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, ret); + return -1; } @@ -375,7 +396,7 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) { capture_thread_ctx.capture_thread.join(); } -std::optional make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) { +std::optional make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) { bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE; auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc; @@ -387,21 +408,6 @@ std::optional make_session(const encoder_t &encoder, const config_t return std::nullopt; } - buffer_t hwdevice; - if(hardware) { - auto buf_or_error = hwdevice_ctx(encoder.dev_type); - if(buf_or_error.has_right()) { - auto err = buf_or_error.right(); - - char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; - BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); - - return std::nullopt;; - } - - hwdevice = std::move(buf_or_error.left()); - } - ctx_t ctx {avcodec_alloc_context3(codec) }; ctx->width = config.width; ctx->height = config.height; @@ -463,21 +469,23 @@ std::optional make_session(const encoder_t &encoder, const config_t AVPixelFormat sw_fmt; if(config.dynamicRange == 0) { - sw_fmt = AV_PIX_FMT_YUV420P; + sw_fmt = encoder.static_pix_fmt; } else { - sw_fmt = AV_PIX_FMT_YUV420P10; + sw_fmt = encoder.dynamic_pix_fmt; } + buffer_t hwdevice; if(hardware) { - ctx->pix_fmt = encoder.pix_fmt; + ctx->pix_fmt = encoder.dev_pix_fmt; - ((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx; - - if(auto err = hwframe_ctx(ctx, hwdevice, sw_fmt); err < 0) { - char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; - BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl; + auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx); + if(buf_or_error.has_right()) { + return std::nullopt; + } + hwdevice = std::move(buf_or_error.left()); + if(hwframe_ctx(ctx, hwdevice, sw_fmt)) { return std::nullopt; } } @@ -516,9 +524,6 @@ std::optional make_session(const encoder_t &encoder, const config_t av_dict_set_int(&options, "qp", config::video.qp, 0); } - av_dict_set(&options, "preset", config::video.preset.c_str(), 0); - av_dict_set(&options, "tune", config::video.tune.c_str(), 0); - auto codec_handle = open_codec(ctx, codec, &options); frame_t frame {av_frame_alloc() }; @@ -528,15 +533,9 @@ std::optional make_session(const encoder_t &encoder, const config_t if(hardware) { - auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0); - if(err < 0) { - char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; - BOOST_LOG(error) << "Coudn't create hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl; - - return std::nullopt; - } + frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); } - else { + else /* software */ { av_frame_get_buffer(frame.get(), 0); } @@ -562,9 +561,7 @@ void encode_run( const encoder_t &encoder, void *channel_data) { - void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr; - - auto session = make_session(encoder, config, hwdevice); + auto session = make_session(encoder, config, hwdevice_ctx); if(!session) { return; } @@ -626,6 +623,9 @@ void encode_run( } else { auto converted_img = hwdevice_ctx->convert(*img); + if(!converted_img) { + return; + } encoder.img_to_frame(sws, *converted_img, session->frame); @@ -657,6 +657,10 @@ void capture( void *channel_data) { auto images = std::make_shared(); + auto lg = util::fail_guard([&]() { + images->stop(); + shutdown_event->raise(true); + }); // Keep a reference counter to ensure the Fcapture thread only runs when other threads have a reference to the capture thread static auto capture_thread = safe::make_shared(start_capture, end_capture); @@ -689,12 +693,13 @@ void capture( auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt); + if(!hwdevice_ctx) { + return; + } } encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data); } - - images->stop(); } bool validate_config(const encoder_t &encoder, const config_t &config) { @@ -706,17 +711,20 @@ bool validate_config(const encoder_t &encoder, const config_t &config) { auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt); + if(!hwdevice_ctx) { + return false; + } - void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr; - - auto session = make_session(encoder, config, hwdevice); + auto session = make_session(encoder, config, hwdevice_ctx.get()); if(!session) { return false; } int dummy_data; auto img = disp->alloc_img(); - disp->dummy_img(img.get(), dummy_data); + if(disp->dummy_img(img.get(), dummy_data)) { + return false; + } sws_t sws; if(encoder.system_memory) { @@ -734,6 +742,9 @@ bool validate_config(const encoder_t &encoder, const config_t &config) { } else { auto converted_img = hwdevice_ctx->convert(*img); + if(!converted_img) { + return false; + } encoder.img_to_frame(sws, *converted_img, session->frame); } @@ -754,7 +765,7 @@ bool validate_encoder(const encoder_t &encoder) { 60, 1000, 1, - 1, + 0, 1, 0, 0 @@ -765,7 +776,7 @@ bool validate_encoder(const encoder_t &encoder) { 60, 1000, 1, - 1, + 0, 1, 1, 0 @@ -809,13 +820,39 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { } void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { + // Need to have something refcounted + if(!frame->buf[0]) { + frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor*)); + } + + auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data; + desc->texture = (ID3D11Texture2D*)img.data; + desc->index = 0; + frame->data[0] = img.data; frame->data[1] = 0; frame->linesize[0] = img.row_pitch; - frame->linesize[1] = 0; frame->height = img.height; frame->width = img.width; } + +util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) { + buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) }; + auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx; + + std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0); + std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice); + + auto err = av_hwdevice_ctx_init(ctx_buf.get()); + if(err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; + BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + + return err; + } + + return ctx_buf; +} }