initialize nvenc

loki 2020-04-06 23:15:03 +03:00
parent f2636b163e
commit afbca0f6cd
4 changed files with 129 additions and 86 deletions

@@ -1 +1 @@
Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416


@@ -64,7 +64,7 @@ public:
};
struct hwdevice_ctx_t {
std::shared_ptr<void> hwdevice;
void *hwdevice {};
virtual const platf::img_t*const convert(platf::img_t &img) {
return nullptr;
@@ -96,7 +96,7 @@ public:
}
virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
return nullptr;
return std::make_shared<hwdevice_ctx_t>();
}
virtual ~display_t() = default;
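Note: the base `hwdevice_ctx_t` now stores a raw `void *hwdevice` instead of a `std::shared_ptr<void>`, so each derived context owns that pointer's lifetime explicitly, and the default `make_hwdevice_ctx` hands back an empty context instead of `nullptr`. A minimal sketch of what a derived context managing a COM pointer might look like (the type name and `IUnknown` cast are illustrative, not from this commit):

```cpp
// Illustrative sketch: a derived context that owns whatever init() stored in
// the base-class `hwdevice` pointer and releases it exactly once.
struct example_hwdevice_ctx_t : platf::hwdevice_ctx_t {
  ~example_hwdevice_ctx_t() override {
    if(hwdevice) {
      // assumes init() AddRef'd a COM object before storing it
      ((IUnknown *)hwdevice)->Release();
    }
  }
};
```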


@@ -124,6 +124,8 @@ struct img_t : public ::platf::img_t {
struct img_d3d_t : public ::platf::img_t {
std::shared_ptr<platf::display_t> display;
texture2d_t texture;
~img_d3d_t() override = default;
};
struct cursor_t {
@@ -308,11 +310,11 @@ public:
D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']';
BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
return nullptr;
}
return &img;
return &this->img;
}
int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
@@ -364,7 +366,8 @@ public:
t.ArraySize = 1;
t.SampleDesc.Count = 1;
t.Usage = D3D11_USAGE_DEFAULT;
t.Format = DXGI_FORMAT_420_OPAQUE;
t.Format = DXGI_FORMAT_NV12;
t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
dxgi::texture2d_t::pointer tex_p {};
status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
@@ -377,19 +380,30 @@ public:
img.display = std::move(display);
img.width = out_width;
img.height = out_height;
img.data = (std::uint8_t*)tex_p;
img.row_pitch = out_width;
img.pixel_pitch = 1;
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D };
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
video::processor_out_t::pointer processor_out_p;
device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
processor_out.reset(processor_out_p);
device_p->AddRef();
hwdevice = device_p;
return 0;
}
~hwdevice_ctx_t() override {
if(hwdevice) {
((ID3D11Device*)hwdevice)->Release();
}
}
img_d3d_t img;
video::device_t device;
video::ctx_t ctx;
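`init()` now takes a strong reference on the `ID3D11Device` with `AddRef()` before storing it in the base-class `hwdevice` pointer, and the new destructor balances it with `Release()`. A hypothetical RAII wrapper for the same pairing (not part of this codebase) would look like:

```cpp
#include <d3d11.h>

// Hypothetical RAII wrapper mirroring the manual AddRef()/Release() pairing above.
class device_ref_t {
public:
  explicit device_ref_t(ID3D11Device *p) : p_ { p } {
    if(p_) p_->AddRef();  // take a strong reference on construction
  }
  ~device_ref_t() {
    if(p_) p_->Release(); // drop it exactly once on destruction
  }
  device_ref_t(const device_ref_t &) = delete;
  device_ref_t &operator=(const device_ref_t &) = delete;

  ID3D11Device *get() const { return p_; }

private:
  ID3D11Device *p_ {};
};
```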
@@ -837,25 +851,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
dxgi::texture2d_t::pointer tex_p {};
auto status = device->CreateTexture2D(&t, &data, &tex_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->texture.reset(tex_p);
D3D11_MAPPED_SUBRESOURCE img_info {};
// map the texture simply to get the pitch and stride
status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->height = 1;
img->width = 1;
img->data = (std::uint8_t*)tex_p;
img->row_pitch = 4;
img->pixel_pitch = 4;
img->row_pitch = img_info.RowPitch;
img->height = 1;
img->width = 1;
img->data = (std::uint8_t*)img->texture.get();
device_ctx->Unmap(img->texture.get(), 0);
return 0;
}
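`dummy_img` no longer hardcodes `row_pitch = 4`; it maps the texture once and reads the pitch the driver actually chose, since D3D11 may pad rows beyond `width * bytes_per_pixel`. A standalone sketch of the same query (assuming a staging texture with CPU read access, which `D3D11_MAP_READ` requires):

```cpp
#include <d3d11.h>

// Sketch: read the driver-chosen row pitch of a CPU-readable texture.
int query_row_pitch(ID3D11DeviceContext *ctx, ID3D11Texture2D *tex, UINT &pitch) {
  D3D11_MAPPED_SUBRESOURCE info {};

  HRESULT status = ctx->Map(tex, 0, D3D11_MAP_READ, 0, &info);
  if(FAILED(status)) {
    return -1;
  }

  pitch = info.RowPitch; // may exceed width * bytes-per-pixel due to padding
  ctx->Unmap(tex, 0);
  return 0;
}
```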


@@ -7,6 +7,7 @@
extern "C" {
#include <libswscale/swscale.h>
#include <libavutil/hwcontext_d3d11va.h>
}
#include "platform/common.h"
@@ -43,7 +44,9 @@ using sws_t = util::safe_ptr<SwsContext, sws_freeContext>;
using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
struct encoder_t {
struct option_t {
@@ -58,8 +61,10 @@ struct encoder_t {
} profile;
AVHWDeviceType dev_type;
AVPixelFormat dev_pix_fmt;
AVPixelFormat pix_fmt;
AVPixelFormat static_pix_fmt;
AVPixelFormat dynamic_pix_fmt;
struct {
std::vector<option_t> options;
@@ -69,6 +74,7 @@ struct encoder_t {
bool system_memory;
std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
};
struct session_t {
@@ -87,23 +93,24 @@ static encoder_t nvenc {
{ 2, 0, 1 },
AV_HWDEVICE_TYPE_D3D11VA,
AV_PIX_FMT_D3D11,
AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
{
{ {"force-idr"s, 1} }, "nvenc_hevc"s
{ {"force-idr"s, 1} }, "hevc_nvenc"s
},
{
{ {"force-idr"s, 1} }, "nvenc_h264"s
{ {"force-idr"s, 1} }, "h264_nvenc"s
},
false,
nv_d3d_img_to_frame
// D3D11Device
nv_d3d_img_to_frame,
nv_d3d_make_hwdevice_ctx
};
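The renames here matter: FFmpeg registers its NVENC encoders as `h264_nvenc` and `hevc_nvenc`; the old `nvenc_h264`/`nvenc_hevc` names would never resolve. A quick sketch for probing them at runtime (assumes the FFmpeg 4.x avcodec API):

```cpp
extern "C" {
#include <libavcodec/avcodec.h>
}

// Sketch: check that the linked FFmpeg build actually ships the NVENC encoders.
bool has_nvenc() {
  return avcodec_find_encoder_by_name("h264_nvenc") &&
         avcodec_find_encoder_by_name("hevc_nvenc");
}
```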
static encoder_t software {
{ FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
AV_HWDEVICE_TYPE_NONE,
AV_PIX_FMT_NONE,
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
{
// x265's Info SEI is so long that it causes the IDR picture data to be
// kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
@@ -123,9 +130,8 @@ static encoder_t software {
},
true,
sw_img_to_frame
// nullptr
sw_img_to_frame,
nullptr
};
static std::vector<encoder_t> encoders {
@@ -192,7 +198,9 @@ void captureThread(
}
}
auto &dummy_img = imgs.front();
disp->dummy_img(dummy_img.get(), dummy_data);
if(disp->dummy_img(dummy_img.get(), dummy_data)) {
return;
}
auto next_frame = std::chrono::steady_clock::now();
while(capture_ctx_queue->running()) {
@@ -257,7 +265,9 @@ void captureThread(
return;
}
}
disp->dummy_img(dummy_img.get(), dummy_data);
if(disp->dummy_img(dummy_img.get(), dummy_data)) {
return;
}
reinit_event.reset();
continue;
@@ -292,13 +302,22 @@ void captureThread(
}
}
util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type) {
util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
buffer_t ctx;
AVBufferRef *ref;
auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
int err;
if(hwdevice_ctx) {
ctx.reset(av_hwdevice_ctx_alloc(type));
((AVHWDeviceContext*)ctx->data)->hwctx = hwdevice_ctx;
err = av_hwdevice_ctx_init(ctx.get());
}
else {
AVBufferRef *ref {};
err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
ctx.reset(ref);
}
ctx.reset(ref);
if(err < 0) {
return err;
}
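`hwdevice_ctx()` now takes two paths: adopt a caller-supplied platform context via `av_hwdevice_ctx_alloc` + `av_hwdevice_ctx_init`, or let FFmpeg create one with `av_hwdevice_ctx_create`. A self-contained sketch of the same logic, using a raw `AVBufferRef *` instead of `buffer_t` (note that for most device types `hwctx` must point at the type-specific struct, e.g. `AVD3D11VADeviceContext` for D3D11VA):

```cpp
extern "C" {
#include <libavutil/hwcontext.h>
}

// Sketch: either wrap an existing platform device context or create a fresh one.
AVBufferRef *make_device_ref(AVHWDeviceType type, void *native_ctx) {
  if(native_ctx) {
    AVBufferRef *ref = av_hwdevice_ctx_alloc(type);
    if(!ref) {
      return nullptr;
    }

    // point the generic context at the caller-provided platform context
    ((AVHWDeviceContext *)ref->data)->hwctx = native_ctx;
    if(av_hwdevice_ctx_init(ref) < 0) {
      av_buffer_unref(&ref);
      return nullptr;
    }
    return ref;
  }

  AVBufferRef *ref {};
  if(av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0) < 0) {
    return nullptr;
  }
  return ref;
}
```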
@@ -314,7 +333,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
frame_ctx->sw_format = format;
frame_ctx->height = ctx->height;
frame_ctx->width = ctx->width;
frame_ctx->initial_pool_size = 20;
frame_ctx->initial_pool_size = 0;
if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
return err;
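`initial_pool_size = 0` tells FFmpeg not to preallocate a fixed frame pool; for D3D11VA each frame's texture is then allocated on demand rather than carved out of one big texture array up front. A sketch of the surrounding `hwframe_ctx()` setup with raw pointers:

```cpp
extern "C" {
#include <libavutil/hwcontext.h>
}

// Sketch: bind a frames context to a device ref, with on-demand allocation.
AVBufferRef *make_frames_ref(AVBufferRef *device_ref, AVPixelFormat hw_fmt,
                             AVPixelFormat sw_fmt, int width, int height) {
  AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
  if(!frames_ref) {
    return nullptr;
  }

  auto frame_ctx = (AVHWFramesContext *)frames_ref->data;
  frame_ctx->format            = hw_fmt; // e.g. AV_PIX_FMT_D3D11
  frame_ctx->sw_format         = sw_fmt; // e.g. AV_PIX_FMT_NV12
  frame_ctx->width             = width;
  frame_ctx->height            = height;
  frame_ctx->initial_pool_size = 0;      // no preallocated pool

  if(av_hwframe_ctx_init(frames_ref) < 0) {
    av_buffer_unref(&frames_ref);
    return nullptr;
  }
  return frames_ref;
}
```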
@@ -331,7 +350,9 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
/* send the frame to the encoder */
auto ret = avcodec_send_frame(ctx.get(), frame.get());
if (ret < 0) {
BOOST_LOG(error) << "Could not send a frame for encoding"sv;
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Could not send a frame for encoding: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, ret);
return -1;
}
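`encode()` now logs the actual FFmpeg error rather than a bare message. The `err_str` buffer dance recurs throughout this file; a tiny helper (hypothetical name, not in this commit) keeps it readable:

```cpp
extern "C" {
#include <libavutil/error.h>
}
#include <string>

// Hypothetical helper wrapping av_make_error_string for log output.
std::string av_err_str(int err) {
  char buf[AV_ERROR_MAX_STRING_SIZE] {};
  return av_make_error_string(buf, AV_ERROR_MAX_STRING_SIZE, err);
}
```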
@@ -375,7 +396,7 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
capture_thread_ctx.capture_thread.join();
}
std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) {
std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
@@ -387,21 +408,6 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
return std::nullopt;
}
buffer_t hwdevice;
if(hardware) {
auto buf_or_error = hwdevice_ctx(encoder.dev_type);
if(buf_or_error.has_right()) {
auto err = buf_or_error.right();
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return std::nullopt;
}
hwdevice = std::move(buf_or_error.left());
}
ctx_t ctx {avcodec_alloc_context3(codec) };
ctx->width = config.width;
ctx->height = config.height;
@@ -463,21 +469,23 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
AVPixelFormat sw_fmt;
if(config.dynamicRange == 0) {
sw_fmt = AV_PIX_FMT_YUV420P;
sw_fmt = encoder.static_pix_fmt;
}
else {
sw_fmt = AV_PIX_FMT_YUV420P10;
sw_fmt = encoder.dynamic_pix_fmt;
}
buffer_t hwdevice;
if(hardware) {
ctx->pix_fmt = encoder.pix_fmt;
ctx->pix_fmt = encoder.dev_pix_fmt;
((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx;
if(auto err = hwframe_ctx(ctx, hwdevice, sw_fmt); err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx);
if(buf_or_error.has_right()) {
return std::nullopt;
}
hwdevice = std::move(buf_or_error.left());
if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
return std::nullopt;
}
}
@@ -516,9 +524,6 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
av_dict_set_int(&options, "qp", config::video.qp, 0);
}
av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
auto codec_handle = open_codec(ctx, codec, &options);
frame_t frame {av_frame_alloc() };
@@ -528,15 +533,9 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t
if(hardware) {
auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0);
if(err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Coudn't create hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
return std::nullopt;
}
frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
}
else {
else /* software */ {
av_frame_get_buffer(frame.get(), 0);
}
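For hardware sessions the reusable `AVFrame` is pulled from the frames context with `av_hwframe_get_buffer`; the software path allocates plain memory with `av_frame_get_buffer`. A condensed sketch of the two paths (note `av_hwframe_get_buffer` attaches a `hw_frames_ctx` reference to the frame by itself):

```cpp
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
#include <libavutil/hwcontext.h>
}

// Sketch: allocate the session's reusable frame for either encode path.
int alloc_session_frame(AVCodecContext *ctx, AVFrame *frame, bool hardware) {
  frame->format = ctx->pix_fmt;
  frame->width  = ctx->width;
  frame->height = ctx->height;

  if(hardware) {
    // binds the frame to a hardware surface and refs ctx->hw_frames_ctx internally
    return av_hwframe_get_buffer(ctx->hw_frames_ctx, frame, 0);
  }
  return av_frame_get_buffer(frame, 0);
}
```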
@@ -562,9 +561,7 @@ void encode_run(
const encoder_t &encoder,
void *channel_data) {
void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
auto session = make_session(encoder, config, hwdevice);
auto session = make_session(encoder, config, hwdevice_ctx);
if(!session) {
return;
}
@@ -626,6 +623,9 @@ void encode_run(
}
else {
auto converted_img = hwdevice_ctx->convert(*img);
if(!converted_img) {
return;
}
encoder.img_to_frame(sws, *converted_img, session->frame);
@@ -657,6 +657,10 @@ void capture(
void *channel_data) {
auto images = std::make_shared<img_event_t::element_type>();
auto lg = util::fail_guard([&]() {
images->stop();
shutdown_event->raise(true);
});
// Keep a reference count to ensure the capture thread only runs while other threads hold a reference to it
static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
@@ -689,12 +693,13 @@ void capture(
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
if(!hwdevice_ctx) {
return;
}
}
encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
}
images->stop();
}
bool validate_config(const encoder_t &encoder, const config_t &config) {
@@ -706,17 +711,20 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
if(!hwdevice_ctx) {
return false;
}
void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
auto session = make_session(encoder, config, hwdevice);
auto session = make_session(encoder, config, hwdevice_ctx.get());
if(!session) {
return false;
}
int dummy_data;
auto img = disp->alloc_img();
disp->dummy_img(img.get(), dummy_data);
if(disp->dummy_img(img.get(), dummy_data)) {
return false;
}
sws_t sws;
if(encoder.system_memory) {
@@ -734,6 +742,9 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
}
else {
auto converted_img = hwdevice_ctx->convert(*img);
if(!converted_img) {
return false;
}
encoder.img_to_frame(sws, *converted_img, session->frame);
}
@@ -754,7 +765,7 @@ bool validate_encoder(const encoder_t &encoder) {
60,
1000,
1,
1,
0,
1,
0,
0
@@ -765,7 +776,7 @@ bool validate_encoder(const encoder_t &encoder) {
60,
1000,
1,
1,
0,
1,
1,
0
@@ -809,13 +820,39 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
}
void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
// Need to have something refcounted
if(!frame->buf[0]) {
frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor));
}
auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data;
desc->texture = (ID3D11Texture2D*)img.data;
desc->index = 0;
frame->data[0] = img.data;
frame->data[1] = 0;
frame->linesize[0] = img.row_pitch;
frame->linesize[1] = 0;
frame->height = img.height;
frame->width = img.width;
}
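For `AV_PIX_FMT_D3D11` frames, `data[0]` carries the `ID3D11Texture2D *` and `data[1]` the texture-array slice index cast through `intptr_t`; the refcounted `buf[0]` exists only because FFmpeg expects frame data to have an owner. A minimal sketch of wrapping an existing texture this way (assumes the texture outlives the frame):

```cpp
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/hwcontext_d3d11va.h>
}

// Sketch: point an AVFrame at an existing D3D11 texture without copying.
void wrap_d3d11_texture(AVFrame *frame, ID3D11Texture2D *tex, intptr_t index) {
  frame->format  = AV_PIX_FMT_D3D11;
  frame->data[0] = (uint8_t *)tex;   // the texture itself
  frame->data[1] = (uint8_t *)index; // slice within a texture array
}
```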
util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
auto err = av_hwdevice_ctx_init(ctx_buf.get());
if(err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return err;
}
return ctx_buf;
}
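The `std::swap` above is an ownership transfer: the AddRef'd `ID3D11Device` moves into the `AVD3D11VADeviceContext`, so unreferencing the buffer eventually releases it, while `hwdevice_ctx->hwdevice` becomes null and the platform destructor's `Release()` cannot double-free. A hypothetical call site (the helper name is illustrative):

```cpp
// Hypothetical call site: convert the platform context into an FFmpeg device ref.
buffer_t init_device_ref(platf::hwdevice_ctx_t *platform_ctx) {
  auto buf_or_error = nv_d3d_make_hwdevice_ctx(platform_ctx);
  if(buf_or_error.has_right()) {
    return {}; // buf_or_error.right() holds the AVERROR code
  }

  // platform_ctx->hwdevice is now nullptr: FFmpeg owns the device reference
  return std::move(buf_or_error.left());
}
```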
}