Prepare for hardware encoders

This commit is contained in:
loki 2020-03-25 10:51:32 +01:00
parent a9423574fe
commit 55705af922
6 changed files with 352 additions and 170 deletions

@ -327,8 +327,8 @@ int parse(int argc, char *argv[]) {
std::istreambuf_iterator<char>()
});
for(auto &var : cmd_vars) {
vars.emplace(std::move(var));
for(auto &[name,value] : cmd_vars) {
vars.insert_or_assign(std::move(name), std::move(value));
}
apply_config(std::move(vars));

@ -541,8 +541,6 @@ void launch(resp_https_t response, req_https_t request) {
response->write(data.str());
});
BOOST_LOG(fatal) << stream::session_count();
if(stream::session_count() == config::stream.channels) {
tree.put("root.resume", 0);
tree.put("root.<xmlattr>.status_code", 503);
@ -601,7 +599,6 @@ void resume(resp_https_t response, req_https_t request) {
// Due to a race condition, this if-statement may give a false negative;
// that case is resolved automatically in rtsp_server_t
if(stream::session_count() == config::stream.channels) {
BOOST_LOG(fatal) << stream::session_count();
tree.put("root.resume", 0);
tree.put("root.<xmlattr>.status_code", 503);

@ -68,7 +68,7 @@ enum class capture_e : int {
class display_t {
public:
virtual capture_e snapshot(img_t *img, bool cursor) = 0;
virtual std::unique_ptr<img_t> alloc_img() = 0;
virtual std::shared_ptr<img_t> alloc_img() = 0;
virtual ~display_t() = default;
};

@ -176,8 +176,8 @@ struct x11_attr_t : public display_t {
return capture_e::ok;
}
std::unique_ptr<img_t> alloc_img() override {
return std::make_unique<x11_img_t>();
std::shared_ptr<img_t> alloc_img() override {
return std::make_shared<x11_img_t>();
}
xdisplay_t xdisplay;
@ -251,8 +251,8 @@ struct shm_attr_t : public x11_attr_t {
return capture_e::ok;
}
std::unique_ptr<img_t> alloc_img() override {
return std::make_unique<shm_img_t>();
std::shared_ptr<img_t> alloc_img() override {
return std::make_shared<shm_img_t>();
}
int init() {

@ -339,8 +339,8 @@ public:
return capture_e::ok;
}
std::unique_ptr<::platf::img_t> alloc_img() override {
auto img = std::make_unique<img_t>();
std::shared_ptr<::platf::img_t> alloc_img() override {
auto img = std::make_shared<img_t>();
img->data = nullptr;
img->height = 0;

@ -26,15 +26,102 @@ void free_frame(AVFrame *frame) {
av_frame_free(&frame);
}
// Deleter for AVBufferRef handles (used by buffer_t): hands the reference to av_buffer_unref.
// av_buffer_unref wants the address of the pointer; the by-value parameter provides that storage.
void free_buffer(AVBufferRef *ref) {
av_buffer_unref(&ref);
}
// Deleter-style helper for AVPacket: hands the packet to av_packet_free.
// av_packet_free wants the address of the pointer; the by-value parameter provides that storage.
void free_packet(AVPacket *packet) {
av_packet_free(&packet);
}
// Handle types for the FFmpeg/swscale objects used below.
// NOTE(review): util::safe_ptr presumably calls the named function when the handle is destroyed — confirm in util.
using ctx_t = util::safe_ptr<AVCodecContext, free_ctx>;
// NOTE(review): presumably calls avcodec_close (declared as returning int) on the context it holds,
// i.e. it closes the codec rather than freeing the context — confirm against util::safe_ptr_v2.
using codec_t = util::safe_ptr_v2<AVCodecContext, int, avcodec_close>;
using frame_t = util::safe_ptr<AVFrame, free_frame>;
using buffer_t = util::safe_ptr<AVBufferRef, free_buffer>;
using sws_t = util::safe_ptr<SwsContext, sws_freeContext>;
// Shared event object whose payload is a shared pointer to a captured image
using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
// Forward declaration: converts a captured image into an encoder frame using swscale
void sw_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame);
// Static description of one encoder backend: which FFmpeg codecs to look up,
// how to configure them and how captured images reach the encoder.
// The struct is an aggregate; member order must match the brace initialisers that build it.
struct encoder_t {
  // A single codec option: a name plus either an integer or a string value
  struct option_t {
    std::string name;
    util::Either<std::int64_t, std::string> value;
  };

  // Values written to AVCodecContext::profile for each supported stream type
  struct profile_t {
    int h264_high;
    int hevc_main;
    int hevc_main_10;
  };

  // Per-codec settings: extra options and the FFmpeg encoder name used for lookup
  struct format_t {
    std::vector<option_t> options;
    std::string name;
  };

  profile_t profile;

  // Device type used when a hardware device context has to be created
  AVHWDeviceType dev_type;

  // Pixel format given to the codec context on the hardware path
  AVPixelFormat pix_fmt;

  format_t hevc;
  format_t h264;

  // True when frames are filled from images held in system memory
  bool system_memory;

  // Converts a captured image into the frame handed to the encoder
  std::function<void(sws_t &, platf::img_t&, frame_t&)> img_to_frame;
};
// State of one encoding session, built by make_session().
// Members are destroyed in reverse declaration order, so `frame` and `codec_handle`
// are released before `ctx`, and `ctx` before `hwdevice`. Keep the order as is.
struct session_t {
// Hardware device reference; stays empty when no hardware device is used
buffer_t hwdevice;
// The configured codec context
ctx_t ctx;
// Handle returned by open_codec() for `ctx`
codec_t codec_handle;
// Frame that is filled and sent to the encoder
frame_t frame;
// SWS_CS_* colour space chosen from the stream configuration, for swscale setup
int sws_color_format;
};
// Description of the NVENC hardware encoder (Direct3D 11 device and pixel format).
static encoder_t nvenc {
  // Profile values for h264 high, hevc main and hevc main 10.
  // NOTE(review): these look like NVENC's own profile numbering rather than FF_PROFILE_* — confirm.
  { 2, 0, 1 },
  AV_HWDEVICE_TYPE_D3D11VA,
  AV_PIX_FMT_D3D11,
  {
    // FFmpeg's NVENC option is spelled "forced-idr"; a misspelled key is silently ignored.
    // "hevc_nvenc" is the canonical encoder name ("nvenc_hevc" is a deprecated alias).
    { { "forced-idr"s, 1 } }, "hevc_nvenc"s
  },
  {
    // Same as above; "h264_nvenc" is the canonical name for "nvenc_h264"
    { { "forced-idr"s, 1 } }, "h264_nvenc"s
  },
  // Frames do not come from system memory
  false,
  // No image-to-frame conversion routine yet
  nullptr
  // D3D11Device
};
// Description of the software (libx264 / libx265) encoder. No hardware device is involved,
// and captured images are converted with swscale through sw_img_to_frame.
static encoder_t software {
  { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
  AV_HWDEVICE_TYPE_NONE,
  AV_PIX_FMT_NONE,
  {
    // x265's Info SEI is so long that it causes the IDR picture data to be
    // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
    // It also looks like gop_size isn't passed on to x265, so we have to set
    // 'keyint=-1' in the parameters ourselves.
    { { "x265-params"s, "info=0:keyint=-1"s } }, "libx265"s
  },
  {
    // libx264 needs no extra options. This must be an empty list: `{{}}` would
    // create one option with an empty name and no value.
    {}, "libx264"s
  },
  // Frames are filled from images in system memory
  true,
  sw_img_to_frame
};
// All encoder descriptions known to this file, NVENC first and the software encoder second.
// The vector holds copies of `nvenc` and `software`, not references to them.
static std::vector<encoder_t> encoders {
nvenc, software
};
struct capture_ctx_t {
img_event_t images;
std::chrono::nanoseconds delay;
@ -45,12 +132,10 @@ struct capture_thread_ctx_t {
std::thread capture_thread;
};
[[nodiscard]] auto open_codec(ctx_t &ctx, AVCodec *codec, AVDictionary **options) {
[[nodiscard]] codec_t open_codec(ctx_t &ctx, AVCodec *codec, AVDictionary **options) {
avcodec_open2(ctx.get(), codec, options);
return util::fail_guard([&]() {
avcodec_close(ctx.get());
});
return codec_t { ctx.get() };
}
int capture_display(platf::img_t *img, std::unique_ptr<platf::display_t> &disp) {
@ -140,24 +225,61 @@ void captureThread(std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_que
}
}
void encode(int64_t frame, ctx_t &ctx, sws_t &sws, frame_t &yuv_frame, platf::img_t &img, packet_queue_t &packets, void *channel_data) {
av_frame_make_writable(yuv_frame.get());
// Creates a hardware device context of the given type with default device and options.
//
// Returns the owning buffer reference on success (left), or the negative
// FFmpeg error code on failure (right).
util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type) {
  // Start from a known value so the owning wrapper below never sees an indeterminate pointer
  AVBufferRef *ref { nullptr };

  auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);

  // Take ownership before inspecting the status, so whatever was stored in `ref` is released on every path
  buffer_t ctx { ref };
  if(err < 0) {
    return err;
  }

  return ctx;
}
// Creates and initialises a hardware frames context on `hwdevice` and attaches it to `ctx`.
//
// ctx      codec context; its pix_fmt, width and height must already be set
// hwdevice hardware device reference the frames are allocated on
// format   software pixel format of the frames
//
// Returns 0 on success or a negative FFmpeg error code.
int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
  buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get()) };
  if(!frame_ref.get()) {
    // Allocation failed; dereferencing the reference below would crash
    return AVERROR(ENOMEM);
  }

  auto frame_ctx = reinterpret_cast<AVHWFramesContext *>(frame_ref->data);
  frame_ctx->format            = ctx->pix_fmt;
  frame_ctx->sw_format         = format;
  frame_ctx->height            = ctx->height;
  frame_ctx->width             = ctx->width;
  // Number of frames preallocated in the pool
  frame_ctx->initial_pool_size = 20;

  if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
    return err;
  }

  // The codec context gets its own reference; frame_ref drops ours when it goes out of scope
  auto ctx_ref = av_buffer_ref(frame_ref.get());
  if(!ctx_ref) {
    return AVERROR(ENOMEM);
  }
  ctx->hw_frames_ctx = ctx_ref;

  return 0;
}
void sw_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame) {
av_frame_make_writable(frame.get());
const int linesizes[2] {
img.row_pitch, 0
};
auto data = img.data;
int ret = sws_scale(sws.get(), (uint8_t*const*)&data, linesizes, 0, img.height, yuv_frame->data, yuv_frame->linesize);
int ret = sws_scale(sws.get(), (std::uint8_t*const*)&img.data, linesizes, 0, img.height, frame->data, frame->linesize);
if(ret <= 0) {
exit(1);
}
BOOST_LOG(fatal) << "Couldn't convert image to required format and/or size"sv;
yuv_frame->pts = frame;
log_flush();
std::abort();
}
}
void encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets, void *channel_data) {
frame->pts = frame_nr;
/* send the frame to the encoder */
ret = avcodec_send_frame(ctx.get(), yuv_frame.get());
auto ret = avcodec_send_frame(ctx.get(), frame.get());
if (ret < 0) {
BOOST_LOG(fatal) << "Could not send a frame for encoding"sv;
log_flush();
@ -197,6 +319,180 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
capture_thread_ctx.capture_thread.join();
}
std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) {
bool hardware = device_ctx;
auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
auto codec = avcodec_find_encoder_by_name(video_format.name.c_str());
if(!codec) {
BOOST_LOG(error) << "Couldn't open ["sv << video_format.name << ']';
return std::nullopt;
}
buffer_t hwdevice;
if(hardware) {
auto buf_or_error = hwdevice_ctx(encoder.dev_type);
if(buf_or_error.has_right()) {
auto err = buf_or_error.right();
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return std::nullopt;;
}
hwdevice = std::move(buf_or_error.left());
}
ctx_t ctx {avcodec_alloc_context3(codec) };
ctx->width = config.width;
ctx->height = config.height;
ctx->time_base = AVRational{1, config.framerate};
ctx->framerate = AVRational{config.framerate, 1};
if(config.videoFormat == 0) {
ctx->profile = encoder.profile.h264_high;
}
else if(config.dynamicRange == 0) {
ctx->profile = encoder.profile.hevc_main;
}
else {
ctx->profile = encoder.profile.hevc_main_10;
}
// B-frames delay decoder output, so never use them
ctx->max_b_frames = 0;
// Use an infinite GOP length since I-frames are generated on demand
ctx->gop_size = std::numeric_limits<int>::max();
ctx->keyint_min = ctx->gop_size;
// Some client decoders have limits on the number of reference frames
ctx->refs = config.numRefFrames;
ctx->flags |= (AV_CODEC_FLAG_CLOSED_GOP | AV_CODEC_FLAG_LOW_DELAY);
ctx->flags2 |= AV_CODEC_FLAG2_FAST;
ctx->color_range = (config.encoderCscMode & 0x1) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
int sws_color_space;
switch (config.encoderCscMode >> 1) {
case 0:
default:
// Rec. 601
ctx->color_primaries = AVCOL_PRI_SMPTE170M;
ctx->color_trc = AVCOL_TRC_SMPTE170M;
ctx->colorspace = AVCOL_SPC_SMPTE170M;
sws_color_space = SWS_CS_SMPTE170M;
break;
case 1:
// Rec. 709
ctx->color_primaries = AVCOL_PRI_BT709;
ctx->color_trc = AVCOL_TRC_BT709;
ctx->colorspace = AVCOL_SPC_BT709;
sws_color_space = SWS_CS_ITU709;
break;
case 2:
// Rec. 2020
ctx->color_primaries = AVCOL_PRI_BT2020;
ctx->color_trc = AVCOL_TRC_BT2020_10;
ctx->colorspace = AVCOL_SPC_BT2020_NCL;
sws_color_space = SWS_CS_BT2020;
break;
}
AVPixelFormat src_fmt;
if(config.dynamicRange == 0) {
src_fmt = AV_PIX_FMT_YUV420P;
}
else {
src_fmt = AV_PIX_FMT_YUV420P10;
}
if(hardware) {
ctx->pix_fmt = encoder.pix_fmt;
((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx;
if(auto err = hwframe_ctx(ctx, hwdevice, src_fmt); err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
return std::nullopt;
}
}
else /* software */ {
ctx->pix_fmt = src_fmt;
// Clients will request for the fewest slices per frame to get the
// most efficient encode, but we may want to provide more slices than
// requested to ensure we have enough parallelism for good performance.
ctx->slices = std::max(config.slicesPerFrame, config::video.min_threads);
ctx->thread_type = FF_THREAD_SLICE;
ctx->thread_count = ctx->slices;
}
AVDictionary *options {nullptr};
for(auto &option : video_format.options) {
if(option.value.has_left()) {
av_dict_set_int(&options, option.name.c_str(), option.value.left(), 0);
}
else {
av_dict_set(&options, option.name.c_str(), option.value.right().c_str(), 0);
}
}
if(config.bitrate > 500) {
auto bitrate = config.bitrate * 1000;
ctx->rc_max_rate = bitrate;
ctx->rc_buffer_size = bitrate / 100;
ctx->bit_rate = bitrate;
ctx->rc_min_rate = bitrate;
}
else if(config::video.crf != 0) {
av_dict_set_int(&options, "crf", config::video.crf, 0);
}
else {
av_dict_set_int(&options, "qp", config::video.qp, 0);
}
av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
auto codec_handle = open_codec(ctx, codec, &options);
frame_t frame {av_frame_alloc() };
frame->format = ctx->pix_fmt;
frame->width = ctx->width;
frame->height = ctx->height;
if(config.videoFormat == 1) {
auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0);
if(err < 0) {
char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
BOOST_LOG(error) << "Coudn't create hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
return std::nullopt;
}
}
else {
av_frame_get_buffer(frame.get(), 0);
}
return std::make_optional(session_t {
std::move(hwdevice),
std::move(ctx),
std::move(codec_handle),
std::move(frame),
sws_color_space
});
}
void capture(
safe::signal_t *shutdown_event,
packet_queue_t packets,
@ -204,6 +500,11 @@ void capture(
config_t config,
void *channel_data) {
auto session = make_session(software, config, nullptr);
if(!session) {
return;
}
int framerate = config.framerate;
auto images = std::make_shared<img_event_t::element_type>();
@ -223,127 +524,8 @@ void capture(
return;
}
AVCodec *codec;
if(config.videoFormat == 0) {
codec = avcodec_find_encoder(AV_CODEC_ID_H264);
}
else {
codec = avcodec_find_encoder(AV_CODEC_ID_HEVC);
}
ctx_t ctx{avcodec_alloc_context3(codec)};
frame_t yuv_frame{av_frame_alloc()};
ctx->width = config.width;
ctx->height = config.height;
ctx->time_base = AVRational{1, framerate};
ctx->framerate = AVRational{framerate, 1};
if(config.videoFormat == 0) {
ctx->profile = FF_PROFILE_H264_HIGH;
}
else if(config.dynamicRange == 0) {
ctx->profile = FF_PROFILE_HEVC_MAIN;
}
else {
ctx->profile = FF_PROFILE_HEVC_MAIN_10;
}
if(config.dynamicRange == 0) {
ctx->pix_fmt = AV_PIX_FMT_YUV420P;
}
else {
ctx->pix_fmt = AV_PIX_FMT_YUV420P10;
}
ctx->color_range = (config.encoderCscMode & 0x1) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
int swsColorSpace;
switch (config.encoderCscMode >> 1) {
case 0:
default:
// Rec. 601
ctx->color_primaries = AVCOL_PRI_SMPTE170M;
ctx->color_trc = AVCOL_TRC_SMPTE170M;
ctx->colorspace = AVCOL_SPC_SMPTE170M;
swsColorSpace = SWS_CS_SMPTE170M;
break;
case 1:
// Rec. 709
ctx->color_primaries = AVCOL_PRI_BT709;
ctx->color_trc = AVCOL_TRC_BT709;
ctx->colorspace = AVCOL_SPC_BT709;
swsColorSpace = SWS_CS_ITU709;
break;
case 2:
// Rec. 2020
ctx->color_primaries = AVCOL_PRI_BT2020;
ctx->color_trc = AVCOL_TRC_BT2020_10;
ctx->colorspace = AVCOL_SPC_BT2020_NCL;
swsColorSpace = SWS_CS_BT2020;
break;
}
// B-frames delay decoder output, so never use them
ctx->max_b_frames = 0;
// Use an infinite GOP length since I-frames are generated on demand
ctx->gop_size = std::numeric_limits<int>::max();
ctx->keyint_min = ctx->gop_size;
// Some client decoders have limits on the number of reference frames
ctx->refs = config.numRefFrames;
// Clients will request for the fewest slices per frame to get the
// most efficient encode, but we may want to provide more slices than
// requested to ensure we have enough parallelism for good performance.
ctx->slices = std::max(config.slicesPerFrame, config::video.min_threads);
ctx->thread_type = FF_THREAD_SLICE;
ctx->thread_count = ctx->slices;
AVDictionary *options {nullptr};
av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
if(config.bitrate > 500) {
config.bitrate *= 1000;
ctx->rc_max_rate = config.bitrate;
ctx->rc_buffer_size = config.bitrate / 100;
ctx->bit_rate = config.bitrate;
ctx->rc_min_rate = config.bitrate;
}
else if(config::video.crf != 0) {
av_dict_set_int(&options, "crf", config::video.crf, 0);
}
else {
av_dict_set_int(&options, "qp", config::video.qp, 0);
}
if(config.videoFormat == 1) {
// x265's Info SEI is so long that it causes the IDR picture data to be
// kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
// It also looks like gop_size isn't passed on to x265, so we have to set
// 'keyint=-1' in the parameters ourselves.
av_dict_set(&options, "x265-params", "info=0:keyint=-1", 0);
}
ctx->flags |= (AV_CODEC_FLAG_CLOSED_GOP | AV_CODEC_FLAG_LOW_DELAY);
ctx->flags2 |= AV_CODEC_FLAG2_FAST;
auto lg = open_codec(ctx, codec, &options);
yuv_frame->format = ctx->pix_fmt;
yuv_frame->width = ctx->width;
yuv_frame->height = ctx->height;
av_frame_get_buffer(yuv_frame.get(), 0);
int64_t frame = 1;
int64_t key_frame = 1;
int64_t frame_nr = 1;
int64_t key_frame_nr = 1;
auto img_width = 0;
auto img_height = 0;
@ -367,23 +549,23 @@ void capture(
}
if(idr_events->peek()) {
yuv_frame->pict_type = AV_PICTURE_TYPE_I;
session->frame->pict_type = AV_PICTURE_TYPE_I;
auto event = idr_events->pop();
TUPLE_2D_REF(_, end, *event);
frame = end;
key_frame = end + config.framerate;
frame_nr = end;
key_frame_nr = end + config.framerate;
}
else if(frame == key_frame) {
yuv_frame->pict_type = AV_PICTURE_TYPE_I;
else if(frame_nr == key_frame_nr) {
session->frame->pict_type = AV_PICTURE_TYPE_I;
}
std::this_thread::sleep_until(next_frame);
next_frame += delay;
// When Moonlight request an IDR frame, send frames even if there is no new captured frame
if(frame > (key_frame + config.framerate) || images->peek()) {
if(frame_nr > (key_frame_nr + config.framerate) || images->peek()) {
if(auto tmp_img = images->pop(delay)) {
img = std::move(tmp_img);
}
@ -395,29 +577,32 @@ void capture(
}
}
if(software.system_memory) {
auto new_width = img->width;
auto new_height = img->height;
auto new_width = img->width;
auto new_height = img->height;
if(img_width != new_width || img_height != new_height) {
img_width = new_width;
img_height = new_height;
if(img_width != new_width || img_height != new_height) {
img_width = new_width;
img_height = new_height;
sws.reset(
sws_getContext(
img_width, img_height, AV_PIX_FMT_BGR0,
session->ctx->width, session->ctx->height, session->ctx->pix_fmt,
SWS_LANCZOS | SWS_ACCURATE_RND,
nullptr, nullptr, nullptr));
sws.reset(
sws_getContext(
img_width, img_height, AV_PIX_FMT_BGR0,
ctx->width, ctx->height, ctx->pix_fmt,
SWS_LANCZOS | SWS_ACCURATE_RND,
nullptr, nullptr, nullptr));
sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
sws_getCoefficients(swsColorSpace), config.encoderCscMode & 0x1,
0, 1 << 16, 1 << 16);
sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
0, 1 << 16, 1 << 16);
}
}
encode(frame++, ctx, sws, yuv_frame, *img, packets, channel_data);
yuv_frame->pict_type = AV_PICTURE_TYPE_NONE;
software.img_to_frame(sws, *img, session->frame);
encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
session->frame->pict_type = AV_PICTURE_TYPE_NONE;
}
images->stop();