Fix multicasting for nvenc

This commit is contained in:
loki 2020-04-12 02:33:17 +03:00
parent 7edaa0cce0
commit 679f74e53c
4 changed files with 332 additions and 94 deletions

View File

@ -29,9 +29,17 @@ constexpr std::uint16_t B = 0x2000;
constexpr std::uint16_t X = 0x4000;
constexpr std::uint16_t Y = 0x8000;
// Capture/encode hardware backend selector, passed to platf::display().
enum class dev_type_e {
none, // software (CPU) capture path
dxgi, // Direct3D 11 / DXGI capture path
unknown // hardware device type with no platform mapping
};
enum class pix_fmt_e {
yuv420p,
yuv420p10
yuv420p10,
nv12,
unknown
};
struct gamepad_state_t {
@ -114,7 +122,7 @@ std::string from_sockaddr(const sockaddr *const);
std::pair<std::uint16_t, std::string> from_sockaddr_ex(const sockaddr *const);
std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate);
std::shared_ptr<display_t> display(int hwdevice_type);
std::shared_ptr<display_t> display(dev_type_e hwdevice_type);
input_t input();
void move_mouse(input_t &input, int deltaX, int deltaY);

View File

@ -38,6 +38,7 @@ using output1_t = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
using dup_t = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
using texture2d_t = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
using resource_t = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
using multithread_t = util::safe_ptr<ID3D11Multithread, Release<ID3D11Multithread>>;
namespace video {
using device_t = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
@ -894,6 +895,12 @@ public:
}
std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
if(pix_fmt != platf::pix_fmt_e::nv12) {
BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_ctx_t>();
auto ret = hwdevice->init(
@ -909,6 +916,24 @@ public:
return hwdevice;
}
int init() {
if(display_base_t::init()) {
return -1;
}
multithread_t::pointer multithread_p {};
auto status = device->QueryInterface(__uuidof(multithread_t::element_type), (void**)&multithread_p);
multithread_t multithread { multithread_p };
if(FAILED(status)) {
BOOST_LOG(error) << "Couldn't query Multithread interface [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
multithread->SetMultithreadProtected(true);
return 0;
}
};
const char *format_str[] = {
@ -1039,15 +1064,15 @@ const char *format_str[] = {
}
namespace platf {
std::shared_ptr<display_t> display(int hwdevice_type) {
if(hwdevice_type == AV_HWDEVICE_TYPE_D3D11VA) {
std::shared_ptr<display_t> display(platf::dev_type_e hwdevice_type) {
if(hwdevice_type == platf::dev_type_e::dxgi) {
auto disp = std::make_shared<dxgi::display_gpu_t>();
if(!disp->init()) {
return disp;
}
}
else {
else if(hwdevice_type == platf::dev_type_e::none) {
auto disp = std::make_shared<dxgi::display_cpu_t>();
if(!disp->init()) {

View File

@ -33,7 +33,7 @@ public:
// pop and view should not be used interchangeably
status_t pop() {
std::unique_lock ul{_lock};
std::unique_lock ul{ _lock };
if (!_continue) {
return util::false_v<status_t>;
@ -55,7 +55,7 @@ public:
// pop and view should not be used interchangeably
template<class Rep, class Period>
status_t pop(std::chrono::duration<Rep, Period> delay) {
std::unique_lock ul{_lock};
std::unique_lock ul{ _lock };
if (!_continue) {
return util::false_v<status_t>;
@ -74,7 +74,7 @@ public:
// pop and view should not be used interchangeably
const status_t &view() {
std::unique_lock ul{_lock};
std::unique_lock ul{ _lock };
if (!_continue) {
return util::false_v<status_t>;
@ -98,7 +98,7 @@ public:
}
void stop() {
std::lock_guard lg{_lock};
std::lock_guard lg{ _lock };
_continue = false;
@ -106,7 +106,7 @@ public:
}
void reset() {
std::lock_guard lg{_lock};
std::lock_guard lg{ _lock };
_continue = true;
@ -118,8 +118,8 @@ public:
}
private:
bool _continue{true};
status_t _status;
bool _continue { true };
status_t _status { util::false_v<status_t> };
std::condition_variable _cv;
std::mutex _lock;
@ -170,7 +170,7 @@ public:
}
status_t pop() {
std::unique_lock ul{_lock};
std::unique_lock ul{ _lock };
if (!_continue) {
return util::false_v<status_t>;
@ -191,11 +191,12 @@ public:
}
std::vector<T> &unsafe() {
std::lock_guard { _lock };
return _queue;
}
void stop() {
std::lock_guard lg{_lock};
std::lock_guard lg{ _lock };
_continue = false;
@ -208,7 +209,7 @@ public:
private:
bool _continue{true};
bool _continue{ true };
std::mutex _lock;
std::condition_variable _cv;
@ -274,9 +275,8 @@ public:
void release() {
std::lock_guard lg { owner->_lock };
auto c = owner->_count.fetch_sub(1, std::memory_order_acquire);
if(c - 1 == 0) {
if(!--owner->_count) {
owner->_destruct(*get());
(*this)->~element_type();
}
@ -296,10 +296,9 @@ public:
template<class FC, class FD>
shared_t(FC && fc, FD &&fd) : _construct { std::forward<FC>(fc) }, _destruct { std::forward<FD>(fd) } {}
[[nodiscard]] ptr_t ref() {
auto c = _count.fetch_add(1, std::memory_order_acquire);
if(!c) {
std::lock_guard lg { _lock };
std::lock_guard lg { _lock };
if(!_count++) {
new(_object_buf.data()) element_type;
if(_construct(*reinterpret_cast<element_type*>(_object_buf.data()))) {
return ptr_t { nullptr };
@ -314,7 +313,7 @@ private:
std::array<std::uint8_t, sizeof(element_type)> _object_buf;
std::atomic<std::uint32_t> _count;
std::uint32_t _count;
std::mutex _lock;
};

View File

@ -37,6 +37,36 @@ void free_packet(AVPacket *packet) {
av_packet_free(&packet);
}
// Integer values for the "preset"/"profile" AVOptions of ffmpeg's nvenc
// encoders. NOTE(review): these must stay in sync with the option enums of
// the ffmpeg version actually linked -- verify on every ffmpeg upgrade.
namespace nv {
// NVENC encoder presets (quality/latency trade-offs).
enum class preset_e : int {
_default = 0,
slow,
medium,
fast,
hp,
hq,
bd,
ll_default, // ll* = low-latency presets
llhq,
llhp,
lossless_default, // lossless presets must be the last ones
lossless_hp,
};
// H.264 profiles accepted by h264_nvenc.
enum class profile_h264_e : int {
baseline,
main,
high,
high_444p,
};
// HEVC profiles accepted by hevc_nvenc.
enum class profile_hevc_e : int {
main,
main_10,
rext,
};
}
using ctx_t = util::safe_ptr<AVCodecContext, free_ctx>;
using frame_t = util::safe_ptr<AVFrame, free_frame>;
using buffer_t = util::safe_ptr<AVBufferRef, free_buffer>;
@ -104,8 +134,41 @@ struct session_t {
int sws_color_format;
};
// Per-client session state shared between the thread that called capture()
// and the synchronized capture/encode thread. Owned by encode_run_sync's
// context list; the matching encode_session_t points back at it.
struct encode_session_ctx_t {
safe::signal_t *shutdown_event; // raised to end this session; owned by the capture() caller
safe::signal_t *join_event; // raised by the encode thread so the blocked capture() call may return (and free shutdown_event)
packet_queue_t packets; // destination queue for encoded packets
idr_event_t idr_events; // IDR-frame requests; the popped event's second member is the new frame number
config_t config; // stream configuration (width/height/framerate/dynamicRange/...)
int frame_nr; // next frame number to encode
int key_frame_nr; // frame number at which the next periodic key frame is due
void *channel_data; // opaque pointer forwarded to encode()
};
// Live encoder state for one client inside the synchronized capture loop.
// NOTE(review): move-assignment of this type reportedly segfaults (see the
// FIXME in encode_run_sync) -- treat moves with care.
struct encode_session_t {
encode_session_ctx_t *ctx; // non-owning; points into encode_run_sync's context list
std::chrono::steady_clock::time_point next_frame; // deadline for this session's next frame
std::chrono::milliseconds delay; // frame period: 1000ms / config.framerate
platf::img_t *img_tmp; // latest captured image not yet converted/encoded; may be null
std::shared_ptr<platf::hwdevice_ctx_t> hwdevice; // per-session hardware conversion context
session_t session; // codec context + frame for this client
};
using encode_session_ctx_queue_t = safe::queue_t<encode_session_ctx_t>;
using encode_e = platf::capture_e;
// Shared state behind capture_thread_sync: clients enqueue their session
// contexts here and the single synchronized capture thread consumes them.
struct capture_synced_ctx_t {
encode_session_ctx_queue_t encode_session_ctx_queue; // pending sessions raised by capture()
};
int start_capture_sync(capture_synced_ctx_t &ctx);
void end_capture_sync(capture_synced_ctx_t &ctx);
auto capture_thread_sync = safe::make_shared<capture_synced_ctx_t>(start_capture_sync, end_capture_sync);
static encoder_t nvenc {
{ 2, 0, 1 },
{ (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
AV_HWDEVICE_TYPE_D3D11VA,
AV_PIX_FMT_D3D11,
AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
@ -115,9 +178,7 @@ static encoder_t nvenc {
{
{
{ "forced-idr"s, 1},
{ "profile"s, "high"s },
{ "preset"s , "llhp" },
{ "rc"s, "cbr_ld_hq"s },
{ "preset"s , (int)nv::preset_e::llhq },
}, "h264_nvenc"s
},
false,
@ -172,11 +233,39 @@ struct capture_thread_ctx_t {
util::sync_t<std::weak_ptr<platf::display_t>> display_wp;
};
// Translate an ffmpeg hardware device type into the platform-agnostic
// platf::dev_type_e consumed by platf::display().
//
// @param type  ffmpeg AVHWDeviceType of the selected encoder.
// @return      matching platf::dev_type_e, or ::unknown if unmapped.
platf::dev_type_e map_dev_type(AVHWDeviceType type) {
  switch(type) {
    case AV_HWDEVICE_TYPE_D3D11VA:
      return platf::dev_type_e::dxgi;
    // Fixed: previously matched AV_PICTURE_TYPE_NONE, a constant from the
    // unrelated AVPictureType enum. It only worked because both constants
    // happen to be 0; the switch is over AVHWDeviceType.
    case AV_HWDEVICE_TYPE_NONE:
      return platf::dev_type_e::none;
    default:
      return platf::dev_type_e::unknown;
  }
}
// Translate an ffmpeg pixel format into the platform-agnostic
// platf::pix_fmt_e consumed by make_hwdevice_ctx().
//
// @param fmt  ffmpeg AVPixelFormat of the encoder's input frames.
// @return     matching platf::pix_fmt_e, or ::unknown if unmapped.
platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) {
  if(fmt == AV_PIX_FMT_YUV420P) {
    return platf::pix_fmt_e::yuv420p;
  }

  if(fmt == AV_PIX_FMT_YUV420P10) {
    return platf::pix_fmt_e::yuv420p10;
  }

  if(fmt == AV_PIX_FMT_NV12) {
    return platf::pix_fmt_e::nv12;
  }

  return platf::pix_fmt_e::unknown;
}
void reset_display(std::shared_ptr<platf::display_t> &disp, AVHWDeviceType type) {
// We try this twice, in case we still get an error on reinitialization
for(int x = 0; x < 2; ++x) {
disp.reset();
disp = platf::display(type);
disp = platf::display(map_dev_type(type));
if(disp) {
break;
}
@ -207,7 +296,7 @@ void captureThread(
std::chrono::nanoseconds delay = 1s;
auto disp = platf::display(encoder.dev_type);
auto disp = platf::display(map_dev_type(encoder.dev_type));
if(!disp) {
return;
}
@ -585,7 +674,6 @@ void encode_run(
if(!session) {
return;
}
hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
auto delay = std::chrono::floor<std::chrono::nanoseconds>(1s) / config.framerate;
@ -671,47 +759,41 @@ void encode_run(
}
}
void capture(
safe::signal_t *shutdown_event,
packet_queue_t packets,
idr_event_t idr_events,
config_t config,
void *channel_data) {
std::optional<encode_session_t> make_session_from_ctx(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, encode_session_ctx_t &ctx) {
encode_session_t encode_session;
auto lg = util::fail_guard([&]() {
shutdown_event->raise(true);
});
encode_session.ctx = &ctx;
encode_session.next_frame = std::chrono::steady_clock::now();
const auto &encoder = encoders.front();
auto disp = platf::display(encoder.dev_type);
if(!disp) {
return;
}
encode_session.delay = 1000ms / ctx.config.framerate;
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
auto hwdevice_ctx = disp->make_hwdevice_ctx(ctx.config.width, ctx.config.height, pix_fmt);
if(!hwdevice_ctx) {
return;
return std::nullopt;
}
auto session = make_session(encoder, config, hwdevice_ctx.get());
auto session = make_session(encoder, ctx.config, hwdevice_ctx.get());
if(!session) {
return;
return std::nullopt;
}
hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
auto img = disp->alloc_img();
if(disp->dummy_img(img.get())) {
return;
}
encode_session.img_tmp = &img;
encode_session.hwdevice = std::move(hwdevice_ctx);
encode_session.session = std::move(*session);
const platf::img_t* img_p = hwdevice_ctx->convert(*img);
if(!img_p) {
return;
}
return std::move(encode_session);
}
sws_t sws;
encoder.img_to_frame(sws, *img_p, session->frame);
encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &encode_session_ctxs, encode_session_ctx_queue_t &encode_session_ctx_queue) {
const auto &encoder = encoders.front();
std::shared_ptr<platf::display_t> disp;
reset_display(disp, encoder.dev_type);
if(!disp) {
return encode_e::error;
}
std::vector<std::shared_ptr<platf::img_t>> imgs(12);
for(auto &img : imgs) {
@ -719,26 +801,41 @@ void capture(
}
auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs), std::end(imgs));
int frame_nr = 1;
int key_frame_nr = 1;
auto max_delay = 1000ms / config.framerate;
std::shared_ptr<platf::img_t> img_tmp;
auto next_frame = std::chrono::steady_clock::now();
while(!shutdown_event->peek()) {
if(idr_events->peek()) {
session->frame->pict_type = AV_PICTURE_TYPE_I;
auto dummy_img = disp->alloc_img();
auto img_tmp = dummy_img.get();
if(disp->dummy_img(img_tmp)) {
return encode_e::error;
}
auto event = idr_events->pop();
TUPLE_2D_REF(_, end, *event);
frame_nr = end;
key_frame_nr = end + config.framerate;
std::vector<encode_session_t> encode_sessions;
for(auto &ctx : encode_session_ctxs) {
auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *ctx);
if(!encode_session) {
return encode_e::error;
}
else if(frame_nr == key_frame_nr) {
session->frame->pict_type = AV_PICTURE_TYPE_I;
encode_sessions.emplace_back(std::move(*encode_session));
}
auto next_frame = std::chrono::steady_clock::now();
while(encode_session_ctx_queue.running()) {
while(encode_session_ctx_queue.peek()) {
auto encode_session_ctx = encode_session_ctx_queue.pop();
if(!encode_session_ctx) {
return encode_e::ok;
}
encode_session_ctxs.emplace_back(std::make_unique<encode_session_ctx_t>(std::move(*encode_session_ctx)));
auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *encode_session_ctxs.back());
if(!encode_session) {
return encode_e::error;
}
encode_sessions.emplace_back(std::move(*encode_session));
next_frame = std::chrono::steady_clock::now();
}
auto delay = std::max(0ms, std::chrono::duration_cast<std::chrono::milliseconds>(next_frame - std::chrono::steady_clock::now()));
@ -746,35 +843,144 @@ void capture(
auto status = disp->snapshot(round_robin->get(), delay, display_cursor);
switch(status) {
case platf::capture_e::reinit:
return;
case platf::capture_e::error:
return;
return status;
case platf::capture_e::timeout:
next_frame += max_delay;
if(!img_tmp && frame_nr > (key_frame_nr + config.framerate)) {
continue;
}
break;
case platf::capture_e::ok:
img_tmp = *round_robin++;
img_tmp = round_robin->get();
++round_robin;
break;
}
if(img_tmp) {
img_p = hwdevice_ctx->convert(*img_tmp);
img_tmp.reset();
}
if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
BOOST_LOG(fatal) << "Could not encode video packet"sv;
log_flush();
std::abort();
auto now = std::chrono::steady_clock::now();
next_frame = now + 1s;
{auto pos = std::begin(encode_sessions);while( pos != std::end(encode_sessions)) {
auto ctx = pos->ctx;
if(ctx->shutdown_event->peek()) {
// Let waiting thread know it can delete shutdown_event
ctx->join_event->raise(true);
//FIXME: Causes segfault even if (pos + 1) != std::end()
// *pos = std::move(*(pos + 1));
{encode_session_t t { std::move(*pos) };}
//FIXME: encode_session_t = std::move(encode_session_t) <=> segfault
pos = encode_sessions.erase(pos);
encode_session_ctxs.erase(std::find_if(std::begin(encode_session_ctxs), std::end(encode_session_ctxs), [&ctx_p=ctx](auto &ctx) {
return ctx.get() == ctx_p;
}));
if(encode_sessions.empty()) {
return encode_e::ok;
}
continue;
}
if(ctx->idr_events->peek()) {
pos->session.frame->pict_type = AV_PICTURE_TYPE_I;
auto event = ctx->idr_events->pop();
auto end = event->second;
ctx->frame_nr = end;
ctx->key_frame_nr = end + ctx->config.framerate;
}
else if(ctx->frame_nr == ctx->key_frame_nr) {
pos->session.frame->pict_type = AV_PICTURE_TYPE_I;
}
if(img_tmp) {
pos->img_tmp = img_tmp;
}
auto timeout = now > pos->next_frame;
if(timeout) {
pos->next_frame += pos->delay;
}
next_frame = std::min(next_frame, pos->next_frame);
if(!timeout) {
++pos;
continue;
}
sws_t sws;
if(pos->img_tmp) {
auto img_p = pos->hwdevice->convert(*pos->img_tmp);
pos->img_tmp = nullptr;
encoder.img_to_frame(sws, *img_p, pos->session.frame);
}
if(encode(ctx->frame_nr++, pos->session.ctx, pos->session.frame, ctx->packets, ctx->channel_data)) {
BOOST_LOG(fatal) << "Could not encode video packet"sv;
log_flush();
std::abort();
}
pos->session.frame->pict_type = AV_PICTURE_TYPE_NONE;
++pos;
}}
img_tmp = nullptr;
}
return encode_e::ok;
}
void captureThreadSync() {
auto ref = capture_thread_sync.ref();
std::vector<std::unique_ptr<encode_session_ctx_t>> encode_session_ctxs;
auto &ctx = ref->encode_session_ctx_queue;
auto lg = util::fail_guard([&]() {
ctx.stop();
for(auto &ctx : encode_session_ctxs) {
ctx->shutdown_event->raise(true);
ctx->join_event->raise(true);
}
session->frame->pict_type = AV_PICTURE_TYPE_NONE;
}
for(auto &ctx : ctx.unsafe()) {
ctx.shutdown_event->raise(true);
ctx.join_event->raise(true);
}
});
while(encode_run_sync(encode_session_ctxs, ctx) == encode_e::reinit);
}
// Construct callback for capture_thread_sync: spawns the single synchronized
// capture/encode thread. Returns 0 to signal successful construction to
// safe::make_shared.
int start_capture_sync(capture_synced_ctx_t &ctx) {
// Detached deliberately: shutdown is coordinated through the session queue
// and the shutdown/join events, not by joining the thread.
std::thread { &captureThreadSync }.detach();
return 0;
}
// Destruct callback for capture_thread_sync: intentionally a no-op.
// NOTE(review): teardown appears to happen in captureThreadSync's fail guard
// (queue stop + event raises) -- confirm the detached thread exits once the
// last reference is released.
void end_capture_sync(capture_synced_ctx_t &ctx) {}
// Registers an encode session with the shared synchronized capture thread
// and blocks until that thread signals the session has been torn down.
//
// @param shutdown_event  raised by the caller to end the session; must stay
//                        alive until this function returns
// @param packets         queue that will receive encoded packets
// @param idr_events      IDR-frame request events for this session
// @param config          stream configuration for this client
// @param channel_data    opaque pointer forwarded to encode()
void capture(
safe::signal_t *shutdown_event,
packet_queue_t packets,
idr_event_t idr_events,
config_t config,
void *channel_data) {
safe::signal_t join_event;

// Keep the shared capture thread alive for the duration of this session.
auto ref = capture_thread_sync.ref();

// frame_nr and key_frame_nr both start at 1.
ref->encode_session_ctx_queue.raise(encode_session_ctx_t {
shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data
});

// Wait for join signal
// (raised by the capture thread once the session is removed; after this
// returns the caller may safely destroy shutdown_event)
join_event.view();
}
void capture_async(
safe::signal_t *shutdown_event,
packet_queue_t packets,
@ -840,7 +1046,7 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
return false;
}
auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
if(!hwdevice_ctx) {
return false;