Add standalone NVENC encoder

ns6089 2023-04-25 16:38:37 +03:00 committed by Cameron Gutman
parent 7fe52bc5f8
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

CMakeLists.txt

@ -494,6 +494,10 @@ ${CMAKE_BINARY_DIR}/generated-src/${filename}.h")
configure_file(sunshine.service.in sunshine.service @ONLY)
endif()
include_directories(SYSTEM third-party/nv-codec-headers/include)
file(GLOB NVENC_SOURCES CONFIGURE_DEPENDS "src/nvenc/*.cpp" "src/nvenc/*.h")
list(APPEND PLATFORM_TARGET_FILES ${NVENC_SOURCES})
configure_file(src/version.h.in version.h @ONLY)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
@ -528,6 +532,8 @@ set(SUNSHINE_TARGET_FILES
src/stream.h
src/video.cpp
src/video.h
src/video_colorspace.cpp
src/video_colorspace.h
src/input.cpp
src/input.h
src/audio.cpp

View File

@ -68,6 +68,7 @@ namespace mail {
// Local mail
MAIL(touch_port);
MAIL(idr);
MAIL(invalidate_ref_frames);
MAIL(gamepad_feedback);
MAIL(hdr);
#undef MAIL

src/nvenc/nvenc_base.cpp (new file, 542 lines)

@ -0,0 +1,542 @@
#include "nvenc_base.h"
#include "src/config.h"
#include "src/utility.h"
namespace {
GUID
quality_preset_guid_from_number(unsigned number) {
if (number > 7) number = 7;
switch (number) {
case 1:
default:
return NV_ENC_PRESET_P1_GUID;
case 2:
return NV_ENC_PRESET_P2_GUID;
case 3:
return NV_ENC_PRESET_P3_GUID;
case 4:
return NV_ENC_PRESET_P4_GUID;
case 5:
return NV_ENC_PRESET_P5_GUID;
case 6:
return NV_ENC_PRESET_P6_GUID;
case 7:
return NV_ENC_PRESET_P7_GUID;
}
};
bool
equal_guids(const GUID &guid1, const GUID &guid2) {
return std::memcmp(&guid1, &guid2, sizeof(GUID)) == 0;
}
auto
quality_preset_string_from_guid(const GUID &guid) {
if (equal_guids(guid, NV_ENC_PRESET_P1_GUID)) {
return "P1";
}
if (equal_guids(guid, NV_ENC_PRESET_P2_GUID)) {
return "P2";
}
if (equal_guids(guid, NV_ENC_PRESET_P3_GUID)) {
return "P3";
}
if (equal_guids(guid, NV_ENC_PRESET_P4_GUID)) {
return "P4";
}
if (equal_guids(guid, NV_ENC_PRESET_P5_GUID)) {
return "P5";
}
if (equal_guids(guid, NV_ENC_PRESET_P6_GUID)) {
return "P6";
}
if (equal_guids(guid, NV_ENC_PRESET_P7_GUID)) {
return "P7";
}
return "Unknown";
}
} // namespace
namespace nvenc {
nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device):
device_type(device_type),
device(device) {
}
nvenc_base::~nvenc_base() {
// Derived classes are expected to call destroy_encoder() before this destructor runs
}
bool
nvenc_base::create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format) {
if (!nvenc && !init_library()) return false;
if (encoder) destroy_encoder();
auto fail_guard = util::fail_guard([this] { destroy_encoder(); });
encoder_params.width = client_config.width;
encoder_params.height = client_config.height;
encoder_params.buffer_format = buffer_format;
encoder_params.rfi = true;
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER };
session_params.device = device;
session_params.deviceType = device_type;
session_params.apiVersion = NVENCAPI_VERSION;
if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) {
BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed";
return false;
}
uint32_t encode_guid_count = 0;
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDCount(encoder, &encode_guid_count))) {
BOOST_LOG(error) << "NvEncGetEncodeGUIDCount failed: " << last_error_string;
return false;
};
std::vector<GUID> encode_guids(encode_guid_count);
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDs(encoder, encode_guids.data(), encode_guids.size(), &encode_guid_count))) {
BOOST_LOG(error) << "NvEncGetEncodeGUIDs failed: " << last_error_string;
return false;
}
NV_ENC_INITIALIZE_PARAMS init_params = { NV_ENC_INITIALIZE_PARAMS_VER };
switch (client_config.videoFormat) {
case 0:
// H.264
init_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
break;
case 1:
// HEVC
init_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
break;
default:
BOOST_LOG(error) << "NvEnc: unknown video format " << client_config.videoFormat;
return false;
}
{
auto search_predicate = [&](const GUID &guid) {
return equal_guids(init_params.encodeGUID, guid);
};
if (std::find_if(encode_guids.begin(), encode_guids.end(), search_predicate) == encode_guids.end()) {
BOOST_LOG(error) << "NvEnc: encoding format is not supported by the gpu";
return false;
}
}
auto get_encoder_cap = [&](NV_ENC_CAPS cap) {
NV_ENC_CAPS_PARAM param = { NV_ENC_CAPS_PARAM_VER, cap };
int value = 0;
nvenc->nvEncGetEncodeCaps(encoder, init_params.encodeGUID, &param, &value);
return value;
};
auto buffer_is_10bit = [&]() {
return buffer_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
};
auto buffer_is_yuv444 = [&]() {
return buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
};
{
auto supported_width = get_encoder_cap(NV_ENC_CAPS_WIDTH_MAX);
auto supported_height = get_encoder_cap(NV_ENC_CAPS_HEIGHT_MAX);
if (encoder_params.width > supported_width || encoder_params.height > supported_height) {
BOOST_LOG(error) << "NvEnc: gpu max encode resolution " << supported_width << "x" << supported_height << ", requested " << encoder_params.width << "x" << encoder_params.height;
return false;
}
}
if (buffer_is_10bit() && !get_encoder_cap(NV_ENC_CAPS_SUPPORT_10BIT_ENCODE)) {
BOOST_LOG(error) << "NvEnc: gpu doesn't support 10-bit encode";
return false;
}
if (buffer_is_yuv444() && !get_encoder_cap(NV_ENC_CAPS_SUPPORT_YUV444_ENCODE)) {
BOOST_LOG(error) << "NvEnc: gpu doesn't support YUV444 encode";
return false;
}
if (async_event_handle && !get_encoder_cap(NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT)) {
BOOST_LOG(warning) << "NvEnc: gpu doesn't support async encode";
async_event_handle = nullptr;
}
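// Reference frame invalidation (RFI) lets us recover from client-reported packet loss without sending a full IDR frame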
encoder_params.rfi = get_encoder_cap(NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION);
init_params.presetGUID = quality_preset_guid_from_number(config.quality_preset);
init_params.tuningInfo = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
init_params.enablePTD = 1;
init_params.enableEncodeAsync = async_event_handle ? 1 : 0;
init_params.enableWeightedPrediction = config.weighted_prediction && get_encoder_cap(NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
init_params.encodeWidth = encoder_params.width;
init_params.darWidth = encoder_params.width;
init_params.encodeHeight = encoder_params.height;
init_params.darHeight = encoder_params.height;
init_params.frameRateNum = client_config.framerate;
init_params.frameRateDen = 1;
NV_ENC_PRESET_CONFIG preset_config = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) {
BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << last_error_string;
return false;
}
NV_ENC_CONFIG enc_config = preset_config.presetCfg;
enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
enc_config.gopLength = NVENC_INFINITE_GOPLENGTH;
enc_config.frameIntervalP = 1;
enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
enc_config.rcParams.zeroReorderDelay = 1;
enc_config.rcParams.enableLookahead = 0;
enc_config.rcParams.lowDelayKeyFrameScale = 1;
enc_config.rcParams.multiPass = config.two_pass == nvenc_two_pass::quarter_resolution ? NV_ENC_TWO_PASS_QUARTER_RESOLUTION :
config.two_pass == nvenc_two_pass::full_resolution ? NV_ENC_TWO_PASS_FULL_RESOLUTION :
NV_ENC_MULTI_PASS_DISABLED;
enc_config.rcParams.enableAQ = config.adaptive_quantization;
enc_config.rcParams.averageBitRate = client_config.bitrate * 1000;
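// When supported, limit the VBV/HRD buffer to one frame's worth of bits (bitrate / framerate) to keep latency low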
if (get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) {
enc_config.rcParams.vbvBufferSize = client_config.bitrate * 1000 / client_config.framerate;
}
auto set_common_format_config = [&](auto &format_config) {
format_config.repeatSPSPPS = 1;
format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH;
format_config.sliceMode = 3;
format_config.sliceModeData = client_config.slicesPerFrame;
if (buffer_is_yuv444()) {
format_config.chromaFormatIDC = 3;
}
format_config.enableFillerDataInsertion = config.insert_filler_data;
};
auto set_ref_frames = [&](uint32_t &ref_frames_option, NV_ENC_NUM_REF_FRAMES &L0_option, uint32_t ref_frames_default) {
if (client_config.numRefFrames > 0) {
ref_frames_option = client_config.numRefFrames;
}
else {
ref_frames_option = ref_frames_default;
}
if (ref_frames_option > 0 && !get_encoder_cap(NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES)) {
ref_frames_option = 1;
encoder_params.rfi = false;
}
// This limits the number of reference frames any single frame can use to 1, but keeps a larger DPB so there are fallback frames when some are invalidated through RFI
L0_option = NV_ENC_NUM_REF_FRAMES_1;
};
auto set_minqp_if_enabled = [&](int value) {
if (config.enable_min_qp) {
enc_config.rcParams.enableMinQP = 1;
enc_config.rcParams.minQP.qpInterP = value;
enc_config.rcParams.minQP.qpIntra = value;
}
};
auto fill_vui = [&colorspace](auto &vui_config) {
vui_config.videoSignalTypePresentFlag = 1;
vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED;
vui_config.videoFullRangeFlag = colorspace.full_range;
vui_config.colourDescriptionPresentFlag = 1;
vui_config.colourPrimaries = colorspace.primaries;
vui_config.transferCharacteristics = colorspace.transfer_function;
vui_config.colourMatrix = colorspace.matrix;
};
switch (client_config.videoFormat) {
case 0: {
// H.264
enc_config.profileGUID = buffer_is_yuv444() ? NV_ENC_H264_PROFILE_HIGH_444_GUID : NV_ENC_H264_PROFILE_HIGH_GUID;
auto &format_config = enc_config.encodeCodecConfig.h264Config;
set_common_format_config(format_config);
if (config.h264_cavlc || !get_encoder_cap(NV_ENC_CAPS_SUPPORT_CABAC)) {
format_config.entropyCodingMode = NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC;
}
else {
format_config.entropyCodingMode = NV_ENC_H264_ENTROPY_CODING_MODE_CABAC;
}
set_ref_frames(format_config.maxNumRefFrames, format_config.numRefL0, 5);
set_minqp_if_enabled(config.min_qp_h264);
fill_vui(format_config.h264VUIParameters);
break;
}
case 1: {
// HEVC
auto &format_config = enc_config.encodeCodecConfig.hevcConfig;
set_common_format_config(format_config);
if (buffer_is_10bit()) {
format_config.pixelBitDepthMinus8 = 2;
}
set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numRefL0, 5);
set_minqp_if_enabled(config.min_qp_hevc);
fill_vui(format_config.hevcVUIParameters);
break;
}
}
init_params.encodeConfig = &enc_config;
if (nvenc_failed(nvenc->nvEncInitializeEncoder(encoder, &init_params))) {
BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << last_error_string;
return false;
}
if (async_event_handle) {
NV_ENC_EVENT_PARAMS event_params = { NV_ENC_EVENT_PARAMS_VER };
event_params.completionEvent = async_event_handle;
if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) {
BOOST_LOG(error) << "NvEncRegisterAsyncEvent failed: " << last_error_string;
return false;
}
}
NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { NV_ENC_CREATE_BITSTREAM_BUFFER_VER };
if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) {
BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << last_error_string;
return false;
}
output_bitstream = create_bitstream_buffer.bitstreamBuffer;
if (!create_and_register_input_buffer()) {
return false;
}
{
auto f = stat_trackers::one_digit_after_decimal();
BOOST_LOG(debug) << "NvEnc: requested encoded frame size " << f % (client_config.bitrate / 8. / client_config.framerate) << " kB";
}
{
std::string extra;
if (init_params.enableEncodeAsync) extra += " async";
if (buffer_is_10bit()) extra += " 10-bit";
if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass";
if (encoder_params.rfi) extra += " rfi";
if (init_params.enableWeightedPrediction) extra += " weighted-prediction";
if (enc_config.rcParams.enableAQ) extra += " adaptive-quantization";
if (enc_config.rcParams.enableMinQP) extra += " qpmin=" + std::to_string(enc_config.rcParams.minQP.qpInterP);
if (config.insert_filler_data) extra += " filler-data";
BOOST_LOG(info) << "NvEnc: created encoder " << quality_preset_string_from_guid(init_params.presetGUID) << extra;
}
encoder_state = {};
fail_guard.disable();
return true;
}
void
nvenc_base::destroy_encoder() {
if (output_bitstream) {
nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream);
output_bitstream = nullptr;
}
if (encoder && async_event_handle) {
NV_ENC_EVENT_PARAMS event_params = { NV_ENC_EVENT_PARAMS_VER };
event_params.completionEvent = async_event_handle;
nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params);
}
if (registered_input_buffer) {
nvenc->nvEncUnregisterResource(encoder, registered_input_buffer);
registered_input_buffer = nullptr;
}
if (encoder) {
nvenc->nvEncDestroyEncoder(encoder);
encoder = nullptr;
}
encoder_state = {};
encoder_params = {};
}
nvenc_encoded_frame
nvenc_base::encode_frame(uint64_t frame_index, bool force_idr) {
if (!encoder) {
return {};
}
assert(registered_input_buffer);
assert(output_bitstream);
NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = { NV_ENC_MAP_INPUT_RESOURCE_VER };
mapped_input_buffer.registeredResource = registered_input_buffer;
if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) {
BOOST_LOG(error) << "NvEncMapInputResource failed: " << last_error_string;
return {};
}
auto unmap_guard = util::fail_guard([&] { nvenc->nvEncUnmapInputResource(encoder, &mapped_input_buffer); });
NV_ENC_PIC_PARAMS pic_params = { NV_ENC_PIC_PARAMS_VER };
pic_params.inputWidth = encoder_params.width;
pic_params.inputHeight = encoder_params.height;
pic_params.encodePicFlags = force_idr ? NV_ENC_PIC_FLAG_FORCEIDR : 0;
pic_params.inputTimeStamp = frame_index;
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
pic_params.inputBuffer = mapped_input_buffer.mappedResource;
pic_params.bufferFmt = mapped_input_buffer.mappedBufferFmt;
pic_params.outputBitstream = output_bitstream;
pic_params.completionEvent = async_event_handle;
if (nvenc_failed(nvenc->nvEncEncodePicture(encoder, &pic_params))) {
BOOST_LOG(error) << "NvEncEncodePicture failed: " << last_error_string;
return {};
}
NV_ENC_LOCK_BITSTREAM lock_bitstream = { NV_ENC_LOCK_BITSTREAM_VER };
lock_bitstream.outputBitstream = output_bitstream;
lock_bitstream.doNotWait = 0;
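// In async mode we wait on the completion event before locking; in sync mode nvEncLockBitstream itself blocks (doNotWait = 0) until output is ready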
if (async_event_handle && !wait_for_async_event(100)) {
BOOST_LOG(error) << "NvEnc: frame " << frame_index << " encode wait timeout";
return {};
}
if (nvenc_failed(nvenc->nvEncLockBitstream(encoder, &lock_bitstream))) {
BOOST_LOG(error) << "NvEncLockBitstream failed: " << last_error_string;
return {};
}
auto data_pointer = (uint8_t *) lock_bitstream.bitstreamBufferPtr;
nvenc_encoded_frame encoded_frame {
{ data_pointer, data_pointer + lock_bitstream.bitstreamSizeInBytes },
lock_bitstream.outputTimeStamp,
lock_bitstream.pictureType == NV_ENC_PIC_TYPE_IDR,
encoder_state.rfi_needs_confirmation,
};
if (encoder_state.rfi_needs_confirmation) {
// Invalidation request has been fulfilled, and the video network packet will be marked as such
encoder_state.rfi_needs_confirmation = false;
}
encoder_state.last_encoded_frame_index = frame_index;
if (encoded_frame.idr) {
BOOST_LOG(debug) << "NvEnc: idr frame " << encoded_frame.frame_index;
}
if (nvenc_failed(nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream))) {
BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << last_error_string;
}
if (config::sunshine.min_log_level <= 1) {
// Print encoded frame size stats to debug log every 20 seconds
auto callback = [&](float stat_min, float stat_max, double stat_avg) {
auto f = stat_trackers::one_digit_after_decimal();
BOOST_LOG(debug) << "NvEnc: encoded frame sizes (min max avg) " << f % stat_min << " " << f % stat_max << " " << f % stat_avg << " kB";
};
using namespace std::literals;
encoder_state.frame_size_tracker.collect_and_callback_on_interval(encoded_frame.data.size() / 1000., callback, 20s);
}
return encoded_frame;
}
bool
nvenc_base::invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame) {
if (!encoder || !encoder_params.rfi) return false;
if (last_frame < first_frame ||
encoder_state.last_encoded_frame_index < first_frame ||
encoder_state.last_encoded_frame_index > first_frame + 100) {
BOOST_LOG(error) << "NvEnc: rfi request " << first_frame << "-" << last_frame << " invalid range (last encoded frame " << encoder_state.last_encoded_frame_index << ")";
return false;
}
if (first_frame >= encoder_state.last_rfi_range.first &&
last_frame <= encoder_state.last_rfi_range.second) {
BOOST_LOG(debug) << "NvEnc: rfi request " << first_frame << "-" << last_frame << " already done";
return true;
}
BOOST_LOG(debug) << "NvEnc: rfi request " << first_frame << "-" << last_frame << " expanding to last encoded frame " << encoder_state.last_encoded_frame_index;
encoder_state.rfi_needs_confirmation = true;
encoder_state.last_rfi_range = { first_frame, encoder_state.last_encoded_frame_index };
bool result = true;
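// nvEncInvalidateRefFrames invalidates a single timestamp per call, so walk the entire expanded range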
for (auto i = first_frame; i <= encoder_state.last_encoded_frame_index; i++) {
if (nvenc_failed(nvenc->nvEncInvalidateRefFrames(encoder, i))) {
BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << last_error_string;
result = false;
}
}
return result;
}
bool
nvenc_base::nvenc_failed(NVENCSTATUS status) {
auto status_string = [](NVENCSTATUS status) -> std::string {
switch (status) {
#define nvenc_status_case(x) \
case x: \
return #x;
nvenc_status_case(NV_ENC_SUCCESS);
nvenc_status_case(NV_ENC_ERR_NO_ENCODE_DEVICE);
nvenc_status_case(NV_ENC_ERR_UNSUPPORTED_DEVICE);
nvenc_status_case(NV_ENC_ERR_INVALID_ENCODERDEVICE);
nvenc_status_case(NV_ENC_ERR_INVALID_DEVICE);
nvenc_status_case(NV_ENC_ERR_DEVICE_NOT_EXIST);
nvenc_status_case(NV_ENC_ERR_INVALID_PTR);
nvenc_status_case(NV_ENC_ERR_INVALID_EVENT);
nvenc_status_case(NV_ENC_ERR_INVALID_PARAM);
nvenc_status_case(NV_ENC_ERR_INVALID_CALL);
nvenc_status_case(NV_ENC_ERR_OUT_OF_MEMORY);
nvenc_status_case(NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
nvenc_status_case(NV_ENC_ERR_UNSUPPORTED_PARAM);
nvenc_status_case(NV_ENC_ERR_LOCK_BUSY);
nvenc_status_case(NV_ENC_ERR_NOT_ENOUGH_BUFFER);
nvenc_status_case(NV_ENC_ERR_INVALID_VERSION);
nvenc_status_case(NV_ENC_ERR_MAP_FAILED);
nvenc_status_case(NV_ENC_ERR_NEED_MORE_INPUT);
nvenc_status_case(NV_ENC_ERR_ENCODER_BUSY);
nvenc_status_case(NV_ENC_ERR_EVENT_NOT_REGISTERD);
nvenc_status_case(NV_ENC_ERR_GENERIC);
nvenc_status_case(NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY);
nvenc_status_case(NV_ENC_ERR_UNIMPLEMENTED);
nvenc_status_case(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
// Newer versions of the SDK may add more constants, look for them at the end of the NVENCSTATUS enum
#undef nvenc_status_case
default:
return std::to_string(status);
}
};
last_error_string.clear();
if (status != NV_ENC_SUCCESS) {
if (nvenc && encoder) {
last_error_string = nvenc->nvEncGetLastErrorString(encoder);
if (!last_error_string.empty()) last_error_string += " ";
}
last_error_string += status_string(status);
return true;
}
return false;
}
} // namespace nvenc
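For orientation, a minimal sketch of the lifecycle this class implements, using the D3D11 backend added later in this commit. Error handling, capture, and the surrounding objects (d3d_device, client_config, sunshine_colorspace, streaming, send_to_client) are assumed for illustration and are not part of the commit:

// Sketch only, under the assumptions above.
auto encoder = std::make_unique<nvenc::nvenc_d3d11>(d3d_device);
nvenc::nvenc_config enc_cfg; // defaults: P1 preset, single pass
auto nv_csp = nvenc::nvenc_colorspace_from_sunshine_colorspace(sunshine_colorspace);
if (!encoder->create_encoder(enc_cfg, client_config, nv_csp, NV_ENC_BUFFER_FORMAT_NV12)) return;
for (uint64_t i = 0; streaming; ++i) {
  // ...render/copy the captured image into encoder->get_input_texture() here...
  auto frame = encoder->encode_frame(i, i == 0); // force an IDR frame on the first frame
  if (frame.data.empty()) break; // encode failed
  send_to_client(frame); // hypothetical network send
}
encoder->destroy_encoder();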

src/nvenc/nvenc_base.h (new file, 79 lines)

@ -0,0 +1,79 @@
#pragma once
#include "nvenc_colorspace.h"
#include "nvenc_config.h"
#include "nvenc_encoded_frame.h"
#include "src/stat_trackers.h"
#include "src/video.h"
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
class nvenc_base {
public:
nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device);
virtual ~nvenc_base();
nvenc_base(const nvenc_base &) = delete;
nvenc_base &
operator=(const nvenc_base &) = delete;
bool
create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
void
destroy_encoder();
nvenc_encoded_frame
encode_frame(uint64_t frame_index, bool force_idr);
bool
invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
protected:
virtual bool
init_library() = 0;
virtual bool
create_and_register_input_buffer() = 0;
virtual bool
wait_for_async_event(uint32_t timeout_ms) { return false; }
bool
nvenc_failed(NVENCSTATUS status);
const NV_ENC_DEVICE_TYPE device_type;
void *const device;
std::unique_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;
void *encoder = nullptr;
struct {
uint32_t width = 0;
uint32_t height = 0;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
bool rfi = false;
} encoder_params;
// Derived classes set these variables
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr;
void *async_event_handle = nullptr;
std::string last_error_string;
private:
NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
struct {
uint64_t last_encoded_frame_index = 0;
bool rfi_needs_confirmation = false;
std::pair<uint64_t, uint64_t> last_rfi_range;
stat_trackers::min_max_avg_tracker<float> frame_size_tracker;
} encoder_state;
};
} // namespace nvenc

src/nvenc/nvenc_colorspace.h (new file, 12 lines)

@ -0,0 +1,12 @@
#pragma once
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
struct nvenc_colorspace_t {
NV_ENC_VUI_COLOR_PRIMARIES primaries;
NV_ENC_VUI_TRANSFER_CHARACTERISTIC transfer_function;
NV_ENC_VUI_MATRIX_COEFFS matrix;
bool full_range;
};
} // namespace nvenc

src/nvenc/nvenc_config.h (new file, 45 lines)

@ -0,0 +1,45 @@
#pragma once
namespace nvenc {
enum class nvenc_two_pass {
// Single pass, the fastest option, uses no extra vram
disabled,
// Quarter-resolution first pass, catches larger motion vectors, faster and uses less extra vram
quarter_resolution,
// Full-resolution first pass, better overall statistics, slower and uses more extra vram
full_resolution,
};
struct nvenc_config {
// Quality preset from 1 to 7, higher is slower
unsigned quality_preset = 1;
// Use an optional preliminary pass for better motion vectors, bitrate distribution and stricter VBV (HRD), uses CUDA cores
nvenc_two_pass two_pass = nvenc_two_pass::disabled;
// Improves compression of fades, uses CUDA cores
bool weighted_prediction = false;
// Allocate more bitrate to flat regions, where artifacts are more perceptible, uses CUDA cores
bool adaptive_quantization = false;
// Don't use QP below a certain value, limits peak image quality to save bitrate
bool enable_min_qp = false;
// Min QP value for H.264 when enable_min_qp is selected
unsigned min_qp_h264 = 19;
// Min QP value for HEVC when enable_min_qp is selected
unsigned min_qp_hevc = 23;
// Use CAVLC entropy coding in H.264 instead of CABAC, rarely relevant, kept for historical reasons
bool h264_cavlc = false;
// Add filler data to encoded frames to stay at target bitrate, mainly for testing
bool insert_filler_data = false;
};
} // namespace nvenc
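As an illustration of how these options combine (the values are arbitrary, not recommendations):

nvenc::nvenc_config cfg;
cfg.quality_preset = 4; // middle of the P1..P7 range
cfg.two_pass = nvenc::nvenc_two_pass::quarter_resolution;
cfg.enable_min_qp = true; // applies min_qp_h264 or min_qp_hevc depending on the codec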

src/nvenc/nvenc_d3d11.cpp (new file, 104 lines)

@ -0,0 +1,104 @@
#ifdef _WIN32
#include "nvenc_d3d11.h"
#include "nvenc_utils.h"
namespace nvenc {
nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device):
nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device),
d3d_device(d3d_device) {
}
nvenc_d3d11::~nvenc_d3d11() {
if (encoder) destroy_encoder();
if (dll) {
FreeLibrary(dll);
dll = NULL;
}
}
ID3D11Texture2D *
nvenc_d3d11::get_input_texture() {
return d3d_input_texture.GetInterfacePtr();
}
bool
nvenc_d3d11::init_library() {
if (dll) return true;
#ifdef _WIN64
auto dll_name = "nvEncodeAPI64.dll";
#else
auto dll_name = "nvEncodeAPI.dll";
#endif
if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) {
auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
new_nvenc->version = NV_ENCODE_API_FUNCTION_LIST_VER;
if (nvenc_failed(create_instance(new_nvenc.get()))) {
BOOST_LOG(error) << "NvEncodeAPICreateInstance failed: " << last_error_string;
}
else {
nvenc = std::move(new_nvenc);
return true;
}
}
else {
BOOST_LOG(error) << "No NvEncodeAPICreateInstance in " << dll_name;
}
}
else {
BOOST_LOG(debug) << "Couldn't load NvEnc library " << dll_name;
}
if (dll) {
FreeLibrary(dll);
dll = NULL;
}
return false;
}
bool
nvenc_d3d11::create_and_register_input_buffer() {
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { NV_ENC_REGISTER_RESOURCE_VER };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEncRegisterResource failed: " << last_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
} // namespace nvenc
#endif

src/nvenc/nvenc_d3d11.h (new file, 35 lines)

@ -0,0 +1,35 @@
#pragma once
#ifdef _WIN32
#include <comdef.h>
#include <d3d11.h>
#include "nvenc_base.h"
namespace nvenc {
_COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
_COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
class nvenc_d3d11 final: public nvenc_base {
public:
nvenc_d3d11(ID3D11Device *d3d_device);
~nvenc_d3d11();
ID3D11Texture2D *
get_input_texture();
private:
bool
init_library() override;
bool
create_and_register_input_buffer() override;
HMODULE dll = NULL;
const ID3D11DevicePtr d3d_device;
ID3D11Texture2DPtr d3d_input_texture;
};
} // namespace nvenc
#endif

src/nvenc/nvenc_encoded_frame.h (new file, 13 lines)

@ -0,0 +1,13 @@
#pragma once
#include <cstdint>
#include <vector>
namespace nvenc {
struct nvenc_encoded_frame {
std::vector<uint8_t> data;
uint64_t frame_index = 0;
bool idr = false;
bool after_ref_frame_invalidation = false;
};
} // namespace nvenc
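A consumer-side sketch of how these fields map onto packetization (send_video_packet is hypothetical):

void handle_frame(const nvenc::nvenc_encoded_frame &frame) {
  // after_ref_frame_invalidation flags the first frame produced after an RFI request
  send_video_packet(frame.data.data(), frame.data.size(),
    frame.frame_index, frame.idr, frame.after_ref_frame_invalidation);
}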

src/nvenc/nvenc_utils.cpp (new file, 76 lines)

@ -0,0 +1,76 @@
#include "nvenc_utils.h"
namespace nvenc {
#ifdef _WIN32
DXGI_FORMAT
dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format) {
switch (format) {
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return DXGI_FORMAT_P010;
case NV_ENC_BUFFER_FORMAT_NV12:
return DXGI_FORMAT_NV12;
default:
return DXGI_FORMAT_UNKNOWN;
}
}
#endif
NV_ENC_BUFFER_FORMAT
nvenc_format_from_sunshine_format(platf::pix_fmt_e format) {
switch (format) {
case platf::pix_fmt_e::nv12:
return NV_ENC_BUFFER_FORMAT_NV12;
case platf::pix_fmt_e::p010:
return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
default:
return NV_ENC_BUFFER_FORMAT_UNDEFINED;
}
}
nvenc_colorspace_t
nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace) {
nvenc_colorspace_t colorspace;
switch (sunshine_colorspace.colorspace) {
case video::colorspace_e::rec601:
// Rec. 601
colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
colorspace.transfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
break;
case video::colorspace_e::rec709:
// Rec. 709
colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
colorspace.transfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
break;
case video::colorspace_e::bt2020sdr:
// Rec. 2020
colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
assert(sunshine_colorspace.bit_depth == 10);
colorspace.transfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_10;
colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
break;
case video::colorspace_e::bt2020:
// Rec. 2020 with ST 2084 perceptual quantizer
colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
assert(sunshine_colorspace.bit_depth == 10);
colorspace.transfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084;
colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
break;
}
colorspace.full_range = sunshine_colorspace.full_range;
return colorspace;
}
} // namespace nvenc

src/nvenc/nvenc_utils.h (new file, 27 lines)

@ -0,0 +1,27 @@
#pragma once
#ifdef _WIN32
#include <dxgiformat.h>
#endif
#include "nvenc_colorspace.h"
#include "src/platform/common.h"
#include "src/video_colorspace.h"
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
#ifdef _WIN32
DXGI_FORMAT
dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format);
#endif
NV_ENC_BUFFER_FORMAT
nvenc_format_from_sunshine_format(platf::pix_fmt_e format);
nvenc_colorspace_t
nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace);
} // namespace nvenc

src/platform/common.h

@ -13,6 +13,7 @@
#include "src/main.h"
#include "src/thread_safe.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
extern "C" {
#include <moonlight-common-c/src/Limelight.h>
@ -45,6 +46,9 @@ namespace boost {
namespace video {
struct config_t;
} // namespace video
namespace nvenc {
class nvenc_base;
}
namespace platf {
// Limited by bits in activeGamepadMask
@ -344,15 +348,28 @@ namespace platf {
std::optional<null_t> null;
};
struct hwdevice_t {
struct encode_device_t {
virtual ~encode_device_t() = default;
virtual int
convert(platf::img_t &img) = 0;
video::sunshine_colorspace_t colorspace;
};
struct avcodec_encode_device_t: encode_device_t {
void *data {};
AVFrame *frame {};
virtual int
convert(platf::img_t &img) {
int
convert(platf::img_t &img) override {
return -1;
}
virtual void
apply_colorspace() {
}
/**
* implementations must take ownership of 'frame'
*/
@ -362,9 +379,6 @@ namespace platf {
return -1;
};
virtual void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {};
/**
* Implementations may set parameters during initialization of the hwframes context
*/
@ -378,8 +392,13 @@ namespace platf {
prepare_to_derive_context(int hw_device_type) {
return 0;
};
};
virtual ~hwdevice_t() = default;
struct nvenc_encode_device_t: encode_device_t {
virtual bool
init_encoder(const video::config_t &client_config, const video::sunshine_colorspace_t &colorspace) = 0;
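// Set by the platform backend so the generic streaming code can drive NVENC directly, bypassing avcodec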
nvenc::nvenc_base *nvenc = nullptr;
};
enum class capture_e : int {
@ -440,9 +459,14 @@ namespace platf {
virtual int
dummy_img(img_t *img) = 0;
virtual std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) {
return std::make_shared<hwdevice_t>();
virtual std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual bool

View File

@ -88,7 +88,7 @@ namespace cuda {
return 0;
}
class cuda_t: public platf::hwdevice_t {
class cuda_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height) {
@ -145,8 +145,8 @@ namespace cuda {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
auto tex = tex_t::make(height, width * 4);
if (!tex) {
@ -223,19 +223,19 @@ namespace cuda {
}
};
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
if (init()) {
return nullptr;
}
std::shared_ptr<cuda_t> cuda;
std::unique_ptr<cuda_t> cuda;
if (vram) {
cuda = std::make_shared<cuda_vram_t>();
cuda = std::make_unique<cuda_vram_t>();
}
else {
cuda = std::make_shared<cuda_ram_t>();
cuda = std::make_unique<cuda_ram_t>();
}
if (cuda->init(width, height)) {
@ -675,9 +675,9 @@ namespace cuda {
return platf::capture_e::ok;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
return ::cuda::make_hwdevice(width, height, true);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) {
return ::cuda::make_avcodec_encode_device(width, height, true);
}
std::shared_ptr<platf::img_t>

View File

@ -56,12 +56,11 @@ public:
};
} // namespace platf
namespace video {
using __float4 = float[4];
using __float3 = float[3];
using __float2 = float[2];
// End special declarations
struct alignas(16) color_t {
namespace cuda {
struct alignas(16) cuda_color_t {
float4 color_vec_y;
float4 color_vec_u;
float4 color_vec_v;
@ -69,22 +68,8 @@ struct alignas(16) color_t {
float2 range_uv;
};
struct alignas(16) color_extern_t {
__float4 color_vec_y;
__float4 color_vec_u;
__float4 color_vec_v;
__float2 range_y;
__float2 range_uv;
};
static_assert(sizeof(video::color_t) == sizeof(cuda::cuda_color_t), "color matrix struct mismatch");
static_assert(sizeof(video::color_t) == sizeof(video::color_extern_t), "color matrix struct mismatch");
extern color_t colors[6];
} // namespace video
// End special declarations
namespace cuda {
auto constexpr INVALID_TEXTURE = std::numeric_limits<cudaTextureObject_t>::max();
template<class T>
@ -144,7 +129,7 @@ inline __device__ float3 bgra_to_rgb(float4 vec) {
return make_float3(vec.z, vec.y, vec.x);
}
inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float2 calcUV(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_u = color_matrix->color_vec_u;
float4 vec_v = color_matrix->color_vec_v;
@ -157,7 +142,7 @@ inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_
return make_float2(u, v);
}
inline __device__ float calcY(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float calcY(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_y = color_matrix->color_vec_y;
return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y;
@ -166,7 +151,7 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
__global__ void RGBA_to_NV12(
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
float scale, const viewport_t viewport, const video::color_t *const color_matrix) {
float scale, const viewport_t viewport, const cuda_color_t *const color_matrix) {
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
int idY = (threadIdx.y + blockDim.y * blockIdx.y) * 2;
@ -297,7 +282,7 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
auto ptr = make_ptr<video::color_t>();
auto ptr = make_ptr<cuda_color_t>();
if(!ptr) {
return std::nullopt;
}
@ -316,32 +301,13 @@ int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std:
dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (cuda_color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}
void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch(colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
color_p = &video::colors[0];
};
if(color_range > 1) {
// Full range
++color_p;
}
void sws_t::apply_colorspace(const video::sunshine_colorspace_t& colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
}

View File

@ -6,6 +6,8 @@
#if defined(SUNSHINE_BUILD_CUDA)
#include "src/video_colorspace.h"
#include <cstdint>
#include <memory>
#include <optional>
@ -13,7 +15,7 @@
#include <vector>
namespace platf {
class hwdevice_t;
class avcodec_encode_device_t;
class img_t;
} // namespace platf
@ -23,8 +25,8 @@ namespace cuda {
std::vector<std::string>
display_names();
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
int
init();
} // namespace cuda
@ -109,7 +111,7 @@ namespace cuda {
convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
int
load_ram(platf::img_t &img, cudaArray_t array);

View File

@ -607,27 +607,8 @@ namespace egl {
}
void
sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &video::colors[0];
};
if (color_range > 1) {
// Full range
++color_p;
}
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
std::string_view members[] {
util::view(color_p->color_vec_y),
@ -741,7 +722,7 @@ namespace egl {
gl::ctx.UseProgram(sws.program[1].handle());
gl::ctx.Uniform1fv(loc_width_i, 1, &width_i);
auto color_p = &video::colors[0];
auto color_p = video::color_vectors_from_colorspace(video::colorspace_e::rec601, false);
std::pair<const char *, std::string_view> members[] {
std::make_pair("color_vec_y", util::view(color_p->color_vec_y)),
std::make_pair("color_vec_u", util::view(color_p->color_vec_u)),

View File

@ -14,6 +14,7 @@
#include "src/main.h"
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
#define SUNSHINE_STRINGIFY_HELPER(x) #x
#define SUNSHINE_STRINGIFY(x) SUNSHINE_STRINGIFY_HELPER(x)
@ -327,7 +328,7 @@ namespace egl {
load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
// The first texture is the monitor image.
// The second texture is the cursor image

View File

@ -768,13 +768,13 @@ namespace platf {
return capture_e::ok;
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
capture_e
@ -843,10 +843,10 @@ namespace platf {
display_vram_t(mem_type_e mem_type):
display_t(mem_type) {}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
return va::make_avcodec_encode_device(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
}
BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt);

View File

@ -290,9 +290,9 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf);
class va_t: public platf::hwdevice_t {
class va_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height, file_t &&render_device) {
@ -304,7 +304,7 @@ namespace va {
return -1;
}
this->data = (void *) vaapi_make_hwdevice_ctx;
this->data = (void *) vaapi_init_avcodec_hardware_input_buffer;
gbm.reset(gbm::create_device(file.el));
if (!gbm) {
@ -398,8 +398,8 @@ namespace va {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
}
va::display_t::pointer va_display;
@ -526,7 +526,7 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf) {
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *base, AVBufferRef **hw_device_buf) {
if (!va::initialize) {
BOOST_LOG(warning) << "libva not loaded"sv;
return -1;
@ -653,10 +653,10 @@ namespace va {
return true;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
if (vram) {
auto egl = std::make_shared<va::va_vram_t>();
auto egl = std::make_unique<va::va_vram_t>();
if (egl->init(width, height, std::move(card), offset_x, offset_y)) {
return nullptr;
}
@ -665,7 +665,7 @@ namespace va {
}
else {
auto egl = std::make_shared<va::va_ram_t>();
auto egl = std::make_unique<va::va_ram_t>();
if (egl->init(width, height, std::move(card))) {
return nullptr;
}
@ -674,8 +674,8 @@ namespace va {
}
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram) {
auto render_device = config::video.adapter_name.empty() ? "/dev/dri/renderD128" : config::video.adapter_name.c_str();
file_t file = open(render_device, O_RDWR);
@ -686,11 +686,11 @@ namespace va {
return nullptr;
}
return make_hwdevice(width, height, std::move(file), offset_x, offset_y, vram);
return make_avcodec_encode_device(width, height, std::move(file), offset_x, offset_y, vram);
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
return make_hwdevice(width, height, 0, 0, vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
return make_avcodec_encode_device(width, height, 0, 0, vram);
}
} // namespace va

View File

@ -18,12 +18,12 @@ namespace va {
* offset_y --> Vertical offset of the image in the texture
* file_t card --> The file descriptor of the render device used for encoding
*/
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
// Ensure the render device pointed to by fd is capable of encoding h264 with the hevc_mode configured
bool

View File

@ -215,13 +215,13 @@ namespace wl {
return 0;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
std::shared_ptr<platf::img_t>
@ -323,13 +323,13 @@ namespace wl {
return img;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, 0, 0, true);
return va::make_avcodec_encode_device(width, height, 0, 0, true);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
int

View File

@ -553,19 +553,19 @@ namespace platf {
return std::make_shared<x11_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
#ifdef SUNSHINE_BUILD_CUDA
if (mem_type == mem_type_e::cuda) {
return cuda::make_hwdevice(width, height, false);
return cuda::make_avcodec_encode_device(width, height, false);
}
#endif
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
int

View File

@ -94,15 +94,15 @@ namespace platf {
return std::make_shared<av_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (pix_fmt == pix_fmt_e::yuv420p) {
av_capture.pixelFormat = kCVPixelFormatType_32BGRA;
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
else if (pix_fmt == pix_fmt_e::nv12) {
auto device = std::make_shared<nv12_zero_device>();
auto device = std::make_unique<nv12_zero_device>();
device->init(static_cast<void *>(av_capture), setResolution, setPixelFormat);

View File

@ -70,10 +70,6 @@ namespace platf {
return 0;
}
void
nv12_zero_device::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
}
int
nv12_zero_device::init(void *display, resolution_fn_t resolution_fn, pixel_format_fn_t pixel_format_fn) {
pixel_format_fn(display, '420v');

View File

@ -8,7 +8,7 @@
namespace platf {
class nv12_zero_device: public hwdevice_t {
class nv12_zero_device: public avcodec_encode_device_t {
// display holds a pointer to an av_video object. Since the namespaces of AVFoundation
// and FFMPEG collide, we need this opaque pointer and cannot use the definition
void *display;
@ -27,8 +27,6 @@ namespace platf {
convert(img_t &img);
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
};
} // namespace platf

View File

@ -13,6 +13,7 @@
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video.h"
namespace platf::dxgi {
extern const char *format_str[];
@ -215,8 +216,11 @@ namespace platf::dxgi {
int
init(const ::video::config_t &config, const std::string &display_name);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override;
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override;
std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) override;
sampler_state_t sampler_linear;

View File

@ -16,7 +16,11 @@ extern "C" {
#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
@ -361,10 +365,10 @@ namespace platf::dxgi {
return compile_shader(file, "main_vs", "vs_5_0");
}
class hwdevice_t: public platf::hwdevice_t {
class d3d_base_encode_device final {
public:
int
convert(platf::img_t &img_base) override {
convert(platf::img_t &img_base) {
// Garbage collect mapped capture images whose weak references have expired
for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
if (it->second.img_weak.expired()) {
@ -413,28 +417,15 @@ namespace platf::dxgi {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &::video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &::video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &::video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &::video::colors[0];
};
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
if (color_range > 1) {
// Full range
++color_p;
if (!color_vectors) {
BOOST_LOG(error) << "No vector data for colorspace"sv;
return;
}
auto color_matrix = make_buffer((device_t::pointer) data, *color_p);
auto color_matrix = make_buffer(device.get(), *color_vectors);
if (!color_matrix) {
BOOST_LOG(warning) << "Failed to create color matrix"sv;
return;
@ -445,78 +436,14 @@ namespace platf::dxgi {
this->color_matrix = std::move(color_matrix);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
init_output(ID3D11Texture2D *frame_texture, int width, int height) {
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
output_texture.reset(frame_texture);
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
auto out_width = width;
auto out_height = height;
float in_width = display->width;
float in_height = display->height;
@ -533,10 +460,6 @@ namespace platf::dxgi {
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
@ -550,7 +473,7 @@ namespace platf::dxgi {
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@ -558,7 +481,7 @@ namespace platf::dxgi {
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@ -574,9 +497,7 @@ namespace platf::dxgi {
}
int
init(
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
@ -615,8 +536,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if (status) {
@ -673,7 +592,13 @@ namespace platf::dxgi {
return -1;
}
color_matrix = make_buffer(device.get(), ::video::colors[0]);
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
return -1;
}
color_matrix = make_buffer(device.get(), *default_color_vectors);
if (!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@ -721,7 +646,6 @@ namespace platf::dxgi {
return 0;
}
private:
struct encoder_img_ctx_t {
// Used to determine if the underlying texture changes.
// Not safe for actual use by the encoder!
@ -789,9 +713,6 @@ namespace platf::dxgi {
return 0;
}
public:
frame_t hwframe;
::video::color_t *color_p;
buf_t info_scene;
@ -805,9 +726,6 @@ namespace platf::dxgi {
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// d3d_img_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
@ -830,6 +748,149 @@ namespace platf::dxgi {
device_t device;
device_ctx_t device_ctx;
texture2d_t output_texture;
};
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
data = base.device.get();
return result;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
void
apply_colorspace() override {
base.apply_colorspace(colorspace);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
return base.init_output(frame_texture, frame->width, frame->height);
}
private:
d3d_base_encode_device base;
frame_t hwframe;
};
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}
if (base.init(display, adapter_p, pix_fmt)) return false;
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
nvenc = nvenc_d3d.get();
return true;
}
bool
init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
if (!nvenc_d3d) return false;
nvenc::nvenc_config nvenc_config;
nvenc_config.quality_preset = config::video.nv.nv_preset ? (*config::video.nv.nv_preset - 11) : 1;
nvenc_config.h264_cavlc = (config::video.nv.nv_coder == NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC);
auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
if (!nvenc_d3d->create_encoder(nvenc_config, client_config, nvenc_colorspace, buffer_format)) return false;
base.apply_colorspace(colorspace);
return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
private:
d3d_base_encode_device base;
std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
};
bool
@ -1464,26 +1525,32 @@ namespace platf::dxgi {
};
}
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
std::unique_ptr<avcodec_encode_device_t>
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_t>();
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
auto ret = hwdevice->init(
shared_from_this(),
adapter.get(),
pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
if (ret) {
return nullptr;
}
return hwdevice;
return device;
}
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
auto device = std::make_unique<d3d_nvenc_encode_device_t>();
if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
return nullptr;
}
return device;
}
int

View File

@ -500,6 +500,10 @@ namespace rtsp_stream {
ss << "sprop-parameter-sets=AAAAAU"sv << std::endl;
}
if (video::last_encoder_probe_supported_ref_frames_invalidation) {
ss << "x-nv-video[0].refPicInvalidation=1"sv << std::endl;
}
for (int x = 0; x < audio::MAX_STREAM_CONFIG; ++x) {
auto &stream_config = audio::stream_configs[x];
std::uint8_t mapping[platf::speaker::MAX_SPEAKERS];

View File

@ -32,9 +32,14 @@ namespace stat_trackers {
data.calls += 1;
}
void
reset() {
data = {};
}
private:
struct {
std::chrono::steady_clock::time_point last_callback_time = std::chrono::steady_clock::now();
T stat_min = std::numeric_limits<T>::max();
T stat_max = 0;
double stat_total = 0;

View File

@ -355,6 +355,7 @@ namespace stream {
int lowseq;
udp::endpoint peer;
safe::mail_raw_t::event_t<bool> idr_events;
safe::mail_raw_t::event_t<std::pair<int64_t, int64_t>> invalidate_ref_frames_events;
std::unique_ptr<platf::deinit_t> qos;
} video;
@ -833,7 +834,7 @@ namespace stream {
<< "firstFrame [" << firstFrame << ']' << std::endl
<< "lastFrame [" << lastFrame << ']';
session->video.idr_events->raise(true);
session->video.invalidate_ref_frames_events->raise(std::make_pair(firstFrame, lastFrame));
});
server->map(packetTypes[IDX_INPUT_DATA], [&](session_t *session, const std::string_view &payload) {
@ -895,29 +896,23 @@ namespace stream {
return;
}
// Ensure compatibility with old packet type
std::string_view next_payload { (char *) plaintext.data(), plaintext.size() };
auto type = *(std::uint16_t *) next_payload.data();
auto type = *(std::uint16_t *) plaintext.data();
std::string_view next_payload { (char *) plaintext.data() + 4, plaintext.size() - 4 };
if (type == packetTypes[IDX_ENCRYPTED]) {
BOOST_LOG(error) << "Bad packet type [IDX_ENCRYPTED] found"sv;
session::stop(*session);
return;
}
// IDX_INPUT_DATA will attempt to decrypt unencrypted data, therefore we need to skip it.
if (type != packetTypes[IDX_INPUT_DATA]) {
server->call(type, session, next_payload);
return;
// IDX_INPUT_DATA callback will attempt to decrypt unencrypted data, therefore we need to pass it directly
if (type == packetTypes[IDX_INPUT_DATA]) {
plaintext.erase(std::begin(plaintext), std::begin(plaintext) + 4);
input::passthrough(session->input, std::move(plaintext));
}
else {
server->call(type, session, next_payload);
}
// Ensure compatibility with IDX_INPUT_DATA
constexpr auto skip = sizeof(std::uint16_t) * 2;
plaintext.erase(std::begin(plaintext), std::begin(plaintext) + skip);
input::passthrough(session->input, std::move(plaintext));
});
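The parsing above implies a small fixed header at the front of every decrypted control payload; the removed sizeof(std::uint16_t) * 2 skip confirms its size. A sketch of the layout (field names are illustrative):
struct control_payload_header_t {
std::uint16_t type;  // one of packetTypes[...]
std::uint16_t length;  // length of the payload that follows
// payload bytes follow immediately after the header
};
IDX_INPUT_DATA bypasses the normal callback, which would try to decrypt the already-decrypted bytes, and goes straight to input::passthrough() once the 4-byte header is stripped.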
// This thread handles latency-sensitive control messages
@ -1124,13 +1119,14 @@ namespace stream {
auto session = (session_t *) packet->channel_data;
auto lowseq = session->video.lowseq;
auto av_packet = packet->av_packet;
std::string_view payload { (char *) av_packet->data, (size_t) av_packet->size };
std::string_view payload { (char *) packet->data(), packet->data_size() };
std::vector<uint8_t> payload_new;
video_short_frame_header_t frame_header = {};
frame_header.headerType = 0x01; // Short header type
frame_header.frameType = (av_packet->flags & AV_PKT_FLAG_KEY) ? 2 : 1;
frame_header.frameType = packet->is_idr() ? 2 :
packet->after_ref_frame_invalidation ? 5 :
1;
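// Short-header frame type values, as implied by the assignment above
// (1 and 2 reproduce the old AV_PKT_FLAG_KEY behavior, 5 is new):
//   1 - regular P-frame
//   2 - IDR frame
//   5 - P-frame encoded after reference frame invalidation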
if (packet->frame_timestamp) {
auto duration_to_latency = [](const std::chrono::steady_clock::duration &duration) {
@ -1160,7 +1156,7 @@ namespace stream {
payload = { (char *) payload_new.data(), payload_new.size() };
if (av_packet->flags & AV_PKT_FLAG_KEY) {
if (packet->is_idr() && packet->replacements) {
for (auto &replacement : *packet->replacements) {
auto frame_old = replacement.old;
auto frame_new = replacement._new;
@ -1226,9 +1222,8 @@ namespace stream {
for (int x = 0; x < packets; ++x) {
auto *inspect = (video_packet_raw_t *) &current_payload[x * blocksize];
auto av_packet = packet->av_packet;
inspect->packet.frameIndex = av_packet->pts;
inspect->packet.frameIndex = packet->frame_index();
inspect->packet.streamPacketIndex = ((uint32_t) lowseq + x) << 8;
// Match multiFecFlags with Moonlight
@ -1264,7 +1259,7 @@ namespace stream {
inspect->rtp.timestamp = util::endian::big<uint32_t>(timestamp);
inspect->packet.multiFecBlocks = (blockIndex << 4) | lastBlockIndex;
inspect->packet.frameIndex = av_packet->pts;
inspect->packet.frameIndex = packet->frame_index();
}
auto peer_address = session->video.peer.address();
@ -1286,11 +1281,11 @@ namespace stream {
}
}
if (av_packet->flags & AV_PKT_FLAG_KEY) {
BOOST_LOG(verbose) << "Key Frame ["sv << av_packet->pts << "] :: send ["sv << shards.size() << "] shards..."sv;
if (packet->is_idr()) {
BOOST_LOG(verbose) << "Key Frame ["sv << packet->frame_index() << "] :: send ["sv << shards.size() << "] shards..."sv;
}
else {
BOOST_LOG(verbose) << "Frame ["sv << av_packet->pts << "] :: send ["sv << shards.size() << "] shards..."sv << std::endl;
BOOST_LOG(verbose) << "Frame ["sv << packet->frame_index() << "] :: send ["sv << shards.size() << "] shards..."sv << std::endl;
}
++blockIndex;
@ -1754,6 +1749,7 @@ namespace stream {
};
session->video.idr_events = mail->event<bool>(mail::idr);
session->video.invalidate_ref_frames_events = mail->event<std::pair<int64_t, int64_t>>(mail::invalidate_ref_frames);
session->video.lowseq = 0;
constexpr auto max_block_size = crypto::cipher::round_to_pkcs7_padded(2048);
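On the consuming side, the encoder loop can drain this event and forward the range to an encoder that supports reference frame invalidation. A sketch, assuming the peek()/pop() semantics of the mail events used elsewhere in Sunshine:
while (session->video.invalidate_ref_frames_events->peek()) {
if (auto range = session->video.invalidate_ref_frames_events->pop()) {
auto [first_frame, last_frame] = *range;
// ask the encoder to invalidate references in [first_frame, last_frame]
}
}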

File diff suppressed because it is too large

View File

@ -7,6 +7,7 @@
#include "input.h"
#include "platform/common.h"
#include "thread_safe.h"
#include "video_colorspace.h"
extern "C" {
#include <libavcodec/avcodec.h>
@ -16,25 +17,19 @@ struct AVPacket;
namespace video {
struct packet_raw_t {
void
init_packet() {
this->av_packet = av_packet_alloc();
}
virtual ~packet_raw_t() = default;
template <class P>
explicit packet_raw_t(P *user_data):
channel_data { user_data } {
init_packet();
}
virtual bool
is_idr() = 0;
explicit packet_raw_t(std::nullptr_t):
channel_data { nullptr } {
init_packet();
}
virtual int64_t
frame_index() = 0;
~packet_raw_t() {
av_packet_free(&this->av_packet);
}
virtual uint8_t *
data() = 0;
virtual size_t
data_size() = 0;
struct replace_t {
std::string_view old;
@ -46,13 +41,74 @@ namespace video {
old { std::move(old) }, _new { std::move(_new) } {}
};
AVPacket *av_packet;
std::vector<replace_t> *replacements;
void *channel_data;
std::vector<replace_t> *replacements = nullptr;
void *channel_data = nullptr;
bool after_ref_frame_invalidation = false;
std::optional<std::chrono::steady_clock::time_point> frame_timestamp;
};
struct packet_raw_avcodec: packet_raw_t {
packet_raw_avcodec() {
av_packet = av_packet_alloc();
}
~packet_raw_avcodec() {
av_packet_free(&this->av_packet);
}
bool
is_idr() override {
return av_packet->flags & AV_PKT_FLAG_KEY;
}
int64_t
frame_index() override {
return av_packet->pts;
}
uint8_t *
data() override {
return av_packet->data;
}
size_t
data_size() override {
return av_packet->size;
}
AVPacket *av_packet;
};
struct packet_raw_generic: packet_raw_t {
packet_raw_generic(std::vector<uint8_t> &&frame_data, int64_t frame_index, bool idr):
frame_data { std::move(frame_data) }, index { frame_index }, idr { idr } {
}
bool
is_idr() override {
return idr;
}
int64_t
frame_index() override {
return index;
}
uint8_t *
data() override {
return frame_data.data();
}
size_t
data_size() override {
return frame_data.size();
}
std::vector<uint8_t> frame_data;
int64_t index;
bool idr;
};
using packet_t = std::unique_ptr<packet_raw_t>;
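With this split, the standalone NVENC path can hand encoded bytes to the streaming code without allocating an AVPacket. A sketch of constructing a generic packet (encoded_bytes, frame_index, is_idr and channel_data are assumed caller-side values):
auto packet = std::make_unique<packet_raw_generic>(std::move(encoded_bytes), frame_index, is_idr);
packet->channel_data = channel_data;
packet_t type_erased = std::move(packet);  // downstream code sees only the packet_raw_t interface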
struct hdr_info_raw_t {
@ -67,33 +123,29 @@ namespace video {
using hdr_info_t = std::unique_ptr<hdr_info_raw_t>;
/* Encoding configuration requested by remote client */
struct config_t {
int width;
int height;
int framerate;
int bitrate;
int slicesPerFrame;
int numRefFrames;
int width; // Video width in pixels
int height; // Video height in pixels
int framerate; // Requested framerate, used in individual frame bitrate budget calculation
int bitrate; // Video bitrate in kilobits (1000 bits) for requested framerate
int slicesPerFrame; // Number of slices per frame
int numRefFrames; // Max number of reference frames
/* Requested color range and SDR encoding colorspace, HDR encoding colorspace is always BT.2020+ST2084
Color range (encoderCscMode & 0x1) : 0 - limited, 1 - full
SDR encoding colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */
int encoderCscMode;
int videoFormat;
int videoFormat; // 0 - H.264, 1 - HEVC
/* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit
HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */
int dynamicRange;
};
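A quick worked example of the packing described above: a client requesting full-range Rec. 709 sends encoderCscMode = 3, decoded as follows:
bool full_range = config.encoderCscMode & 0x1;  // 3 & 0x1 == 1 -> full range
int sdr_colorspace = config.encoderCscMode >> 1;  // 3 >> 1 == 1 -> Rec. 709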
using float4 = float[4];
using float3 = float[3];
using float2 = float[2];
struct alignas(16) color_t {
float4 color_vec_y;
float4 color_vec_u;
float4 color_vec_v;
float2 range_y;
float2 range_uv;
};
extern color_t colors[6];
extern int active_hevc_mode;
extern bool last_encoder_probe_supported_ref_frames_invalidation;
void
capture(

181
src/video_colorspace.cpp Normal file
View File

@ -0,0 +1,181 @@
#include "video_colorspace.h"
#include "main.h"
#include "video.h"
extern "C" {
#include <libswscale/swscale.h>
}
namespace video {
bool
colorspace_is_hdr(const sunshine_colorspace_t &colorspace) {
return colorspace.colorspace == colorspace_e::bt2020;
}
sunshine_colorspace_t
colorspace_from_client_config(const config_t &config, bool hdr_display) {
sunshine_colorspace_t colorspace;
/* See video::config_t declaration for details */
if (config.dynamicRange > 0 && hdr_display) {
// Rec. 2020 with ST 2084 perceptual quantizer
colorspace.colorspace = colorspace_e::bt2020;
}
else {
switch (config.encoderCscMode >> 1) {
case 0:
// Rec. 601
colorspace.colorspace = colorspace_e::rec601;
break;
case 1:
// Rec. 709
colorspace.colorspace = colorspace_e::rec709;
break;
case 2:
// Rec. 2020
colorspace.colorspace = colorspace_e::bt2020sdr;
break;
default:
BOOST_LOG(error) << "Unknown video colorspace in csc, falling back to Rec. 709";
colorspace.colorspace = colorspace_e::rec709;
break;
}
}
colorspace.full_range = (config.encoderCscMode & 0x1);
switch (config.dynamicRange) {
case 0:
colorspace.bit_depth = 8;
break;
case 1:
colorspace.bit_depth = 10;
break;
default:
BOOST_LOG(error) << "Unknown dynamicRange value, falling back to 10-bit color depth";
colorspace.bit_depth = 10;
break;
}
if (colorspace.colorspace == colorspace_e::bt2020sdr && colorspace.bit_depth != 10) {
BOOST_LOG(error) << "BT.2020 SDR colorspace expects 10-bit color depth, falling back to Rec. 709";
colorspace.colorspace = colorspace_e::rec709;
}
return colorspace;
}
avcodec_colorspace_t
avcodec_colorspace_from_sunshine_colorspace(const sunshine_colorspace_t &sunshine_colorspace) {
avcodec_colorspace_t avcodec_colorspace;
switch (sunshine_colorspace.colorspace) {
case colorspace_e::rec601:
// Rec. 601
avcodec_colorspace.primaries = AVCOL_PRI_SMPTE170M;
avcodec_colorspace.transfer_function = AVCOL_TRC_SMPTE170M;
avcodec_colorspace.matrix = AVCOL_SPC_SMPTE170M;
avcodec_colorspace.software_format = SWS_CS_SMPTE170M;
break;
case colorspace_e::rec709:
// Rec. 709
avcodec_colorspace.primaries = AVCOL_PRI_BT709;
avcodec_colorspace.transfer_function = AVCOL_TRC_BT709;
avcodec_colorspace.matrix = AVCOL_SPC_BT709;
avcodec_colorspace.software_format = SWS_CS_ITU709;
break;
case colorspace_e::bt2020sdr:
// Rec. 2020
avcodec_colorspace.primaries = AVCOL_PRI_BT2020;
assert(sunshine_colorspace.bit_depth == 10);
avcodec_colorspace.transfer_function = AVCOL_TRC_BT2020_10;
avcodec_colorspace.matrix = AVCOL_SPC_BT2020_NCL;
avcodec_colorspace.software_format = SWS_CS_BT2020;
break;
case colorspace_e::bt2020:
// Rec. 2020 with ST 2084 perceptual quantizer
avcodec_colorspace.primaries = AVCOL_PRI_BT2020;
assert(sunshine_colorspace.bit_depth == 10);
avcodec_colorspace.transfer_function = AVCOL_TRC_SMPTE2084;
avcodec_colorspace.matrix = AVCOL_SPC_BT2020_NCL;
avcodec_colorspace.software_format = SWS_CS_BT2020;
break;
}
avcodec_colorspace.range = sunshine_colorspace.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
return avcodec_colorspace;
}
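These fields map one-to-one onto an avcodec encoder context. A sketch of how a caller might apply them (assumed to mirror the suppressed src/video.cpp changes; ctx is an AVCodecContext being configured):
auto avcs = avcodec_colorspace_from_sunshine_colorspace(sunshine_colorspace);
ctx->color_primaries = avcs.primaries;
ctx->color_trc = avcs.transfer_function;
ctx->colorspace = avcs.matrix;
ctx->color_range = avcs.range;
// software_format selects the libswscale coefficient table for software conversion:
const int *coefficients = sws_getCoefficients(avcs.software_format);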
const color_t *
color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace) {
return color_vectors_from_colorspace(colorspace.colorspace, colorspace.full_range);
}
const color_t *
color_vectors_from_colorspace(colorspace_e colorspace, bool full_range) {
using float2 = float[2];
auto make_color_matrix = [](float Cr, float Cb, const float2 &range_Y, const float2 &range_UV) -> color_t {
float Cg = 1.0f - Cr - Cb;
float Cr_i = 1.0f - Cr;
float Cb_i = 1.0f - Cb;
float shift_y = range_Y[0] / 255.0f;
float shift_uv = range_UV[0] / 255.0f;
float scale_y = (range_Y[1] - range_Y[0]) / 255.0f;
float scale_uv = (range_UV[1] - range_UV[0]) / 255.0f;
return {
{ Cr, Cg, Cb, 0.0f },
{ -(Cr * 0.5f / Cb_i), -(Cg * 0.5f / Cb_i), 0.5f, 0.5f },
{ 0.5f, -(Cg * 0.5f / Cr_i), -(Cb * 0.5f / Cr_i), 0.5f },
{ scale_y, shift_y },
{ scale_uv, shift_uv },
};
};
static const color_t colors[] {
make_color_matrix(0.299f, 0.114f, { 16.0f, 235.0f }, { 16.0f, 240.0f }), // BT601 MPEG
make_color_matrix(0.299f, 0.114f, { 0.0f, 255.0f }, { 0.0f, 255.0f }), // BT601 JPEG
make_color_matrix(0.2126f, 0.0722f, { 16.0f, 235.0f }, { 16.0f, 240.0f }), // BT709 MPEG
make_color_matrix(0.2126f, 0.0722f, { 0.0f, 255.0f }, { 0.0f, 255.0f }), // BT709 JPEG
make_color_matrix(0.2627f, 0.0593f, { 16.0f, 235.0f }, { 16.0f, 240.0f }), // BT2020 MPEG
make_color_matrix(0.2627f, 0.0593f, { 0.0f, 255.0f }, { 0.0f, 255.0f }), // BT2020 JPEG
};
const color_t *result = nullptr;
switch (colorspace) {
case colorspace_e::rec601:
default:
result = &colors[0];
break;
case colorspace_e::rec709:
result = &colors[2];
break;
case colorspace_e::bt2020:
case colorspace_e::bt2020sdr:
result = &colors[4];
break;
};
if (full_range) {
result++;
}
return result;
}
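// A hand-worked sanity check of make_color_matrix for the BT709
// limited-range ("MPEG") entry: Cr = 0.2126, Cb = 0.0722, so
// Cg = 1 - Cr - Cb = 0.7152. Encoding white (R = G = B = 1) gives
// Y' = 0.2126 + 0.7152 + 0.0722 = 1.0, and after the range mapping
// Y = Y' * (235 - 16) / 255 + 16 / 255 = 235 / 255, i.e. luma code 235,
// exactly the top of the limited-range interval.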
} // namespace video

56
src/video_colorspace.h Normal file
View File

@ -0,0 +1,56 @@
#pragma once
extern "C" {
#include <libavutil/pixfmt.h>
}
namespace video {
enum class colorspace_e {
rec601,
rec709,
bt2020sdr,
bt2020,
};
struct sunshine_colorspace_t {
colorspace_e colorspace;
bool full_range;
unsigned bit_depth;
};
bool
colorspace_is_hdr(const sunshine_colorspace_t &colorspace);
// Declared in video.h
struct config_t;
sunshine_colorspace_t
colorspace_from_client_config(const config_t &config, bool hdr_display);
struct avcodec_colorspace_t {
AVColorPrimaries primaries;
AVColorTransferCharacteristic transfer_function;
AVColorSpace matrix;
AVColorRange range;
int software_format;
};
avcodec_colorspace_t
avcodec_colorspace_from_sunshine_colorspace(const sunshine_colorspace_t &sunshine_colorspace);
struct alignas(16) color_t {
float color_vec_y[4];
float color_vec_u[4];
float color_vec_v[4];
float range_y[2];
float range_uv[2];
};
const color_t *
color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace);
const color_t *
color_vectors_from_colorspace(colorspace_e colorspace, bool full_range);
} // namespace video
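Putting the new header together, a typical call sequence might look like this (a sketch; client_config and hdr_display are assumed to come from session setup):
auto colorspace = video::colorspace_from_client_config(client_config, hdr_display);
if (video::colorspace_is_hdr(colorspace)) {
// HDR stream: metadata negotiation happens elsewhere
}
auto avcodec_colorspace = video::avcodec_colorspace_from_sunshine_colorspace(colorspace);
const video::color_t *shader_constants = video::color_vectors_from_colorspace(colorspace);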