mirror of
https://github.com/LizardByte/Sunshine.git
synced 2025-03-14 16:20:53 +00:00
feat(win/video): support native YUV 4:4:4 encoding (#2533)
Some checks failed
CI / GitHub Env Debug (push) Waiting to run
CI / Setup Release (push) Waiting to run
CI / Setup Flatpak Matrix (push) Waiting to run
CI / Linux Flatpak (push) Blocked by required conditions
CI / Linux ${{ matrix.type }} (--appimage-build, 22.04, AppImage) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 12) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 13) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 14) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (ubuntu, latest) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (ubuntu, latest, true) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (12, true) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (13) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (14) (push) Blocked by required conditions
CI / Windows (push) Blocked by required conditions
CI Docker / Check Dockerfiles (push) Waiting to run
CI Docker / Setup Release (push) Blocked by required conditions
CI Docker / Lint Dockerfile${{ matrix.tag }} (push) Blocked by required conditions
CI Docker / Docker${{ matrix.tag }} (push) Blocked by required conditions
CodeQL / Get language matrix (push) Waiting to run
CodeQL / Analyze (${{ matrix.name }}) (push) Blocked by required conditions
Build GH-Pages / update_pages (push) Waiting to run
localize / Update Localization (push) Has been cancelled
Some checks failed
CI / GitHub Env Debug (push) Waiting to run
CI / Setup Release (push) Waiting to run
CI / Setup Flatpak Matrix (push) Waiting to run
CI / Linux Flatpak (push) Blocked by required conditions
CI / Linux ${{ matrix.type }} (--appimage-build, 22.04, AppImage) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 12) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 13) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (macos, 14) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (ubuntu, latest) (push) Blocked by required conditions
CI / Homebrew (${{ matrix.os_name }}-${{ matrix.os_version }}${{ matrix.release == true && ' (Release)' || '' }}) (ubuntu, latest, true) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (12, true) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (13) (push) Blocked by required conditions
CI / Macports (macOS-${{ matrix.os_version }}) (14) (push) Blocked by required conditions
CI / Windows (push) Blocked by required conditions
CI Docker / Check Dockerfiles (push) Waiting to run
CI Docker / Setup Release (push) Blocked by required conditions
CI Docker / Lint Dockerfile${{ matrix.tag }} (push) Blocked by required conditions
CI Docker / Docker${{ matrix.tag }} (push) Blocked by required conditions
CodeQL / Get language matrix (push) Waiting to run
CodeQL / Analyze (${{ matrix.name }}) (push) Blocked by required conditions
Build GH-Pages / update_pages (push) Waiting to run
localize / Update Localization (push) Has been cancelled
This commit is contained in:
parent
e8c837f412
commit
bfdfcebc80
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_base.cpp
|
||||
* @brief Definitions for base NVENC encoder.
|
||||
* @brief Definitions for abstract platform-agnostic base of standalone NVENC encoder.
|
||||
*/
|
||||
#include "nvenc_base.h"
|
||||
|
||||
@ -85,9 +85,8 @@ namespace {
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device):
|
||||
device_type(device_type),
|
||||
device(device) {
|
||||
nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type):
|
||||
device_type(device_type) {
|
||||
}
|
||||
|
||||
nvenc_base::~nvenc_base() {
|
||||
@ -115,19 +114,19 @@ namespace nvenc {
|
||||
session_params.deviceType = device_type;
|
||||
session_params.apiVersion = minimum_api_version;
|
||||
if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) {
|
||||
BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t encode_guid_count = 0;
|
||||
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDCount(encoder, &encode_guid_count))) {
|
||||
BOOST_LOG(error) << "NvEncGetEncodeGUIDCount failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDCount() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
};
|
||||
|
||||
std::vector<GUID> encode_guids(encode_guid_count);
|
||||
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDs(encoder, encode_guids.data(), encode_guids.size(), &encode_guid_count))) {
|
||||
BOOST_LOG(error) << "NvEncGetEncodeGUIDs failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDs() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -176,7 +175,7 @@ namespace nvenc {
|
||||
};
|
||||
|
||||
auto buffer_is_yuv444 = [&]() {
|
||||
return buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
|
||||
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
|
||||
};
|
||||
|
||||
{
|
||||
@ -220,7 +219,7 @@ namespace nvenc {
|
||||
|
||||
NV_ENC_PRESET_CONFIG preset_config = { min_struct_version(NV_ENC_PRESET_CONFIG_VER), { min_struct_version(NV_ENC_CONFIG_VER, 7, 8) } };
|
||||
if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) {
|
||||
BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -228,7 +227,6 @@ namespace nvenc {
|
||||
enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
|
||||
enc_config.gopLength = NVENC_INFINITE_GOPLENGTH;
|
||||
enc_config.frameIntervalP = 1;
|
||||
enc_config.rcParams.enableAQ = config.adaptive_quantization;
|
||||
enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
|
||||
enc_config.rcParams.zeroReorderDelay = 1;
|
||||
enc_config.rcParams.enableLookahead = 0;
|
||||
@ -282,7 +280,7 @@ namespace nvenc {
|
||||
}
|
||||
};
|
||||
|
||||
auto fill_h264_hevc_vui = [&colorspace](auto &vui_config) {
|
||||
auto fill_h264_hevc_vui = [&](auto &vui_config) {
|
||||
vui_config.videoSignalTypePresentFlag = 1;
|
||||
vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED;
|
||||
vui_config.videoFullRangeFlag = colorspace.full_range;
|
||||
@ -290,7 +288,7 @@ namespace nvenc {
|
||||
vui_config.colourPrimaries = colorspace.primaries;
|
||||
vui_config.transferCharacteristics = colorspace.tranfer_function;
|
||||
vui_config.colourMatrix = colorspace.matrix;
|
||||
vui_config.chromaSampleLocationFlag = 1;
|
||||
vui_config.chromaSampleLocationFlag = buffer_is_yuv444() ? 0 : 1;
|
||||
vui_config.chromaSampleLocationTop = 0;
|
||||
vui_config.chromaSampleLocationBot = 0;
|
||||
};
|
||||
@ -331,7 +329,9 @@ namespace nvenc {
|
||||
auto &format_config = enc_config.encodeCodecConfig.av1Config;
|
||||
format_config.repeatSeqHdr = 1;
|
||||
format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH;
|
||||
format_config.chromaFormatIDC = 1; // YUV444 not supported by NVENC yet
|
||||
if (buffer_is_yuv444()) {
|
||||
format_config.chromaFormatIDC = 3;
|
||||
}
|
||||
format_config.enableBitstreamPadding = config.insert_filler_data;
|
||||
if (buffer_is_10bit()) {
|
||||
format_config.inputPixelBitDepthMinus8 = 2;
|
||||
@ -341,7 +341,7 @@ namespace nvenc {
|
||||
format_config.transferCharacteristics = colorspace.tranfer_function;
|
||||
format_config.matrixCoefficients = colorspace.matrix;
|
||||
format_config.colorRange = colorspace.full_range;
|
||||
format_config.chromaSamplePosition = 1;
|
||||
format_config.chromaSamplePosition = buffer_is_yuv444() ? 0 : 1;
|
||||
set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numFwdRefs, 8);
|
||||
set_minqp_if_enabled(config.min_qp_av1);
|
||||
|
||||
@ -358,7 +358,7 @@ namespace nvenc {
|
||||
init_params.encodeConfig = &enc_config;
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncInitializeEncoder(encoder, &init_params))) {
|
||||
BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncInitializeEncoder() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -366,14 +366,14 @@ namespace nvenc {
|
||||
NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
|
||||
event_params.completionEvent = async_event_handle;
|
||||
if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) {
|
||||
BOOST_LOG(error) << "NvEncRegisterAsyncEvent failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER) };
|
||||
if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) {
|
||||
BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
output_bitstream = create_bitstream_buffer.bitstreamBuffer;
|
||||
@ -388,8 +388,13 @@ namespace nvenc {
|
||||
}
|
||||
|
||||
{
|
||||
auto video_format_string = client_config.videoFormat == 0 ? "H.264 " :
|
||||
client_config.videoFormat == 1 ? "HEVC " :
|
||||
client_config.videoFormat == 2 ? "AV1 " :
|
||||
" ";
|
||||
std::string extra;
|
||||
if (init_params.enableEncodeAsync) extra += " async";
|
||||
if (buffer_is_yuv444()) extra += " yuv444";
|
||||
if (buffer_is_10bit()) extra += " 10-bit";
|
||||
if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass";
|
||||
if (config.vbv_percentage_increase > 0 && get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) extra += " vbv+" + std::to_string(config.vbv_percentage_increase);
|
||||
@ -398,7 +403,8 @@ namespace nvenc {
|
||||
if (enc_config.rcParams.enableAQ) extra += " spatial-aq";
|
||||
if (enc_config.rcParams.enableMinQP) extra += " qpmin=" + std::to_string(enc_config.rcParams.minQP.qpInterP);
|
||||
if (config.insert_filler_data) extra += " filler-data";
|
||||
BOOST_LOG(info) << "NvEnc: created encoder " << quality_preset_string_from_guid(init_params.presetGUID) << extra;
|
||||
|
||||
BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra;
|
||||
}
|
||||
|
||||
encoder_state = {};
|
||||
@ -409,20 +415,28 @@ namespace nvenc {
|
||||
void
|
||||
nvenc_base::destroy_encoder() {
|
||||
if (output_bitstream) {
|
||||
nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream);
|
||||
if (nvenc_failed(nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncDestroyBitstreamBuffer() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
output_bitstream = nullptr;
|
||||
}
|
||||
if (encoder && async_event_handle) {
|
||||
NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
|
||||
event_params.completionEvent = async_event_handle;
|
||||
nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params);
|
||||
if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
}
|
||||
if (registered_input_buffer) {
|
||||
nvenc->nvEncUnregisterResource(encoder, registered_input_buffer);
|
||||
if (nvenc_failed(nvenc->nvEncUnregisterResource(encoder, registered_input_buffer))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncUnregisterResource() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
registered_input_buffer = nullptr;
|
||||
}
|
||||
if (encoder) {
|
||||
nvenc->nvEncDestroyEncoder(encoder);
|
||||
if (nvenc_failed(nvenc->nvEncDestroyEncoder(encoder))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncDestroyEncoder() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
encoder = nullptr;
|
||||
}
|
||||
|
||||
@ -439,14 +453,23 @@ namespace nvenc {
|
||||
assert(registered_input_buffer);
|
||||
assert(output_bitstream);
|
||||
|
||||
if (!synchronize_input_buffer()) {
|
||||
BOOST_LOG(error) << "NvEnc: failed to synchronize input buffer";
|
||||
return {};
|
||||
}
|
||||
|
||||
NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = { min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER) };
|
||||
mapped_input_buffer.registeredResource = registered_input_buffer;
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) {
|
||||
BOOST_LOG(error) << "NvEncMapInputResource failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncMapInputResource() failed: " << last_nvenc_error_string;
|
||||
return {};
|
||||
}
|
||||
auto unmap_guard = util::fail_guard([&] { nvenc->nvEncUnmapInputResource(encoder, &mapped_input_buffer); });
|
||||
auto unmap_guard = util::fail_guard([&] {
|
||||
if (nvenc_failed(nvenc->nvEncUnmapInputResource(encoder, mapped_input_buffer.mappedResource))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncUnmapInputResource() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
});
|
||||
|
||||
NV_ENC_PIC_PARAMS pic_params = { min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6) };
|
||||
pic_params.inputWidth = encoder_params.width;
|
||||
@ -460,7 +483,7 @@ namespace nvenc {
|
||||
pic_params.completionEvent = async_event_handle;
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncEncodePicture(encoder, &pic_params))) {
|
||||
BOOST_LOG(error) << "NvEncEncodePicture failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncEncodePicture() failed: " << last_nvenc_error_string;
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -474,7 +497,7 @@ namespace nvenc {
|
||||
}
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncLockBitstream(encoder, &lock_bitstream))) {
|
||||
BOOST_LOG(error) << "NvEncLockBitstream failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncLockBitstream() failed: " << last_nvenc_error_string;
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -498,7 +521,7 @@ namespace nvenc {
|
||||
}
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream))) {
|
||||
BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncUnlockBitstream() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
|
||||
encoder_state.frame_size_logger.collect_and_log(encoded_frame.data.size() / 1000.);
|
||||
@ -535,7 +558,7 @@ namespace nvenc {
|
||||
|
||||
for (auto i = first_frame; i <= last_frame; i++) {
|
||||
if (nvenc_failed(nvenc->nvEncInvalidateRefFrames(encoder, i))) {
|
||||
BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncInvalidateRefFrames() " << i << " failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -576,20 +599,22 @@ namespace nvenc {
|
||||
nvenc_status_case(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
|
||||
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
|
||||
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
|
||||
// Newer versions of sdk may add more constants, look for them the end of NVENCSTATUS enum
|
||||
// Newer versions of sdk may add more constants, look for them at the end of NVENCSTATUS enum
|
||||
#undef nvenc_status_case
|
||||
default:
|
||||
return std::to_string(status);
|
||||
}
|
||||
};
|
||||
|
||||
last_error_string.clear();
|
||||
last_nvenc_error_string.clear();
|
||||
if (status != NV_ENC_SUCCESS) {
|
||||
/* This API function gives broken strings more often than not
|
||||
if (nvenc && encoder) {
|
||||
last_error_string = nvenc->nvEncGetLastErrorString(encoder);
|
||||
if (!last_error_string.empty()) last_error_string += " ";
|
||||
last_nvenc_error_string = nvenc->nvEncGetLastErrorString(encoder);
|
||||
if (!last_nvenc_error_string.empty()) last_nvenc_error_string += " ";
|
||||
}
|
||||
last_error_string += status_string(status);
|
||||
*/
|
||||
last_nvenc_error_string += status_string(status);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_base.h
|
||||
* @brief Declarations for base NVENC encoder.
|
||||
* @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
@ -13,36 +13,98 @@
|
||||
|
||||
#include <ffnvcodec/nvEncodeAPI.h>
|
||||
|
||||
/**
|
||||
* @brief Standalone NVENC encoder
|
||||
*/
|
||||
namespace nvenc {
|
||||
|
||||
/**
|
||||
* @brief Abstract platform-agnostic base of standalone NVENC encoder.
|
||||
* Derived classes perform platform-specific operations.
|
||||
*/
|
||||
class nvenc_base {
|
||||
public:
|
||||
nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device);
|
||||
/**
|
||||
* @param device_type Underlying device type used by derived class.
|
||||
*/
|
||||
explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type);
|
||||
virtual ~nvenc_base();
|
||||
|
||||
nvenc_base(const nvenc_base &) = delete;
|
||||
nvenc_base &
|
||||
operator=(const nvenc_base &) = delete;
|
||||
|
||||
/**
|
||||
* @brief Create the encoder.
|
||||
* @param config NVENC encoder configuration.
|
||||
* @param client_config Stream configuration requested by the client.
|
||||
* @param colorspace YUV colorspace.
|
||||
* @param buffer_format Platform-agnostic input surface format.
|
||||
* @return `true` on success, `false` on error
|
||||
*/
|
||||
bool
|
||||
create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
|
||||
|
||||
/**
|
||||
* @brief Destroy the encoder.
|
||||
* Derived classes classes call it in the destructor.
|
||||
*/
|
||||
void
|
||||
destroy_encoder();
|
||||
|
||||
/**
|
||||
* @brief Encode the next frame using platform-specific input surface.
|
||||
* @param frame_index Frame index that uniquely identifies the frame.
|
||||
* Afterwards serves as parameter for `invalidate_ref_frames()`.
|
||||
* No restrictions on the first frame index, but later frame indexes must be subsequent.
|
||||
* @param force_idr Whether to encode frame as forced IDR.
|
||||
* @return Encoded frame.
|
||||
*/
|
||||
nvenc_encoded_frame
|
||||
encode_frame(uint64_t frame_index, bool force_idr);
|
||||
|
||||
/**
|
||||
* @brief Perform reference frame invalidation (RFI) procedure.
|
||||
* @param first_frame First frame index of the invalidation range.
|
||||
* @param last_frame Last frame index of the invalidation range.
|
||||
* @return `true` on success, `false` on error.
|
||||
* After error next frame must be encoded with `force_idr = true`.
|
||||
*/
|
||||
bool
|
||||
invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`.
|
||||
* Called during `create_encoder()` if `nvenc` variable is not initialized.
|
||||
* @return `true` on success, `false` on error
|
||||
*/
|
||||
virtual bool
|
||||
init_library() = 0;
|
||||
|
||||
/**
|
||||
* @brief Required. Used for creating outside-facing input surface,
|
||||
* registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable.
|
||||
* Called during `create_encoder()`.
|
||||
* @return `true` on success, `false` on error
|
||||
*/
|
||||
virtual bool
|
||||
create_and_register_input_buffer() = 0;
|
||||
|
||||
/**
|
||||
* @brief Optional. Override if you must perform additional operations on the registered input surface in the beginning of `encode_frame()`.
|
||||
* Typically used for interop copy.
|
||||
* @return `true` on success, `false` on error
|
||||
*/
|
||||
virtual bool
|
||||
synchronize_input_buffer() { return true; }
|
||||
|
||||
/**
|
||||
* @brief Optional. Override if you want to create encoder in async mode.
|
||||
* In this case must also set `async_event_handle` variable.
|
||||
* @param timeout_ms Wait timeout in milliseconds
|
||||
* @return `true` on success, `false` on timeout or error
|
||||
*/
|
||||
virtual bool
|
||||
wait_for_async_event(uint32_t timeout_ms) { return false; }
|
||||
|
||||
@ -61,9 +123,6 @@ namespace nvenc {
|
||||
min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0);
|
||||
|
||||
const NV_ENC_DEVICE_TYPE device_type;
|
||||
void *const device;
|
||||
|
||||
std::unique_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;
|
||||
|
||||
void *encoder = nullptr;
|
||||
|
||||
@ -75,11 +134,17 @@ namespace nvenc {
|
||||
bool rfi = false;
|
||||
} encoder_params;
|
||||
|
||||
// Derived classes set these variables
|
||||
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr;
|
||||
void *async_event_handle = nullptr;
|
||||
std::string last_nvenc_error_string;
|
||||
|
||||
std::string last_error_string;
|
||||
// Derived classes set these variables
|
||||
void *device = nullptr; ///< Platform-specific handle of encoding device.
|
||||
///< Should be set in constructor or `init_library()`.
|
||||
std::shared_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc; ///< Function pointers list produced by `NvEncodeAPICreateInstance()`.
|
||||
///< Should be set in `init_library()`.
|
||||
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; ///< Platform-specific input surface registered with `NvEncRegisterResource()`.
|
||||
///< Should be set in `create_and_register_input_buffer()`.
|
||||
void *async_event_handle = nullptr; ///< (optional) Platform-specific handle of event object event.
|
||||
///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`.
|
||||
|
||||
private:
|
||||
NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
|
||||
|
@ -1,16 +1,21 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_colorspace.h
|
||||
* @brief Declarations for base NVENC colorspace.
|
||||
* @brief Declarations for NVENC YUV colorspace.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <ffnvcodec/nvEncodeAPI.h>
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
/**
|
||||
* @brief YUV colorspace and color range.
|
||||
*/
|
||||
struct nvenc_colorspace_t {
|
||||
NV_ENC_VUI_COLOR_PRIMARIES primaries;
|
||||
NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function;
|
||||
NV_ENC_VUI_MATRIX_COEFFS matrix;
|
||||
bool full_range;
|
||||
};
|
||||
|
||||
} // namespace nvenc
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_config.h
|
||||
* @brief Declarations for base NVENC configuration.
|
||||
* @brief Declarations for NVENC encoder configuration.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
@ -12,6 +12,9 @@ namespace nvenc {
|
||||
full_resolution, ///< Better overall statistics, slower and uses more extra vram
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief NVENC encoder configuration.
|
||||
*/
|
||||
struct nvenc_config {
|
||||
// Quality preset from 1 to 7, higher is slower
|
||||
int quality_preset = 1;
|
||||
|
@ -1,43 +1,29 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11.cpp
|
||||
* @brief Definitions for base NVENC d3d11.
|
||||
* @brief Definitions for abstract Direct3D11 NVENC encoder.
|
||||
*/
|
||||
#include "src/logging.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "nvenc_d3d11.h"
|
||||
|
||||
#include "nvenc_utils.h"
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device):
|
||||
nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device),
|
||||
d3d_device(d3d_device) {
|
||||
}
|
||||
|
||||
nvenc_d3d11::~nvenc_d3d11() {
|
||||
if (encoder) destroy_encoder();
|
||||
|
||||
if (dll) {
|
||||
FreeLibrary(dll);
|
||||
dll = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ID3D11Texture2D *
|
||||
nvenc_d3d11::get_input_texture() {
|
||||
return d3d_input_texture.GetInterfacePtr();
|
||||
}
|
||||
|
||||
bool
|
||||
nvenc_d3d11::init_library() {
|
||||
if (dll) return true;
|
||||
|
||||
#ifdef _WIN64
|
||||
auto dll_name = "nvEncodeAPI64.dll";
|
||||
constexpr auto dll_name = "nvEncodeAPI64.dll";
|
||||
#else
|
||||
auto dll_name = "nvEncodeAPI.dll";
|
||||
constexpr auto dll_name = "nvEncodeAPI.dll";
|
||||
#endif
|
||||
|
||||
if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
|
||||
@ -45,7 +31,7 @@ namespace nvenc {
|
||||
auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
|
||||
new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER);
|
||||
if (nvenc_failed(create_instance(new_nvenc.get()))) {
|
||||
BOOST_LOG(error) << "NvEncodeAPICreateInstance failed: " << last_error_string;
|
||||
BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string;
|
||||
}
|
||||
else {
|
||||
nvenc = std::move(new_nvenc);
|
||||
@ -53,11 +39,11 @@ namespace nvenc {
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG(error) << "No NvEncodeAPICreateInstance in " << dll_name;
|
||||
BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name;
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG(debug) << "Couldn't load NvEnc library " << dll_name;
|
||||
BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name;
|
||||
}
|
||||
|
||||
if (dll) {
|
||||
@ -68,43 +54,5 @@ namespace nvenc {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
nvenc_d3d11::create_and_register_input_buffer() {
|
||||
if (!d3d_input_texture) {
|
||||
D3D11_TEXTURE2D_DESC desc = {};
|
||||
desc.Width = encoder_params.width;
|
||||
desc.Height = encoder_params.height;
|
||||
desc.MipLevels = 1;
|
||||
desc.ArraySize = 1;
|
||||
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
|
||||
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
|
||||
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!registered_input_buffer) {
|
||||
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
|
||||
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
|
||||
register_resource.width = encoder_params.width;
|
||||
register_resource.height = encoder_params.height;
|
||||
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
|
||||
register_resource.bufferFormat = encoder_params.buffer_format;
|
||||
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, ®ister_resource))) {
|
||||
BOOST_LOG(error) << "NvEncRegisterResource failed: " << last_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
registered_input_buffer = register_resource.registeredResource;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace nvenc
|
||||
#endif
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11.h
|
||||
* @brief Declarations for base NVENC d3d11.
|
||||
* @brief Declarations for abstract Direct3D11 NVENC encoder.
|
||||
*/
|
||||
#pragma once
|
||||
#ifdef _WIN32
|
||||
@ -14,25 +14,33 @@ namespace nvenc {
|
||||
|
||||
_COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
|
||||
_COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
|
||||
_COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice);
|
||||
_COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter);
|
||||
|
||||
class nvenc_d3d11 final: public nvenc_base {
|
||||
/**
|
||||
* @brief Abstract Direct3D11 NVENC encoder.
|
||||
* Encapsulates common code used by native and interop implementations.
|
||||
*/
|
||||
class nvenc_d3d11: public nvenc_base {
|
||||
public:
|
||||
nvenc_d3d11(ID3D11Device *d3d_device);
|
||||
explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type):
|
||||
nvenc_base(device_type) {}
|
||||
|
||||
~nvenc_d3d11();
|
||||
|
||||
ID3D11Texture2D *
|
||||
get_input_texture();
|
||||
/**
|
||||
* @brief Get input surface texture.
|
||||
* @return Input surface texture.
|
||||
*/
|
||||
virtual ID3D11Texture2D *
|
||||
get_input_texture() = 0;
|
||||
|
||||
private:
|
||||
protected:
|
||||
bool
|
||||
init_library() override;
|
||||
|
||||
bool
|
||||
create_and_register_input_buffer() override;
|
||||
|
||||
private:
|
||||
HMODULE dll = NULL;
|
||||
const ID3D11DevicePtr d3d_device;
|
||||
ID3D11Texture2DPtr d3d_input_texture;
|
||||
};
|
||||
|
||||
} // namespace nvenc
|
||||
|
71
src/nvenc/nvenc_d3d11_native.cpp
Normal file
71
src/nvenc/nvenc_d3d11_native.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11_native.cpp
|
||||
* @brief Definitions for native Direct3D11 NVENC encoder.
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
#include "nvenc_d3d11_native.h"
|
||||
|
||||
#include "nvenc_utils.h"
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device):
|
||||
nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX),
|
||||
d3d_device(d3d_device) {
|
||||
device = d3d_device;
|
||||
}
|
||||
|
||||
nvenc_d3d11_native::~nvenc_d3d11_native() {
|
||||
if (encoder) destroy_encoder();
|
||||
}
|
||||
|
||||
ID3D11Texture2D *
|
||||
nvenc_d3d11_native::get_input_texture() {
|
||||
return d3d_input_texture.GetInterfacePtr();
|
||||
}
|
||||
|
||||
bool
|
||||
nvenc_d3d11_native::create_and_register_input_buffer() {
|
||||
if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
|
||||
BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!d3d_input_texture) {
|
||||
D3D11_TEXTURE2D_DESC desc = {};
|
||||
desc.Width = encoder_params.width;
|
||||
desc.Height = encoder_params.height;
|
||||
desc.MipLevels = 1;
|
||||
desc.ArraySize = 1;
|
||||
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
|
||||
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
|
||||
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!registered_input_buffer) {
|
||||
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
|
||||
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
|
||||
register_resource.width = encoder_params.width;
|
||||
register_resource.height = encoder_params.height;
|
||||
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
|
||||
register_resource.bufferFormat = encoder_params.buffer_format;
|
||||
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
|
||||
|
||||
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, ®ister_resource))) {
|
||||
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
registered_input_buffer = register_resource.registeredResource;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace nvenc
|
||||
#endif
|
38
src/nvenc/nvenc_d3d11_native.h
Normal file
38
src/nvenc/nvenc_d3d11_native.h
Normal file
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11_native.h
|
||||
* @brief Declarations for native Direct3D11 NVENC encoder.
|
||||
*/
|
||||
#pragma once
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <comdef.h>
|
||||
#include <d3d11.h>
|
||||
|
||||
#include "nvenc_d3d11.h"
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
/**
 * @brief Native Direct3D11 NVENC encoder.
 * The input surface and the encoding session share the same Direct3D11
 * device, so no interop copy is required (unlike nvenc_d3d11_on_cuda).
 */
class nvenc_d3d11_native final: public nvenc_d3d11 {
public:
  /**
   * @param d3d_device Direct3D11 device used for encoding.
   */
  explicit nvenc_d3d11_native(ID3D11Device *d3d_device);
  ~nvenc_d3d11_native();

  ID3D11Texture2D *
  get_input_texture() override;

private:
  bool
  create_and_register_input_buffer() override;

  const ID3D11DevicePtr d3d_device;  // fixed for the object's lifetime
  ID3D11Texture2DPtr d3d_input_texture;  // lazily created in create_and_register_input_buffer()
};
|
||||
|
||||
} // namespace nvenc
|
||||
#endif
|
267
src/nvenc/nvenc_d3d11_on_cuda.cpp
Normal file
267
src/nvenc/nvenc_d3d11_on_cuda.cpp
Normal file
@ -0,0 +1,267 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11_on_cuda.cpp
|
||||
* @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
#include "nvenc_d3d11_on_cuda.h"
|
||||
|
||||
#include "nvenc_utils.h"
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
// The encoding session is CUDA-based even though the input surface is D3D11,
// hence NV_ENC_DEVICE_TYPE_CUDA for the base class.
nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
  nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
  d3d_device(d3d_device) {
}
|
||||
|
||||
nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
  // Tear down the NVENC session first so nothing references the CUDA
  // allocations we are about to release.
  if (encoder) destroy_encoder();

  if (cuda_context) {
    {
      // CUDA cleanup calls require our context to be current on this thread.
      auto autopop_context = push_context();

      if (cuda_d3d_input_texture) {
        if (cuda_failed(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture))) {
          BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
        }
        cuda_d3d_input_texture = nullptr;
      }

      if (cuda_surface) {
        if (cuda_failed(cuda_functions.cuMemFree(cuda_surface))) {
          BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
        }
        cuda_surface = 0;
      }
    }

    // Context is popped by autopop_context's destructor above before we destroy it.
    if (cuda_failed(cuda_functions.cuCtxDestroy(cuda_context))) {
      BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
    }
    cuda_context = nullptr;
  }

  // Unload nvcuda.dll only after every CUDA call has been made.
  if (cuda_functions.dll) {
    FreeLibrary(cuda_functions.dll);
    cuda_functions = {};
  }
}
|
||||
|
||||
ID3D11Texture2D *
nvenc_d3d11_on_cuda::get_input_texture() {
  // Borrowed pointer only; the COM refcount is not incremented.
  return d3d_input_texture.GetInterfacePtr();
}
|
||||
|
||||
bool
nvenc_d3d11_on_cuda::init_library() {
  // Load the NVENC library itself first.
  if (!nvenc_d3d11::init_library()) return false;

  constexpr auto dll_name = "nvcuda.dll";

  // Resolve CUDA driver API entry points dynamically so there is no
  // hard link dependency on nvcuda.dll.
  if ((cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
    auto load_function = [&]<typename T>(T &location, auto symbol) -> bool {
      location = (T) GetProcAddress(cuda_functions.dll, symbol);
      return location != nullptr;
    };
    // The "_v2" names are the current ABI versions of these driver functions.
    if (!load_function(cuda_functions.cuInit, "cuInit") ||
        !load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") ||
        !load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") ||
        !load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") ||
        !load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") ||
        !load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") ||
        !load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") ||
        !load_function(cuda_functions.cuMemFree, "cuMemFree_v2") ||
        !load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") ||
        !load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") ||
        !load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") ||
        !load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") ||
        !load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") ||
        !load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) {
      BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
      FreeLibrary(cuda_functions.dll);
      cuda_functions = {};
    }
  }
  else {
    BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
  }

  if (cuda_functions.dll) {
    // Derive the CUDA device from the DXGI adapter of the Direct3D11 device
    // so both APIs target the same physical GPU.
    IDXGIDevicePtr dxgi_device;
    IDXGIAdapterPtr dxgi_adapter;
    if (d3d_device &&
        SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
        SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) {
      CUdevice cuda_device;
      // Create the context and pop it immediately; push_context() makes it
      // current again around each later sequence of CUDA calls.
      if (cuda_succeeded(cuda_functions.cuInit(0)) &&
          cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
          cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
          cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) {
        // The CUDA context doubles as the NVENC device handle.
        device = cuda_context;
      }
      else {
        BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
      }
    }
    else {
      BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
    }
  }

  return device != nullptr;
}
|
||||
|
||||
bool
nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
  // This backend exists solely because D3D11 has no surface format for
  // NVENC's planar 10-bit 4:4:4 layout; reject anything else.
  if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
    BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
    return false;
  }

  // Lazily create the D3D11 staging texture that the shaders render into.
  if (!d3d_input_texture) {
    D3D11_TEXTURE2D_DESC desc = {};
    desc.Width = encoder_params.width;
    desc.Height = encoder_params.height * 3;  // Planar YUV: Y, U and V planes stacked vertically
    desc.MipLevels = 1;
    desc.ArraySize = 1;
    desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
    desc.SampleDesc.Count = 1;
    desc.Usage = D3D11_USAGE_DEFAULT;
    desc.BindFlags = D3D11_BIND_RENDER_TARGET;

    if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
      BOOST_LOG(error) << "NvEnc: couldn't create input texture";
      return false;
    }
  }

  {
    // CUDA registration/allocation calls need our context current.
    auto autopop_context = push_context();
    if (!autopop_context) return false;

    // Make the D3D11 texture accessible from CUDA.
    if (!cuda_d3d_input_texture) {
      if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
            &cuda_d3d_input_texture,
            d3d_input_texture,
            CU_GRAPHICS_REGISTER_FLAGS_NONE))) {
        BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
        return false;
      }
    }

    // Allocate the pitched linear buffer that is actually registered with
    // NVENC; synchronize_input_buffer() copies the texture into it.
    if (!cuda_surface) {
      if (cuda_failed(cuda_functions.cuMemAllocPitch(
            &cuda_surface,
            &cuda_surface_pitch,
            // Planar 16-bit YUV
            encoder_params.width * 2,
            encoder_params.height * 3, 16))) {
        BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
        return false;
      }
    }
  }

  // Register the CUDA buffer with NVENC once per session.
  if (!registered_input_buffer) {
    NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
    register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
    register_resource.width = encoder_params.width;
    register_resource.height = encoder_params.height;
    register_resource.pitch = cuda_surface_pitch;
    register_resource.resourceToRegister = (void *) cuda_surface;
    register_resource.bufferFormat = encoder_params.buffer_format;
    register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;

    if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
      BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
      return false;
    }

    registered_input_buffer = register_resource.registeredResource;
  }

  return true;
}
|
||||
|
||||
bool
nvenc_d3d11_on_cuda::synchronize_input_buffer() {
  auto autopop_context = push_context();
  if (!autopop_context) return false;

  // Map the registered D3D11 texture into CUDA for the duration of the copy.
  if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
    BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
    return false;
  }

  auto unmap = [&]() -> bool {
    if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
      BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
      return false;
    }
    return true;
  };
  // Ensure the resource is unmapped on every early-return path below.
  auto unmap_guard = util::fail_guard(unmap);

  CUarray input_texture_array;
  if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
    BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
    return false;
  }

  {
    // Copy the texture contents into the pitched linear surface NVENC reads.
    CUDA_MEMCPY2D copy_params = {};
    copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
    copy_params.srcArray = input_texture_array;
    copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    copy_params.dstDevice = cuda_surface;
    copy_params.dstPitch = cuda_surface_pitch;
    // Planar 16-bit YUV
    copy_params.WidthInBytes = encoder_params.width * 2;
    copy_params.Height = encoder_params.height * 3;

    if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) {
      BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
      return false;
    }
  }

  // Success path: unmap explicitly so an unmap error can fail the function.
  unmap_guard.disable();
  return unmap();
}
|
||||
|
||||
bool
|
||||
nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
|
||||
last_cuda_error = result;
|
||||
return result == CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
bool
|
||||
nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
|
||||
last_cuda_error = result;
|
||||
return result != CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
  // Nothing to undo when push_context() didn't actually push.
  if (!pushed_context) return;

  CUcontext popped_context;
  if (parent.cuda_failed(parent.cuda_functions.cuCtxPopCurrent(&popped_context))) {
    BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
  }
}
|
||||
|
||||
nvenc_d3d11_on_cuda::autopop_context
nvenc_d3d11_on_cuda::push_context() {
  // Make our CUDA context current on this thread; the returned RAII object
  // pops it again and converts to false when the push didn't happen.
  if (!cuda_context ||
      cuda_failed(cuda_functions.cuCtxPushCurrent(cuda_context))) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
    return { *this, nullptr };
  }
  return { *this, cuda_context };
}
|
||||
|
||||
} // namespace nvenc
|
||||
#endif
|
96
src/nvenc/nvenc_d3d11_on_cuda.h
Normal file
96
src/nvenc/nvenc_d3d11_on_cuda.h
Normal file
@ -0,0 +1,96 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_d3d11_on_cuda.h
|
||||
* @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces.
|
||||
*/
|
||||
#pragma once
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "nvenc_d3d11.h"
|
||||
|
||||
#include <ffnvcodec/dynlink_cuda.h>
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
/**
|
||||
* @brief Interop Direct3D11 on CUDA NVENC encoder.
|
||||
* Input surface is Direct3D11, encoding is performed by CUDA.
|
||||
*/
|
||||
class nvenc_d3d11_on_cuda final: public nvenc_d3d11 {
|
||||
public:
|
||||
/**
|
||||
* @param d3d_device Direct3D11 device that will create input surface texture.
|
||||
* CUDA encoding device will be derived from it.
|
||||
*/
|
||||
explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device);
|
||||
~nvenc_d3d11_on_cuda();
|
||||
|
||||
ID3D11Texture2D *
|
||||
get_input_texture() override;
|
||||
|
||||
private:
|
||||
bool
|
||||
init_library() override;
|
||||
|
||||
bool
|
||||
create_and_register_input_buffer() override;
|
||||
|
||||
bool
|
||||
synchronize_input_buffer() override;
|
||||
|
||||
bool
|
||||
cuda_succeeded(CUresult result);
|
||||
|
||||
bool
|
||||
cuda_failed(CUresult result);
|
||||
|
||||
struct autopop_context {
|
||||
autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context):
|
||||
parent(parent),
|
||||
pushed_context(pushed_context) {
|
||||
}
|
||||
|
||||
~autopop_context();
|
||||
|
||||
explicit
|
||||
operator bool() const {
|
||||
return pushed_context != nullptr;
|
||||
}
|
||||
|
||||
nvenc_d3d11_on_cuda &parent;
|
||||
CUcontext pushed_context = nullptr;
|
||||
};
|
||||
|
||||
autopop_context
|
||||
push_context();
|
||||
|
||||
HMODULE dll = NULL;
|
||||
const ID3D11DevicePtr d3d_device;
|
||||
ID3D11Texture2DPtr d3d_input_texture;
|
||||
|
||||
struct {
|
||||
tcuInit *cuInit;
|
||||
tcuD3D11GetDevice *cuD3D11GetDevice;
|
||||
tcuCtxCreate_v2 *cuCtxCreate;
|
||||
tcuCtxDestroy_v2 *cuCtxDestroy;
|
||||
tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
|
||||
tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
|
||||
tcuMemAllocPitch_v2 *cuMemAllocPitch;
|
||||
tcuMemFree_v2 *cuMemFree;
|
||||
tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
|
||||
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
|
||||
tcuGraphicsMapResources *cuGraphicsMapResources;
|
||||
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
|
||||
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
|
||||
tcuMemcpy2D_v2 *cuMemcpy2D;
|
||||
HMODULE dll;
|
||||
} cuda_functions = {};
|
||||
|
||||
CUresult last_cuda_error = CUDA_SUCCESS;
|
||||
CUcontext cuda_context = nullptr;
|
||||
CUgraphicsResource cuda_d3d_input_texture = nullptr;
|
||||
CUdeviceptr cuda_surface = 0;
|
||||
size_t cuda_surface_pitch = 0;
|
||||
};
|
||||
|
||||
} // namespace nvenc
|
||||
#endif
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_encoded_frame.h
|
||||
* @brief Declarations for base NVENC encoded frame.
|
||||
* @brief Declarations for NVENC encoded frame.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
@ -8,10 +8,15 @@
|
||||
#include <vector>
|
||||
|
||||
namespace nvenc {
|
||||
|
||||
/**
 * @brief Encoded frame.
 */
struct nvenc_encoded_frame {
  std::vector<uint8_t> data;  ///< Encoded bitstream bytes
  uint64_t frame_index = 0;  ///< Sequential index of the frame
  bool idr = false;  ///< Frame is an IDR keyframe
  bool after_ref_frame_invalidation = false;  ///< Frame was encoded after a reference frame invalidation request
};
|
||||
|
||||
} // namespace nvenc
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_utils.cpp
|
||||
* @brief Definitions for base NVENC utilities.
|
||||
* @brief Definitions for NVENC utilities.
|
||||
*/
|
||||
#include <cassert>
|
||||
|
||||
@ -18,6 +18,12 @@ namespace nvenc {
|
||||
case NV_ENC_BUFFER_FORMAT_NV12:
|
||||
return DXGI_FORMAT_NV12;
|
||||
|
||||
case NV_ENC_BUFFER_FORMAT_AYUV:
|
||||
return DXGI_FORMAT_AYUV;
|
||||
|
||||
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
|
||||
return DXGI_FORMAT_R16_UINT;
|
||||
|
||||
default:
|
||||
return DXGI_FORMAT_UNKNOWN;
|
||||
}
|
||||
@ -33,6 +39,12 @@ namespace nvenc {
|
||||
case platf::pix_fmt_e::p010:
|
||||
return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
|
||||
|
||||
case platf::pix_fmt_e::ayuv:
|
||||
return NV_ENC_BUFFER_FORMAT_AYUV;
|
||||
|
||||
case platf::pix_fmt_e::yuv444p16:
|
||||
return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
|
||||
|
||||
default:
|
||||
return NV_ENC_BUFFER_FORMAT_UNDEFINED;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file src/nvenc/nvenc_utils.h
|
||||
* @brief Declarations for base NVENC utilities.
|
||||
* @brief Declarations for NVENC utilities.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
|
@ -720,17 +720,32 @@ namespace nvhttp {
|
||||
}
|
||||
|
||||
uint32_t codec_mode_flags = SCM_H264;
|
||||
if (video::last_encoder_probe_supported_yuv444_for_codec[0]) {
|
||||
codec_mode_flags |= SCM_H264_HIGH8_444;
|
||||
}
|
||||
if (video::active_hevc_mode >= 2) {
|
||||
codec_mode_flags |= SCM_HEVC;
|
||||
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
|
||||
codec_mode_flags |= SCM_HEVC_REXT8_444;
|
||||
}
|
||||
}
|
||||
if (video::active_hevc_mode >= 3) {
|
||||
codec_mode_flags |= SCM_HEVC_MAIN10;
|
||||
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
|
||||
codec_mode_flags |= SCM_HEVC_REXT10_444;
|
||||
}
|
||||
}
|
||||
if (video::active_av1_mode >= 2) {
|
||||
codec_mode_flags |= SCM_AV1_MAIN8;
|
||||
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
|
||||
codec_mode_flags |= SCM_AV1_HIGH8_444;
|
||||
}
|
||||
}
|
||||
if (video::active_av1_mode >= 3) {
|
||||
codec_mode_flags |= SCM_AV1_MAIN10;
|
||||
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
|
||||
codec_mode_flags |= SCM_AV1_HIGH10_444;
|
||||
}
|
||||
}
|
||||
tree.put("root.ServerCodecModeSupport", codec_mode_flags);
|
||||
|
||||
|
@ -209,6 +209,9 @@ namespace platf {
|
||||
yuv420p10, ///< YUV 4:2:0 10-bit
|
||||
nv12, ///< NV12
|
||||
p010, ///< P010
|
||||
ayuv, ///< AYUV
|
||||
yuv444p16, ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4
|
||||
y410, ///< Y410
|
||||
unknown ///< Unknown
|
||||
};
|
||||
|
||||
@ -223,6 +226,9 @@ namespace platf {
|
||||
_CONVERT(yuv420p10);
|
||||
_CONVERT(nv12);
|
||||
_CONVERT(p010);
|
||||
_CONVERT(ayuv);
|
||||
_CONVERT(yuv444p16);
|
||||
_CONVERT(y410);
|
||||
_CONVERT(unknown);
|
||||
}
|
||||
#undef _CONVERT
|
||||
|
@ -17,7 +17,8 @@ extern "C" {
|
||||
#include "src/config.h"
|
||||
#include "src/logging.h"
|
||||
#include "src/nvenc/nvenc_config.h"
|
||||
#include "src/nvenc/nvenc_d3d11.h"
|
||||
#include "src/nvenc/nvenc_d3d11_native.h"
|
||||
#include "src/nvenc/nvenc_d3d11_on_cuda.h"
|
||||
#include "src/nvenc/nvenc_utils.h"
|
||||
#include "src/video.h"
|
||||
|
||||
@ -110,6 +111,16 @@ namespace platf::dxgi {
|
||||
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
|
||||
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
|
||||
blob_t convert_yuv420_planar_y_vs_hlsl;
|
||||
blob_t convert_yuv444_packed_ayuv_ps_hlsl;
|
||||
blob_t convert_yuv444_packed_ayuv_ps_linear_hlsl;
|
||||
blob_t convert_yuv444_packed_vs_hlsl;
|
||||
blob_t convert_yuv444_planar_ps_hlsl;
|
||||
blob_t convert_yuv444_planar_ps_linear_hlsl;
|
||||
blob_t convert_yuv444_planar_ps_perceptual_quantizer_hlsl;
|
||||
blob_t convert_yuv444_packed_y410_ps_hlsl;
|
||||
blob_t convert_yuv444_packed_y410_ps_linear_hlsl;
|
||||
blob_t convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl;
|
||||
blob_t convert_yuv444_planar_vs_hlsl;
|
||||
blob_t cursor_ps_hlsl;
|
||||
blob_t cursor_ps_normalize_white_hlsl;
|
||||
blob_t cursor_vs_hlsl;
|
||||
@ -402,18 +413,38 @@ namespace platf::dxgi {
|
||||
return -1;
|
||||
}
|
||||
|
||||
device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
|
||||
device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
|
||||
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0);
|
||||
device_ctx->RSSetViewports(1, &outY_view);
|
||||
device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
|
||||
device_ctx->Draw(3, 0);
|
||||
auto draw = [&](auto &input, auto &y_or_yuv_viewports, auto &uv_viewport) {
|
||||
device_ctx->PSSetShaderResources(0, 1, &input);
|
||||
|
||||
device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
|
||||
device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
|
||||
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
|
||||
device_ctx->RSSetViewports(1, &outUV_view);
|
||||
device_ctx->Draw(3, 0);
|
||||
// Draw Y/YUV
|
||||
device_ctx->OMSetRenderTargets(1, &out_Y_or_YUV_rtv, nullptr);
|
||||
device_ctx->VSSetShader(convert_Y_or_YUV_vs.get(), nullptr, 0);
|
||||
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_or_YUV_fp16_ps.get() : convert_Y_or_YUV_ps.get(), nullptr, 0);
|
||||
auto viewport_count = (format == DXGI_FORMAT_R16_UINT) ? 3 : 1;
|
||||
assert(viewport_count <= y_or_yuv_viewports.size());
|
||||
device_ctx->RSSetViewports(viewport_count, y_or_yuv_viewports.data());
|
||||
device_ctx->Draw(3 * viewport_count, 0); // vertex shader will spread vertices across viewports
|
||||
|
||||
// Draw UV if needed
|
||||
if (out_UV_rtv) {
|
||||
assert(format == DXGI_FORMAT_NV12 || format == DXGI_FORMAT_P010);
|
||||
device_ctx->OMSetRenderTargets(1, &out_UV_rtv, nullptr);
|
||||
device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
|
||||
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
|
||||
device_ctx->RSSetViewports(1, &uv_viewport);
|
||||
device_ctx->Draw(3, 0);
|
||||
}
|
||||
};
|
||||
|
||||
// Clear render target view(s) once so that the aspect ratio mismatch "bars" appear black
|
||||
if (!rtvs_cleared) {
|
||||
auto black = create_black_texture_for_rtv_clear();
|
||||
if (black) draw(black, out_Y_or_YUV_viewports_for_clear, out_UV_viewport_for_clear);
|
||||
rtvs_cleared = true;
|
||||
}
|
||||
|
||||
// Draw captured frame
|
||||
draw(img_ctx.encoder_input_res, out_Y_or_YUV_viewports, out_UV_viewport);
|
||||
|
||||
// Release encoder mutex to allow capture code to reuse this image
|
||||
img_ctx.encoder_mutex->ReleaseSync(0);
|
||||
@ -429,6 +460,12 @@ namespace platf::dxgi {
|
||||
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
|
||||
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
|
||||
|
||||
if (format == DXGI_FORMAT_AYUV ||
|
||||
format == DXGI_FORMAT_R16_UINT ||
|
||||
format == DXGI_FORMAT_Y410) {
|
||||
color_vectors = ::video::new_color_vectors_from_colorspace(colorspace);
|
||||
}
|
||||
|
||||
if (!color_vectors) {
|
||||
BOOST_LOG(error) << "No vector data for colorspace"sv;
|
||||
return;
|
||||
@ -440,6 +477,7 @@ namespace platf::dxgi {
|
||||
return;
|
||||
}
|
||||
|
||||
device_ctx->VSSetConstantBuffers(3, 1, &color_matrix);
|
||||
device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
|
||||
this->color_matrix = std::move(color_matrix);
|
||||
}
|
||||
@ -465,8 +503,20 @@ namespace platf::dxgi {
|
||||
auto offsetX = (out_width - out_width_f) / 2;
|
||||
auto offsetY = (out_height - out_height_f) / 2;
|
||||
|
||||
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
|
||||
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
|
||||
out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; // Y plane
|
||||
out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0]; // U plane
|
||||
out_Y_or_YUV_viewports[1].TopLeftY += out_height;
|
||||
out_Y_or_YUV_viewports[2] = out_Y_or_YUV_viewports[1]; // V plane
|
||||
out_Y_or_YUV_viewports[2].TopLeftY += out_height;
|
||||
|
||||
out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f }; // Y plane
|
||||
out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0]; // U plane
|
||||
out_Y_or_YUV_viewports_for_clear[1].TopLeftY += out_height;
|
||||
out_Y_or_YUV_viewports_for_clear[2] = out_Y_or_YUV_viewports_for_clear[1]; // V plane
|
||||
out_Y_or_YUV_viewports_for_clear[2].TopLeftY += out_height;
|
||||
|
||||
out_UV_viewport = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
|
||||
out_UV_viewport_for_clear = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f };
|
||||
|
||||
float subsample_offset_in[16 / sizeof(float)] { 1.0f / (float) out_width_f, 1.0f / (float) out_height_f }; // aligned to 16-byte
|
||||
subsample_offset = make_buffer(device.get(), subsample_offset_in);
|
||||
@ -488,36 +538,106 @@ namespace platf::dxgi {
|
||||
device_ctx->VSSetConstantBuffers(1, 1, &rotation);
|
||||
}
|
||||
|
||||
D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
|
||||
format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM,
|
||||
D3D11_RTV_DIMENSION_TEXTURE2D
|
||||
DXGI_FORMAT rtv_Y_or_YUV_format = DXGI_FORMAT_UNKNOWN;
|
||||
DXGI_FORMAT rtv_UV_format = DXGI_FORMAT_UNKNOWN;
|
||||
bool rtv_simple_clear = false;
|
||||
|
||||
switch (format) {
|
||||
case DXGI_FORMAT_NV12:
|
||||
rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UNORM;
|
||||
rtv_UV_format = DXGI_FORMAT_R8G8_UNORM;
|
||||
rtv_simple_clear = true;
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_P010:
|
||||
rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UNORM;
|
||||
rtv_UV_format = DXGI_FORMAT_R16G16_UNORM;
|
||||
rtv_simple_clear = true;
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_AYUV:
|
||||
rtv_Y_or_YUV_format = DXGI_FORMAT_R8G8B8A8_UINT;
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_R16_UINT:
|
||||
rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UINT;
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_Y410:
|
||||
rtv_Y_or_YUV_format = DXGI_FORMAT_R10G10B10A2_UINT;
|
||||
break;
|
||||
|
||||
default:
|
||||
BOOST_LOG(error) << "Unable to create render target views because of the unrecognized surface format";
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto create_rtv = [&](auto &rt, DXGI_FORMAT rt_format) -> bool {
|
||||
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
|
||||
rtv_desc.Format = rt_format;
|
||||
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
|
||||
|
||||
auto status = device->CreateRenderTargetView(output_texture.get(), &rtv_desc, &rt);
|
||||
if (FAILED(status)) {
|
||||
BOOST_LOG(error) << "Failed to create render target view: " << util::log_hex(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
|
||||
if (FAILED(status)) {
|
||||
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
// Create Y/YUV render target view
|
||||
if (!create_rtv(out_Y_or_YUV_rtv, rtv_Y_or_YUV_format)) return -1;
|
||||
|
||||
// Create UV render target view if needed
|
||||
if (rtv_UV_format != DXGI_FORMAT_UNKNOWN && !create_rtv(out_UV_rtv, rtv_UV_format)) return -1;
|
||||
|
||||
if (rtv_simple_clear) {
|
||||
// Clear the RTVs to ensure the aspect ratio padding is black
|
||||
const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||
device_ctx->ClearRenderTargetView(out_Y_or_YUV_rtv.get(), y_black);
|
||||
if (out_UV_rtv) {
|
||||
const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
device_ctx->ClearRenderTargetView(out_UV_rtv.get(), uv_black);
|
||||
}
|
||||
rtvs_cleared = true;
|
||||
}
|
||||
|
||||
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
|
||||
|
||||
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
|
||||
if (FAILED(status)) {
|
||||
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
else {
|
||||
// Can't use ClearRenderTargetView(), will clear on first convert()
|
||||
rtvs_cleared = false;
|
||||
}
|
||||
|
||||
// Clear the RTVs to ensure the aspect ratio padding is black
|
||||
const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||
device_ctx->ClearRenderTargetView(nv12_Y_rt.get(), y_black);
|
||||
const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
device_ctx->ClearRenderTargetView(nv12_UV_rt.get(), uv_black);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
|
||||
switch (pix_fmt) {
|
||||
case pix_fmt_e::nv12:
|
||||
format = DXGI_FORMAT_NV12;
|
||||
break;
|
||||
|
||||
case pix_fmt_e::p010:
|
||||
format = DXGI_FORMAT_P010;
|
||||
break;
|
||||
|
||||
case pix_fmt_e::ayuv:
|
||||
format = DXGI_FORMAT_AYUV;
|
||||
break;
|
||||
|
||||
case pix_fmt_e::yuv444p16:
|
||||
format = DXGI_FORMAT_R16_UINT;
|
||||
break;
|
||||
|
||||
case pix_fmt_e::y410:
|
||||
format = DXGI_FORMAT_Y410;
|
||||
break;
|
||||
|
||||
default:
|
||||
BOOST_LOG(error) << "D3D11 backend doesn't support pixel format: " << from_pix_fmt(pix_fmt);
|
||||
return -1;
|
||||
}
|
||||
|
||||
D3D_FEATURE_LEVEL featureLevels[] {
|
||||
D3D_FEATURE_LEVEL_11_1,
|
||||
D3D_FEATURE_LEVEL_11_0,
|
||||
@ -556,61 +676,82 @@ namespace platf::dxgi {
|
||||
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
|
||||
}
|
||||
|
||||
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
|
||||
status = device->CreateVertexShader(convert_yuv420_planar_y_vs_hlsl->GetBufferPointer(), convert_yuv420_planar_y_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
#define create_vertex_shader_helper(x, y) \
|
||||
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
|
||||
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
|
||||
return -1; \
|
||||
}
|
||||
#define create_pixel_shader_helper(x, y) \
|
||||
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
|
||||
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
status = device->CreateVertexShader(convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
switch (format) {
|
||||
case DXGI_FORMAT_NV12:
|
||||
// Semi-planar 8-bit YUV 4:2:0
|
||||
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
|
||||
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
|
||||
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
|
||||
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
|
||||
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
|
||||
break;
|
||||
|
||||
// If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ.
|
||||
if (format == DXGI_FORMAT_P010 && display->is_hdr()) {
|
||||
status = device->CreatePixelShader(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
case DXGI_FORMAT_P010:
|
||||
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
|
||||
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
|
||||
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
|
||||
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
|
||||
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
|
||||
if (display->is_hdr()) {
|
||||
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
|
||||
}
|
||||
else {
|
||||
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
|
||||
}
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_R16_UINT:
|
||||
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
|
||||
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
|
||||
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
|
||||
if (display->is_hdr()) {
|
||||
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
}
|
||||
else {
|
||||
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
}
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_AYUV:
|
||||
// Packed 8-bit YUV 4:4:4
|
||||
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
|
||||
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
|
||||
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
break;
|
||||
|
||||
case DXGI_FORMAT_Y410:
|
||||
// Packed 10-bit YUV 4:4:4
|
||||
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
|
||||
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
|
||||
if (display->is_hdr()) {
|
||||
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
}
|
||||
else {
|
||||
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
|
||||
return -1;
|
||||
}
|
||||
|
||||
status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// If the display is in Advanced Color mode, the desktop format will be scRGB FP16.
|
||||
// scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders.
|
||||
status = device->CreatePixelShader(convert_yuv420_planar_y_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
|
||||
status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// These shaders consume standard 8-bit sRGB input
|
||||
status = device->CreatePixelShader(convert_yuv420_planar_y_ps_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
|
||||
status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
|
||||
if (status) {
|
||||
BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
|
||||
return -1;
|
||||
}
|
||||
#undef create_vertex_shader_helper
|
||||
#undef create_pixel_shader_helper
|
||||
|
||||
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
|
||||
if (!default_color_vectors) {
|
||||
@ -623,6 +764,7 @@ namespace platf::dxgi {
|
||||
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
|
||||
return -1;
|
||||
}
|
||||
device_ctx->VSSetConstantBuffers(3, 1, &color_matrix);
|
||||
device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
|
||||
|
||||
this->display = std::dynamic_pointer_cast<display_base_t>(display);
|
||||
@ -653,7 +795,7 @@ namespace platf::dxgi {
|
||||
|
||||
device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
|
||||
device_ctx->PSSetSamplers(0, 1, &sampler_linear);
|
||||
device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -725,6 +867,41 @@ namespace platf::dxgi {
|
||||
return 0;
|
||||
}
|
||||
|
||||
shader_res_t
|
||||
create_black_texture_for_rtv_clear() {
|
||||
constexpr auto width = 32;
|
||||
constexpr auto height = 32;
|
||||
|
||||
D3D11_TEXTURE2D_DESC texture_desc = {};
|
||||
texture_desc.Width = width;
|
||||
texture_desc.Height = height;
|
||||
texture_desc.MipLevels = 1;
|
||||
texture_desc.ArraySize = 1;
|
||||
texture_desc.SampleDesc.Count = 1;
|
||||
texture_desc.Usage = D3D11_USAGE_IMMUTABLE;
|
||||
texture_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
||||
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
std::vector<uint8_t> mem(4 * width * height, 0);
|
||||
D3D11_SUBRESOURCE_DATA texture_data = { mem.data(), 4 * width, 0 };
|
||||
|
||||
texture2d_t texture;
|
||||
auto status = device->CreateTexture2D(&texture_desc, &texture_data, &texture);
|
||||
if (FAILED(status)) {
|
||||
BOOST_LOG(error) << "Failed to create black texture: " << util::log_hex(status);
|
||||
return {};
|
||||
}
|
||||
|
||||
shader_res_t resource_view;
|
||||
status = device->CreateShaderResourceView(texture.get(), nullptr, &resource_view);
|
||||
if (FAILED(status)) {
|
||||
BOOST_LOG(error) << "Failed to create black texture resource view: " << util::log_hex(status);
|
||||
return {};
|
||||
}
|
||||
|
||||
return resource_view;
|
||||
}
|
||||
|
||||
::video::color_t *color_p;
|
||||
|
||||
buf_t subsample_offset;
|
||||
@ -733,8 +910,9 @@ namespace platf::dxgi {
|
||||
blend_t blend_disable;
|
||||
sampler_state_t sampler_linear;
|
||||
|
||||
render_target_t nv12_Y_rt;
|
||||
render_target_t nv12_UV_rt;
|
||||
render_target_t out_Y_or_YUV_rtv;
|
||||
render_target_t out_UV_rtv;
|
||||
bool rtvs_cleared = false;
|
||||
|
||||
// d3d_img_t::id -> encoder_img_ctx_t
|
||||
// These store the encoder textures for each img_t that passes through
|
||||
@ -744,15 +922,16 @@ namespace platf::dxgi {
|
||||
|
||||
std::shared_ptr<display_base_t> display;
|
||||
|
||||
vs_t convert_Y_or_YUV_vs;
|
||||
ps_t convert_Y_or_YUV_ps;
|
||||
ps_t convert_Y_or_YUV_fp16_ps;
|
||||
|
||||
vs_t convert_UV_vs;
|
||||
ps_t convert_UV_ps;
|
||||
ps_t convert_UV_fp16_ps;
|
||||
ps_t convert_Y_ps;
|
||||
ps_t convert_Y_fp16_ps;
|
||||
vs_t scene_vs;
|
||||
|
||||
D3D11_VIEWPORT outY_view;
|
||||
D3D11_VIEWPORT outUV_view;
|
||||
std::array<D3D11_VIEWPORT, 3> out_Y_or_YUV_viewports, out_Y_or_YUV_viewports_for_clear;
|
||||
D3D11_VIEWPORT out_UV_viewport, out_UV_viewport_for_clear;
|
||||
|
||||
DXGI_FORMAT format;
|
||||
|
||||
@ -871,7 +1050,12 @@ namespace platf::dxgi {
|
||||
|
||||
if (base.init(display, adapter_p, pix_fmt)) return false;
|
||||
|
||||
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
|
||||
if (pix_fmt == pix_fmt_e::yuv444p16) {
|
||||
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11_on_cuda>(base.device.get());
|
||||
}
|
||||
else {
|
||||
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11_native>(base.device.get());
|
||||
}
|
||||
nvenc = nvenc_d3d.get();
|
||||
|
||||
return true;
|
||||
@ -1409,7 +1593,7 @@ namespace platf::dxgi {
|
||||
|
||||
device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
|
||||
device_ctx->PSSetSamplers(0, 1, &sampler_linear);
|
||||
device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1703,20 +1887,10 @@ namespace platf::dxgi {
|
||||
|
||||
std::unique_ptr<avcodec_encode_device_t>
|
||||
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
|
||||
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
|
||||
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
|
||||
|
||||
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
|
||||
|
||||
if (ret) {
|
||||
if (device->init(shared_from_this(), adapter.get(), pix_fmt) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
@ -1746,6 +1920,16 @@ namespace platf::dxgi {
|
||||
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
|
||||
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);
|
||||
compile_vertex_shader_helper(convert_yuv420_planar_y_vs);
|
||||
compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps);
|
||||
compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear);
|
||||
compile_vertex_shader_helper(convert_yuv444_packed_vs);
|
||||
compile_pixel_shader_helper(convert_yuv444_planar_ps);
|
||||
compile_pixel_shader_helper(convert_yuv444_planar_ps_linear);
|
||||
compile_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer);
|
||||
compile_pixel_shader_helper(convert_yuv444_packed_y410_ps);
|
||||
compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear);
|
||||
compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer);
|
||||
compile_vertex_shader_helper(convert_yuv444_planar_vs);
|
||||
compile_pixel_shader_helper(cursor_ps);
|
||||
compile_pixel_shader_helper(cursor_ps_normalize_white);
|
||||
compile_vertex_shader_helper(cursor_vs);
|
||||
|
@ -978,6 +978,7 @@ namespace rtsp_stream {
|
||||
args.try_emplace("x-nv-aqos.qosTrafficType"sv, "4"sv);
|
||||
args.try_emplace("x-ml-video.configuredBitrateKbps"sv, "0"sv);
|
||||
args.try_emplace("x-ss-general.encryptionEnabled"sv, "0"sv);
|
||||
args.try_emplace("x-ss-video[0].chromaSamplingType"sv, "0"sv);
|
||||
|
||||
stream::config_t config;
|
||||
|
||||
@ -1013,6 +1014,7 @@ namespace rtsp_stream {
|
||||
config.monitor.encoderCscMode = util::from_view(args.at("x-nv-video[0].encoderCscMode"sv));
|
||||
config.monitor.videoFormat = util::from_view(args.at("x-nv-vqos[0].bitStreamFormat"sv));
|
||||
config.monitor.dynamicRange = util::from_view(args.at("x-nv-video[0].dynamicRangeMode"sv));
|
||||
config.monitor.chromaSamplingType = util::from_view(args.at("x-ss-video[0].chromaSamplingType"sv));
|
||||
|
||||
configuredBitrateKbps = util::from_view(args.at("x-ml-video.configuredBitrateKbps"sv));
|
||||
}
|
||||
|
@ -267,6 +267,12 @@ namespace util {
|
||||
return Hex<T>(elem, rev);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string
|
||||
log_hex(const T &value) {
|
||||
return "0x" + Hex<T>(value, false).to_string();
|
||||
}
|
||||
|
||||
template <class It>
|
||||
std::string
|
||||
hex_vec(It begin, It end, bool rev = false) {
|
||||
|
329
src/video.cpp
329
src/video.cpp
@ -53,31 +53,36 @@ namespace video {
|
||||
namespace nv {
|
||||
|
||||
enum class profile_h264_e : int {
|
||||
baseline, ///< Baseline profile
|
||||
main, ///< Main profile
|
||||
high, ///< High profile
|
||||
high_444p, ///< High 4:4:4 Predictive profile
|
||||
high = 2, ///< High profile
|
||||
high_444p = 3, ///< High 4:4:4 Predictive profile
|
||||
};
|
||||
|
||||
enum class profile_hevc_e : int {
|
||||
main, ///< Main profile
|
||||
main_10, ///< Main 10 profile
|
||||
rext, ///< Rext profile
|
||||
main = 0, ///< Main profile
|
||||
main_10 = 1, ///< Main 10 profile
|
||||
rext = 2, ///< Rext profile
|
||||
};
|
||||
|
||||
} // namespace nv
|
||||
|
||||
namespace qsv {
|
||||
|
||||
enum class profile_h264_e : int {
|
||||
baseline = 66, ///< Baseline profile
|
||||
main = 77, ///< Main profile
|
||||
high = 100, ///< High profile
|
||||
high_444p = 244, ///< High 4:4:4 Predictive profile
|
||||
};
|
||||
|
||||
enum class profile_hevc_e : int {
|
||||
main = 1, ///< Main profile
|
||||
main_10 = 2, ///< Main 10 profile
|
||||
rext = 4, ///< RExt profile
|
||||
};
|
||||
|
||||
enum class profile_av1_e : int {
|
||||
main = 1, ///< Main profile
|
||||
high = 2, ///< High profile
|
||||
};
|
||||
|
||||
} // namespace qsv
|
||||
|
||||
util::Either<avcodec_buffer_t, int>
|
||||
@ -274,6 +279,7 @@ namespace video {
|
||||
NO_RC_BUF_LIMIT = 1 << 7, ///< Don't set rc_buffer_size
|
||||
REF_FRAMES_INVALIDATION = 1 << 8, ///< Support reference frames invalidation
|
||||
ALWAYS_REPROBE = 1 << 9, ///< This is an encoder of last resort and we want to aggressively probe for a better one
|
||||
YUV444_SUPPORT = 1 << 10, ///< Encoder may support 4:4:4 chroma sampling depending on hardware
|
||||
};
|
||||
|
||||
class avcodec_encode_session_t: public encode_session_t {
|
||||
@ -447,44 +453,39 @@ namespace video {
|
||||
"nvenc"sv,
|
||||
std::make_unique<encoder_platform_formats_nvenc>(
|
||||
platf::mem_type_e::dxgi,
|
||||
platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010),
|
||||
platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010,
|
||||
platf::pix_fmt_e::ayuv, platf::pix_fmt_e::yuv444p16),
|
||||
{
|
||||
// Common options
|
||||
{},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{},
|
||||
{}, // Common options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"av1_nvenc"s,
|
||||
},
|
||||
{
|
||||
// Common options
|
||||
{},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{},
|
||||
{}, // Common options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"hevc_nvenc"s,
|
||||
},
|
||||
{
|
||||
// Common options
|
||||
{},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{},
|
||||
{}, // Common options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"h264_nvenc"s,
|
||||
},
|
||||
PARALLEL_ENCODING | REF_FRAMES_INVALIDATION // flags
|
||||
PARALLEL_ENCODING | REF_FRAMES_INVALIDATION | YUV444_SUPPORT // flags
|
||||
};
|
||||
#elif !defined(__APPLE__)
|
||||
encoder_t nvenc {
|
||||
@ -498,6 +499,7 @@ namespace video {
|
||||
AV_PIX_FMT_CUDA,
|
||||
#endif
|
||||
AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
|
||||
#ifdef _WIN32
|
||||
dxgi_init_avcodec_hardware_input_buffer
|
||||
#else
|
||||
@ -516,12 +518,11 @@ namespace video {
|
||||
{ "multipass"s, &config::video.nv_legacy.multipass },
|
||||
{ "aq"s, &config::video.nv_legacy.aq },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"av1_nvenc"s,
|
||||
},
|
||||
@ -537,14 +538,16 @@ namespace video {
|
||||
{ "multipass"s, &config::video.nv_legacy.multipass },
|
||||
{ "aq"s, &config::video.nv_legacy.aq },
|
||||
},
|
||||
// SDR-specific options
|
||||
{
|
||||
// SDR-specific options
|
||||
{ "profile"s, (int) nv::profile_hevc_e::main },
|
||||
},
|
||||
// HDR-specific options
|
||||
{
|
||||
// HDR-specific options
|
||||
{ "profile"s, (int) nv::profile_hevc_e::main_10 },
|
||||
},
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"hevc_nvenc"s,
|
||||
@ -561,11 +564,13 @@ namespace video {
|
||||
{ "multipass"s, &config::video.nv_legacy.multipass },
|
||||
{ "aq"s, &config::video.nv_legacy.aq },
|
||||
},
|
||||
// SDR-specific options
|
||||
{
|
||||
// SDR-specific options
|
||||
{ "profile"s, (int) nv::profile_h264_e::high },
|
||||
},
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"h264_nvenc"s,
|
||||
@ -581,6 +586,7 @@ namespace video {
|
||||
AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_QSV,
|
||||
AV_PIX_FMT_QSV,
|
||||
AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_VUYX, AV_PIX_FMT_XV30,
|
||||
dxgi_init_avcodec_hardware_input_buffer),
|
||||
{
|
||||
// Common options
|
||||
@ -591,12 +597,23 @@ namespace video {
|
||||
{ "low_delay_brc"s, 1 },
|
||||
{ "low_power"s, 1 },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{},
|
||||
{
|
||||
// SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_av1_e::main },
|
||||
},
|
||||
{
|
||||
// HDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_av1_e::main },
|
||||
},
|
||||
{
|
||||
// YUV444 SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_av1_e::high },
|
||||
},
|
||||
{
|
||||
// YUV444 HDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_av1_e::high },
|
||||
},
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"av1_qsv"s,
|
||||
},
|
||||
@ -611,16 +628,24 @@ namespace video {
|
||||
{ "recovery_point_sei"s, 0 },
|
||||
{ "pic_timing_sei"s, 0 },
|
||||
},
|
||||
// SDR-specific options
|
||||
{
|
||||
// SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_hevc_e::main },
|
||||
},
|
||||
// HDR-specific options
|
||||
{
|
||||
// HDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_hevc_e::main_10 },
|
||||
},
|
||||
// Fallback options
|
||||
{
|
||||
// YUV444 SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_hevc_e::rext },
|
||||
},
|
||||
{
|
||||
// YUV444 HDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_hevc_e::rext },
|
||||
},
|
||||
{
|
||||
// Fallback options
|
||||
{ "low_power"s, []() { return config::video.qsv.qsv_slow_hevc ? 0 : 1; } },
|
||||
},
|
||||
std::nullopt, // QP rate control fallback
|
||||
@ -640,20 +665,24 @@ namespace video {
|
||||
{ "pic_timing_sei"s, 0 },
|
||||
{ "max_dec_frame_buffering"s, 1 },
|
||||
},
|
||||
// SDR-specific options
|
||||
{
|
||||
// SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_h264_e::high },
|
||||
},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{}, // HDR-specific options
|
||||
{
|
||||
// YUV444 SDR-specific options
|
||||
{ "profile"s, (int) qsv::profile_h264_e::high_444p },
|
||||
},
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "low_power"s, 0 }, // Some old/low-end Intel GPUs don't support low power encoding
|
||||
},
|
||||
std::nullopt, // QP rate control fallback
|
||||
"h264_qsv"s,
|
||||
},
|
||||
PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT
|
||||
PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT | YUV444_SUPPORT
|
||||
};
|
||||
|
||||
encoder_t amdvce {
|
||||
@ -662,6 +691,7 @@ namespace video {
|
||||
AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE,
|
||||
AV_PIX_FMT_D3D11,
|
||||
AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
|
||||
dxgi_init_avcodec_hardware_input_buffer),
|
||||
{
|
||||
// Common options
|
||||
@ -676,6 +706,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"av1_amf"s,
|
||||
@ -698,6 +730,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"hevc_amf"s,
|
||||
@ -716,12 +750,12 @@ namespace video {
|
||||
{ "vbaq"s, &config::video.amd.amd_vbaq },
|
||||
{ "enforce_hrd"s, &config::video.amd.amd_enforce_hrd },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "usage"s, 2 /* AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY */ }, // Workaround for https://github.com/GPUOpen-LibrariesAndSDKs/AMF/issues/410
|
||||
},
|
||||
std::nullopt, // QP rate control fallback
|
||||
@ -737,6 +771,7 @@ namespace video {
|
||||
AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE,
|
||||
AV_PIX_FMT_NONE,
|
||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
|
||||
AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10,
|
||||
nullptr),
|
||||
{
|
||||
// libsvtav1 takes different presets than libx264/libx265.
|
||||
@ -749,6 +784,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
|
||||
// QP rate control fallback
|
||||
@ -776,6 +813,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"libx265"s,
|
||||
@ -788,11 +827,13 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt, // QP rate control fallback
|
||||
"libx264"s,
|
||||
},
|
||||
H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE
|
||||
H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE | YUV444_SUPPORT
|
||||
};
|
||||
|
||||
#ifdef __linux__
|
||||
@ -802,6 +843,7 @@ namespace video {
|
||||
AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_NONE,
|
||||
AV_PIX_FMT_VAAPI,
|
||||
AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
|
||||
vaapi_init_avcodec_hardware_input_buffer),
|
||||
{
|
||||
// Common options
|
||||
@ -810,12 +852,12 @@ namespace video {
|
||||
{ "async_depth"s, 1 },
|
||||
{ "idr_interval"s, std::numeric_limits<int>::max() },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints
|
||||
},
|
||||
std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
|
||||
@ -829,12 +871,12 @@ namespace video {
|
||||
{ "sei"s, 0 },
|
||||
{ "idr_interval"s, std::numeric_limits<int>::max() },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints
|
||||
},
|
||||
std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
|
||||
@ -848,12 +890,12 @@ namespace video {
|
||||
{ "sei"s, 0 },
|
||||
{ "idr_interval"s, std::numeric_limits<int>::max() },
|
||||
},
|
||||
// SDR-specific options
|
||||
{},
|
||||
// HDR-specific options
|
||||
{},
|
||||
// Fallback options
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints
|
||||
},
|
||||
std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
|
||||
@ -871,6 +913,7 @@ namespace video {
|
||||
AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_HWDEVICE_TYPE_NONE,
|
||||
AV_PIX_FMT_VIDEOTOOLBOX,
|
||||
AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
|
||||
vt_init_avcodec_hardware_input_buffer),
|
||||
{
|
||||
// Common options
|
||||
@ -882,6 +925,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt,
|
||||
"av1_videotoolbox"s,
|
||||
@ -896,6 +941,8 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{}, // Fallback options
|
||||
std::nullopt,
|
||||
"hevc_videotoolbox"s,
|
||||
@ -910,9 +957,12 @@ namespace video {
|
||||
},
|
||||
{}, // SDR-specific options
|
||||
{}, // HDR-specific options
|
||||
{}, // YUV444 SDR-specific options
|
||||
{}, // YUV444 HDR-specific options
|
||||
{
|
||||
// Fallback options
|
||||
{ "flags"s, "-low_delay" },
|
||||
}, // Fallback options
|
||||
},
|
||||
std::nullopt,
|
||||
"h264_videotoolbox"s,
|
||||
},
|
||||
@ -941,6 +991,7 @@ namespace video {
|
||||
int active_hevc_mode;
|
||||
int active_av1_mode;
|
||||
bool last_encoder_probe_supported_ref_frames_invalidation = false;
|
||||
std::array<bool, 3> last_encoder_probe_supported_yuv444_for_codec = {};
|
||||
|
||||
void
|
||||
reset_display(std::shared_ptr<platf::display_t> &disp, const platf::mem_type_e &type, const std::string &display_name, const config_t &config) {
|
||||
@ -1396,6 +1447,11 @@ namespace video {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (config.chromaSamplingType == 1 && !video_format[encoder_t::YUV444]) {
|
||||
BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 not supported"sv;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto codec = avcodec_find_encoder_by_name(video_format.name.c_str());
|
||||
if (!codec) {
|
||||
BOOST_LOG(error) << "Couldn't open ["sv << video_format.name << ']';
|
||||
@ -1404,7 +1460,11 @@ namespace video {
|
||||
}
|
||||
|
||||
auto colorspace = encode_device->colorspace;
|
||||
auto sw_fmt = (colorspace.bit_depth == 10) ? platform_formats->avcodec_pix_fmt_10bit : platform_formats->avcodec_pix_fmt_8bit;
|
||||
auto sw_fmt = (colorspace.bit_depth == 8 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_8bit :
|
||||
(colorspace.bit_depth == 8 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_8bit :
|
||||
(colorspace.bit_depth == 10 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_10bit :
|
||||
(colorspace.bit_depth == 10 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_10bit :
|
||||
AV_PIX_FMT_NONE;
|
||||
|
||||
// Allow up to 1 retry to apply the set of fallback options.
|
||||
//
|
||||
@ -1421,16 +1481,25 @@ namespace video {
|
||||
|
||||
switch (config.videoFormat) {
|
||||
case 0:
|
||||
ctx->profile = FF_PROFILE_H264_HIGH;
|
||||
// 10-bit h264 encoding is not supported by our streaming protocol
|
||||
assert(!config.dynamicRange);
|
||||
ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_H264_HIGH_444_PREDICTIVE : FF_PROFILE_H264_HIGH;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
ctx->profile = config.dynamicRange ? FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN;
|
||||
if (config.chromaSamplingType == 1) {
|
||||
// HEVC uses the same RExt profile for both 8 and 10 bit YUV 4:4:4 encoding
|
||||
ctx->profile = FF_PROFILE_HEVC_REXT;
|
||||
}
|
||||
else {
|
||||
ctx->profile = config.dynamicRange ? FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN;
|
||||
}
|
||||
break;
|
||||
|
||||
case 2:
|
||||
// AV1 supports both 8 and 10 bit encoding with the same Main profile
|
||||
ctx->profile = FF_PROFILE_AV1_MAIN;
|
||||
// but YUV 4:4:4 sampling requires High profile
|
||||
ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_AV1_HIGH : FF_PROFILE_AV1_MAIN;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1561,6 +1630,11 @@ namespace video {
|
||||
for (auto &option : (config.dynamicRange ? video_format.hdr_options : video_format.sdr_options)) {
|
||||
handle_option(option);
|
||||
}
|
||||
if (config.chromaSamplingType == 1) {
|
||||
for (auto &option : (config.dynamicRange ? video_format.hdr444_options : video_format.sdr444_options)) {
|
||||
handle_option(option);
|
||||
}
|
||||
}
|
||||
if (retries > 0) {
|
||||
for (auto &option : video_format.fallback_options) {
|
||||
handle_option(option);
|
||||
@ -1856,7 +1930,24 @@ namespace video {
|
||||
std::unique_ptr<platf::encode_device_t> result;
|
||||
|
||||
auto colorspace = colorspace_from_client_config(config, disp.is_hdr());
|
||||
auto pix_fmt = (colorspace.bit_depth == 10) ? encoder.platform_formats->pix_fmt_10bit : encoder.platform_formats->pix_fmt_8bit;
|
||||
|
||||
platf::pix_fmt_e pix_fmt;
|
||||
if (config.chromaSamplingType == 1) {
|
||||
// YUV 4:4:4
|
||||
if (!(encoder.flags & YUV444_SUPPORT)) {
|
||||
// Encoder can't support YUV 4:4:4 regardless of hardware capabilities
|
||||
return {};
|
||||
}
|
||||
pix_fmt = (colorspace.bit_depth == 10) ?
|
||||
encoder.platform_formats->pix_fmt_yuv444_10bit :
|
||||
encoder.platform_formats->pix_fmt_yuv444_8bit;
|
||||
}
|
||||
else {
|
||||
// YUV 4:2:0
|
||||
pix_fmt = (colorspace.bit_depth == 10) ?
|
||||
encoder.platform_formats->pix_fmt_10bit :
|
||||
encoder.platform_formats->pix_fmt_8bit;
|
||||
}
|
||||
|
||||
{
|
||||
auto encoder_name = config.videoFormat == 0 ? encoder.h264.name :
|
||||
@ -2300,8 +2391,8 @@ namespace video {
|
||||
encoder.av1.capabilities.set();
|
||||
|
||||
// First, test encoder viability
|
||||
config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0 };
|
||||
config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 };
|
||||
config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0, 0 };
|
||||
config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 0 };
|
||||
|
||||
// If the encoder isn't supported at all (not even H.264), bail early
|
||||
reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config_autoselect);
|
||||
@ -2420,35 +2511,49 @@ namespace video {
|
||||
encoder.av1.capabilities.reset();
|
||||
}
|
||||
|
||||
std::vector<std::pair<encoder_t::flag_e, config_t>> configs {
|
||||
{ encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1 } },
|
||||
};
|
||||
// Test HDR and YUV444 support
|
||||
{
|
||||
// H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth
|
||||
if (encoder.flags & YUV444_SUPPORT) {
|
||||
config_t config_h264_yuv444 { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 1 };
|
||||
encoder.h264[encoder_t::YUV444] = validate_config(disp, encoder, config_h264_yuv444);
|
||||
}
|
||||
else {
|
||||
encoder.h264[encoder_t::YUV444] = false;
|
||||
}
|
||||
|
||||
for (auto &[flag, config] : configs) {
|
||||
auto h264 = config;
|
||||
auto hevc = config;
|
||||
auto av1 = config;
|
||||
|
||||
h264.videoFormat = 0;
|
||||
hevc.videoFormat = 1;
|
||||
av1.videoFormat = 2;
|
||||
const config_t generic_hdr_config = { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1, 0 };
|
||||
|
||||
// Reset the display since we're switching from SDR to HDR
|
||||
reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config);
|
||||
reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, generic_hdr_config);
|
||||
if (!disp) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto test_hdr_and_yuv444 = [&](auto &flag_map, auto video_format) {
|
||||
auto config = generic_hdr_config;
|
||||
config.videoFormat = video_format;
|
||||
|
||||
if (!flag_map[encoder_t::PASSED]) return;
|
||||
|
||||
// Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported.
|
||||
config.chromaSamplingType = 1;
|
||||
if ((encoder.flags & YUV444_SUPPORT) && validate_config(disp, encoder, config) >= 0) {
|
||||
flag_map[encoder_t::DYNAMIC_RANGE] = true;
|
||||
flag_map[encoder_t::YUV444] = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Test 4:2:0 HDR
|
||||
config.chromaSamplingType = 0;
|
||||
flag_map[encoder_t::DYNAMIC_RANGE] = validate_config(disp, encoder, config) >= 0;
|
||||
};
|
||||
|
||||
// HDR is not supported with H.264. Don't bother even trying it.
|
||||
encoder.h264[flag] = flag != encoder_t::DYNAMIC_RANGE && validate_config(disp, encoder, h264) >= 0;
|
||||
encoder.h264[encoder_t::DYNAMIC_RANGE] = false;
|
||||
|
||||
if (encoder.hevc[encoder_t::PASSED]) {
|
||||
encoder.hevc[flag] = validate_config(disp, encoder, hevc) >= 0;
|
||||
}
|
||||
|
||||
if (encoder.av1[encoder_t::PASSED]) {
|
||||
encoder.av1[flag] = validate_config(disp, encoder, av1) >= 0;
|
||||
}
|
||||
test_hdr_and_yuv444(encoder.hevc, 1);
|
||||
test_hdr_and_yuv444(encoder.av1, 2);
|
||||
}
|
||||
|
||||
encoder.h264[encoder_t::VUI_PARAMETERS] = encoder.h264[encoder_t::VUI_PARAMETERS] && !config::sunshine.flags[config::flag::FORCE_VIDEO_HEADER_REPLACE];
|
||||
@ -2605,6 +2710,12 @@ namespace video {
|
||||
auto &encoder = *chosen_encoder;
|
||||
|
||||
last_encoder_probe_supported_ref_frames_invalidation = (encoder.flags & REF_FRAMES_INVALIDATION);
|
||||
last_encoder_probe_supported_yuv444_for_codec[0] = encoder.h264[encoder_t::PASSED] &&
|
||||
encoder.h264[encoder_t::YUV444];
|
||||
last_encoder_probe_supported_yuv444_for_codec[1] = encoder.hevc[encoder_t::PASSED] &&
|
||||
encoder.hevc[encoder_t::YUV444];
|
||||
last_encoder_probe_supported_yuv444_for_codec[2] = encoder.av1[encoder_t::PASSED] &&
|
||||
encoder.av1[encoder_t::YUV444];
|
||||
|
||||
BOOST_LOG(debug) << "------ h264 ------"sv;
|
||||
for (int x = 0; x < encoder_t::MAX_FLAGS; ++x) {
|
||||
@ -2793,6 +2904,10 @@ namespace video {
|
||||
platf::pix_fmt_e
|
||||
map_pix_fmt(AVPixelFormat fmt) {
|
||||
switch (fmt) {
|
||||
case AV_PIX_FMT_VUYX:
|
||||
return platf::pix_fmt_e::ayuv;
|
||||
case AV_PIX_FMT_XV30:
|
||||
return platf::pix_fmt_e::y410;
|
||||
case AV_PIX_FMT_YUV420P10:
|
||||
return platf::pix_fmt_e::yuv420p10;
|
||||
case AV_PIX_FMT_YUV420P:
|
||||
|
21
src/video.h
21
src/video.h
@ -39,6 +39,7 @@ namespace video {
|
||||
virtual ~encoder_platform_formats_t() = default;
|
||||
platf::mem_type_e dev_type;
|
||||
platf::pix_fmt_e pix_fmt_8bit, pix_fmt_10bit;
|
||||
platf::pix_fmt_e pix_fmt_yuv444_8bit, pix_fmt_yuv444_10bit;
|
||||
};
|
||||
|
||||
struct encoder_platform_formats_avcodec: encoder_platform_formats_t {
|
||||
@ -50,21 +51,28 @@ namespace video {
|
||||
const AVPixelFormat &avcodec_dev_pix_fmt,
|
||||
const AVPixelFormat &avcodec_pix_fmt_8bit,
|
||||
const AVPixelFormat &avcodec_pix_fmt_10bit,
|
||||
const AVPixelFormat &avcodec_pix_fmt_yuv444_8bit,
|
||||
const AVPixelFormat &avcodec_pix_fmt_yuv444_10bit,
|
||||
const init_buffer_function_t &init_avcodec_hardware_input_buffer_function):
|
||||
avcodec_base_dev_type { avcodec_base_dev_type },
|
||||
avcodec_derived_dev_type { avcodec_derived_dev_type },
|
||||
avcodec_dev_pix_fmt { avcodec_dev_pix_fmt },
|
||||
avcodec_pix_fmt_8bit { avcodec_pix_fmt_8bit },
|
||||
avcodec_pix_fmt_10bit { avcodec_pix_fmt_10bit },
|
||||
avcodec_pix_fmt_yuv444_8bit { avcodec_pix_fmt_yuv444_8bit },
|
||||
avcodec_pix_fmt_yuv444_10bit { avcodec_pix_fmt_yuv444_10bit },
|
||||
init_avcodec_hardware_input_buffer { init_avcodec_hardware_input_buffer_function } {
|
||||
dev_type = map_base_dev_type(avcodec_base_dev_type);
|
||||
pix_fmt_8bit = map_pix_fmt(avcodec_pix_fmt_8bit);
|
||||
pix_fmt_10bit = map_pix_fmt(avcodec_pix_fmt_10bit);
|
||||
pix_fmt_yuv444_8bit = map_pix_fmt(avcodec_pix_fmt_yuv444_8bit);
|
||||
pix_fmt_yuv444_10bit = map_pix_fmt(avcodec_pix_fmt_yuv444_10bit);
|
||||
}
|
||||
|
||||
AVHWDeviceType avcodec_base_dev_type, avcodec_derived_dev_type;
|
||||
AVPixelFormat avcodec_dev_pix_fmt;
|
||||
AVPixelFormat avcodec_pix_fmt_8bit, avcodec_pix_fmt_10bit;
|
||||
AVPixelFormat avcodec_pix_fmt_yuv444_8bit, avcodec_pix_fmt_yuv444_10bit;
|
||||
|
||||
init_buffer_function_t init_avcodec_hardware_input_buffer;
|
||||
};
|
||||
@ -73,10 +81,14 @@ namespace video {
|
||||
encoder_platform_formats_nvenc(
|
||||
const platf::mem_type_e &dev_type,
|
||||
const platf::pix_fmt_e &pix_fmt_8bit,
|
||||
const platf::pix_fmt_e &pix_fmt_10bit) {
|
||||
const platf::pix_fmt_e &pix_fmt_10bit,
|
||||
const platf::pix_fmt_e &pix_fmt_yuv444_8bit,
|
||||
const platf::pix_fmt_e &pix_fmt_yuv444_10bit) {
|
||||
encoder_platform_formats_t::dev_type = dev_type;
|
||||
encoder_platform_formats_t::pix_fmt_8bit = pix_fmt_8bit;
|
||||
encoder_platform_formats_t::pix_fmt_10bit = pix_fmt_10bit;
|
||||
encoder_platform_formats_t::pix_fmt_yuv444_8bit = pix_fmt_yuv444_8bit;
|
||||
encoder_platform_formats_t::pix_fmt_yuv444_10bit = pix_fmt_yuv444_10bit;
|
||||
}
|
||||
};
|
||||
|
||||
@ -87,6 +99,7 @@ namespace video {
|
||||
REF_FRAMES_RESTRICT, ///< Set maximum reference frames.
|
||||
CBR, ///< Some encoders don't support CBR, if not supported attempt constant quantization parameter instead.
|
||||
DYNAMIC_RANGE, ///< HDR support.
|
||||
YUV444, ///< YUV 4:4:4 support.
|
||||
VUI_PARAMETERS, ///< AMD encoder with VAAPI doesn't add VUI parameters to SPS.
|
||||
MAX_FLAGS ///< Maximum number of flags.
|
||||
};
|
||||
@ -101,6 +114,7 @@ namespace video {
|
||||
_CONVERT(REF_FRAMES_RESTRICT);
|
||||
_CONVERT(CBR);
|
||||
_CONVERT(DYNAMIC_RANGE);
|
||||
_CONVERT(YUV444);
|
||||
_CONVERT(VUI_PARAMETERS);
|
||||
_CONVERT(MAX_FLAGS);
|
||||
}
|
||||
@ -126,6 +140,8 @@ namespace video {
|
||||
std::vector<option_t> common_options;
|
||||
std::vector<option_t> sdr_options;
|
||||
std::vector<option_t> hdr_options;
|
||||
std::vector<option_t> sdr444_options;
|
||||
std::vector<option_t> hdr444_options;
|
||||
std::vector<option_t> fallback_options;
|
||||
|
||||
// QP option to set in the case that CBR/VBR is not supported
|
||||
@ -312,11 +328,14 @@ namespace video {
|
||||
/* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit
|
||||
HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */
|
||||
int dynamicRange;
|
||||
|
||||
int chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4
|
||||
};
|
||||
|
||||
extern int active_hevc_mode;
|
||||
extern int active_av1_mode;
|
||||
extern bool last_encoder_probe_supported_ref_frames_invalidation;
|
||||
extern std::array<bool, 3> last_encoder_probe_supported_yuv444_for_codec; // 0 - H.264, 1 - HEVC, 2 - AV1
|
||||
|
||||
void
|
||||
capture(
|
||||
|
@ -182,4 +182,109 @@ namespace video {
|
||||
return result;
|
||||
}
|
||||
|
||||
const color_t *
|
||||
new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace) {
|
||||
constexpr auto generate_color_vectors = [](const sunshine_colorspace_t &colorspace) -> color_t {
|
||||
double Kr, Kb;
|
||||
switch (colorspace.colorspace) {
|
||||
case colorspace_e::rec601:
|
||||
Kr = 0.299;
|
||||
Kb = 0.114;
|
||||
break;
|
||||
case colorspace_e::rec709:
|
||||
default:
|
||||
Kr = 0.2126;
|
||||
Kb = 0.0722;
|
||||
break;
|
||||
case colorspace_e::bt2020:
|
||||
case colorspace_e::bt2020sdr:
|
||||
Kr = 0.2627;
|
||||
Kb = 0.0593;
|
||||
break;
|
||||
}
|
||||
double Kg = 1.0 - Kr - Kb;
|
||||
|
||||
double y_mult, y_add;
|
||||
double uv_mult, uv_add;
|
||||
|
||||
// "Matrix coefficients" section of ITU-T H.273
|
||||
if (colorspace.full_range) {
|
||||
y_mult = (1 << colorspace.bit_depth) - 1;
|
||||
y_add = 0;
|
||||
uv_mult = (1 << colorspace.bit_depth) - 1;
|
||||
uv_add = (1 << (colorspace.bit_depth - 1));
|
||||
}
|
||||
else {
|
||||
y_mult = (1 << (colorspace.bit_depth - 8)) * 219;
|
||||
y_add = (1 << (colorspace.bit_depth - 8)) * 16;
|
||||
uv_mult = (1 << (colorspace.bit_depth - 8)) * 224;
|
||||
uv_add = (1 << (colorspace.bit_depth - 8)) * 128;
|
||||
}
|
||||
|
||||
// For rounding
|
||||
y_add += 0.5;
|
||||
uv_add += 0.5;
|
||||
|
||||
color_t color_vectors;
|
||||
|
||||
color_vectors.color_vec_y[0] = Kr * y_mult;
|
||||
color_vectors.color_vec_y[1] = Kg * y_mult;
|
||||
color_vectors.color_vec_y[2] = Kb * y_mult;
|
||||
color_vectors.color_vec_y[3] = y_add;
|
||||
|
||||
color_vectors.color_vec_u[0] = -0.5 * Kr / (1.0 - Kb) * uv_mult;
|
||||
color_vectors.color_vec_u[1] = -0.5 * Kg / (1.0 - Kb) * uv_mult;
|
||||
color_vectors.color_vec_u[2] = 0.5 * uv_mult;
|
||||
color_vectors.color_vec_u[3] = uv_add;
|
||||
|
||||
color_vectors.color_vec_v[0] = 0.5 * uv_mult;
|
||||
color_vectors.color_vec_v[1] = -0.5 * Kg / (1.0 - Kr) * uv_mult;
|
||||
color_vectors.color_vec_v[2] = -0.5 * Kb / (1.0 - Kr) * uv_mult;
|
||||
color_vectors.color_vec_v[3] = uv_add;
|
||||
|
||||
// Unused
|
||||
color_vectors.range_y[0] = 1;
|
||||
color_vectors.range_y[1] = 0;
|
||||
color_vectors.range_uv[0] = 1;
|
||||
color_vectors.range_uv[1] = 0;
|
||||
|
||||
return color_vectors;
|
||||
};
|
||||
|
||||
static constexpr color_t colors[] = {
|
||||
generate_color_vectors({ colorspace_e::rec601, false, 8 }),
|
||||
generate_color_vectors({ colorspace_e::rec601, true, 8 }),
|
||||
generate_color_vectors({ colorspace_e::rec601, false, 10 }),
|
||||
generate_color_vectors({ colorspace_e::rec601, true, 10 }),
|
||||
generate_color_vectors({ colorspace_e::rec709, false, 8 }),
|
||||
generate_color_vectors({ colorspace_e::rec709, true, 8 }),
|
||||
generate_color_vectors({ colorspace_e::rec709, false, 10 }),
|
||||
generate_color_vectors({ colorspace_e::rec709, true, 10 }),
|
||||
generate_color_vectors({ colorspace_e::bt2020, false, 8 }),
|
||||
generate_color_vectors({ colorspace_e::bt2020, true, 8 }),
|
||||
generate_color_vectors({ colorspace_e::bt2020, false, 10 }),
|
||||
generate_color_vectors({ colorspace_e::bt2020, true, 10 }),
|
||||
};
|
||||
|
||||
const color_t *result = nullptr;
|
||||
|
||||
switch (colorspace.colorspace) {
|
||||
case colorspace_e::rec601:
|
||||
result = &colors[0];
|
||||
break;
|
||||
case colorspace_e::rec709:
|
||||
default:
|
||||
result = &colors[4];
|
||||
break;
|
||||
case colorspace_e::bt2020:
|
||||
case colorspace_e::bt2020sdr:
|
||||
result = &colors[8];
|
||||
break;
|
||||
}
|
||||
|
||||
if (colorspace.bit_depth == 10) result += 2;
|
||||
if (colorspace.full_range) result += 1;
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace video
|
||||
|
@ -57,4 +57,17 @@ namespace video {
|
||||
const color_t *
|
||||
color_vectors_from_colorspace(colorspace_e colorspace, bool full_range);
|
||||
|
||||
/**
|
||||
* @brief New version of `color_vectors_from_colorspace()` function that better adheres to the standards.
|
||||
* Returned vectors are used to perform RGB->YUV conversion.
|
||||
* Unlike its predecessor, color vectors will produce output in `UINT` range, not `UNORM` range.
|
||||
* Input is still in `UNORM` range. Returned vectors won't modify color primaries and color
|
||||
* transfer function.
|
||||
* @param colorspace Targeted YUV colorspace.
|
||||
* @return `const color_t*` that contains RGB->YUV transformation vectors.
|
||||
* Components `range_y` and `range_uv` are there for backwards compatibility
|
||||
* and can be ignored in the computation.
|
||||
*/
|
||||
const color_t *
|
||||
new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace);
|
||||
} // namespace video
|
||||
|
@ -0,0 +1,3 @@
|
||||
#include "include/convert_base.hlsl"
|
||||
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,3 @@
|
||||
#include "include/convert_linear_base.hlsl"
|
||||
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,10 @@
|
||||
cbuffer rotate_texture_steps_cbuffer : register(b1) {
|
||||
int rotate_texture_steps;
|
||||
};
|
||||
|
||||
#include "include/base_vs.hlsl"
|
||||
|
||||
vertex_t main_vs(uint vertex_id : SV_VertexID)
|
||||
{
|
||||
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_base.hlsl"
|
||||
|
||||
#define Y410
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_linear_base.hlsl"
|
||||
|
||||
#define Y410
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_perceptual_quantizer_base.hlsl"
|
||||
|
||||
#define Y410
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_base.hlsl"
|
||||
|
||||
#define PLANAR_VIEWPORTS
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_linear_base.hlsl"
|
||||
|
||||
#define PLANAR_VIEWPORTS
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,4 @@
|
||||
#include "include/convert_perceptual_quantizer_base.hlsl"
|
||||
|
||||
#define PLANAR_VIEWPORTS
|
||||
#include "include/convert_yuv444_ps_base.hlsl"
|
@ -0,0 +1,33 @@
|
||||
cbuffer rotate_texture_steps_cbuffer : register(b1) {
|
||||
int rotate_texture_steps;
|
||||
};
|
||||
|
||||
cbuffer color_matrix_cbuffer : register(b3) {
|
||||
float4 color_vec_y;
|
||||
float4 color_vec_u;
|
||||
float4 color_vec_v;
|
||||
float2 range_y;
|
||||
float2 range_uv;
|
||||
};
|
||||
|
||||
#define PLANAR_VIEWPORTS
|
||||
#include "include/base_vs.hlsl"
|
||||
|
||||
vertex_t main_vs(uint vertex_id : SV_VertexID)
|
||||
{
|
||||
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
|
||||
|
||||
output.viewport = vertex_id / 3;
|
||||
|
||||
if (output.viewport == 0) {
|
||||
output.color_vec = color_vec_y;
|
||||
}
|
||||
else if (output.viewport == 1) {
|
||||
output.color_vec = color_vec_u;
|
||||
}
|
||||
else {
|
||||
output.color_vec = color_vec_v;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
@ -19,7 +19,7 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
|
||||
output.viewpoint_pos = float4(-1, 3, 0, 1);
|
||||
tex_coord = float2(0, -1);
|
||||
}
|
||||
else if (vertex_id == 2) {
|
||||
else {
|
||||
output.viewpoint_pos = float4(3, -1, 0, 1);
|
||||
tex_coord = float2(2, 1);
|
||||
}
|
||||
|
@ -9,4 +9,8 @@ struct vertex_t
|
||||
#else
|
||||
float2 tex_coord : TEXCOORD;
|
||||
#endif
|
||||
#ifdef PLANAR_VIEWPORTS
|
||||
uint viewport : SV_ViewportArrayIndex;
|
||||
nointerpolation float4 color_vec : COLOR0;
|
||||
#endif
|
||||
};
|
||||
|
@ -0,0 +1,39 @@
|
||||
Texture2D image : register(t0);
|
||||
SamplerState def_sampler : register(s0);
|
||||
|
||||
#ifndef PLANAR_VIEWPORTS
|
||||
cbuffer color_matrix_cbuffer : register(b0) {
|
||||
float4 color_vec_y;
|
||||
float4 color_vec_u;
|
||||
float4 color_vec_v;
|
||||
float2 range_y;
|
||||
float2 range_uv;
|
||||
};
|
||||
#endif
|
||||
|
||||
#include "include/base_vs_types.hlsl"
|
||||
|
||||
#ifdef PLANAR_VIEWPORTS
|
||||
uint main_ps(vertex_t input) : SV_Target
|
||||
#else
|
||||
uint4 main_ps(vertex_t input) : SV_Target
|
||||
#endif
|
||||
{
|
||||
float3 rgb = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_coord, 0).rgb);
|
||||
|
||||
#ifdef PLANAR_VIEWPORTS
|
||||
// Planar R16, 10 most significant bits store the value
|
||||
return uint(dot(input.color_vec.xyz, rgb) + input.color_vec.w) << 6;
|
||||
#else
|
||||
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
|
||||
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
|
||||
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
|
||||
|
||||
#ifdef Y410
|
||||
return uint4(u, y, v, 0);
|
||||
#else
|
||||
// AYUV
|
||||
return uint4(v, u, y, 0);
|
||||
#endif
|
||||
#endif
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user