1534 lines
54 KiB
C++

#include <cmath>
#include <codecvt>
#include <d3dcompiler.h>
#include <directxmath.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext_d3d11va.h>
}
#include "display.h"
#include "src/main.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
namespace platf {
using namespace std::literals;
}
// Deleter for frame_t: hands the AVFrame back to FFmpeg via av_frame_free().
static void
free_frame(AVFrame *frame) {
  // av_frame_free() wants the address of the pointer, so pass our local copy of it.
  AVFrame *doomed = frame;
  av_frame_free(&doomed);
}
using frame_t = util::safe_ptr<AVFrame, free_frame>;
namespace platf::dxgi {
// Creates an immutable D3D11 constant buffer whose contents are a copy of `t`.
//
// device: device used to create the buffer.
// t:      initial buffer contents; sizeof(T) must be a multiple of 16 bytes.
//
// Returns the new buffer, or nullptr (after logging) if CreateBuffer fails.
template <class T>
buf_t
make_buffer(device_t::pointer device, const T &t) {
  static_assert(sizeof(T) % 16 == 0, "Buffer needs to be aligned on a 16-byte alignment");

  D3D11_BUFFER_DESC desc {};
  desc.ByteWidth = sizeof(T);
  desc.Usage = D3D11_USAGE_IMMUTABLE;
  desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;

  D3D11_SUBRESOURCE_DATA contents {};
  contents.pSysMem = &t;

  buf_t::pointer raw = nullptr;
  const auto status = device->CreateBuffer(&desc, &contents, &raw);
  if (!status) {
    return buf_t { raw };
  }

  BOOST_LOG(error) << "Failed to create buffer: [0x"sv << util::hex(status).to_string_view() << ']';
  return nullptr;
}
// Builds a blend state for render target 0.
//
// device: device used to create the state object.
// enable: when false, blending is disabled and only the write mask is set.
// invert: when blending is enabled, selects the colour-inverting factors
//         instead of regular source-alpha blending.
//
// Returns the blend state, or nullptr (after logging) on failure.
blend_t
make_blend(device_t::pointer device, bool enable, bool invert) {
  D3D11_BLEND_DESC desc {};

  auto &target = desc.RenderTarget[0];
  target.RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
  target.BlendEnable = enable;

  if (enable) {
    // Colour channels: either invert against the destination or do regular alpha blending.
    target.SrcBlend = invert ? D3D11_BLEND_INV_DEST_COLOR : D3D11_BLEND_SRC_ALPHA;
    target.DestBlend = invert ? D3D11_BLEND_INV_SRC_COLOR : D3D11_BLEND_INV_SRC_ALPHA;
    target.BlendOp = D3D11_BLEND_OP_ADD;

    // Alpha channel: both factors are zero in either mode.
    target.SrcBlendAlpha = D3D11_BLEND_ZERO;
    target.DestBlendAlpha = D3D11_BLEND_ZERO;
    target.BlendOpAlpha = D3D11_BLEND_OP_ADD;
  }

  blend_t state;
  const auto status = device->CreateBlendState(&desc, &state);
  if (!status) {
    return state;
  }

  BOOST_LOG(error) << "Failed to create blend state: [0x"sv << util::hex(status).to_string_view() << ']';
  return nullptr;
}
// Namespace-scope shader blobs. hwdevice_t::init() passes the buffer pointer and
// size of the convert_* and scene_vs blobs to CreateVertexShader/CreatePixelShader.
// NOTE(review): where these blobs are filled in (presumably via compile_shader())
// and where scene_ps_hlsl / scene_NW_ps_hlsl are consumed is not visible in this
// part of the file — confirm.
blob_t convert_UV_vs_hlsl;
blob_t convert_UV_ps_hlsl;
blob_t convert_UV_linear_ps_hlsl;
blob_t convert_UV_PQ_ps_hlsl;
blob_t scene_vs_hlsl;
blob_t convert_Y_ps_hlsl;
blob_t convert_Y_linear_ps_hlsl;
blob_t convert_Y_PQ_ps_hlsl;
blob_t scene_ps_hlsl;
blob_t scene_NW_ps_hlsl;
// Capture image backed by a D3D11 texture that is shared with the encoder side
// through `encoder_texture_handle`.
struct img_d3d_t: public platf::img_t {
  std::shared_ptr<platf::display_t> display;

  // These objects are owned by the display_t's ID3D11Device
  texture2d_t capture_texture;
  render_target_t capture_rt;
  keyed_mutex_t capture_mutex;

  // This is the shared handle used by hwdevice_t to open capture_texture
  HANDLE encoder_texture_handle = {};

  // Set to true if the image corresponds to a dummy texture used prior to
  // the first successful capture of a desktop frame
  bool dummy = false;

  // Unique identifier for this image
  uint32_t id = 0;

  // DXGI format of this image texture.
  // Defaulted so that readers such as hwdevice_t::convert() never observe an
  // indeterminate value if the image has not been completed yet.
  DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;

  // Closes the shared handle, if one was created.
  virtual ~img_d3d_t() override {
    if (encoder_texture_handle) {
      CloseHandle(encoder_texture_handle);
    }
  };
};
struct texture_lock_helper {
keyed_mutex_t _mutex;
bool _locked = false;
texture_lock_helper(const texture_lock_helper &) = delete;
texture_lock_helper &
operator=(const texture_lock_helper &) = delete;
texture_lock_helper(texture_lock_helper &&other) {
_mutex.reset(other._mutex.release());
_locked = other._locked;
other._locked = false;
}
texture_lock_helper &
operator=(texture_lock_helper &&other) {
if (_locked) _mutex->ReleaseSync(0);
_mutex.reset(other._mutex.release());
_locked = other._locked;
other._locked = false;
return *this;
}
texture_lock_helper(IDXGIKeyedMutex *mutex):
_mutex(mutex) {
if (_mutex) _mutex->AddRef();
}
~texture_lock_helper() {
if (_locked) _mutex->ReleaseSync(0);
}
bool
lock() {
if (_locked) return true;
HRESULT status = _mutex->AcquireSync(0, INFINITE);
if (status == S_OK) {
_locked = true;
}
else {
BOOST_LOG(error) << "Failed to acquire texture mutex [0x"sv << util::hex(status).to_string_view() << ']';
}
return _locked;
}
};
// Builds the part of the cursor that must be XOR-blended (inverted) onto the screen.
//
// img_data:   raw pointer shape buffer as returned by GetFramePointerShape().
// shape_info: description of img_data; taken by value because the monochrome
//             path halves Height locally.
//
// Returns a 32-bit-per-pixel image, or an empty buffer when the shape type
// needs no XOR blending (colour cursors) or is not recognised.
util::buffer_t<std::uint8_t>
make_cursor_xor_image(const util::buffer_t<std::uint8_t> &img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info) {
  constexpr std::uint32_t inverted = 0xFFFFFFFF;
  constexpr std::uint32_t transparent = 0;

  switch (shape_info.Type) {
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
      // This type doesn't require any XOR-blending
      return {};
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: {
      util::buffer_t<std::uint8_t> cursor_img = img_data;
      std::for_each((std::uint32_t *) std::begin(cursor_img), (std::uint32_t *) std::end(cursor_img), [](auto &pixel) {
        auto alpha = (std::uint8_t)((pixel >> 24) & 0xFF);
        if (alpha == 0xFF) {
          // Pixels with 0xFF alpha will be XOR-blended as is.
        }
        else if (alpha == 0x00) {
          // Pixels with 0x00 alpha will be blended by make_cursor_alpha_image().
          // We make them transparent for the XOR-blended cursor image.
          pixel = transparent;
        }
        else {
          // Other alpha values are illegal in masked color cursors.
          // Widen before streaming: a uint8_t would be printed as a character.
          BOOST_LOG(warning) << "Illegal alpha value in masked color cursor: " << (int) alpha;
        }
      });
      return cursor_img;
    }
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
      // Monochrome is handled below
      break;
    default:
      BOOST_LOG(error) << "Invalid cursor shape type: " << shape_info.Type;
      return {};
  }

  // Monochrome shapes store an AND mask followed by an XOR mask, one bit per
  // pixel, each occupying half of the reported height.
  shape_info.Height /= 2;

  util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };

  const auto mask_bytes = shape_info.Pitch * shape_info.Height;
  const auto and_mask = std::begin(img_data);
  const auto xor_mask = std::begin(img_data) + mask_bytes;
  auto pixel_data = (std::uint32_t *) std::begin(cursor_img);

  // Walk the masks row by row and emit exactly Width pixels per row. Iterating
  // over every bit of every mask byte would write Pitch * 8 pixels per row and
  // run past the end of cursor_img whenever the rows carry padding bits.
  for (UINT row = 0; row < shape_info.Height; ++row) {
    const auto row_offset = row * shape_info.Pitch;

    for (UINT col = 0; col < shape_info.Width; ++col, ++pixel_data) {
      const auto byte_in_row = col / 8;
      if (byte_in_row >= shape_info.Pitch) {
        // The row is narrower than Width claims; never read into the next row.
        *pixel_data = transparent;
        continue;
      }

      // The most significant bit is the leftmost pixel.
      const auto bit = 0x80 >> (col % 8);
      const bool and_bit = (and_mask[row_offset + byte_in_row] & bit) != 0;
      const bool xor_bit = (xor_mask[row_offset + byte_in_row] & bit) != 0;

      // AND=1, XOR=1 is "inverse of screen". Opaque black/white are handled by
      // alpha-blending and AND=1, XOR=0 is plain transparency.
      *pixel_data = (and_bit && xor_bit) ? inverted : transparent;
    }
  }

  return cursor_img;
}
// Builds the part of the cursor that is alpha-blended onto the screen.
//
// img_data:   raw pointer shape buffer as returned by GetFramePointerShape().
// shape_info: description of img_data; taken by value because the monochrome
//             path halves Height locally.
//
// Returns a 32-bit-per-pixel image, or an empty buffer if the shape type is
// not recognised.
util::buffer_t<std::uint8_t>
make_cursor_alpha_image(const util::buffer_t<std::uint8_t> &img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info) {
  constexpr std::uint32_t black = 0xFF000000;
  constexpr std::uint32_t white = 0xFFFFFFFF;
  constexpr std::uint32_t transparent = 0;

  switch (shape_info.Type) {
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: {
      util::buffer_t<std::uint8_t> cursor_img = img_data;
      std::for_each((std::uint32_t *) std::begin(cursor_img), (std::uint32_t *) std::end(cursor_img), [](auto &pixel) {
        auto alpha = (std::uint8_t)((pixel >> 24) & 0xFF);
        if (alpha == 0xFF) {
          // Pixels with 0xFF alpha will be XOR-blended by make_cursor_xor_image().
          // We make them transparent for the alpha-blended cursor image.
          pixel = transparent;
        }
        else if (alpha == 0x00) {
          // Pixels with 0x00 alpha will be blended as opaque with the alpha-blended image.
          pixel |= 0xFF000000;
        }
        else {
          // Other alpha values are illegal in masked color cursors.
          // Widen before streaming: a uint8_t would be printed as a character.
          BOOST_LOG(warning) << "Illegal alpha value in masked color cursor: " << (int) alpha;
        }
      });
      return cursor_img;
    }
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
      // Color cursors are just an ARGB bitmap which requires no processing.
      return img_data;
    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
      // Monochrome cursors are handled below.
      break;
    default:
      BOOST_LOG(error) << "Invalid cursor shape type: " << shape_info.Type;
      return {};
  }

  // Monochrome shapes store an AND mask followed by an XOR mask, one bit per
  // pixel, each occupying half of the reported height.
  shape_info.Height /= 2;

  util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };

  const auto mask_bytes = shape_info.Pitch * shape_info.Height;
  const auto and_mask = std::begin(img_data);
  const auto xor_mask = std::begin(img_data) + mask_bytes;
  auto pixel_data = (std::uint32_t *) std::begin(cursor_img);

  // Walk the masks row by row and emit exactly Width pixels per row. Iterating
  // over every bit of every mask byte would write Pitch * 8 pixels per row and
  // run past the end of cursor_img whenever the rows carry padding bits.
  for (UINT row = 0; row < shape_info.Height; ++row) {
    const auto row_offset = row * shape_info.Pitch;

    for (UINT col = 0; col < shape_info.Width; ++col, ++pixel_data) {
      const auto byte_in_row = col / 8;
      if (byte_in_row >= shape_info.Pitch) {
        // The row is narrower than Width claims; never read into the next row.
        *pixel_data = transparent;
        continue;
      }

      // The most significant bit is the leftmost pixel.
      const auto bit = 0x80 >> (col % 8);
      const bool and_bit = (and_mask[row_offset + byte_in_row] & bit) != 0;
      const bool xor_bit = (xor_mask[row_offset + byte_in_row] & bit) != 0;

      if (and_bit) {
        // Color of screen (transparent) or inverse of screen (handled by XOR blending)
        *pixel_data = transparent;
      }
      else {
        // Opaque white when the XOR bit is set, opaque black otherwise
        *pixel_data = xor_bit ? white : black;
      }
    }
  }

  return cursor_img;
}
// Compiles one HLSL entry point from a file on disk.
//
// file:         UTF-8 path of the HLSL source.
// entrypoint:   name of the function to compile.
// shader_model: target profile passed to the compiler.
//
// Compiler diagnostics, if any, are logged as a warning.
// Returns the compiled blob, or nullptr (after logging) when compilation fails.
blob_t
compile_shader(LPCSTR file, LPCSTR entrypoint, LPCSTR shader_model) {
#ifndef NDEBUG
  const DWORD flags = D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
  const DWORD flags = D3DCOMPILE_ENABLE_STRICTNESS;
#endif

  // D3DCompileFromFile() takes a wide-character path.
  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf8_to_wide;
  const auto wide_path = utf8_to_wide.from_bytes(file);

  blob_t::pointer diagnostics = nullptr;
  blob_t::pointer bytecode = nullptr;
  const auto status = D3DCompileFromFile(wide_path.c_str(), nullptr, nullptr, entrypoint, shader_model, flags, 0, &bytecode, &diagnostics);

  if (diagnostics) {
    // Log the message without its final byte.
    const auto text = (const char *) diagnostics->GetBufferPointer();
    const auto length = diagnostics->GetBufferSize() - 1;
    BOOST_LOG(warning) << std::string_view { text, length };
    diagnostics->Release();
  }

  if (!status) {
    return blob_t { bytecode };
  }

  BOOST_LOG(error) << "Couldn't compile ["sv << file << "] [0x"sv << util::hex(status).to_string_view() << ']';
  return nullptr;
}
// Compiles the "main_ps" entry point of `file` for the ps_5_0 profile.
// Returns nullptr if compile_shader() fails.
blob_t
compile_pixel_shader(LPCSTR file) {
  constexpr LPCSTR entrypoint = "main_ps";
  constexpr LPCSTR profile = "ps_5_0";
  return compile_shader(file, entrypoint, profile);
}
// Compiles the "main_vs" entry point of `file` for the vs_5_0 profile.
// Returns nullptr if compile_shader() fails.
blob_t
compile_vertex_shader(LPCSTR file) {
  constexpr LPCSTR entrypoint = "main_vs";
  constexpr LPCSTR profile = "vs_5_0";
  return compile_shader(file, entrypoint, profile);
}
class hwdevice_t: public platf::hwdevice_t {
public:
// Renders the captured image into the Y and UV render targets of the
// hardware frame set up by set_frame().
//
// img_base: must actually be an img_d3d_t.
// Returns 0 on success, -1 if the shared texture could not be opened or the
// encoder mutex could not be acquired.
int
convert(platf::img_t &img_base) override {
  // Drop cached contexts whose capture image no longer exists.
  for (auto entry = img_ctx_map.begin(); entry != img_ctx_map.end();) {
    if (!entry->second.img_weak.expired()) {
      ++entry;
      continue;
    }
    entry = img_ctx_map.erase(entry);
  }

  auto &img = static_cast<img_d3d_t &>(img_base);
  auto &img_ctx = img_ctx_map[img.id];

  // Make sure the shared capture texture is opened on our own ID3D11Device.
  if (initialize_image_context(img, img_ctx)) {
    return -1;
  }

  // FP16 capture textures go through the dedicated FP16 pixel shaders.
  const bool fp16_input = img.format == DXGI_FORMAT_R16G16B16A16_FLOAT;
  const auto luma_ps = fp16_input ? convert_Y_fp16_ps.get() : convert_Y_ps.get();
  const auto chroma_ps = fp16_input ? convert_UV_fp16_ps.get() : convert_UV_ps.get();

  // Hold the keyed mutex while sampling the texture so capture code cannot touch it.
  const auto status = img_ctx.encoder_mutex->AcquireSync(0, INFINITE);
  if (status != S_OK) {
    BOOST_LOG(error) << "Failed to acquire encoder mutex [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // Luma pass
  device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
  device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
  device_ctx->PSSetShader(luma_ps, nullptr, 0);
  device_ctx->RSSetViewports(1, &outY_view);
  device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
  device_ctx->Draw(3, 0);

  // Chroma pass
  device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
  device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
  device_ctx->PSSetShader(chroma_ps, nullptr, 0);
  device_ctx->RSSetViewports(1, &outUV_view);
  device_ctx->Draw(3, 0);

  // Hand the image back to the capture code.
  img_ctx.encoder_mutex->ReleaseSync(0);

  // Unbind the input texture from pixel shader slot 0.
  ID3D11ShaderResourceView *no_resource = nullptr;
  device_ctx->PSSetShaderResources(0, 1, &no_resource);

  return 0;
}
// Selects the colour conversion constants and uploads them as the pixel
// shader constant buffer in slot 0.
//
// colorspace:  numeric colourspace id (5, 1 and 9 are recognised; anything
//              else falls back to the first table entry with a warning).
// color_range: values above 1 select the full-range entry that follows the
//              chosen base entry in ::video::colors.
//
// If the constant buffer cannot be created, a warning is logged and the
// previously bound buffer is left in place.
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
  int base_index = 0;
  switch (colorspace) {
    case 5:  // SWS_CS_SMPTE170M
      base_index = 0;
      break;
    case 1:  // SWS_CS_ITU709
      base_index = 2;
      break;
    case 9:  // SWS_CS_BT2020
      base_index = 4;
      break;
    default:
      BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
      base_index = 0;
      break;
  }

  // The full-range variant sits directly after the base entry.
  const bool full_range = color_range > 1;
  color_p = &::video::colors[base_index + (full_range ? 1 : 0)];

  auto new_matrix = make_buffer((device_t::pointer) data, *color_p);
  if (!new_matrix) {
    BOOST_LOG(warning) << "Failed to create color matrix"sv;
    return;
  }

  device_ctx->VSSetConstantBuffers(0, 1, &info_scene);
  device_ctx->PSSetConstantBuffers(0, 1, &new_matrix);

  // Keep the buffer alive for as long as it is bound.
  this->color_matrix = std::move(new_matrix);
}
// Adjusts the FFmpeg hardware frames context before it is initialised.
// For D3D11VA contexts the textures are requested with render-target binding;
// in every case the pool is limited to a single frame.
void
init_hwframes(AVHWFramesContext *frames) override {
  // We may be handed a QSV or a D3D11VA context; only the latter has D3D11 flags.
  const bool is_d3d11va = frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA;
  if (is_d3d11va) {
    auto *hwctx = (AVD3D11VAFramesContext *) frames->hwctx;

    // The encoder requires textures with D3D11_BIND_RENDER_TARGET set
    hwctx->BindFlags = D3D11_BIND_RENDER_TARGET;
    hwctx->MiscFlags = 0;
  }

  // A single texture is all we need
  frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
// Binds the AVFrame that convert() will render into.
//
// frame:         stored in hwframe (which frees it with av_frame_free() when
//                replaced or destroyed) and in this->frame.
// hw_frames_ctx: used to allocate a hardware buffer when frame->buf[0] is empty.
//
// Computes the Y and UV viewports so the display's aspect ratio is preserved
// inside the frame, creates the scene info constant buffer and the two render
// target views on the frame's texture, and clears them to black.
// Returns 0 on success, -1 on any failure (after logging).
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
// NOTE(review): the result of av_frame_alloc() is dereferenced without a null check.
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
// NOTE(review): d3d11_frame is freed at the end of this scope, before the
// AddRef() below; this presumably relies on the frame pool keeping the
// texture alive — confirm.
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
float in_width = display->width;
float in_height = display->height;
// Ensure aspect ratio is maintained
// The smaller of the two scale factors makes the image fit in both dimensions.
auto scalar = std::fminf(out_width / in_width, out_height / in_height);
auto out_width_f = in_width * scalar;
auto out_height_f = in_height * scalar;
// result is always positive
// Centre the scaled image; the remaining border stays at the clear colour.
auto offsetX = (out_width - out_width_f) / 2;
auto offsetY = (out_height - out_height_f) / 2;
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
// The UV viewport is the Y viewport halved in both dimensions.
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
// Only the first float (1 / scaled output width) is set; the rest are zero padding.
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; //aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
if (!info_scene) {
BOOST_LOG(error) << "Failed to create info scene buffer"sv;
return -1;
}
// Y plane view: single 16-bit channel for P010, single 8-bit channel otherwise.
D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM,
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// UV plane view: same descriptor reused with a two-channel format.
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Clear the RTVs to ensure the aspect ratio padding is black
const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
device_ctx->ClearRenderTargetView(nv12_Y_rt.get(), y_black);
const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
device_ctx->ClearRenderTargetView(nv12_UV_rt.get(), uv_black);
return 0;
}
// Creates the encoder-side D3D11 device and all pipeline state used by convert().
//
// display:   display being captured; queried for HDR state and stored in this->display.
// adapter_p: adapter on which the D3D11 device is created.
// pix_fmt:   nv12 selects DXGI_FORMAT_NV12 output, anything else DXGI_FORMAT_P010.
//
// Returns 0 on success, -1 (after logging) if the device, a shader, the colour
// matrix buffer, the blend state or the sampler could not be created.
// Failure to raise the GPU thread priority or to create the input layout is
// logged but not fatal.
int
init(
  std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
  pix_fmt_e pix_fmt) {
  D3D_FEATURE_LEVEL featureLevels[] {
    D3D_FEATURE_LEVEL_11_1,
    D3D_FEATURE_LEVEL_11_0,
    D3D_FEATURE_LEVEL_10_1,
    D3D_FEATURE_LEVEL_10_0,
    D3D_FEATURE_LEVEL_9_3,
    D3D_FEATURE_LEVEL_9_2,
    D3D_FEATURE_LEVEL_9_1
  };

  HRESULT status = D3D11CreateDevice(
    adapter_p,
    D3D_DRIVER_TYPE_UNKNOWN,
    nullptr,
    D3D11_CREATE_DEVICE_FLAGS,
    featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
    D3D11_SDK_VERSION,
    &device,
    nullptr,
    &device_ctx);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create encoder D3D11 device [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  dxgi::dxgi_t dxgi;
  status = device->QueryInterface(IID_IDXGIDevice, (void **) &dxgi);
  if (FAILED(status)) {
    BOOST_LOG(warning) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // Best effort: keep going if the priority cannot be raised.
  status = dxgi->SetGPUThreadPriority(7);
  if (FAILED(status)) {
    BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
  }

  data = device.get();

  format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);

  status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
  if (status) {
    BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  status = device->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
  if (status) {
    BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ.
  if (format == DXGI_FORMAT_P010 && display->is_hdr()) {
    status = device->CreatePixelShader(convert_Y_PQ_ps_hlsl->GetBufferPointer(), convert_Y_PQ_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
    if (status) {
      BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    status = device->CreatePixelShader(convert_UV_PQ_ps_hlsl->GetBufferPointer(), convert_UV_PQ_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
    if (status) {
      BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }
  }
  else {
    // If the display is in Advanced Color mode, the desktop format will be scRGB FP16.
    // scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders.
    status = device->CreatePixelShader(convert_Y_linear_ps_hlsl->GetBufferPointer(), convert_Y_linear_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
    if (status) {
      BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    status = device->CreatePixelShader(convert_UV_linear_ps_hlsl->GetBufferPointer(), convert_UV_linear_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
    if (status) {
      BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }
  }

  // These shaders consume standard 8-bit sRGB input
  status = device->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
  if (status) {
    BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  status = device->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
  if (status) {
    BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // Default colour matrix until set_colorspace() picks the real one.
  color_matrix = make_buffer(device.get(), ::video::colors[0]);
  if (!color_matrix) {
    BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
    return -1;
  }

  D3D11_INPUT_ELEMENT_DESC layout_desc {
    "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0
  };

  status = device->CreateInputLayout(
    &layout_desc, 1,
    convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(),
    &input_layout);
  if (FAILED(status)) {
    // This result used to be ignored silently. Report it, but stay non-fatal so
    // setups that worked without an input layout keep working.
    // NOTE(review): convert() draws without binding vertex buffers in the code
    // visible here, so the layout may not be strictly required — confirm.
    BOOST_LOG(warning) << "Failed to create input layout [0x"sv << util::hex(status).to_string_view() << ']';
  }

  this->display = std::move(display);

  blend_disable = make_blend(device.get(), false, false);
  if (!blend_disable) {
    return -1;
  }

  D3D11_SAMPLER_DESC sampler_desc {};
  sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
  sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
  sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
  sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
  sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
  sampler_desc.MinLOD = 0;
  sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;

  status = device->CreateSamplerState(&sampler_desc, &sampler_linear);
  if (FAILED(status)) {
    // The sampler uses linear filtering; the message previously said "point".
    BOOST_LOG(error) << "Failed to create linear sampler state [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // Bind the state that stays constant across convert() calls.
  device_ctx->IASetInputLayout(input_layout.get());
  device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
  device_ctx->VSSetConstantBuffers(0, 1, &info_scene);

  device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
  device_ctx->PSSetSamplers(0, 1, &sampler_linear);
  device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

  return 0;
}
private:
// Per-image state kept by the encoder device for a capture image that has
// passed through convert().
struct encoder_img_ctx_t {
  // Identity of the capture texture this context was opened for. It is only
  // compared against to detect a texture change.
  // Not safe for actual use by the encoder!
  texture2d_t::pointer capture_texture_p;

  // The shared texture as opened on the encoder's device, plus its SRV and keyed mutex.
  texture2d_t encoder_texture;
  shader_res_t encoder_input_res;
  keyed_mutex_t encoder_mutex;

  // Expires once the capture image is gone; convert() uses it to garbage collect.
  std::weak_ptr<const platf::img_t> img_weak;

  // Returns the context to its empty state, releasing everything it holds.
  void
  reset() {
    capture_texture_p = nullptr;

    encoder_texture.reset();
    encoder_input_res.reset();
    encoder_mutex.reset();

    img_weak.reset();
  }
};
int
initialize_image_context(const img_d3d_t &img, encoder_img_ctx_t &img_ctx) {
// If we've already opened the shared texture, we're done
if (img_ctx.encoder_texture && img.capture_texture.get() == img_ctx.capture_texture_p) {
return 0;
}
// Reset this image context in case it was used before with a different texture.
// Textures can change when transitioning from a dummy image to a real image.
img_ctx.reset();
device1_t device1;
auto status = device->QueryInterface(__uuidof(ID3D11Device1), (void **) &device1);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to query ID3D11Device1 [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Open a handle to the shared texture
status = device1->OpenSharedResource1(img.encoder_texture_handle, __uuidof(ID3D11Texture2D), (void **) &img_ctx.encoder_texture);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to open shared image texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Get the keyed mutex to synchronize with the capture code
status = img_ctx.encoder_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img_ctx.encoder_mutex);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Create the SRV for the encoder texture
status = device->CreateShaderResourceView(img_ctx.encoder_texture.get(), nullptr, &img_ctx.encoder_input_res);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create shader resource view for encoding [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img_ctx.capture_texture_p = img.capture_texture.get();
img_ctx.img_weak = img.weak_from_this();
return 0;
}
public:
// Owns the AVFrame passed to set_frame().
frame_t hwframe;
// Entry of ::video::colors chosen by set_colorspace(); not assigned by init().
::video::color_t *color_p;
// Constant buffers bound to slot 0 of the vertex shader (info_scene, created in
// set_frame()) and of the pixel shader (color_matrix).
buf_t info_scene;
buf_t color_matrix;
// Fixed pipeline state created in init().
input_layout_t input_layout;
blend_t blend_disable;
sampler_state_t sampler_linear;
// Render target views on hwframe_texture for the Y and UV passes, created in set_frame().
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// img_d3d_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
// amongst multiple hwdevice_t objects (and therefore multiple ID3D11Devices).
std::map<uint32_t, encoder_img_ctx_t> img_ctx_map;
// Display handed to init(); its width and height drive the viewports in set_frame().
std::shared_ptr<platf::display_t> display;
// Shaders created in init(). The *_fp16_ps variants are used by convert() when
// the image format is DXGI_FORMAT_R16G16B16A16_FLOAT.
vs_t convert_UV_vs;
ps_t convert_UV_ps;
ps_t convert_UV_fp16_ps;
ps_t convert_Y_ps;
ps_t convert_Y_fp16_ps;
vs_t scene_vs;
// Viewports for the Y and UV passes, computed in set_frame().
D3D11_VIEWPORT outY_view;
D3D11_VIEWPORT outUV_view;
// Output format chosen in init(): DXGI_FORMAT_NV12 or DXGI_FORMAT_P010.
DXGI_FORMAT format;
// Encoder-side device and immediate context created in init().
device_t device;
device_ctx_t device_ctx;
};
// Uploads a cursor image as an immutable BGRA texture and attaches it to `cursor`.
//
// device:     device used to create the texture and its view.
// cursor:     cursor whose texture and shader resource view are replaced.
// cursor_img: 32-bit-per-pixel image; an empty buffer clears the cursor instead.
// shape_info: supplies the image width; the height is derived from the buffer size.
//
// Returns true on success (including the empty case), false (after logging)
// if a D3D object could not be created.
bool
set_cursor_texture(device_t::pointer device, gpu_cursor_t &cursor, util::buffer_t<std::uint8_t> &&cursor_img, DXGI_OUTDUPL_POINTER_SHAPE_INFO &shape_info) {
  // An empty image means this cursor layer is unused for the current shape.
  if (cursor_img.size() == 0) {
    cursor.input_res.reset();
    cursor.set_texture(0, 0, nullptr);
    return true;
  }

  D3D11_SUBRESOURCE_DATA pixels {};
  pixels.pSysMem = std::begin(cursor_img);
  pixels.SysMemPitch = 4 * shape_info.Width;
  pixels.SysMemSlicePitch = 0;

  // Describe the cursor texture
  D3D11_TEXTURE2D_DESC desc {};
  desc.Width = shape_info.Width;
  desc.Height = cursor_img.size() / pixels.SysMemPitch;
  desc.MipLevels = 1;
  desc.ArraySize = 1;
  desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
  desc.SampleDesc.Count = 1;
  desc.Usage = D3D11_USAGE_IMMUTABLE;
  desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

  texture2d_t cursor_texture;
  auto status = device->CreateTexture2D(&desc, &pixels, &cursor_texture);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create mouse texture [0x"sv << util::hex(status).to_string_view() << ']';
    return false;
  }

  // Release the old view before a new one is written into the same slot.
  cursor.input_res.reset();

  status = device->CreateShaderResourceView(cursor_texture.get(), nullptr, &cursor.input_res);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create cursor shader resource view [0x"sv << util::hex(status).to_string_view() << ']';
    return false;
  }

  cursor.set_texture(desc.Width, desc.Height, std::move(cursor_texture));
  return true;
}
capture_e
display_vram_t::snapshot(const pull_free_image_cb_t &pull_free_image_cb, std::shared_ptr<platf::img_t> &img_out, std::chrono::milliseconds timeout, bool cursor_visible) {
HRESULT status;
DXGI_OUTDUPL_FRAME_INFO frame_info;
resource_t::pointer res_p {};
auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
resource_t res { res_p };
if (capture_status != capture_e::ok) {
return capture_status;
}
const bool mouse_update_flag = frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
const bool frame_update_flag = frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0;
const bool update_flag = mouse_update_flag || frame_update_flag;
if (!update_flag) {
return capture_e::timeout;
}
if (frame_info.PointerShapeBufferSize > 0) {
DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};
util::buffer_t<std::uint8_t> img_data { frame_info.PointerShapeBufferSize };
UINT dummy;
status = dup.dup->GetFramePointerShape(img_data.size(), std::begin(img_data), &dummy, &shape_info);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
}
auto alpha_cursor_img = make_cursor_alpha_image(img_data, shape_info);
auto xor_cursor_img = make_cursor_xor_image(img_data, shape_info);
if (!set_cursor_texture(device.get(), cursor_alpha, std::move(alpha_cursor_img), shape_info) ||
!set_cursor_texture(device.get(), cursor_xor, std::move(xor_cursor_img), shape_info)) {
return capture_e::error;
}
}
if (frame_info.LastMouseUpdateTime.QuadPart) {
cursor_alpha.set_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible);
cursor_xor.set_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible);
}
const bool blend_mouse_cursor_flag = (cursor_alpha.visible || cursor_xor.visible) && cursor_visible;
texture2d_t src {};
if (frame_update_flag) {
// Get the texture object from this frame
status = res->QueryInterface(IID_ID3D11Texture2D, (void **) &src);
if (FAILED(status)) {
BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
}
D3D11_TEXTURE2D_DESC desc;
src->GetDesc(&desc);
// It's possible for our display enumeration to race with mode changes and result in
// mismatched image pool and desktop texture sizes. If this happens, just reinit again.
if (desc.Width != width || desc.Height != height) {
BOOST_LOG(info) << "Capture size changed ["sv << width << 'x' << height << " -> "sv << desc.Width << 'x' << desc.Height << ']';
return capture_e::reinit;
}
// If we don't know the capture format yet, grab it from this texture
if (capture_format == DXGI_FORMAT_UNKNOWN) {
capture_format = desc.Format;
BOOST_LOG(info) << "Capture format ["sv << dxgi_format_to_string(capture_format) << ']';
}
// It's also possible for the capture format to change on the fly. If that happens,
// reinitialize capture to try format detection again and create new images.
if (capture_format != desc.Format) {
BOOST_LOG(info) << "Capture format changed ["sv << dxgi_format_to_string(capture_format) << " -> "sv << dxgi_format_to_string(desc.Format) << ']';
return capture_e::reinit;
}
}
enum class lfa {
nothing,
replace_surface_with_img,
replace_img_with_surface,
copy_src_to_img,
copy_src_to_surface,
};
enum class ofa {
forward_last_img,
copy_last_surface_and_blend_cursor,
dummy_fallback,
};
auto last_frame_action = lfa::nothing;
auto out_frame_action = ofa::dummy_fallback;
if (capture_format == DXGI_FORMAT_UNKNOWN) {
// We don't know the final capture format yet, so we will encode a black dummy image
last_frame_action = lfa::nothing;
out_frame_action = ofa::dummy_fallback;
}
else {
if (src) {
// We got a new frame from DesktopDuplication...
if (blend_mouse_cursor_flag) {
// ...and we need to blend the mouse cursor onto it.
// Copy the frame to intermediate surface so we can blend this and future mouse cursor updates
// without new frames from DesktopDuplication. We use direct3d surface directly here and not
// an image from pull_free_image_cb mainly because it's lighter (surface sharing between
// direct3d devices produce significant memory overhead).
last_frame_action = lfa::copy_src_to_surface;
// Copy the intermediate surface to a new image from pull_free_image_cb and blend the mouse cursor onto it.
out_frame_action = ofa::copy_last_surface_and_blend_cursor;
}
else {
// ...and we don't need to blend the mouse cursor.
// Copy the frame to a new image from pull_free_image_cb and save the shared pointer to the image
// in case the mouse cursor appears without a new frame from DesktopDuplication.
last_frame_action = lfa::copy_src_to_img;
// Use saved last image shared pointer as output image evading copy.
out_frame_action = ofa::forward_last_img;
}
}
else if (!std::holds_alternative<std::monostate>(last_frame_variant)) {
// We didn't get a new frame from DesktopDuplication...
if (blend_mouse_cursor_flag) {
// ...but we need to blend the mouse cursor.
if (std::holds_alternative<std::shared_ptr<platf::img_t>>(last_frame_variant)) {
// We have the shared pointer of the last image, replace it with intermediate surface
// while copying contents so we can blend this and future mouse cursor updates.
last_frame_action = lfa::replace_img_with_surface;
}
// Copy the intermediate surface which contains last DesktopDuplication frame
// to a new image from pull_free_image_cb and blend the mouse cursor onto it.
out_frame_action = ofa::copy_last_surface_and_blend_cursor;
}
else {
// ...and we don't need to blend the mouse cursor.
// This happens when the mouse cursor disappears from screen,
// or there's mouse cursor on screen, but its drawing is disabled in sunshine.
if (std::holds_alternative<texture2d_t>(last_frame_variant)) {
// We have the intermediate surface that was used as the mouse cursor blending base.
// Replace it with an image from pull_free_image_cb copying contents and freeing up the surface memory.
// Save the shared pointer to the image in case the mouse cursor reappears.
last_frame_action = lfa::replace_surface_with_img;
}
// Use saved last image shared pointer as output image evading copy.
out_frame_action = ofa::forward_last_img;
}
}
}
auto create_surface = [&](texture2d_t &surface) -> bool {
// Try to reuse the old surface if it hasn't been destroyed yet.
if (old_surface_delayed_destruction) {
surface.reset(old_surface_delayed_destruction.release());
return true;
}
// Otherwise create a new surface.
D3D11_TEXTURE2D_DESC t {};
t.Width = width;
t.Height = height;
t.MipLevels = 1;
t.ArraySize = 1;
t.SampleDesc.Count = 1;
t.Usage = D3D11_USAGE_DEFAULT;
t.Format = capture_format;
t.BindFlags = 0;
status = device->CreateTexture2D(&t, nullptr, &surface);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create frame copy texture [0x"sv << util::hex(status).to_string_view() << ']';
return false;
}
return true;
};
auto get_locked_d3d_img = [&](std::shared_ptr<platf::img_t> &img, bool dummy = false) -> std::tuple<std::shared_ptr<img_d3d_t>, texture_lock_helper> {
auto d3d_img = std::static_pointer_cast<img_d3d_t>(img);
// Finish creating the image (if it hasn't happened already),
// also creates synchronization primitives for shared access from multiple direct3d devices.
if (complete_img(d3d_img.get(), dummy)) return { nullptr, nullptr };
// This image is shared between capture direct3d device and encoders direct3d devices,
// we must acquire lock before doing anything to it.
texture_lock_helper lock_helper(d3d_img->capture_mutex.get());
if (!lock_helper.lock()) {
BOOST_LOG(error) << "Failed to lock capture texture";
return { nullptr, nullptr };
}
return { std::move(d3d_img), std::move(lock_helper) };
};
switch (last_frame_action) {
case lfa::nothing: {
break;
}
case lfa::replace_surface_with_img: {
auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
if (!p_surface) {
BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
return capture_e::error;
}
std::shared_ptr<platf::img_t> img;
if (!pull_free_image_cb(img)) return capture_e::interrupted;
auto [d3d_img, lock] = get_locked_d3d_img(img);
if (!d3d_img) return capture_e::error;
device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get());
// We delay the destruction of intermediate surface in case the mouse cursor reappears shortly.
old_surface_delayed_destruction.reset(p_surface->release());
old_surface_timestamp = std::chrono::steady_clock::now();
last_frame_variant = img;
break;
}
case lfa::replace_img_with_surface: {
auto p_img = std::get_if<std::shared_ptr<platf::img_t>>(&last_frame_variant);
if (!p_img) {
BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
return capture_e::error;
}
auto [d3d_img, lock] = get_locked_d3d_img(*p_img);
if (!d3d_img) return capture_e::error;
p_img = nullptr;
last_frame_variant = texture2d_t {};
auto &surface = std::get<texture2d_t>(last_frame_variant);
if (!create_surface(surface)) return capture_e::error;
device_ctx->CopyResource(surface.get(), d3d_img->capture_texture.get());
break;
}
case lfa::copy_src_to_img: {
last_frame_variant = {};
std::shared_ptr<platf::img_t> img;
if (!pull_free_image_cb(img)) return capture_e::interrupted;
auto [d3d_img, lock] = get_locked_d3d_img(img);
if (!d3d_img) return capture_e::error;
device_ctx->CopyResource(d3d_img->capture_texture.get(), src.get());
last_frame_variant = img;
break;
}
case lfa::copy_src_to_surface: {
auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
if (!p_surface) {
last_frame_variant = texture2d_t {};
p_surface = std::get_if<texture2d_t>(&last_frame_variant);
if (!create_surface(*p_surface)) return capture_e::error;
}
device_ctx->CopyResource(p_surface->get(), src.get());
break;
}
}
auto blend_cursor = [&](img_d3d_t &d3d_img) {
device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
device_ctx->PSSetShader(scene_ps.get(), nullptr, 0);
device_ctx->OMSetRenderTargets(1, &d3d_img.capture_rt, nullptr);
if (cursor_alpha.texture.get()) {
// Perform an alpha blending operation
device_ctx->OMSetBlendState(blend_alpha.get(), nullptr, 0xFFFFFFFFu);
device_ctx->PSSetShaderResources(0, 1, &cursor_alpha.input_res);
device_ctx->RSSetViewports(1, &cursor_alpha.cursor_view);
device_ctx->Draw(3, 0);
}
if (cursor_xor.texture.get()) {
// Perform an invert blending without touching alpha values
device_ctx->OMSetBlendState(blend_invert.get(), nullptr, 0x00FFFFFFu);
device_ctx->PSSetShaderResources(0, 1, &cursor_xor.input_res);
device_ctx->RSSetViewports(1, &cursor_xor.cursor_view);
device_ctx->Draw(3, 0);
}
device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
ID3D11RenderTargetView *emptyRenderTarget = nullptr;
device_ctx->OMSetRenderTargets(1, &emptyRenderTarget, nullptr);
device_ctx->RSSetViewports(0, nullptr);
ID3D11ShaderResourceView *emptyShaderResourceView = nullptr;
device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView);
};
switch (out_frame_action) {
case ofa::forward_last_img: {
auto p_img = std::get_if<std::shared_ptr<platf::img_t>>(&last_frame_variant);
if (!p_img) {
BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
return capture_e::error;
}
img_out = *p_img;
break;
}
case ofa::copy_last_surface_and_blend_cursor: {
auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
if (!p_surface) {
BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
return capture_e::error;
}
if (!blend_mouse_cursor_flag) {
BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
return capture_e::error;
}
if (!pull_free_image_cb(img_out)) return capture_e::interrupted;
auto [d3d_img, lock] = get_locked_d3d_img(img_out);
if (!d3d_img) return capture_e::error;
device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get());
blend_cursor(*d3d_img);
break;
}
case ofa::dummy_fallback: {
if (!pull_free_image_cb(img_out)) return capture_e::interrupted;
// Clear the image if it has been used as a dummy.
// It can have the mouse cursor blended onto it.
auto old_d3d_img = (img_d3d_t *) img_out.get();
bool reclear_dummy = old_d3d_img->dummy && old_d3d_img->capture_texture;
auto [d3d_img, lock] = get_locked_d3d_img(img_out, true);
if (!d3d_img) return capture_e::error;
if (reclear_dummy) {
auto dummy_data = std::make_unique<std::uint8_t[]>(d3d_img->row_pitch * d3d_img->height);
std::fill_n(dummy_data.get(), d3d_img->row_pitch * d3d_img->height, 0);
device_ctx->UpdateSubresource(d3d_img->capture_texture.get(), 0, nullptr, dummy_data.get(), d3d_img->row_pitch, 0);
}
if (blend_mouse_cursor_flag) {
blend_cursor(*d3d_img);
}
break;
}
}
// Perform delayed destruction of the unused surface if the time is due.
if (old_surface_delayed_destruction && old_surface_timestamp + 10s < std::chrono::steady_clock::now()) {
old_surface_delayed_destruction.reset();
}
return capture_e::ok;
}
/**
 * @brief Initializes the display and the D3D11 state used to draw the mouse cursor.
 *
 * After the base display is initialized, this creates the linear sampler, the scene
 * vertex and pixel shaders and the three blend states (alpha, invert, disabled), then
 * binds the default blend state, the sampler and the primitive topology on the device context.
 *
 * @param config Stream configuration; `dynamicRange` together with is_hdr() selects the
 *               pixel shader that normalizes white levels.
 * @param display_name Forwarded unchanged to display_base_t::init().
 * @return 0 on success, -1 if any step fails.
 */
int
display_vram_t::init(const ::video::config_t &config, const std::string &display_name) {
  if (display_base_t::init(config, display_name)) {
    return -1;
  }

  D3D11_SAMPLER_DESC sampler_desc {};
  sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
  sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
  sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
  sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
  sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
  sampler_desc.MinLOD = 0;
  sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;

  auto status = device->CreateSamplerState(&sampler_desc, &sampler_linear);
  if (FAILED(status)) {
    // The state created here uses linear filtering, so the message names it as such.
    BOOST_LOG(error) << "Failed to create linear sampler state [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  // HRESULTs are tested with FAILED() throughout: a non-zero value is not necessarily
  // an error, because positive HRESULTs are success codes.
  status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
    return -1;
  }

  if (config.dynamicRange && is_hdr()) {
    // This shader will normalize scRGB white levels to a user-defined white level
    status = device->CreatePixelShader(scene_NW_ps_hlsl->GetBufferPointer(), scene_NW_ps_hlsl->GetBufferSize(), nullptr, &scene_ps);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    // Use a 300 nit target for the mouse cursor. We should really get
    // the user's SDR white level in nits, but there is no API that
    // provides that information to Win32 apps.
    float sdr_multiplier_data[16 / sizeof(float)] { 300.0f / 80.f };  // aligned to 16-byte
    auto sdr_multiplier = make_buffer(device.get(), sdr_multiplier_data);
    if (!sdr_multiplier) {
      BOOST_LOG(warning) << "Failed to create SDR multiplier"sv;
      return -1;
    }

    device_ctx->PSSetConstantBuffers(0, 1, &sdr_multiplier);
  }
  else {
    status = device->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &scene_ps);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }
  }

  // One blend state per cursor drawing mode, plus the disabled state used as the default.
  blend_alpha = make_blend(device.get(), true, false);
  blend_invert = make_blend(device.get(), true, true);
  blend_disable = make_blend(device.get(), false, false);

  if (!blend_disable || !blend_alpha || !blend_invert) {
    return -1;
  }

  // Default pipeline state: blending off, linear sampler in slot 0.
  device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
  device_ctx->PSSetSamplers(0, 1, &sampler_linear);
  device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

  return 0;
}
/**
 * @brief Allocates an image object with only its format-independent fields filled in.
 *
 * No texture is created here; the remaining fields are populated by complete_img().
 *
 * @return The newly allocated image.
 */
std::shared_ptr<platf::img_t>
display_vram_t::alloc_img() {
  auto d3d_img = std::make_shared<img_d3d_t>();

  // Owning display and a unique, monotonically increasing identifier
  d3d_img->display = shared_from_this();
  d3d_img->id = next_image_id++;

  // Dimensions match the display being captured
  d3d_img->height = height;
  d3d_img->width = width;

  return d3d_img;
}
/**
 * @brief Creates (or re-creates) the shared texture and related objects backing an image.
 *
 * Does nothing if the image already has a capture texture in the requested dummy state.
 * Otherwise any previous resources are released and a new texture, render target view,
 * keyed mutex and shared handle are created. If any step fails, everything created so
 * far is released again, so a later call starts from scratch instead of mistaking a
 * half-built image for a completed one.
 *
 * @param img_base Image to complete; it is treated as an img_d3d_t.
 * @param dummy true to create a zero-filled placeholder texture, false for a real capture target.
 * @return 0 on success, -1 on failure.
 */
// This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
int
display_vram_t::complete_img(platf::img_t *img_base, bool dummy) {
  auto img = static_cast<img_d3d_t *>(img_base);

  // If this already has a capture texture and it's not switching dummy state, nothing to do
  if (img->capture_texture && img->dummy == dummy) {
    return 0;
  }

  // If this is not a dummy image, we must know the format by now
  if (!dummy && capture_format == DXGI_FORMAT_UNKNOWN) {
    BOOST_LOG(error) << "display_vram_t::complete_img() called with unknown capture format!";
    return -1;
  }

  // Releases every resource this function creates. Used both to reset the image up
  // front and to undo a partially completed image on failure.
  auto release_resources = [img]() {
    img->capture_texture.reset();
    img->capture_rt.reset();
    img->capture_mutex.reset();
    img->data = nullptr;
    if (img->encoder_texture_handle) {
      CloseHandle(img->encoder_texture_handle);
      img->encoder_texture_handle = NULL;
    }
  };

  // Reset the image (in case this was previously a dummy)
  release_resources();

  // Initialize format-dependent fields
  img->pixel_pitch = get_pixel_pitch();
  img->row_pitch = img->pixel_pitch * img->width;
  img->dummy = dummy;
  img->format = (capture_format == DXGI_FORMAT_UNKNOWN) ? DXGI_FORMAT_B8G8R8A8_UNORM : capture_format;

  D3D11_TEXTURE2D_DESC t {};
  t.Width = img->width;
  t.Height = img->height;
  t.MipLevels = 1;
  t.ArraySize = 1;
  t.SampleDesc.Count = 1;
  t.Usage = D3D11_USAGE_DEFAULT;
  t.Format = img->format;
  t.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
  t.MiscFlags = D3D11_RESOURCE_MISC_SHARED_NTHANDLE | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;

  HRESULT status;
  if (dummy) {
    // std::make_unique<T[]>(n) value-initializes the array, so the buffer is already all zeroes.
    auto dummy_data = std::make_unique<std::uint8_t[]>(img->row_pitch * img->height);
    D3D11_SUBRESOURCE_DATA initial_data {
      dummy_data.get(),
      static_cast<UINT>(img->row_pitch),
      0
    };
    status = device->CreateTexture2D(&t, &initial_data, &img->capture_texture);
  }
  else {
    status = device->CreateTexture2D(&t, nullptr, &img->capture_texture);
  }
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
    release_resources();
    return -1;
  }

  status = device->CreateRenderTargetView(img->capture_texture.get(), nullptr, &img->capture_rt);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
    release_resources();
    return -1;
  }

  // Get the keyed mutex to synchronize with the encoding code
  status = img->capture_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img->capture_mutex);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
    release_resources();
    return -1;
  }

  resource1_t resource;
  status = img->capture_texture->QueryInterface(__uuidof(IDXGIResource1), (void **) &resource);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to query IDXGIResource1 [0x"sv << util::hex(status).to_string_view() << ']';
    release_resources();
    return -1;
  }

  // Create a handle for the encoder device to use to open this texture
  status = resource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &img->encoder_texture_handle);
  if (FAILED(status)) {
    BOOST_LOG(error) << "Failed to create shared texture handle [0x"sv << util::hex(status).to_string_view() << ']';
    release_resources();
    return -1;
  }

  // The generic data pointer carries the texture pointer itself, not pixel memory.
  img->data = reinterpret_cast<std::uint8_t *>(img->capture_texture.get());

  return 0;
}
/**
 * @brief Completes an image as a dummy by delegating to complete_img().
 *
 * @param img_base Image to turn into a dummy.
 * @return Whatever complete_img() returns for the dummy request.
 */
// This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
int
display_vram_t::dummy_img(platf::img_t *img_base) {
  constexpr bool as_dummy = true;
  return complete_img(img_base, as_dummy);
}
std::vector<DXGI_FORMAT>
display_vram_t::get_supported_capture_formats() {
return {
// scRGB FP16 is the ideal format for Wide Color Gamut and Advanced Color
// displays (both SDR and HDR). This format uses linear gamma, so we will
// use a linear->PQ shader for HDR and a linear->sRGB shader for SDR.
DXGI_FORMAT_R16G16B16A16_FLOAT,
// DXGI_FORMAT_R10G10B10A2_UNORM seems like it might give us frames already
// converted to SMPTE 2084 PQ, however it seems to actually just clamp the
// scRGB FP16 values that DWM is using when the desktop format is scRGB FP16.
//
// If there is a case where the desktop format is really SMPTE 2084 PQ, it
// might make sense to support capturing it without conversion to scRGB,
// but we avoid it for now.
// We include the 8-bit modes too for when the display is in SDR mode,
// while the client stream is HDR-capable. These UNORM formats can
// use our normal pixel shaders that expect sRGB input.
DXGI_FORMAT_B8G8R8A8_UNORM,
DXGI_FORMAT_B8G8R8X8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM,
};
}
/**
 * @brief Creates and initializes a hardware device for the requested pixel format.
 *
 * @param pix_fmt Target pixel format; only nv12 and p010 are accepted.
 * @return The initialized device, or nullptr if the format is rejected or
 *         the device's init() reports failure.
 */
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
  const bool supported = pix_fmt == platf::pix_fmt_e::nv12 || pix_fmt == platf::pix_fmt_e::p010;
  if (!supported) {
    BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
    return nullptr;
  }

  auto d3d_device = std::make_shared<hwdevice_t>();

  // A non-zero result from init() means the device is unusable.
  if (d3d_device->init(shared_from_this(), adapter.get(), pix_fmt)) {
    return nullptr;
  }

  return d3d_device;
}
int
init() {
BOOST_LOG(info) << "Compiling shaders..."sv;
scene_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/SceneVS.hlsl");
if (!scene_vs_hlsl) {
return -1;
}
convert_Y_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS.hlsl");
if (!convert_Y_ps_hlsl) {
return -1;
}
convert_Y_PQ_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS_PQ.hlsl");
if (!convert_Y_PQ_ps_hlsl) {
return -1;
}
convert_Y_linear_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS_Linear.hlsl");
if (!convert_Y_linear_ps_hlsl) {
return -1;
}
convert_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS.hlsl");
if (!convert_UV_ps_hlsl) {
return -1;
}
convert_UV_PQ_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS_PQ.hlsl");
if (!convert_UV_PQ_ps_hlsl) {
return -1;
}
convert_UV_linear_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS_Linear.hlsl");
if (!convert_UV_linear_ps_hlsl) {
return -1;
}
convert_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/ConvertUVVS.hlsl");
if (!convert_UV_vs_hlsl) {
return -1;
}
scene_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ScenePS.hlsl");
if (!scene_ps_hlsl) {
return -1;
}
scene_NW_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ScenePS_NW.hlsl");
if (!scene_NW_ps_hlsl) {
return -1;
}
BOOST_LOG(info) << "Compiled shaders"sv;
return 0;
}
} // namespace platf::dxgi