Use separate encoding and capture devices to enable parallel encoding and capture (#668)

Cameron Gutman, 2023-01-02 14:56:58 -06:00 (committed by GitHub)
parent 76ffa2a0b5
commit 0439d7a83a
4 changed files with 294 additions and 105 deletions

View File

@ -18,6 +18,10 @@
namespace platf::dxgi {
extern const char *format_str[];
// Add D3D11_CREATE_DEVICE_DEBUG here to enable the D3D11 debug runtime.
// You should have a debugger like WinDbg attached to receive debug messages.
auto constexpr D3D11_CREATE_DEVICE_FLAGS = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
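As the comment says, appending D3D11_CREATE_DEVICE_DEBUG turns on the debug runtime for every device this file creates. One illustrative way to wire that up for debug builds only (the _DEBUG guard is a suggestion, not part of this commit; the debug layer also requires the D3D11 SDK layers / Graphics Tools to be installed):

auto constexpr D3D11_CREATE_DEVICE_FLAGS = D3D11_CREATE_DEVICE_VIDEO_SUPPORT
#ifdef _DEBUG
  | D3D11_CREATE_DEVICE_DEBUG
#endif
  ;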
template<class T>
void Release(T *dxgi) {
dxgi->Release();
@ -27,6 +31,7 @@ using factory1_t = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory
using dxgi_t = util::safe_ptr<IDXGIDevice, Release<IDXGIDevice>>;
using dxgi1_t = util::safe_ptr<IDXGIDevice1, Release<IDXGIDevice1>>;
using device_t = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
using device1_t = util::safe_ptr<ID3D11Device1, Release<ID3D11Device1>>;
using device_ctx_t = util::safe_ptr<ID3D11DeviceContext, Release<ID3D11DeviceContext>>;
using adapter_t = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
using output_t = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
@ -36,6 +41,7 @@ using dup_t = util::safe_ptr<IDXGIOutputDuplication, Release<IDX
using texture2d_t = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
using texture1d_t = util::safe_ptr<ID3D11Texture1D, Release<ID3D11Texture1D>>;
using resource_t = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
using resource1_t = util::safe_ptr<IDXGIResource1, Release<IDXGIResource1>>;
using multithread_t = util::safe_ptr<ID3D11Multithread, Release<ID3D11Multithread>>;
using vs_t = util::safe_ptr<ID3D11VertexShader, Release<ID3D11VertexShader>>;
using ps_t = util::safe_ptr<ID3D11PixelShader, Release<ID3D11PixelShader>>;
@ -49,6 +55,7 @@ using sampler_state_t = util::safe_ptr<ID3D11SamplerState, Release<ID3D11S
using blob_t = util::safe_ptr<ID3DBlob, Release<ID3DBlob>>;
using depth_stencil_state_t = util::safe_ptr<ID3D11DepthStencilState, Release<ID3D11DepthStencilState>>;
using depth_stencil_view_t = util::safe_ptr<ID3D11DepthStencilView, Release<ID3D11DepthStencilView>>;
using keyed_mutex_t = util::safe_ptr<IDXGIKeyedMutex, Release<IDXGIKeyedMutex>>;
namespace video {
using device_t = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
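Like the existing aliases, the three new ones (device1_t, resource1_t, keyed_mutex_t) wrap COM interfaces in util::safe_ptr so the reference is released automatically. A minimal usage sketch, assuming an in-scope ID3D11Texture2D *texture created with the keyed-mutex misc flag:

{
  keyed_mutex_t mutex;
  if(SUCCEEDED(texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **)&mutex))) {
    // ... synchronize via mutex->AcquireSync()/ReleaseSync() ...
  }
} // safe_ptr calls mutex->Release() here; nothing to forget on error paths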

View File

@ -186,7 +186,7 @@ int display_base_t::init(int framerate, const std::string &display_name) {
adapter_p,
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
D3D11_CREATE_DEVICE_FLAGS,
featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
D3D11_SDK_VERSION,
&device,
@ -272,7 +272,10 @@ int display_base_t::init(int framerate, const std::string &display_name) {
return -1;
}
dxgi->SetGPUThreadPriority(7);
status = dxgi->SetGPUThreadPriority(7);
if(FAILED(status)) {
BOOST_LOG(warning) << "Failed to increase capture GPU thread priority. Please run the application as administrator for optimal performance.";
}
}
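The behavioral change in this hunk is small but deliberate: the SetGPUThreadPriority() result was previously ignored, and now it is surfaced as a warning. Priorities run from -7 to 7 with 0 as the default, and per the new message, raising the priority typically requires running as administrator. Condensed, the pattern is:

// Hedged sketch, condensed from the hunk above.
dxgi::dxgi_t dxgi;
if(SUCCEEDED(device->QueryInterface(IID_IDXGIDevice, (void **)&dxgi))) {
  if(FAILED(dxgi->SetGPUThreadPriority(7))) {
    // Non-fatal: capture continues at default GPU scheduling priority.
  }
}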
// Try to reduce latency

View File

@ -88,13 +88,26 @@ blob_t scene_ps_hlsl;
struct img_d3d_t : public platf::img_t {
std::shared_ptr<platf::display_t> display;
shader_res_t input_res;
render_target_t scene_rt;
// These objects are owned by the display_t's ID3D11Device
texture2d_t capture_texture;
render_target_t capture_rt;
keyed_mutex_t capture_mutex;
// These objects are owned by the hwdevice_t's ID3D11Device
texture2d_t encoder_texture;
shader_res_t encoder_input_res;
keyed_mutex_t encoder_mutex;
// This is the shared handle used by hwdevice_t to open capture_texture
HANDLE encoder_texture_handle = {};
texture2d_t texture;
bool dummy = false;
~img_d3d_t() override = default;
~img_d3d_t() override {
if(encoder_texture_handle) {
CloseHandle(encoder_texture_handle);
}
}
};
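This split is the heart of the commit: the capture-side objects are created on the display's device, the encoder-side objects on the per-session encoder device, and the NT handle plus a keyed mutex on each side tie the two views of one texture together. A hedged sketch of the capture-side half of the handshake (hypothetical helper; the real logic lives in snapshot() below):

int write_frame(img_d3d_t &img, ID3D11DeviceContext *ctx, ID3D11Texture2D *src) {
  // Block until the encoder side has released key 0, then take ownership.
  if(img.capture_mutex->AcquireSync(0, INFINITE) != S_OK) {
    return -1;
  }
  ctx->CopyResource(img.capture_texture.get(), src);
  img.capture_mutex->ReleaseSync(0); // hand key 0 back for the encoder's AcquireSync
  return 0;
}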
util::buffer_t<std::uint8_t> make_cursor_image(util::buffer_t<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info) {
@ -216,55 +229,65 @@ blob_t compile_vertex_shader(LPCSTR file) {
return compile_shader(file, "main_vs", "vs_5_0");
}
int init_rt(device_t::pointer device, shader_res_t &shader_res, render_target_t &render_target, int width, int height, texture2d_t::pointer tex) {
auto status = device->CreateShaderResourceView(tex, nullptr, &shader_res);
if(status) {
BOOST_LOG(error) << "Failed to create shader resource view for luma [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = device->CreateRenderTargetView(tex, nullptr, &render_target);
if(status) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
return 0;
}
class hwdevice_t : public platf::hwdevice_t {
public:
int convert(platf::img_t &img_base) override {
auto &img = (img_d3d_t &)img_base;
auto back_d3d_img = (img_d3d_t *)back_img.get();
device_ctx_p->IASetInputLayout(input_layout.get());
// Open the shared capture texture with our ID3D11Device
if(share_img(&img_base)) {
return -1;
}
// Acquire encoder mutex to synchronize with capture code
auto status = img.encoder_mutex->AcquireSync(0, INFINITE);
if(status != S_OK) {
BOOST_LOG(error) << "Failed to acquire encoder mutex [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Even though this image will never have racing updates, we must acquire the
// keyed mutex for PSSetShaderResources() to succeed.
status = back_d3d_img->encoder_mutex->AcquireSync(0, INFINITE);
if(status != S_OK) {
img.encoder_mutex->ReleaseSync(0);
BOOST_LOG(error) << "Failed to acquire back_d3d_img mutex [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
device_ctx->IASetInputLayout(input_layout.get());
_init_view_port(this->img.width, this->img.height);
device_ctx_p->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
device_ctx_p->VSSetShader(scene_vs.get(), nullptr, 0);
device_ctx_p->PSSetShader(convert_Y_ps.get(), nullptr, 0);
device_ctx_p->PSSetShaderResources(0, 1, &((img_d3d_t *)back_img.get())->input_res);
device_ctx_p->Draw(3, 0);
device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
device_ctx->PSSetShader(convert_Y_ps.get(), nullptr, 0);
device_ctx->PSSetShaderResources(0, 1, &back_d3d_img->encoder_input_res);
device_ctx->Draw(3, 0);
device_ctx_p->RSSetViewports(1, &outY_view);
device_ctx_p->PSSetShaderResources(0, 1, &img.input_res);
device_ctx_p->Draw(3, 0);
device_ctx->RSSetViewports(1, &outY_view);
device_ctx->PSSetShaderResources(0, 1, &img.encoder_input_res);
device_ctx->Draw(3, 0);
// Artifacts start appearing on the rendered image if Sunshine doesn't flush
// before rendering on the UV part of the image.
device_ctx_p->Flush();
device_ctx->Flush();
_init_view_port(this->img.width / 2, this->img.height / 2);
device_ctx_p->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
device_ctx_p->VSSetShader(convert_UV_vs.get(), nullptr, 0);
device_ctx_p->PSSetShader(convert_UV_ps.get(), nullptr, 0);
device_ctx_p->PSSetShaderResources(0, 1, &((img_d3d_t *)back_img.get())->input_res);
device_ctx_p->Draw(3, 0);
device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
device_ctx->PSSetShader(convert_UV_ps.get(), nullptr, 0);
device_ctx->PSSetShaderResources(0, 1, &back_d3d_img->encoder_input_res);
device_ctx->Draw(3, 0);
device_ctx_p->RSSetViewports(1, &outUV_view);
device_ctx_p->PSSetShaderResources(0, 1, &img.input_res);
device_ctx_p->Draw(3, 0);
device_ctx_p->Flush();
device_ctx->RSSetViewports(1, &outUV_view);
device_ctx->PSSetShaderResources(0, 1, &img.encoder_input_res);
device_ctx->Draw(3, 0);
device_ctx->Flush();
// Release encoder mutexes to allow capture code to reuse this image
back_d3d_img->encoder_mutex->ReleaseSync(0);
img.encoder_mutex->ReleaseSync(0);
return 0;
}
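convert() must pair every successful AcquireSync with exactly one ReleaseSync, including the error path above where the second acquire fails and the first mutex is released by hand. A hypothetical RAII wrapper makes the invariant explicit (not part of this commit; the code calls the mutex methods directly):

struct scoped_keyed_mutex {
  IDXGIKeyedMutex *mutex;
  HRESULT status;

  explicit scoped_keyed_mutex(IDXGIKeyedMutex *m)
      : mutex(m), status(m->AcquireSync(0, INFINITE)) {}
  ~scoped_keyed_mutex() {
    if(status == S_OK) {
      mutex->ReleaseSync(0); // only release what was actually acquired
    }
  }
};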
@ -294,8 +317,8 @@ public:
return;
}
device_ctx_p->VSSetConstantBuffers(0, 1, &info_scene);
device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix);
device_ctx->VSSetConstantBuffers(0, 1, &info_scene);
device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
this->color_matrix = std::move(color_matrix);
}
@ -303,8 +326,6 @@ public:
this->hwframe.reset(frame);
this->frame = frame;
auto device_p = (device_t::pointer)data;
auto out_width = frame->width;
auto out_height = frame->height;
@ -333,7 +354,7 @@ public:
t.Format = format;
t.BindFlags = D3D11_BIND_RENDER_TARGET;
auto status = device_p->CreateTexture2D(&t, nullptr, &img.texture);
auto status = device->CreateTexture2D(&t, nullptr, &img.encoder_texture);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@ -341,12 +362,12 @@ public:
img.width = out_width;
img.height = out_height;
img.data = (std::uint8_t *)img.texture.get();
img.data = (std::uint8_t *)img.encoder_texture.get();
img.row_pitch = out_width * 4;
img.pixel_pitch = 4;
float info_in[16 / sizeof(float)] { 1.0f / (float)out_width_f }; //aligned to 16-byte
info_scene = make_buffer(device_p, info_in);
info_scene = make_buffer(device.get(), info_in);
if(!info_scene) {
BOOST_LOG(error) << "Failed to create info scene buffer"sv;
@ -358,7 +379,7 @@ public:
D3D11_RTV_DIMENSION_TEXTURE2D
};
status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_Y_rt);
status = device->CreateRenderTargetView(img.encoder_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@ -366,7 +387,7 @@ public:
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(img.encoder_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
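Worth calling out here: both render target views alias the same NV12/P010 texture. An R8 (or R16 for P010) view exposes the luma plane and an R8G8 (or R16G16) view exposes the half-resolution interleaved chroma plane, which is what lets the two passes in convert() each write one plane. A sketch of the pair, assuming an NV12 texture:

D3D11_RENDER_TARGET_VIEW_DESC rt_desc {
  DXGI_FORMAT_R8_UNORM, // luma plane view (R16_UNORM for P010)
  D3D11_RTV_DIMENSION_TEXTURE2D
};
device->CreateRenderTargetView(nv12_tex, &rt_desc, &y_rt);

rt_desc.Format = DXGI_FORMAT_R8G8_UNORM; // chroma plane view, half resolution
device->CreateRenderTargetView(nv12_tex, &rt_desc, &uv_rt);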
@ -393,48 +414,81 @@ public:
}
int init(
std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p,
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
HRESULT status;
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_9_3,
D3D_FEATURE_LEVEL_9_2,
D3D_FEATURE_LEVEL_9_1
};
device_p->AddRef();
data = device_p;
HRESULT status = D3D11CreateDevice(
adapter_p,
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
D3D11_CREATE_DEVICE_FLAGS,
featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
D3D11_SDK_VERSION,
&device,
nullptr,
&device_ctx);
this->device_ctx_p = device_ctx_p;
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create encoder D3D11 device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
dxgi::dxgi_t dxgi;
status = device->QueryInterface(IID_IDXGIDevice, (void **)&dxgi);
if(FAILED(status)) {
BOOST_LOG(warning) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = dxgi->SetGPUThreadPriority(7);
if(FAILED(status)) {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run the application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device_p->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if(status) {
BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = device_p->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
status = device->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
if(status) {
BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = device_p->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
status = device->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
if(status) {
BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = device_p->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
status = device->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
if(status) {
BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
status = device_p->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &scene_ps);
status = device->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &scene_ps);
if(status) {
BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
color_matrix = make_buffer(device_p, ::video::colors[0]);
color_matrix = make_buffer(device.get(), ::video::colors[0]);
if(!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@ -444,7 +498,7 @@ public:
"SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0
};
status = device_p->CreateInputLayout(
status = device->CreateInputLayout(
&layout_desc, 1,
convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(),
&input_layout);
@ -454,24 +508,42 @@ public:
// Color the background black, so that the padding for keeping the aspect ratio
// is black
back_img = img.display->alloc_img();
if(img.display->dummy_img(back_img.get())) {
if(img.display->dummy_img(back_img.get()) || share_img(back_img.get())) {
BOOST_LOG(warning) << "Couldn't create an image to set background color to black"sv;
return -1;
}
device_ctx_p->IASetInputLayout(input_layout.get());
device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix);
device_ctx_p->VSSetConstantBuffers(0, 1, &info_scene);
blend_disable = make_blend(device.get(), false);
if(!blend_disable) {
return -1;
}
D3D11_SAMPLER_DESC sampler_desc {};
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
sampler_desc.MinLOD = 0;
sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;
status = device->CreateSamplerState(&sampler_desc, &sampler_linear);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create linear sampler state [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
device_ctx->IASetInputLayout(input_layout.get());
device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
device_ctx->VSSetConstantBuffers(0, 1, &info_scene);
device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
device_ctx->PSSetSamplers(0, 1, &sampler_linear);
device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
return 0;
}
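init() now creates a private D3D11 device for the encoder rather than borrowing the capture device, which is what allows capture and encoding to submit GPU work in parallel. Note the constraint this implies: for the shared-handle open in share_img() to succeed, both devices must live on the same adapter, hence the signature change from a device pointer to adapter_p. A minimal sketch of the arrangement (assuming the featureLevels array above):

// D3D_DRIVER_TYPE_UNKNOWN is mandatory whenever an explicit adapter is passed.
device_t capture_device, encoder_device;
D3D11CreateDevice(adapter_p, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_FLAGS,
  featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
  D3D11_SDK_VERSION, &capture_device, nullptr, nullptr);
D3D11CreateDevice(adapter_p, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_FLAGS,
  featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
  D3D11_SDK_VERSION, &encoder_device, nullptr, nullptr);
// Textures created on either device can now be shared with the other via NT handles.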
~hwdevice_t() override {
if(data) {
((ID3D11Device *)data)->Release();
}
}
private:
void _init_view_port(float x, float y, float width, float height) {
D3D11_VIEWPORT view {
@ -480,13 +552,52 @@ private:
0.0f, 1.0f
};
device_ctx_p->RSSetViewports(1, &view);
device_ctx->RSSetViewports(1, &view);
}
void _init_view_port(float width, float height) {
_init_view_port(0.0f, 0.0f, width, height);
}
int share_img(platf::img_t *img_base) {
auto img = (img_d3d_t *)img_base;
// If we've already opened the shared texture, we're done
if(img->encoder_texture) {
return 0;
}
device1_t device1;
auto status = device->QueryInterface(__uuidof(ID3D11Device1), (void **)&device1);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to query ID3D11Device1 [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Open a handle to the shared texture
status = device1->OpenSharedResource1(img->encoder_texture_handle, __uuidof(ID3D11Texture2D), (void **)&img->encoder_texture);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to open shared image texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Get the keyed mutex to synchronize with the capture code
status = img->encoder_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **)&img->encoder_mutex);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Create the SRV for the encoder texture
status = device->CreateShaderResourceView(img->encoder_texture.get(), nullptr, &img->encoder_input_res);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create shader resource view for encoding [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
return 0;
}
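Two details here are easy to miss: D3D11 view objects are bound to the device that created them, so the encoder needs its own SRV over the opened texture (the capture device's capture_rt cannot be reused), and OpenSharedResource1() on ID3D11Device1 is specifically required for handles created with D3D11_RESOURCE_MISC_SHARED_NTHANDLE; the older OpenSharedResource() only accepts legacy shared handles. Condensed, the consumer side is:

// Hedged sketch, condensed from share_img() above (error handling trimmed).
device1_t device1;
device->QueryInterface(__uuidof(ID3D11Device1), (void **)&device1);
device1->OpenSharedResource1(img->encoder_texture_handle,
  __uuidof(ID3D11Texture2D), (void **)&img->encoder_texture);
// The opened texture holds its own COM reference; the HANDLE itself remains
// owned by img_d3d_t and is closed in its destructor.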
public:
frame_t hwframe;
@ -497,6 +608,9 @@ public:
input_layout_t input_layout;
blend_t blend_disable;
sampler_state_t sampler_linear;
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
@ -518,7 +632,8 @@ public:
DXGI_FORMAT format;
device_ctx_t::pointer device_ctx_p;
device_t device;
device_ctx_t device_ctx;
};
capture_e display_vram_t::capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr<::platf::img_t> img, bool *cursor) {
@ -664,7 +779,7 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec
t.BindFlags = 0;
// Create a texture to store the most recent copy of the desktop
auto status = device->CreateTexture2D(&t, nullptr, &last_frame_copy);
status = device->CreateTexture2D(&t, nullptr, &last_frame_copy);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create frame copy texture [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
@ -683,17 +798,38 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec
return capture_e::error;
}
// Copy the texture into this image and the staging texture
device_ctx->CopyResource(img->texture.get(), src.get());
// Copy the texture to use for cursor-only updates
device_ctx->CopyResource(last_frame_copy.get(), src.get());
// Copy into the capture texture on the image with the mutex held
status = img->capture_mutex->AcquireSync(0, INFINITE);
if(status != S_OK) {
BOOST_LOG(error) << "Failed to acquire capture mutex [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
}
device_ctx->CopyResource(img->capture_texture.get(), src.get());
}
else if(capture_format == DXGI_FORMAT_UNKNOWN) {
// We don't know the final capture format yet, so we will encode a dummy image
BOOST_LOG(debug) << "Capture format is still unknown. Encoding a blank image"sv;
if(dummy_img(img)) {
// Finish creating the image as a dummy (if it hasn't happened already)
if(complete_img(img, true)) {
return capture_e::error;
}
auto dummy_data = std::make_unique<std::uint8_t[]>(img->row_pitch * img->height);
std::fill_n(dummy_data.get(), img->row_pitch * img->height, 0);
status = img->capture_mutex->AcquireSync(0, INFINITE);
if(status != S_OK) {
BOOST_LOG(error) << "Failed to acquire capture mutex [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
}
// Populate the image with dummy data. This is required because these images could be reused
// after rendering (in which case they would have a cursor already rendered into them).
device_ctx->UpdateSubresource(img->capture_texture.get(), 0, nullptr, dummy_data.get(), img->row_pitch, 0);
}
else {
// We must know the capture format in this path or we would have hit the above unknown format case
@ -705,7 +841,12 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec
// the call to AcquireNextFrame() because that won't be valid. It seems to return a texture
// in the unmodified desktop format (rather than the formats we passed to DuplicateOutput1())
// if called in that case.
device_ctx->CopyResource(img->texture.get(), last_frame_copy.get());
status = img->capture_mutex->AcquireSync(0, INFINITE);
if(status != S_OK) {
BOOST_LOG(error) << "Failed to acquire capture mutex [0x"sv << util::hex(status).to_string_view() << ']';
return capture_e::error;
}
device_ctx->CopyResource(img->capture_texture.get(), last_frame_copy.get());
}
if(cursor.visible && cursor_visible) {
@ -719,13 +860,16 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec
device_ctx->PSSetShader(scene_ps.get(), nullptr, 0);
device_ctx->RSSetViewports(1, &view);
device_ctx->PSSetShaderResources(0, 1, &cursor.input_res);
device_ctx->OMSetRenderTargets(1, &img->scene_rt, nullptr);
device_ctx->OMSetRenderTargets(1, &img->capture_rt, nullptr);
device_ctx->OMSetBlendState(blend_enable.get(), nullptr, 0xFFFFFFFFu);
device_ctx->RSSetViewports(1, &cursor.cursor_view);
device_ctx->Draw(3, 0);
device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
}
// Release the mutex to allow encoding of this frame
img->capture_mutex->ReleaseSync(0);
return capture_e::ok;
}
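Two details make this hunk correct: DXGI requires the keyed mutex to be acquired before any device access to the resource, which is why even the dummy-image UpdateSubresource path takes it, and the mutex is held across the cursor-blending draws, so the encoder can never observe a half-composited frame. The single ReleaseSync(0) before returning pairs with whichever of the three acquisition paths ran.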
@ -786,11 +930,12 @@ std::shared_ptr<platf::img_t> display_vram_t::alloc_img() {
return img;
}
// This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
int display_vram_t::complete_img(platf::img_t *img_base, bool dummy) {
auto img = (img_d3d_t *)img_base;
// If this already has a texture and it's not switching dummy state, nothing to do
if(img->texture && img->dummy == dummy) {
// If this already has a capture texture and it's not switching dummy state, nothing to do
if(img->capture_texture && img->dummy == dummy) {
return 0;
}
@ -801,10 +946,17 @@ int display_vram_t::complete_img(platf::img_t *img_base, bool dummy) {
}
// Reset the image (in case this was previously a dummy)
img->texture.reset();
img->input_res.reset();
img->scene_rt.reset();
img->capture_texture.reset();
img->capture_rt.reset();
img->capture_mutex.reset();
img->encoder_texture.reset();
img->encoder_input_res.reset();
img->encoder_mutex.reset();
img->data = nullptr;
if(img->encoder_texture_handle) {
CloseHandle(img->encoder_texture_handle);
img->encoder_texture_handle = NULL;
}
// Initialize format-dependent fields
img->pixel_pitch = get_pixel_pitch();
@ -820,34 +972,57 @@ int display_vram_t::complete_img(platf::img_t *img_base, bool dummy) {
t.Usage = D3D11_USAGE_DEFAULT;
t.Format = (capture_format == DXGI_FORMAT_UNKNOWN) ? DXGI_FORMAT_B8G8R8A8_UNORM : capture_format;
t.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
t.MiscFlags = D3D11_RESOURCE_MISC_SHARED_NTHANDLE | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;
auto status = device->CreateTexture2D(&t, nullptr, &img->texture);
auto dummy_data = std::make_unique<std::uint8_t[]>(img->row_pitch * img->height);
std::fill_n(dummy_data.get(), img->row_pitch * img->height, 0);
D3D11_SUBRESOURCE_DATA initial_data {
dummy_data.get(),
(UINT)img->row_pitch,
0
};
auto status = device->CreateTexture2D(&t, &initial_data, &img->capture_texture);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
if(init_rt(device.get(), img->input_res, img->scene_rt, img->width, img->height, img->texture.get())) {
status = device->CreateRenderTargetView(img->capture_texture.get(), nullptr, &img->capture_rt);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->data = (std::uint8_t *)img->texture.get();
// Get the keyed mutex to synchronize with the encoding code
status = img->capture_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **)&img->capture_mutex);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
resource1_t resource;
status = img->capture_texture->QueryInterface(__uuidof(IDXGIResource1), (void **)&resource);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to query IDXGIResource1 [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// Create a handle for the encoder device to use to open this texture
status = resource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &img->encoder_texture_handle);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create shared texture handle [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
img->data = (std::uint8_t *)img->capture_texture.get();
return 0;
}
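A design note on the access mask above: the shared handle is created with DXGI_SHARED_RESOURCE_READ only, so the encoder device can sample the capture texture but never write to it. All writes (the desktop copy, the dummy fill, the cursor blend) stay on the capture device, keeping the synchronization one-directional. The zero-filled D3D11_SUBRESOURCE_DATA also means a freshly (re)created image starts from known contents without needing a mutex-guarded clear.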
// This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
int display_vram_t::dummy_img(platf::img_t *img_base) {
auto img = (img_d3d_t *)img_base;
if(complete_img(img, true)) {
return -1;
}
auto dummy_data = std::make_unique<std::uint8_t[]>(img->row_pitch * img->height);
std::fill_n(dummy_data.get(), img->row_pitch * img->height, 0);
device_ctx->UpdateSubresource(img->texture.get(), 0, nullptr, dummy_data.get(), img->row_pitch, 0);
return 0;
return complete_img(img_base, true);
}
std::vector<DXGI_FORMAT> display_vram_t::get_supported_sdr_capture_formats() {
@ -865,8 +1040,7 @@ std::shared_ptr<platf::hwdevice_t> display_vram_t::make_hwdevice(pix_fmt_e pix_f
auto ret = hwdevice->init(
shared_from_this(),
device.get(),
device_ctx.get(),
adapter.get(),
pix_fmt);
if(ret) {

View File

@ -441,11 +441,10 @@ static encoder_t nvenc {
std::make_optional<encoder_t::option_t>({ "qp"s, &config::video.qp }),
"h264_nvenc"s,
},
PARALLEL_ENCODING,
#ifdef _WIN32
DEFAULT,
dxgi_make_hwdevice_ctx
#else
PARALLEL_ENCODING,
cuda_make_hwdevice_ctx
#endif
};
@ -486,7 +485,7 @@ static encoder_t amdvce {
std::make_optional<encoder_t::option_t>({ "qp_p"s, &config::video.qp }),
"h264_amf"s,
},
DEFAULT,
PARALLEL_ENCODING,
dxgi_make_hwdevice_ctx
};
#endif
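With the encoder on its own device, the D3D11-backed encoders no longer have to serialize against capture: NVENC on Windows loses its DEFAULT override, and AMD VCE is promoted from DEFAULT to PARALLEL_ENCODING. A hypothetical illustration of how such a capability flag would gate behavior (the real flag handling lives elsewhere in video.cpp):

// Hypothetical sketch; PARALLEL_ENCODING follows the flag names used above.
if(encoder.flags & PARALLEL_ENCODING) {
  // capture thread and encoder session may submit GPU work concurrently
}
else {
  // otherwise capture and encoding must take turns on the shared device
}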
@ -1409,6 +1408,12 @@ void capture_async(
std::move(hwdevice),
ref->reinit_event, *ref->encoder_p,
channel_data);
// Free images that weren't consumed by the encoder before it quit.
// This is critical to allow the display_t to be freed correctly.
while(images->peek()) {
images->pop();
}
}
}
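A closing note on the drain above: each img_d3d_t holds a std::shared_ptr<platf::display_t> back to the display that allocated it, so images left sitting in the queue after the encoder exits would keep the display, and with it the capture device and its shared textures, alive indefinitely. Popping the leftovers drops those references so display_t can be destroyed and re-created cleanly on reinit.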