From 127b5501d9abc50e03fc1a22be5719968cd3c868 Mon Sep 17 00:00:00 2001 From: loki Date: Fri, 30 Apr 2021 20:01:15 +0200 Subject: [PATCH 01/18] Render luma onto nv12 surface --- CMakeLists.txt | 35 +- assets/MergeUVPS.hlsl | 27 + assets/MergeUVVS.hlsl | 23 + assets/ScreenPS.hlsl | 20 + assets/ScreenVS.hlsl | 23 + assets/YCbCrPS.hlsl | 40 ++ sunshine/platform/common.h | 15 + sunshine/platform/windows/display.h | 1 + sunshine/platform/windows/display_vram.cpp | 549 +++++++++++++++++---- 9 files changed, 601 insertions(+), 132 deletions(-) create mode 100644 assets/MergeUVPS.hlsl create mode 100644 assets/MergeUVVS.hlsl create mode 100644 assets/ScreenPS.hlsl create mode 100644 assets/ScreenVS.hlsl create mode 100644 assets/YCbCrPS.hlsl diff --git a/CMakeLists.txt b/CMakeLists.txt index 1347b7d7..55d04342 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,39 +4,6 @@ project(Sunshine) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) -# On MSYS2, building a stand-alone binary that links with ffmpeg is not possible, -# Therefore, ffmpeg, libx264 and libx265 must be build from source -if(WIN32) - file( - DOWNLOAD "https://github.com/TheElixZammuto/sunshine-prebuilt/releases/download/1.0.0/pre-compiled.zip" "${CMAKE_CURRENT_BINARY_DIR}/pre-compiled.zip" - TIMEOUT 60 - EXPECTED_HASH SHA256=5d59986bd7f619eaaf82b2dd56b5127b747c9cbe8db61e3b898ff6b485298ed6) - - file(ARCHIVE_EXTRACT - INPUT "${CMAKE_CURRENT_BINARY_DIR}/pre-compiled.zip" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/pre-compiled) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") - - if(NOT DEFINED SUNSHINE_PREPARED_BINARIES) - set(SUNSHINE_PREPARED_BINARIES "${CMAKE_CURRENT_BINARY_DIR}/pre-compiled/windows") - endif() - - set(FFMPEG_INCLUDE_DIRS - ${SUNSHINE_PREPARED_BINARIES}/include) - set(FFMPEG_LIBRARIES - ${SUNSHINE_PREPARED_BINARIES}/lib/libavcodec.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libavdevice.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libavfilter.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libavformat.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libavutil.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libpostproc.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libswresample.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libswscale.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libx264.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libx265.a - ${SUNSHINE_PREPARED_BINARIES}/lib/libhdr10plus.a - z lzma bcrypt libiconv.a) -endif() add_subdirectory(Simple-Web-Server) add_subdirectory(moonlight-common-c/enet) @@ -108,7 +75,7 @@ if(WIN32) wsock32 ws2_32 iphlpapi - d3d11 dxgi + d3d11 dxgi D3DCompiler setupapi ) diff --git a/assets/MergeUVPS.hlsl b/assets/MergeUVPS.hlsl new file mode 100644 index 00000000..501425ef --- /dev/null +++ b/assets/MergeUVPS.hlsl @@ -0,0 +1,27 @@ +//-------------------------------------------------------------------------------------- +// CombinedUVMipsPS.hlsl +//-------------------------------------------------------------------------------------- +Texture2D txInputU : register(t0); +Texture2D txInputV : register(t1); +Texture1D txInputShift : register(t2); + +SamplerState GenericSampler : register(s0); + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float PS(PS_INPUT input) : SV_Target +{ + float fShift = (float)txInputShift.Sample(GenericSampler, input.Tex.x); + + if(fShift == 0.0f) + return (float)txInputU.SampleLevel(GenericSampler, input.Tex, 1.0f); + else + return (float)txInputV.SampleLevel(GenericSampler, input.Tex, 1.0f); +} \ No newline at end of file diff --git a/assets/MergeUVVS.hlsl b/assets/MergeUVVS.hlsl new file mode 100644 index 00000000..618b0f58 --- /dev/null +++ b/assets/MergeUVVS.hlsl @@ -0,0 +1,23 @@ +//-------------------------------------------------------------------------------------- +// CombinedUVVS.hlsl +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS(uint vI : SV_VERTEXID) +{ + PS_INPUT output = (PS_INPUT)0; + + float2 texcoord = float2(vI & 1, vI >> 1); + + output.Pos = float4((texcoord.x - 0.5f) * 2.0f, -(texcoord.y + 0.0f) * 0.5f, 0.0f, 1.0f); + output.Tex = texcoord; + + return output; +} \ No newline at end of file diff --git a/assets/ScreenPS.hlsl b/assets/ScreenPS.hlsl new file mode 100644 index 00000000..2e354877 --- /dev/null +++ b/assets/ScreenPS.hlsl @@ -0,0 +1,20 @@ +//-------------------------------------------------------------------------------------- +// ScreenPS.hlsl +//-------------------------------------------------------------------------------------- +Texture2D txInput : register(t0); + +SamplerState GenericSampler : register(s0); + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS(PS_INPUT input) : SV_Target +{ + return txInput.Sample(GenericSampler, input.Tex); +} \ No newline at end of file diff --git a/assets/ScreenVS.hlsl b/assets/ScreenVS.hlsl new file mode 100644 index 00000000..c89c5cf9 --- /dev/null +++ b/assets/ScreenVS.hlsl @@ -0,0 +1,23 @@ +//-------------------------------------------------------------------------------------- +// ScreenVS.hlsl +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS(uint vI : SV_VERTEXID) +{ + PS_INPUT output = (PS_INPUT)0; + + float2 texcoord = float2(vI & 1, vI >> 1); + + output.Pos = float4((texcoord.x - 0.5f) * 2.0f, -(texcoord.y - 0.5f) * 2.0f, 0.0f, 1.0f); + output.Tex = texcoord; + + return output; +} \ No newline at end of file diff --git a/assets/YCbCrPS.hlsl b/assets/YCbCrPS.hlsl new file mode 100644 index 00000000..88ca0d21 --- /dev/null +++ b/assets/YCbCrPS.hlsl @@ -0,0 +1,40 @@ +//-------------------------------------------------------------------------------------- +// YCbCrPS2.hlsl +//-------------------------------------------------------------------------------------- +Texture2D txInput : register(t0); + +SamplerState GenericSampler : register(s0); + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD; +}; + +struct PS_OUTPUT +{ + float ColorY : SV_Target0; + float2 ColorU: SV_Target1; + float2 ColorV: SV_Target2; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +PS_OUTPUT PS(PS_INPUT input) : SV_Target +{ + PS_OUTPUT output; + + float4 InputColor = txInput.Sample(GenericSampler, input.Tex); + + // Range 0-255 + output.ColorY = (0.257f * InputColor.r + 0.504f * InputColor.g + 0.098f * InputColor.b) + (16 / 256.0f); + output.ColorU = (-0.148f * InputColor.r - 0.291f * InputColor.g + 0.439f * InputColor.b) + (128.0f / 256.0f); + output.ColorV = (0.439f * InputColor.r - 0.368f * InputColor.g - 0.071f * InputColor.b) + (128.0f / 256.0f); + + output.ColorY = clamp(output.ColorY, 0.0f, 255.0f); + output.ColorU = clamp(output.ColorU, 0.0f, 255.0f); + output.ColorV = clamp(output.ColorV, 0.0f, 255.0f); + + return output; +} \ No newline at end of file diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 140b54b1..fa6b3be7 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -43,6 +43,21 @@ enum class pix_fmt_e { unknown }; +inline std::string_view from_pix_fmt(pix_fmt_e pix_fmt) { +using namespace std::literals; +#define _CONVERT(x) case pix_fmt_e:: x : return #x ## sv + switch(pix_fmt) { + _CONVERT(yuv420p); + _CONVERT(yuv420p10); + _CONVERT(nv12); + _CONVERT(p010); + _CONVERT(unknown); + } +#undef _CONVERT + + return "unknown"sv; +} + struct gamepad_state_t { std::uint16_t buttonFlags; std::uint8_t lt; diff --git a/sunshine/platform/windows/display.h b/sunshine/platform/windows/display.h index 039e6a75..8bc7ae9e 100644 --- a/sunshine/platform/windows/display.h +++ b/sunshine/platform/windows/display.h @@ -32,6 +32,7 @@ using output_t = util::safe_ptr>; using output1_t = util::safe_ptr>; using dup_t = util::safe_ptr>; using texture2d_t = util::safe_ptr>; +using texture1d_t = util::safe_ptr>; using resource_t = util::safe_ptr>; using multithread_t = util::safe_ptr>; diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 0d87b40c..af386886 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -1,3 +1,7 @@ +#include + +#include + #include "sunshine/main.h" #include "display.h" @@ -6,9 +10,29 @@ using namespace std::literals; } namespace platf::dxgi { +constexpr float aquamarine[] { 0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f }; + +using input_layout_t = util::safe_ptr>; +using render_target_t = util::safe_ptr>; +using shader_res_t = util::safe_ptr>; +using raster_state_t = util::safe_ptr>; +using sampler_state_t = util::safe_ptr>; +using vs_t = util::safe_ptr>; +using ps_t = util::safe_ptr>; +using blob_t = util::safe_ptr>; +using depth_stencil_state_t = util::safe_ptr>; +using depth_stencil_view_t = util::safe_ptr>; + +blob_t merge_UV_vs_hlsl; +blob_t merge_UV_ps_hlsl; +blob_t screen_vs_hlsl; +blob_t screen_ps_hlsl; +blob_t YCrCb_ps_hlsl; + struct img_d3d_t : public platf::img_t { - std::shared_ptr display; + shader_res_t input_res; texture2d_t texture; + std::shared_ptr display; ~img_d3d_t() override = default; }; @@ -97,9 +121,43 @@ util::buffer_t make_cursor_image(util::buffer_t &&im return cursor_img; } +blob_t compile_shader(LPCSTR file, LPCSTR entrypoint, LPCSTR shader_model) { + blob_t::pointer msg_p = nullptr; + blob_t::pointer compiled_p; + + DWORD flags = D3DCOMPILE_ENABLE_STRICTNESS; + +#ifndef NDEBUG + flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + std::wstring_convert, wchar_t> converter; + + auto wFile = converter.from_bytes(file); + auto status = D3DCompileFromFile(wFile.c_str(), nullptr, nullptr, entrypoint, shader_model, flags, 0, &compiled_p, &msg_p); + + if(msg_p) { + BOOST_LOG(warning) << std::string_view { (const char *)msg_p->GetBufferPointer(), msg_p->GetBufferSize() - 1 }; + msg_p->Release(); + } + + if(status) { + BOOST_LOG(error) << "Couldn't compile ["sv << file << "] [0x"sv << util::hex(status).to_string_view() << ']'; + return nullptr; + } + + return blob_t { compiled_p }; +} + +blob_t compile_pixel_shader(LPCSTR file) { + return compile_shader(file, "PS", "ps_5_0"); +} + +blob_t compile_vertex_shader(LPCSTR file) { + return compile_shader(file, "VS", "vs_5_0"); +} + class hwdevice_t : public platf::hwdevice_t { public: - hwdevice_t(std::vector *hwdevices_p) : hwdevices_p { hwdevices_p } {} hwdevice_t() = delete; @@ -126,23 +184,9 @@ public: RECT rect_in { left_in, top_in, right_in, bottom_in }; RECT rect_out { left_out, top_out, right_out, bottom_out }; - - ctx->VideoProcessorSetStreamSourceRect(processor.get(), 1, TRUE, &rect_in); - ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect_out); } int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) { - D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } }; - - video::processor_in_t::pointer processor_in_p; - auto status = device->CreateVideoProcessorInputView(texture, processor_e.get(), &input_desc, &processor_in_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create cursor VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - - cursor_in.reset(processor_in_p); - cursor_width = width; cursor_height = height; cursor_scaled_width = ((double)width) / in_width * out_width; @@ -154,38 +198,75 @@ public: int convert(platf::img_t &img_base) override { auto &img = (img_d3d_t&)img_base; - auto it = texture_to_processor_in.find(img.texture.get()); - if(it == std::end(texture_to_processor_in)) { - D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } }; + if(!img.input_res) { + auto device = (device_t::pointer)data; - video::processor_in_t::pointer processor_in_p; - auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p); + D3D11_SHADER_RESOURCE_VIEW_DESC desc { + DXGI_FORMAT_B8G8R8A8_UNORM, + D3D11_SRV_DIMENSION_TEXTURE2D + }; + desc.Texture2D.MipLevels = 1; + + shader_res_t::pointer input_rec_p; + auto status = device->CreateShaderResourceView(img.texture.get(), &desc, &input_rec_p); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create input shader resource view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first; + img.input_res.reset(input_rec_p); } - auto &processor_in = it->second; - D3D11_VIDEO_PROCESSOR_STREAM stream[] { - { TRUE, 0, 0, 0, 0, nullptr, processor_in.get() }, - { TRUE, 0, 0, 0, 0, nullptr, cursor_in.get() } + auto nv12_rt_p = nv12_rt.get(); + auto sampler_point_p = sampler_point.get(); + auto input_res_p = img.input_res.get(); + auto luma_sr_p = luma_sr.get(); + + render_target_t::pointer pYCbCrRT[] { + luma_rt.get(), chromaCB_rt.get(), chromaCR_rt.get() }; - auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, cursor_visible ? 2 : 1, stream); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + shader_res_t::pointer merge_ress[] { + chromaCB_sr.get(), chromaCR_sr.get(), shift_sr.get() + }; + + _init_view_port(out_width, out_height); + device_ctx_p->PSSetSamplers(0, 1, &sampler_point_p); + + device_ctx_p->OMSetRenderTargets(3, pYCbCrRT, nullptr); + for(auto rt : pYCbCrRT) { + device_ctx_p->ClearRenderTargetView(rt, aquamarine); } + device_ctx_p->VSSetShader(screen_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(YCrCb_ps.get(), nullptr, 0); + device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); + device_ctx_p->Draw(4, 0); + device_ctx_p->Flush(); + + // downsample + device_ctx_p->GenerateMips(chromaCR_sr.get()); + device_ctx_p->GenerateMips(chromaCB_sr.get()); + + device_ctx_p->OMSetRenderTargets(1, &nv12_rt_p, nullptr); + device_ctx_p->ClearRenderTargetView(nv12_rt_p, aquamarine); + device_ctx_p->VSSetShader(screen_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(screen_ps.get(), nullptr, 0); + device_ctx_p->PSSetShaderResources(0, 1, &luma_sr_p); + device_ctx_p->Draw(4, 0); + device_ctx_p->Flush(); + + _init_view_port(out_width, out_height *2); + device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(merge_UV_ps.get(), nullptr, 0); + for(int x = 0; x < ARRAYSIZE(merge_ress); ++x) { + device_ctx_p->PSSetShaderResources(x, 1, &merge_ress[x]); + } + device_ctx_p->Draw(4, 0); + device_ctx_p->Flush(); return 0; } - void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { - colorspace |= (color_range >> 4); - ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace); - } + void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {} int init( std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, @@ -194,6 +275,11 @@ public: ) { HRESULT status; + device_p->AddRef(); + data = device_p; + + this->device_ctx_p = device_ctx_p; + cursor_visible = false; platf::hwdevice_t::img = &img; @@ -203,53 +289,56 @@ public: this->in_width = in_width; this->in_height = in_height; - video::device_t::pointer vdevice_p; - status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to query ID3D11VideoDevice interface [0x"sv << util::hex(status).to_string_view() << ']'; + vs_t::pointer screen_vs_p; + status = device_p->CreateVertexShader(screen_vs_hlsl->GetBufferPointer(), screen_vs_hlsl->GetBufferSize(), nullptr, &screen_vs_p); + if(status) { + BOOST_LOG(error) << "Failed to create screen vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - device.reset(vdevice_p); + screen_vs.reset(screen_vs_p); - video::ctx_t::pointer ctx_p; - status = device_ctx_p->QueryInterface(IID_ID3D11VideoContext, (void**)&ctx_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to query ID3D11VideoContext interface [0x"sv << util::hex(status).to_string_view() << ']'; + ps_t::pointer screen_ps_p; + status = device_p->CreatePixelShader(screen_ps_hlsl->GetBufferPointer(), screen_ps_hlsl->GetBufferSize(), nullptr, &screen_ps_p); + if(status) { + BOOST_LOG(error) << "Failed to create screen pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - ctx.reset(ctx_p); + screen_ps.reset(screen_ps_p); - D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc { - D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE, - { 1, 1 }, (UINT)in_width, (UINT)in_height, - { 1, 1 }, (UINT)out_width, (UINT)out_height, - D3D11_VIDEO_USAGE_OPTIMAL_QUALITY + ps_t::pointer YCrCb_ps_p; + status = device_p->CreatePixelShader(YCrCb_ps_hlsl->GetBufferPointer(), YCrCb_ps_hlsl->GetBufferSize(), nullptr, &YCrCb_ps_p); + if(status) { + BOOST_LOG(error) << "Failed to create YCrCb pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + YCrCb_ps.reset(YCrCb_ps_p); + + ps_t::pointer merge_UV_ps_p; + status = device_p->CreatePixelShader(merge_UV_ps_hlsl->GetBufferPointer(), merge_UV_ps_hlsl->GetBufferSize(), nullptr, &merge_UV_ps_p); + if(status) { + BOOST_LOG(error) << "Failed to create mergeUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + merge_UV_ps.reset(merge_UV_ps_p); + + vs_t::pointer merge_UV_vs_p; + status = device_p->CreateVertexShader(merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), nullptr, &merge_UV_vs_p); + if(status) { + BOOST_LOG(error) << "Failed to create mergeUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + merge_UV_vs.reset(merge_UV_vs_p); + + D3D11_INPUT_ELEMENT_DESC layout_desc { + "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }; - video::processor_enum_t::pointer vp_e_p; - status = device->CreateVideoProcessorEnumerator(&contentDesc, &vp_e_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create video processor enumerator [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - processor_e.reset(vp_e_p); - - D3D11_VIDEO_PROCESSOR_CAPS proc_caps; - processor_e->GetVideoProcessorCaps(&proc_caps); - if(!(proc_caps.FeatureCaps & D3D11_VIDEO_PROCESSOR_FEATURE_CAPS_ALPHA_STREAM)) { - BOOST_LOG(warning) << "VideoProcessorSetStreamAlpha() not supported, hardware accelerated mouse cannot be added to the video stream"sv; - } - - video::processor_t::pointer processor_p; - status = device->CreateVideoProcessor(processor_e.get(), 0, &processor_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create video processor [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - processor.reset(processor_p); - - // Tell video processor alpha values need to be enabled - ctx->VideoProcessorSetStreamAlpha(processor.get(), 1, TRUE, 1.0f); + input_layout_t::pointer input_layout_p; + status = device_p->CreateInputLayout( + &layout_desc, 1, + merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), + &input_layout_p); + input_layout.reset(input_layout_p); D3D11_TEXTURE2D_DESC t {}; t.Width = out_width; @@ -259,12 +348,12 @@ public: t.SampleDesc.Count = 1; t.Usage = D3D11_USAGE_DEFAULT; t.Format = pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010; - t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER; + t.BindFlags = D3D11_BIND_RENDER_TARGET; dxgi::texture2d_t::pointer tex_p {}; status = device_p->CreateTexture2D(&t, nullptr, &tex_p); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create video output texture [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create render target texture [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } @@ -276,17 +365,118 @@ public: img.row_pitch = out_width; img.pixel_pitch = 1; - D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 }; - video::processor_out_t::pointer processor_out_p; - status = device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p); + D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc { + DXGI_FORMAT_R8_UNORM, + D3D11_RTV_DIMENSION_TEXTURE2D + }; + + render_target_t::pointer nv12_rt_p; + status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_rt_p); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - processor_out.reset(processor_out_p); + nv12_rt.reset(nv12_rt_p); + + if( + _init_rt(&luma_sr, &luma_rt, out_width, out_height, 1, DXGI_FORMAT_R8_UNORM) || + _init_rt(&chromaCB_sr, &chromaCB_rt, out_width, out_height, 2, DXGI_FORMAT_R8_UNORM, D3D11_RESOURCE_MISC_GENERATE_MIPS) || + _init_rt(&chromaCR_sr, &chromaCR_rt, out_width, out_height, 2, DXGI_FORMAT_R8_UNORM, D3D11_RESOURCE_MISC_GENERATE_MIPS) || + _init_shift_sr(out_width)) + { + return -1; + } + + // t.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + // t.BindFlags = D3D11_BIND_DEPTH_STENCIL; + // status = device_p->CreateTexture2D(&t, nullptr, &tex_p); + // if(FAILED(status)) { + // BOOST_LOG(error) << "Failed to create depth stencil texture [0x"sv << util::hex(status).to_string_view() << ']'; + // return -1; + // } + // depth_stencil.reset(tex_p); + + D3D11_SAMPLER_DESC sampler_desc {}; + sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; + sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER; + sampler_desc.MinLOD = 0; + sampler_desc.MaxLOD = D3D11_FLOAT32_MAX; + + sampler_state_t::pointer sampler_state_p; + status = device_p->CreateSamplerState(&sampler_desc, &sampler_state_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + sampler_point.reset(sampler_state_p); + + // D3D11_DEPTH_STENCIL_DESC depth_stencil_desc {}; + // depth_stencil_desc.DepthEnable = FALSE; + // depth_stencil_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; + // depth_stencil_desc.StencilEnable = true; + // depth_stencil_desc.StencilReadMask = 0xFF; + // depth_stencil_desc.StencilWriteMask = 0xFF; + + // depth_stencil_desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + // depth_stencil_desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR; + // depth_stencil_desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + // depth_stencil_desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + + // depth_stencil_desc.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + // depth_stencil_desc.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR; + // depth_stencil_desc.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + // depth_stencil_desc.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + + // depth_stencil_state_t::pointer depth_state_p; + // status = device_p->CreateDepthStencilState(&depth_stencil_desc, &depth_state_p); + // if(FAILED(status)) { + // BOOST_LOG(error) << "Failed to create depth stencil state [0x"sv << util::hex(status).to_string_view() << ']'; + // return -1; + // } + // depth_state.reset(depth_state_p); + + // D3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc {}; + // depth_view_desc.Format = t.Format; + // depth_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; + + // depth_stencil_view_t::pointer depth_view_p; + // status = device_p->CreateDepthStencilView(depth_stencil.get(), &depth_view_desc, &depth_view_p); + // if(FAILED(status)) { + // BOOST_LOG(error) << "Failed to create depth stencil view [0x"sv << util::hex(status).to_string_view() << ']'; + // return -1; + // } + // depth_view.reset(depth_view_p); + + // // Setup the raster description which will determine how and what polygons will be drawn. + // D3D11_RASTERIZER_DESC raster_desc; + // raster_desc.AntialiasedLineEnable = false; + // raster_desc.CullMode = D3D11_CULL_BACK; + // raster_desc.DepthBias = 0; + // raster_desc.DepthBiasClamp = 0.0f; + // raster_desc.DepthClipEnable = true; + // raster_desc.FillMode = D3D11_FILL_SOLID; + // raster_desc.FrontCounterClockwise = false; + // raster_desc.MultisampleEnable = false; + // raster_desc.ScissorEnable = false; + // raster_desc.SlopeScaledDepthBias = 0.0f; + + // raster_state_t::pointer raster_state_p; + // status = device_p->CreateRasterizerState(&raster_desc, &raster_state_p); + // if(FAILED(status)) { + // BOOST_LOG(error) << "Failed to create rasterizer state [0x"sv << util::hex(status).to_string_view() << ']'; + // return -1; + // } + // raster_state.reset(raster_state_p); + + auto sampler_p = sampler_point.get(); + device_ctx_p->PSSetSamplers(0, 1, &sampler_p); + // device_ctx_p->RSSetState(raster_state.get()); + device_ctx_p->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + device_ctx_p->IASetInputLayout(input_layout.get()); - device_p->AddRef(); - data = device_p; return 0; } @@ -300,16 +490,148 @@ public: hwdevices_p->erase(it); } } +private: + void _init_view_port(float width, float height) { + D3D11_VIEWPORT view { + 0.0f, 0.0f, + width, height, + 0.0f, 1.0f + }; + + device_ctx_p->RSSetViewports(1, &view); + } + + int _init_rt(shader_res_t *shader_res, render_target_t *render_target, int width, int height, int mip_levels, DXGI_FORMAT format, int flags = 0) { + D3D11_TEXTURE2D_DESC desc {}; + + desc.Width = width; + desc.Height = height; + desc.Format = format; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; + desc.MipLevels = mip_levels; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.MiscFlags = flags; + + auto device = (device_t::pointer)data; + + texture2d_t::pointer tex_p; + auto status = device->CreateTexture2D(&desc, nullptr, &tex_p); + if(status) { + BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + texture2d_t tex { tex_p }; + + if(shader_res) { + D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_desc { + format, + D3D11_SRV_DIMENSION_TEXTURE2D + }; + shader_resource_desc.Texture2D.MipLevels = mip_levels; + + shader_res_t::pointer shader_res_p; + device->CreateShaderResourceView(tex_p, &shader_resource_desc, &shader_res_p); + if(status) { + BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + shader_res->reset(shader_res_p); + } + + if(render_target) { + D3D11_RENDER_TARGET_VIEW_DESC render_target_desc { + format, + D3D11_RTV_DIMENSION_TEXTURE2D + }; + + render_target_t::pointer render_target_p; + device->CreateRenderTargetView(tex_p, &render_target_desc, &render_target_p); + if(status) { + BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + render_target->reset(render_target_p); + } + + return 0; + } + + int _init_shift_sr(int width) { + auto device = (device_t::pointer)data; + D3D11_TEXTURE1D_DESC desc {}; + desc.Width = width; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_R8_UNORM; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + util::buffer_t data { (std::size_t)width }; + for(int x = 0; x < data.size(); ++x) { + data[x] = x & 1; + } + + D3D11_SUBRESOURCE_DATA data_res { + std::begin(data), + (UINT)data.size() + }; + + texture1d_t::pointer tex_p {}; + auto status = device->CreateTexture1D(&desc, &data_res, &tex_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create shift texture [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + texture1d_t tex { tex_p }; + + D3D11_SHADER_RESOURCE_VIEW_DESC res_desc { + DXGI_FORMAT_R8_UNORM, + D3D11_SRV_DIMENSION_TEXTURE1D + }; + res_desc.Texture1D.MipLevels = 1; + + shader_res_t::pointer shader_res_p; + device->CreateShaderResourceView(tex_p, &res_desc, &shader_res_p); + if(status) { + BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + shift_sr.reset(shader_res_p); + + return 0; + } + +public: + // raster_state_t raster_state; + + sampler_state_t sampler_point; + + // depth_stencil_view_t depth_view; + // depth_stencil_state_t depth_state; + + shader_res_t chromaCB_sr; + shader_res_t chromaCR_sr; + shader_res_t luma_sr; + shader_res_t shift_sr; + + input_layout_t input_layout; + // texture2d_t depth_stencil; + + render_target_t luma_rt; + render_target_t nv12_rt; + render_target_t chromaCB_rt; + render_target_t chromaCR_rt; img_d3d_t img; - video::device_t device; - video::ctx_t ctx; - video::processor_enum_t processor_e; - video::processor_t processor; - video::processor_out_t processor_out; - std::unordered_map texture_to_processor_in; - video::processor_in_t cursor_in; + vs_t merge_UV_vs; + ps_t merge_UV_ps; + vs_t screen_vs; + ps_t screen_ps; + ps_t YCrCb_ps; + ps_t ChromaCbCr_ps; bool cursor_visible; @@ -319,6 +641,8 @@ public: LONG in_width, in_height; double out_width, out_height; + device_ctx_t::pointer device_ctx_p; + std::vector *hwdevices_p; }; @@ -429,7 +753,7 @@ std::shared_ptr display_vram_t::alloc_img() { t.SampleDesc.Count = 1; t.Usage = D3D11_USAGE_DEFAULT; t.Format = format; - t.BindFlags = D3D11_BIND_RENDER_TARGET; + t.BindFlags = D3D11_BIND_SHADER_RESOURCE; dxgi::texture2d_t::pointer tex_p {}; auto status = device->CreateTexture2D(&t, nullptr, &tex_p); @@ -438,12 +762,12 @@ std::shared_ptr display_vram_t::alloc_img() { return nullptr; } + img->texture.reset(tex_p); img->data = (std::uint8_t*)tex_p; img->row_pitch = 0; img->pixel_pitch = 4; img->width = 0; img->height = 0; - img->texture.reset(tex_p); img->display = shared_from_this(); return img; @@ -456,8 +780,7 @@ int display_vram_t::dummy_img(platf::img_t *img_base) { auto dummy_data = std::make_unique(width * height); D3D11_SUBRESOURCE_DATA data { dummy_data.get(), - (UINT)img->row_pitch, - 0 + (UINT)img->row_pitch }; D3D11_TEXTURE2D_DESC t {}; @@ -468,7 +791,7 @@ int display_vram_t::dummy_img(platf::img_t *img_base) { t.SampleDesc.Count = 1; t.Usage = D3D11_USAGE_DEFAULT; t.Format = format; - t.BindFlags = D3D11_BIND_RENDER_TARGET; + t.BindFlags = D3D11_BIND_SHADER_RESOURCE; dxgi::texture2d_t::pointer tex_p {}; auto status = device->CreateTexture2D(&t, &data, &tex_p); @@ -477,8 +800,8 @@ int display_vram_t::dummy_img(platf::img_t *img_base) { return -1; } - img->data = (std::uint8_t*)tex_p; img->texture.reset(tex_p); + img->data = (std::uint8_t*)tex_p; img->height = height; img->width = width; img->pixel_pitch = 4; @@ -487,12 +810,42 @@ int display_vram_t::dummy_img(platf::img_t *img_base) { } std::shared_ptr display_vram_t::make_hwdevice(int width, int height, pix_fmt_e pix_fmt) { - if(pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) { - BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << (int)pix_fmt << ']'; + if(pix_fmt != platf::pix_fmt_e::nv12) { + BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']'; return nullptr; } + if(!screen_ps_hlsl) { + BOOST_LOG(info) << "Compiling shaders..."sv; + screen_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/ScreenVS.hlsl"); + if(!screen_vs_hlsl) { + return nullptr; + } + + screen_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/ScreenPS.hlsl"); + if(!screen_ps_hlsl) { + return nullptr; + } + + YCrCb_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/YCbCrPS.hlsl"); + if(!YCrCb_ps_hlsl) { + return nullptr; + } + + merge_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeUVPS.hlsl"); + if(!merge_UV_ps_hlsl) { + return nullptr; + } + + merge_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeUVVS.hlsl"); + if(!merge_UV_vs_hlsl) { + return nullptr; + } + + BOOST_LOG(info) << "Compiled shaders"sv; + } + auto hwdevice = std::make_shared(&hwdevices); auto ret = hwdevice->init( From 37a925658724114e8398192364a895dbce969524 Mon Sep 17 00:00:00 2001 From: loki Date: Sun, 2 May 2021 22:35:19 +0200 Subject: [PATCH 02/18] Render NV12 color format --- assets/MergeUVPS.hlsl | 39 ++- assets/MergeUVVS.hlsl | 29 +- assets/MergeYPS.hlsl | 26 ++ assets/MergeYVS.hlsl | 22 ++ assets/ScreenPS.hlsl | 20 -- assets/ScreenVS.hlsl | 23 -- assets/YCbCrPS.hlsl | 40 --- sunshine/main.cpp | 4 + sunshine/platform/linux/input.cpp | 2 +- sunshine/platform/windows/audio.cpp | 9 + sunshine/platform/windows/display_vram.cpp | 374 +++++++-------------- sunshine/video.cpp | 3 + 12 files changed, 220 insertions(+), 371 deletions(-) create mode 100644 assets/MergeYPS.hlsl create mode 100644 assets/MergeYVS.hlsl delete mode 100644 assets/ScreenPS.hlsl delete mode 100644 assets/ScreenVS.hlsl delete mode 100644 assets/YCbCrPS.hlsl diff --git a/assets/MergeUVPS.hlsl b/assets/MergeUVPS.hlsl index 501425ef..597c02d8 100644 --- a/assets/MergeUVPS.hlsl +++ b/assets/MergeUVPS.hlsl @@ -1,27 +1,32 @@ -//-------------------------------------------------------------------------------------- -// CombinedUVMipsPS.hlsl -//-------------------------------------------------------------------------------------- -Texture2D txInputU : register(t0); -Texture2D txInputV : register(t1); -Texture1D txInputShift : register(t2); +Texture2D image : register(t0); -SamplerState GenericSampler : register(s0); +SamplerState def_sampler : register(s0); -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD; +struct FragTexWide { + float3 uuv : TEXCOORD0; }; //-------------------------------------------------------------------------------------- // Pixel Shader //-------------------------------------------------------------------------------------- -float PS(PS_INPUT input) : SV_Target +float2 PS(FragTexWide input) : SV_Target { - float fShift = (float)txInputShift.Sample(GenericSampler, input.Tex.x); + // float4 color_vec_y = { 0.301f, 0.586f, 0.113f, 0.0f }; + // float4 color_vec_u = { -0.168f, -0.328f, 0.496f, 128.0f / 256.0f }; + // float4 color_vec_v = { 0.496f, 0.414f, 0.082f, 128.0f / 256.0f }; + float4 color_vec_y = { 0.299, 0.587, 0.114, 0.0625 }; + float4 color_vec_u = { -0.168736, -0.331264, 0.5, 0.5 }; + float4 color_vec_v = { 0.5, -0.418688, -0.081312, 0.5 }; - if(fShift == 0.0f) - return (float)txInputU.SampleLevel(GenericSampler, input.Tex, 1.0f); - else - return (float)txInputV.SampleLevel(GenericSampler, input.Tex, 1.0f); + // float4 color_vec_y = { 0.2578f, 0.5039f, 0.0977, 0.0625 }; + // float4 color_vec_u = { -0.1484, 0.2891, 0.4375, 128.0f / 256.0f }; + // float4 color_vec_v = { 0.4375, -0.3672, -0.0703, 128.0f / 256.0f }; + + float3 rgb_left = image.Sample(def_sampler, input.uuv.xz).rgb; + float3 rgb_right = image.Sample(def_sampler, input.uuv.yz).rgb; + float3 rgb = (rgb_left + rgb_right) * 0.5; + + float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; + float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + return float2(u, v); } \ No newline at end of file diff --git a/assets/MergeUVVS.hlsl b/assets/MergeUVVS.hlsl index 618b0f58..0e0b1249 100644 --- a/assets/MergeUVVS.hlsl +++ b/assets/MergeUVVS.hlsl @@ -1,23 +1,26 @@ -//-------------------------------------------------------------------------------------- -// CombinedUVVS.hlsl -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD; +struct VertTexPosWide { + float3 uuv : TEXCOORD; + float4 pos : SV_POSITION; }; //-------------------------------------------------------------------------------------- // Vertex Shader //-------------------------------------------------------------------------------------- -PS_INPUT VS(uint vI : SV_VERTEXID) +VertTexPosWide VS(uint vI : SV_VERTEXID) { - PS_INPUT output = (PS_INPUT)0; + float width_i = 1.0f / 1920.0f; + float idHigh = float(vI >> 1); + float idLow = float(vI & uint(1)); - float2 texcoord = float2(vI & 1, vI >> 1); + float x = idHigh * 4.0 - 1.0; + float y = idLow * 4.0 - 1.0; - output.Pos = float4((texcoord.x - 0.5f) * 2.0f, -(texcoord.y + 0.0f) * 0.5f, 0.0f, 1.0f); - output.Tex = texcoord; + float u_right = idHigh * 2.0; + float u_left = u_right - width_i; + float v = 1.0 - idLow * 2.0; - return output; + VertTexPosWide vert_out; + vert_out.uuv = float3(u_left, u_right, v); + vert_out.pos = float4(x, y, 0.0, 1.0); + return vert_out; } \ No newline at end of file diff --git a/assets/MergeYPS.hlsl b/assets/MergeYPS.hlsl new file mode 100644 index 00000000..146d517c --- /dev/null +++ b/assets/MergeYPS.hlsl @@ -0,0 +1,26 @@ +//-------------------------------------------------------------------------------------- +// YCbCrPS2.hlsl +//-------------------------------------------------------------------------------------- +Texture2D image : register(t0); + +SamplerState def_sampler : register(s0); + +struct PS_INPUT +{ + float4 pos : SV_POSITION; + float2 tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float PS(PS_INPUT frag_in) : SV_Target +{ + float4 color_vec_y = { 0.299, 0.587, 0.114, 0.0625 }; + float4 color_vec_u = { -0.168736, -0.331264, 0.5, 0.5 }; + float4 color_vec_v = { 0.5, -0.418688, -0.081312, 0.5 }; + + float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb; + float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; + return y; +} \ No newline at end of file diff --git a/assets/MergeYVS.hlsl b/assets/MergeYVS.hlsl new file mode 100644 index 00000000..d95ed7b4 --- /dev/null +++ b/assets/MergeYVS.hlsl @@ -0,0 +1,22 @@ +struct PS_INPUT +{ + float4 pos : SV_POSITION; + float2 tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS(uint vI : SV_VERTEXID) +{ + float idHigh = float(vI >> 1); + float idLow = float(vI & uint(1)); + + float x = idHigh * 4.0 - 1.0; + float y = idLow * 4.0 - 1.0; + + PS_INPUT vert_out; + vert_out.pos = float4(x, y, 0.0, 1.0); + vert_out.tex = float2(idHigh, idLow); + return vert_out; +} \ No newline at end of file diff --git a/assets/ScreenPS.hlsl b/assets/ScreenPS.hlsl deleted file mode 100644 index 2e354877..00000000 --- a/assets/ScreenPS.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -//-------------------------------------------------------------------------------------- -// ScreenPS.hlsl -//-------------------------------------------------------------------------------------- -Texture2D txInput : register(t0); - -SamplerState GenericSampler : register(s0); - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD; -}; - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS(PS_INPUT input) : SV_Target -{ - return txInput.Sample(GenericSampler, input.Tex); -} \ No newline at end of file diff --git a/assets/ScreenVS.hlsl b/assets/ScreenVS.hlsl deleted file mode 100644 index c89c5cf9..00000000 --- a/assets/ScreenVS.hlsl +++ /dev/null @@ -1,23 +0,0 @@ -//-------------------------------------------------------------------------------------- -// ScreenVS.hlsl -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS(uint vI : SV_VERTEXID) -{ - PS_INPUT output = (PS_INPUT)0; - - float2 texcoord = float2(vI & 1, vI >> 1); - - output.Pos = float4((texcoord.x - 0.5f) * 2.0f, -(texcoord.y - 0.5f) * 2.0f, 0.0f, 1.0f); - output.Tex = texcoord; - - return output; -} \ No newline at end of file diff --git a/assets/YCbCrPS.hlsl b/assets/YCbCrPS.hlsl deleted file mode 100644 index 88ca0d21..00000000 --- a/assets/YCbCrPS.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -//-------------------------------------------------------------------------------------- -// YCbCrPS2.hlsl -//-------------------------------------------------------------------------------------- -Texture2D txInput : register(t0); - -SamplerState GenericSampler : register(s0); - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD; -}; - -struct PS_OUTPUT -{ - float ColorY : SV_Target0; - float2 ColorU: SV_Target1; - float2 ColorV: SV_Target2; -}; - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -PS_OUTPUT PS(PS_INPUT input) : SV_Target -{ - PS_OUTPUT output; - - float4 InputColor = txInput.Sample(GenericSampler, input.Tex); - - // Range 0-255 - output.ColorY = (0.257f * InputColor.r + 0.504f * InputColor.g + 0.098f * InputColor.b) + (16 / 256.0f); - output.ColorU = (-0.148f * InputColor.r - 0.291f * InputColor.g + 0.439f * InputColor.b) + (128.0f / 256.0f); - output.ColorV = (0.439f * InputColor.r - 0.368f * InputColor.g - 0.071f * InputColor.b) + (128.0f / 256.0f); - - output.ColorY = clamp(output.ColorY, 0.0f, 255.0f); - output.ColorU = clamp(output.ColorU, 0.0f, 255.0f); - output.ColorV = clamp(output.ColorV, 0.0f, 255.0f); - - return output; -} \ No newline at end of file diff --git a/sunshine/main.cpp b/sunshine/main.cpp index c21f81fc..a24b562e 100644 --- a/sunshine/main.cpp +++ b/sunshine/main.cpp @@ -138,6 +138,10 @@ int main(int argc, char *argv[]) { proc::proc = std::move(*proc_opt); auto deinit_guard = platf::init(); + if(!deinit_guard) { + return 4; + } + input::init(); reed_solomon_init(); if(video::init()) { diff --git a/sunshine/platform/linux/input.cpp b/sunshine/platform/linux/input.cpp index f5696522..6bb5d495 100644 --- a/sunshine/platform/linux/input.cpp +++ b/sunshine/platform/linux/input.cpp @@ -502,5 +502,5 @@ void freeInput(void *p) { delete input; } -std::unique_ptr init() { return nullptr; } +std::unique_ptr init() { return std::make_unique(); } } diff --git a/sunshine/platform/windows/audio.cpp b/sunshine/platform/windows/audio.cpp index a71e4a77..0ba9a168 100644 --- a/sunshine/platform/windows/audio.cpp +++ b/sunshine/platform/windows/audio.cpp @@ -319,6 +319,12 @@ public: } namespace platf { + +// It's not big enough to justify it's own source file :/ +namespace dxgi { +int init(); +} + std::unique_ptr microphone(std::uint32_t sample_rate) { auto mic = std::make_unique(); @@ -330,6 +336,9 @@ std::unique_ptr microphone(std::uint32_t sample_rate) { } std::unique_ptr init() { + if(dxgi::init()) { + return nullptr; + } return std::make_unique(); } } diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index af386886..29b938e4 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -15,6 +15,7 @@ constexpr float aquamarine[] { 0.498039246f, 1.000000000f, 0.831372619f, 1.00000 using input_layout_t = util::safe_ptr>; using render_target_t = util::safe_ptr>; using shader_res_t = util::safe_ptr>; +using blend_t = util::safe_ptr>; using raster_state_t = util::safe_ptr>; using sampler_state_t = util::safe_ptr>; using vs_t = util::safe_ptr>; @@ -25,9 +26,8 @@ using depth_stencil_view_t = util::safe_ptrOMSetBlendState(blend.get(), nullptr, 0xffffffff); _init_view_port(out_width, out_height); - device_ctx_p->PSSetSamplers(0, 1, &sampler_point_p); + device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); - device_ctx_p->OMSetRenderTargets(3, pYCbCrRT, nullptr); - for(auto rt : pYCbCrRT) { - device_ctx_p->ClearRenderTargetView(rt, aquamarine); - } - device_ctx_p->VSSetShader(screen_vs.get(), nullptr, 0); - device_ctx_p->PSSetShader(YCrCb_ps.get(), nullptr, 0); + device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); + device_ctx_p->ClearRenderTargetView(Y_rt_p, aquamarine); + device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(merge_Y_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(4, 0); device_ctx_p->Flush(); - // downsample - device_ctx_p->GenerateMips(chromaCR_sr.get()); - device_ctx_p->GenerateMips(chromaCB_sr.get()); - - device_ctx_p->OMSetRenderTargets(1, &nv12_rt_p, nullptr); - device_ctx_p->ClearRenderTargetView(nv12_rt_p, aquamarine); - device_ctx_p->VSSetShader(screen_vs.get(), nullptr, 0); - device_ctx_p->PSSetShader(screen_ps.get(), nullptr, 0); - device_ctx_p->PSSetShaderResources(0, 1, &luma_sr_p); - device_ctx_p->Draw(4, 0); - device_ctx_p->Flush(); - - _init_view_port(out_width, out_height *2); + _init_view_port(out_width / 2, out_height / 2); + device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); + device_ctx_p->ClearRenderTargetView(UV_rt_p, aquamarine); device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(merge_UV_ps.get(), nullptr, 0); - for(int x = 0; x < ARRAYSIZE(merge_ress); ++x) { - device_ctx_p->PSSetShaderResources(x, 1, &merge_ress[x]); - } + device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(4, 0); device_ctx_p->Flush(); @@ -289,45 +269,35 @@ public: this->in_width = in_width; this->in_height = in_height; - vs_t::pointer screen_vs_p; - status = device_p->CreateVertexShader(screen_vs_hlsl->GetBufferPointer(), screen_vs_hlsl->GetBufferSize(), nullptr, &screen_vs_p); + vs_t::pointer vs_p; + status = device_p->CreateVertexShader(merge_Y_vs_hlsl->GetBufferPointer(), merge_Y_vs_hlsl->GetBufferSize(), nullptr, &vs_p); if(status) { BOOST_LOG(error) << "Failed to create screen vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - screen_vs.reset(screen_vs_p); + merge_Y_vs.reset(vs_p); - ps_t::pointer screen_ps_p; - status = device_p->CreatePixelShader(screen_ps_hlsl->GetBufferPointer(), screen_ps_hlsl->GetBufferSize(), nullptr, &screen_ps_p); - if(status) { - BOOST_LOG(error) << "Failed to create screen pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - screen_ps.reset(screen_ps_p); - - ps_t::pointer YCrCb_ps_p; - status = device_p->CreatePixelShader(YCrCb_ps_hlsl->GetBufferPointer(), YCrCb_ps_hlsl->GetBufferSize(), nullptr, &YCrCb_ps_p); + ps_t::pointer ps_p; + status = device_p->CreatePixelShader(merge_Y_ps_hlsl->GetBufferPointer(), merge_Y_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { BOOST_LOG(error) << "Failed to create YCrCb pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - YCrCb_ps.reset(YCrCb_ps_p); + merge_Y_ps.reset(ps_p); - ps_t::pointer merge_UV_ps_p; - status = device_p->CreatePixelShader(merge_UV_ps_hlsl->GetBufferPointer(), merge_UV_ps_hlsl->GetBufferSize(), nullptr, &merge_UV_ps_p); + status = device_p->CreatePixelShader(merge_UV_ps_hlsl->GetBufferPointer(), merge_UV_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { BOOST_LOG(error) << "Failed to create mergeUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_UV_ps.reset(merge_UV_ps_p); + merge_UV_ps.reset(ps_p); - vs_t::pointer merge_UV_vs_p; - status = device_p->CreateVertexShader(merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), nullptr, &merge_UV_vs_p); + status = device_p->CreateVertexShader(merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), nullptr, &vs_p); if(status) { BOOST_LOG(error) << "Failed to create mergeUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_UV_vs.reset(merge_UV_vs_p); + merge_UV_vs.reset(vs_p); D3D11_INPUT_ELEMENT_DESC layout_desc { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 @@ -340,6 +310,20 @@ public: &input_layout_p); input_layout.reset(input_layout_p); + D3D11_BLEND_DESC blend_desc {}; + blend_desc.RenderTarget[0].BlendEnable = FALSE; + blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; + blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + + blend_t::pointer blend_p; + status = device_p->CreateBlendState(&blend_desc, &blend_p); + if(status) { + BOOST_LOG(error) << "Failed to create blend state [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + blend.reset(blend_p); + D3D11_TEXTURE2D_DESC t {}; t.Width = out_width; t.Height = out_height; @@ -376,30 +360,20 @@ public: BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - nv12_rt.reset(nv12_rt_p); + nv12_Y_rt.reset(nv12_rt_p); - if( - _init_rt(&luma_sr, &luma_rt, out_width, out_height, 1, DXGI_FORMAT_R8_UNORM) || - _init_rt(&chromaCB_sr, &chromaCB_rt, out_width, out_height, 2, DXGI_FORMAT_R8_UNORM, D3D11_RESOURCE_MISC_GENERATE_MIPS) || - _init_rt(&chromaCR_sr, &chromaCR_rt, out_width, out_height, 2, DXGI_FORMAT_R8_UNORM, D3D11_RESOURCE_MISC_GENERATE_MIPS) || - _init_shift_sr(out_width)) - { + nv12_rt_desc.Format = DXGI_FORMAT_R8G8_UNORM; + status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_rt_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - - // t.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - // t.BindFlags = D3D11_BIND_DEPTH_STENCIL; - // status = device_p->CreateTexture2D(&t, nullptr, &tex_p); - // if(FAILED(status)) { - // BOOST_LOG(error) << "Failed to create depth stencil texture [0x"sv << util::hex(status).to_string_view() << ']'; - // return -1; - // } - // depth_stencil.reset(tex_p); + nv12_UV_rt.reset(nv12_rt_p); D3D11_SAMPLER_DESC sampler_desc {}; - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; - sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER; sampler_desc.MinLOD = 0; @@ -411,69 +385,16 @@ public: BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } + sampler_linear.reset(sampler_state_p); + + sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + status = device_p->CreateSamplerState(&sampler_desc, &sampler_state_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } sampler_point.reset(sampler_state_p); - // D3D11_DEPTH_STENCIL_DESC depth_stencil_desc {}; - // depth_stencil_desc.DepthEnable = FALSE; - // depth_stencil_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - // depth_stencil_desc.StencilEnable = true; - // depth_stencil_desc.StencilReadMask = 0xFF; - // depth_stencil_desc.StencilWriteMask = 0xFF; - - // depth_stencil_desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - // depth_stencil_desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR; - // depth_stencil_desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; - // depth_stencil_desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; - - // depth_stencil_desc.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - // depth_stencil_desc.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR; - // depth_stencil_desc.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; - // depth_stencil_desc.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; - - // depth_stencil_state_t::pointer depth_state_p; - // status = device_p->CreateDepthStencilState(&depth_stencil_desc, &depth_state_p); - // if(FAILED(status)) { - // BOOST_LOG(error) << "Failed to create depth stencil state [0x"sv << util::hex(status).to_string_view() << ']'; - // return -1; - // } - // depth_state.reset(depth_state_p); - - // D3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc {}; - // depth_view_desc.Format = t.Format; - // depth_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; - - // depth_stencil_view_t::pointer depth_view_p; - // status = device_p->CreateDepthStencilView(depth_stencil.get(), &depth_view_desc, &depth_view_p); - // if(FAILED(status)) { - // BOOST_LOG(error) << "Failed to create depth stencil view [0x"sv << util::hex(status).to_string_view() << ']'; - // return -1; - // } - // depth_view.reset(depth_view_p); - - // // Setup the raster description which will determine how and what polygons will be drawn. - // D3D11_RASTERIZER_DESC raster_desc; - // raster_desc.AntialiasedLineEnable = false; - // raster_desc.CullMode = D3D11_CULL_BACK; - // raster_desc.DepthBias = 0; - // raster_desc.DepthBiasClamp = 0.0f; - // raster_desc.DepthClipEnable = true; - // raster_desc.FillMode = D3D11_FILL_SOLID; - // raster_desc.FrontCounterClockwise = false; - // raster_desc.MultisampleEnable = false; - // raster_desc.ScissorEnable = false; - // raster_desc.SlopeScaledDepthBias = 0.0f; - - // raster_state_t::pointer raster_state_p; - // status = device_p->CreateRasterizerState(&raster_desc, &raster_state_p); - // if(FAILED(status)) { - // BOOST_LOG(error) << "Failed to create rasterizer state [0x"sv << util::hex(status).to_string_view() << ']'; - // return -1; - // } - // raster_state.reset(raster_state_p); - - auto sampler_p = sampler_point.get(); - device_ctx_p->PSSetSamplers(0, 1, &sampler_p); - // device_ctx_p->RSSetState(raster_state.get()); device_ctx_p->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); device_ctx_p->IASetInputLayout(input_layout.get()); @@ -491,9 +412,9 @@ public: } } private: - void _init_view_port(float width, float height) { + void _init_view_port(float x, float y, float width, float height) { D3D11_VIEWPORT view { - 0.0f, 0.0f, + x, y, width, height, 0.0f, 1.0f }; @@ -501,7 +422,11 @@ private: device_ctx_p->RSSetViewports(1, &view); } - int _init_rt(shader_res_t *shader_res, render_target_t *render_target, int width, int height, int mip_levels, DXGI_FORMAT format, int flags = 0) { + void _init_view_port(float width, float height) { + _init_view_port(0.0f, 0.0f, width, height); + } + + int _init_rt(shader_res_t &shader_res, render_target_t &render_target, int width, int height, DXGI_FORMAT format) { D3D11_TEXTURE2D_DESC desc {}; desc.Width = width; @@ -509,10 +434,9 @@ private: desc.Format = format; desc.Usage = D3D11_USAGE_DEFAULT; desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - desc.MipLevels = mip_levels; + desc.MipLevels = 1; desc.ArraySize = 1; desc.SampleDesc.Count = 1; - desc.MiscFlags = flags; auto device = (device_t::pointer)data; @@ -524,114 +448,54 @@ private: } texture2d_t tex { tex_p }; - if(shader_res) { - D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_desc { - format, - D3D11_SRV_DIMENSION_TEXTURE2D - }; - shader_resource_desc.Texture2D.MipLevels = mip_levels; - shader_res_t::pointer shader_res_p; - device->CreateShaderResourceView(tex_p, &shader_resource_desc, &shader_res_p); - if(status) { - BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - shader_res->reset(shader_res_p); - } - - if(render_target) { - D3D11_RENDER_TARGET_VIEW_DESC render_target_desc { - format, - D3D11_RTV_DIMENSION_TEXTURE2D - }; - - render_target_t::pointer render_target_p; - device->CreateRenderTargetView(tex_p, &render_target_desc, &render_target_p); - if(status) { - BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - render_target->reset(render_target_p); - } - - return 0; - } - - int _init_shift_sr(int width) { - auto device = (device_t::pointer)data; - D3D11_TEXTURE1D_DESC desc {}; - desc.Width = width; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_R8_UNORM; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - - util::buffer_t data { (std::size_t)width }; - for(int x = 0; x < data.size(); ++x) { - data[x] = x & 1; - } - - D3D11_SUBRESOURCE_DATA data_res { - std::begin(data), - (UINT)data.size() + D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_desc { + format, + D3D11_SRV_DIMENSION_TEXTURE2D }; - - texture1d_t::pointer tex_p {}; - auto status = device->CreateTexture1D(&desc, &data_res, &tex_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create shift texture [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - texture1d_t tex { tex_p }; - - D3D11_SHADER_RESOURCE_VIEW_DESC res_desc { - DXGI_FORMAT_R8_UNORM, - D3D11_SRV_DIMENSION_TEXTURE1D - }; - res_desc.Texture1D.MipLevels = 1; + shader_resource_desc.Texture2D.MipLevels = 1; shader_res_t::pointer shader_res_p; - device->CreateShaderResourceView(tex_p, &res_desc, &shader_res_p); + device->CreateShaderResourceView(tex_p, &shader_resource_desc, &shader_res_p); if(status) { BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - shift_sr.reset(shader_res_p); + shader_res.reset(shader_res_p); + + D3D11_RENDER_TARGET_VIEW_DESC render_target_desc { + format, + D3D11_RTV_DIMENSION_TEXTURE2D + }; + + render_target_t::pointer render_target_p; + device->CreateRenderTargetView(tex_p, &render_target_desc, &render_target_p); + if(status) { + BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + render_target.reset(render_target_p); return 0; } public: - // raster_state_t raster_state; - + sampler_state_t sampler_linear; sampler_state_t sampler_point; - // depth_stencil_view_t depth_view; - // depth_stencil_state_t depth_state; - - shader_res_t chromaCB_sr; - shader_res_t chromaCR_sr; - shader_res_t luma_sr; - shader_res_t shift_sr; - input_layout_t input_layout; - // texture2d_t depth_stencil; - render_target_t luma_rt; - render_target_t nv12_rt; - render_target_t chromaCB_rt; - render_target_t chromaCR_rt; + render_target_t nv12_Y_rt; + render_target_t nv12_UV_rt; + + blend_t blend; img_d3d_t img; vs_t merge_UV_vs; ps_t merge_UV_ps; - vs_t screen_vs; - ps_t screen_ps; - ps_t YCrCb_ps; - ps_t ChromaCbCr_ps; + vs_t merge_Y_vs; + ps_t merge_Y_ps; bool cursor_visible; @@ -816,36 +680,6 @@ std::shared_ptr display_vram_t::make_hwdevice(int width, int return nullptr; } - if(!screen_ps_hlsl) { - BOOST_LOG(info) << "Compiling shaders..."sv; - screen_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/ScreenVS.hlsl"); - if(!screen_vs_hlsl) { - return nullptr; - } - - screen_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/ScreenPS.hlsl"); - if(!screen_ps_hlsl) { - return nullptr; - } - - YCrCb_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/YCbCrPS.hlsl"); - if(!YCrCb_ps_hlsl) { - return nullptr; - } - - merge_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeUVPS.hlsl"); - if(!merge_UV_ps_hlsl) { - return nullptr; - } - - merge_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeUVVS.hlsl"); - if(!merge_UV_vs_hlsl) { - return nullptr; - } - - BOOST_LOG(info) << "Compiled shaders"sv; - } - auto hwdevice = std::make_shared(&hwdevices); auto ret = hwdevice->init( @@ -868,4 +702,30 @@ std::shared_ptr display_vram_t::make_hwdevice(int width, int return hwdevice; } + +int init() { + BOOST_LOG(info) << "Compiling shaders..."sv; + merge_Y_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeYVS.hlsl"); + if(!merge_Y_vs_hlsl) { + return -1; + } + + merge_Y_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeYPS.hlsl"); + if(!merge_Y_ps_hlsl) { + return -1; + } + + merge_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeUVPS.hlsl"); + if(!merge_UV_ps_hlsl) { + return -1; + } + + merge_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeUVVS.hlsl"); + if(!merge_UV_vs_hlsl) { + return -1; + } + BOOST_LOG(info) << "Compiled shaders"sv; + + return 0; +} } \ No newline at end of file diff --git a/sunshine/video.cpp b/sunshine/video.cpp index b2e23855..53b7b760 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -622,6 +622,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & case 0: default: // Rec. 601 + BOOST_LOG(info) << "Color coding [Rec. 601]"sv; ctx->color_primaries = AVCOL_PRI_SMPTE170M; ctx->color_trc = AVCOL_TRC_SMPTE170M; ctx->colorspace = AVCOL_SPC_SMPTE170M; @@ -630,6 +631,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & case 1: // Rec. 709 + BOOST_LOG(info) << "Color coding [Rec. 709]"sv; ctx->color_primaries = AVCOL_PRI_BT709; ctx->color_trc = AVCOL_TRC_BT709; ctx->colorspace = AVCOL_SPC_BT709; @@ -638,6 +640,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & case 2: // Rec. 2020 + BOOST_LOG(info) << "Color coding [Rec. 2020]"sv; ctx->color_primaries = AVCOL_PRI_BT2020; ctx->color_trc = AVCOL_TRC_BT2020_10; ctx->colorspace = AVCOL_SPC_BT2020_NCL; From 900d59b3ac075c529b6db9e2e831596cfed0560a Mon Sep 17 00:00:00 2001 From: loki Date: Mon, 3 May 2021 22:06:55 +0200 Subject: [PATCH 03/18] Dynamically set colors during runtime --- assets/MergeUVPS.hlsl | 18 +-- assets/MergeUVVS.hlsl | 5 +- assets/MergeYPS.hlsl | 11 +- sunshine/platform/windows/display_vram.cpp | 148 +++++++++++++++------ sunshine/video.cpp | 1 + 5 files changed, 130 insertions(+), 53 deletions(-) diff --git a/assets/MergeUVPS.hlsl b/assets/MergeUVPS.hlsl index 597c02d8..7754f046 100644 --- a/assets/MergeUVPS.hlsl +++ b/assets/MergeUVPS.hlsl @@ -6,27 +6,23 @@ struct FragTexWide { float3 uuv : TEXCOORD0; }; +cbuffer ColorMatrix : register(b0) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; +}; + //-------------------------------------------------------------------------------------- // Pixel Shader //-------------------------------------------------------------------------------------- float2 PS(FragTexWide input) : SV_Target { - // float4 color_vec_y = { 0.301f, 0.586f, 0.113f, 0.0f }; - // float4 color_vec_u = { -0.168f, -0.328f, 0.496f, 128.0f / 256.0f }; - // float4 color_vec_v = { 0.496f, 0.414f, 0.082f, 128.0f / 256.0f }; - float4 color_vec_y = { 0.299, 0.587, 0.114, 0.0625 }; - float4 color_vec_u = { -0.168736, -0.331264, 0.5, 0.5 }; - float4 color_vec_v = { 0.5, -0.418688, -0.081312, 0.5 }; - - // float4 color_vec_y = { 0.2578f, 0.5039f, 0.0977, 0.0625 }; - // float4 color_vec_u = { -0.1484, 0.2891, 0.4375, 128.0f / 256.0f }; - // float4 color_vec_v = { 0.4375, -0.3672, -0.0703, 128.0f / 256.0f }; - float3 rgb_left = image.Sample(def_sampler, input.uuv.xz).rgb; float3 rgb_right = image.Sample(def_sampler, input.uuv.yz).rgb; float3 rgb = (rgb_left + rgb_right) * 0.5; float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + return float2(u, v); } \ No newline at end of file diff --git a/assets/MergeUVVS.hlsl b/assets/MergeUVVS.hlsl index 0e0b1249..7aa0e171 100644 --- a/assets/MergeUVVS.hlsl +++ b/assets/MergeUVVS.hlsl @@ -3,12 +3,15 @@ struct VertTexPosWide { float4 pos : SV_POSITION; }; +cbuffer info : register(b0) { + float width_i; +}; + //-------------------------------------------------------------------------------------- // Vertex Shader //-------------------------------------------------------------------------------------- VertTexPosWide VS(uint vI : SV_VERTEXID) { - float width_i = 1.0f / 1920.0f; float idHigh = float(vI >> 1); float idLow = float(vI & uint(1)); diff --git a/assets/MergeYPS.hlsl b/assets/MergeYPS.hlsl index 146d517c..4a015f04 100644 --- a/assets/MergeYPS.hlsl +++ b/assets/MergeYPS.hlsl @@ -5,6 +5,12 @@ Texture2D image : register(t0); SamplerState def_sampler : register(s0); +cbuffer ColorMatrix : register(b0) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; +}; + struct PS_INPUT { float4 pos : SV_POSITION; @@ -16,11 +22,8 @@ struct PS_INPUT //-------------------------------------------------------------------------------------- float PS(PS_INPUT frag_in) : SV_Target { - float4 color_vec_y = { 0.299, 0.587, 0.114, 0.0625 }; - float4 color_vec_u = { -0.168736, -0.331264, 0.5, 0.5 }; - float4 color_vec_v = { 0.5, -0.418688, -0.081312, 0.5 }; - float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb; float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; + return y; } \ No newline at end of file diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 29b938e4..5f5bef76 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -1,6 +1,7 @@ #include #include +#include #include "sunshine/main.h" #include "display.h" @@ -15,6 +16,7 @@ constexpr float aquamarine[] { 0.498039246f, 1.000000000f, 0.831372619f, 1.00000 using input_layout_t = util::safe_ptr>; using render_target_t = util::safe_ptr>; using shader_res_t = util::safe_ptr>; +using buf_t = util::safe_ptr>; using blend_t = util::safe_ptr>; using raster_state_t = util::safe_ptr>; using sampler_state_t = util::safe_ptr>; @@ -24,6 +26,49 @@ using blob_t = util::safe_ptr>; using depth_stencil_state_t = util::safe_ptr>; using depth_stencil_view_t = util::safe_ptr>; +struct __attribute__ ((__aligned__ (16))) color_t { + DirectX::XMFLOAT4 color_vec_y; + DirectX::XMFLOAT4 color_vec_u; + DirectX::XMFLOAT4 color_vec_v; +}; + +color_t colors[] { + { + { 0.299f, 0.587f, 0.114f, 0.0625f }, // Color Luma (Y) + { -0.14713f, -0.28886f, 0.436f, 0.5f }, // Color Cb (U) + { 0.615f, -0.51499f, -0.10001f, 0.5f }, // Color Cr (V) + }, // BT602 -- MPEG + { + { 0.299f, 0.587f, 0.114f, 0.0f }, // Color Luma (Y) + { -0.168736f, -0.331264f, 0.5f, 0.5f }, // Color Cb (U) + { 0.5f, -0.418688f, -0.081312f, 0.5f }, // Color Cr (V) + } // BT601 -- JPEG +}; + +template +buf_t make_buffer(device_t::pointer device, const T& t) { + static_assert(sizeof(T) % 16 == 0, "Buffer needs to be aligned on a 16-byte alignment"); + + D3D11_BUFFER_DESC buffer_desc { + sizeof(T), + D3D11_USAGE_IMMUTABLE, + D3D11_BIND_CONSTANT_BUFFER, + }; + + D3D11_SUBRESOURCE_DATA init_data { + &t + }; + + buf_t::pointer buf_p; + auto status = device->CreateBuffer(&buffer_desc, &init_data, &buf_p); + if(status) { + BOOST_LOG(error) << "Failed to create shader resource view"sv; + return nullptr; + } + + return buf_t { buf_p }; +} + blob_t merge_UV_vs_hlsl; blob_t merge_UV_ps_hlsl; blob_t merge_Y_vs_hlsl; @@ -168,22 +213,22 @@ public: return; } - LONG x = ((double)rel_x) * out_width / (double)in_width; - LONG y = ((double)rel_y) * out_height / (double)in_height; + // LONG x = ((double)rel_x) * out_width / (double)in_width; + // LONG y = ((double)rel_y) * out_height / (double)in_height; - // Ensure it's within bounds - auto left_out = std::min(out_width, std::max(0, x)); - auto top_out = std::min(out_height, std::max(0, y)); - auto right_out = std::max(0, std::min(out_width, x + cursor_scaled_width)); - auto bottom_out = std::max(0, std::min(out_height, y + cursor_scaled_height)); + // // Ensure it's within bounds + // auto left_out = std::min(out_width, std::max(0, x)); + // auto top_out = std::min(out_height, std::max(0, y)); + // auto right_out = std::max(0, std::min(out_width, x + cursor_scaled_width)); + // auto bottom_out = std::max(0, std::min(out_height, y + cursor_scaled_height)); - auto left_in = std::max(0, -rel_x); - auto top_in = std::max(0, -rel_y); - auto right_in = std::min(in_width - rel_x, cursor_width); - auto bottom_in = std::min(in_height - rel_y, cursor_height); + // auto left_in = std::max(0, -rel_x); + // auto top_in = std::max(0, -rel_y); + // auto right_in = std::min(in_width - rel_x, cursor_width); + // auto bottom_in = std::min(in_height - rel_y, cursor_height); - RECT rect_in { left_in, top_in, right_in, bottom_in }; - RECT rect_out { left_out, top_out, right_out, bottom_out }; + // RECT rect_in { left_in, top_in, right_in, bottom_in }; + // RECT rect_out { left_out, top_out, right_out, bottom_out }; } int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) { @@ -216,23 +261,19 @@ public: img.input_res.reset(input_rec_p); } - auto sampler_linear_p = sampler_linear.get(); auto input_res_p = img.input_res.get(); auto Y_rt_p = nv12_Y_rt.get(); auto UV_rt_p = nv12_UV_rt.get(); - // device_ctx_p->OMSetBlendState(blend.get(), nullptr, 0xffffffff); _init_view_port(out_width, out_height); - device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); device_ctx_p->ClearRenderTargetView(Y_rt_p, aquamarine); device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(merge_Y_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); - device_ctx_p->Draw(4, 0); - device_ctx_p->Flush(); + device_ctx_p->Draw(3, 0); _init_view_port(out_width / 2, out_height / 2); device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); @@ -240,13 +281,38 @@ public: device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(merge_UV_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); - device_ctx_p->Draw(4, 0); - device_ctx_p->Flush(); + device_ctx_p->Draw(3, 0); return 0; } - void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {} + void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + switch (colorspace) { + case 5: // SWS_CS_SMPTE170M + color_p = &colors[0]; + break; + case 1: // SWS_CS_ITU709 + case 9: // SWS_CS_BT2020 + default: + BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv; + color_p = &colors[0]; + }; + + if(color_range > 1) { + // Full range + ++color_p; + } + + auto color_matrix = make_buffer((device_t::pointer)data, *color_p); + if(!color_matrix) { + BOOST_LOG(warning) << "Failed to create color matrix"sv; + return; + } + + auto buf_p = color_matrix.get(); + device_ctx_p->PSSetConstantBuffers(0, 1, &buf_p); + this->color_matrix = std::move(color_matrix); + } int init( std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, @@ -299,6 +365,19 @@ public: } merge_UV_vs.reset(vs_p); + color_matrix = make_buffer(device_p, colors[0]); + if(!color_matrix) { + BOOST_LOG(error) << "Failed to create color matrix buffer"sv; + return -1; + } + + float info_in[16] { 1.0f / (float)out_width }; //aligned to 16-byte + info = make_buffer(device_p, info_in); + if(!info_in) { + BOOST_LOG(error) << "Failed to create info buffer"sv; + return -1; + } + D3D11_INPUT_ELEMENT_DESC layout_desc { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }; @@ -310,20 +389,6 @@ public: &input_layout_p); input_layout.reset(input_layout_p); - D3D11_BLEND_DESC blend_desc {}; - blend_desc.RenderTarget[0].BlendEnable = FALSE; - blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - - blend_t::pointer blend_p; - status = device_p->CreateBlendState(&blend_desc, &blend_p); - if(status) { - BOOST_LOG(error) << "Failed to create blend state [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - blend.reset(blend_p); - D3D11_TEXTURE2D_DESC t {}; t.Width = out_width; t.Height = out_height; @@ -395,6 +460,12 @@ public: } sampler_point.reset(sampler_state_p); + auto sampler_linear_p = sampler_linear.get(); + auto color_matrix_buf_p = color_matrix.get(); + auto info_buf_p = info.get(); + device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); + device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix_buf_p); + device_ctx_p->VSSetConstantBuffers(0, 1, &info_buf_p); device_ctx_p->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); device_ctx_p->IASetInputLayout(input_layout.get()); @@ -480,6 +551,11 @@ private: } public: + color_t *color_p; + + buf_t info; + buf_t color_matrix; + sampler_state_t sampler_linear; sampler_state_t sampler_point; @@ -488,8 +564,6 @@ public: render_target_t nv12_Y_rt; render_target_t nv12_UV_rt; - blend_t blend; - img_d3d_t img; vs_t merge_UV_vs; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 53b7b760..b41ffb17 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -647,6 +647,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & sws_color_space = SWS_CS_BT2020; break; } + BOOST_LOG(info) << "Color range: ["sv << ((config.encoderCscMode & 0x1) ? "JPEG"sv : "MPEG"sv) << ']'; AVPixelFormat sw_fmt; if(config.dynamicRange == 0) { From 1b7e103ef62d80465982bfee593e65f8b68135c4 Mon Sep 17 00:00:00 2001 From: loki Date: Tue, 4 May 2021 10:21:56 +0200 Subject: [PATCH 04/18] Allow resizing the image during conversion --- assets/MergeYPS.hlsl | 2 +- assets/MergeYVS.hlsl | 5 +++- sunshine/video.cpp | 65 +++++++------------------------------------- 3 files changed, 15 insertions(+), 57 deletions(-) diff --git a/assets/MergeYPS.hlsl b/assets/MergeYPS.hlsl index 4a015f04..66c53afd 100644 --- a/assets/MergeYPS.hlsl +++ b/assets/MergeYPS.hlsl @@ -22,7 +22,7 @@ struct PS_INPUT //-------------------------------------------------------------------------------------- float PS(PS_INPUT frag_in) : SV_Target { - float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb; + float3 rgb = image.Sample(def_sampler, frag_in.tex, 0).rgb; float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; return y; diff --git a/assets/MergeYVS.hlsl b/assets/MergeYVS.hlsl index d95ed7b4..f38bb3bf 100644 --- a/assets/MergeYVS.hlsl +++ b/assets/MergeYVS.hlsl @@ -15,8 +15,11 @@ PS_INPUT VS(uint vI : SV_VERTEXID) float x = idHigh * 4.0 - 1.0; float y = idLow * 4.0 - 1.0; + float u = idHigh * 2.0; + float v = 1.0 - idLow * 2.0; + PS_INPUT vert_out; vert_out.pos = float4(x, y, 0.0, 1.0); - vert_out.tex = float2(idHigh, idLow); + vert_out.tex = float2(u, v); return vert_out; } \ No newline at end of file diff --git a/sunshine/video.cpp b/sunshine/video.cpp index b41ffb17..986ef05f 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -82,10 +82,10 @@ platf::dev_type_e map_dev_type(AVHWDeviceType type); platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt); void sw_img_to_frame(const platf::img_t &img, frame_t &frame); -void nv_d3d_img_to_frame(const platf::img_t &img, frame_t &frame); -util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); -void amd_d3d_img_to_frame(const platf::img_t &img, frame_t &frame); -util::Either amd_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); +void dxgi_img_to_frame(const platf::img_t &img, frame_t &frame); +util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); +void dxgi_img_to_frame(const platf::img_t &img, frame_t &frame); +util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either make_hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx); int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format); @@ -297,8 +297,8 @@ static encoder_t nvenc { false, true, - nv_d3d_img_to_frame, - nv_d3d_make_hwdevice_ctx + dxgi_img_to_frame, + dxgi_make_hwdevice_ctx }; static encoder_t amdvce { @@ -331,8 +331,8 @@ static encoder_t amdvce { false, true, - amd_d3d_img_to_frame, - amd_d3d_make_hwdevice_ctx + dxgi_img_to_frame, + dxgi_make_hwdevice_ctx }; #endif @@ -1304,7 +1304,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { void sw_img_to_frame(const platf::img_t &img, frame_t &frame) {} #ifdef _WIN32 -void nv_d3d_img_to_frame(const platf::img_t &img, frame_t &frame) { +void dxgi_img_to_frame(const platf::img_t &img, frame_t &frame) { if(img.data == frame->data[0]) { return; } @@ -1327,31 +1327,7 @@ void nv_d3d_img_to_frame(const platf::img_t &img, frame_t &frame) { frame->width = img.width; } -void amd_d3d_img_to_frame(const platf::img_t &img, frame_t &frame) { - if(img.data == frame->data[0]) { - return; - } - - // Need to have something refcounted - if(!frame->buf[0]) { - frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor)); - } - - auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data; - desc->texture = (ID3D11Texture2D*)img.data; - desc->index = 0; - - frame->data[0] = img.data; - frame->data[1] = 0; - - frame->linesize[0] = img.row_pitch; - - frame->height = img.height; - frame->width = img.width; -} - - -util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { +util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) }; auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx; @@ -1371,27 +1347,6 @@ util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice return ctx_buf; } - -util::Either amd_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { - buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) }; - auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx; - - std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0); - - auto device = (ID3D11Device*)hwdevice_ctx->data; - device->AddRef(); - ctx->device = device; - - auto err = av_hwdevice_ctx_init(ctx_buf.get()); - if(err) { - char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; - BOOST_LOG(error) << "Failed to create FFMpeg amddech: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); - - return err; - } - - return ctx_buf; -} #endif int start_capture_async(capture_thread_async_ctx_t &capture_thread_ctx) { From c19853f03fbffc3f52533f2ad7960cbd7c785d96 Mon Sep 17 00:00:00 2001 From: loki Date: Wed, 5 May 2021 11:28:57 +0200 Subject: [PATCH 05/18] Add color for BT701 colorspace --- sunshine/platform/windows/display_vram.cpp | 33 ++++++++++++++-------- sunshine/video.cpp | 16 ++++++++++- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 5f5bef76..aa96d85b 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -32,17 +32,24 @@ struct __attribute__ ((__aligned__ (16))) color_t { DirectX::XMFLOAT4 color_vec_v; }; +color_t make_color_matrix(float Cr, float Cb, float U_max, float V_max, float add_Y, float add_UV) { + float Cg = 1.0f - Cr - Cb; + + float Cr_i = 1.0f - Cr; + float Cb_i = 1.0f - Cb; + + return { + { Cr, Cg, Cb, add_Y }, + { -(Cr * U_max / Cb_i), -(Cg * U_max / Cb_i), U_max, add_UV }, + { V_max, -(Cg * V_max / Cr_i), -(Cb * V_max / Cr_i), add_UV } + }; +} + color_t colors[] { - { - { 0.299f, 0.587f, 0.114f, 0.0625f }, // Color Luma (Y) - { -0.14713f, -0.28886f, 0.436f, 0.5f }, // Color Cb (U) - { 0.615f, -0.51499f, -0.10001f, 0.5f }, // Color Cr (V) - }, // BT602 -- MPEG - { - { 0.299f, 0.587f, 0.114f, 0.0f }, // Color Luma (Y) - { -0.168736f, -0.331264f, 0.5f, 0.5f }, // Color Cb (U) - { 0.5f, -0.418688f, -0.081312f, 0.5f }, // Color Cr (V) - } // BT601 -- JPEG + make_color_matrix(0.299f, 0.114f, 0.436f, 0.615f, 0.0625, 0.5f), // BT601 MPEG + make_color_matrix(0.299f, 0.114f, 0.5f, 0.5f, 0.0f, 0.5f), // BT601 JPEG + make_color_matrix(0.2126f, 0.0722f, 0.436f, 0.615f, 0.0625, 0.5f), //BT701 MPEG + make_color_matrix(0.2126f, 0.0722f, 0.5f, 0.5f, 0.0f, 0.5f), //BT701 JPEG }; template @@ -267,7 +274,6 @@ public: auto UV_rt_p = nv12_UV_rt.get(); _init_view_port(out_width, out_height); - device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); device_ctx_p->ClearRenderTargetView(Y_rt_p, aquamarine); device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); @@ -275,6 +281,8 @@ public: device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); + device_ctx_p->Flush(); + _init_view_port(out_width / 2, out_height / 2); device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); device_ctx_p->ClearRenderTargetView(UV_rt_p, aquamarine); @@ -292,6 +300,8 @@ public: color_p = &colors[0]; break; case 1: // SWS_CS_ITU709 + color_p = &colors[2]; + break; case 9: // SWS_CS_BT2020 default: BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv; @@ -581,6 +591,7 @@ public: device_ctx_t::pointer device_ctx_p; + // The destructor will remove itself from the list of hardware devices, this is done synchronously std::vector *hwdevices_p; }; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 986ef05f..e8e9b4b5 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -1304,6 +1304,15 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { void sw_img_to_frame(const platf::img_t &img, frame_t &frame) {} #ifdef _WIN32 +} + +// Ugly, but need to declare for wio +namespace platf::dxgi { +void lock(void *hwdevice); +void unlock(void *hwdevice); +} +void do_nothing(void*) {} +namespace video { void dxgi_img_to_frame(const platf::img_t &img, frame_t &frame) { if(img.data == frame->data[0]) { return; @@ -1334,13 +1343,18 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0); auto device = (ID3D11Device*)hwdevice_ctx->data; + device->AddRef(); ctx->device = device; + ctx->lock_ctx = (void*)1; + ctx->lock = do_nothing; + ctx->unlock = do_nothing; + auto err = av_hwdevice_ctx_init(ctx_buf.get()); if(err) { char err_str[AV_ERROR_MAX_STRING_SIZE] {0}; - BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + BOOST_LOG(error) << "Failed to create FFMpeg hardware device context: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); return err; } From 88c3828ad3515b002aa6842885544460d02b61c5 Mon Sep 17 00:00:00 2001 From: loki Date: Wed, 5 May 2021 12:17:25 +0200 Subject: [PATCH 06/18] Fixed not testing for 10bit pixels support --- sunshine/video.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sunshine/video.cpp b/sunshine/video.cpp index e8e9b4b5..e0a11dc6 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -306,7 +306,7 @@ static encoder_t amdvce { { (int)amd::profile_h264_e::high, (int)amd::profile_hevc_e::main }, AV_HWDEVICE_TYPE_D3D11VA, AV_PIX_FMT_D3D11, - AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, { { { "header_insertion_mode"s, "idr"s }, @@ -1200,7 +1200,7 @@ bool validate_encoder(encoder_t &encoder) { encoder.hevc[encoder_t::PASSED] = test_hevc; std::vector> configs { - { encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 1, 1, 1 } } + { encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1 } } }; for(auto &[flag, config] : configs) { auto h264 = config; From a93bad4cf3832238ab52dc477a26ec1634e26241 Mon Sep 17 00:00:00 2001 From: loki Date: Wed, 5 May 2021 15:53:22 +0200 Subject: [PATCH 07/18] Fix crash when sending SIGINT before starting the http server --- sunshine/main.cpp | 2 ++ sunshine/nvhttp.cpp | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sunshine/main.cpp b/sunshine/main.cpp index a24b562e..801493aa 100644 --- a/sunshine/main.cpp +++ b/sunshine/main.cpp @@ -154,6 +154,8 @@ int main(int argc, char *argv[]) { stream::rtpThread(shutdown_event); httpThread.join(); + task_pool.stop(); + task_pool.join(); return 0; } diff --git a/sunshine/nvhttp.cpp b/sunshine/nvhttp.cpp index da402706..98f5a79d 100644 --- a/sunshine/nvhttp.cpp +++ b/sunshine/nvhttp.cpp @@ -846,8 +846,22 @@ void start(std::shared_ptr shutdown_event) { return; } - std::thread ssl { &https_server_t::accept_and_run, &https_server }; - std::thread tcp { &http_server_t::accept_and_run, &http_server }; + auto accept_and_run = [&](auto *http_server) { + try { + http_server->accept_and_run(); + } catch(boost::system::system_error &err) { + // It's possible the exception gets thrown after calling http_server->stop() from a different thread + if(shutdown_event->peek()) { + return; + } + + BOOST_LOG(fatal) << "Couldn't start http server to ports ["sv << PORT_HTTPS << ", "sv << PORT_HTTP << "]: "sv << err.what(); + shutdown_event->raise(true); + return; + } + }; + std::thread ssl { accept_and_run, &https_server }; + std::thread tcp { accept_and_run, &http_server }; // Wait for any event shutdown_event->view(); From 3a0377851dd8fd973bd743e163a366d3fe0a72d7 Mon Sep 17 00:00:00 2001 From: loki Date: Wed, 5 May 2021 16:00:27 +0200 Subject: [PATCH 08/18] Update gitignore --- .gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c15ad42a..2584b70d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ build -cmake-build-* +cmake-build* .DS_Store - +.vscode +.vs *.swp *.kdev4 From 0232d8027ce939dc51342b2fb5b90521959b4e9a Mon Sep 17 00:00:00 2001 From: loki Date: Thu, 6 May 2021 12:00:39 +0200 Subject: [PATCH 09/18] Render cursor on duplicated image --- assets/MergeUVPS.hlsl | 2 +- assets/MergeUVVS.hlsl | 2 +- assets/MergeYPS.hlsl | 8 +- assets/MergeYVS.hlsl | 5 +- assets/ScenePS.hlsl | 26 ++++ sunshine/platform/windows/display_vram.cpp | 134 +++++++++++++-------- 6 files changed, 115 insertions(+), 62 deletions(-) create mode 100644 assets/ScenePS.hlsl diff --git a/assets/MergeUVPS.hlsl b/assets/MergeUVPS.hlsl index 7754f046..05c9f0fa 100644 --- a/assets/MergeUVPS.hlsl +++ b/assets/MergeUVPS.hlsl @@ -15,7 +15,7 @@ cbuffer ColorMatrix : register(b0) { //-------------------------------------------------------------------------------------- // Pixel Shader //-------------------------------------------------------------------------------------- -float2 PS(FragTexWide input) : SV_Target +float2 main_ps(FragTexWide input) : SV_Target { float3 rgb_left = image.Sample(def_sampler, input.uuv.xz).rgb; float3 rgb_right = image.Sample(def_sampler, input.uuv.yz).rgb; diff --git a/assets/MergeUVVS.hlsl b/assets/MergeUVVS.hlsl index 7aa0e171..66d97d43 100644 --- a/assets/MergeUVVS.hlsl +++ b/assets/MergeUVVS.hlsl @@ -10,7 +10,7 @@ cbuffer info : register(b0) { //-------------------------------------------------------------------------------------- // Vertex Shader //-------------------------------------------------------------------------------------- -VertTexPosWide VS(uint vI : SV_VERTEXID) +VertTexPosWide main_vs(uint vI : SV_VERTEXID) { float idHigh = float(vI >> 1); float idLow = float(vI & uint(1)); diff --git a/assets/MergeYPS.hlsl b/assets/MergeYPS.hlsl index 66c53afd..f46a7232 100644 --- a/assets/MergeYPS.hlsl +++ b/assets/MergeYPS.hlsl @@ -1,6 +1,3 @@ -//-------------------------------------------------------------------------------------- -// YCbCrPS2.hlsl -//-------------------------------------------------------------------------------------- Texture2D image : register(t0); SamplerState def_sampler : register(s0); @@ -17,10 +14,7 @@ struct PS_INPUT float2 tex : TEXCOORD; }; -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float PS(PS_INPUT frag_in) : SV_Target +float main_ps(PS_INPUT frag_in) : SV_Target { float3 rgb = image.Sample(def_sampler, frag_in.tex, 0).rgb; float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; diff --git a/assets/MergeYVS.hlsl b/assets/MergeYVS.hlsl index f38bb3bf..51319ddb 100644 --- a/assets/MergeYVS.hlsl +++ b/assets/MergeYVS.hlsl @@ -4,10 +4,7 @@ struct PS_INPUT float2 tex : TEXCOORD; }; -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS(uint vI : SV_VERTEXID) +PS_INPUT main_vs(uint vI : SV_VERTEXID) { float idHigh = float(vI >> 1); float idLow = float(vI & uint(1)); diff --git a/assets/ScenePS.hlsl b/assets/ScenePS.hlsl new file mode 100644 index 00000000..8b4368bd --- /dev/null +++ b/assets/ScenePS.hlsl @@ -0,0 +1,26 @@ +Texture2D image : register(t0); + +SamplerState def_sampler : register(s0); + +cbuffer ColorMatrix : register(b0) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; +}; + +struct PS_INPUT +{ + float4 pos : SV_POSITION; + float2 tex : TEXCOORD; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 main_ps(PS_INPUT frag_in) : SV_Target +{ + float4 color = image.Sample(def_sampler, frag_in.tex, 0); + + clip(color.a < 0.1f ? -1 : 1); + return color; +} \ No newline at end of file diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index aa96d85b..889ce030 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -59,7 +59,7 @@ buf_t make_buffer(device_t::pointer device, const T& t) { D3D11_BUFFER_DESC buffer_desc { sizeof(T), D3D11_USAGE_IMMUTABLE, - D3D11_BIND_CONSTANT_BUFFER, + D3D11_BIND_CONSTANT_BUFFER }; D3D11_SUBRESOURCE_DATA init_data { @@ -69,7 +69,7 @@ buf_t make_buffer(device_t::pointer device, const T& t) { buf_t::pointer buf_p; auto status = device->CreateBuffer(&buffer_desc, &init_data, &buf_p); if(status) { - BOOST_LOG(error) << "Failed to create shader resource view"sv; + BOOST_LOG(error) << "Failed to create buffer"sv; return nullptr; } @@ -80,6 +80,7 @@ blob_t merge_UV_vs_hlsl; blob_t merge_UV_ps_hlsl; blob_t merge_Y_vs_hlsl; blob_t merge_Y_ps_hlsl; +blob_t scene_ps_hlsl; struct img_d3d_t : public platf::img_t { shader_res_t input_res; @@ -201,11 +202,11 @@ blob_t compile_shader(LPCSTR file, LPCSTR entrypoint, LPCSTR shader_model) { } blob_t compile_pixel_shader(LPCSTR file) { - return compile_shader(file, "PS", "ps_5_0"); + return compile_shader(file, "main_ps", "ps_5_0"); } blob_t compile_vertex_shader(LPCSTR file) { - return compile_shader(file, "VS", "vs_5_0"); + return compile_shader(file, "main_vs", "vs_5_0"); } class hwdevice_t : public platf::hwdevice_t { @@ -220,29 +221,34 @@ public: return; } - // LONG x = ((double)rel_x) * out_width / (double)in_width; - // LONG y = ((double)rel_y) * out_height / (double)in_height; + auto x = ((float)rel_x) * cursor_scale; + auto y = ((float)rel_y) * cursor_scale; - // // Ensure it's within bounds - // auto left_out = std::min(out_width, std::max(0, x)); - // auto top_out = std::min(out_height, std::max(0, y)); - // auto right_out = std::max(0, std::min(out_width, x + cursor_scaled_width)); - // auto bottom_out = std::max(0, std::min(out_height, y + cursor_scaled_height)); - - // auto left_in = std::max(0, -rel_x); - // auto top_in = std::max(0, -rel_y); - // auto right_in = std::min(in_width - rel_x, cursor_width); - // auto bottom_in = std::min(in_height - rel_y, cursor_height); - - // RECT rect_in { left_in, top_in, right_in, bottom_in }; - // RECT rect_out { left_out, top_out, right_out, bottom_out }; + cursor_view.TopLeftX = x; + cursor_view.TopLeftY = y; + cursor_view.Width = cursor_scaled_width; + cursor_view.Height = cursor_scaled_height; } int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) { - cursor_width = width; - cursor_height = height; - cursor_scaled_width = ((double)width) / in_width * out_width; - cursor_scaled_height = ((double)height) / in_height * out_height; + auto device = (device_t::pointer)data; + + cursor_scaled_width = ((float)width) * cursor_scale; + cursor_scaled_height = ((float)height) * cursor_scale; + + D3D11_SHADER_RESOURCE_VIEW_DESC desc { + DXGI_FORMAT_B8G8R8A8_UNORM, + D3D11_SRV_DIMENSION_TEXTURE2D + }; + desc.Texture2D.MipLevels = 1; + + shader_res_t::pointer cursor_res_p; + auto status = device->CreateShaderResourceView(texture, &desc, &cursor_res_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create cursor shader resource view [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + img.input_res.reset(cursor_res_p); return 0; } @@ -259,23 +265,43 @@ public: }; desc.Texture2D.MipLevels = 1; - shader_res_t::pointer input_rec_p; - auto status = device->CreateShaderResourceView(img.texture.get(), &desc, &input_rec_p); + shader_res_t::pointer input_res_p; + auto status = device->CreateShaderResourceView(img.texture.get(), &desc, &input_res_p); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create input shader resource view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img.input_res.reset(input_rec_p); + img.input_res.reset(input_res_p); } auto input_res_p = img.input_res.get(); + auto cursor_res_p = this->img.input_res.get(); + auto scene_rt_p = scene_rt.get(); auto Y_rt_p = nv12_Y_rt.get(); auto UV_rt_p = nv12_UV_rt.get(); + if(cursor_visible) { + _init_view_port(img.width, img.height); + + device_ctx_p->OMSetRenderTargets(1, &scene_rt_p, nullptr); + device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(scene_ps.get(), nullptr, 0); + device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); + + device_ctx_p->Draw(3, 0); + device_ctx_p->Flush(); + + device_ctx_p->RSSetViewports(1, &cursor_view); + device_ctx_p->PSSetShaderResources(0, 1, &cursor_res_p); + device_ctx_p->Draw(3, 0); + device_ctx_p->Flush(); + + input_res_p = scene_sr.get(); + } + _init_view_port(out_width, out_height); device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); - device_ctx_p->ClearRenderTargetView(Y_rt_p, aquamarine); device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(merge_Y_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); @@ -285,7 +311,6 @@ public: _init_view_port(out_width / 2, out_height / 2); device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); - device_ctx_p->ClearRenderTargetView(UV_rt_p, aquamarine); device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(merge_UV_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); @@ -336,14 +361,15 @@ public: this->device_ctx_p = device_ctx_p; + cursor_scale = (float)out_width / (float)in_width; cursor_visible = false; + cursor_view.MinDepth = 0.0f; + cursor_view.MaxDepth = 1.0f; platf::hwdevice_t::img = &img; this->out_width = out_width; this->out_height = out_height; - this->in_width = in_width; - this->in_height = in_height; vs_t::pointer vs_p; status = device_p->CreateVertexShader(merge_Y_vs_hlsl->GetBufferPointer(), merge_Y_vs_hlsl->GetBufferSize(), nullptr, &vs_p); @@ -375,6 +401,17 @@ public: } merge_UV_vs.reset(vs_p); + status = device_p->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + if(status) { + BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + scene_ps.reset(ps_p); + + if(_init_rt(scene_sr, scene_rt, in_width, in_height, DXGI_FORMAT_B8G8R8A8_UNORM)) { + return -1; + } + color_matrix = make_buffer(device_p, colors[0]); if(!color_matrix) { BOOST_LOG(error) << "Failed to create color matrix buffer"sv; @@ -382,9 +419,9 @@ public: } float info_in[16] { 1.0f / (float)out_width }; //aligned to 16-byte - info = make_buffer(device_p, info_in); + info_scene = make_buffer(device_p, info_in); if(!info_in) { - BOOST_LOG(error) << "Failed to create info buffer"sv; + BOOST_LOG(error) << "Failed to create info scene buffer"sv; return -1; } @@ -462,17 +499,9 @@ public: } sampler_linear.reset(sampler_state_p); - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - status = device_p->CreateSamplerState(&sampler_desc, &sampler_state_p); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - sampler_point.reset(sampler_state_p); - auto sampler_linear_p = sampler_linear.get(); auto color_matrix_buf_p = color_matrix.get(); - auto info_buf_p = info.get(); + auto info_buf_p = info_scene.get(); device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix_buf_p); device_ctx_p->VSSetConstantBuffers(0, 1, &info_buf_p); @@ -563,31 +592,33 @@ private: public: color_t *color_p; - buf_t info; + buf_t info_scene; buf_t color_matrix; sampler_state_t sampler_linear; - sampler_state_t sampler_point; input_layout_t input_layout; render_target_t nv12_Y_rt; render_target_t nv12_UV_rt; + render_target_t scene_rt; + shader_res_t scene_sr; + img_d3d_t img; vs_t merge_UV_vs; ps_t merge_UV_ps; vs_t merge_Y_vs; ps_t merge_Y_ps; + ps_t scene_ps; + D3D11_VIEWPORT cursor_view; + float cursor_scaled_width, cursor_scaled_height; + float cursor_scale; bool cursor_visible; - LONG cursor_width, cursor_height; - LONG cursor_scaled_width, cursor_scaled_height; - - LONG in_width, in_height; - double out_width, out_height; + float out_width, out_height; device_ctx_t::pointer device_ctx_p; @@ -648,7 +679,7 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec t.SampleDesc.Count = 1; t.Usage = D3D11_USAGE_DEFAULT; t.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - t.BindFlags = D3D11_BIND_RENDER_TARGET; + t.BindFlags = D3D11_BIND_SHADER_RESOURCE; dxgi::texture2d_t::pointer tex_p {}; auto status = device->CreateTexture2D(&t, &data, &tex_p); @@ -809,6 +840,11 @@ int init() { if(!merge_UV_vs_hlsl) { return -1; } + + scene_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/scenePS.hlsl"); + if(!scene_ps_hlsl) { + return -1; + } BOOST_LOG(info) << "Compiled shaders"sv; return 0; From 0f661e467e95c50f56439dd422ce16bfb5211428 Mon Sep 17 00:00:00 2001 From: loki Date: Thu, 6 May 2021 12:36:26 +0200 Subject: [PATCH 10/18] Blend cursor onto the image --- assets/ScenePS.hlsl | 14 +------ sunshine/platform/windows/display_vram.cpp | 48 ++++++++++++++++++---- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/assets/ScenePS.hlsl b/assets/ScenePS.hlsl index 8b4368bd..aa601231 100644 --- a/assets/ScenePS.hlsl +++ b/assets/ScenePS.hlsl @@ -2,25 +2,13 @@ Texture2D image : register(t0); SamplerState def_sampler : register(s0); -cbuffer ColorMatrix : register(b0) { - float4 color_vec_y; - float4 color_vec_u; - float4 color_vec_v; -}; - struct PS_INPUT { float4 pos : SV_POSITION; float2 tex : TEXCOORD; }; -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- float4 main_ps(PS_INPUT frag_in) : SV_Target { - float4 color = image.Sample(def_sampler, frag_in.tex, 0); - - clip(color.a < 0.1f ? -1 : 1); - return color; + return image.Sample(def_sampler, frag_in.tex, 0); } \ No newline at end of file diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 889ce030..30fd2eba 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -69,13 +69,40 @@ buf_t make_buffer(device_t::pointer device, const T& t) { buf_t::pointer buf_p; auto status = device->CreateBuffer(&buffer_desc, &init_data, &buf_p); if(status) { - BOOST_LOG(error) << "Failed to create buffer"sv; + BOOST_LOG(error) << "Failed to create buffer: [0x"sv << util::hex(status).to_string_view() << ']'; return nullptr; } return buf_t { buf_p }; } +blend_t make_blend(device_t::pointer device, bool enable) { + D3D11_BLEND_DESC bdesc {}; + auto &rt = bdesc.RenderTarget[0]; + rt.BlendEnable = enable; + rt.RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + + if(enable) { + rt.BlendOp = D3D11_BLEND_OP_ADD; + rt.BlendOpAlpha = D3D11_BLEND_OP_ADD; + + rt.SrcBlend = D3D11_BLEND_SRC_ALPHA; + rt.DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + + rt.SrcBlendAlpha = D3D11_BLEND_ZERO; + rt.DestBlendAlpha = D3D11_BLEND_ZERO; + } + + blend_t::pointer blend_p; + auto status = device->CreateBlendState(&bdesc, &blend_p); + if(status) { + BOOST_LOG(error) << "Failed to create blend state: [0x"sv << util::hex(status).to_string_view() << ']'; + return nullptr; + } + + return blend_t { blend_p }; +} + blob_t merge_UV_vs_hlsl; blob_t merge_UV_ps_hlsl; blob_t merge_Y_vs_hlsl; @@ -290,12 +317,12 @@ public: device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); - device_ctx_p->Flush(); + device_ctx_p->OMSetBlendState(blend_enable.get(), nullptr, 0xFFFFFFFFu); device_ctx_p->RSSetViewports(1, &cursor_view); device_ctx_p->PSSetShaderResources(0, 1, &cursor_res_p); device_ctx_p->Draw(3, 0); - device_ctx_p->Flush(); + device_ctx_p->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); input_res_p = scene_sr.get(); } @@ -307,8 +334,6 @@ public: device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); - device_ctx_p->Flush(); - _init_view_port(out_width / 2, out_height / 2); device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); @@ -374,7 +399,7 @@ public: vs_t::pointer vs_p; status = device_p->CreateVertexShader(merge_Y_vs_hlsl->GetBufferPointer(), merge_Y_vs_hlsl->GetBufferSize(), nullptr, &vs_p); if(status) { - BOOST_LOG(error) << "Failed to create screen vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create mergeY vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } merge_Y_vs.reset(vs_p); @@ -382,7 +407,7 @@ public: ps_t::pointer ps_p; status = device_p->CreatePixelShader(merge_Y_ps_hlsl->GetBufferPointer(), merge_Y_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { - BOOST_LOG(error) << "Failed to create YCrCb pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create mergeY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } merge_Y_ps.reset(ps_p); @@ -408,6 +433,9 @@ public: } scene_ps.reset(ps_p); + blend_disable = make_blend(device_p, false); + blend_enable = make_blend(device_p, true); + if(_init_rt(scene_sr, scene_rt, in_width, in_height, DXGI_FORMAT_B8G8R8A8_UNORM)) { return -1; } @@ -418,7 +446,7 @@ public: return -1; } - float info_in[16] { 1.0f / (float)out_width }; //aligned to 16-byte + float info_in[16 / sizeof(float)] { 1.0f / (float)out_width }; //aligned to 16-byte info_scene = make_buffer(device_p, info_in); if(!info_in) { BOOST_LOG(error) << "Failed to create info scene buffer"sv; @@ -502,6 +530,7 @@ public: auto sampler_linear_p = sampler_linear.get(); auto color_matrix_buf_p = color_matrix.get(); auto info_buf_p = info_scene.get(); + device_ctx_p->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix_buf_p); device_ctx_p->VSSetConstantBuffers(0, 1, &info_buf_p); @@ -592,6 +621,9 @@ private: public: color_t *color_p; + blend_t blend_enable; + blend_t blend_disable; + buf_t info_scene; buf_t color_matrix; From 7b45f0d89909685c03b5bab3221e08977579144e Mon Sep 17 00:00:00 2001 From: loki Date: Thu, 6 May 2021 13:51:29 +0200 Subject: [PATCH 11/18] Fix cursor height and width --- sunshine/platform/windows/display_vram.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 30fd2eba..8dd5c7f7 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -253,21 +253,19 @@ public: cursor_view.TopLeftX = x; cursor_view.TopLeftY = y; - cursor_view.Width = cursor_scaled_width; - cursor_view.Height = cursor_scaled_height; } int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) { auto device = (device_t::pointer)data; - cursor_scaled_width = ((float)width) * cursor_scale; - cursor_scaled_height = ((float)height) * cursor_scale; + cursor_view.Width = width; + cursor_view.Height = height; D3D11_SHADER_RESOURCE_VIEW_DESC desc { - DXGI_FORMAT_B8G8R8A8_UNORM, - D3D11_SRV_DIMENSION_TEXTURE2D - }; - desc.Texture2D.MipLevels = 1; + DXGI_FORMAT_B8G8R8A8_UNORM, + D3D11_SRV_DIMENSION_TEXTURE2D + }; + desc.Texture2D.MipLevels = 1; shader_res_t::pointer cursor_res_p; auto status = device->CreateShaderResourceView(texture, &desc, &cursor_res_p); @@ -646,7 +644,6 @@ public: ps_t scene_ps; D3D11_VIEWPORT cursor_view; - float cursor_scaled_width, cursor_scaled_height; float cursor_scale; bool cursor_visible; From 67df04e0a2baab57c5fdc1ce2296b54635bbbced Mon Sep 17 00:00:00 2001 From: loki Date: Thu, 6 May 2021 16:51:59 +0200 Subject: [PATCH 12/18] Fix cursor position on lower resolution screens --- sunshine/platform/windows/display_vram.cpp | 6 ++---- sunshine/video.cpp | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 8dd5c7f7..6dd7852f 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -248,8 +248,8 @@ public: return; } - auto x = ((float)rel_x) * cursor_scale; - auto y = ((float)rel_y) * cursor_scale; + auto x = ((float)rel_x); + auto y = ((float)rel_y); cursor_view.TopLeftX = x; cursor_view.TopLeftY = y; @@ -384,7 +384,6 @@ public: this->device_ctx_p = device_ctx_p; - cursor_scale = (float)out_width / (float)in_width; cursor_visible = false; cursor_view.MinDepth = 0.0f; cursor_view.MaxDepth = 1.0f; @@ -644,7 +643,6 @@ public: ps_t scene_ps; D3D11_VIEWPORT cursor_view; - float cursor_scale; bool cursor_visible; float out_width, out_height; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index e0a11dc6..ad298922 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -373,11 +373,9 @@ static encoder_t software { static std::vector encoders { #ifdef _WIN32 nvenc, -#endif - software, -#ifdef _WIN32 amdvce, #endif + software }; void reset_display(std::shared_ptr &disp, AVHWDeviceType type) { From 513942c888b3f7386d89989eb2400c91433d5383 Mon Sep 17 00:00:00 2001 From: loki Date: Sat, 8 May 2021 12:03:58 +0200 Subject: [PATCH 13/18] Improve colors for nv12 --- assets/MergeUVPS.hlsl | 7 +++- assets/MergeYPS.hlsl | 6 ++-- sunshine/platform/windows/display_vram.cpp | 39 +++++++++++++++++----- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/assets/MergeUVPS.hlsl b/assets/MergeUVPS.hlsl index 05c9f0fa..2b72cddf 100644 --- a/assets/MergeUVPS.hlsl +++ b/assets/MergeUVPS.hlsl @@ -10,6 +10,8 @@ cbuffer ColorMatrix : register(b0) { float4 color_vec_y; float4 color_vec_u; float4 color_vec_v; + float2 range_y; + float2 range_uv; }; //-------------------------------------------------------------------------------------- @@ -24,5 +26,8 @@ float2 main_ps(FragTexWide input) : SV_Target float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; - return float2(u, v); + u = u * range_uv.x + range_uv.y; + v = v * range_uv.x + range_uv.y; + + return float2(u, v * 224.0f/256.0f + 0.0625); } \ No newline at end of file diff --git a/assets/MergeYPS.hlsl b/assets/MergeYPS.hlsl index f46a7232..386133c8 100644 --- a/assets/MergeYPS.hlsl +++ b/assets/MergeYPS.hlsl @@ -6,6 +6,8 @@ cbuffer ColorMatrix : register(b0) { float4 color_vec_y; float4 color_vec_u; float4 color_vec_v; + float2 range_y; + float2 range_uv; }; struct PS_INPUT @@ -17,7 +19,7 @@ struct PS_INPUT float main_ps(PS_INPUT frag_in) : SV_Target { float3 rgb = image.Sample(def_sampler, frag_in.tex, 0).rgb; - float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; + float y = dot(color_vec_y.xyz, rgb); - return y; + return y * range_y.x + range_y.y; } \ No newline at end of file diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 6dd7852f..81172036 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -26,30 +26,42 @@ using blob_t = util::safe_ptr>; using depth_stencil_state_t = util::safe_ptr>; using depth_stencil_view_t = util::safe_ptr>; +using float4 = DirectX::XMFLOAT4; +using float3 = DirectX::XMFLOAT3; +using float2 = DirectX::XMFLOAT2; struct __attribute__ ((__aligned__ (16))) color_t { - DirectX::XMFLOAT4 color_vec_y; - DirectX::XMFLOAT4 color_vec_u; - DirectX::XMFLOAT4 color_vec_v; + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; + float2 range_y; + float2 range_uv; }; -color_t make_color_matrix(float Cr, float Cb, float U_max, float V_max, float add_Y, float add_UV) { +color_t make_color_matrix(float Cr, float Cb, float U_max, float V_max, float add_Y, float add_UV, float2 range_Y, float2 range_UV) { float Cg = 1.0f - Cr - Cb; float Cr_i = 1.0f - Cr; float Cb_i = 1.0f - Cb; + float shift_y = range_Y.x / 256.0f; + float shift_uv = range_UV.x / 256.0f; + + float scale_y = (range_Y.y - range_Y.x) / 256.0f; + float scale_uv = (range_UV.y - range_UV.x) / 256.0f; return { { Cr, Cg, Cb, add_Y }, { -(Cr * U_max / Cb_i), -(Cg * U_max / Cb_i), U_max, add_UV }, - { V_max, -(Cg * V_max / Cr_i), -(Cb * V_max / Cr_i), add_UV } + { V_max, -(Cg * V_max / Cr_i), -(Cb * V_max / Cr_i), add_UV }, + { scale_y, shift_y }, + { scale_uv, shift_uv }, }; } color_t colors[] { - make_color_matrix(0.299f, 0.114f, 0.436f, 0.615f, 0.0625, 0.5f), // BT601 MPEG - make_color_matrix(0.299f, 0.114f, 0.5f, 0.5f, 0.0f, 0.5f), // BT601 JPEG - make_color_matrix(0.2126f, 0.0722f, 0.436f, 0.615f, 0.0625, 0.5f), //BT701 MPEG - make_color_matrix(0.2126f, 0.0722f, 0.5f, 0.5f, 0.0f, 0.5f), //BT701 JPEG + make_color_matrix(0.299f, 0.114f, 0.436f, 0.615f, 0.0625, 0.5f, { 16.0f, 235.0f }, { 16.0f, 240.0f }), // BT601 MPEG + make_color_matrix(0.299f, 0.114f, 0.5f, 0.5f, 0.0f, 0.5f, { 0.0f, 255.0f }, { 0.0f, 255.0f }), // BT601 JPEG + make_color_matrix(0.2126f, 0.0722f, 0.436f, 0.615f, 0.0625, 0.5f, { 16.0f, 235.0f }, { 16.0f, 240.0f }), //BT701 MPEG + make_color_matrix(0.2126f, 0.0722f, 0.5f, 0.5f, 0.0f, 0.5f, { 0.0f, 255.0f }, { 0.0f, 255.0f }), //BT701 JPEG }; template @@ -847,6 +859,15 @@ std::shared_ptr display_vram_t::make_hwdevice(int width, int } int init() { + for(auto &color : colors) { + BOOST_LOG(debug) << "Color Matrix"sv; + BOOST_LOG(debug) << "Y ["sv << color.color_vec_y.x << ", "sv << color.color_vec_y.y << ", "sv << color.color_vec_y.z << ", "sv << color.color_vec_y.w << ']'; + BOOST_LOG(debug) << "U ["sv << color.color_vec_u.x << ", "sv << color.color_vec_u.y << ", "sv << color.color_vec_u.z << ", "sv << color.color_vec_u.w << ']'; + BOOST_LOG(debug) << "V ["sv << color.color_vec_v.x << ", "sv << color.color_vec_v.y << ", "sv << color.color_vec_v.z << ", "sv << color.color_vec_v.w << ']'; + BOOST_LOG(debug) << "range Y ["sv << color.range_y.x << ", "sv << color.range_y.y << ']'; + BOOST_LOG(debug) << "range UV ["sv << color.range_uv.x << ", "sv << color.range_uv.y << ']'; + } + BOOST_LOG(info) << "Compiling shaders..."sv; merge_Y_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeYVS.hlsl"); if(!merge_Y_vs_hlsl) { From 13c2da07e8118dc6e42ffd6142b08e977a63d682 Mon Sep 17 00:00:00 2001 From: loki Date: Sat, 8 May 2021 13:22:04 +0200 Subject: [PATCH 14/18] Move shaders to seperate folder in assets --- .../ConvertUVPS.hlsl} | 0 .../ConvertUVVS.hlsl} | 0 .../ConvertYPS.hlsl} | 0 assets/{ => shaders}/ScenePS.hlsl | 0 .../{MergeYVS.hlsl => shaders/SceneVS.hlsl} | 0 sunshine/platform/windows/display_vram.cpp | 63 ++++++++++--------- 6 files changed, 32 insertions(+), 31 deletions(-) rename assets/{MergeUVPS.hlsl => shaders/ConvertUVPS.hlsl} (100%) rename assets/{MergeUVVS.hlsl => shaders/ConvertUVVS.hlsl} (100%) rename assets/{MergeYPS.hlsl => shaders/ConvertYPS.hlsl} (100%) rename assets/{ => shaders}/ScenePS.hlsl (100%) rename assets/{MergeYVS.hlsl => shaders/SceneVS.hlsl} (100%) diff --git a/assets/MergeUVPS.hlsl b/assets/shaders/ConvertUVPS.hlsl similarity index 100% rename from assets/MergeUVPS.hlsl rename to assets/shaders/ConvertUVPS.hlsl diff --git a/assets/MergeUVVS.hlsl b/assets/shaders/ConvertUVVS.hlsl similarity index 100% rename from assets/MergeUVVS.hlsl rename to assets/shaders/ConvertUVVS.hlsl diff --git a/assets/MergeYPS.hlsl b/assets/shaders/ConvertYPS.hlsl similarity index 100% rename from assets/MergeYPS.hlsl rename to assets/shaders/ConvertYPS.hlsl diff --git a/assets/ScenePS.hlsl b/assets/shaders/ScenePS.hlsl similarity index 100% rename from assets/ScenePS.hlsl rename to assets/shaders/ScenePS.hlsl diff --git a/assets/MergeYVS.hlsl b/assets/shaders/SceneVS.hlsl similarity index 100% rename from assets/MergeYVS.hlsl rename to assets/shaders/SceneVS.hlsl diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index 81172036..c8744b18 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -6,6 +6,7 @@ #include "sunshine/main.h" #include "display.h" +#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders" namespace platf { using namespace std::literals; } @@ -115,10 +116,10 @@ blend_t make_blend(device_t::pointer device, bool enable) { return blend_t { blend_p }; } -blob_t merge_UV_vs_hlsl; -blob_t merge_UV_ps_hlsl; -blob_t merge_Y_vs_hlsl; -blob_t merge_Y_ps_hlsl; +blob_t convert_UV_vs_hlsl; +blob_t convert_UV_ps_hlsl; +blob_t scene_vs_hlsl; +blob_t convert_Y_ps_hlsl; blob_t scene_ps_hlsl; struct img_d3d_t : public platf::img_t { @@ -322,7 +323,7 @@ public: _init_view_port(img.width, img.height); device_ctx_p->OMSetRenderTargets(1, &scene_rt_p, nullptr); - device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); + device_ctx_p->VSSetShader(scene_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(scene_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); @@ -339,15 +340,15 @@ public: _init_view_port(out_width, out_height); device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); - device_ctx_p->VSSetShader(merge_Y_vs.get(), nullptr, 0); - device_ctx_p->PSSetShader(merge_Y_ps.get(), nullptr, 0); + device_ctx_p->VSSetShader(scene_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(convert_Y_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); _init_view_port(out_width / 2, out_height / 2); device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); - device_ctx_p->VSSetShader(merge_UV_vs.get(), nullptr, 0); - device_ctx_p->PSSetShader(merge_UV_ps.get(), nullptr, 0); + device_ctx_p->VSSetShader(convert_UV_vs.get(), nullptr, 0); + device_ctx_p->PSSetShader(convert_UV_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); @@ -406,34 +407,34 @@ public: this->out_height = out_height; vs_t::pointer vs_p; - status = device_p->CreateVertexShader(merge_Y_vs_hlsl->GetBufferPointer(), merge_Y_vs_hlsl->GetBufferSize(), nullptr, &vs_p); + status = device_p->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &vs_p); if(status) { BOOST_LOG(error) << "Failed to create mergeY vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_Y_vs.reset(vs_p); + scene_vs.reset(vs_p); ps_t::pointer ps_p; - status = device_p->CreatePixelShader(merge_Y_ps_hlsl->GetBufferPointer(), merge_Y_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + status = device_p->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { BOOST_LOG(error) << "Failed to create mergeY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_Y_ps.reset(ps_p); + convert_Y_ps.reset(ps_p); - status = device_p->CreatePixelShader(merge_UV_ps_hlsl->GetBufferPointer(), merge_UV_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + status = device_p->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { BOOST_LOG(error) << "Failed to create mergeUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_UV_ps.reset(ps_p); + convert_UV_ps.reset(ps_p); - status = device_p->CreateVertexShader(merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), nullptr, &vs_p); + status = device_p->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &vs_p); if(status) { BOOST_LOG(error) << "Failed to create mergeUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - merge_UV_vs.reset(vs_p); + convert_UV_vs.reset(vs_p); status = device_p->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &ps_p); if(status) { @@ -469,7 +470,7 @@ public: input_layout_t::pointer input_layout_p; status = device_p->CreateInputLayout( &layout_desc, 1, - merge_UV_vs_hlsl->GetBufferPointer(), merge_UV_vs_hlsl->GetBufferSize(), + convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), &input_layout_p); input_layout.reset(input_layout_p); @@ -648,11 +649,11 @@ public: img_d3d_t img; - vs_t merge_UV_vs; - ps_t merge_UV_ps; - vs_t merge_Y_vs; - ps_t merge_Y_ps; + vs_t convert_UV_vs; + ps_t convert_UV_ps; + ps_t convert_Y_ps; ps_t scene_ps; + vs_t scene_vs; D3D11_VIEWPORT cursor_view; bool cursor_visible; @@ -869,27 +870,27 @@ int init() { } BOOST_LOG(info) << "Compiling shaders..."sv; - merge_Y_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeYVS.hlsl"); - if(!merge_Y_vs_hlsl) { + scene_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/SceneVS.hlsl"); + if(!scene_vs_hlsl) { return -1; } - merge_Y_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeYPS.hlsl"); - if(!merge_Y_ps_hlsl) { + convert_Y_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS.hlsl"); + if(!convert_Y_ps_hlsl) { return -1; } - merge_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/MergeUVPS.hlsl"); - if(!merge_UV_ps_hlsl) { + convert_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS.hlsl"); + if(!convert_UV_ps_hlsl) { return -1; } - merge_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_ASSETS_DIR "/MergeUVVS.hlsl"); - if(!merge_UV_vs_hlsl) { + convert_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/ConvertUVVS.hlsl"); + if(!convert_UV_vs_hlsl) { return -1; } - scene_ps_hlsl = compile_pixel_shader(SUNSHINE_ASSETS_DIR "/scenePS.hlsl"); + scene_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ScenePS.hlsl"); if(!scene_ps_hlsl) { return -1; } From 2970ad662c79ca4996bf0a4133eb23444f2d7685 Mon Sep 17 00:00:00 2001 From: loki Date: Sun, 9 May 2021 11:40:12 +0200 Subject: [PATCH 15/18] No more dumb pointers for initialization --- sunshine/crypto.cpp | 8 +- sunshine/platform/windows/audio.cpp | 30 +-- sunshine/platform/windows/display_base.cpp | 54 ++-- sunshine/platform/windows/display_ram.cpp | 10 +- sunshine/platform/windows/display_vram.cpp | 128 ++++------ sunshine/utility.h | 283 ++++++++++++++++----- 6 files changed, 307 insertions(+), 206 deletions(-) diff --git a/sunshine/crypto.cpp b/sunshine/crypto.cpp index 70a98289..398c8e09 100644 --- a/sunshine/crypto.cpp +++ b/sunshine/crypto.cpp @@ -187,10 +187,10 @@ x509_t x509(const std::string_view &x) { BIO_write(io.get(), x.data(), x.size()); - X509 *p = nullptr; + x509_t p; PEM_read_bio_X509(io.get(), &p, nullptr, nullptr); - return x509_t { p }; + return p; } pkey_t pkey(const std::string_view &k) { @@ -198,10 +198,10 @@ pkey_t pkey(const std::string_view &k) { BIO_write(io.get(), k.data(), k.size()); - EVP_PKEY *p = nullptr; + pkey_t p = nullptr; PEM_read_bio_PrivateKey(io.get(), &p, nullptr, nullptr); - return pkey_t { p }; + return p; } std::string pem(x509_t &x509) { diff --git a/sunshine/platform/windows/audio.cpp b/sunshine/platform/windows/audio.cpp index 0ba9a168..63369ccb 100644 --- a/sunshine/platform/windows/audio.cpp +++ b/sunshine/platform/windows/audio.cpp @@ -81,50 +81,43 @@ public: HRESULT status; - device_enum_t::pointer device_enum_p{}; status = CoCreateInstance( CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, IID_IMMDeviceEnumerator, - (void **) &device_enum_p); - device_enum.reset(device_enum_p); + (void **) &device_enum); - if (FAILED(status)) { + if(FAILED(status)) { BOOST_LOG(error) << "Couldn't create Device Enumerator [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - device_t::pointer device_p{}; - if(config::audio.sink.empty()) { status = device_enum->GetDefaultAudioEndpoint( eRender, eConsole, - &device_p); + &device); } else { std::wstring_convert, wchar_t> converter; auto wstring_device_id = converter.from_bytes(config::audio.sink); - status = device_enum->GetDevice(wstring_device_id.c_str(), &device_p); + status = device_enum->GetDevice(wstring_device_id.c_str(), &device); } - device.reset(device_p); - if (FAILED(status)) { + if(FAILED(status)) { BOOST_LOG(error) << "Couldn't create audio Device [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - audio_client_t::pointer audio_client_p{}; status = device->Activate( IID_IAudioClient, CLSCTX_ALL, nullptr, - (void **) &audio_client_p); - audio_client.reset(audio_client_p); + (void **) &audio_client); if (FAILED(status)) { BOOST_LOG(error) << "Couldn't activate audio Device [0x"sv << util::hex(status).to_string_view() << ']'; @@ -132,11 +125,8 @@ public: return -1; } - wave_format_t::pointer wave_format_p{}; - status = audio_client->GetMixFormat(&wave_format_p); - wave_format.reset(wave_format_p); - - if (FAILED(status)) { + status = audio_client->GetMixFormat(&wave_format); + if(FAILED(status)) { BOOST_LOG(error) << "Couldn't acquire Wave Format [0x"sv << util::hex(status).to_string_view() << ']'; return -1; @@ -198,9 +188,7 @@ public: sample_buf = util::buffer_t { frames }; sample_buf_pos = std::begin(sample_buf); - audio_capture_t::pointer audio_capture_p {}; - status = audio_client->GetService(IID_IAudioCaptureClient, (void**)&audio_capture_p); - audio_capture.reset(audio_capture_p); + status = audio_client->GetService(IID_IAudioCaptureClient, (void**)&audio_capture); if (FAILED(status)) { BOOST_LOG(error) << "Couldn't initialize audio capture client [0x"sv << util::hex(status).to_string_view() << ']'; diff --git a/sunshine/platform/windows/display_base.cpp b/sunshine/platform/windows/display_base.cpp index 29b53523..8644ed88 100644 --- a/sunshine/platform/windows/display_base.cpp +++ b/sunshine/platform/windows/display_base.cpp @@ -90,17 +90,10 @@ int display_base_t::init() { FreeLibrary(user32); }); */ - - dxgi::factory1_t::pointer factory_p {}; - dxgi::adapter_t::pointer adapter_p {}; - dxgi::output_t::pointer output_p {}; - dxgi::device_t::pointer device_p {}; - dxgi::device_ctx_t::pointer device_ctx_p {}; HRESULT status; - status = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory_p); - factory.reset(factory_p); + status = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']'; return -1; @@ -111,7 +104,8 @@ int display_base_t::init() { auto adapter_name = converter.from_bytes(config::video.adapter_name); auto output_name = converter.from_bytes(config::video.output_name); - for(int x = 0; factory_p->EnumAdapters1(x, &adapter_p) != DXGI_ERROR_NOT_FOUND; ++x) { + adapter_t::pointer adapter_p; + for(int x = 0; factory->EnumAdapters1(x, &adapter_p) != DXGI_ERROR_NOT_FOUND; ++x) { dxgi::adapter_t adapter_tmp { adapter_p }; DXGI_ADAPTER_DESC1 adapter_desc; @@ -121,8 +115,9 @@ int display_base_t::init() { continue; } + dxgi::output_t::pointer output_p; for(int y = 0; adapter_tmp->EnumOutputs(y, &output_p) != DXGI_ERROR_NOT_FOUND; ++y) { - dxgi::output_t output_tmp {output_p }; + dxgi::output_t output_tmp { output_p }; DXGI_OUTPUT_DESC desc; output_tmp->GetDesc(&desc); @@ -173,14 +168,12 @@ int display_base_t::init() { D3D11_CREATE_DEVICE_VIDEO_SUPPORT, featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, - &device_p, + &device, &feature_level, - &device_ctx_p); + &device_ctx); adapter_p->Release(); - device.reset(device_p); - device_ctx.reset(device_ctx_p); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create D3D11 device [0x"sv << util::hex(status).to_string_view() << ']'; @@ -216,7 +209,7 @@ int display_base_t::init() { tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; if (!AdjustTokenPrivileges(token, false, &tp, sizeof(tp), NULL, NULL)) { - BOOST_LOG(error) << "Could not set privilege to increase GPU priority"; + BOOST_LOG(warning) << "Could not set privilege to increase GPU priority"; } } @@ -229,17 +222,15 @@ int display_base_t::init() { if (fn) { status = fn(GetCurrentProcess(), D3DKMT_SCHEDULINGPRIORITYCLASS_REALTIME); if (FAILED(status)) { - BOOST_LOG(error) << "Failed to set realtime GPU priority. Please run application as administrator for optimal performance."; + BOOST_LOG(warning) << "Failed to set realtime GPU priority. Please run application as administrator for optimal performance."; } } } - dxgi::dxgi_t::pointer dxgi_p {}; - status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_p); - dxgi::dxgi_t dxgi { dxgi_p }; - + dxgi::dxgi_t dxgi; + status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(warning) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } @@ -248,25 +239,24 @@ int display_base_t::init() { // Try to reduce latency { - dxgi::dxgi1_t::pointer dxgi_p {}; - status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_p); - dxgi::dxgi1_t dxgi { dxgi_p }; - + dxgi::dxgi1_t dxgi {}; + status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi); if(FAILED(status)) { BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - dxgi->SetMaximumFrameLatency(1); + status = dxgi->SetMaximumFrameLatency(1); + if(FAILED(status)) { + BOOST_LOG(warning) << "Failed to set maximum frame latency [0x"sv << util::hex(status).to_string_view() << ']'; + } } //FIXME: Duplicate output on RX580 in combination with DOOM (2016) --> BSOD //TODO: Use IDXGIOutput5 for improved performance { - dxgi::output1_t::pointer output1_p {}; - status = output->QueryInterface(IID_IDXGIOutput1, (void**)&output1_p); - dxgi::output1_t output1 {output1_p }; - + dxgi::output1_t output1 {}; + status = output->QueryInterface(IID_IDXGIOutput1, (void**)&output1); if(FAILED(status)) { BOOST_LOG(error) << "Failed to query IDXGIOutput1 from the output"sv; return -1; @@ -274,10 +264,8 @@ int display_base_t::init() { // We try this twice, in case we still get an error on reinitialization for(int x = 0; x < 2; ++x) { - dxgi::dup_t::pointer dup_p {}; - status = output1->DuplicateOutput((IUnknown*)device.get(), &dup_p); + status = output1->DuplicateOutput((IUnknown*)device.get(), &dup.dup); if(SUCCEEDED(status)) { - dup.reset(dup_p); break; } std::this_thread::sleep_for(200ms); diff --git a/sunshine/platform/windows/display_ram.cpp b/sunshine/platform/windows/display_ram.cpp index 15d608fe..ffcbbd25 100644 --- a/sunshine/platform/windows/display_ram.cpp +++ b/sunshine/platform/windows/display_ram.cpp @@ -203,9 +203,8 @@ capture_e display_ram_t::snapshot(::platf::img_t *img_base, std::chrono::millise // If frame has been updated if (frame_info.LastPresentTime.QuadPart != 0) { { - texture2d_t::pointer src_p {}; - status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p); - texture2d_t src{src_p}; + texture2d_t src {}; + status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src); if (FAILED(status)) { BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']'; @@ -279,10 +278,7 @@ int display_ram_t::init() { t.Format = format; t.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - dxgi::texture2d_t::pointer tex_p {}; - auto status = device->CreateTexture2D(&t, nullptr, &tex_p); - - texture.reset(tex_p); + auto status = device->CreateTexture2D(&t, nullptr, &texture); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']'; diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp index c8744b18..00e0c69d 100644 --- a/sunshine/platform/windows/display_vram.cpp +++ b/sunshine/platform/windows/display_vram.cpp @@ -106,14 +106,14 @@ blend_t make_blend(device_t::pointer device, bool enable) { rt.DestBlendAlpha = D3D11_BLEND_ZERO; } - blend_t::pointer blend_p; - auto status = device->CreateBlendState(&bdesc, &blend_p); + blend_t blend; + auto status = device->CreateBlendState(&bdesc, &blend); if(status) { BOOST_LOG(error) << "Failed to create blend state: [0x"sv << util::hex(status).to_string_view() << ']'; return nullptr; } - return blend_t { blend_p }; + return blend; } blob_t convert_UV_vs_hlsl; @@ -280,13 +280,11 @@ public: }; desc.Texture2D.MipLevels = 1; - shader_res_t::pointer cursor_res_p; - auto status = device->CreateShaderResourceView(texture, &desc, &cursor_res_p); + auto status = device->CreateShaderResourceView(texture, &desc, &img.input_res); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create cursor shader resource view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img.input_res.reset(cursor_res_p); return 0; } @@ -303,26 +301,19 @@ public: }; desc.Texture2D.MipLevels = 1; - shader_res_t::pointer input_res_p; - auto status = device->CreateShaderResourceView(img.texture.get(), &desc, &input_res_p); + auto status = device->CreateShaderResourceView(img.texture.get(), &desc, &img.input_res); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create input shader resource view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img.input_res.reset(input_res_p); } auto input_res_p = img.input_res.get(); - auto cursor_res_p = this->img.input_res.get(); - - auto scene_rt_p = scene_rt.get(); - auto Y_rt_p = nv12_Y_rt.get(); - auto UV_rt_p = nv12_UV_rt.get(); if(cursor_visible) { _init_view_port(img.width, img.height); - device_ctx_p->OMSetRenderTargets(1, &scene_rt_p, nullptr); + device_ctx_p->OMSetRenderTargets(1, &scene_rt, nullptr); device_ctx_p->VSSetShader(scene_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(scene_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); @@ -331,7 +322,7 @@ public: device_ctx_p->OMSetBlendState(blend_enable.get(), nullptr, 0xFFFFFFFFu); device_ctx_p->RSSetViewports(1, &cursor_view); - device_ctx_p->PSSetShaderResources(0, 1, &cursor_res_p); + device_ctx_p->PSSetShaderResources(0, 1, &this->img.input_res); device_ctx_p->Draw(3, 0); device_ctx_p->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); @@ -339,14 +330,14 @@ public: } _init_view_port(out_width, out_height); - device_ctx_p->OMSetRenderTargets(1, &Y_rt_p, nullptr); + device_ctx_p->OMSetRenderTargets(1, &nv12_Y_rt, nullptr); device_ctx_p->VSSetShader(scene_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(convert_Y_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); device_ctx_p->Draw(3, 0); _init_view_port(out_width / 2, out_height / 2); - device_ctx_p->OMSetRenderTargets(1, &UV_rt_p, nullptr); + device_ctx_p->OMSetRenderTargets(1, &nv12_UV_rt, nullptr); device_ctx_p->VSSetShader(convert_UV_vs.get(), nullptr, 0); device_ctx_p->PSSetShader(convert_UV_ps.get(), nullptr, 0); device_ctx_p->PSSetShaderResources(0, 1, &input_res_p); @@ -380,8 +371,7 @@ public: return; } - auto buf_p = color_matrix.get(); - device_ctx_p->PSSetConstantBuffers(0, 1, &buf_p); + device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix); this->color_matrix = std::move(color_matrix); } @@ -406,46 +396,43 @@ public: this->out_width = out_width; this->out_height = out_height; - vs_t::pointer vs_p; - status = device_p->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &vs_p); + status = device_p->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs); if(status) { - BOOST_LOG(error) << "Failed to create mergeY vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - scene_vs.reset(vs_p); - ps_t::pointer ps_p; - status = device_p->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + status = device_p->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps); if(status) { - BOOST_LOG(error) << "Failed to create mergeY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - convert_Y_ps.reset(ps_p); - status = device_p->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + status = device_p->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps); if(status) { - BOOST_LOG(error) << "Failed to create mergeUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - convert_UV_ps.reset(ps_p); - status = device_p->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &vs_p); + status = device_p->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs); if(status) { - BOOST_LOG(error) << "Failed to create mergeUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - convert_UV_vs.reset(vs_p); - status = device_p->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &ps_p); + status = device_p->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &scene_ps); if(status) { BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - scene_ps.reset(ps_p); blend_disable = make_blend(device_p, false); blend_enable = make_blend(device_p, true); + if(!blend_disable || !blend_enable) { + return -1; + } + if(_init_rt(scene_sr, scene_rt, in_width, in_height, DXGI_FORMAT_B8G8R8A8_UNORM)) { return -1; } @@ -467,12 +454,10 @@ public: "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }; - input_layout_t::pointer input_layout_p; status = device_p->CreateInputLayout( &layout_desc, 1, convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), - &input_layout_p); - input_layout.reset(input_layout_p); + &input_layout); D3D11_TEXTURE2D_DESC t {}; t.Width = out_width; @@ -484,18 +469,16 @@ public: t.Format = pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010; t.BindFlags = D3D11_BIND_RENDER_TARGET; - dxgi::texture2d_t::pointer tex_p {}; - status = device_p->CreateTexture2D(&t, nullptr, &tex_p); + status = device_p->CreateTexture2D(&t, nullptr, &img.texture); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create render target texture [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img.texture.reset(tex_p); img.display = std::move(display); img.width = out_width; img.height = out_height; - img.data = (std::uint8_t*)tex_p; + img.data = (std::uint8_t*)img.texture.get(); img.row_pitch = out_width; img.pixel_pitch = 1; @@ -504,21 +487,18 @@ public: D3D11_RTV_DIMENSION_TEXTURE2D }; - render_target_t::pointer nv12_rt_p; - status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_rt_p); + status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_Y_rt); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - nv12_Y_rt.reset(nv12_rt_p); nv12_rt_desc.Format = DXGI_FORMAT_R8G8_UNORM; - status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_rt_p); + status = device_p->CreateRenderTargetView(img.texture.get(), &nv12_rt_desc, &nv12_UV_rt); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - nv12_UV_rt.reset(nv12_rt_p); D3D11_SAMPLER_DESC sampler_desc {}; sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; @@ -529,21 +509,16 @@ public: sampler_desc.MinLOD = 0; sampler_desc.MaxLOD = D3D11_FLOAT32_MAX; - sampler_state_t::pointer sampler_state_p; - status = device_p->CreateSamplerState(&sampler_desc, &sampler_state_p); + status = device_p->CreateSamplerState(&sampler_desc, &sampler_linear); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - sampler_linear.reset(sampler_state_p); - auto sampler_linear_p = sampler_linear.get(); - auto color_matrix_buf_p = color_matrix.get(); - auto info_buf_p = info_scene.get(); device_ctx_p->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); - device_ctx_p->PSSetSamplers(0, 1, &sampler_linear_p); - device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix_buf_p); - device_ctx_p->VSSetConstantBuffers(0, 1, &info_buf_p); + device_ctx_p->PSSetSamplers(0, 1, &sampler_linear); + device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix); + device_ctx_p->VSSetConstantBuffers(0, 1, &info_scene); device_ctx_p->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); device_ctx_p->IASetInputLayout(input_layout.get()); @@ -589,13 +564,12 @@ private: auto device = (device_t::pointer)data; - texture2d_t::pointer tex_p; - auto status = device->CreateTexture2D(&desc, nullptr, &tex_p); + texture2d_t tex; + auto status = device->CreateTexture2D(&desc, nullptr, &tex); if(status) { BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - texture2d_t tex { tex_p }; D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_desc { @@ -604,26 +578,22 @@ private: }; shader_resource_desc.Texture2D.MipLevels = 1; - shader_res_t::pointer shader_res_p; - device->CreateShaderResourceView(tex_p, &shader_resource_desc, &shader_res_p); + device->CreateShaderResourceView(tex.get(), &shader_resource_desc, &shader_res); if(status) { BOOST_LOG(error) << "Failed to create render target texture for luma [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - shader_res.reset(shader_res_p); D3D11_RENDER_TARGET_VIEW_DESC render_target_desc { format, D3D11_RTV_DIMENSION_TEXTURE2D }; - render_target_t::pointer render_target_p; - device->CreateRenderTargetView(tex_p, &render_target_desc, &render_target_p); + device->CreateRenderTargetView(tex.get(), &render_target_desc, &render_target); if(status) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - render_target.reset(render_target_p); return 0; } @@ -721,16 +691,15 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec t.Format = DXGI_FORMAT_B8G8R8A8_UNORM; t.BindFlags = D3D11_BIND_SHADER_RESOURCE; - dxgi::texture2d_t::pointer tex_p {}; - auto status = device->CreateTexture2D(&t, &data, &tex_p); + texture2d_t texture; + auto status = device->CreateTexture2D(&t, &data, &texture); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create mouse texture [0x"sv << util::hex(status).to_string_view() << ']'; return capture_e::error; } - texture2d_t texture { tex_p }; for(auto *hwdevice : hwdevices) { - if(hwdevice->set_cursor_texture(tex_p, t.Width, t.Height)) { + if(hwdevice->set_cursor_texture(texture.get(), t.Width, t.Height)) { return capture_e::error; } } @@ -747,15 +716,14 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec } if(frame_update_flag) { - texture2d_t::pointer src_p {}; - status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p); + texture2d_t src; + status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src); if(FAILED(status)) { BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']'; return capture_e::error; } - texture2d_t src { src_p }; device_ctx->CopyResource(img->texture.get(), src.get()); } @@ -775,15 +743,13 @@ std::shared_ptr display_vram_t::alloc_img() { t.Format = format; t.BindFlags = D3D11_BIND_SHADER_RESOURCE; - dxgi::texture2d_t::pointer tex_p {}; - auto status = device->CreateTexture2D(&t, nullptr, &tex_p); + auto status = device->CreateTexture2D(&t, nullptr, &img->texture); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']'; return nullptr; } - img->texture.reset(tex_p); - img->data = (std::uint8_t*)tex_p; + img->data = (std::uint8_t*)img->texture.get(); img->row_pitch = 0; img->pixel_pitch = 4; img->width = 0; @@ -813,15 +779,15 @@ int display_vram_t::dummy_img(platf::img_t *img_base) { t.Format = format; t.BindFlags = D3D11_BIND_SHADER_RESOURCE; - dxgi::texture2d_t::pointer tex_p {}; - auto status = device->CreateTexture2D(&t, &data, &tex_p); + dxgi::texture2d_t tex; + auto status = device->CreateTexture2D(&t, &data, &tex); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img->texture.reset(tex_p); - img->data = (std::uint8_t*)tex_p; + img->texture = std::move(tex); + img->data = (std::uint8_t*)img->texture.get(); img->height = height; img->width = width; img->pixel_pitch = 4; diff --git a/sunshine/utility.h b/sunshine/utility.h index 0c03c51d..63be108c 100644 --- a/sunshine/utility.h +++ b/sunshine/utility.h @@ -76,37 +76,6 @@ struct __either { template using either_t = typename __either::type; -template -struct __false_v; - -template -struct __false_v>> { - static constexpr std::nullopt_t value = std::nullopt; -}; - -template -struct __false_v || instantiation_of_v || instantiation_of_v) - >> { - static constexpr std::nullptr_t value = nullptr; -}; - -template -struct __false_v>> { - static constexpr bool value = false; -}; - -template -static constexpr auto false_v = __false_v::value; - -template -using optional_t = either_t< - (std::is_same_v || - instantiation_of_v || - instantiation_of_v || - std::is_pointer_v), - T, std::optional>; - template struct overloaded : Ts... { using Ts::operator()...; }; template overloaded(Ts...) -> overloaded; @@ -362,35 +331,6 @@ auto enm(T& val) -> std::underlying_type_t& { return *reinterpret_cast*>(&val); } -template -struct Function { - typedef ReturnType (*type)(Args...); -}; - -template::type function> -struct Destroy { - typedef T pointer; - - void operator()(pointer p) { - function(p); - } -}; - -template::type function> -using safe_ptr = std::unique_ptr>; - -// You cannot specialize an alias -template::type function> -using safe_ptr_v2 = std::unique_ptr>; - -template -void c_free(T *p) { - free(p); -} - -template -using c_ptr = safe_ptr>; - inline std::int64_t from_chars(const char *begin, const char *end) { std::int64_t res {}; std::int64_t mul = 1; @@ -436,6 +376,163 @@ public: } }; +// Compared to std::unique_ptr, it adds the ability to get the address of the pointer itself +template > +class uniq_ptr { +public: + using element_type = T; + using pointer = element_type*; + using deleter_type = D; + + constexpr uniq_ptr() noexcept : _p { nullptr } {} + constexpr uniq_ptr(std::nullptr_t) noexcept : _p { nullptr } {} + + uniq_ptr(const uniq_ptr &other) noexcept = delete; + uniq_ptr &operator=(const uniq_ptr &other) noexcept = delete; + + template + uniq_ptr(V *p) noexcept : _p { p } { + static_assert(std::is_same_v || std::is_same_v || std::is_base_of_v, "element_type must be base class of V"); + } + + template + uniq_ptr(std::unique_ptr &&uniq) noexcept : _p { uniq.release() } { + static_assert(std::is_same_v || std::is_same_v || std::is_base_of_v, "element_type must be base class of V"); + } + + template + uniq_ptr(uniq_ptr &&other) noexcept : _p { other.release() } { + static_assert(std::is_same_v || std::is_same_v || std::is_base_of_v, "element_type must be base class of V"); + } + + template + uniq_ptr &operator=(uniq_ptr &&other) noexcept { + static_assert(std::is_same_v || std::is_same_v || std::is_base_of_v, "element_type must be base class of V"); + reset(other.release()); + + return *this; + } + + template + uniq_ptr &operator=(std::unique_ptr &&uniq) noexcept { + static_assert(std::is_same_v || std::is_same_v || std::is_base_of_v, "element_type must be base class of V"); + + reset(uniq.release()); + + return *this; + } + + ~uniq_ptr() { + reset(); + } + + void reset(pointer p = pointer()) { + if(_p) { + _deleter(_p); + } + + _p = p; + } + + pointer release() { + auto tmp = _p; + _p = nullptr; + return tmp; + } + + pointer get() { + return _p; + } + + const pointer get() const { + return _p; + } + + const std::add_lvalue_reference_t operator*() const { + return *_p; + } + std::add_lvalue_reference_t operator*() { + return *_p; + } + const pointer operator->() const { + return _p; + } + pointer operator->() { + return _p; + } + pointer *operator&() const { + return &_p; + } + + pointer *operator&() { + return &_p; + } + + deleter_type& get_deleter() { + return _deleter; + } + + const deleter_type& get_deleter() const { + return _deleter; + } + + explicit operator bool() const { + return _p != nullptr; + } +protected: + pointer _p; + deleter_type _deleter; +}; + +template +bool operator==(const uniq_ptr& x, const uniq_ptr& y) { + return x.get() == y.get(); +} + +template +bool operator!=(const uniq_ptr& x, const uniq_ptr& y) { + return x.get() != y.get(); +} + +template +bool operator==(const std::unique_ptr& x, const uniq_ptr& y) { + return x.get() == y.get(); +} + +template +bool operator!=(const std::unique_ptr& x, const uniq_ptr& y) { + return x.get() != y.get(); +} + +template +bool operator==(const uniq_ptr& x, const std::unique_ptr& y) { + return x.get() == y.get(); +} + +template +bool operator!=(const uniq_ptr& x, const std::unique_ptr& y) { + return x.get() != y.get(); +} + +template +bool operator==(const uniq_ptr& x, std::nullptr_t) { + return !(bool)x; +} + +template +bool operator!=(const uniq_ptr& x, std::nullptr_t) { + return (bool)x; +} + +template +bool operator==(std::nullptr_t, const uniq_ptr& y) { + return !(bool)y; +} + +template +bool operator!=(std::nullptr_t, const uniq_ptr& y) { + return (bool)y; +} template class wrap_ptr { @@ -510,6 +607,43 @@ private: pointer _p; }; +template +struct __false_v; + +template +struct __false_v>> { + static constexpr std::nullopt_t value = std::nullopt; +}; + +template +struct __false_v || + instantiation_of_v || + instantiation_of_v || + instantiation_of_v + ) + >> { + static constexpr std::nullptr_t value = nullptr; +}; + +template +struct __false_v>> { + static constexpr bool value = false; +}; + +template +static constexpr auto false_v = __false_v::value; + +template +using optional_t = either_t< + (std::is_same_v || + instantiation_of_v || + instantiation_of_v || + instantiation_of_v || + std::is_pointer_v), + T, std::optional>; + template class buffer_t { public: @@ -569,6 +703,35 @@ T either(std::optional &&l, T &&r) { return std::forward(r); } +template +struct Function { + typedef ReturnType (*type)(Args...); +}; + +template::type function> +struct Destroy { + typedef T pointer; + + void operator()(pointer p) { + function(p); + } +}; + +template::type function> +using safe_ptr = uniq_ptr>; + +// You cannot specialize an alias +template::type function> +using safe_ptr_v2 = uniq_ptr>; + +template +void c_free(T *p) { + free(p); +} + +template +using c_ptr = safe_ptr>; + namespace endian { template struct endianness { From 6f428eb316310501dcd5f5e08500a2b10b968eef Mon Sep 17 00:00:00 2001 From: loki Date: Sun, 9 May 2021 11:56:53 +0200 Subject: [PATCH 16/18] Ensure no input remains in the task_pool before resetting --- sunshine/input.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sunshine/input.cpp b/sunshine/input.cpp index 402d4ce3..9a41dd03 100644 --- a/sunshine/input.cpp +++ b/sunshine/input.cpp @@ -402,11 +402,18 @@ void passthrough(std::shared_ptr &input, std::vector &&in task_pool.push(passthrough_helper, input, util::cmove(input_data)); } -void reset(){ - for(auto& kp : key_press){ +void reset() { + if(task_id) { + task_pool.cancel(task_id); + } + + // Ensure input is synchronous + task_pool.push([]() { + for(auto& kp : key_press) { platf::keyboard(platf_input, kp.first & 0x00FF, true); key_press[kp.first] = false; } + }); } void init() { From ade2ef3a15f4b2a3ad9e898203cfb8e238845840 Mon Sep 17 00:00:00 2001 From: loki Date: Sun, 9 May 2021 16:19:05 +0200 Subject: [PATCH 17/18] Fix profile values for amfvce --- sunshine/video.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/sunshine/video.cpp b/sunshine/video.cpp index ad298922..c6befe8d 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -58,20 +58,6 @@ enum class profile_hevc_e : int { }; } -namespace amd { - -enum class profile_h264_e : int { - main, - high, - constrained_baseline, - constrained_high, -}; - -enum class profile_hevc_e : int { - main, -}; -} - using ctx_t = util::safe_ptr; using frame_t = util::safe_ptr; using buffer_t = util::safe_ptr; @@ -303,7 +289,7 @@ static encoder_t nvenc { static encoder_t amdvce { "amdvce"sv, - { (int)amd::profile_h264_e::high, (int)amd::profile_hevc_e::main }, + { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN }, AV_HWDEVICE_TYPE_D3D11VA, AV_PIX_FMT_D3D11, AV_PIX_FMT_NV12, AV_PIX_FMT_P010, From 377b0868827e1a4120704bc1ce82a96efe8fdf71 Mon Sep 17 00:00:00 2001 From: loki Date: Sun, 9 May 2021 16:37:40 +0200 Subject: [PATCH 18/18] Fix amd_rc config options --- assets/sunshine.conf | 12 +++++------- sunshine/config.cpp | 16 ++++++---------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/assets/sunshine.conf b/assets/sunshine.conf index 356acd79..12377eda 100644 --- a/assets/sunshine.conf +++ b/assets/sunshine.conf @@ -183,13 +183,11 @@ # amd_preset = balanced # ####### rate control ##### -# auto -- let ffmpeg decide rate control -# constqp -- constant QP mode -# vbr -- variable bitrate -# cbr -- constant bitrate -# cbr_hq -- cbr high quality -# cbr_ld_hq -- cbr low delay high quality -# vbr_hq -- vbr high quality +# auto -- let ffmpeg decide rate control +# constqp -- constant QP mode +# vbr_latency -- Latency Constrained Variable Bitrate +# vbr_peak -- Peak Contrained Variable Bitrate +# cbr -- constant bitrate ########################## # amd_rc = auto diff --git a/sunshine/config.cpp b/sunshine/config.cpp index 88cc4502..9408578c 100644 --- a/sunshine/config.cpp +++ b/sunshine/config.cpp @@ -94,12 +94,10 @@ enum quality_e : int { }; enum rc_e : int { - constqp = 0x0, /**< Constant QP mode */ - vbr = 0x1, /**< Variable bitrate mode */ - cbr = 0x2, /**< Constant bitrate mode */ - cbr_ld_hq = 0x8, /**< low-delay CBR, high quality */ - cbr_hq = 0x10, /**< CBR, high quality (slower) */ - vbr_hq = 0x20 /**< VBR, high quality (slower) */ + constqp, /**< Constant QP mode */ + vbr_latency, /**< Latency Constrained Variable Bitrate */ + vbr_peak, /**< Peak Contrained Variable Bitrate */ + cbr, /**< Constant bitrate mode */ }; enum coder_e : int { @@ -121,11 +119,9 @@ std::optional quality_from_view(const std::string_view &quality) { std::optional rc_from_view(const std::string_view &rc) { #define _CONVERT_(x) if(rc == #x##sv) return x _CONVERT_(constqp); - _CONVERT_(vbr); + _CONVERT_(vbr_latency); + _CONVERT_(vbr_peak); _CONVERT_(cbr); - _CONVERT_(cbr_hq); - _CONVERT_(vbr_hq); - _CONVERT_(cbr_ld_hq); #undef _CONVERT_ return std::nullopt; }