fix(win/video): don't offload chroma subsampling math to texture sampler when downscaling (#3014)

* Don't use sampler math for chroma if downscaling

* Correct portrait rotation offsets
This commit is contained in:
ns6089 2024-09-03 03:02:05 +03:00 committed by GitHub
parent 9d7e90ec2e
commit 7ce8547d6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 177 additions and 93 deletions

View File

@ -107,6 +107,10 @@ namespace platf::dxgi {
blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0_vs_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0s_vs_hlsl;
blob_t convert_yuv420_planar_y_ps_hlsl;
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
@ -488,6 +492,110 @@ namespace platf::dxgi {
frame_texture->AddRef();
output_texture.reset(frame_texture);
HRESULT status = S_OK;
#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
const bool downscaling = display->width > width || display->height > height;
switch (format) {
case DXGI_FORMAT_NV12:
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;
case DXGI_FORMAT_P010:
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
break;
case DXGI_FORMAT_R16_UINT:
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;
case DXGI_FORMAT_AYUV:
// Packed 8-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;
case DXGI_FORMAT_Y410:
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;
default:
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;
}
#undef create_vertex_shader_helper
#undef create_pixel_shader_helper
auto out_width = width;
auto out_height = height;
@ -676,83 +784,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
switch (format) {
case DXGI_FORMAT_NV12:
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
break;
case DXGI_FORMAT_P010:
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;
case DXGI_FORMAT_R16_UINT:
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;
case DXGI_FORMAT_AYUV:
// Packed 8-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;
case DXGI_FORMAT_Y410:
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;
default:
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;
}
#undef create_vertex_shader_helper
#undef create_pixel_shader_helper
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
@ -1923,6 +1954,10 @@ namespace platf::dxgi {
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);

View File

@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset.x, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}

View File

@ -0,0 +1,5 @@
#include "include/convert_base.hlsl"
#define LEFT_SUBSAMPLING_SCALE
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"

View File

@ -0,0 +1,5 @@
#include "include/convert_linear_base.hlsl"
#define LEFT_SUBSAMPLING_SCALE
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"

View File

@ -0,0 +1,5 @@
#include "include/convert_perceptual_quantizer_base.hlsl"
#define LEFT_SUBSAMPLING_SCALE
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"

View File

@ -0,0 +1,15 @@
cbuffer subsample_offset_cbuffer : register(b0) {
float2 subsample_offset;
};
cbuffer rotate_texture_steps_cbuffer : register(b1) {
int rotate_texture_steps;
};
#define LEFT_SUBSAMPLING_SCALE
#include "include/base_vs.hlsl"
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}

View File

@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}

View File

@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}

View File

@ -15,7 +15,7 @@ cbuffer color_matrix_cbuffer : register(b3) {
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, float2(0, 0), rotate_texture_steps);
output.viewport = vertex_id / 3;

View File

@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b2) {
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}

View File

@ -1,12 +1,6 @@
#include "include/base_vs_types.hlsl"
#if defined(LEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps)
#elif defined(TOPLEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps)
#else
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_steps)
#endif
{
vertex_t output;
float2 tex_coord;
@ -30,11 +24,24 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
sin(rotation_radians), cos(rotation_radians) };
float2 rotation_center = { 0.5, 0.5 };
tex_coord = round(rotation_center + mul(rotation_matrix, tex_coord - rotation_center));
if (rotate_texture_steps % 2) {
subsample_offset.xy = subsample_offset.yx;
}
}
#if defined(LEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y);
#elif defined (TOPLEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float2 halfsample_offset = subsample_offset / 2;
float3 right_center_left = float3(tex_coord.x + halfsample_offset.x,
tex_coord.x - halfsample_offset.x,
tex_coord.x - 3 * halfsample_offset.x);
float2 top_bottom = float2(tex_coord.y - halfsample_offset.y,
tex_coord.y + halfsample_offset.y);
output.tex_right_center_left_top = float4(right_center_left, top_bottom.x);
output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y);
#elif defined(TOPLEFT_SUBSAMPLING)
output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y);
output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#else

View File

@ -3,9 +3,12 @@ struct vertex_t
float4 viewpoint_pos : SV_Position;
#if defined(LEFT_SUBSAMPLING)
float3 tex_right_left_center : TEXCOORD;
#elif defined (TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD;
float3 tex_right_left_bottom : TEXCOORD;
#elif defined(LEFT_SUBSAMPLING_SCALE)
float4 tex_right_center_left_top : TEXCOORD0;
float4 tex_right_center_left_bottom : TEXCOORD1;
#elif defined(TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD0;
float3 tex_right_left_bottom : TEXCOORD1;
#else
float2 tex_coord : TEXCOORD;
#endif

View File

@ -17,6 +17,15 @@ float2 main_ps(vertex_t input) : SV_Target
float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb;
float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center
rgb *= 2;
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left
rgb = CONVERT_FUNCTION(rgb * (1./8));
#elif defined(TOPLEFT_SUBSAMPLING)
float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb;
float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb;