From 6fe379d978c79b425ce317a738ccd015b88ecb50 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Fri, 9 Feb 2018 14:57:07 +0100 Subject: [PATCH] (D3D12) add a compute shader for mipmap generation. --- gfx/common/d3d12_common.c | 212 +++++++++++++++--- gfx/common/d3d12_common.h | 21 +- gfx/drivers/d3d12.c | 27 ++- gfx/drivers/d3d_shaders/bokeh_sm4.hlsl.h | 2 +- gfx/drivers/d3d_shaders/mimpapgen_sm5.h | 44 ++++ gfx/drivers/d3d_shaders/opaque_sm5.hlsl.h | 2 +- .../d3d_shaders/ribbon_simple_sm4.hlsl.h | 2 +- gfx/drivers/d3d_shaders/ribbon_sm4.hlsl.h | 2 +- .../d3d_shaders/simple_snow_sm4.hlsl.h | 2 +- gfx/drivers/d3d_shaders/snow_sm4.hlsl.h | 2 +- gfx/drivers/d3d_shaders/snowflake_sm4.hlsl.h | 2 +- gfx/drivers/d3d_shaders/sprite_sm4.hlsl.h | 2 +- menu/drivers_display/menu_display_d3d12.c | 11 +- 13 files changed, 273 insertions(+), 58 deletions(-) create mode 100644 gfx/drivers/d3d_shaders/mimpapgen_sm5.h diff --git a/gfx/common/d3d12_common.c b/gfx/common/d3d12_common.c index 4fd20d3e49..9a39dcc24d 100644 --- a/gfx/common/d3d12_common.c +++ b/gfx/common/d3d12_common.c @@ -329,27 +329,30 @@ bool d3d12_create_root_signature( bool d3d12_init_descriptors(d3d12_video_t* d3d12) { D3D12_ROOT_SIGNATURE_DESC desc; - D3D12_DESCRIPTOR_RANGE srv_tbl[] = { { D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1 } }; - D3D12_DESCRIPTOR_RANGE sampler_tbl[] = { { D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1 } }; - D3D12_ROOT_PARAMETER rootParameters[ROOT_ID_MAX]; + D3D12_DESCRIPTOR_RANGE srv_tbl[] = { { D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1 } }; + D3D12_DESCRIPTOR_RANGE uav_tbl[] = { { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1 } }; + D3D12_DESCRIPTOR_RANGE sampler_tbl[] = { { D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1 } }; + D3D12_STATIC_SAMPLER_DESC static_sampler = { D3D12_FILTER_MIN_MAG_MIP_POINT }; + D3D12_ROOT_PARAMETER root_params[ROOT_ID_MAX]; + D3D12_ROOT_PARAMETER cs_root_params[CS_ROOT_ID_MAX]; - rootParameters[ROOT_ID_TEXTURE_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - 
rootParameters[ROOT_ID_TEXTURE_T].DescriptorTable.NumDescriptorRanges = countof(srv_tbl); - rootParameters[ROOT_ID_TEXTURE_T].DescriptorTable.pDescriptorRanges = srv_tbl; - rootParameters[ROOT_ID_TEXTURE_T].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + root_params[ROOT_ID_TEXTURE_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_params[ROOT_ID_TEXTURE_T].DescriptorTable.NumDescriptorRanges = countof(srv_tbl); + root_params[ROOT_ID_TEXTURE_T].DescriptorTable.pDescriptorRanges = srv_tbl; + root_params[ROOT_ID_TEXTURE_T].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - rootParameters[ROOT_ID_SAMPLER_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - rootParameters[ROOT_ID_SAMPLER_T].DescriptorTable.NumDescriptorRanges = countof(sampler_tbl); - rootParameters[ROOT_ID_SAMPLER_T].DescriptorTable.pDescriptorRanges = sampler_tbl; - rootParameters[ROOT_ID_SAMPLER_T].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + root_params[ROOT_ID_SAMPLER_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_params[ROOT_ID_SAMPLER_T].DescriptorTable.NumDescriptorRanges = countof(sampler_tbl); + root_params[ROOT_ID_SAMPLER_T].DescriptorTable.pDescriptorRanges = sampler_tbl; + root_params[ROOT_ID_SAMPLER_T].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - rootParameters[ROOT_ID_UBO].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; - rootParameters[ROOT_ID_UBO].Descriptor.RegisterSpace = 0; - rootParameters[ROOT_ID_UBO].Descriptor.ShaderRegister = 0; - rootParameters[ROOT_ID_UBO].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_params[ROOT_ID_UBO].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + root_params[ROOT_ID_UBO].Descriptor.RegisterSpace = 0; + root_params[ROOT_ID_UBO].Descriptor.ShaderRegister = 0; + root_params[ROOT_ID_UBO].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - desc.NumParameters = countof(rootParameters); - desc.pParameters = rootParameters; + desc.NumParameters = countof(root_params); + 
desc.pParameters = root_params; desc.NumStaticSamplers = 0; desc.pStaticSamplers = NULL; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; @@ -360,13 +363,51 @@ bool d3d12_init_descriptors(d3d12_video_t* d3d12) sampler_tbl[0].NumDescriptors = SLANG_NUM_BINDINGS; d3d12_create_root_signature(d3d12->device, &desc, &d3d12->desc.sl_rootSignature); + cs_root_params[CS_ROOT_ID_TEXTURE_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + cs_root_params[CS_ROOT_ID_TEXTURE_T].DescriptorTable.NumDescriptorRanges = countof(srv_tbl); + cs_root_params[CS_ROOT_ID_TEXTURE_T].DescriptorTable.pDescriptorRanges = srv_tbl; + cs_root_params[CS_ROOT_ID_TEXTURE_T].ShaderVisibility = 0; + + cs_root_params[CS_ROOT_ID_UAV_T].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + cs_root_params[CS_ROOT_ID_UAV_T].DescriptorTable.NumDescriptorRanges = countof(uav_tbl); + cs_root_params[CS_ROOT_ID_UAV_T].DescriptorTable.pDescriptorRanges = uav_tbl; + cs_root_params[CS_ROOT_ID_UAV_T].ShaderVisibility = 0; + + cs_root_params[CS_ROOT_ID_CONSTANTS].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + cs_root_params[CS_ROOT_ID_CONSTANTS].Constants.Num32BitValues = 3; + cs_root_params[CS_ROOT_ID_CONSTANTS].Constants.RegisterSpace = 0; + cs_root_params[CS_ROOT_ID_CONSTANTS].Constants.ShaderRegister = 0; + cs_root_params[CS_ROOT_ID_CONSTANTS].ShaderVisibility = 0; + + static_sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + static_sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + static_sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; +#if 0 + static_sampler.MaxAnisotropy = 1; + static_sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + static_sampler.MinLOD = -D3D12_FLOAT32_MAX; + static_sampler.MaxLOD = D3D12_FLOAT32_MAX; +#endif + + desc.NumParameters = countof(cs_root_params); + desc.pParameters = cs_root_params; + desc.NumStaticSamplers = 1; + desc.pStaticSamplers = &static_sampler; + desc.Flags = 
D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; + + d3d12_create_root_signature(d3d12->device, &desc, &d3d12->desc.cs_rootSignature); + d3d12->desc.rtv_heap.desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; d3d12->desc.rtv_heap.desc.NumDescriptors = countof(d3d12->chain.renderTargets) + GFX_MAX_SHADERS; d3d12->desc.rtv_heap.desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; d3d12_init_descriptor_heap(d3d12->device, &d3d12->desc.rtv_heap); d3d12->desc.srv_heap.desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - d3d12->desc.srv_heap.desc.NumDescriptors = 256; + d3d12->desc.srv_heap.desc.NumDescriptors = 1024; d3d12->desc.srv_heap.desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; d3d12_init_descriptor_heap(d3d12->device, &d3d12->desc.srv_heap); @@ -518,29 +559,62 @@ d3d12_create_buffer(D3D12Device device, UINT size_in_bytes, D3D12Resource* buffe void d3d12_release_texture(d3d12_texture_t* texture) { - if (texture->srv_heap) - d3d12_descriptor_heap_slot_free(texture->srv_heap, texture->cpu_descriptor); + if (!texture->handle) + return; + + if (texture->srv_heap && texture->desc.MipLevels <= countof(texture->cpu_descriptor)) + { + int i; + for (i = 0; i < texture->desc.MipLevels; i++) + { + d3d12_descriptor_heap_slot_free(texture->srv_heap, texture->cpu_descriptor[i]); + texture->cpu_descriptor[i].ptr = 0; + } + } Release(texture->handle); Release(texture->upload_buffer); } void d3d12_init_texture(D3D12Device device, d3d12_texture_t* texture) { - bool is_render_target = texture->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - D3D12_FORMAT_SUPPORT1 format_support = - D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; - d3d12_release_texture(texture); + if (texture->desc.MipLevels > 1) { + unsigned 
width = texture->desc.Width >> 5; + unsigned height = texture->desc.Height >> 5; + texture->desc.MipLevels = 1; + while (width && height) + { + width >>= 1; + height >>= 1; + texture->desc.MipLevels++; + } + } + else + texture->desc.MipLevels = 1; + + { + D3D12_FEATURE_DATA_FORMAT_SUPPORT format_support = { + texture->desc.Format, D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE + }; D3D12_HEAP_PROPERTIES heap_props = { D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1 }; + if (texture->desc.MipLevels > 1) + { + texture->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + format_support.Support1 |= D3D12_FORMAT_SUPPORT1_MIP; + format_support.Support2 |= D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE; + } + + if (texture->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) + format_support.Support1 |= D3D12_FORMAT_SUPPORT1_RENDER_TARGET; + texture->desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; texture->desc.DepthOrArraySize = 1; - texture->desc.MipLevels = 1; texture->desc.SampleDesc.Count = 1; - texture->desc.Format = d3d12_get_closest_match(device, texture->desc.Format, format_support); + texture->desc.Format = d3d12_get_closest_match(device, &format_support); D3D12CreateCommittedResource( device, &heap_props, D3D12_HEAP_FLAG_NONE, &texture->desc, @@ -549,19 +623,35 @@ void d3d12_init_texture(D3D12Device device, d3d12_texture_t* texture) if (texture->srv_heap) { + int i; D3D12_SHADER_RESOURCE_VIEW_DESC view_desc = { texture->desc.Format }; view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; view_desc.Texture2D.MipLevels = texture->desc.MipLevels; - texture->cpu_descriptor = d3d12_descriptor_heap_slot_alloc(texture->srv_heap); - D3D12CreateShaderResourceView(device, texture->handle, &view_desc, texture->cpu_descriptor); - texture->gpu_descriptor.ptr = - texture->cpu_descriptor.ptr - texture->srv_heap->cpu.ptr + 
texture->srv_heap->gpu.ptr; + texture->cpu_descriptor[0] = d3d12_descriptor_heap_slot_alloc(texture->srv_heap); + D3D12CreateShaderResourceView( + device, texture->handle, &view_desc, texture->cpu_descriptor[0]); + texture->gpu_descriptor[0].ptr = texture->cpu_descriptor[0].ptr - texture->srv_heap->cpu.ptr + + texture->srv_heap->gpu.ptr; + + for (i = 1; i < texture->desc.MipLevels; i++) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC desc = { texture->desc.Format }; + + desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + desc.Texture2D.MipSlice = i; + + texture->cpu_descriptor[i] = d3d12_descriptor_heap_slot_alloc(texture->srv_heap); + D3D12CreateUnorderedAccessView( + device, texture->handle, NULL, &desc, texture->cpu_descriptor[i]); + texture->gpu_descriptor[i].ptr = texture->cpu_descriptor[i].ptr - + texture->srv_heap->cpu.ptr + texture->srv_heap->gpu.ptr; + } } - if (is_render_target) + if (texture->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) { } else @@ -631,6 +721,56 @@ void d3d12_upload_texture(D3D12GraphicsCommandList cmd, d3d12_texture_t* texture cmd, texture->handle, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + if (texture->desc.MipLevels > 1) + { + unsigned i; + d3d12_video_t* d3d12 = (d3d12_video_t*)video_driver_get_ptr(false); + + D3D12SetComputeRootSignature(cmd, d3d12->desc.cs_rootSignature); + D3D12SetPipelineState(cmd, d3d12->mipmapgen_pipe); + D3D12SetComputeRootDescriptorTable(cmd, CS_ROOT_ID_TEXTURE_T, texture->gpu_descriptor[0]); + + for (i = 1; i < texture->desc.MipLevels; i++) + { + unsigned width = texture->desc.Width >> i; + unsigned height = texture->desc.Height >> i; + struct + { + uint32_t src_level; + float texel_size[2]; + } cbuffer = { i - 1, { 1.0f / width, 1.0f / height } }; + + { + D3D12_RESOURCE_BARRIER barrier = { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION }; + barrier.Transition.pResource = texture->handle; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + 
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.Subresource = i; + D3D12ResourceBarrier(cmd, 1, &barrier); + } + + D3D12SetComputeRootDescriptorTable(cmd, CS_ROOT_ID_UAV_T, texture->gpu_descriptor[i]); + D3D12SetComputeRoot32BitConstants( + cmd, CS_ROOT_ID_CONSTANTS, sizeof(cbuffer) / sizeof(uint32_t), &cbuffer, 0); + D3D12Dispatch(cmd, (width + 0x7) >> 3, (height + 0x7) >> 3, 1); + + { + D3D12_RESOURCE_BARRIER barrier = { D3D12_RESOURCE_BARRIER_TYPE_UAV }; + barrier.UAV.pResource = texture->handle; + D3D12ResourceBarrier(cmd, 1, &barrier); + } + + { + D3D12_RESOURCE_BARRIER barrier = { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION }; + barrier.Transition.pResource = texture->handle; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + barrier.Transition.Subresource = i; + D3D12ResourceBarrier(cmd, 1, &barrier); + } + } + } + texture->dirty = false; } @@ -658,11 +798,10 @@ void d3d12_create_fullscreen_quad_vbo( } } -DXGI_FORMAT d3d12_get_closest_match( - D3D12Device device, DXGI_FORMAT desired_format, D3D12_FORMAT_SUPPORT1 desired_format_support) +DXGI_FORMAT d3d12_get_closest_match(D3D12Device device, D3D12_FEATURE_DATA_FORMAT_SUPPORT* desired) { - DXGI_FORMAT default_list[] = { desired_format, DXGI_FORMAT_UNKNOWN }; - DXGI_FORMAT* format = dxgi_get_format_fallback_list(desired_format); + DXGI_FORMAT default_list[] = { desired->Format, DXGI_FORMAT_UNKNOWN }; + DXGI_FORMAT* format = dxgi_get_format_fallback_list(desired->Format); if (!format) format = default_list; @@ -672,7 +811,8 @@ DXGI_FORMAT d3d12_get_closest_match( D3D12_FEATURE_DATA_FORMAT_SUPPORT format_support = { *format }; if (SUCCEEDED(D3D12CheckFeatureSupport( device, D3D12_FEATURE_FORMAT_SUPPORT, &format_support, sizeof(format_support))) && - ((format_support.Support1 & desired_format_support) == desired_format_support)) + ((format_support.Support1 & 
desired->Support1) == desired->Support1) && + ((format_support.Support2 & desired->Support2) == desired->Support2)) break; format++; } diff --git a/gfx/common/d3d12_common.h b/gfx/common/d3d12_common.h index 0d72221f8c..bb8a0d613c 100644 --- a/gfx/common/d3d12_common.h +++ b/gfx/common/d3d12_common.h @@ -1302,8 +1302,9 @@ typedef struct D3D12Resource handle; D3D12Resource upload_buffer; D3D12_RESOURCE_DESC desc; - D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_descriptor; + /* the first view is srv, the rest are mip levels uavs */ + D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor[D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP - 5]; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_descriptor[D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP - 5]; D3D12_GPU_DESCRIPTOR_HANDLE sampler; D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; UINT num_rows; @@ -1354,6 +1355,7 @@ typedef struct struct { + D3D12RootSignature cs_rootSignature; /* descriptor layout */ D3D12RootSignature sl_rootSignature; /* descriptor layout */ D3D12RootSignature rootSignature; /* descriptor layout */ d3d12_descriptor_heap_t srv_heap; /* ShaderResouceView descritor heap */ @@ -1410,6 +1412,7 @@ typedef struct } sprites; D3D12PipelineState pipes[GFX_MAX_SHADERS]; + D3D12PipelineState mipmapgen_pipe; d3d12_uniform_t ubo_values; D3D12Resource ubo; D3D12_CONSTANT_BUFFER_VIEW_DESC ubo_view; @@ -1435,6 +1438,13 @@ typedef enum { ROOT_ID_MAX, } root_signature_parameter_index_t; +typedef enum { + CS_ROOT_ID_TEXTURE_T = 0, + CS_ROOT_ID_UAV_T, + CS_ROOT_ID_CONSTANTS, + CS_ROOT_ID_MAX, +} compute_root_index_t; + RETRO_BEGIN_DECLS extern D3D12_RENDER_TARGET_BLEND_DESC d3d12_blend_enable_desc; @@ -1475,8 +1485,7 @@ void d3d12_upload_texture(D3D12GraphicsCommandList cmd, d3d12_texture_t* texture void d3d12_create_fullscreen_quad_vbo( D3D12Device device, D3D12_VERTEX_BUFFER_VIEW* view, D3D12Resource* vbo); -DXGI_FORMAT d3d12_get_closest_match( - D3D12Device device, DXGI_FORMAT desired_format, D3D12_FORMAT_SUPPORT1 
desired_format_support); +DXGI_FORMAT d3d12_get_closest_match(D3D12Device device, D3D12_FEATURE_DATA_FORMAT_SUPPORT* desired); #if !defined(__cplusplus) || defined(CINTERFACE) static INLINE void d3d12_resource_transition( @@ -1497,7 +1506,7 @@ static INLINE void d3d12_resource_transition( static INLINE void d3d12_set_texture(D3D12GraphicsCommandList cmd, const d3d12_texture_t* texture) { - D3D12SetGraphicsRootDescriptorTable(cmd, ROOT_ID_TEXTURE_T, texture->gpu_descriptor); + D3D12SetGraphicsRootDescriptorTable(cmd, ROOT_ID_TEXTURE_T, texture->gpu_descriptor[0]); } static INLINE void @@ -1509,7 +1518,7 @@ d3d12_set_sampler(D3D12GraphicsCommandList cmd, D3D12_GPU_DESCRIPTOR_HANDLE samp static INLINE void d3d12_set_texture_and_sampler(D3D12GraphicsCommandList cmd, const d3d12_texture_t* texture) { - D3D12SetGraphicsRootDescriptorTable(cmd, ROOT_ID_TEXTURE_T, texture->gpu_descriptor); + D3D12SetGraphicsRootDescriptorTable(cmd, ROOT_ID_TEXTURE_T, texture->gpu_descriptor[0]); D3D12SetGraphicsRootDescriptorTable(cmd, ROOT_ID_SAMPLER_T, texture->sampler); } diff --git a/gfx/drivers/d3d12.c b/gfx/drivers/d3d12.c index a59eef879c..c1f0aadd37 100644 --- a/gfx/drivers/d3d12.c +++ b/gfx/drivers/d3d12.c @@ -94,6 +94,7 @@ static bool d3d12_gfx_init_pipelines(d3d12_video_t* d3d12) D3DBlob vs_code = NULL; D3DBlob ps_code = NULL; D3DBlob gs_code = NULL; + D3DBlob cs_code = NULL; D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { 0 }; desc.BlendState.RenderTarget[0] = d3d12_blend_enable_desc; @@ -324,12 +325,29 @@ static bool d3d12_gfx_init_pipelines(d3d12_video_t* d3d12) ps_code = NULL; } + { + static const char shader[] = +#include "d3d_shaders/mimpapgen_sm5.h" + ; + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {d3d12->desc.cs_rootSignature}; + if (!d3d_compile(shader, sizeof(shader), NULL, "CSMain", "cs_5_0", &cs_code)) + goto error; + + desc.CS.pShaderBytecode = D3DGetBufferPointer(cs_code); + desc.CS.BytecodeLength = D3DGetBufferSize(cs_code); + 
if (FAILED(D3D12CreateComputePipelineState(d3d12->device, &desc, &d3d12->mipmapgen_pipe))) /* NOTE(review): assumes the wrapper returns an HRESULT, as D3D12CheckFeatureSupport does - confirm */ + goto error; /* the error path below releases cs_code */ + Release(cs_code); + cs_code = NULL; + } + return true; error: Release(vs_code); Release(ps_code); Release(gs_code); + Release(cs_code); return false; } @@ -340,7 +358,7 @@ static void d3d12_gfx_free(void* data) font_driver_free_osd(); - Release(d3d12->sprites.vbo); + Release(d3d12->sprites.vbo); Release(d3d12->menu_pipeline_vbo); Release(d3d12->frame.ubo); @@ -358,6 +376,7 @@ static void d3d12_gfx_free(void* data) Release(d3d12->desc.srv_heap.handle); Release(d3d12->desc.rtv_heap.handle); + Release(d3d12->desc.cs_rootSignature); Release(d3d12->desc.sl_rootSignature); Release(d3d12->desc.rootSignature); @@ -366,6 +385,7 @@ static void d3d12_gfx_free(void* data) for (i = 0; i < GFX_MAX_SHADERS; i++) Release(d3d12->pipes[i]); + Release(d3d12->mipmapgen_pipe); Release(d3d12->sprites.pipe_blend); Release(d3d12->sprites.pipe_noblend); Release(d3d12->sprites.pipe_font); @@ -810,16 +830,15 @@ static uintptr_t d3d12_gfx_load_texture( if (!texture) return 0; - /* todo : mipmapping */ switch (filter_type) { case TEXTURE_FILTER_MIPMAP_LINEAR: - /* fallthrough */ + texture->desc.MipLevels = UINT16_MAX; /* request a full mip chain, then fallthrough to pick the sampler */ case TEXTURE_FILTER_LINEAR: texture->sampler = d3d12->samplers[RARCH_FILTER_LINEAR][RARCH_WRAP_EDGE]; break; case TEXTURE_FILTER_MIPMAP_NEAREST: - /* fallthrough */ + texture->desc.MipLevels = UINT16_MAX; /* request a full mip chain, then fallthrough to pick the sampler */ case TEXTURE_FILTER_NEAREST: texture->sampler = d3d12->samplers[RARCH_FILTER_NEAREST][RARCH_WRAP_EDGE]; break; diff --git a/gfx/drivers/d3d_shaders/bokeh_sm4.hlsl.h b/gfx/drivers/d3d_shaders/bokeh_sm4.hlsl.h index b0a66b9b35..5b79f63040 100644 --- a/gfx/drivers/d3d_shaders/bokeh_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/bokeh_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) 
#__VA_ARGS__ SRC( struct UBO { diff --git a/gfx/drivers/d3d_shaders/mimpapgen_sm5.h b/gfx/drivers/d3d_shaders/mimpapgen_sm5.h new file mode 100644 index 0000000000..937b9a2ab0 --- /dev/null +++ b/gfx/drivers/d3d_shaders/mimpapgen_sm5.h @@ -0,0 +1,44 @@ +/* RetroArch - A frontend for libretro. + * Copyright (C) 2014-2018 - Ali Bouhlel + * + * RetroArch is free software: you can redistribute it and/or modify it under the terms + * of the GNU General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with RetroArch. + * If not, see <http://www.gnu.org/licenses/>. + */ + +#define SRC(...) #__VA_ARGS__ +SRC( + + Texture2D<float4> t0; + RWTexture2D<float4> u0; + sampler s0; + + cbuffer CBr + { + uint src_level; + float2 texel_size; + } + + static float w[4]= {0.090845, 0.409155, 0.409155, 0.090845}; + [numthreads(8, 8, 1)] + void CSMain(uint3 DTid : SV_DispatchThreadID) + { + int i; + int j; + float4 c = 0.0f; + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + c += w[i] * w[j] * t0.SampleLevel(s0, texel_size * (DTid.xy + 0.5f * float2(i - 0.5f,j - 0.5f)), src_level); + + u0[DTid.xy] = c; + return; + + } +) diff --git a/gfx/drivers/d3d_shaders/opaque_sm5.hlsl.h b/gfx/drivers/d3d_shaders/opaque_sm5.hlsl.h index 8822347900..a8a01d0fb7 100644 --- a/gfx/drivers/d3d_shaders/opaque_sm5.hlsl.h +++ b/gfx/drivers/d3d_shaders/opaque_sm5.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) 
#__VA_ARGS__ SRC( struct UBO { diff --git a/gfx/drivers/d3d_shaders/ribbon_simple_sm4.hlsl.h b/gfx/drivers/d3d_shaders/ribbon_simple_sm4.hlsl.h index 22b1f11d36..fe5347f82e 100644 --- a/gfx/drivers/d3d_shaders/ribbon_simple_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/ribbon_simple_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) #__VA_ARGS__ SRC( struct UBO diff --git a/gfx/drivers/d3d_shaders/ribbon_sm4.hlsl.h b/gfx/drivers/d3d_shaders/ribbon_sm4.hlsl.h index f50f725d2e..e0cbc223de 100644 --- a/gfx/drivers/d3d_shaders/ribbon_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/ribbon_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) #__VA_ARGS__ SRC( struct PSInput diff --git a/gfx/drivers/d3d_shaders/simple_snow_sm4.hlsl.h b/gfx/drivers/d3d_shaders/simple_snow_sm4.hlsl.h index c26eaedf15..ba919266fd 100644 --- a/gfx/drivers/d3d_shaders/simple_snow_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/simple_snow_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) #__VA_ARGS__ SRC( struct UBO { diff --git a/gfx/drivers/d3d_shaders/snow_sm4.hlsl.h b/gfx/drivers/d3d_shaders/snow_sm4.hlsl.h index 3b53bbf978..fd42e1425d 100644 --- a/gfx/drivers/d3d_shaders/snow_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/snow_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) #__VA_ARGS__ SRC( struct UBO { diff --git a/gfx/drivers/d3d_shaders/snowflake_sm4.hlsl.h b/gfx/drivers/d3d_shaders/snowflake_sm4.hlsl.h index 899dfd642c..c02908e442 100644 --- a/gfx/drivers/d3d_shaders/snowflake_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/snowflake_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) #__VA_ARGS__ SRC( struct UBO { diff --git a/gfx/drivers/d3d_shaders/sprite_sm4.hlsl.h b/gfx/drivers/d3d_shaders/sprite_sm4.hlsl.h index 7330bbf507..1a5feefbe7 100644 --- a/gfx/drivers/d3d_shaders/sprite_sm4.hlsl.h +++ b/gfx/drivers/d3d_shaders/sprite_sm4.hlsl.h @@ -1,5 +1,5 @@ -#define SRC(src) #src +#define SRC(...) 
#__VA_ARGS__ SRC( struct VSInput diff --git a/menu/drivers_display/menu_display_d3d12.c b/menu/drivers_display/menu_display_d3d12.c index 8006b728f6..2702dc02bd 100644 --- a/menu/drivers_display/menu_display_d3d12.c +++ b/menu/drivers_display/menu_display_d3d12.c @@ -161,12 +161,15 @@ static void menu_display_d3d12_draw(void* data) { d3d12_texture_t* texture = (d3d12_texture_t*)draw->texture; if (texture->dirty) + { d3d12_upload_texture(d3d12->queue.cmd, texture); - d3d12_set_texture_and_sampler(d3d12->queue.cmd, texture); - D3D12SetGraphicsRootDescriptorTable( - d3d12->queue.cmd, ROOT_ID_SAMPLER_T, - d3d12->samplers[RARCH_FILTER_NEAREST][RARCH_WRAP_BORDER]); + if (vertex_count > 1) + D3D12SetPipelineState(d3d12->queue.cmd, d3d12->pipes[VIDEO_SHADER_STOCK_BLEND]); + else + D3D12SetPipelineState(d3d12->queue.cmd, d3d12->sprites.pipe); + } + d3d12_set_texture_and_sampler(d3d12->queue.cmd, texture); } D3D12DrawInstanced(d3d12->queue.cmd, vertex_count, 1, d3d12->sprites.offset, 0);