From 56316549da04385fdf1ed4a6517a65c47bd4832e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dar=C3=ADo?= Date: Fri, 12 Jul 2024 00:15:21 -0300 Subject: [PATCH] Use DXIL Linker to reduce compilation time of specializations significantly. (#55) * Use DXIL Linker to reduce shader optimization time. * Use string view on render params text. --- CMakeLists.txt | 41 ++++++-- src/render/rt64_raster_shader.cpp | 92 ++++++++++------- src/render/rt64_raster_shader.h | 4 +- src/render/rt64_raster_shader_cache.cpp | 8 +- src/render/rt64_shader_compiler.cpp | 66 ++++++++---- src/render/rt64_shader_compiler.h | 4 + src/shaders/Library.hlsli | 11 ++ src/shaders/RasterPS.hlsl | 45 ++++---- src/shaders/RasterVS.hlsl | 4 +- src/shaders/RenderParams.hlsli | 5 + src/shaders/TextureSampler.hlsli | 1 + src/shared/rt64_blender.h | 1 + src/shared/rt64_render_flags.h | 130 ++++++++++++++++++++++++ src/shared/rt64_render_params.h | 117 +-------------------- 14 files changed, 334 insertions(+), 195 deletions(-) create mode 100644 src/shaders/Library.hlsli create mode 100644 src/shaders/RenderParams.hlsli create mode 100644 src/shared/rt64_render_flags.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 818eff9..79383b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,10 +69,11 @@ set(ANDROID_ABI arm64-v8a) set (DXC_COMMON_OPTS "-I${PROJECT_SOURCE_DIR}/src") set (DXC_DXIL_OPTS "-Wno-ignored-attributes") set (DXC_SPV_OPTS "-spirv" "-fspv-target-env=vulkan1.0" "-fvk-use-dx-layout") -set (DXC_PS_OPTS "${DXC_COMMON_OPTS}" "-E" "PSMain" "-T ps_6_0") -set (DXC_VS_OPTS "${DXC_COMMON_OPTS}" "-E" "VSMain" "-T vs_6_0" "-fvk-invert-y") -set (DXC_CS_OPTS "${DXC_COMMON_OPTS}" "-E" "CSMain" "-T cs_6_0") -set (DXC_GS_OPTS "${DXC_COMMON_OPTS}" "-E" "GSMain" "-T gs_6_0") +set (DXC_LB_OPTS "${DXC_COMMON_OPTS}" "-D" "LIBRARY" "-T" "lib_6_3") +set (DXC_PS_OPTS "${DXC_COMMON_OPTS}" "-E" "PSMain" "-T ps_6_3") +set (DXC_VS_OPTS "${DXC_COMMON_OPTS}" "-E" "VSMain" "-T vs_6_3" "-fvk-invert-y") +set (DXC_CS_OPTS 
"${DXC_COMMON_OPTS}" "-E" "CSMain" "-T cs_6_3") +set (DXC_GS_OPTS "${DXC_COMMON_OPTS}" "-E" "GSMain" "-T gs_6_3") set (DXC_RT_OPTS "${DXC_COMMON_OPTS}" "-D" "RT_SHADER" "-T" "lib_6_3" "-fspv-target-env=vulkan1.1spirv1.4" "-fspv-extension=SPV_KHR_ray_tracing" "-fspv-extension=SPV_EXT_descriptor_indexing") function(build_shader_spirv_impl TARGETOBJ FILENAME TARGET_NAME OUTNAME) @@ -119,6 +120,25 @@ function(build_shader TARGETOBJ SHADERNAME OPTIONS) build_shader_spirv_impl(${TARGETOBJ} ${FILENAME} ${TARGET_NAME} ${OUTNAME} ${OPTIONS} ${EXTRA_ARGS}) endfunction() +function(build_shader_dxil TARGETOBJ SHADERNAME OPTIONS) + set(FILENAME "${PROJECT_SOURCE_DIR}/${SHADERNAME}") + if (${ARGC} GREATER 3) + set(OUTNAME "${CMAKE_BINARY_DIR}/${ARGV3}") + else() + set(OUTNAME "${CMAKE_BINARY_DIR}/${SHADERNAME}") + endif() + # Get any optional compiler args passed to this function + if (${ARGC} GREATER 4) + set(EXTRA_ARGS "${ARGN}") + list(REMOVE_AT EXTRA_ARGS 0) + endif() + cmake_path(GET OUTNAME STEM TARGET_NAME) + cmake_path(GET OUTNAME PARENT_PATH OUTPUT_DIR) + file(MAKE_DIRECTORY ${OUTPUT_DIR}) + # Compile DXIL shader binaries if building on Windows + build_shader_dxil_impl(${TARGETOBJ} ${FILENAME} ${TARGET_NAME} ${OUTNAME} ${OPTIONS} ${EXTRA_ARGS}) +endfunction() + function(build_shader_spirv TARGETOBJ SHADERNAME OPTIONS) set(FILENAME "${PROJECT_SOURCE_DIR}/${SHADERNAME}") if (${ARGC} GREATER 3) @@ -151,6 +171,10 @@ function(preprocess_shader TARGETOBJ SHADERNAME) target_sources(${TARGETOBJ} PRIVATE ${OUTNAME}.rw.c) endfunction() +function(build_library_shader TARGETOBJ SHADERNAME) + build_shader_dxil(${TARGETOBJ} ${SHADERNAME} "${DXC_LB_OPTS}" ${ARGN}) +endfunction() + function(build_pixel_shader TARGETOBJ SHADERNAME) build_shader(${TARGETOBJ} ${SHADERNAME} "${DXC_PS_OPTS}" ${ARGN}) endfunction() @@ -356,8 +380,13 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") target_link_libraries(rt64 ${X11_LIBRARIES} ${X11_Xrandr_LIB}) endif() -preprocess_shader(rt64 
"src/shaders/RasterPS.hlsl") -preprocess_shader(rt64 "src/shaders/RasterVS.hlsl") +preprocess_shader(rt64 "src/shaders/RenderParams.hlsli") + +if (${WIN32}) + build_library_shader(rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSLibrary.hlsl") + build_library_shader(rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSLibraryMS.hlsl" "-D MULTISAMPLING") + build_library_shader(rt64 "src/shaders/RasterVS.hlsl" "src/shaders/RasterVSLibrary.hlsl") +endif() build_pixel_shader( rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSDynamic.hlsl" "-D DYNAMIC_RENDER_PARAMS") build_pixel_shader( rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSDynamicMS.hlsl" "-D DYNAMIC_RENDER_PARAMS" "-D MULTISAMPLING") diff --git a/src/render/rt64_raster_shader.cpp b/src/render/rt64_raster_shader.cpp index 3040655..0ae1093 100644 --- a/src/render/rt64_raster_shader.cpp +++ b/src/render/rt64_raster_shader.cpp @@ -6,6 +6,7 @@ #include "xxHash/xxh3.h" +#include "shaders/RenderParams.hlsli.rw.h" #include "shaders/RasterPSDynamic.hlsl.spirv.h" #include "shaders/RasterPSDynamicMS.hlsl.spirv.h" #include "shaders/RasterPSSpecConstant.hlsl.spirv.h" @@ -20,23 +21,21 @@ #include "shaders/PostBlendDitherNoiseAddPS.hlsl.spirv.h" #include "shaders/PostBlendDitherNoiseSubPS.hlsl.spirv.h" #ifdef _WIN32 +# include "shaders/RasterPSLibrary.hlsl.dxil.h" +# include "shaders/RasterPSLibraryMS.hlsl.dxil.h" +# include "shaders/RasterVSLibrary.hlsl.dxil.h" # include "shaders/RasterPSDynamic.hlsl.dxil.h" # include "shaders/RasterPSDynamicMS.hlsl.dxil.h" # include "shaders/RasterVSDynamic.hlsl.dxil.h" # include "shaders/PostBlendDitherNoiseAddPS.hlsl.dxil.h" # include "shaders/PostBlendDitherNoiseSubPS.hlsl.dxil.h" #endif -#include "shaders/RasterPS.hlsl.rw.h" -#include "shaders/RasterVS.hlsl.rw.h" #include "shared/rt64_raster_params.h" #include "rt64_descriptor_sets.h" #include "rt64_render_target.h" namespace RT64 { - static const std::string RasterPSString(reinterpret_cast(RasterPSText), 
sizeof(RasterPSText)); - static const std::string RasterVSString(reinterpret_cast(RasterVSText), sizeof(RasterVSText)); - static const RenderFormat RasterPositionFormat = RenderFormat::R32G32B32A32_FLOAT; static const RenderFormat RasterTexcoordFormat = RenderFormat::R32G32_FLOAT; static const RenderFormat RasterColorFormat = RenderFormat::R32G32B32A32_FLOAT; @@ -125,12 +124,27 @@ namespace RT64 { RasterShaderText shaderText = generateShaderText(desc, useMSAA); // Compile both shaders from text with the constants hard-coded in. - IDxcBlob *blobVS, *blobPS; - const std::wstring VertexShaderName = L"VSMain"; - shaderCompiler->compile(shaderText.vertexShader, VertexShaderName, L"vs_6_3", shaderFormat, &blobVS); + static const wchar_t *blobVSLibraryNames[] = { L"RasterVSEntry", L"RasterVSLibrary" }; + static const wchar_t *blobPSLibraryNames[] = { L"RasterPSEntry", L"RasterPSLibrary" }; + IDxcBlob *blobVSLibraries[] = { nullptr, nullptr }; + IDxcBlob *blobPSLibraries[] = { nullptr, nullptr }; + shaderCompiler->dxcUtils->CreateBlobFromPinned(RasterVSLibraryBlobDXIL, sizeof(RasterVSLibraryBlobDXIL), DXC_CP_ACP, (IDxcBlobEncoding **)(&blobVSLibraries[1])); + const void *PSLibraryBlob = useMSAA ? RasterPSLibraryMSBlobDXIL : RasterPSLibraryBlobDXIL; + uint32_t PSLibraryBlobSize = useMSAA ? sizeof(RasterPSLibraryMSBlobDXIL) : sizeof(RasterPSLibraryBlobDXIL); + shaderCompiler->dxcUtils->CreateBlobFromPinned(PSLibraryBlob, PSLibraryBlobSize, DXC_CP_ACP, (IDxcBlobEncoding **)(&blobPSLibraries[1])); + + // Compile both the vertex and pixel shader functions as libraries. 
+ const std::wstring VertexShaderName = L"VSMain"; const std::wstring PixelShaderName = L"PSMain"; - shaderCompiler->compile(shaderText.pixelShader, PixelShaderName, L"ps_6_3", shaderFormat, &blobPS); + shaderCompiler->compile(shaderText.vertexShader, VertexShaderName, L"lib_6_3", shaderFormat, &blobVSLibraries[0]); + shaderCompiler->compile(shaderText.pixelShader, PixelShaderName, L"lib_6_3", shaderFormat, &blobPSLibraries[0]); + + // Link the vertex and pixel shaders with the libraries that define their main functions. + IDxcBlob *blobVS = nullptr; + IDxcBlob *blobPS = nullptr; + shaderCompiler->link(VertexShaderName, L"vs_6_3", blobVSLibraries, blobVSLibraryNames, std::size(blobVSLibraries), &blobVS); + shaderCompiler->link(PixelShaderName, L"ps_6_3", blobPSLibraries, blobPSLibraryNames, std::size(blobPSLibraries), &blobPS); vertexShader = device->createShader(blobVS->GetBufferPointer(), blobVS->GetBufferSize(), "VSMain", shaderFormat); pixelShader = device->createShader(blobPS->GetBufferPointer(), blobPS->GetBufferSize(), "PSMain", shaderFormat); @@ -145,6 +159,10 @@ namespace RT64 { } // Blobs can be discarded once the shaders are created. + blobVSLibraries[0]->Release(); + blobVSLibraries[1]->Release(); + blobPSLibraries[0]->Release(); + blobPSLibraries[1]->Release(); blobPS->Release(); blobVS->Release(); } @@ -180,9 +198,11 @@ namespace RT64 { // Generate vertex shader. std::stringstream vss; - vss << RasterVSString; + vss << std::string_view(RenderParamsText, sizeof(RenderParamsText)); vss << "RenderParams getRenderParams() {" + renderParamsCode + "; return rp; }"; vss << + "void RasterVS(const RenderParams, in float4, in float2, in float4, out float4, out float2, out float4, out float4);" + "[shader(\"vertex\")]" "void VSMain(" " in float4 iPosition : POSITION," " in float2 iUV : TEXCOORD," @@ -204,20 +224,11 @@ namespace RT64 { // Generate pixel shader. 
std::stringstream pss; - if (multisampling) { - pss << - "Texture2DMS gBackgroundDepth : register(t2, space3);" - "float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { return gBackgroundDepth.Load(pixelPos, sampleIndex); }"; - } - else { - pss << - "Texture2D gBackgroundDepth : register(t2, space3);" - "float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { return gBackgroundDepth.Load(int3(pixelPos, 0)); }"; - } - - pss << RasterPSString; + pss << std::string_view(RenderParamsText, sizeof(RenderParamsText)); pss << "RenderParams getRenderParams() {" + renderParamsCode + "; return rp; }"; pss << + "bool RasterPS(const RenderParams, bool, float4, float2, float4, float4, uint, out float4, out float4, out float);" + "[shader(\"pixel\")]" "void PSMain(" " in float4 vertexPosition : SV_POSITION" ", in float2 vertexUV : TEXCOORD" @@ -232,18 +243,15 @@ namespace RT64 { } pss << - ", [[vk::location(0)]] [[vk::index(0)]] out float4 resultColor : SV_TARGET0" - ", [[vk::location(0)]] [[vk::index(1)]] out float4 resultAlpha : SV_TARGET1"; + ", out float4 pixelColor : SV_TARGET0" + ", out float4 pixelAlpha : SV_TARGET1"; if (desc.outputDepth(multisampling)) { - pss << ", out float resultDepth : SV_DEPTH"; - } - - if (desc.outputDepth(multisampling)) { - pss << ") { bool outputDepth = true;"; + pss << + ", out float pixelDepth : SV_DEPTH) { bool outputDepth = true;"; } else { - pss << ") { float resultDepth; bool outputDepth = false;"; + pss << ") { bool outputDepth = false;"; } if (desc.flags.smoothShade) { @@ -255,8 +263,18 @@ namespace RT64 { } pss << - " RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth);" - "}"; + " float4 resultColor;" + " float4 resultAlpha;" + " float resultDepth;" + " if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) 
discard;" + " pixelColor = resultColor;" + " pixelAlpha = resultAlpha;"; + + if (desc.outputDepth(multisampling)) { + pss << "pixelDepth = resultDepth;"; + } + + pss << "}"; return { vss.str(), pss.str() }; } @@ -365,8 +383,14 @@ namespace RT64 { // RasterShaderUber - const uint64_t RasterShaderUber::RasterVSTextHash = XXH3_64bits(RasterVSText, sizeof(RasterVSText)); - const uint64_t RasterShaderUber::RasterPSTextHash = XXH3_64bits(RasterPSText, sizeof(RasterPSText)); +#if defined(_WIN32) + const uint64_t RasterShaderUber::RasterVSLibraryHash = XXH3_64bits(RasterVSLibraryBlobDXIL, sizeof(RasterVSLibraryBlobDXIL)); + const uint64_t RasterShaderUber::RasterPSLibraryHash = XXH3_64bits(RasterPSLibraryBlobDXIL, sizeof(RasterPSLibraryBlobDXIL)); +#else + // Shader hashes are not required in other platforms as they don't use a shader cache. + const uint64_t RasterShaderUber::RasterVSLibraryHash = 0; + const uint64_t RasterShaderUber::RasterPSLibraryHash = 0; +#endif RasterShaderUber::RasterShaderUber(RenderDevice *device, RenderShaderFormat shaderFormat, const RenderMultisampling &multisampling, const ShaderLibrary *shaderLibrary, uint32_t threadCount) { assert(device != nullptr); diff --git a/src/render/rt64_raster_shader.h b/src/render/rt64_raster_shader.h index 11a48aa..ac3fa96 100644 --- a/src/render/rt64_raster_shader.h +++ b/src/render/rt64_raster_shader.h @@ -58,8 +58,8 @@ namespace RT64 { }; struct RasterShaderUber { - static const uint64_t RasterVSTextHash; - static const uint64_t RasterPSTextHash; + static const uint64_t RasterVSLibraryHash; + static const uint64_t RasterPSLibraryHash; std::unique_ptr pipelines[64]; std::unique_ptr postBlendDitherNoiseAddPipeline; diff --git a/src/render/rt64_raster_shader_cache.cpp b/src/render/rt64_raster_shader_cache.cpp index 7ba7950..a5077a2 100644 --- a/src/render/rt64_raster_shader_cache.cpp +++ b/src/render/rt64_raster_shader_cache.cpp @@ -33,7 +33,7 @@ namespace RT64 { break; } - if ((magic != OfflineMagic) || (version 
!= OfflineVersion) || (vsHash != RasterShaderUber::RasterVSTextHash) || (psHash != RasterShaderUber::RasterPSTextHash)) { + if ((magic != OfflineMagic) || (version != OfflineVersion) || (vsHash != RasterShaderUber::RasterVSLibraryHash) || (psHash != RasterShaderUber::RasterPSLibraryHash)) { return false; } @@ -100,8 +100,8 @@ namespace RT64 { uint32_t psDxilSize = uint32_t(psDxilBytes.size()); dumpStream.write(reinterpret_cast(&OfflineMagic), sizeof(uint32_t)); dumpStream.write(reinterpret_cast(&OfflineVersion), sizeof(uint32_t)); - dumpStream.write(reinterpret_cast(&RasterShaderUber::RasterVSTextHash), sizeof(uint64_t)); - dumpStream.write(reinterpret_cast(&RasterShaderUber::RasterPSTextHash), sizeof(uint64_t)); + dumpStream.write(reinterpret_cast(&RasterShaderUber::RasterVSLibraryHash), sizeof(uint64_t)); + dumpStream.write(reinterpret_cast(&RasterShaderUber::RasterPSLibraryHash), sizeof(uint64_t)); dumpStream.write(reinterpret_cast(&shaderDesc), sizeof(ShaderDescription)); dumpStream.write(reinterpret_cast(&vsDxilSize), sizeof(uint32_t)); dumpStream.write(reinterpret_cast(vsDxilBytes.data()), vsDxilSize); @@ -220,7 +220,7 @@ namespace RT64 { // Toggle the use of HDR and compile another shader. 
ShaderDescription shaderDescAlt = shaderDesc;
                     shaderDescAlt.flags.usesHDR = (shaderDescAlt.flags.usesHDR == 0);
-                    std::make_unique(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
+                    std::unique_ptr altShader = std::make_unique(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
                     shaderCache->offlineDumper.stepDumping(shaderDescAlt, dumperVsBytes, dumperPsBytes);
                 }
             }
diff --git a/src/render/rt64_shader_compiler.cpp b/src/render/rt64_shader_compiler.cpp
index 503f7d6..cbf8b63 100644
--- a/src/render/rt64_shader_compiler.cpp
+++ b/src/render/rt64_shader_compiler.cpp
@@ -36,11 +36,56 @@ namespace RT64 {
         }
     }
 
+    // Releases a COM interface when the scope ends, including when an exception propagates through it.
+    struct DxcScopedRelease {
+        explicit DxcScopedRelease(IUnknown *object) : object(object) { }
+        ~DxcScopedRelease() { if (object != nullptr) { object->Release(); } }
+        DxcScopedRelease(const DxcScopedRelease &) = delete;
+        DxcScopedRelease &operator=(const DxcScopedRelease &) = delete;
+        IUnknown *object;
+    };
+
+    // Throws std::runtime_error when the compile or link operation failed, logging the compiler's error text first.
+    // The operation result is only inspected here; releasing it remains the caller's responsibility.
+    static void checkResultForError(IDxcOperationResult *result) {
+        if (result == nullptr) {
+            throw std::runtime_error("Shader compiler did not return an operation result");
+        }
+
+        HRESULT resultCode;
+        result->GetStatus(&resultCode);
+        if (FAILED(resultCode)) {
+            IDxcBlobEncoding *error = nullptr;
+            HRESULT hr = result->GetErrorBuffer(&error);
+            if (FAILED(hr) || (error == nullptr)) {
+                throw std::runtime_error("Failed to get shader compiler error");
+            }
+
+            // Copy the error text (up to its first terminator); the guard releases the blob when the throw unwinds.
+            DxcScopedRelease errorRelease(error);
+            std::string infoLog(static_cast<const char *>(error->GetBufferPointer()), error->GetBufferSize());
+            const size_t terminator = infoLog.find('\0');
+            if (terminator != std::string::npos) {
+                infoLog.resize(terminator);
+            }
+
+            RT64_LOG_PRINTF("Shader compilation error: %s\n", infoLog.c_str());
+            throw std::runtime_error("Shader compilation error: " + infoLog);
+        }
+    }
+
     void ShaderCompiler::compile(const std::string &shaderCode, const std::wstring &entryName, const std::wstring &profile,
         RenderShaderFormat shaderFormat, IDxcBlob **shaderBlob) const
     {
         IDxcBlobEncoding *textBlob = nullptr;
-        dxcUtils->CreateBlobFromPinned((LPBYTE)shaderCode.c_str(), (uint32_t)shaderCode.size(), DXC_CP_ACP, &textBlob);
+        HRESULT res = dxcUtils->CreateBlobFromPinned((LPBYTE)shaderCode.c_str(), (uint32_t)shaderCode.size(), DXC_CP_ACP, &textBlob);
+        if (FAILED(res)) {
+            fprintf(stderr, "CreateBlobFromPinned failed with error code 0x%X.\n", static_cast<unsigned int>(res));
+            return;
+        }
+
+        // Drop the text blob reference on every exit path, including when compilation throws.
+        DxcScopedRelease textBlobRelease(textBlob);
 
         std::vector arguments;
         arguments.push_back(L"-Qstrip_debug");
@@ -70,26 +115,40 @@ namespace RT64 {
 
         IDxcOperationResult *result = nullptr;
         dxcCompiler->Compile(textBlob, L"", entryName.c_str(), profile.c_str(), arguments.data(), (UINT32)(arguments.size()), nullptr, 0, nullptr, &result);
 
-        HRESULT resultCode;
-        result->GetStatus(&resultCode);
-        if (FAILED(resultCode)) {
-            IDxcBlobEncoding *error;
-            HRESULT hr = result->GetErrorBuffer(&error);
-            if (FAILED(hr)) {
-                throw std::runtime_error("Failed to get shader compiler error");
-            }
+        // Release the operation result on every exit path, including when checkResultForError throws.
+        DxcScopedRelease resultRelease(result);
+        checkResultForError(result);
+        result->GetResult(shaderBlob);
+    }
 
-            // Convert error blob to a string.
-            std::vector infoLog(error->GetBufferSize() + 1);
-            memcpy(infoLog.data(), error->GetBufferPointer(), error->GetBufferSize());
-            infoLog[error->GetBufferSize()] = 0;
-
-            RT64_LOG_PRINTF("Shader: %s\n", shaderCode.data());
-            RT64_LOG_PRINTF("Shader compilation error: %s\n", infoLog.data());
-            throw std::runtime_error("Shader compilation error: " + std::string(infoLog.data()));
+    // Links the registered DXIL library blobs into one shader for the given entry point and target profile.
+    // On success *shaderBlob receives the linked shader. If the linker cannot be created or a library cannot
+    // be registered, an error is printed and *shaderBlob is left untouched. Link errors throw std::runtime_error.
+    void ShaderCompiler::link(const std::wstring &entryName, const std::wstring &profile, IDxcBlob **libraryBlobs,
+        const wchar_t **libraryBlobNames, uint32_t libraryBlobCount, IDxcBlob **shaderBlob) const
+    {
+        IDxcLinker *dxcLinker = nullptr;
+        HRESULT res = DxcCreateInstance(CLSID_DxcLinker, __uuidof(IDxcLinker), (void **)(&dxcLinker));
+        if (FAILED(res)) {
+            fprintf(stderr, "DxcCreateInstance(DxcLinker) failed with error code 0x%X.\n", static_cast<unsigned int>(res));
+            return;
         }
 
+        // Release the linker on every exit path below, including failed registrations and link errors.
+        DxcScopedRelease linkerRelease(dxcLinker);
+        for (uint32_t i = 0; i < libraryBlobCount; i++) {
+            res = dxcLinker->RegisterLibrary(libraryBlobNames[i], libraryBlobs[i]);
+            if (FAILED(res)) {
+                fprintf(stderr, "RegisterLibrary failed with error code 0x%X.\n", static_cast<unsigned int>(res));
+                return;
+            }
+        }
+
+        IDxcOperationResult *result = nullptr;
+        dxcLinker->Link(entryName.c_str(), profile.c_str(), libraryBlobNames, libraryBlobCount, nullptr, 0, &result);
+        DxcScopedRelease resultRelease(result);
+        checkResultForError(result);
         result->GetResult(shaderBlob);
     }
 };
diff --git a/src/render/rt64_shader_compiler.h b/src/render/rt64_shader_compiler.h
index d26973a..f546614 100644
--- a/src/render/rt64_shader_compiler.h
+++ b/src/render/rt64_shader_compiler.h
@@ -20,8 +20,12 @@ namespace RT64 {
         ShaderCompiler();
         ~ShaderCompiler();
+
         void compile(const std::string &shaderCode, const std::wstring &entryName, const std::wstring &profile, RenderShaderFormat shaderFormat, IDxcBlob **shaderBlob) const;
+
+        void link(const std::wstring &entryName, const std::wstring &profile, IDxcBlob **libraryBlobs,
+            const wchar_t **libraryBlobNames, uint32_t libraryBlobCount, IDxcBlob **shaderBlob) const;
     };
 };
diff --git a/src/shaders/Library.hlsli
b/src/shaders/Library.hlsli new file mode 100644 index 0000000..0e9ef2b --- /dev/null +++ b/src/shaders/Library.hlsli @@ -0,0 +1,11 @@ +// +// RT64 +// + +#pragma once + +#ifdef LIBRARY +# define LIBRARY_EXPORT export +#else +# define LIBRARY_EXPORT +#endif \ No newline at end of file diff --git a/src/shaders/RasterPS.hlsl b/src/shaders/RasterPS.hlsl index e5d3e9b..eff15e4 100644 --- a/src/shaders/RasterPS.hlsl +++ b/src/shaders/RasterPS.hlsl @@ -8,6 +8,7 @@ #include "Depth.hlsli" #include "FbRendererCommon.hlsli" +#include "Library.hlsli" #include "Random.hlsli" #include "TextureSampler.hlsli" @@ -19,7 +20,7 @@ Texture2DMS gBackgroundDepth : register(t2, space3); float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { return gBackgroundDepth.Load(pixelPos, sampleIndex); } -#elif defined(DYNAMIC_RENDER_PARAMS) || defined(SPEC_CONSTANT_RENDER_PARAMS) +#elif defined(DYNAMIC_RENDER_PARAMS) || defined(SPEC_CONSTANT_RENDER_PARAMS) || defined(LIBRARY) Texture2D gBackgroundDepth : register(t2, space3); float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { @@ -27,8 +28,8 @@ float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { } #endif -void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, float2 vertexUV, float4 vertexSmoothColor, float4 vertexFlatColor, - uint sampleIndex, inout float4 resultColor, inout float4 resultAlpha, out float resultDepth) +LIBRARY_EXPORT bool RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, float2 vertexUV, float4 vertexSmoothColor, float4 vertexFlatColor, + uint sampleIndex, out float4 resultColor, out float4 resultAlpha, out float resultDepth) { const uint instanceIndex = instanceRenderIndices[gConstants.renderIndex].instanceIndex; const float4 vertexColor = renderFlagSmoothShade(rp.flags) ? 
vertexSmoothColor : float4(vertexFlatColor.rgb, vertexSmoothColor.a); @@ -54,14 +55,14 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl if (depthClampNear || depthDecal) { // Since depth clip is disabled on the PSO so near clip can be ignored, we manually clip any values above the allowed depth. if (resultDepth > 1.0f) { - discard; + return false; } } #ifdef DYNAMIC_RENDER_PARAMS // We emulate depth clip on the dynamic version of the shader. else if (!renderFlagNoN(rp.flags)) { if ((resultDepth < 0.0f) || (resultDepth > 1.0f)) { - discard; + return false; } } #endif @@ -82,7 +83,7 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl resultDepth = surfaceDepth; } else { - discard; + return false; } } } @@ -182,12 +183,12 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl // Alpha compare. if (otherMode.alphaCompare() == G_AC_DITHER) { if (alphaCompareValue < nextRand(randomSeed)) { - discard; + return false; } } else if (otherMode.alphaCompare() == G_AC_THRESHOLD) { if (alphaCompareValue < instanceRDPParams[instanceIndex].blendColor.a) { - discard; + return false; } } @@ -199,7 +200,7 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl // Discard all pixels without coverage. const float CoverageThreshold = 1.0f / cvgRange; if (resultCvg < CoverageThreshold) { - discard; + return false; } // Add the blender if it can be replicated with simple emulation. 
@@ -245,6 +246,8 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl resultColor.rgb = lerp(resultColor.rgb, float3(1.0f, 0.0f, 0.0f), 0.5f); } #endif + + return true; } #if defined(DYNAMIC_RENDER_PARAMS) @@ -267,19 +270,16 @@ void PSMain( #if defined(MULTISAMPLING) , in uint sampleIndex : SV_SampleIndex #endif - , [[vk::location(0)]] [[vk::index(0)]] out float4 resultColor : SV_TARGET0 - , [[vk::location(0)]] [[vk::index(1)]] out float4 resultAlpha : SV_TARGET1 + , [[vk::location(0)]] [[vk::index(0)]] out float4 pixelColor : SV_TARGET0 + , [[vk::location(0)]] [[vk::index(1)]] out float4 pixelAlpha : SV_TARGET1 #if defined(DYNAMIC_RENDER_PARAMS) || defined(OUTPUT_DEPTH) - , out float resultDepth : SV_DEPTH + , out float pixelDepth : SV_DEPTH #endif ) { #if !defined(DYNAMIC_RENDER_PARAMS) #if !defined(VERTEX_FLAT_COLOR) - float4 vertexFlatColor; -#endif -#if !defined(OUTPUT_DEPTH) - float resultDepth; + float4 vertexFlatColor = 0.0f; #endif #endif #if !defined(MULTISAMPLING) @@ -290,6 +290,17 @@ void PSMain( #else const bool outputDepth = false; #endif - RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth); + float4 resultColor; + float4 resultAlpha; + float resultDepth; + if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) { + discard; + } + + pixelColor = resultColor; + pixelAlpha = resultAlpha; +#if defined(DYNAMIC_RENDER_PARAMS) || defined(OUTPUT_DEPTH) + pixelDepth = resultDepth; +#endif } #endif \ No newline at end of file diff --git a/src/shaders/RasterVS.hlsl b/src/shaders/RasterVS.hlsl index a7b2bf5..366da2b 100644 --- a/src/shaders/RasterVS.hlsl +++ b/src/shaders/RasterVS.hlsl @@ -3,12 +3,14 @@ // #include "shared/rt64_raster_params.h" +#include "shared/rt64_render_flags.h" #include "FbRendererCommon.hlsli" +#include 
"Library.hlsli" [[vk::push_constant]] ConstantBuffer gConstants : register(b0, space0); -void RasterVS(const RenderParams rp, in float4 iPosition, in float2 iUV, in float4 iColor, out float4 oPosition, out float2 oUV, out float4 oSmoothColor, out float4 oFlatColor) { +LIBRARY_EXPORT void RasterVS(const RenderParams rp, in float4 iPosition, in float2 iUV, in float4 iColor, out float4 oPosition, out float2 oUV, out float4 oSmoothColor, out float4 oFlatColor) { float4 ndcPos = iPosition; // Skip any sort of transformation on the coordinates when rendering rects. diff --git a/src/shaders/RenderParams.hlsli b/src/shaders/RenderParams.hlsli new file mode 100644 index 0000000..14b992e --- /dev/null +++ b/src/shaders/RenderParams.hlsli @@ -0,0 +1,5 @@ +// +// RT64 +// + +#include "shared/rt64_render_params.h" \ No newline at end of file diff --git a/src/shaders/TextureSampler.hlsli b/src/shaders/TextureSampler.hlsli index 43af553..cf57704 100644 --- a/src/shaders/TextureSampler.hlsli +++ b/src/shaders/TextureSampler.hlsli @@ -8,6 +8,7 @@ #include "TextureDecoder.hlsli" #include "shared/rt64_other_mode.h" +#include "shared/rt64_render_flags.h" #include "shared/rt64_render_params.h" #define SIMULATE_LOW_PRECISION 1 diff --git a/src/shared/rt64_blender.h b/src/shared/rt64_blender.h index 1f515fd..a88c431 100644 --- a/src/shared/rt64_blender.h +++ b/src/shared/rt64_blender.h @@ -7,6 +7,7 @@ #include "shared/rt64_hlsl.h" #include "shared/rt64_other_mode.h" +#include "shared/rt64_render_flags.h" #include "shared/rt64_render_params.h" #ifdef HLSL_CPU diff --git a/src/shared/rt64_render_flags.h b/src/shared/rt64_render_flags.h new file mode 100644 index 0000000..de2ec01 --- /dev/null +++ b/src/shared/rt64_render_flags.h @@ -0,0 +1,130 @@ +// +// RT64 +// + +#pragma once + +#include "shared/rt64_hlsl.h" + +#ifdef HLSL_CPU +namespace interop { +#endif +#ifdef HLSL_CPU + union RenderFlags { + struct { + uint rect : 1; + uint NoN : 1; + uint culling : 1; + uint smoothShade : 1; + uint 
linearFiltering : 1; + uint smoothNormal : 1; + uint normalMap : 1; + uint shadowAlpha : 1; + uint oneCycleHardwareBug : 1; + uint blenderApproximation : 2; + uint dynamicTiles : 1; + uint canDecodeTMEM : 1; + uint cms0 : 2; + uint cmt0 : 2; + uint cms1 : 2; + uint cmt1 : 2; + uint usesTexture0 : 1; + uint usesTexture1 : 1; + uint upscale2D : 1; + uint upscaleLOD : 1; + uint usesHDR : 1; + }; + + uint value; + }; +#else + // SPIR-V code generation does not seem to like bitfields at the moment, so we work around it by querying the flags manually. + typedef uint RenderFlags; + + bool renderFlagRect(RenderFlags flags) { + return (flags & 0x1) != 0; + } + + bool renderFlagNoN(RenderFlags flags) { + return ((flags >> 1) & 0x1) != 0; + } + + bool renderFlagCulling(RenderFlags flags) { + return ((flags >> 2) & 0x1) != 0; + } + + bool renderFlagSmoothShade(RenderFlags flags) { + return ((flags >> 3) & 0x1) != 0; + } + + bool renderFlagLinearFiltering(RenderFlags flags) { + return ((flags >> 4) & 0x1) != 0; + } + + bool renderFlagSmoothNormal(RenderFlags flags) { + return ((flags >> 5) & 0x1) != 0; + } + + bool renderFlagNormalMap(RenderFlags flags) { + return ((flags >> 6) & 0x1) != 0; + } + + bool renderFlagShadowAlpha(RenderFlags flags) { + return ((flags >> 7) & 0x1) != 0; + } + + bool renderFlagOneCycleHardwareBug(RenderFlags flags) { + return ((flags >> 8) & 0x1) != 0; + } + + uint renderBlenderApproximation(RenderFlags flags) { + return (flags >> 9) & 0x3; + } + + bool renderFlagDynamicTiles(RenderFlags flags) { + return ((flags >> 11) & 0x1) != 0; + } + + bool renderFlagCanDecodeTMEM(RenderFlags flags) { + return ((flags >> 12) & 0x1) != 0; + } + + uint renderCMS0(RenderFlags flags) { + return (flags >> 13) & 0x3; + } + + uint renderCMT0(RenderFlags flags) { + return (flags >> 15) & 0x3; + } + + uint renderCMS1(RenderFlags flags) { + return (flags >> 17) & 0x3; + } + + uint renderCMT1(RenderFlags flags) { + return (flags >> 19) & 0x3; + } + + bool 
renderFlagUsesTexture0(RenderFlags flags) { + return ((flags >> 21) & 0x1) != 0; + } + + bool renderFlagUsesTexture1(RenderFlags flags) { + return ((flags >> 22) & 0x1) != 0; + } + + bool renderFlagUpscale2D(RenderFlags flags) { + return ((flags >> 23) & 0x1) != 0; + } + + bool renderFlagUpscaleLOD(RenderFlags flags) { + return ((flags >> 24) & 0x1) != 0; + } + + bool renderFlagUsesHDR(RenderFlags flags) { + return ((flags >> 25) & 0x1) != 0; + } +#endif +#ifdef HLSL_CPU +}; +#endif \ No newline at end of file diff --git a/src/shared/rt64_render_params.h b/src/shared/rt64_render_params.h index ba45aa4..b7a6750 100644 --- a/src/shared/rt64_render_params.h +++ b/src/shared/rt64_render_params.h @@ -7,123 +7,12 @@ #include "shared/rt64_hlsl.h" #ifdef HLSL_CPU +#include "shared/rt64_render_flags.h" + namespace interop { #endif -#ifdef HLSL_CPU - union RenderFlags { - struct { - uint rect : 1; - uint NoN : 1; - uint culling : 1; - uint smoothShade : 1; - uint linearFiltering : 1; - uint smoothNormal : 1; - uint normalMap : 1; - uint shadowAlpha : 1; - uint oneCycleHardwareBug : 1; - uint blenderApproximation : 2; - uint dynamicTiles : 1; - uint canDecodeTMEM : 1; - uint cms0 : 2; - uint cmt0 : 2; - uint cms1 : 2; - uint cmt1 : 2; - uint usesTexture0 : 1; - uint usesTexture1 : 1; - uint upscale2D : 1; - uint upscaleLOD : 1; - uint usesHDR : 1; - }; - - uint value; - }; -#else - // SPIR-V code generation does not seem to like bitfields at the moment, so we work around it by querying the flags manually. 
+#ifndef HLSL_CPU typedef uint RenderFlags; - - bool renderFlagRect(RenderFlags flags) { - return (flags & 0x1) != 0; - } - - bool renderFlagNoN(RenderFlags flags) { - return ((flags >> 1) & 0x1) != 0; - } - - bool renderFlagCulling(RenderFlags flags) { - return ((flags >> 2) & 0x1) != 0; - } - - bool renderFlagSmoothShade(RenderFlags flags) { - return ((flags >> 3) & 0x1) != 0; - } - - bool renderFlagLinearFiltering(RenderFlags flags) { - return ((flags >> 4) & 0x1) != 0; - } - - bool renderFlagSmoothNormal(RenderFlags flags) { - return ((flags >> 5) & 0x1) != 0; - } - - bool renderFlagNormalMap(RenderFlags flags) { - return ((flags >> 6) & 0x1) != 0; - } - - bool renderFlagShadowAlpha(RenderFlags flags) { - return ((flags >> 7) & 0x1) != 0; - } - - bool renderFlagOneCycleHardwareBug(RenderFlags flags) { - return ((flags >> 8) & 0x1) != 0; - } - - uint renderBlenderApproximation(RenderFlags flags) { - return (flags >> 9) & 0x3; - } - - bool renderFlagDynamicTiles(RenderFlags flags) { - return ((flags >> 11) & 0x1) != 0; - } - - bool renderFlagCanDecodeTMEM(RenderFlags flags) { - return ((flags >> 12) & 0x1) != 0; - } - - uint renderCMS0(RenderFlags flags) { - return (flags >> 13) & 0x3; - } - - uint renderCMT0(RenderFlags flags) { - return (flags >> 15) & 0x3; - } - - uint renderCMS1(RenderFlags flags) { - return (flags >> 17) & 0x3; - } - - uint renderCMT1(RenderFlags flags) { - return (flags >> 19) & 0x3; - } - - bool renderFlagUsesTexture0(RenderFlags flags) { - return ((flags >> 21) & 0x1) != 0; - } - - bool renderFlagUsesTexture1(RenderFlags flags) { - return ((flags >> 22) & 0x1) != 0; - } - - bool renderFlagUpscale2D(RenderFlags flags) { - return ((flags >> 23) & 0x1) != 0; - } - - bool renderFlagUpscaleLOD(RenderFlags flags) { - return ((flags >> 24) & 0x1) != 0; - } - - bool renderFlagUsesHDR(RenderFlags flags) { - return ((flags >> 25) & 0x1) != 0; - } #endif struct RenderParams {