mirror of
synced 2024-12-26 03:15:44 +00:00
Use DXIL Linker to reduce compilation time of specializations significantly. (#55)
* Use DXIL Linker to reduce shader optimization time.
* Use string view on render params text.
This commit is contained in:
@ -69,10 +69,11 @@ set(ANDROID_ABI arm64-v8a)
set (DXC_DXIL_OPTS "-Wno-ignored-attributes")
set (DXC_SPV_OPTS "-spirv" "-fspv-target-env=vulkan1.0" "-fvk-use-dx-layout")
set (DXC_PS_OPTS "${DXC_COMMON_OPTS}" "-E" "PSMain" "-T ps_6_0")
set (DXC_VS_OPTS "${DXC_COMMON_OPTS}" "-E" "VSMain" "-T vs_6_0" "-fvk-invert-y")
set (DXC_CS_OPTS "${DXC_COMMON_OPTS}" "-E" "CSMain" "-T cs_6_0")
set (DXC_GS_OPTS "${DXC_COMMON_OPTS}" "-E" "GSMain" "-T gs_6_0")
set (DXC_LB_OPTS "${DXC_COMMON_OPTS}" "-D" "LIBRARY" "-T" "lib_6_3")
set (DXC_PS_OPTS "${DXC_COMMON_OPTS}" "-E" "PSMain" "-T ps_6_3")
set (DXC_VS_OPTS "${DXC_COMMON_OPTS}" "-E" "VSMain" "-T vs_6_3" "-fvk-invert-y")
set (DXC_CS_OPTS "${DXC_COMMON_OPTS}" "-E" "CSMain" "-T cs_6_3")
set (DXC_GS_OPTS "${DXC_COMMON_OPTS}" "-E" "GSMain" "-T gs_6_3")
set (DXC_RT_OPTS "${DXC_COMMON_OPTS}" "-D" "RT_SHADER" "-T" "lib_6_3" "-fspv-target-env=vulkan1.1spirv1.4" "-fspv-extension=SPV_KHR_ray_tracing" "-fspv-extension=SPV_EXT_descriptor_indexing")
function(build_shader_spirv_impl TARGETOBJ FILENAME TARGET_NAME OUTNAME)
@ -119,6 +120,25 @@ function(build_shader TARGETOBJ SHADERNAME OPTIONS)
build_shader_spirv_impl(${TARGETOBJ} ${FILENAME} ${TARGET_NAME} ${OUTNAME} ${OPTIONS} ${EXTRA_ARGS})
function(build_shader_dxil TARGETOBJ SHADERNAME OPTIONS)
if (${ARGC} GREATER 3)
# Get any optional compiler args passed to this function
if (${ARGC} GREATER 4)
# Compile DXIL shader binaries if building on Windows
function(build_shader_spirv TARGETOBJ SHADERNAME OPTIONS)
if (${ARGC} GREATER 3)
@ -151,6 +171,10 @@ function(preprocess_shader TARGETOBJ SHADERNAME)
target_sources(${TARGETOBJ} PRIVATE ${OUTNAME}.rw.c)
function(build_library_shader TARGETOBJ SHADERNAME)
build_shader_dxil(${TARGETOBJ} ${SHADERNAME} "${DXC_LB_OPTS}" ${ARGN})
function(build_pixel_shader TARGETOBJ SHADERNAME)
build_shader(${TARGETOBJ} ${SHADERNAME} "${DXC_PS_OPTS}" ${ARGN})
@ -356,8 +380,13 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(rt64 ${X11_LIBRARIES} ${X11_Xrandr_LIB})
preprocess_shader(rt64 "src/shaders/RasterPS.hlsl")
preprocess_shader(rt64 "src/shaders/RasterVS.hlsl")
preprocess_shader(rt64 "src/shaders/RenderParams.hlsli")
if (${WIN32})
build_library_shader(rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSLibrary.hlsl")
build_library_shader(rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSLibraryMS.hlsl" "-D MULTISAMPLING")
build_library_shader(rt64 "src/shaders/RasterVS.hlsl" "src/shaders/RasterVSLibrary.hlsl")
build_pixel_shader( rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSDynamic.hlsl" "-D DYNAMIC_RENDER_PARAMS")
build_pixel_shader( rt64 "src/shaders/RasterPS.hlsl" "src/shaders/RasterPSDynamicMS.hlsl" "-D DYNAMIC_RENDER_PARAMS" "-D MULTISAMPLING")
@ -6,6 +6,7 @@
#include "xxHash/xxh3.h"
#include "shaders/RenderParams.hlsli.rw.h"
#include "shaders/RasterPSDynamic.hlsl.spirv.h"
#include "shaders/RasterPSDynamicMS.hlsl.spirv.h"
#include "shaders/RasterPSSpecConstant.hlsl.spirv.h"
@ -20,23 +21,21 @@
#include "shaders/PostBlendDitherNoiseAddPS.hlsl.spirv.h"
#include "shaders/PostBlendDitherNoiseSubPS.hlsl.spirv.h"
#ifdef _WIN32
# include "shaders/RasterPSLibrary.hlsl.dxil.h"
# include "shaders/RasterPSLibraryMS.hlsl.dxil.h"
# include "shaders/RasterVSLibrary.hlsl.dxil.h"
# include "shaders/RasterPSDynamic.hlsl.dxil.h"
# include "shaders/RasterPSDynamicMS.hlsl.dxil.h"
# include "shaders/RasterVSDynamic.hlsl.dxil.h"
# include "shaders/PostBlendDitherNoiseAddPS.hlsl.dxil.h"
# include "shaders/PostBlendDitherNoiseSubPS.hlsl.dxil.h"
#include "shaders/RasterPS.hlsl.rw.h"
#include "shaders/RasterVS.hlsl.rw.h"
#include "shared/rt64_raster_params.h"
#include "rt64_descriptor_sets.h"
#include "rt64_render_target.h"
namespace RT64 {
static const std::string RasterPSString(reinterpret_cast<const char *>(RasterPSText), sizeof(RasterPSText));
static const std::string RasterVSString(reinterpret_cast<const char *>(RasterVSText), sizeof(RasterVSText));
static const RenderFormat RasterPositionFormat = RenderFormat::R32G32B32A32_FLOAT;
static const RenderFormat RasterTexcoordFormat = RenderFormat::R32G32_FLOAT;
static const RenderFormat RasterColorFormat = RenderFormat::R32G32B32A32_FLOAT;
@ -125,12 +124,27 @@ namespace RT64 {
RasterShaderText shaderText = generateShaderText(desc, useMSAA);
// Compile both shaders from text with the constants hard-coded in.
IDxcBlob *blobVS, *blobPS;
const std::wstring VertexShaderName = L"VSMain";
shaderCompiler->compile(shaderText.vertexShader, VertexShaderName, L"vs_6_3", shaderFormat, &blobVS);
static const wchar_t *blobVSLibraryNames[] = { L"RasterVSEntry", L"RasterVSLibrary" };
static const wchar_t *blobPSLibraryNames[] = { L"RasterPSEntry", L"RasterPSLibrary" };
IDxcBlob *blobVSLibraries[] = { nullptr, nullptr };
IDxcBlob *blobPSLibraries[] = { nullptr, nullptr };
shaderCompiler->dxcUtils->CreateBlobFromPinned(RasterVSLibraryBlobDXIL, sizeof(RasterVSLibraryBlobDXIL), DXC_CP_ACP, (IDxcBlobEncoding **)(&blobVSLibraries[1]));
const void *PSLibraryBlob = useMSAA ? RasterPSLibraryMSBlobDXIL : RasterPSLibraryBlobDXIL;
uint32_t PSLibraryBlobSize = useMSAA ? sizeof(RasterPSLibraryMSBlobDXIL) : sizeof(RasterPSLibraryBlobDXIL);
shaderCompiler->dxcUtils->CreateBlobFromPinned(PSLibraryBlob, PSLibraryBlobSize, DXC_CP_ACP, (IDxcBlobEncoding **)(&blobPSLibraries[1]));
// Compile both the vertex and pixel shader functions as libraries.
const std::wstring VertexShaderName = L"VSMain";
const std::wstring PixelShaderName = L"PSMain";
shaderCompiler->compile(shaderText.pixelShader, PixelShaderName, L"ps_6_3", shaderFormat, &blobPS);
shaderCompiler->compile(shaderText.vertexShader, VertexShaderName, L"lib_6_3", shaderFormat, &blobVSLibraries[0]);
shaderCompiler->compile(shaderText.pixelShader, PixelShaderName, L"lib_6_3", shaderFormat, &blobPSLibraries[0]);
// Link the vertex and pixel shaders with the libraries that define their main functions.
IDxcBlob *blobVS = nullptr;
IDxcBlob *blobPS = nullptr;
shaderCompiler->link(VertexShaderName, L"vs_6_3", blobVSLibraries, blobVSLibraryNames, std::size(blobVSLibraries), &blobVS);
shaderCompiler->link(PixelShaderName, L"ps_6_3", blobPSLibraries, blobPSLibraryNames, std::size(blobPSLibraries), &blobPS);
vertexShader = device->createShader(blobVS->GetBufferPointer(), blobVS->GetBufferSize(), "VSMain", shaderFormat);
pixelShader = device->createShader(blobPS->GetBufferPointer(), blobPS->GetBufferSize(), "PSMain", shaderFormat);
@ -145,6 +159,10 @@ namespace RT64 {
// Blobs can be discarded once the shaders are created.
@ -180,9 +198,11 @@ namespace RT64 {
// Generate vertex shader.
std::stringstream vss;
vss << RasterVSString;
vss << std::string_view(RenderParamsText, sizeof(RenderParamsText));
vss << "RenderParams getRenderParams() {" + renderParamsCode + "; return rp; }";
vss <<
"void RasterVS(const RenderParams, in float4, in float2, in float4, out float4, out float2, out float4, out float4);"
"void VSMain("
" in float4 iPosition : POSITION,"
" in float2 iUV : TEXCOORD,"
@ -204,20 +224,11 @@ namespace RT64 {
// Generate pixel shader.
std::stringstream pss;
if (multisampling) {
pss <<
"Texture2DMS<float> gBackgroundDepth : register(t2, space3);"
"float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { return gBackgroundDepth.Load(pixelPos, sampleIndex); }";
else {
pss <<
"Texture2D<float> gBackgroundDepth : register(t2, space3);"
"float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) { return gBackgroundDepth.Load(int3(pixelPos, 0)); }";
pss << RasterPSString;
pss << std::string_view(RenderParamsText, sizeof(RenderParamsText));
pss << "RenderParams getRenderParams() {" + renderParamsCode + "; return rp; }";
pss <<
"bool RasterPS(const RenderParams, bool, float4, float2, float4, float4, uint, out float4, out float4, out float);"
"void PSMain("
" in float4 vertexPosition : SV_POSITION"
", in float2 vertexUV : TEXCOORD"
@ -232,18 +243,15 @@ namespace RT64 {
pss <<
", [[vk::location(0)]] [[vk::index(0)]] out float4 resultColor : SV_TARGET0"
", [[vk::location(0)]] [[vk::index(1)]] out float4 resultAlpha : SV_TARGET1";
", out float4 pixelColor : SV_TARGET0"
", out float4 pixelAlpha : SV_TARGET1";
if (desc.outputDepth(multisampling)) {
pss << ", out float resultDepth : SV_DEPTH";
if (desc.outputDepth(multisampling)) {
pss << ") { bool outputDepth = true;";
pss <<
", out float pixelDepth : SV_DEPTH) { bool outputDepth = true;";
else {
pss << ") { float resultDepth; bool outputDepth = false;";
pss << ") { bool outputDepth = false;";
if (desc.flags.smoothShade) {
@ -255,8 +263,18 @@ namespace RT64 {
pss <<
" RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth);"
" float4 resultColor;"
" float4 resultAlpha;"
" float resultDepth;"
" if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) discard;"
" pixelColor = resultColor;"
" pixelAlpha = resultAlpha;";
if (desc.outputDepth(multisampling)) {
pss << "pixelDepth = resultDepth;";
pss << "}";
return { vss.str(), pss.str() };
@ -365,8 +383,14 @@ namespace RT64 {
// RasterShaderUber
const uint64_t RasterShaderUber::RasterVSTextHash = XXH3_64bits(RasterVSText, sizeof(RasterVSText));
const uint64_t RasterShaderUber::RasterPSTextHash = XXH3_64bits(RasterPSText, sizeof(RasterPSText));
#if defined(_WIN32)
const uint64_t RasterShaderUber::RasterVSLibraryHash = XXH3_64bits(RasterVSLibraryBlobDXIL, sizeof(RasterVSLibraryBlobDXIL));
const uint64_t RasterShaderUber::RasterPSLibraryHash = XXH3_64bits(RasterPSLibraryBlobDXIL, sizeof(RasterPSLibraryBlobDXIL));
// Shader hashes are not required on other platforms, as they don't use a shader cache.
const uint64_t RasterShaderUber::RasterVSLibraryHash = 0;
const uint64_t RasterShaderUber::RasterPSLibraryHash = 0;
RasterShaderUber::RasterShaderUber(RenderDevice *device, RenderShaderFormat shaderFormat, const RenderMultisampling &multisampling, const ShaderLibrary *shaderLibrary, uint32_t threadCount) {
assert(device != nullptr);
@ -58,8 +58,8 @@ namespace RT64 {
struct RasterShaderUber {
static const uint64_t RasterVSTextHash;
static const uint64_t RasterPSTextHash;
static const uint64_t RasterVSLibraryHash;
static const uint64_t RasterPSLibraryHash;
std::unique_ptr<RenderPipeline> pipelines[64];
std::unique_ptr<RenderPipeline> postBlendDitherNoiseAddPipeline;
@ -33,7 +33,7 @@ namespace RT64 {
if ((magic != OfflineMagic) || (version != OfflineVersion) || (vsHash != RasterShaderUber::RasterVSTextHash) || (psHash != RasterShaderUber::RasterPSTextHash)) {
if ((magic != OfflineMagic) || (version != OfflineVersion) || (vsHash != RasterShaderUber::RasterVSLibraryHash) || (psHash != RasterShaderUber::RasterPSLibraryHash)) {
return false;
@ -100,8 +100,8 @@ namespace RT64 {
uint32_t psDxilSize = uint32_t(psDxilBytes.size());
dumpStream.write(reinterpret_cast<const char *>(&OfflineMagic), sizeof(uint32_t));
dumpStream.write(reinterpret_cast<const char *>(&OfflineVersion), sizeof(uint32_t));
dumpStream.write(reinterpret_cast<const char *>(&RasterShaderUber::RasterVSTextHash), sizeof(uint64_t));
dumpStream.write(reinterpret_cast<const char *>(&RasterShaderUber::RasterPSTextHash), sizeof(uint64_t));
dumpStream.write(reinterpret_cast<const char *>(&RasterShaderUber::RasterVSLibraryHash), sizeof(uint64_t));
dumpStream.write(reinterpret_cast<const char *>(&RasterShaderUber::RasterPSLibraryHash), sizeof(uint64_t));
dumpStream.write(reinterpret_cast<const char *>(&shaderDesc), sizeof(ShaderDescription));
dumpStream.write(reinterpret_cast<const char *>(&vsDxilSize), sizeof(uint32_t));
dumpStream.write(reinterpret_cast<const char *>(vsDxilBytes.data()), vsDxilSize);
@ -220,7 +220,7 @@ namespace RT64 {
// Toggle the use of HDR and compile another shader.
ShaderDescription shaderDescAlt = shaderDesc;
shaderDescAlt.flags.usesHDR = (shaderDescAlt.flags.usesHDR == 0);
std::make_unique<RasterShader>(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
std::unique_ptr<RasterShader> altShader = std::make_unique<RasterShader>(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
shaderCache->offlineDumper.stepDumping(shaderDescAlt, dumperVsBytes, dumperPsBytes);
@ -36,11 +36,35 @@ namespace RT64 {
static void checkResultForError(IDxcOperationResult *result) {
HRESULT resultCode;
if (FAILED(resultCode)) {
IDxcBlobEncoding *error;
HRESULT hr = result->GetErrorBuffer(&error);
if (FAILED(hr)) {
throw std::runtime_error("Failed to get shader compiler error");
// Convert error blob to a string.
std::vector<char> infoLog(error->GetBufferSize() + 1);
memcpy(infoLog.data(), error->GetBufferPointer(), error->GetBufferSize());
infoLog[error->GetBufferSize()] = 0;
RT64_LOG_PRINTF("Shader compilation error: %s\n", infoLog.data());
throw std::runtime_error("Shader compilation error: " + std::string(infoLog.data()));
void ShaderCompiler::compile(const std::string &shaderCode, const std::wstring &entryName, const std::wstring &profile,
RenderShaderFormat shaderFormat, IDxcBlob **shaderBlob) const
IDxcBlobEncoding *textBlob = nullptr;
dxcUtils->CreateBlobFromPinned((LPBYTE)shaderCode.c_str(), (uint32_t)shaderCode.size(), DXC_CP_ACP, &textBlob);
HRESULT res = dxcUtils->CreateBlobFromPinned((LPBYTE)shaderCode.c_str(), (uint32_t)shaderCode.size(), DXC_CP_ACP, &textBlob);
if (FAILED(res)) {
fprintf(stderr, "CreateBlobFromPinned failed with error code 0x%X.\n", res);
std::vector<LPCWSTR> arguments;
@ -70,26 +94,34 @@ namespace RT64 {
IDxcOperationResult *result = nullptr;
dxcCompiler->Compile(textBlob, L"", entryName.c_str(), profile.c_str(), arguments.data(), (UINT32)(arguments.size()), nullptr, 0, nullptr, &result);
HRESULT resultCode;
if (FAILED(resultCode)) {
IDxcBlobEncoding *error;
HRESULT hr = result->GetErrorBuffer(&error);
if (FAILED(hr)) {
throw std::runtime_error("Failed to get shader compiler error");
// Convert error blob to a string.
std::vector<char> infoLog(error->GetBufferSize() + 1);
memcpy(infoLog.data(), error->GetBufferPointer(), error->GetBufferSize());
infoLog[error->GetBufferSize()] = 0;
RT64_LOG_PRINTF("Shader: %s\n", shaderCode.data());
RT64_LOG_PRINTF("Shader compilation error: %s\n", infoLog.data());
throw std::runtime_error("Shader compilation error: " + std::string(infoLog.data()));
void ShaderCompiler::link(const std::wstring &entryName, const std::wstring &profile, IDxcBlob **libraryBlobs,
const wchar_t **libraryBlobNames, uint32_t libraryBlobCount, IDxcBlob **shaderBlob) const
IDxcLinker *dxcLinker = nullptr;
HRESULT res = DxcCreateInstance(CLSID_DxcLinker, __uuidof(IDxcLinker), (void **)(&dxcLinker));
if (FAILED(res)) {
fprintf(stderr, "DxcCreateInstance(DxcLinker) failed with error code 0x%X.\n", res);
for (uint32_t i = 0; i < libraryBlobCount; i++) {
res = dxcLinker->RegisterLibrary(libraryBlobNames[i], libraryBlobs[i]);
if (FAILED(res)) {
fprintf(stderr, "RegisterLibrary failed with error code 0x%X.\n", res);
IDxcOperationResult *result = nullptr;
dxcLinker->Link(entryName.c_str(), profile.c_str(), libraryBlobNames, libraryBlobCount, nullptr, 0, &result);
@ -20,8 +20,12 @@ namespace RT64 {
void compile(const std::string &shaderCode, const std::wstring &entryName, const std::wstring &profile,
RenderShaderFormat shaderFormat, IDxcBlob **shaderBlob) const;
void link(const std::wstring &entryName, const std::wstring &profile, IDxcBlob **libraryBlobs,
const wchar_t **libraryBlobNames, uint32_t libraryBlobCount, IDxcBlob **shaderBlob) const;
Normal file
Normal file
@ -0,0 +1,11 @@
// RT64
#pragma once
#ifdef LIBRARY
# define LIBRARY_EXPORT export
@ -8,6 +8,7 @@
#include "Depth.hlsli"
#include "FbRendererCommon.hlsli"
#include "Library.hlsli"
#include "Random.hlsli"
#include "TextureSampler.hlsli"
@ -19,7 +20,7 @@ Texture2DMS<float> gBackgroundDepth : register(t2, space3);
float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) {
return gBackgroundDepth.Load(pixelPos, sampleIndex);
Texture2D<float> gBackgroundDepth : register(t2, space3);
float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) {
@ -27,8 +28,8 @@ float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) {
void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, float2 vertexUV, float4 vertexSmoothColor, float4 vertexFlatColor,
uint sampleIndex, inout float4 resultColor, inout float4 resultAlpha, out float resultDepth)
LIBRARY_EXPORT bool RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, float2 vertexUV, float4 vertexSmoothColor, float4 vertexFlatColor,
uint sampleIndex, out float4 resultColor, out float4 resultAlpha, out float resultDepth)
const uint instanceIndex = instanceRenderIndices[gConstants.renderIndex].instanceIndex;
const float4 vertexColor = renderFlagSmoothShade(rp.flags) ? vertexSmoothColor : float4(vertexFlatColor.rgb, vertexSmoothColor.a);
@ -54,14 +55,14 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
if (depthClampNear || depthDecal) {
// Since depth clip is disabled on the PSO (so that near clip can be ignored), we manually clip any values above the allowed depth.
if (resultDepth > 1.0f) {
return false;
// We emulate depth clip on the dynamic version of the shader.
else if (!renderFlagNoN(rp.flags)) {
if ((resultDepth < 0.0f) || (resultDepth > 1.0f)) {
return false;
@ -82,7 +83,7 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
resultDepth = surfaceDepth;
else {
return false;
@ -182,12 +183,12 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
// Alpha compare.
if (otherMode.alphaCompare() == G_AC_DITHER) {
if (alphaCompareValue < nextRand(randomSeed)) {
return false;
else if (otherMode.alphaCompare() == G_AC_THRESHOLD) {
if (alphaCompareValue < instanceRDPParams[instanceIndex].blendColor.a) {
return false;
@ -199,7 +200,7 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
// Discard all pixels without coverage.
const float CoverageThreshold = 1.0f / cvgRange;
if (resultCvg < CoverageThreshold) {
return false;
// Add the blender if it can be replicated with simple emulation.
@ -245,6 +246,8 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
resultColor.rgb = lerp(resultColor.rgb, float3(1.0f, 0.0f, 0.0f), 0.5f);
return true;
@ -267,19 +270,16 @@ void PSMain(
#if defined(MULTISAMPLING)
, in uint sampleIndex : SV_SampleIndex
, [[vk::location(0)]] [[vk::index(0)]] out float4 resultColor : SV_TARGET0
, [[vk::location(0)]] [[vk::index(1)]] out float4 resultAlpha : SV_TARGET1
, [[vk::location(0)]] [[vk::index(0)]] out float4 pixelColor : SV_TARGET0
, [[vk::location(0)]] [[vk::index(1)]] out float4 pixelAlpha : SV_TARGET1
, out float resultDepth : SV_DEPTH
, out float pixelDepth : SV_DEPTH
#if !defined(VERTEX_FLAT_COLOR)
float4 vertexFlatColor;
#if !defined(OUTPUT_DEPTH)
float resultDepth;
float4 vertexFlatColor = 0.0f;
#if !defined(MULTISAMPLING)
@ -290,6 +290,17 @@ void PSMain(
const bool outputDepth = false;
RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth);
float4 resultColor;
float4 resultAlpha;
float resultDepth;
if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) {
pixelColor = resultColor;
pixelAlpha = resultAlpha;
pixelDepth = resultDepth;
@ -3,12 +3,14 @@
#include "shared/rt64_raster_params.h"
#include "shared/rt64_render_flags.h"
#include "FbRendererCommon.hlsli"
#include "Library.hlsli"
[[vk::push_constant]] ConstantBuffer<RasterParams> gConstants : register(b0, space0);
void RasterVS(const RenderParams rp, in float4 iPosition, in float2 iUV, in float4 iColor, out float4 oPosition, out float2 oUV, out float4 oSmoothColor, out float4 oFlatColor) {
LIBRARY_EXPORT void RasterVS(const RenderParams rp, in float4 iPosition, in float2 iUV, in float4 iColor, out float4 oPosition, out float2 oUV, out float4 oSmoothColor, out float4 oFlatColor) {
float4 ndcPos = iPosition;
// Skip any sort of transformation on the coordinates when rendering rects.
Normal file
Normal file
@ -0,0 +1,5 @@
// RT64
#include "shared/rt64_render_params.h"
@ -8,6 +8,7 @@
#include "TextureDecoder.hlsli"
#include "shared/rt64_other_mode.h"
#include "shared/rt64_render_flags.h"
#include "shared/rt64_render_params.h"
@ -7,6 +7,7 @@
#include "shared/rt64_hlsl.h"
#include "shared/rt64_other_mode.h"
#include "shared/rt64_render_flags.h"
#include "shared/rt64_render_params.h"
#ifdef HLSL_CPU
Normal file
Normal file
@ -0,0 +1,130 @@
// RT64
#pragma once
#include "shared/rt64_hlsl.h"
#ifdef HLSL_CPU
namespace interop {
#ifdef HLSL_CPU
union RenderFlags {
struct {
uint rect : 1;
uint NoN : 1;
uint culling : 1;
uint smoothShade : 1;
uint linearFiltering : 1;
uint smoothNormal : 1;
uint normalMap : 1;
uint shadowAlpha : 1;
uint oneCycleHardwareBug : 1;
uint blenderApproximation : 2;
uint dynamicTiles : 1;
uint canDecodeTMEM : 1;
uint cms0 : 2;
uint cmt0 : 2;
uint cms1 : 2;
uint cmt1 : 2;
uint usesTexture0 : 1;
uint usesTexture1 : 1;
uint upscale2D : 1;
uint upscaleLOD : 1;
uint usesHDR : 1;
uint value;
// SPIR-V code generation does not seem to like bitfields at the moment, so we work around it by querying the flags manually.
typedef uint RenderFlags;
bool renderFlagRect(RenderFlags flags) {
return (flags & 0x1) != 0;
bool renderFlagNoN(RenderFlags flags) {
return ((flags >> 1) & 0x1) != 0;
bool renderFlagCulling(RenderFlags flags) {
return ((flags >> 2) & 0x1) != 0;
bool renderFlagSmoothShade(RenderFlags flags) {
return ((flags >> 3) & 0x1) != 0;
bool renderFlagLinearFiltering(RenderFlags flags) {
return ((flags >> 4) & 0x1) != 0;
bool renderFlagSmoothNormal(RenderFlags flags) {
return ((flags >> 5) & 0x1) != 0;
bool renderFlagNormalMap(RenderFlags flags) {
return ((flags >> 6) & 0x1) != 0;
bool renderFlagShadowAlpha(RenderFlags flags) {
return ((flags >> 7) & 0x1) != 0;
bool renderFlagOneCycleHardwareBug(RenderFlags flags) {
return ((flags >> 8) & 0x1) != 0;
uint renderBlenderApproximation(RenderFlags flags) {
return (flags >> 9) & 0x3;
bool renderFlagDynamicTiles(RenderFlags flags) {
return ((flags >> 11) & 0x1) != 0;
bool renderFlagCanDecodeTMEM(RenderFlags flags) {
return ((flags >> 12) & 0x1) != 0;
uint renderCMS0(RenderFlags flags) {
return (flags >> 13) & 0x3;
uint renderCMT0(RenderFlags flags) {
return (flags >> 15) & 0x3;
uint renderCMS1(RenderFlags flags) {
return (flags >> 17) & 0x3;
uint renderCMT1(RenderFlags flags) {
return (flags >> 19) & 0x3;
bool renderFlagUsesTexture0(RenderFlags flags) {
return ((flags >> 21) & 0x1) != 0;
bool renderFlagUsesTexture1(RenderFlags flags) {
return ((flags >> 22) & 0x1) != 0;
bool renderFlagUpscale2D(RenderFlags flags) {
return ((flags >> 23) & 0x1) != 0;
bool renderFlagUpscaleLOD(RenderFlags flags) {
return ((flags >> 24) & 0x1) != 0;
bool renderFlagUsesHDR(RenderFlags flags) {
return ((flags >> 25) & 0x1) != 0;
#ifdef HLSL_CPU
@ -7,123 +7,12 @@
#include "shared/rt64_hlsl.h"
#ifdef HLSL_CPU
#include "shared/rt64_render_flags.h"
namespace interop {
#ifdef HLSL_CPU
union RenderFlags {
struct {
uint rect : 1;
uint NoN : 1;
uint culling : 1;
uint smoothShade : 1;
uint linearFiltering : 1;
uint smoothNormal : 1;
uint normalMap : 1;
uint shadowAlpha : 1;
uint oneCycleHardwareBug : 1;
uint blenderApproximation : 2;
uint dynamicTiles : 1;
uint canDecodeTMEM : 1;
uint cms0 : 2;
uint cmt0 : 2;
uint cms1 : 2;
uint cmt1 : 2;
uint usesTexture0 : 1;
uint usesTexture1 : 1;
uint upscale2D : 1;
uint upscaleLOD : 1;
uint usesHDR : 1;
uint value;
// SPIR-V code generation does not seem to like bitfields at the moment, so we work around it by querying the flags manually.
#ifndef HLSL_CPU
typedef uint RenderFlags;
bool renderFlagRect(RenderFlags flags) {
return (flags & 0x1) != 0;
bool renderFlagNoN(RenderFlags flags) {
return ((flags >> 1) & 0x1) != 0;
bool renderFlagCulling(RenderFlags flags) {
return ((flags >> 2) & 0x1) != 0;
bool renderFlagSmoothShade(RenderFlags flags) {
return ((flags >> 3) & 0x1) != 0;
bool renderFlagLinearFiltering(RenderFlags flags) {
return ((flags >> 4) & 0x1) != 0;
bool renderFlagSmoothNormal(RenderFlags flags) {
return ((flags >> 5) & 0x1) != 0;
bool renderFlagNormalMap(RenderFlags flags) {
return ((flags >> 6) & 0x1) != 0;
bool renderFlagShadowAlpha(RenderFlags flags) {
return ((flags >> 7) & 0x1) != 0;
bool renderFlagOneCycleHardwareBug(RenderFlags flags) {
return ((flags >> 8) & 0x1) != 0;
uint renderBlenderApproximation(RenderFlags flags) {
return (flags >> 9) & 0x3;
bool renderFlagDynamicTiles(RenderFlags flags) {
return ((flags >> 11) & 0x1) != 0;
bool renderFlagCanDecodeTMEM(RenderFlags flags) {
return ((flags >> 12) & 0x1) != 0;
uint renderCMS0(RenderFlags flags) {
return (flags >> 13) & 0x3;
uint renderCMT0(RenderFlags flags) {
return (flags >> 15) & 0x3;
uint renderCMS1(RenderFlags flags) {
return (flags >> 17) & 0x3;
uint renderCMT1(RenderFlags flags) {
return (flags >> 19) & 0x3;
bool renderFlagUsesTexture0(RenderFlags flags) {
return ((flags >> 21) & 0x1) != 0;
bool renderFlagUsesTexture1(RenderFlags flags) {
return ((flags >> 22) & 0x1) != 0;
bool renderFlagUpscale2D(RenderFlags flags) {
return ((flags >> 23) & 0x1) != 0;
bool renderFlagUpscaleLOD(RenderFlags flags) {
return ((flags >> 24) & 0x1) != 0;
bool renderFlagUsesHDR(RenderFlags flags) {
return ((flags >> 25) & 0x1) != 0;
struct RenderParams {
Reference in New Issue
Block a user