mirror of
https://github.com/rt64/rt64.git
synced 2024-12-25 18:14:31 +00:00
Use re-spirv instead of native specialization constants. Optimize the amount of ubershader pipelines that need to be created. (#69)
This commit is contained in:
parent
cb631739b0
commit
b91d6a7441
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -52,3 +52,6 @@
|
||||
[submodule "src/contrib/zstd"]
|
||||
path = src/contrib/zstd
|
||||
url = https://github.com/facebook/zstd
|
||||
[submodule "src/contrib/re-spirv"]
|
||||
path = src/contrib/re-spirv
|
||||
url = https://github.com/rt64/re-spirv
|
||||
|
@ -3,11 +3,17 @@ project(rt64)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
||||
|
||||
option(RT64_STATIC "Build RT64 as a static library" OFF)
|
||||
|
||||
option(RT64_BUILD_EXAMPLES "Build examples for RT64" OFF)
|
||||
if (${RT64_BUILD_EXAMPLES})
|
||||
set(RT64_STATIC ON)
|
||||
endif()
|
||||
|
||||
if (NOT ${RT64_STATIC})
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
function(preprocess INFILE OUTFILE OPTIONS)
|
||||
if (CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
@ -56,6 +62,7 @@ set(ZSTD_BUILD_STATIC ON)
|
||||
set(ZSTD_BUILD_SHARED OFF)
|
||||
|
||||
add_subdirectory(src/tools/file_to_c)
|
||||
add_subdirectory(src/contrib/re-spirv)
|
||||
add_subdirectory(src/contrib/nativefiledialog-extended)
|
||||
add_subdirectory(src/contrib/zstd/build/cmake)
|
||||
|
||||
@ -353,7 +360,6 @@ if (WIN32)
|
||||
include_directories("${PROJECT_SOURCE_DIR}/src/contrib/dxc/inc")
|
||||
endif()
|
||||
|
||||
option(RT64_STATIC "Build RT64 as a static library" OFF)
|
||||
if (${RT64_STATIC})
|
||||
add_library(rt64 STATIC ${SOURCES})
|
||||
else()
|
||||
@ -364,6 +370,7 @@ set_target_properties(rt64 PROPERTIES OUTPUT_NAME "rt64")
|
||||
set_target_properties(rt64 PROPERTIES PREFIX "")
|
||||
|
||||
# Add common libraries.
|
||||
target_link_libraries(rt64 re-spirv)
|
||||
target_link_libraries(rt64 nfd)
|
||||
target_link_libraries(rt64 libzstd_static)
|
||||
|
||||
|
1
src/contrib/re-spirv
Submodule
1
src/contrib/re-spirv
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit f0ad27a50339e72d4c86b3436b9f74de83a20544
|
@ -228,9 +228,12 @@ namespace RT64 {
|
||||
shaderLibrary->setupCommonShaders(renderInterface.get(), device.get());
|
||||
shaderLibrary->setupMultisamplingShaders(renderInterface.get(), device.get(), multisampling);
|
||||
|
||||
// Create the shader caches. Estimate the amount of shader compiler threads by trying to use about half of the system's available threads.
|
||||
// Create the shader caches.
|
||||
// Estimate the amount of shader compiler threads by trying to use about half of the system's available threads.
|
||||
// We need the ubershader pipelines done as soon as possible, so we use a different thread count that demands more of the system.
|
||||
const uint32_t rasterShaderThreads = std::max(threadsAvailable / 2U, 1U);
|
||||
rasterShaderCache = std::make_unique<RasterShaderCache>(rasterShaderThreads);
|
||||
const uint32_t ubershaderThreads = uint32_t(std::max(int(threadsAvailable) - 2, 1));
|
||||
rasterShaderCache = std::make_unique<RasterShaderCache>(rasterShaderThreads, ubershaderThreads);
|
||||
rasterShaderCache->setup(device.get(), renderInterface->getCapabilities().shaderFormat, shaderLibrary.get(), multisampling);
|
||||
|
||||
# if RT_ENABLED
|
||||
|
@ -1544,11 +1544,8 @@ namespace RT64 {
|
||||
else {
|
||||
const bool copyMode = (call.shaderDesc.otherMode.cycleType() == G_CYC_COPY);
|
||||
triangles.pipeline = rasterShaderUber->getPipeline(
|
||||
!copyMode && interop::Blender::usesAlphaBlend(call.shaderDesc.otherMode),
|
||||
!copyMode && call.shaderDesc.flags.culling,
|
||||
!copyMode && call.shaderDesc.otherMode.zCmp(),
|
||||
!copyMode && call.shaderDesc.otherMode.zCmp() && (call.shaderDesc.otherMode.zMode() != ZMODE_DEC),
|
||||
!copyMode && call.shaderDesc.otherMode.zUpd(),
|
||||
!copyMode && (call.shaderDesc.otherMode.zMode() == ZMODE_DEC),
|
||||
(call.shaderDesc.otherMode.cvgDst() == CVG_DST_WRAP) || (call.shaderDesc.otherMode.cvgDst() == CVG_DST_SAVE));
|
||||
}
|
||||
|
||||
|
@ -52,10 +52,31 @@ namespace RT64 {
|
||||
RenderInputElement("COLOR", 0, 2, RasterColorFormat, 2, 0)
|
||||
};
|
||||
|
||||
// OptimizerCacheSPIRV
|
||||
|
||||
void OptimizerCacheSPIRV::initialize() {
|
||||
rasterVS.parse(RasterVSSpecConstantBlobSPIRV, std::size(RasterVSSpecConstantBlobSPIRV));
|
||||
rasterVSFlat.parse(RasterVSSpecConstantFlatBlobSPIRV, std::size(RasterVSSpecConstantFlatBlobSPIRV));
|
||||
rasterPS.parse(RasterPSSpecConstantBlobSPIRV, std::size(RasterPSSpecConstantBlobSPIRV));
|
||||
rasterPSDepth.parse(RasterPSSpecConstantDepthBlobSPIRV, std::size(RasterPSSpecConstantDepthBlobSPIRV));
|
||||
rasterPSDepthMS.parse(RasterPSSpecConstantDepthMSBlobSPIRV, std::size(RasterPSSpecConstantDepthMSBlobSPIRV));
|
||||
rasterPSFlatDepth.parse(RasterPSSpecConstantFlatDepthBlobSPIRV, std::size(RasterPSSpecConstantFlatDepthBlobSPIRV));
|
||||
rasterPSFlatDepthMS.parse(RasterPSSpecConstantFlatDepthMSBlobSPIRV, std::size(RasterPSSpecConstantFlatDepthMSBlobSPIRV));
|
||||
rasterPSFlat.parse(RasterPSSpecConstantFlatBlobSPIRV, std::size(RasterPSSpecConstantFlatBlobSPIRV));
|
||||
assert(!rasterVS.empty());
|
||||
assert(!rasterVSFlat.empty());
|
||||
assert(!rasterPS.empty());
|
||||
assert(!rasterPSDepth.empty());
|
||||
assert(!rasterPSDepthMS.empty());
|
||||
assert(!rasterPSFlatDepth.empty());
|
||||
assert(!rasterPSFlatDepthMS.empty());
|
||||
assert(!rasterPSFlat.empty());
|
||||
}
|
||||
|
||||
// RasterShader
|
||||
|
||||
RasterShader::RasterShader(RenderDevice *device, const ShaderDescription &desc, const RenderPipelineLayout *pipelineLayout, RenderShaderFormat shaderFormat, const RenderMultisampling &multisampling,
|
||||
const ShaderCompiler *shaderCompiler, std::vector<uint8_t> *vsBytes, std::vector<uint8_t> *psBytes, bool useBytes)
|
||||
const ShaderCompiler *shaderCompiler, const OptimizerCacheSPIRV *optimizerCacheSPIRV, std::vector<uint8_t> *vsBytes, std::vector<uint8_t> *psBytes, bool useBytes)
|
||||
{
|
||||
assert(device != nullptr);
|
||||
|
||||
@ -65,54 +86,55 @@ namespace RT64 {
|
||||
const bool useMSAA = (multisampling.sampleCount > 1);
|
||||
std::unique_ptr<RenderShader> vertexShader;
|
||||
std::unique_ptr<RenderShader> pixelShader;
|
||||
std::vector<RenderSpecConstant> specConstants;
|
||||
if (shaderFormat == RenderShaderFormat::SPIRV) {
|
||||
// Choose the pre-compiled shader permutations.
|
||||
const void *VSBlob = nullptr;
|
||||
const void *PSBlob = nullptr;
|
||||
uint32_t VSBlobSize = 0;
|
||||
uint32_t PSBlobSize = 0;
|
||||
const respv::Shader *VS = nullptr;
|
||||
const respv::Shader *PS = nullptr;
|
||||
const bool outputDepth = desc.outputDepth(useMSAA);
|
||||
if (desc.flags.smoothShade) {
|
||||
VSBlob = RasterVSSpecConstantBlobSPIRV;
|
||||
VSBlobSize = uint32_t(std::size(RasterVSSpecConstantBlobSPIRV));
|
||||
}
|
||||
else {
|
||||
VSBlob = RasterVSSpecConstantFlatBlobSPIRV;
|
||||
VSBlobSize = uint32_t(std::size(RasterVSSpecConstantFlatBlobSPIRV));
|
||||
}
|
||||
|
||||
VS = desc.flags.smoothShade ? &optimizerCacheSPIRV->rasterVS : &optimizerCacheSPIRV->rasterVSFlat;
|
||||
|
||||
// Pick the correct SPIR-V based on the configuration.
|
||||
if (desc.flags.smoothShade) {
|
||||
if (outputDepth) {
|
||||
PSBlob = useMSAA ? RasterPSSpecConstantDepthMSBlobSPIRV : RasterPSSpecConstantDepthBlobSPIRV;
|
||||
PSBlobSize = uint32_t(useMSAA ? std::size(RasterPSSpecConstantDepthMSBlobSPIRV) : std::size(RasterPSSpecConstantDepthBlobSPIRV));
|
||||
PS = useMSAA ? &optimizerCacheSPIRV->rasterPSDepthMS : &optimizerCacheSPIRV->rasterPSDepth;
|
||||
}
|
||||
else {
|
||||
PSBlob = RasterPSSpecConstantBlobSPIRV;
|
||||
PSBlobSize = uint32_t(std::size(RasterPSSpecConstantBlobSPIRV));
|
||||
PS = &optimizerCacheSPIRV->rasterPS;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (outputDepth) {
|
||||
PSBlob = useMSAA ? RasterPSSpecConstantFlatDepthMSBlobSPIRV : RasterPSSpecConstantFlatDepthBlobSPIRV;
|
||||
PSBlobSize = uint32_t(useMSAA ? std::size(RasterPSSpecConstantFlatDepthMSBlobSPIRV) : std::size(RasterPSSpecConstantFlatDepthBlobSPIRV));
|
||||
PS = useMSAA ? &optimizerCacheSPIRV->rasterPSFlatDepthMS : &optimizerCacheSPIRV->rasterPSFlatDepth;
|
||||
}
|
||||
else {
|
||||
PSBlob = RasterPSSpecConstantFlatBlobSPIRV;
|
||||
PSBlobSize = uint32_t(std::size(RasterPSSpecConstantFlatBlobSPIRV));
|
||||
PS = &optimizerCacheSPIRV->rasterPSFlat;
|
||||
}
|
||||
}
|
||||
|
||||
thread_local std::vector<respv::SpecConstant> specConstants;
|
||||
thread_local bool specConstantsSetup = false;
|
||||
thread_local std::vector<uint8_t> optimizedVS;
|
||||
thread_local std::vector<uint8_t> optimizedPS;
|
||||
if (!specConstantsSetup) {
|
||||
for (uint32_t i = 0; i < 5; i++) {
|
||||
specConstants.push_back(respv::SpecConstant(i, { 0 }));
|
||||
}
|
||||
|
||||
specConstantsSetup = true;
|
||||
}
|
||||
|
||||
vertexShader = device->createShader(VSBlob, VSBlobSize, "VSMain", shaderFormat);
|
||||
pixelShader = device->createShader(PSBlob, PSBlobSize, "PSMain", shaderFormat);
|
||||
specConstants[0].values[0] = desc.otherMode.L;
|
||||
specConstants[1].values[0] = desc.otherMode.H;
|
||||
specConstants[2].values[0] = desc.colorCombiner.L;
|
||||
specConstants[3].values[0] = desc.colorCombiner.H;
|
||||
specConstants[4].values[0] = desc.flags.value;
|
||||
|
||||
// Spec constants should replace the constants embedded in the shader directly.
|
||||
specConstants.emplace_back(0, desc.otherMode.L);
|
||||
specConstants.emplace_back(1, desc.otherMode.H);
|
||||
specConstants.emplace_back(2, desc.colorCombiner.L);
|
||||
specConstants.emplace_back(3, desc.colorCombiner.H);
|
||||
specConstants.emplace_back(4, desc.flags.value);
|
||||
bool vsRun = respv::Optimizer::run(*VS, specConstants.data(), uint32_t(specConstants.size()), optimizedVS);
|
||||
bool psRun = respv::Optimizer::run(*PS, specConstants.data(), uint32_t(specConstants.size()), optimizedPS);
|
||||
assert(vsRun && psRun && "Shader optimization must always succeed as the inputs are always the same.");
|
||||
|
||||
vertexShader = device->createShader(optimizedVS.data(), optimizedVS.size(), "VSMain", shaderFormat);
|
||||
pixelShader = device->createShader(optimizedPS.data(), optimizedPS.size(), "PSMain", shaderFormat);
|
||||
}
|
||||
else {
|
||||
# if defined(_WIN32)
|
||||
@ -180,13 +202,11 @@ namespace RT64 {
|
||||
creation.pixelShader = pixelShader.get();
|
||||
creation.alphaBlend = !copyMode && interop::Blender::usesAlphaBlend(desc.otherMode);
|
||||
creation.culling = !copyMode && desc.flags.culling;
|
||||
creation.zCmp = !copyMode && desc.otherMode.zCmp();
|
||||
creation.zCmp = !copyMode && desc.otherMode.zCmp() && (desc.otherMode.zMode() != ZMODE_DEC);
|
||||
creation.zUpd = !copyMode && desc.otherMode.zUpd();
|
||||
creation.zDecal = !copyMode && (desc.otherMode.zMode() == ZMODE_DEC);
|
||||
creation.cvgAdd = (desc.otherMode.cvgDst() == CVG_DST_WRAP) || (desc.otherMode.cvgDst() == CVG_DST_SAVE);
|
||||
creation.NoN = desc.flags.NoN;
|
||||
creation.usesHDR = desc.flags.usesHDR;
|
||||
creation.specConstants = specConstants;
|
||||
creation.multisampling = multisampling;
|
||||
pipeline = createPipeline(creation);
|
||||
}
|
||||
@ -227,7 +247,7 @@ namespace RT64 {
|
||||
pss << std::string_view(RenderParamsText, sizeof(RenderParamsText));
|
||||
pss << "RenderParams getRenderParams() {" + renderParamsCode + "; return rp; }";
|
||||
pss <<
|
||||
"bool RasterPS(const RenderParams, bool, float4, float2, float4, float4, uint, out float4, out float4, out float);"
|
||||
"bool RasterPS(const RenderParams, bool, float4, float2, float4, float4, bool, uint, out float4, out float4, out float);"
|
||||
"[shader(\"pixel\")]"
|
||||
"void PSMain("
|
||||
" in float4 vertexPosition : SV_POSITION"
|
||||
@ -266,7 +286,7 @@ namespace RT64 {
|
||||
" float4 resultColor;"
|
||||
" float4 resultAlpha;"
|
||||
" float resultDepth;"
|
||||
" if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) discard;"
|
||||
" if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, false, sampleIndex, resultColor, resultAlpha, resultDepth)) discard;"
|
||||
" pixelColor = resultColor;"
|
||||
" pixelAlpha = resultAlpha;";
|
||||
|
||||
@ -280,8 +300,6 @@ namespace RT64 {
|
||||
}
|
||||
|
||||
std::unique_ptr<RenderPipeline> RasterShader::createPipeline(const PipelineCreation &c) {
|
||||
assert((!c.zDecal || !c.zUpd) && "Decals with depth write should never be created.");
|
||||
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(c.usesHDR);
|
||||
@ -289,6 +307,7 @@ namespace RT64 {
|
||||
pipelineDesc.cullMode = c.culling ? RenderCullMode::FRONT : RenderCullMode::NONE;
|
||||
pipelineDesc.depthClipEnabled = !c.NoN;
|
||||
pipelineDesc.depthEnabled = c.zCmp || c.zUpd;
|
||||
pipelineDesc.depthFunction = c.zCmp ? RenderComparisonFunction::LESS : RenderComparisonFunction::ALWAYS;
|
||||
pipelineDesc.depthWriteEnabled = c.zUpd;
|
||||
pipelineDesc.depthTargetFormat = RenderFormat::D32_FLOAT;
|
||||
pipelineDesc.multisampling = c.multisampling;
|
||||
@ -303,20 +322,6 @@ namespace RT64 {
|
||||
pipelineDesc.specConstants = c.specConstants.data();
|
||||
pipelineDesc.specConstantsCount = uint32_t(c.specConstants.size());
|
||||
|
||||
if (c.zCmp) {
|
||||
// While these modes evaluate equality in the hardware, we use LEQUAL to simulate the depth comparison in the shader instead.
|
||||
if (c.zDecal) {
|
||||
pipelineDesc.depthFunction = RenderComparisonFunction::LESS_EQUAL;
|
||||
}
|
||||
// ZMODE_OPA, ZMODE_XLU and ZMODE_INTER only differ based on coverage, which is not emulated, so they can all be approximated the same way.
|
||||
else {
|
||||
pipelineDesc.depthFunction = RenderComparisonFunction::LESS;
|
||||
}
|
||||
}
|
||||
else {
|
||||
pipelineDesc.depthFunction = RenderComparisonFunction::ALWAYS;
|
||||
}
|
||||
|
||||
// Alpha blending is performed by using dual source blending. The blending factor will be in the secondary output.
|
||||
RenderBlendDesc &targetBlend = pipelineDesc.renderTargetBlend[0];
|
||||
if (c.alphaBlend) {
|
||||
@ -438,32 +443,26 @@ namespace RT64 {
|
||||
pipelineLayout = layoutBuilder.create(device);
|
||||
|
||||
// Generate all possible combinations of pipeline creations and assign them to each thread. Skip the ones that are invalid.
|
||||
uint32_t pipelineCount = uint32_t(std::size(pipelines));
|
||||
pipelineThreadCreations.clear();
|
||||
pipelineThreadCreations.resize(threadCount);
|
||||
pipelineThreadCreations.resize(std::min(threadCount, pipelineCount));
|
||||
|
||||
PipelineCreation creation;
|
||||
creation.device = device;
|
||||
creation.pipelineLayout = pipelineLayout.get();
|
||||
creation.vertexShader = vertexShader.get();
|
||||
creation.pixelShader = pixelShader.get();
|
||||
creation.alphaBlend = true;
|
||||
creation.culling = false;
|
||||
creation.NoN = true;
|
||||
creation.usesHDR = shaderLibrary->usesHDR;
|
||||
creation.multisampling = multisampling;
|
||||
|
||||
uint32_t threadIndex = 0;
|
||||
uint32_t pipelineCount = uint32_t(std::size(pipelines));
|
||||
for (uint32_t i = 0; i < pipelineCount; i++) {
|
||||
creation.alphaBlend = i & (1 << 0);
|
||||
creation.culling = i & (1 << 1);
|
||||
creation.zCmp = i & (1 << 2);
|
||||
creation.zUpd = i & (1 << 3);
|
||||
creation.zDecal = i & (1 << 4);
|
||||
creation.cvgAdd = i & (1 << 5);
|
||||
|
||||
// Skip all PSOs that would lead to invalid decal behavior.
|
||||
if (creation.zDecal && (creation.zUpd || !creation.zCmp)) {
|
||||
continue;
|
||||
}
|
||||
creation.zCmp = i & (1 << 0);
|
||||
creation.zUpd = i & (1 << 1);
|
||||
creation.cvgAdd = i & (1 << 2);
|
||||
|
||||
pipelineThreadCreations[threadIndex].emplace_back(creation);
|
||||
threadIndex = (threadIndex + 1) % threadCount;
|
||||
@ -471,8 +470,8 @@ namespace RT64 {
|
||||
|
||||
// Spawn the threads that will compile all the pipelines.
|
||||
pipelineThreads.clear();
|
||||
pipelineThreads.resize(threadCount);
|
||||
for (uint32_t i = 0; i < threadCount; i++) {
|
||||
pipelineThreads.resize(pipelineThreadCreations.size());
|
||||
for (uint32_t i = 0; i < uint32_t(pipelineThreads.size()); i++) {
|
||||
pipelineThreads[i] = std::make_unique<std::thread>(&RasterShaderUber::threadCreatePipelines, this, i);
|
||||
}
|
||||
|
||||
@ -533,7 +532,7 @@ namespace RT64 {
|
||||
|
||||
void RasterShaderUber::threadCreatePipelines(uint32_t threadIndex) {
|
||||
for (const PipelineCreation &creation : pipelineThreadCreations[threadIndex]) {
|
||||
uint32_t pipelineIndex = pipelineStateIndex(creation.alphaBlend, creation.culling, creation.zCmp, creation.zUpd, creation.zDecal, creation.cvgAdd);
|
||||
uint32_t pipelineIndex = pipelineStateIndex(creation.zCmp, creation.zUpd, creation.cvgAdd);
|
||||
pipelines[pipelineIndex] = RasterShader::createPipeline(creation);
|
||||
}
|
||||
}
|
||||
@ -552,23 +551,14 @@ namespace RT64 {
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t RasterShaderUber::pipelineStateIndex(bool alphaBlend, bool culling, bool zCmp, bool zUpd, bool zDecal, bool cvgAdd) const {
|
||||
uint32_t RasterShaderUber::pipelineStateIndex(bool zCmp, bool zUpd, bool cvgAdd) const {
|
||||
return
|
||||
(uint32_t(alphaBlend) << 0) |
|
||||
(uint32_t(culling) << 1) |
|
||||
(uint32_t(zCmp) << 2) |
|
||||
(uint32_t(zUpd) << 3) |
|
||||
(uint32_t(zDecal) << 4) |
|
||||
(uint32_t(cvgAdd) << 5);
|
||||
(uint32_t(zCmp) << 0) |
|
||||
(uint32_t(zUpd) << 1) |
|
||||
(uint32_t(cvgAdd) << 2);
|
||||
}
|
||||
|
||||
const RenderPipeline *RasterShaderUber::getPipeline(bool alphaBlend, bool culling, bool zCmp, bool zUpd, bool zDecal, bool cvgAdd) const {
|
||||
// Force read and turn off writing on decal modes since those PSOs are not generated.
|
||||
if (zDecal) {
|
||||
zCmp = true;
|
||||
zUpd = false;
|
||||
}
|
||||
|
||||
return pipelines[pipelineStateIndex(alphaBlend, culling, zCmp, zUpd, zDecal, cvgAdd)].get();
|
||||
const RenderPipeline *RasterShaderUber::getPipeline(bool zCmp, bool zUpd, bool cvgAdd) const {
|
||||
return pipelines[pipelineStateIndex(zCmp, zUpd, cvgAdd)].get();
|
||||
}
|
||||
};
|
@ -9,6 +9,8 @@
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include "re-spirv/re-spirv.h"
|
||||
|
||||
#include "rhi/rt64_render_interface.h"
|
||||
#include "shared/rt64_blender.h"
|
||||
#include "shared/rt64_color_combiner.h"
|
||||
@ -21,6 +23,19 @@
|
||||
#define SAMPLE_LOCATIONS_REQUIRED 1
|
||||
|
||||
namespace RT64 {
|
||||
struct OptimizerCacheSPIRV {
|
||||
respv::Shader rasterVS;
|
||||
respv::Shader rasterVSFlat;
|
||||
respv::Shader rasterPS;
|
||||
respv::Shader rasterPSDepth;
|
||||
respv::Shader rasterPSDepthMS;
|
||||
respv::Shader rasterPSFlatDepth;
|
||||
respv::Shader rasterPSFlatDepthMS;
|
||||
respv::Shader rasterPSFlat;
|
||||
|
||||
void initialize();
|
||||
};
|
||||
|
||||
struct PipelineCreation {
|
||||
RenderDevice *device;
|
||||
const RenderPipelineLayout *pipelineLayout;
|
||||
@ -31,7 +46,6 @@ namespace RT64 {
|
||||
bool NoN;
|
||||
bool zCmp;
|
||||
bool zUpd;
|
||||
bool zDecal;
|
||||
bool cvgAdd;
|
||||
bool usesHDR;
|
||||
std::vector<RenderSpecConstant> specConstants;
|
||||
@ -49,7 +63,7 @@ namespace RT64 {
|
||||
std::unique_ptr<RenderPipeline> pipeline;
|
||||
|
||||
RasterShader(RenderDevice *device, const ShaderDescription &desc, const RenderPipelineLayout *pipelineLayout, RenderShaderFormat shaderFormat, const RenderMultisampling &multisampling,
|
||||
const ShaderCompiler *shaderCompiler, std::vector<uint8_t> *vsBytes = nullptr, std::vector<uint8_t> *psBytes = nullptr, bool useBytes = false);
|
||||
const ShaderCompiler *shaderCompiler, const OptimizerCacheSPIRV *optimizerCacheSPIRV, std::vector<uint8_t> *vsBytes = nullptr, std::vector<uint8_t> *psBytes = nullptr, bool useBytes = false);
|
||||
|
||||
~RasterShader();
|
||||
static RasterShaderText generateShaderText(const ShaderDescription &desc, bool multisampling);
|
||||
@ -61,7 +75,7 @@ namespace RT64 {
|
||||
static const uint64_t RasterVSLibraryHash;
|
||||
static const uint64_t RasterPSLibraryHash;
|
||||
|
||||
std::unique_ptr<RenderPipeline> pipelines[64];
|
||||
std::unique_ptr<RenderPipeline> pipelines[8];
|
||||
std::unique_ptr<RenderPipeline> postBlendDitherNoiseAddPipeline;
|
||||
std::unique_ptr<RenderPipeline> postBlendDitherNoiseSubPipeline;
|
||||
std::mutex pipelinesMutex;
|
||||
@ -76,7 +90,7 @@ namespace RT64 {
|
||||
~RasterShaderUber();
|
||||
void threadCreatePipelines(uint32_t threadIndex);
|
||||
void waitForPipelineCreation();
|
||||
uint32_t pipelineStateIndex(bool alphaBlend, bool culling, bool zCmp, bool zUpd, bool zDecal, bool cvgAdd) const;
|
||||
const RenderPipeline *getPipeline(bool alphaBlend, bool culling, bool zCmp, bool zUpd, bool zDecal, bool cvgAdd) const;
|
||||
uint32_t pipelineStateIndex(bool zCmp, bool zUpd, bool cvgAdd) const;
|
||||
const RenderPipeline *getPipeline(bool zCmp, bool zUpd, bool cvgAdd) const;
|
||||
};
|
||||
};
|
@ -209,7 +209,7 @@ namespace RT64 {
|
||||
assert((shaderCache->shaderUber != nullptr) && "Ubershader should've been created by the time a new shader is submitted to the cache.");
|
||||
const RenderPipelineLayout *uberPipelineLayout = shaderCache->shaderUber->pipelineLayout.get();
|
||||
const RenderMultisampling multisampling = shaderCache->multisampling;
|
||||
std::unique_ptr<RasterShader> newShader = std::make_unique<RasterShader>(shaderCache->device, shaderDesc, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
|
||||
std::unique_ptr<RasterShader> newShader = std::make_unique<RasterShader>(shaderCache->device, shaderDesc, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), &shaderCache->optimizerCacheSPIRV, shaderVsBytes, shaderPsBytes, useShaderBytes);
|
||||
|
||||
// Dump the bytes of the shader if requested.
|
||||
if (!useShaderBytes && (shaderVsBytes != nullptr) && (shaderPsBytes != nullptr)) {
|
||||
@ -220,7 +220,7 @@ namespace RT64 {
|
||||
// Toggle the use of HDR and compile another shader.
|
||||
ShaderDescription shaderDescAlt = shaderDesc;
|
||||
shaderDescAlt.flags.usesHDR = (shaderDescAlt.flags.usesHDR == 0);
|
||||
std::unique_ptr<RasterShader> altShader = std::make_unique<RasterShader>(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
|
||||
std::unique_ptr<RasterShader> altShader = std::make_unique<RasterShader>(shaderCache->device, shaderDescAlt, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), &shaderCache->optimizerCacheSPIRV, shaderVsBytes, shaderPsBytes, useShaderBytes);
|
||||
shaderCache->offlineDumper.stepDumping(shaderDescAlt, dumperVsBytes, dumperPsBytes);
|
||||
}
|
||||
}
|
||||
@ -235,10 +235,11 @@ namespace RT64 {
|
||||
|
||||
// RasterShaderCache
|
||||
|
||||
RasterShaderCache::RasterShaderCache(uint32_t threadCount) {
|
||||
RasterShaderCache::RasterShaderCache(uint32_t threadCount, uint32_t ubershaderThreadCount) {
|
||||
assert(threadCount > 0);
|
||||
|
||||
this->threadCount = threadCount;
|
||||
this->ubershaderThreadCount = ubershaderThreadCount;
|
||||
|
||||
#ifdef ENABLE_OPTIMIZED_SHADER_GENERATION
|
||||
# ifdef _WIN32
|
||||
@ -264,8 +265,13 @@ namespace RT64 {
|
||||
this->shaderFormat = shaderFormat;
|
||||
this->multisampling = multisampling;
|
||||
|
||||
shaderUber = std::make_unique<RasterShaderUber>(device, shaderFormat, multisampling, shaderLibrary, threadCount);
|
||||
shaderUber = std::make_unique<RasterShaderUber>(device, shaderFormat, multisampling, shaderLibrary, ubershaderThreadCount);
|
||||
usesHDR = shaderLibrary->usesHDR;
|
||||
|
||||
// Initialize the re-spirv optimizer cache.
|
||||
if (shaderFormat == RenderShaderFormat::SPIRV) {
|
||||
optimizerCacheSPIRV.initialize();
|
||||
}
|
||||
}
|
||||
|
||||
void RasterShaderCache::submit(const ShaderDescription &desc) {
|
||||
|
@ -55,6 +55,7 @@ namespace RT64 {
|
||||
|
||||
RenderDevice *device;
|
||||
std::unique_ptr<RasterShaderUber> shaderUber;
|
||||
OptimizerCacheSPIRV optimizerCacheSPIRV;
|
||||
std::mutex submissionMutex;
|
||||
std::queue<ShaderDescription> descQueue;
|
||||
std::mutex descQueueMutex;
|
||||
@ -65,6 +66,7 @@ namespace RT64 {
|
||||
std::mutex GPUShadersMutex;
|
||||
std::list<std::unique_ptr<CompilationThread>> compilationThreads;
|
||||
uint32_t threadCount;
|
||||
uint32_t ubershaderThreadCount;
|
||||
RenderShaderFormat shaderFormat;
|
||||
std::unique_ptr<ShaderCompiler> shaderCompiler;
|
||||
RenderMultisampling multisampling;
|
||||
@ -73,7 +75,7 @@ namespace RT64 {
|
||||
std::mutex offlineDumperMutex;
|
||||
bool usesHDR = false;
|
||||
|
||||
RasterShaderCache(uint32_t threadCount);
|
||||
RasterShaderCache(uint32_t threadCount, uint32_t ubershaderThreadCount);
|
||||
~RasterShaderCache();
|
||||
void setup(RenderDevice *device, RenderShaderFormat shaderFormat, const ShaderLibrary *shaderLibrary, const RenderMultisampling &multisampling);
|
||||
void submit(const ShaderDescription &desc);
|
||||
|
@ -29,12 +29,19 @@ float sampleBackgroundDepth(int2 pixelPos, uint sampleIndex) {
|
||||
#endif
|
||||
|
||||
LIBRARY_EXPORT bool RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, float2 vertexUV, float4 vertexSmoothColor, float4 vertexFlatColor,
|
||||
uint sampleIndex, out float4 resultColor, out float4 resultAlpha, out float resultDepth)
|
||||
bool isFrontFace, uint sampleIndex, out float4 resultColor, out float4 resultAlpha, out float resultDepth)
|
||||
{
|
||||
const OtherMode otherMode = { rp.omL, rp.omH };
|
||||
#if defined(DYNAMIC_RENDER_PARAMS)
|
||||
if ((otherMode.cycleType() != G_CYC_COPY) && renderFlagCulling(rp.flags) && isFrontFace) {
|
||||
resultDepth = vertexPosition.z;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint instanceIndex = instanceRenderIndices[gConstants.renderIndex].instanceIndex;
|
||||
const float4 vertexColor = renderFlagSmoothShade(rp.flags) ? vertexSmoothColor : float4(vertexFlatColor.rgb, vertexSmoothColor.a);
|
||||
const ColorCombiner colorCombiner = { rp.ccL, rp.ccH };
|
||||
const OtherMode otherMode = { rp.omL, rp.omH };
|
||||
const bool depthClampNear = renderFlagNoN(rp.flags);
|
||||
const bool depthDecal = (otherMode.zMode() == ZMODE_DEC);
|
||||
const bool zSourcePrim = (otherMode.zSource() == G_ZS_PRIM);
|
||||
@ -269,6 +276,9 @@ void PSMain(
|
||||
#if defined(DYNAMIC_RENDER_PARAMS) || defined(VERTEX_FLAT_COLOR)
|
||||
, nointerpolation in float4 vertexFlatColor : COLOR1
|
||||
#endif
|
||||
#if defined(DYNAMIC_RENDER_PARAMS)
|
||||
, bool isFrontFace : SV_IsFrontFace
|
||||
#endif
|
||||
#if defined(MULTISAMPLING)
|
||||
, in uint sampleIndex : SV_SampleIndex
|
||||
#endif
|
||||
@ -283,6 +293,7 @@ void PSMain(
|
||||
#if !defined(VERTEX_FLAT_COLOR)
|
||||
float4 vertexFlatColor = 0.0f;
|
||||
#endif
|
||||
bool isFrontFace = false;
|
||||
#endif
|
||||
#if !defined(MULTISAMPLING)
|
||||
uint sampleIndex = 0;
|
||||
@ -295,7 +306,7 @@ void PSMain(
|
||||
float4 resultColor;
|
||||
float4 resultAlpha;
|
||||
float resultDepth;
|
||||
if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, sampleIndex, resultColor, resultAlpha, resultDepth)) {
|
||||
if (!RasterPS(getRenderParams(), outputDepth, vertexPosition, vertexUV, vertexSmoothColor, vertexFlatColor, isFrontFace, sampleIndex, resultColor, resultAlpha, resultDepth)) {
|
||||
discard;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user