Support for high precision framebuffers. (#31)

* Support for high precision framebuffers.

* Missing header.

* Add detection logic for devices with low VRAM.

* Add HDR handling to shader cache dumping and parsing.

* Internal color format in configuration.
This commit is contained in:
Darío 2024-05-24 16:39:34 -03:00 committed by GitHub
parent c84eb6a984
commit 9b9237f097
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
39 changed files with 219 additions and 72 deletions

View File

@ -28,6 +28,7 @@ namespace RT64 {
j["threePointFiltering"] = cfg.threePointFiltering;
j["refreshRate"] = cfg.refreshRate;
j["refreshRateTarget"] = cfg.refreshRateTarget;
j["internalColorFormat"] = cfg.internalColorFormat;
j["idleWorkActive"] = cfg.idleWorkActive;
j["developerMode"] = cfg.developerMode;
}
@ -48,6 +49,7 @@ namespace RT64 {
cfg.threePointFiltering = j.value("threePointFiltering", defaultCfg.threePointFiltering);
cfg.refreshRate = j.value("refreshRate", defaultCfg.refreshRate);
cfg.refreshRateTarget = j.value("refreshRateTarget", defaultCfg.refreshRateTarget);
cfg.internalColorFormat = j.value("internalColorFormat", defaultCfg.internalColorFormat);
cfg.idleWorkActive = j.value("idleWorkActive", defaultCfg.idleWorkActive);
cfg.developerMode = j.value("developerMode", defaultCfg.developerMode);
}
@ -76,6 +78,7 @@ namespace RT64 {
threePointFiltering = true;
refreshRate = RefreshRate::Original;
refreshRateTarget = 60;
internalColorFormat = InternalColorFormat::Automatic;
idleWorkActive = true;
developerMode = false;
}
@ -89,6 +92,7 @@ namespace RT64 {
clampEnum<AspectRatio>(extAspectRatio);
clampEnum<Upscale2D>(upscale2D);
clampEnum<RefreshRate>(refreshRate);
clampEnum<InternalColorFormat>(internalColorFormat);
resolutionMultiplier = std::clamp<double>(resolutionMultiplier, 0.0f, ResolutionMultiplierLimit);
downsampleMultiplier = std::clamp<int>(downsampleMultiplier, 1, ResolutionMultiplierLimit);
aspectTarget = std::clamp<double>(aspectTarget, 0.1f, 100.0f);

View File

@ -63,6 +63,13 @@ namespace RT64 {
OptionCount
};
// Precision of the internal color framebuffers, as chosen by the user.
enum class InternalColorFormat {
// Never use the high precision (HDR) color format.
Standard,
// Always use the high precision (HDR) color format.
High,
// Use the high precision format only if the device capabilities report a preference for it.
Automatic,
// Count of the options above. Not a selectable format by itself.
OptionCount
};
GraphicsAPI graphicsAPI;
Resolution resolution;
Antialiasing antialiasing;
@ -77,6 +84,7 @@ namespace RT64 {
bool threePointFiltering;
RefreshRate refreshRate;
int refreshRateTarget;
InternalColorFormat internalColorFormat;
bool idleWorkActive;
bool developerMode;
@ -131,6 +139,12 @@ namespace RT64 {
{ UserConfiguration::RefreshRate::Manual, "Manual" }
});
// String names used for InternalColorFormat when it is stored in the JSON configuration.
// OptionCount has no string mapping.
NLOHMANN_JSON_SERIALIZE_ENUM(UserConfiguration::InternalColorFormat, {
{ UserConfiguration::InternalColorFormat::Standard, "Standard" },
{ UserConfiguration::InternalColorFormat::High, "High" },
{ UserConfiguration::InternalColorFormat::Automatic, "Automatic" }
});
struct ConfigurationJSON {
static bool read(UserConfiguration &cfg, std::istream &stream);
static bool write(const UserConfiguration &cfg, std::ostream &stream);

View File

@ -3058,6 +3058,7 @@ namespace RT64 {
capabilities.descriptorIndexing = true;
capabilities.scalarBlockLayout = true;
capabilities.presentWait = true;
capabilities.preferHDR = dedicatedVideoMemory > (512 * 1024 * 1024);
// Create descriptor heaps allocator.
descriptorHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, ShaderDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

View File

@ -179,10 +179,25 @@ namespace RT64 {
workloadGraphicsWorker = std::make_unique<RenderWorker>(device.get(), "Workload Graphics", RenderCommandListType::DIRECT);
presentGraphicsWorker = std::make_unique<RenderWorker>(device.get(), "Present Graphics", RenderCommandListType::DIRECT);
swapChain = presentGraphicsWorker->commandQueue->createSwapChain(appWindow->windowHandle, 2, RenderFormat::B8G8R8A8_UNORM);
// Detect if the application should use HDR framebuffers or not.
bool usesHDR;
switch (userConfig.internalColorFormat) {
case UserConfiguration::InternalColorFormat::High:
usesHDR = true;
break;
case UserConfiguration::InternalColorFormat::Automatic:
usesHDR = device->getCapabilities().preferHDR;
break;
case UserConfiguration::InternalColorFormat::Standard:
default:
usesHDR = false;
break;
}
// Before configuring multisampling, make sure the device actually supports it for the formats we'll use. If it doesn't, turn off antialiasing in the configuration.
const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::ColorBufferFormat);
const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::DepthBufferFormat);
const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR));
const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::depthBufferFormat());
const RenderSampleCounts commonSampleCounts = colorSampleCounts & depthSampleCounts;
if ((commonSampleCounts & userConfig.msaaSampleCount()) == 0) {
userConfig.antialiasing = UserConfiguration::Antialiasing::None;
@ -190,7 +205,7 @@ namespace RT64 {
// Create the shader library.
const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(userConfig.msaaSampleCount(), device->getCapabilities().sampleLocations);
shaderLibrary = std::make_unique<ShaderLibrary>();
shaderLibrary = std::make_unique<ShaderLibrary>(usesHDR);
shaderLibrary->setupCommonShaders(renderInterface.get(), device.get());
shaderLibrary->setupMultisamplingShaders(renderInterface.get(), device.get(), multisampling);
@ -230,6 +245,7 @@ namespace RT64 {
sharedQueueResources->setSwapChainSize(swapChain->getWidth(), swapChain->getHeight());
sharedQueueResources->setSwapChainRate(appWindow->getRefreshRate());
sharedQueueResources->renderTargetManager.setMultisampling(multisampling);
sharedQueueResources->renderTargetManager.setUsesHDR(usesHDR);
WorkloadQueue::External workloadExt;
workloadExt.device = device.get();

View File

@ -126,7 +126,7 @@ namespace RT64 {
# elif defined(__APPLE__)
windowHandle.window = wmInfo.info.cocoa.window;
# else
static_assert(false && "Android unimplemented");
static_assert(false && "Unimplemented");
# endif
usingSdl = true;
#else

View File

@ -99,7 +99,7 @@ namespace RT64 {
assert(worker != nullptr);
assert(src != nullptr);
FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount);
FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount, shaderLibrary->usesHDR);
uint32_t readPixels = copyRAMToNativeAndChanges(worker, changeUsed, src, rowStart, rowCount, fmt, true, shaderLibrary);
if (readPixels > 0) {
return &changeUsed;

View File

@ -33,7 +33,7 @@ namespace RT64 {
}
}
FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height) {
FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR) {
// To increase the chances of reusing buffers, we extend the width and height to a multiple of 32.
const uint32_t Alignment = 32;
uint32_t alignedWidth = ((width / Alignment) + ((width % Alignment) ? 1 : 0)) * Alignment;
@ -65,7 +65,7 @@ namespace RT64 {
RenderFormat pixelFormat;
switch (type) {
case FramebufferChange::Type::Color:
pixelFormat = RenderTarget::ColorBufferFormat;
pixelFormat = RenderTarget::colorBufferFormat(usesHDR);
break;
case FramebufferChange::Type::Depth:
pixelFormat = RenderFormat::R32_FLOAT;

View File

@ -45,7 +45,7 @@ namespace RT64 {
FramebufferChangePool();
~FramebufferChangePool();
void reset();
FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height);
FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR);
const FramebufferChange *get(uint64_t id) const;
void release(uint64_t id);
};

View File

@ -143,7 +143,7 @@ namespace RT64 {
RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS;
textureFlags |= RenderTextureFlag::RENDER_TARGET;
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags);
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::colorBufferFormat(targetManager.usesHDR), textureFlags);
tileCopy.texture = renderWorker->device->createTexture(textureDesc);
}
@ -238,7 +238,7 @@ namespace RT64 {
cmdListCopies.cmdListCopyRegions.push_back(copyRegion);
}
void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale) {
void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR) {
assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end());
// Source tile must exist.
@ -295,13 +295,13 @@ namespace RT64 {
RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS;
textureFlags |= RenderTextureFlag::RENDER_TARGET;
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags);
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::colorBufferFormat(usesHDR), textureFlags);
dstTile.texture = renderWorker->device->createTexture(textureDesc);
}
}
void FramebufferManager::reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale,
uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations)
uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations)
{
assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end());
@ -325,6 +325,7 @@ namespace RT64 {
c.ditherOffset = dstTile.ditherOffset;
c.ditherPattern = dstTile.ditherPattern;
c.ditherRandomSeed = uint32_t(writeTimestamp) + op.reinterpretTile.dstId;
c.usesHDR = usesHDR;
dispatch.srcTexture = srcTile.texture.get();
dispatch.dstTexture = dstTile.texture.get();
@ -808,7 +809,7 @@ namespace RT64 {
Framebuffer *fb = differentFbs[i];
const uint8_t *fbRAM = &RDRAM[fb->addressStart];
const FramebufferChange::Type fbChangeType = (fb->lastWriteFmt == G_IM_FMT_DEPTH) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color;
FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height);
FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height, shaderLibrary->usesHDR);
const uint32_t DifferenceFractionNum = 1;
const uint32_t DifferenceFractionDiv = 4;
const uint32_t differentPixels = fb->copyRAMToNativeAndChanges(renderWorker, fbChange, fbRAM, 0, fb->height, fb->lastWriteFmt, false, shaderLibrary);
@ -883,7 +884,7 @@ namespace RT64 {
break;
}
case FramebufferOperation::Type::ReinterpretTile: {
reinterpretTileSetup(renderWorker, op, resolutionScale);
reinterpretTileSetup(renderWorker, op, resolutionScale, targetManager.usesHDR);
break;
}
default:
@ -917,7 +918,7 @@ namespace RT64 {
}
case FramebufferOperation::Type::ReinterpretTile: {
assert(textureCache != nullptr);
reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, cmdListReinterpretations);
reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, shaderLibrary->usesHDR, cmdListReinterpretations);
break;
}
default:

View File

@ -174,9 +174,9 @@ namespace RT64 {
void createTileCopyRecord(RenderWorker *renderWorker, const FramebufferOperation &op, const FramebufferStorage &fbStorage, RenderTargetManager &targetManager,
hlslpp::float2 resolutionScale, uint32_t maxFbPairIndex, CommandListCopies &cmdListCopies, const ShaderLibrary *shaderLibrary);
void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale);
void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR);
void reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale,
uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations);
uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations);
bool makeFramebufferTile(Framebuffer *fb, uint32_t addressStart, uint32_t addressEnd, uint32_t lineWidth, uint32_t tileHeight, FramebufferTile &outTile, bool RGBA32);

View File

@ -73,6 +73,7 @@ namespace RT64 {
const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(ext.userConfig->msaaSampleCount(), ext.device->getCapabilities().sampleLocations);
renderTargetManager.setMultisampling(multisampling);
renderTargetManager.setUsesHDR(ext.shaderLibrary->usesHDR);
}
void State::reset() {
@ -779,6 +780,7 @@ namespace RT64 {
// Fill out all the rendering data for the framebuffer pairs that will be uploaded.
const UserConfiguration::Upscale2D upscale2D = ext.userConfig->upscale2D;
const bool scaleLOD = ext.enhancementConfig->textureLOD.scale;
const bool usesHDR = ext.shaderLibrary->usesHDR;
const std::vector<uint32_t> &faceIndices = workload.drawData.faceIndices;
const std::vector<int16_t> &posShorts = workload.drawData.posShorts;
uint32_t faceIndex = uint32_t(workload.drawRanges.faceIndices.first);
@ -940,6 +942,7 @@ namespace RT64 {
flags.usesTexture0 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 0, flags.oneCycleHardwareBug);
flags.usesTexture1 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 1, flags.oneCycleHardwareBug);
flags.blenderApproximation = static_cast<unsigned>(blenderEmuReqs.approximateEmulation);
flags.usesHDR = usesHDR;
// Set whether the LOD should be scaled to the display resolution according to the configuration mode and the extended GBI flags.
const bool usesLOD = (callDesc.otherMode.textLOD() == G_TL_LOD);
@ -1440,7 +1443,7 @@ namespace RT64 {
// Set up the dummy target used for rendering the depth if no depth framebuffer is active.
if (depthFb == nullptr) {
if (dummyDepthTarget == nullptr) {
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, renderTargetManager.multisampling);
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, renderTargetManager.multisampling, renderTargetManager.usesHDR);
dummyDepthTarget->setupDepth(ext.framebufferGraphicsWorker, rtWidth, rtHeight);
}
@ -1883,7 +1886,8 @@ namespace RT64 {
genConfigChanged = ImGui::InputInt("Downsample Multiplier", &userConfig.downsampleMultiplier) || genConfigChanged;
ImGui::BeginDisabled(!ext.device->getCapabilities().sampleLocations);
const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::ColorBufferFormat) & ext.device->getSampleCountsSupported(RenderTarget::DepthBufferFormat);
const bool usesHDR = ext.shaderLibrary->usesHDR;
const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR)) & ext.device->getSampleCountsSupported(RenderTarget::depthBufferFormat());
const uint32_t antialiasingOptionCount = uint32_t(UserConfiguration::Antialiasing::OptionCount);
const char *antialiasingNames[antialiasingOptionCount] = { "None", "MSAA 2X", "MSAA 4X", "MSAA 8X" };
if (ImGui::BeginCombo("Antialiasing", antialiasingNames[uint32_t(userConfig.antialiasing)])) {
@ -1929,6 +1933,17 @@ namespace RT64 {
genConfigChanged = ImGui::InputInt("Refresh Rate Target", &userConfig.refreshRateTarget) || genConfigChanged;
}
// Store the user configuration that was used during initialization the first time we check this.
static UserConfiguration::InternalColorFormat configColorFormat = UserConfiguration::InternalColorFormat::OptionCount;
if (configColorFormat == UserConfiguration::InternalColorFormat::OptionCount) {
configColorFormat = userConfig.internalColorFormat;
}
genConfigChanged = ImGui::Combo("Color Format", reinterpret_cast<int *>(&userConfig.internalColorFormat), "Standard\0High\0Automatic\0") || genConfigChanged;
if (userConfig.internalColorFormat != configColorFormat) {
ImGui::Text("You must restart the application for this change to be applied.");
}
genConfigChanged = ImGui::Checkbox("Three-Point Filtering", &userConfig.threePointFiltering) || genConfigChanged;
genConfigChanged = ImGui::Checkbox("High Performance State", &userConfig.idleWorkActive) || genConfigChanged;
@ -2198,6 +2213,7 @@ namespace RT64 {
ImGui::Text("Scalar Block Layout: %d", capabilities.scalarBlockLayout);
ImGui::Text("Present Wait: %d", capabilities.presentWait);
ImGui::Text("Display Timing: %d", capabilities.displayTiming);
ImGui::Text("Prefer HDR: %d", capabilities.preferHDR);
ImGui::EndTabItem();
}

View File

@ -494,7 +494,7 @@ namespace RT64 {
// Set up the dummy target used for rendering the depth if no depth framebuffer is active.
if (depthFb == nullptr) {
if (dummyDepthTarget == nullptr) {
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, targetManager.multisampling);
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, targetManager.multisampling, targetManager.usesHDR);
dummyDepthTarget->setupDepth(ext.workloadGraphicsWorker, rtWidth, rtHeight);
}
@ -958,12 +958,13 @@ namespace RT64 {
// Create as many render targets as required to store the interpolated targets.
auto &interpolatedTargets = ext.sharedResources->interpolatedColorTargets;
const bool usingMSAA = (ext.sharedResources->renderTargetManager.multisampling.sampleCount > 1);
const bool usesHDR = ext.sharedResources->renderTargetManager.usesHDR;
uint32_t requiredFrames = (usingMSAA && generateInterpolatedFrames) ? displayFrames : (displayFrames - 1);
if ((requiredFrames > 0) && (interpolatedTargets.size() < requiredFrames)) {
uint32_t previousSize = uint32_t(interpolatedTargets.size());
interpolatedTargets.resize(requiredFrames);
for (uint32_t i = previousSize; i < requiredFrames; i++) {
interpolatedTargets[i] = std::make_unique<RenderTarget>(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling());
interpolatedTargets[i] = std::make_unique<RenderTarget>(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling(), usesHDR);
}
}

View File

@ -2,8 +2,8 @@
// RT64
//
#include <cstring>
#include <algorithm>
#include <cstring>
#include "common/rt64_thread.h"
@ -92,7 +92,7 @@ namespace RT64 {
// Recreate the buffer pair.
const uint64_t BlockAlignment = 256;
bufferPair.allocatedSize = std::max(((uint64_t)requiredSize * 3) / 2, BlockAlignment);
bufferPair.allocatedSize = std::max(uint64_t((requiredSize * 3) / 2), BlockAlignment);
bufferPair.allocatedSize = roundUp(bufferPair.allocatedSize, BlockAlignment);
bufferPair.uploadBuffer = worker->device->createBuffer(RenderBufferDesc::UploadBuffer(bufferPair.allocatedSize));
bufferPair.defaultBuffer = worker->device->createBuffer(RenderBufferDesc::DefaultBuffer(bufferPair.allocatedSize, u.bufferFlags));

View File

@ -456,7 +456,7 @@ namespace RT64 {
RenderTargetCopyDescriptorSet(RenderDevice *device = nullptr) {
builder.begin();
gInput = builder.addTexture(0);
gInput = builder.addTexture(1);
builder.end();
if (device != nullptr) {

View File

@ -161,6 +161,7 @@ namespace RT64 {
nativeCB.siz = siz;
nativeCB.ditherPattern = 0;
nativeCB.ditherRandomSeed = 0;
nativeCB.usesHDR = shaderLibrary->usesHDR;
// Assert for formats that have not been implemented yet because hardware verification is pending.
assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Readback mode.");
@ -247,6 +248,7 @@ namespace RT64 {
nativeCB.siz = siz;
nativeCB.ditherPattern = ditherPattern;
nativeCB.ditherRandomSeed = ditherRandomSeed;
nativeCB.usesHDR = shaderLibrary->usesHDR;
// Assert for formats that have not been implemented yet because hardware verification is pending.
assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Writeback mode.");

View File

@ -147,6 +147,7 @@ namespace RT64 {
creation.zDecal = !copyMode && (desc.otherMode.zMode() == ZMODE_DEC);
creation.cvgAdd = (desc.otherMode.cvgDst() == CVG_DST_WRAP) || (desc.otherMode.cvgDst() == CVG_DST_SAVE);
creation.NoN = desc.flags.NoN;
creation.usesHDR = desc.flags.usesHDR;
creation.specConstants = specConstants;
creation.multisampling = multisampling;
pipeline = createPipeline(creation);
@ -260,7 +261,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(c.usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.cullMode = c.culling ? RenderCullMode::FRONT : RenderCullMode::NONE;
pipelineDesc.depthClipEnabled = !c.NoN;
@ -418,6 +419,7 @@ namespace RT64 {
creation.vertexShader = vertexShader.get();
creation.pixelShader = pixelShader.get();
creation.NoN = true;
creation.usesHDR = shaderLibrary->usesHDR;
creation.multisampling = multisampling;
uint32_t threadIndex = 0;

View File

@ -33,6 +33,7 @@ namespace RT64 {
bool zUpd;
bool zDecal;
bool cvgAdd;
bool usesHDR;
std::vector<RenderSpecConstant> specConstants;
RenderMultisampling multisampling;
};

View File

@ -12,7 +12,7 @@ namespace RT64 {
// RasterShaderCache::OfflineList
static const uint32_t OfflineMagic = 0x43535452;
static const uint32_t OfflineVersion = 1;
static const uint32_t OfflineVersion = 2;
RasterShaderCache::OfflineList::OfflineList() {
entryIterator = entries.end();
@ -174,8 +174,11 @@ namespace RT64 {
shaderCache->offlineList.step(offlineListEntry);
// Make sure the hash hasn't been submitted yet by the game. If it hasn't, mark it as such and use this entry of the list.
// Also make sure the internal color format used by the shader is compatible.
uint64_t shaderHash = offlineListEntry.shaderDesc.hash();
if (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end()) {
const bool matchesColorFormat = (offlineListEntry.shaderDesc.flags.usesHDR == shaderCache->usesHDR);
const bool hashMissing = (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end());
if (matchesColorFormat && hashMissing) {
shaderDesc = offlineListEntry.shaderDesc;
shaderCache->shaderHashes[shaderHash] = true;
fromOfflineList = true;
@ -213,6 +216,11 @@ namespace RT64 {
const std::unique_lock<std::mutex> lock(shaderCache->offlineDumperMutex);
if (shaderCache->offlineDumper.isDumping()) {
shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes);
// Toggle the use of HDR and compile another shader.
shaderDesc.flags.usesHDR = (shaderDesc.flags.usesHDR == 0);
std::make_unique<RasterShader>(shaderCache->device, shaderDesc, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes);
}
}
@ -256,6 +264,7 @@ namespace RT64 {
this->multisampling = multisampling;
shaderUber = std::make_unique<RasterShaderUber>(device, shaderFormat, multisampling, shaderLibrary, threadCount);
usesHDR = shaderLibrary->usesHDR;
}
void RasterShaderCache::submit(const ShaderDescription &desc) {

View File

@ -71,6 +71,7 @@ namespace RT64 {
OfflineList offlineList;
OfflineDumper offlineDumper;
std::mutex offlineDumperMutex;
bool usesHDR = false;
RasterShaderCache(uint32_t threadCount);
~RasterShaderCache();

View File

@ -8,6 +8,7 @@
#include "gbi/rt64_f3d.h"
#include "shared/rt64_fb_common.h"
#include "shared/rt64_render_target_copy.h"
#include "rt64_raster_shader.h"
@ -16,14 +17,13 @@
namespace RT64 {
// RenderTarget
const RenderFormat RenderTarget::ColorBufferFormat = RenderFormat::R8G8B8A8_UNORM;
const RenderFormat RenderTarget::DepthBufferFormat = RenderFormat::D32_FLOAT;
const long RenderTarget::MaxDimension = 0x4000L;
RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling) {
RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR) {
this->addressForName = addressForName;
this->type = type;
this->multisampling = multisampling;
this->usesHDR = usesHDR;
#if PRINT_CONSTRUCTOR_DESTRUCTOR
fprintf(stdout, "RenderTarget(0x%p)\n", this);
@ -80,7 +80,7 @@ namespace RT64 {
this->height = height;
downsampledTextureMultiplier = 0;
format = ColorBufferFormat;
format = colorBufferFormat(usesHDR);
RenderClearValue clearValue = RenderClearValue::Color(RenderColor(), format);
texture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, format, multisampling, &clearValue));
@ -104,7 +104,7 @@ namespace RT64 {
this->height = height;
downsampledTextureMultiplier = 0;
format = DepthBufferFormat;
format = depthBufferFormat();
RenderClearValue clearValue = RenderClearValue::Depth(RenderDepth(), RenderFormat::D32_FLOAT);
texture = worker->device->createTexture(RenderTextureDesc::DepthTarget(width, height, format, multisampling, &clearValue));
@ -117,7 +117,7 @@ namespace RT64 {
assert(worker != nullptr);
if (dummyTexture == nullptr) {
dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, ColorBufferFormat, multisampling));
dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, colorBufferFormat(usesHDR), multisampling));
dummyTexture->setName("Render Target Dummy");
}
}
@ -152,12 +152,12 @@ namespace RT64 {
const ShaderRecord *shaderRecord = nullptr;
bool useDummyTexture = false;
const bool srcUsesMSAA = (src->multisampling.sampleCount > 1);
if ((format == ColorBufferFormat) && (src->format == DepthBufferFormat)) {
if ((format == colorBufferFormat(usesHDR)) && (src->format == depthBufferFormat())) {
shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyDepthToColorMS : &shaderLibrary->rtCopyDepthToColor;
requiredTextureLayout = RenderTextureLayout::COLOR_WRITE;
setupColorFramebuffer(worker);
}
else if ((format == DepthBufferFormat) && (src->format == ColorBufferFormat)) {
else if ((format == depthBufferFormat()) && (src->format == colorBufferFormat(usesHDR))) {
useDummyTexture = true;
shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyColorToDepthMS : &shaderLibrary->rtCopyColorToDepth;
requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE;
@ -183,6 +183,9 @@ namespace RT64 {
worker->commandList->barriers(RenderBarrierStage::GRAPHICS, framebufferBarriers);
worker->commandList->setFramebuffer(textureFramebuffer.get());
interop::RenderTargetCopyCB copyCB;
copyCB.usesHDR = usesHDR;
// Record the drawing command.
RenderViewport targetViewport = RenderViewport(float(x), float(y), float(width), float(height));
RenderRect targetRect(x, y, x + width, y + height);
@ -191,6 +194,7 @@ namespace RT64 {
worker->commandList->setPipeline(shaderRecord->pipeline.get());
worker->commandList->setGraphicsPipelineLayout(shaderRecord->pipelineLayout.get());
worker->commandList->setGraphicsDescriptorSet(src->targetCopyDescSet->get(), 0);
worker->commandList->setGraphicsPushConstants(0, &copyCB);
worker->commandList->setVertexBuffers(0, nullptr, 0, nullptr);
worker->commandList->drawInstanced(3, 1, 0, 0);
@ -224,12 +228,12 @@ namespace RT64 {
RenderTextureLayout requiredTextureLayout = RenderTextureLayout::UNKNOWN;
const ShaderRecord *shaderRecord = nullptr;
bool useDummyTexture = false;
if (format == ColorBufferFormat) {
if (format == colorBufferFormat(usesHDR)) {
shaderRecord = &shaderLibrary->fbChangesDrawColor;
requiredTextureLayout = RenderTextureLayout::COLOR_WRITE;
setupColorFramebuffer(worker);
}
else if (format == DepthBufferFormat) {
else if (format == depthBufferFormat()) {
useDummyTexture = true;
shaderRecord = &shaderLibrary->fbChangesDrawDepth;
requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE;
@ -341,7 +345,7 @@ namespace RT64 {
uint32_t scaledWidth = std::max(width / downsampleMultiplier, 1U);
uint32_t scaledHeight = std::max(height / downsampleMultiplier, 1U);
if (downsampledTexture == nullptr) {
downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, ColorBufferFormat, RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS));
downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, colorBufferFormat(usesHDR), RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS));
downsampledTexture->setName("Render Target Downsampled");
downsampledTextureMultiplier = downsampleMultiplier;
}
@ -433,4 +437,12 @@ namespace RT64 {
resolutionScale.x = float(expandedColorWidthClamped) / float(nativeWidth);
return resolutionScale;
}
// Returns the format used by color render targets: 16 bits per channel when
// usesHDR is set, 8 bits per channel otherwise.
RenderFormat RenderTarget::colorBufferFormat(bool usesHDR) {
    if (usesHDR) {
        return RenderFormat::R16G16B16A16_UNORM;
    }

    return RenderFormat::R8G8B8A8_UNORM;
}
// Returns the format used by depth render targets. It takes no HDR argument
// and always returns the 32-bit float depth format.
RenderFormat RenderTarget::depthBufferFormat() {
return RenderFormat::D32_FLOAT;
}
};

View File

@ -17,8 +17,6 @@ namespace RT64 {
struct RenderWorker;
struct RenderTarget {
static const RenderFormat ColorBufferFormat;
static const RenderFormat DepthBufferFormat;
static const long MaxDimension;
std::unique_ptr<RenderTexture> texture;
@ -45,8 +43,9 @@ namespace RT64 {
int32_t misalignX = 0;
int32_t invMisalignX = 0;
bool resolvedTextureDirty = false;
bool usesHDR = false;
RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling);
RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR);
~RenderTarget();
void releaseTextures();
bool resize(RenderWorker *worker, uint32_t newWidth, uint32_t newHeight);
@ -69,5 +68,7 @@ namespace RT64 {
bool isEmpty() const;
static void computeScaledSize(uint32_t nativeWidth, uint32_t nativeHeight, hlslpp::float2 resolutionScale, uint32_t &scaledWidth, uint32_t &scaledHeight, uint32_t &misalignmentX);
static hlslpp::float2 computeFixedResolutionScale(uint32_t nativeWidth, hlslpp::float2 resolutionScale);
static RenderFormat colorBufferFormat(bool usesHDR);
static RenderFormat depthBufferFormat();
};
};

View File

@ -26,10 +26,16 @@ namespace RT64 {
// RenderTargetManager
RenderTargetManager::RenderTargetManager() { }
// Stores the multisampling configuration that get() passes to the render targets it creates.
void RenderTargetManager::setMultisampling(const RenderMultisampling &multisampling) {
this->multisampling = multisampling;
}
// Stores whether the high precision color format is in use. get() passes this
// value to the render targets it creates.
void RenderTargetManager::setUsesHDR(bool usesHDR) {
this->usesHDR = usesHDR;
}
RenderTarget &RenderTargetManager::get(const RenderTargetKey &key, bool ignoreOverrides) {
const uint64_t keyHash = key.hash();
if (!ignoreOverrides) {
@ -44,7 +50,7 @@ namespace RT64 {
return *target;
}
target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling);
target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling, usesHDR);
return *target;
}

View File

@ -28,8 +28,11 @@ namespace RT64 {
std::unordered_map<uint64_t, std::unique_ptr<RenderTarget>> targetMap;
std::unordered_map<uint64_t, RenderTarget *> overrideMap;
RenderMultisampling multisampling;
bool usesHDR = false;
RenderTargetManager();
void setMultisampling(const RenderMultisampling &multisampling);
void setUsesHDR(bool usesHDR);
RenderTarget &get(const RenderTargetKey &key, bool ignoreOverrides = false);
void destroyAll();
void setOverride(const RenderTargetKey &key, RenderTarget *target);

View File

@ -5,6 +5,7 @@
#include "rt64_shader_library.h"
#include "common/rt64_common.h"
#include "shared/rt64_render_target_copy.h"
#include "shared/rt64_rsp_vertex_test_z.h"
#include "shaders/FbChangesClearCS.hlsl.spirv.h"
@ -114,7 +115,9 @@
namespace RT64 {
// ShaderLibrary
ShaderLibrary::ShaderLibrary() { }
// Creates the library and records whether high precision (HDR) framebuffers are in use.
// The pipeline setup code in this file reads the member to choose the color render target
// format through RenderTarget::colorBufferFormat(usesHDR).
ShaderLibrary::ShaderLibrary(bool usesHDR) {
this->usesHDR = usesHDR;
}
// Empty destructor body; members are destroyed by their own destructors.
ShaderLibrary::~ShaderLibrary() { }
@ -476,7 +479,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.pipelineLayout = textureCopy.pipelineLayout.get();
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.vertexShader = fullScreenVertexShader.get();
pipelineDesc.pixelShader = pixelShader.get();
@ -562,7 +565,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.pipelineLayout = fbChangesDrawColor.pipelineLayout.get();
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.vertexShader = fullScreenVertexShader.get();
pipelineDesc.pixelShader = colorShader.get();
@ -584,6 +587,7 @@ namespace RT64 {
RenderTargetCopyDescriptorSet descriptorSet;
layoutBuilder.begin();
layoutBuilder.addDescriptorSet(descriptorSet);
layoutBuilder.addPushConstant(0, 0, sizeof(interop::RenderTargetCopyCB), RenderShaderStageFlag::PIXEL);
layoutBuilder.end();
rtCopyDepthToColor.pipelineLayout = layoutBuilder.create(device);
rtCopyDepthToColorMS.pipelineLayout = layoutBuilder.create(device);
@ -594,7 +598,7 @@ namespace RT64 {
std::unique_ptr<RenderShader> depthToColorMSShader = device->createShader(CREATE_SHADER_INPUTS(RtCopyDepthToColorPSMSBlobDXIL, RtCopyDepthToColorPSMSBlobSPIRV, "PSMain", shaderFormat));
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.vertexShader = fullScreenVertexShader.get();
pipelineDesc.pipelineLayout = rtCopyDepthToColor.pipelineLayout.get();
@ -633,7 +637,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.pipelineLayout = postProcess.pipelineLayout.get();
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.vertexShader = fullScreenVertexShader.get();
pipelineDesc.pixelShader = pixelShader.get();
@ -658,7 +662,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc;
pipelineDesc.pipelineLayout = debug.pipelineLayout.get();
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend();
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
pipelineDesc.renderTargetCount = 1;
pipelineDesc.vertexShader = fullScreenVertexShader.get();
pipelineDesc.pixelShader = pixelShader.get();

View File

@ -13,6 +13,8 @@ namespace RT64 {
};
struct ShaderLibrary {
bool usesHDR = false;
std::unique_ptr<RenderSampler> nearestClampSampler;
std::unique_ptr<RenderSampler> linearClampSampler;
std::unique_ptr<RenderSampler> nearestBorderSampler;
@ -59,7 +61,7 @@ namespace RT64 {
ShaderRecord videoInterfaceNearest;
ShaderRecord videoInterfacePixel;
ShaderLibrary();
ShaderLibrary(bool usesHDR);
~ShaderLibrary();
void setupCommonShaders(RenderInterface *rhi, RenderDevice *device);
void setupMultisamplingShaders(RenderInterface *rhi, RenderDevice *device, const RenderMultisampling &multisampling);

View File

@ -1544,6 +1544,9 @@ namespace RT64 {
// Present.
bool presentWait = false;
bool displayTiming = false;
// HDR.
bool preferHDR = false;
};
struct RenderInterfaceCapabilities {

View File

@ -103,10 +103,10 @@ uint Float4ToUINT8(float4 i, uint fmt, bool oddColumn) {
}
}
uint Float4ToUINT16(float4 i, uint fmt, uint dither) {
uint Float4ToUINT16(float4 i, uint fmt, uint dither, bool usesHDR) {
switch (fmt) {
case G_IM_FMT_RGBA:
return Float4ToRGBA16(i, dither);
return Float4ToRGBA16(i, dither, usesHDR);
// TODO
case G_IM_FMT_CI:
return 0;
@ -141,7 +141,7 @@ uint Float4ToUINT32(float4 i, uint fmt) {
}
}
uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) {
uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither, bool usesHDR) {
switch (siz) {
// TODO
case G_IM_SIZ_4b:
@ -149,7 +149,7 @@ uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) {
case G_IM_SIZ_8b:
return Float4ToUINT8(i, fmt, oddColumn);
case G_IM_SIZ_16b:
return Float4ToUINT16(i, fmt, dither);
return Float4ToUINT16(i, fmt, dither, usesHDR);
case G_IM_SIZ_32b:
return Float4ToUINT32(i, fmt);
// Invalid pixel size.

View File

@ -20,7 +20,7 @@ float4 RGBA16toCI8(float4 inputColor, uint2 inputCoord, uint2 outputCoord) {
uint2 ditherCoord = inputCoord + gConstants.ditherOffset;
uint randomSeed = initRand(gConstants.ditherRandomSeed, ditherCoord.y * gConstants.resolution.x + ditherCoord.x, 16);
uint ditherValue = DitherPatternValue(gConstants.ditherPattern, ditherCoord, randomSeed);
uint nativeColor = Float4ToRGBA16(inputColor, ditherValue);
uint nativeColor = Float4ToRGBA16(inputColor, ditherValue, gConstants.usesHDR);
// Extract the lower or upper half of the value depending on the pixel misalignment.
uint pixelMisalignment = 1 - (outputCoord.x % 2);

View File

@ -18,7 +18,7 @@ void CSMain(uint2 coord : SV_DispatchThreadID) {
bool oddColumn = (offsetCoord.x & 1);
uint randomSeed = initRand(gConstants.ditherRandomSeed, dstIndex, 16);
uint ditherValue = DitherPatternValue(gConstants.ditherPattern, offsetCoord, randomSeed);
uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue);
uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue, gConstants.usesHDR);
gOutput[dstIndex] = EndianSwapUINT(nativeUint, gConstants.siz);
}
}

View File

@ -92,11 +92,12 @@ float4 RGBA16ToFloat4(uint rgba16) {
);
}
uint Float4ToRGBA16(float4 i, uint dither) {
uint Float4ToRGBA16(float4 i, uint dither, bool usesHDR) {
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
uint r = round(clamp(i.r * 255.0f, 0.0f, 255.0f));
uint g = round(clamp(i.g * 255.0f, 0.0f, 255.0f));
uint b = round(clamp(i.b * 255.0f, 0.0f, 255.0f));
int cvgModulo = round(i.a * 255.0f) % 8;
int cvgModulo = round(i.a * cvgRange) % 8;
uint a = (cvgModulo & 0x4) ? 1 : 0;
r = min(r + dither, 255) >> 3;
g = min(g + dither, 255) >> 3;

View File

@ -192,10 +192,12 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
}
// Compute coverage estimation.
float resultCvg = (8.0f / 255.0f) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f);
const bool usesHDR = renderFlagUsesHDR(rp.flags);
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
float resultCvg = (8.0f / cvgRange) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f);
// Discard all pixels without coverage.
const float CoverageThreshold = 1.0f / 255.0f;
const float CoverageThreshold = 1.0f / cvgRange;
if (resultCvg < CoverageThreshold) {
discard;
}
@ -220,11 +222,11 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
}
// Write a full coverage value regardless of the computed coverage.
else if (otherMode.cvgDst() == CVG_DST_FULL) {
resultColor.a = 7.0f / 255.0f;
resultColor.a = 7.0f / cvgRange;
}
// Write the coverage value clamped to the full value allowed.
else if (otherMode.cvgDst() == CVG_DST_CLAMP) {
resultColor.a = min(resultCvg, 7.0f / 255.0f);
resultColor.a = min(resultCvg, 7.0f / cvgRange);
}
// Write out the computed coverage. It'll be added on wrap mode.
else {

View File

@ -2,13 +2,17 @@
// RT64
//
#include "shared/rt64_render_target_copy.h"
#include "Depth.hlsli"
#include "Formats.hlsli"
[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0);
#ifdef MULTISAMPLING
Texture2DMS<float4> gInput : register(t0);
Texture2DMS<float4> gInput : register(t1);
#else
Texture2D<float4> gInput : register(t0);
Texture2D<float4> gInput : register(t1);
#endif
float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex, out float resultDepth : SV_DEPTH) : SV_TARGET {
@ -17,7 +21,7 @@ float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sam
#else
float4 inputColor = gInput.Load(uint3(pos.xy, 0));
#endif
uint rgba16 = Float4ToRGBA16(inputColor, 0);
uint rgba16 = Float4ToRGBA16(inputColor, 0, gConstants.usesHDR);
resultDepth = Depth16ToFloat(rgba16);
return 0.0f;
}

View File

@ -2,13 +2,17 @@
// RT64
//
#include "shared/rt64_render_target_copy.h"
#include "Depth.hlsli"
#include "Formats.hlsli"
[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0);
#ifdef MULTISAMPLING
Texture2DMS<float> gInput : register(t0);
Texture2DMS<float> gInput : register(t1);
#else
Texture2D<float> gInput : register(t0);
Texture2D<float> gInput : register(t1);
#endif
float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex) : SV_TARGET {

View File

@ -59,7 +59,7 @@ void computeLOD(OtherMode otherMode, uint rdpTileCount, float2 primLOD, float re
}
}
float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM) {
float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM, bool usesHDR) {
if (rdpTile.cms & G_TX_CLAMP) {
texelInt.x = clamp(texelInt.x, 0, (round(gpuTile.tcScale.x * rdpTile.lrs) / 4) - (round(gpuTile.tcScale.x * rdpTile.uls) / 4) + round(gpuTile.tcScale.x - 1.0f));
}
@ -98,7 +98,8 @@ float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2
// The alpha channel in framebuffer textures represents the coverage. A modulo operation must be performed
// to get the value that would correspond to the alpha channel when it's sampled.
if (gpuTileFlagAlphaIsCvg(gpuTile.flags)) {
int cvgModulo = round(textureColor.a * 255.0f) % 8;
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
int cvgModulo = round(textureColor.a * cvgRange) % 8;
textureColor.a = (cvgModulo & 0x4) ? 1.0f : 0.0f;
}
@ -145,13 +146,14 @@ float4 sampleTexture(OtherMode otherMode, RenderFlags renderFlags, float2 inputU
const uint tlut = otherMode.textLUT();
const bool canDecodeTMEM = renderFlagCanDecodeTMEM(renderFlags);
const bool usesHDR = renderFlagUsesHDR(renderFlags);
int2 texelBaseInt = floor(uvCoord);
float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM);
float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM, usesHDR);
if (filterBilerp || linearFiltering) {
float2 fracPart = uvCoord - texelBaseInt;
float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM);
float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM);
float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM);
float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM, usesHDR);
float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM, usesHDR);
float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM, usesHDR);
if (linearFiltering) {
return lerp(lerp(sample00, sample10, fracPart.x), lerp(sample01, sample11, fracPart.x), fracPart.y);
}

View File

@ -18,6 +18,7 @@ namespace interop {
uint siz;
uint ditherPattern;
uint ditherRandomSeed;
uint usesHDR;
};
#ifdef HLSL_CPU
};

View File

@ -20,6 +20,7 @@ namespace interop {
uint ditherPattern;
uint ditherRandomSeed;
uint2 ditherOffset;
uint usesHDR;
};
#ifdef HLSL_CPU
};

View File

@ -32,6 +32,7 @@ namespace interop {
uint usesTexture1 : 1;
uint upscale2D : 1;
uint upscaleLOD : 1;
uint usesHDR : 1;
};
uint value;
@ -119,6 +120,10 @@ namespace interop {
// Returns true when bit 24 of the packed render flags is set.
// NOTE(review): presumably mirrors the `upscaleLOD : 1` bitfield member declared for the CPU
// side; the full bitfield layout is not visible here, so confirm the bit index against it.
bool renderFlagUpscaleLOD(RenderFlags flags) {
return ((flags >> 24) & 0x1) != 0;
}
// Returns true when bit 25 of the packed render flags is set. Shaders use the result to pick
// the coverage range stored in the alpha channel (65535 when set, 255 otherwise).
// NOTE(review): presumably mirrors the `usesHDR : 1` bitfield member that follows
// `upscaleLOD : 1` on the CPU side; the full bitfield layout is not visible here, so confirm
// that the member really lands on bit 25.
bool renderFlagUsesHDR(RenderFlags flags) {
return ((flags >> 25) & 0x1) != 0;
}
#endif
struct RenderParams {

View File

@ -0,0 +1,17 @@
//
// RT64
//
#pragma once
#include "shared/rt64_hlsl.h"
#ifdef HLSL_CPU
namespace interop {
#endif
// Constants consumed by the render target copy pixel shaders, which bind this struct as their
// push constant block (gConstants).
struct RenderTargetCopyCB {
// Non-zero when the color target uses the high precision format. The shaders forward it to
// Float4ToRGBA16, which uses it to pick the range (65535 or 255) for decoding coverage from alpha.
uint usesHDR;
};
#ifdef HLSL_CPU
};
#endif

View File

@ -3618,6 +3618,16 @@ namespace RT64 {
return;
}
// Find the size of the largest device-local memory heap available on the device.
VkDeviceSize memoryHeapSize = 0;
const VkPhysicalDeviceMemoryProperties *memoryProps = nullptr;
vmaGetMemoryProperties(allocator, &memoryProps);
for (uint32_t i = 0; i < memoryProps->memoryHeapCount; i++) {
if (memoryProps->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
memoryHeapSize = std::max(memoryProps->memoryHeaps[i].size, memoryHeapSize);
}
}
// Fill capabilities.
capabilities.raytracing = rtSupported;
capabilities.raytracingStateUpdate = false;
@ -3626,6 +3636,7 @@ namespace RT64 {
capabilities.scalarBlockLayout = scalarBlockLayout;
capabilities.presentWait = presentWait;
capabilities.displayTiming = supportedOptionalExtensions.find(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME) != supportedOptionalExtensions.end();
capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024);
// Fill Vulkan-only capabilities.
loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end();