mirror of
https://github.com/rt64/rt64.git
synced 2025-01-30 03:32:37 +00:00
Support for high precision framebuffers. (#31)
* Support for high precision framebuffers. * Missing header. * Add detection logic for devices with low VRAM. * Add HDR handling to shader cache dumping and parsing. * Internal color format in configuration.
This commit is contained in:
parent
c84eb6a984
commit
9b9237f097
@ -28,6 +28,7 @@ namespace RT64 {
|
||||
j["threePointFiltering"] = cfg.threePointFiltering;
|
||||
j["refreshRate"] = cfg.refreshRate;
|
||||
j["refreshRateTarget"] = cfg.refreshRateTarget;
|
||||
j["internalColorFormat"] = cfg.internalColorFormat;
|
||||
j["idleWorkActive"] = cfg.idleWorkActive;
|
||||
j["developerMode"] = cfg.developerMode;
|
||||
}
|
||||
@ -48,6 +49,7 @@ namespace RT64 {
|
||||
cfg.threePointFiltering = j.value("threePointFiltering", defaultCfg.threePointFiltering);
|
||||
cfg.refreshRate = j.value("refreshRate", defaultCfg.refreshRate);
|
||||
cfg.refreshRateTarget = j.value("refreshRateTarget", defaultCfg.refreshRateTarget);
|
||||
cfg.internalColorFormat = j.value("internalColorFormat", defaultCfg.internalColorFormat);
|
||||
cfg.idleWorkActive = j.value("idleWorkActive", defaultCfg.idleWorkActive);
|
||||
cfg.developerMode = j.value("developerMode", defaultCfg.developerMode);
|
||||
}
|
||||
@ -76,6 +78,7 @@ namespace RT64 {
|
||||
threePointFiltering = true;
|
||||
refreshRate = RefreshRate::Original;
|
||||
refreshRateTarget = 60;
|
||||
internalColorFormat = InternalColorFormat::Automatic;
|
||||
idleWorkActive = true;
|
||||
developerMode = false;
|
||||
}
|
||||
@ -89,6 +92,7 @@ namespace RT64 {
|
||||
clampEnum<AspectRatio>(extAspectRatio);
|
||||
clampEnum<Upscale2D>(upscale2D);
|
||||
clampEnum<RefreshRate>(refreshRate);
|
||||
clampEnum<InternalColorFormat>(internalColorFormat);
|
||||
resolutionMultiplier = std::clamp<double>(resolutionMultiplier, 0.0f, ResolutionMultiplierLimit);
|
||||
downsampleMultiplier = std::clamp<int>(downsampleMultiplier, 1, ResolutionMultiplierLimit);
|
||||
aspectTarget = std::clamp<double>(aspectTarget, 0.1f, 100.0f);
|
||||
|
@ -63,6 +63,13 @@ namespace RT64 {
|
||||
OptionCount
|
||||
};
|
||||
|
||||
// Selects the precision of the internal color framebuffers. The application
// setup code turns this choice into a single 'usesHDR' boolean, which in turn
// selects the color buffer format through RenderTarget::colorBufferFormat().
enum class InternalColorFormat {
|
||||
// Never use high precision framebuffers (usesHDR = false, R8G8B8A8_UNORM).
Standard,
|
||||
// Always use high precision framebuffers (usesHDR = true, R16G16B16A16_UNORM).
High,
|
||||
// Follow the device's 'preferHDR' capability. This is the default value.
Automatic,
|
||||
// Not a selectable option. Also used by the configuration UI as the
// "not captured yet" value of its startup snapshot of this setting.
// NOTE(review): presumably the exclusive upper bound used by clampEnum<> - confirm.
OptionCount
|
||||
};
|
||||
|
||||
GraphicsAPI graphicsAPI;
|
||||
Resolution resolution;
|
||||
Antialiasing antialiasing;
|
||||
@ -77,6 +84,7 @@ namespace RT64 {
|
||||
bool threePointFiltering;
|
||||
RefreshRate refreshRate;
|
||||
int refreshRateTarget;
|
||||
InternalColorFormat internalColorFormat;
|
||||
bool idleWorkActive;
|
||||
bool developerMode;
|
||||
|
||||
@ -131,6 +139,12 @@ namespace RT64 {
|
||||
{ UserConfiguration::RefreshRate::Manual, "Manual" }
|
||||
});
|
||||
|
||||
// String names used when the "internalColorFormat" configuration key is written
// to and read from JSON. OptionCount is intentionally not listed here.
// Per the nlohmann/json documentation for this macro, a value with no entry in
// the map resolves to the first pair listed (Standard).
NLOHMANN_JSON_SERIALIZE_ENUM(UserConfiguration::InternalColorFormat, {
|
||||
{ UserConfiguration::InternalColorFormat::Standard, "Standard" },
|
||||
{ UserConfiguration::InternalColorFormat::High, "High" },
|
||||
{ UserConfiguration::InternalColorFormat::Automatic, "Automatic" }
|
||||
});
|
||||
|
||||
struct ConfigurationJSON {
|
||||
static bool read(UserConfiguration &cfg, std::istream &stream);
|
||||
static bool write(const UserConfiguration &cfg, std::ostream &stream);
|
||||
|
@ -3058,6 +3058,7 @@ namespace RT64 {
|
||||
capabilities.descriptorIndexing = true;
|
||||
capabilities.scalarBlockLayout = true;
|
||||
capabilities.presentWait = true;
|
||||
capabilities.preferHDR = dedicatedVideoMemory > (512 * 1024 * 1024);
|
||||
|
||||
// Create descriptor heaps allocator.
|
||||
descriptorHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, ShaderDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
|
@ -179,10 +179,25 @@ namespace RT64 {
|
||||
workloadGraphicsWorker = std::make_unique<RenderWorker>(device.get(), "Workload Graphics", RenderCommandListType::DIRECT);
|
||||
presentGraphicsWorker = std::make_unique<RenderWorker>(device.get(), "Present Graphics", RenderCommandListType::DIRECT);
|
||||
swapChain = presentGraphicsWorker->commandQueue->createSwapChain(appWindow->windowHandle, 2, RenderFormat::B8G8R8A8_UNORM);
|
||||
|
||||
// Detect if the application should use HDR framebuffers or not.
|
||||
bool usesHDR;
|
||||
switch (userConfig.internalColorFormat) {
|
||||
case UserConfiguration::InternalColorFormat::High:
|
||||
usesHDR = true;
|
||||
break;
|
||||
case UserConfiguration::InternalColorFormat::Automatic:
|
||||
usesHDR = device->getCapabilities().preferHDR;
|
||||
break;
|
||||
case UserConfiguration::InternalColorFormat::Standard:
|
||||
default:
|
||||
usesHDR = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Before configuring multisampling, make sure the device actually supports it for the formats we'll use. If it doesn't, turn off antialiasing in the configuration.
|
||||
const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::ColorBufferFormat);
|
||||
const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::DepthBufferFormat);
|
||||
const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR));
|
||||
const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::depthBufferFormat());
|
||||
const RenderSampleCounts commonSampleCounts = colorSampleCounts & depthSampleCounts;
|
||||
if ((commonSampleCounts & userConfig.msaaSampleCount()) == 0) {
|
||||
userConfig.antialiasing = UserConfiguration::Antialiasing::None;
|
||||
@ -190,7 +205,7 @@ namespace RT64 {
|
||||
|
||||
// Create the shader library.
|
||||
const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(userConfig.msaaSampleCount(), device->getCapabilities().sampleLocations);
|
||||
shaderLibrary = std::make_unique<ShaderLibrary>();
|
||||
shaderLibrary = std::make_unique<ShaderLibrary>(usesHDR);
|
||||
shaderLibrary->setupCommonShaders(renderInterface.get(), device.get());
|
||||
shaderLibrary->setupMultisamplingShaders(renderInterface.get(), device.get(), multisampling);
|
||||
|
||||
@ -230,6 +245,7 @@ namespace RT64 {
|
||||
sharedQueueResources->setSwapChainSize(swapChain->getWidth(), swapChain->getHeight());
|
||||
sharedQueueResources->setSwapChainRate(appWindow->getRefreshRate());
|
||||
sharedQueueResources->renderTargetManager.setMultisampling(multisampling);
|
||||
sharedQueueResources->renderTargetManager.setUsesHDR(usesHDR);
|
||||
|
||||
WorkloadQueue::External workloadExt;
|
||||
workloadExt.device = device.get();
|
||||
|
@ -126,7 +126,7 @@ namespace RT64 {
|
||||
# elif defined(__APPLE__)
|
||||
windowHandle.window = wmInfo.info.cocoa.window;
|
||||
# else
|
||||
static_assert(false && "Android unimplemented");
|
||||
static_assert(false && "Unimplemented");
|
||||
# endif
|
||||
usingSdl = true;
|
||||
#else
|
||||
|
@ -99,7 +99,7 @@ namespace RT64 {
|
||||
assert(worker != nullptr);
|
||||
assert(src != nullptr);
|
||||
|
||||
FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount);
|
||||
FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount, shaderLibrary->usesHDR);
|
||||
uint32_t readPixels = copyRAMToNativeAndChanges(worker, changeUsed, src, rowStart, rowCount, fmt, true, shaderLibrary);
|
||||
if (readPixels > 0) {
|
||||
return &changeUsed;
|
||||
|
@ -33,7 +33,7 @@ namespace RT64 {
|
||||
}
|
||||
}
|
||||
|
||||
FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height) {
|
||||
FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR) {
|
||||
// To increase the chances of reusing buffers, we extend the width and height to a multiple of 32.
|
||||
const uint32_t Alignment = 32;
|
||||
uint32_t alignedWidth = ((width / Alignment) + ((width % Alignment) ? 1 : 0)) * Alignment;
|
||||
@ -65,7 +65,7 @@ namespace RT64 {
|
||||
RenderFormat pixelFormat;
|
||||
switch (type) {
|
||||
case FramebufferChange::Type::Color:
|
||||
pixelFormat = RenderTarget::ColorBufferFormat;
|
||||
pixelFormat = RenderTarget::colorBufferFormat(usesHDR);
|
||||
break;
|
||||
case FramebufferChange::Type::Depth:
|
||||
pixelFormat = RenderFormat::R32_FLOAT;
|
||||
|
@ -45,7 +45,7 @@ namespace RT64 {
|
||||
FramebufferChangePool();
|
||||
~FramebufferChangePool();
|
||||
void reset();
|
||||
FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height);
|
||||
FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR);
|
||||
const FramebufferChange *get(uint64_t id) const;
|
||||
void release(uint64_t id);
|
||||
};
|
||||
|
@ -143,7 +143,7 @@ namespace RT64 {
|
||||
RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS;
|
||||
textureFlags |= RenderTextureFlag::RENDER_TARGET;
|
||||
|
||||
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags);
|
||||
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::colorBufferFormat(targetManager.usesHDR), textureFlags);
|
||||
tileCopy.texture = renderWorker->device->createTexture(textureDesc);
|
||||
}
|
||||
|
||||
@ -238,7 +238,7 @@ namespace RT64 {
|
||||
cmdListCopies.cmdListCopyRegions.push_back(copyRegion);
|
||||
}
|
||||
|
||||
void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale) {
|
||||
void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR) {
|
||||
assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end());
|
||||
|
||||
// Source tile must exist.
|
||||
@ -295,13 +295,13 @@ namespace RT64 {
|
||||
RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS;
|
||||
textureFlags |= RenderTextureFlag::RENDER_TARGET;
|
||||
|
||||
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags);
|
||||
const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::colorBufferFormat(usesHDR), textureFlags);
|
||||
dstTile.texture = renderWorker->device->createTexture(textureDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void FramebufferManager::reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale,
|
||||
uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations)
|
||||
uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations)
|
||||
{
|
||||
assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end());
|
||||
|
||||
@ -325,6 +325,7 @@ namespace RT64 {
|
||||
c.ditherOffset = dstTile.ditherOffset;
|
||||
c.ditherPattern = dstTile.ditherPattern;
|
||||
c.ditherRandomSeed = uint32_t(writeTimestamp) + op.reinterpretTile.dstId;
|
||||
c.usesHDR = usesHDR;
|
||||
dispatch.srcTexture = srcTile.texture.get();
|
||||
dispatch.dstTexture = dstTile.texture.get();
|
||||
|
||||
@ -808,7 +809,7 @@ namespace RT64 {
|
||||
Framebuffer *fb = differentFbs[i];
|
||||
const uint8_t *fbRAM = &RDRAM[fb->addressStart];
|
||||
const FramebufferChange::Type fbChangeType = (fb->lastWriteFmt == G_IM_FMT_DEPTH) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color;
|
||||
FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height);
|
||||
FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height, shaderLibrary->usesHDR);
|
||||
const uint32_t DifferenceFractionNum = 1;
|
||||
const uint32_t DifferenceFractionDiv = 4;
|
||||
const uint32_t differentPixels = fb->copyRAMToNativeAndChanges(renderWorker, fbChange, fbRAM, 0, fb->height, fb->lastWriteFmt, false, shaderLibrary);
|
||||
@ -883,7 +884,7 @@ namespace RT64 {
|
||||
break;
|
||||
}
|
||||
case FramebufferOperation::Type::ReinterpretTile: {
|
||||
reinterpretTileSetup(renderWorker, op, resolutionScale);
|
||||
reinterpretTileSetup(renderWorker, op, resolutionScale, targetManager.usesHDR);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -917,7 +918,7 @@ namespace RT64 {
|
||||
}
|
||||
case FramebufferOperation::Type::ReinterpretTile: {
|
||||
assert(textureCache != nullptr);
|
||||
reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, cmdListReinterpretations);
|
||||
reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, shaderLibrary->usesHDR, cmdListReinterpretations);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -174,9 +174,9 @@ namespace RT64 {
|
||||
void createTileCopyRecord(RenderWorker *renderWorker, const FramebufferOperation &op, const FramebufferStorage &fbStorage, RenderTargetManager &targetManager,
|
||||
hlslpp::float2 resolutionScale, uint32_t maxFbPairIndex, CommandListCopies &cmdListCopies, const ShaderLibrary *shaderLibrary);
|
||||
|
||||
void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale);
|
||||
void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR);
|
||||
void reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale,
|
||||
uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations);
|
||||
uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations);
|
||||
|
||||
bool makeFramebufferTile(Framebuffer *fb, uint32_t addressStart, uint32_t addressEnd, uint32_t lineWidth, uint32_t tileHeight, FramebufferTile &outTile, bool RGBA32);
|
||||
|
||||
|
@ -73,6 +73,7 @@ namespace RT64 {
|
||||
|
||||
const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(ext.userConfig->msaaSampleCount(), ext.device->getCapabilities().sampleLocations);
|
||||
renderTargetManager.setMultisampling(multisampling);
|
||||
renderTargetManager.setUsesHDR(ext.shaderLibrary->usesHDR);
|
||||
}
|
||||
|
||||
void State::reset() {
|
||||
@ -779,6 +780,7 @@ namespace RT64 {
|
||||
// Fill out all the rendering data for the framebuffer pairs that will be uploaded.
|
||||
const UserConfiguration::Upscale2D upscale2D = ext.userConfig->upscale2D;
|
||||
const bool scaleLOD = ext.enhancementConfig->textureLOD.scale;
|
||||
const bool usesHDR = ext.shaderLibrary->usesHDR;
|
||||
const std::vector<uint32_t> &faceIndices = workload.drawData.faceIndices;
|
||||
const std::vector<int16_t> &posShorts = workload.drawData.posShorts;
|
||||
uint32_t faceIndex = uint32_t(workload.drawRanges.faceIndices.first);
|
||||
@ -940,6 +942,7 @@ namespace RT64 {
|
||||
flags.usesTexture0 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 0, flags.oneCycleHardwareBug);
|
||||
flags.usesTexture1 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 1, flags.oneCycleHardwareBug);
|
||||
flags.blenderApproximation = static_cast<unsigned>(blenderEmuReqs.approximateEmulation);
|
||||
flags.usesHDR = usesHDR;
|
||||
|
||||
// Set whether the LOD should be scaled to the display resolution according to the configuration mode and the extended GBI flags.
|
||||
const bool usesLOD = (callDesc.otherMode.textLOD() == G_TL_LOD);
|
||||
@ -1440,7 +1443,7 @@ namespace RT64 {
|
||||
// Set up the dummy target used for rendering the depth if no depth framebuffer is active.
|
||||
if (depthFb == nullptr) {
|
||||
if (dummyDepthTarget == nullptr) {
|
||||
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, renderTargetManager.multisampling);
|
||||
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, renderTargetManager.multisampling, renderTargetManager.usesHDR);
|
||||
dummyDepthTarget->setupDepth(ext.framebufferGraphicsWorker, rtWidth, rtHeight);
|
||||
}
|
||||
|
||||
@ -1883,7 +1886,8 @@ namespace RT64 {
|
||||
genConfigChanged = ImGui::InputInt("Downsample Multiplier", &userConfig.downsampleMultiplier) || genConfigChanged;
|
||||
|
||||
ImGui::BeginDisabled(!ext.device->getCapabilities().sampleLocations);
|
||||
const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::ColorBufferFormat) & ext.device->getSampleCountsSupported(RenderTarget::DepthBufferFormat);
|
||||
const bool usesHDR = ext.shaderLibrary->usesHDR;
|
||||
const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR)) & ext.device->getSampleCountsSupported(RenderTarget::depthBufferFormat());
|
||||
const uint32_t antialiasingOptionCount = uint32_t(UserConfiguration::Antialiasing::OptionCount);
|
||||
const char *antialiasingNames[antialiasingOptionCount] = { "None", "MSAA 2X", "MSAA 4X", "MSAA 8X" };
|
||||
if (ImGui::BeginCombo("Antialiasing", antialiasingNames[uint32_t(userConfig.antialiasing)])) {
|
||||
@ -1929,6 +1933,17 @@ namespace RT64 {
|
||||
genConfigChanged = ImGui::InputInt("Refresh Rate Target", &userConfig.refreshRateTarget) || genConfigChanged;
|
||||
}
|
||||
|
||||
// Store the user configuration that was used during initialization the first time we check this.
|
||||
static UserConfiguration::InternalColorFormat configColorFormat = UserConfiguration::InternalColorFormat::OptionCount;
|
||||
if (configColorFormat == UserConfiguration::InternalColorFormat::OptionCount) {
|
||||
configColorFormat = userConfig.internalColorFormat;
|
||||
}
|
||||
|
||||
genConfigChanged = ImGui::Combo("Color Format", reinterpret_cast<int *>(&userConfig.internalColorFormat), "Standard\0High\0Automatic\0") || genConfigChanged;
|
||||
if (userConfig.internalColorFormat != configColorFormat) {
|
||||
ImGui::Text("You must restart the application for this change to be applied.");
|
||||
}
|
||||
|
||||
genConfigChanged = ImGui::Checkbox("Three-Point Filtering", &userConfig.threePointFiltering) || genConfigChanged;
|
||||
genConfigChanged = ImGui::Checkbox("High Performance State", &userConfig.idleWorkActive) || genConfigChanged;
|
||||
|
||||
@ -2198,6 +2213,7 @@ namespace RT64 {
|
||||
ImGui::Text("Scalar Block Layout: %d", capabilities.scalarBlockLayout);
|
||||
ImGui::Text("Present Wait: %d", capabilities.presentWait);
|
||||
ImGui::Text("Display Timing: %d", capabilities.displayTiming);
|
||||
ImGui::Text("Prefer HDR: %d", capabilities.preferHDR);
|
||||
ImGui::EndTabItem();
|
||||
}
|
||||
|
||||
|
@ -494,7 +494,7 @@ namespace RT64 {
|
||||
// Set up the dummy target used for rendering the depth if no depth framebuffer is active.
|
||||
if (depthFb == nullptr) {
|
||||
if (dummyDepthTarget == nullptr) {
|
||||
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, targetManager.multisampling);
|
||||
dummyDepthTarget = std::make_unique<RenderTarget>(0, Framebuffer::Type::Depth, targetManager.multisampling, targetManager.usesHDR);
|
||||
dummyDepthTarget->setupDepth(ext.workloadGraphicsWorker, rtWidth, rtHeight);
|
||||
}
|
||||
|
||||
@ -958,12 +958,13 @@ namespace RT64 {
|
||||
// Create as many render targets as required to store the interpolated targets.
|
||||
auto &interpolatedTargets = ext.sharedResources->interpolatedColorTargets;
|
||||
const bool usingMSAA = (ext.sharedResources->renderTargetManager.multisampling.sampleCount > 1);
|
||||
const bool usesHDR = ext.sharedResources->renderTargetManager.usesHDR;
|
||||
uint32_t requiredFrames = (usingMSAA && generateInterpolatedFrames) ? displayFrames : (displayFrames - 1);
|
||||
if ((requiredFrames > 0) && (interpolatedTargets.size() < requiredFrames)) {
|
||||
uint32_t previousSize = uint32_t(interpolatedTargets.size());
|
||||
interpolatedTargets.resize(requiredFrames);
|
||||
for (uint32_t i = previousSize; i < requiredFrames; i++) {
|
||||
interpolatedTargets[i] = std::make_unique<RenderTarget>(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling());
|
||||
interpolatedTargets[i] = std::make_unique<RenderTarget>(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling(), usesHDR);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// RT64
|
||||
//
|
||||
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/rt64_thread.h"
|
||||
|
||||
@ -92,7 +92,7 @@ namespace RT64 {
|
||||
|
||||
// Recreate the buffer pair.
|
||||
const uint64_t BlockAlignment = 256;
|
||||
bufferPair.allocatedSize = std::max(((uint64_t)requiredSize * 3) / 2, BlockAlignment);
|
||||
bufferPair.allocatedSize = std::max(uint64_t((requiredSize * 3) / 2), BlockAlignment);
|
||||
bufferPair.allocatedSize = roundUp(bufferPair.allocatedSize, BlockAlignment);
|
||||
bufferPair.uploadBuffer = worker->device->createBuffer(RenderBufferDesc::UploadBuffer(bufferPair.allocatedSize));
|
||||
bufferPair.defaultBuffer = worker->device->createBuffer(RenderBufferDesc::DefaultBuffer(bufferPair.allocatedSize, u.bufferFlags));
|
||||
|
@ -456,7 +456,7 @@ namespace RT64 {
|
||||
|
||||
RenderTargetCopyDescriptorSet(RenderDevice *device = nullptr) {
|
||||
builder.begin();
|
||||
gInput = builder.addTexture(0);
|
||||
gInput = builder.addTexture(1);
|
||||
builder.end();
|
||||
|
||||
if (device != nullptr) {
|
||||
|
@ -161,6 +161,7 @@ namespace RT64 {
|
||||
nativeCB.siz = siz;
|
||||
nativeCB.ditherPattern = 0;
|
||||
nativeCB.ditherRandomSeed = 0;
|
||||
nativeCB.usesHDR = shaderLibrary->usesHDR;
|
||||
|
||||
// Assert for formats that have not been implemented yet because hardware verification is pending.
|
||||
assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Readback mode.");
|
||||
@ -247,6 +248,7 @@ namespace RT64 {
|
||||
nativeCB.siz = siz;
|
||||
nativeCB.ditherPattern = ditherPattern;
|
||||
nativeCB.ditherRandomSeed = ditherRandomSeed;
|
||||
nativeCB.usesHDR = shaderLibrary->usesHDR;
|
||||
|
||||
// Assert for formats that have not been implemented yet because hardware verification is pending.
|
||||
assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Writeback mode.");
|
||||
|
@ -147,6 +147,7 @@ namespace RT64 {
|
||||
creation.zDecal = !copyMode && (desc.otherMode.zMode() == ZMODE_DEC);
|
||||
creation.cvgAdd = (desc.otherMode.cvgDst() == CVG_DST_WRAP) || (desc.otherMode.cvgDst() == CVG_DST_SAVE);
|
||||
creation.NoN = desc.flags.NoN;
|
||||
creation.usesHDR = desc.flags.usesHDR;
|
||||
creation.specConstants = specConstants;
|
||||
creation.multisampling = multisampling;
|
||||
pipeline = createPipeline(creation);
|
||||
@ -260,7 +261,7 @@ namespace RT64 {
|
||||
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(c.usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.cullMode = c.culling ? RenderCullMode::FRONT : RenderCullMode::NONE;
|
||||
pipelineDesc.depthClipEnabled = !c.NoN;
|
||||
@ -418,6 +419,7 @@ namespace RT64 {
|
||||
creation.vertexShader = vertexShader.get();
|
||||
creation.pixelShader = pixelShader.get();
|
||||
creation.NoN = true;
|
||||
creation.usesHDR = shaderLibrary->usesHDR;
|
||||
creation.multisampling = multisampling;
|
||||
|
||||
uint32_t threadIndex = 0;
|
||||
|
@ -33,6 +33,7 @@ namespace RT64 {
|
||||
bool zUpd;
|
||||
bool zDecal;
|
||||
bool cvgAdd;
|
||||
bool usesHDR;
|
||||
std::vector<RenderSpecConstant> specConstants;
|
||||
RenderMultisampling multisampling;
|
||||
};
|
||||
|
@ -12,7 +12,7 @@ namespace RT64 {
|
||||
// RasterShaderCache::OfflineList
|
||||
|
||||
static const uint32_t OfflineMagic = 0x43535452;
|
||||
static const uint32_t OfflineVersion = 1;
|
||||
static const uint32_t OfflineVersion = 2;
|
||||
|
||||
RasterShaderCache::OfflineList::OfflineList() {
|
||||
entryIterator = entries.end();
|
||||
@ -174,8 +174,11 @@ namespace RT64 {
|
||||
shaderCache->offlineList.step(offlineListEntry);
|
||||
|
||||
// Make sure the hash hasn't been submitted yet by the game. If it hasn't, mark it as such and use this entry of the list.
|
||||
// Also make sure the internal color format used by the shader is compatible.
|
||||
uint64_t shaderHash = offlineListEntry.shaderDesc.hash();
|
||||
if (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end()) {
|
||||
const bool matchesColorFormat = (offlineListEntry.shaderDesc.flags.usesHDR == shaderCache->usesHDR);
|
||||
const bool hashMissing = (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end());
|
||||
if (matchesColorFormat && hashMissing) {
|
||||
shaderDesc = offlineListEntry.shaderDesc;
|
||||
shaderCache->shaderHashes[shaderHash] = true;
|
||||
fromOfflineList = true;
|
||||
@ -213,6 +216,11 @@ namespace RT64 {
|
||||
const std::unique_lock<std::mutex> lock(shaderCache->offlineDumperMutex);
|
||||
if (shaderCache->offlineDumper.isDumping()) {
|
||||
shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes);
|
||||
|
||||
// Toggle the use of HDR and compile another shader.
|
||||
shaderDesc.flags.usesHDR = (shaderDesc.flags.usesHDR == 0);
|
||||
std::make_unique<RasterShader>(shaderCache->device, shaderDesc, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes);
|
||||
shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes);
|
||||
}
|
||||
}
|
||||
|
||||
@ -256,6 +264,7 @@ namespace RT64 {
|
||||
this->multisampling = multisampling;
|
||||
|
||||
shaderUber = std::make_unique<RasterShaderUber>(device, shaderFormat, multisampling, shaderLibrary, threadCount);
|
||||
usesHDR = shaderLibrary->usesHDR;
|
||||
}
|
||||
|
||||
void RasterShaderCache::submit(const ShaderDescription &desc) {
|
||||
|
@ -71,6 +71,7 @@ namespace RT64 {
|
||||
OfflineList offlineList;
|
||||
OfflineDumper offlineDumper;
|
||||
std::mutex offlineDumperMutex;
|
||||
bool usesHDR = false;
|
||||
|
||||
RasterShaderCache(uint32_t threadCount);
|
||||
~RasterShaderCache();
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "gbi/rt64_f3d.h"
|
||||
#include "shared/rt64_fb_common.h"
|
||||
#include "shared/rt64_render_target_copy.h"
|
||||
|
||||
#include "rt64_raster_shader.h"
|
||||
|
||||
@ -16,14 +17,13 @@
|
||||
namespace RT64 {
|
||||
// RenderTarget
|
||||
|
||||
const RenderFormat RenderTarget::ColorBufferFormat = RenderFormat::R8G8B8A8_UNORM;
|
||||
const RenderFormat RenderTarget::DepthBufferFormat = RenderFormat::D32_FLOAT;
|
||||
const long RenderTarget::MaxDimension = 0x4000L;
|
||||
|
||||
RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling) {
|
||||
RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR) {
|
||||
this->addressForName = addressForName;
|
||||
this->type = type;
|
||||
this->multisampling = multisampling;
|
||||
this->usesHDR = usesHDR;
|
||||
|
||||
#if PRINT_CONSTRUCTOR_DESTRUCTOR
|
||||
fprintf(stdout, "RenderTarget(0x%p)\n", this);
|
||||
@ -80,7 +80,7 @@ namespace RT64 {
|
||||
this->height = height;
|
||||
|
||||
downsampledTextureMultiplier = 0;
|
||||
format = ColorBufferFormat;
|
||||
format = colorBufferFormat(usesHDR);
|
||||
|
||||
RenderClearValue clearValue = RenderClearValue::Color(RenderColor(), format);
|
||||
texture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, format, multisampling, &clearValue));
|
||||
@ -104,7 +104,7 @@ namespace RT64 {
|
||||
this->height = height;
|
||||
|
||||
downsampledTextureMultiplier = 0;
|
||||
format = DepthBufferFormat;
|
||||
format = depthBufferFormat();
|
||||
|
||||
RenderClearValue clearValue = RenderClearValue::Depth(RenderDepth(), RenderFormat::D32_FLOAT);
|
||||
texture = worker->device->createTexture(RenderTextureDesc::DepthTarget(width, height, format, multisampling, &clearValue));
|
||||
@ -117,7 +117,7 @@ namespace RT64 {
|
||||
assert(worker != nullptr);
|
||||
|
||||
if (dummyTexture == nullptr) {
|
||||
dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, ColorBufferFormat, multisampling));
|
||||
dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, colorBufferFormat(usesHDR), multisampling));
|
||||
dummyTexture->setName("Render Target Dummy");
|
||||
}
|
||||
}
|
||||
@ -152,12 +152,12 @@ namespace RT64 {
|
||||
const ShaderRecord *shaderRecord = nullptr;
|
||||
bool useDummyTexture = false;
|
||||
const bool srcUsesMSAA = (src->multisampling.sampleCount > 1);
|
||||
if ((format == ColorBufferFormat) && (src->format == DepthBufferFormat)) {
|
||||
if ((format == colorBufferFormat(usesHDR)) && (src->format == depthBufferFormat())) {
|
||||
shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyDepthToColorMS : &shaderLibrary->rtCopyDepthToColor;
|
||||
requiredTextureLayout = RenderTextureLayout::COLOR_WRITE;
|
||||
setupColorFramebuffer(worker);
|
||||
}
|
||||
else if ((format == DepthBufferFormat) && (src->format == ColorBufferFormat)) {
|
||||
else if ((format == depthBufferFormat()) && (src->format == colorBufferFormat(usesHDR))) {
|
||||
useDummyTexture = true;
|
||||
shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyColorToDepthMS : &shaderLibrary->rtCopyColorToDepth;
|
||||
requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE;
|
||||
@ -183,6 +183,9 @@ namespace RT64 {
|
||||
worker->commandList->barriers(RenderBarrierStage::GRAPHICS, framebufferBarriers);
|
||||
worker->commandList->setFramebuffer(textureFramebuffer.get());
|
||||
|
||||
interop::RenderTargetCopyCB copyCB;
|
||||
copyCB.usesHDR = usesHDR;
|
||||
|
||||
// Record the drawing command.
|
||||
RenderViewport targetViewport = RenderViewport(float(x), float(y), float(width), float(height));
|
||||
RenderRect targetRect(x, y, x + width, y + height);
|
||||
@ -191,6 +194,7 @@ namespace RT64 {
|
||||
worker->commandList->setPipeline(shaderRecord->pipeline.get());
|
||||
worker->commandList->setGraphicsPipelineLayout(shaderRecord->pipelineLayout.get());
|
||||
worker->commandList->setGraphicsDescriptorSet(src->targetCopyDescSet->get(), 0);
|
||||
worker->commandList->setGraphicsPushConstants(0, &copyCB);
|
||||
worker->commandList->setVertexBuffers(0, nullptr, 0, nullptr);
|
||||
worker->commandList->drawInstanced(3, 1, 0, 0);
|
||||
|
||||
@ -224,12 +228,12 @@ namespace RT64 {
|
||||
RenderTextureLayout requiredTextureLayout = RenderTextureLayout::UNKNOWN;
|
||||
const ShaderRecord *shaderRecord = nullptr;
|
||||
bool useDummyTexture = false;
|
||||
if (format == ColorBufferFormat) {
|
||||
if (format == colorBufferFormat(usesHDR)) {
|
||||
shaderRecord = &shaderLibrary->fbChangesDrawColor;
|
||||
requiredTextureLayout = RenderTextureLayout::COLOR_WRITE;
|
||||
setupColorFramebuffer(worker);
|
||||
}
|
||||
else if (format == DepthBufferFormat) {
|
||||
else if (format == depthBufferFormat()) {
|
||||
useDummyTexture = true;
|
||||
shaderRecord = &shaderLibrary->fbChangesDrawDepth;
|
||||
requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE;
|
||||
@ -341,7 +345,7 @@ namespace RT64 {
|
||||
uint32_t scaledWidth = std::max(width / downsampleMultiplier, 1U);
|
||||
uint32_t scaledHeight = std::max(height / downsampleMultiplier, 1U);
|
||||
if (downsampledTexture == nullptr) {
|
||||
downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, ColorBufferFormat, RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS));
|
||||
downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, colorBufferFormat(usesHDR), RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS));
|
||||
downsampledTexture->setName("Render Target Downsampled");
|
||||
downsampledTextureMultiplier = downsampleMultiplier;
|
||||
}
|
||||
@ -433,4 +437,12 @@ namespace RT64 {
|
||||
resolutionScale.x = float(expandedColorWidthClamped) / float(nativeWidth);
|
||||
return resolutionScale;
|
||||
}
|
||||
|
||||
// Picks the pixel format for color render targets: 16-bit UNORM channels when HDR is requested,
// 8-bit UNORM channels otherwise.
RenderFormat RenderTarget::colorBufferFormat(bool usesHDR) {
    if (usesHDR) {
        return RenderFormat::R16G16B16A16_UNORM;
    }

    return RenderFormat::R8G8B8A8_UNORM;
}
|
||||
|
||||
// Returns the pixel format for depth render targets. It takes no HDR argument: the depth format is
// always 32-bit float.
RenderFormat RenderTarget::depthBufferFormat() {
|
||||
return RenderFormat::D32_FLOAT;
|
||||
}
|
||||
};
|
@ -17,8 +17,6 @@ namespace RT64 {
|
||||
struct RenderWorker;
|
||||
|
||||
struct RenderTarget {
|
||||
static const RenderFormat ColorBufferFormat;
|
||||
static const RenderFormat DepthBufferFormat;
|
||||
static const long MaxDimension;
|
||||
|
||||
std::unique_ptr<RenderTexture> texture;
|
||||
@ -45,8 +43,9 @@ namespace RT64 {
|
||||
int32_t misalignX = 0;
|
||||
int32_t invMisalignX = 0;
|
||||
bool resolvedTextureDirty = false;
|
||||
bool usesHDR = false;
|
||||
|
||||
RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling);
|
||||
RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR);
|
||||
~RenderTarget();
|
||||
void releaseTextures();
|
||||
bool resize(RenderWorker *worker, uint32_t newWidth, uint32_t newHeight);
|
||||
@ -69,5 +68,7 @@ namespace RT64 {
|
||||
bool isEmpty() const;
|
||||
static void computeScaledSize(uint32_t nativeWidth, uint32_t nativeHeight, hlslpp::float2 resolutionScale, uint32_t &scaledWidth, uint32_t &scaledHeight, uint32_t &misalignmentX);
|
||||
static hlslpp::float2 computeFixedResolutionScale(uint32_t nativeWidth, hlslpp::float2 resolutionScale);
|
||||
static RenderFormat colorBufferFormat(bool usesHDR);
|
||||
static RenderFormat depthBufferFormat();
|
||||
};
|
||||
};
|
@ -26,10 +26,16 @@ namespace RT64 {
|
||||
|
||||
// RenderTargetManager
|
||||
|
||||
RenderTargetManager::RenderTargetManager() { }
|
||||
|
||||
// Stores the multisampling configuration. The stored value is forwarded to the RenderTarget
// constructor when get() creates a new target.
void RenderTargetManager::setMultisampling(const RenderMultisampling &multisampling) {
|
||||
this->multisampling = multisampling;
|
||||
}
|
||||
|
||||
// Stores whether render targets should use the high precision color format. The stored value is
// forwarded to the RenderTarget constructor when get() creates a new target.
// NOTE(review): nothing here touches targets that already exist — confirm callers recreate them.
void RenderTargetManager::setUsesHDR(bool usesHDR) {
|
||||
this->usesHDR = usesHDR;
|
||||
}
|
||||
|
||||
RenderTarget &RenderTargetManager::get(const RenderTargetKey &key, bool ignoreOverrides) {
|
||||
const uint64_t keyHash = key.hash();
|
||||
if (!ignoreOverrides) {
|
||||
@ -44,7 +50,7 @@ namespace RT64 {
|
||||
return *target;
|
||||
}
|
||||
|
||||
target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling);
|
||||
target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling, usesHDR);
|
||||
return *target;
|
||||
}
|
||||
|
||||
|
@ -28,8 +28,11 @@ namespace RT64 {
|
||||
std::unordered_map<uint64_t, std::unique_ptr<RenderTarget>> targetMap;
|
||||
std::unordered_map<uint64_t, RenderTarget *> overrideMap;
|
||||
RenderMultisampling multisampling;
|
||||
bool usesHDR = false;
|
||||
|
||||
RenderTargetManager();
|
||||
void setMultisampling(const RenderMultisampling &multisampling);
|
||||
void setUsesHDR(bool usesHDR);
|
||||
RenderTarget &get(const RenderTargetKey &key, bool ignoreOverrides = false);
|
||||
void destroyAll();
|
||||
void setOverride(const RenderTargetKey &key, RenderTarget *target);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "rt64_shader_library.h"
|
||||
|
||||
#include "common/rt64_common.h"
|
||||
#include "shared/rt64_render_target_copy.h"
|
||||
#include "shared/rt64_rsp_vertex_test_z.h"
|
||||
|
||||
#include "shaders/FbChangesClearCS.hlsl.spirv.h"
|
||||
@ -114,7 +115,9 @@
|
||||
namespace RT64 {
|
||||
// ShaderLibrary
|
||||
|
||||
ShaderLibrary::ShaderLibrary() { }
|
||||
// Records whether color render targets use the high precision format; the pipeline descriptions
// in this file pass the flag to RenderTarget::colorBufferFormat() to pick renderTargetFormat.
ShaderLibrary::ShaderLibrary(bool usesHDR) : usesHDR(usesHDR) { }
|
||||
|
||||
ShaderLibrary::~ShaderLibrary() { }
|
||||
|
||||
@ -476,7 +479,7 @@ namespace RT64 {
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.pipelineLayout = textureCopy.pipelineLayout.get();
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.vertexShader = fullScreenVertexShader.get();
|
||||
pipelineDesc.pixelShader = pixelShader.get();
|
||||
@ -562,7 +565,7 @@ namespace RT64 {
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.pipelineLayout = fbChangesDrawColor.pipelineLayout.get();
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.vertexShader = fullScreenVertexShader.get();
|
||||
pipelineDesc.pixelShader = colorShader.get();
|
||||
@ -584,6 +587,7 @@ namespace RT64 {
|
||||
RenderTargetCopyDescriptorSet descriptorSet;
|
||||
layoutBuilder.begin();
|
||||
layoutBuilder.addDescriptorSet(descriptorSet);
|
||||
layoutBuilder.addPushConstant(0, 0, sizeof(interop::RenderTargetCopyCB), RenderShaderStageFlag::PIXEL);
|
||||
layoutBuilder.end();
|
||||
rtCopyDepthToColor.pipelineLayout = layoutBuilder.create(device);
|
||||
rtCopyDepthToColorMS.pipelineLayout = layoutBuilder.create(device);
|
||||
@ -594,7 +598,7 @@ namespace RT64 {
|
||||
std::unique_ptr<RenderShader> depthToColorMSShader = device->createShader(CREATE_SHADER_INPUTS(RtCopyDepthToColorPSMSBlobDXIL, RtCopyDepthToColorPSMSBlobSPIRV, "PSMain", shaderFormat));
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.vertexShader = fullScreenVertexShader.get();
|
||||
pipelineDesc.pipelineLayout = rtCopyDepthToColor.pipelineLayout.get();
|
||||
@ -633,7 +637,7 @@ namespace RT64 {
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.pipelineLayout = postProcess.pipelineLayout.get();
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.vertexShader = fullScreenVertexShader.get();
|
||||
pipelineDesc.pixelShader = pixelShader.get();
|
||||
@ -658,7 +662,7 @@ namespace RT64 {
|
||||
RenderGraphicsPipelineDesc pipelineDesc;
|
||||
pipelineDesc.pipelineLayout = debug.pipelineLayout.get();
|
||||
pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend();
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat;
|
||||
pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR);
|
||||
pipelineDesc.renderTargetCount = 1;
|
||||
pipelineDesc.vertexShader = fullScreenVertexShader.get();
|
||||
pipelineDesc.pixelShader = pixelShader.get();
|
||||
|
@ -13,6 +13,8 @@ namespace RT64 {
|
||||
};
|
||||
|
||||
struct ShaderLibrary {
|
||||
bool usesHDR = false;
|
||||
|
||||
std::unique_ptr<RenderSampler> nearestClampSampler;
|
||||
std::unique_ptr<RenderSampler> linearClampSampler;
|
||||
std::unique_ptr<RenderSampler> nearestBorderSampler;
|
||||
@ -59,7 +61,7 @@ namespace RT64 {
|
||||
ShaderRecord videoInterfaceNearest;
|
||||
ShaderRecord videoInterfacePixel;
|
||||
|
||||
ShaderLibrary();
|
||||
ShaderLibrary(bool usesHDR);
|
||||
~ShaderLibrary();
|
||||
void setupCommonShaders(RenderInterface *rhi, RenderDevice *device);
|
||||
void setupMultisamplingShaders(RenderInterface *rhi, RenderDevice *device, const RenderMultisampling &multisampling);
|
||||
|
@ -1544,6 +1544,9 @@ namespace RT64 {
|
||||
// Present.
|
||||
bool presentWait = false;
|
||||
bool displayTiming = false;
|
||||
|
||||
// HDR.
|
||||
bool preferHDR = false;
|
||||
};
|
||||
|
||||
struct RenderInterfaceCapabilities {
|
||||
|
@ -103,10 +103,10 @@ uint Float4ToUINT8(float4 i, uint fmt, bool oddColumn) {
|
||||
}
|
||||
}
|
||||
|
||||
uint Float4ToUINT16(float4 i, uint fmt, uint dither) {
|
||||
uint Float4ToUINT16(float4 i, uint fmt, uint dither, bool usesHDR) {
|
||||
switch (fmt) {
|
||||
case G_IM_FMT_RGBA:
|
||||
return Float4ToRGBA16(i, dither);
|
||||
return Float4ToRGBA16(i, dither, usesHDR);
|
||||
// TODO
|
||||
case G_IM_FMT_CI:
|
||||
return 0;
|
||||
@ -141,7 +141,7 @@ uint Float4ToUINT32(float4 i, uint fmt) {
|
||||
}
|
||||
}
|
||||
|
||||
uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) {
|
||||
uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither, bool usesHDR) {
|
||||
switch (siz) {
|
||||
// TODO
|
||||
case G_IM_SIZ_4b:
|
||||
@ -149,7 +149,7 @@ uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) {
|
||||
case G_IM_SIZ_8b:
|
||||
return Float4ToUINT8(i, fmt, oddColumn);
|
||||
case G_IM_SIZ_16b:
|
||||
return Float4ToUINT16(i, fmt, dither);
|
||||
return Float4ToUINT16(i, fmt, dither, usesHDR);
|
||||
case G_IM_SIZ_32b:
|
||||
return Float4ToUINT32(i, fmt);
|
||||
// Invalid pixel size.
|
||||
|
@ -20,7 +20,7 @@ float4 RGBA16toCI8(float4 inputColor, uint2 inputCoord, uint2 outputCoord) {
|
||||
uint2 ditherCoord = inputCoord + gConstants.ditherOffset;
|
||||
uint randomSeed = initRand(gConstants.ditherRandomSeed, ditherCoord.y * gConstants.resolution.x + ditherCoord.x, 16);
|
||||
uint ditherValue = DitherPatternValue(gConstants.ditherPattern, ditherCoord, randomSeed);
|
||||
uint nativeColor = Float4ToRGBA16(inputColor, ditherValue);
|
||||
uint nativeColor = Float4ToRGBA16(inputColor, ditherValue, gConstants.usesHDR);
|
||||
|
||||
// Extract the lower or upper half of the value depending on the pixel misalignment.
|
||||
uint pixelMisalignment = 1 - (outputCoord.x % 2);
|
||||
|
@ -18,7 +18,7 @@ void CSMain(uint2 coord : SV_DispatchThreadID) {
|
||||
bool oddColumn = (offsetCoord.x & 1);
|
||||
uint randomSeed = initRand(gConstants.ditherRandomSeed, dstIndex, 16);
|
||||
uint ditherValue = DitherPatternValue(gConstants.ditherPattern, offsetCoord, randomSeed);
|
||||
uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue);
|
||||
uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue, gConstants.usesHDR);
|
||||
gOutput[dstIndex] = EndianSwapUINT(nativeUint, gConstants.siz);
|
||||
}
|
||||
}
|
@ -92,11 +92,12 @@ float4 RGBA16ToFloat4(uint rgba16) {
|
||||
);
|
||||
}
|
||||
|
||||
uint Float4ToRGBA16(float4 i, uint dither) {
|
||||
uint Float4ToRGBA16(float4 i, uint dither, bool usesHDR) {
|
||||
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
|
||||
uint r = round(clamp(i.r * 255.0f, 0.0f, 255.0f));
|
||||
uint g = round(clamp(i.g * 255.0f, 0.0f, 255.0f));
|
||||
uint b = round(clamp(i.b * 255.0f, 0.0f, 255.0f));
|
||||
int cvgModulo = round(i.a * 255.0f) % 8;
|
||||
int cvgModulo = round(i.a * cvgRange) % 8;
|
||||
uint a = (cvgModulo & 0x4) ? 1 : 0;
|
||||
r = min(r + dither, 255) >> 3;
|
||||
g = min(g + dither, 255) >> 3;
|
||||
|
@ -192,10 +192,12 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
|
||||
}
|
||||
|
||||
// Compute coverage estimation.
|
||||
float resultCvg = (8.0f / 255.0f) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f);
|
||||
const bool usesHDR = renderFlagUsesHDR(rp.flags);
|
||||
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
|
||||
float resultCvg = (8.0f / cvgRange) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f);
|
||||
|
||||
// Discard all pixels without coverage.
|
||||
const float CoverageThreshold = 1.0f / 255.0f;
|
||||
const float CoverageThreshold = 1.0f / cvgRange;
|
||||
if (resultCvg < CoverageThreshold) {
|
||||
discard;
|
||||
}
|
||||
@ -220,11 +222,11 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl
|
||||
}
|
||||
// Write a full coverage value regardless of the computed coverage.
|
||||
else if (otherMode.cvgDst() == CVG_DST_FULL) {
|
||||
resultColor.a = 7.0f / 255.0f;
|
||||
resultColor.a = 7.0f / cvgRange;
|
||||
}
|
||||
// Write the coverage value clamped to the full value allowed.
|
||||
else if (otherMode.cvgDst() == CVG_DST_CLAMP) {
|
||||
resultColor.a = min(resultCvg, 7.0f / 255.0f);
|
||||
resultColor.a = min(resultCvg, 7.0f / cvgRange);
|
||||
}
|
||||
// Write out the computed coverage. It'll be added on wrap mode.
|
||||
else {
|
||||
|
@ -2,13 +2,17 @@
|
||||
// RT64
|
||||
//
|
||||
|
||||
#include "shared/rt64_render_target_copy.h"
|
||||
|
||||
#include "Depth.hlsli"
|
||||
#include "Formats.hlsli"
|
||||
|
||||
[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0);
|
||||
|
||||
#ifdef MULTISAMPLING
|
||||
Texture2DMS<float4> gInput : register(t0);
|
||||
Texture2DMS<float4> gInput : register(t1);
|
||||
#else
|
||||
Texture2D<float4> gInput : register(t0);
|
||||
Texture2D<float4> gInput : register(t1);
|
||||
#endif
|
||||
|
||||
float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex, out float resultDepth : SV_DEPTH) : SV_TARGET {
|
||||
@ -17,7 +21,7 @@ float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sam
|
||||
#else
|
||||
float4 inputColor = gInput.Load(uint3(pos.xy, 0));
|
||||
#endif
|
||||
uint rgba16 = Float4ToRGBA16(inputColor, 0);
|
||||
uint rgba16 = Float4ToRGBA16(inputColor, 0, gConstants.usesHDR);
|
||||
resultDepth = Depth16ToFloat(rgba16);
|
||||
return 0.0f;
|
||||
}
|
@ -2,13 +2,17 @@
|
||||
// RT64
|
||||
//
|
||||
|
||||
#include "shared/rt64_render_target_copy.h"
|
||||
|
||||
#include "Depth.hlsli"
|
||||
#include "Formats.hlsli"
|
||||
|
||||
[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0);
|
||||
|
||||
#ifdef MULTISAMPLING
|
||||
Texture2DMS<float> gInput : register(t0);
|
||||
Texture2DMS<float> gInput : register(t1);
|
||||
#else
|
||||
Texture2D<float> gInput : register(t0);
|
||||
Texture2D<float> gInput : register(t1);
|
||||
#endif
|
||||
|
||||
float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex) : SV_TARGET {
|
||||
|
@ -59,7 +59,7 @@ void computeLOD(OtherMode otherMode, uint rdpTileCount, float2 primLOD, float re
|
||||
}
|
||||
}
|
||||
|
||||
float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM) {
|
||||
float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM, bool usesHDR) {
|
||||
if (rdpTile.cms & G_TX_CLAMP) {
|
||||
texelInt.x = clamp(texelInt.x, 0, (round(gpuTile.tcScale.x * rdpTile.lrs) / 4) - (round(gpuTile.tcScale.x * rdpTile.uls) / 4) + round(gpuTile.tcScale.x - 1.0f));
|
||||
}
|
||||
@ -98,7 +98,8 @@ float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2
|
||||
// Alpha channel in framebuffer textures represent the coverage. A modulo operation must be performed
|
||||
// to get the value that would correspond to the alpha channel when it's sampled.
|
||||
if (gpuTileFlagAlphaIsCvg(gpuTile.flags)) {
|
||||
int cvgModulo = round(textureColor.a * 255.0f) % 8;
|
||||
const float cvgRange = usesHDR ? 65535.0f : 255.0f;
|
||||
int cvgModulo = round(textureColor.a * cvgRange) % 8;
|
||||
textureColor.a = (cvgModulo & 0x4) ? 1.0f : 0.0f;
|
||||
}
|
||||
|
||||
@ -145,13 +146,14 @@ float4 sampleTexture(OtherMode otherMode, RenderFlags renderFlags, float2 inputU
|
||||
|
||||
const uint tlut = otherMode.textLUT();
|
||||
const bool canDecodeTMEM = renderFlagCanDecodeTMEM(renderFlags);
|
||||
const bool usesHDR = renderFlagUsesHDR(renderFlags);
|
||||
int2 texelBaseInt = floor(uvCoord);
|
||||
float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM);
|
||||
float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM, usesHDR);
|
||||
if (filterBilerp || linearFiltering) {
|
||||
float2 fracPart = uvCoord - texelBaseInt;
|
||||
float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM);
|
||||
float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM);
|
||||
float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM);
|
||||
float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM, usesHDR);
|
||||
float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM, usesHDR);
|
||||
float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM, usesHDR);
|
||||
if (linearFiltering) {
|
||||
return lerp(lerp(sample00, sample10, fracPart.x), lerp(sample01, sample11, fracPart.x), fracPart.y);
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ namespace interop {
|
||||
uint siz;
|
||||
uint ditherPattern;
|
||||
uint ditherRandomSeed;
|
||||
uint usesHDR;
|
||||
};
|
||||
#ifdef HLSL_CPU
|
||||
};
|
||||
|
@ -20,6 +20,7 @@ namespace interop {
|
||||
uint ditherPattern;
|
||||
uint ditherRandomSeed;
|
||||
uint2 ditherOffset;
|
||||
uint usesHDR;
|
||||
};
|
||||
#ifdef HLSL_CPU
|
||||
};
|
||||
|
@ -32,6 +32,7 @@ namespace interop {
|
||||
uint usesTexture1 : 1;
|
||||
uint upscale2D : 1;
|
||||
uint upscaleLOD : 1;
|
||||
uint usesHDR : 1;
|
||||
};
|
||||
|
||||
uint value;
|
||||
@ -119,6 +120,10 @@ namespace interop {
|
||||
// Returns true when bit 24 of the packed render flags value is set.
// NOTE(review): presumably this mirrors the `upscaleLOD : 1` bitfield member — confirm the bit position.
bool renderFlagUpscaleLOD(RenderFlags flags) {
|
||||
return ((flags >> 24) & 0x1) != 0;
|
||||
}
|
||||
|
||||
// Returns true when bit 25 of the packed render flags value is set.
// NOTE(review): presumably this mirrors the `usesHDR : 1` bitfield member declared right after
// `upscaleLOD` — confirm the bit position stays in sync if fields are added or reordered.
bool renderFlagUsesHDR(RenderFlags flags) {
|
||||
return ((flags >> 25) & 0x1) != 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct RenderParams {
|
||||
|
17
src/shared/rt64_render_target_copy.h
Normal file
17
src/shared/rt64_render_target_copy.h
Normal file
@ -0,0 +1,17 @@
|
||||
//
|
||||
// RT64
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/rt64_hlsl.h"
|
||||
|
||||
#ifdef HLSL_CPU
|
||||
namespace interop {
|
||||
#endif
|
||||
// Constants passed to the render target copy pixel shaders as a push constant block.
struct RenderTargetCopyCB {
|
||||
// Non-zero when the color render target uses the high precision (HDR) format.
uint usesHDR;
|
||||
};
|
||||
#ifdef HLSL_CPU
|
||||
};
|
||||
#endif
|
@ -3618,6 +3618,16 @@ namespace RT64 {
|
||||
return;
|
||||
}
|
||||
|
||||
// Find the biggest device local memory available on the device.
|
||||
VkDeviceSize memoryHeapSize = 0;
|
||||
const VkPhysicalDeviceMemoryProperties *memoryProps = nullptr;
|
||||
vmaGetMemoryProperties(allocator, &memoryProps);
|
||||
for (uint32_t i = 0; i < memoryProps->memoryHeapCount; i++) {
|
||||
if (memoryProps->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
|
||||
memoryHeapSize = std::max(memoryProps->memoryHeaps[i].size, memoryHeapSize);
|
||||
}
|
||||
}
|
||||
|
||||
// Fill capabilities.
|
||||
capabilities.raytracing = rtSupported;
|
||||
capabilities.raytracingStateUpdate = false;
|
||||
@ -3626,6 +3636,7 @@ namespace RT64 {
|
||||
capabilities.scalarBlockLayout = scalarBlockLayout;
|
||||
capabilities.presentWait = presentWait;
|
||||
capabilities.displayTiming = supportedOptionalExtensions.find(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME) != supportedOptionalExtensions.end();
|
||||
capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024);
|
||||
|
||||
// Fill Vulkan-only capabilities.
|
||||
loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end();
|
||||
|
Loading…
x
Reference in New Issue
Block a user