From 9b9237f097a12bcef6ab6233bcffa6764e02cad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dar=C3=ADo?= Date: Fri, 24 May 2024 16:39:34 -0300 Subject: [PATCH] Support for high precision framebuffers. (#31) * Support for high precision framebuffers. * Missing header. * Add detection logic for devices with low VRAM. * Add HDR handling to shader cache dumping and parsing. * Internal color format in configuration. --- src/common/rt64_user_configuration.cpp | 4 +++ src/common/rt64_user_configuration.h | 14 ++++++++++ src/d3d12/rt64_d3d12.cpp | 1 + src/hle/rt64_application.cpp | 22 +++++++++++++-- src/hle/rt64_application_window.cpp | 2 +- src/hle/rt64_framebuffer.cpp | 2 +- src/hle/rt64_framebuffer_changes.cpp | 4 +-- src/hle/rt64_framebuffer_changes.h | 2 +- src/hle/rt64_framebuffer_manager.cpp | 15 +++++----- src/hle/rt64_framebuffer_manager.h | 4 +-- src/hle/rt64_state.cpp | 20 +++++++++++-- src/hle/rt64_workload_queue.cpp | 5 ++-- src/render/rt64_buffer_uploader.cpp | 4 +-- src/render/rt64_descriptor_sets.h | 2 +- src/render/rt64_native_target.cpp | 2 ++ src/render/rt64_raster_shader.cpp | 4 ++- src/render/rt64_raster_shader.h | 1 + src/render/rt64_raster_shader_cache.cpp | 13 +++++++-- src/render/rt64_raster_shader_cache.h | 1 + src/render/rt64_render_target.cpp | 34 +++++++++++++++-------- src/render/rt64_render_target.h | 7 +++-- src/render/rt64_render_target_manager.cpp | 8 +++++- src/render/rt64_render_target_manager.h | 3 ++ src/render/rt64_shader_library.cpp | 16 +++++++---- src/render/rt64_shader_library.h | 4 ++- src/rhi/rt64_render_interface_types.h | 3 ++ src/shaders/FbCommon.hlsli | 8 +++--- src/shaders/FbReinterpretCS.hlsl | 2 +- src/shaders/FbWriteColorCS.hlsl | 2 +- src/shaders/Formats.hlsli | 5 ++-- src/shaders/RasterPS.hlsl | 10 ++++--- src/shaders/RtCopyColorToDepthPS.hlsl | 10 +++++-- src/shaders/RtCopyDepthToColorPS.hlsl | 8 ++++-- src/shaders/TextureSampler.hlsli | 14 ++++++---- src/shared/rt64_fb_common.h | 1 + src/shared/rt64_fb_reinterpret.h | 1 
+ src/shared/rt64_render_params.h | 5 ++++ src/shared/rt64_render_target_copy.h | 17 ++++++++++++ src/vulkan/rt64_vulkan.cpp | 11 ++++++++ 39 files changed, 219 insertions(+), 72 deletions(-) create mode 100644 src/shared/rt64_render_target_copy.h diff --git a/src/common/rt64_user_configuration.cpp b/src/common/rt64_user_configuration.cpp index a75efdd..4d07122 100644 --- a/src/common/rt64_user_configuration.cpp +++ b/src/common/rt64_user_configuration.cpp @@ -28,6 +28,7 @@ namespace RT64 { j["threePointFiltering"] = cfg.threePointFiltering; j["refreshRate"] = cfg.refreshRate; j["refreshRateTarget"] = cfg.refreshRateTarget; + j["internalColorFormat"] = cfg.internalColorFormat; j["idleWorkActive"] = cfg.idleWorkActive; j["developerMode"] = cfg.developerMode; } @@ -48,6 +49,7 @@ namespace RT64 { cfg.threePointFiltering = j.value("threePointFiltering", defaultCfg.threePointFiltering); cfg.refreshRate = j.value("refreshRate", defaultCfg.refreshRate); cfg.refreshRateTarget = j.value("refreshRateTarget", defaultCfg.refreshRateTarget); + cfg.internalColorFormat = j.value("internalColorFormat", defaultCfg.internalColorFormat); cfg.idleWorkActive = j.value("idleWorkActive", defaultCfg.idleWorkActive); cfg.developerMode = j.value("developerMode", defaultCfg.developerMode); } @@ -76,6 +78,7 @@ namespace RT64 { threePointFiltering = true; refreshRate = RefreshRate::Original; refreshRateTarget = 60; + internalColorFormat = InternalColorFormat::Automatic; idleWorkActive = true; developerMode = false; } @@ -89,6 +92,7 @@ namespace RT64 { clampEnum(extAspectRatio); clampEnum(upscale2D); clampEnum(refreshRate); + clampEnum(internalColorFormat); resolutionMultiplier = std::clamp(resolutionMultiplier, 0.0f, ResolutionMultiplierLimit); downsampleMultiplier = std::clamp(downsampleMultiplier, 1, ResolutionMultiplierLimit); aspectTarget = std::clamp(aspectTarget, 0.1f, 100.0f); diff --git a/src/common/rt64_user_configuration.h b/src/common/rt64_user_configuration.h index 7f3dd14..7beaa50 
100644 --- a/src/common/rt64_user_configuration.h +++ b/src/common/rt64_user_configuration.h @@ -63,6 +63,13 @@ namespace RT64 { OptionCount }; + enum class InternalColorFormat { + Standard, + High, + Automatic, + OptionCount + }; + GraphicsAPI graphicsAPI; Resolution resolution; Antialiasing antialiasing; @@ -77,6 +84,7 @@ namespace RT64 { bool threePointFiltering; RefreshRate refreshRate; int refreshRateTarget; + InternalColorFormat internalColorFormat; bool idleWorkActive; bool developerMode; @@ -131,6 +139,12 @@ namespace RT64 { { UserConfiguration::RefreshRate::Manual, "Manual" } }); + NLOHMANN_JSON_SERIALIZE_ENUM(UserConfiguration::InternalColorFormat, { + { UserConfiguration::InternalColorFormat::Standard, "Standard" }, + { UserConfiguration::InternalColorFormat::High, "High" }, + { UserConfiguration::InternalColorFormat::Automatic, "Automatic" } + }); + struct ConfigurationJSON { static bool read(UserConfiguration &cfg, std::istream &stream); static bool write(const UserConfiguration &cfg, std::ostream &stream); diff --git a/src/d3d12/rt64_d3d12.cpp b/src/d3d12/rt64_d3d12.cpp index 0d46a29..ce6154b 100644 --- a/src/d3d12/rt64_d3d12.cpp +++ b/src/d3d12/rt64_d3d12.cpp @@ -3058,6 +3058,7 @@ namespace RT64 { capabilities.descriptorIndexing = true; capabilities.scalarBlockLayout = true; capabilities.presentWait = true; + capabilities.preferHDR = dedicatedVideoMemory > (512 * 1024 * 1024); // Create descriptor heaps allocator. 
descriptorHeapAllocator = std::make_unique(this, ShaderDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); diff --git a/src/hle/rt64_application.cpp b/src/hle/rt64_application.cpp index 2d38838..f87f9b2 100644 --- a/src/hle/rt64_application.cpp +++ b/src/hle/rt64_application.cpp @@ -179,10 +179,25 @@ namespace RT64 { workloadGraphicsWorker = std::make_unique(device.get(), "Workload Graphics", RenderCommandListType::DIRECT); presentGraphicsWorker = std::make_unique(device.get(), "Present Graphics", RenderCommandListType::DIRECT); swapChain = presentGraphicsWorker->commandQueue->createSwapChain(appWindow->windowHandle, 2, RenderFormat::B8G8R8A8_UNORM); + + // Detect if the application should use HDR framebuffers or not. + bool usesHDR; + switch (userConfig.internalColorFormat) { + case UserConfiguration::InternalColorFormat::High: + usesHDR = true; + break; + case UserConfiguration::InternalColorFormat::Automatic: + usesHDR = device->getCapabilities().preferHDR; + break; + case UserConfiguration::InternalColorFormat::Standard: + default: + usesHDR = false; + break; + } // Before configuring multisampling, make sure the device actually supports it for the formats we'll use. If it doesn't, turn off antialiasing in the configuration. 
- const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::ColorBufferFormat); - const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::DepthBufferFormat); + const RenderSampleCounts colorSampleCounts = device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR)); + const RenderSampleCounts depthSampleCounts = device->getSampleCountsSupported(RenderTarget::depthBufferFormat()); const RenderSampleCounts commonSampleCounts = colorSampleCounts & depthSampleCounts; if ((commonSampleCounts & userConfig.msaaSampleCount()) == 0) { userConfig.antialiasing = UserConfiguration::Antialiasing::None; @@ -190,7 +205,7 @@ namespace RT64 { // Create the shader library. const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(userConfig.msaaSampleCount(), device->getCapabilities().sampleLocations); - shaderLibrary = std::make_unique(); + shaderLibrary = std::make_unique(usesHDR); shaderLibrary->setupCommonShaders(renderInterface.get(), device.get()); shaderLibrary->setupMultisamplingShaders(renderInterface.get(), device.get(), multisampling); @@ -230,6 +245,7 @@ namespace RT64 { sharedQueueResources->setSwapChainSize(swapChain->getWidth(), swapChain->getHeight()); sharedQueueResources->setSwapChainRate(appWindow->getRefreshRate()); sharedQueueResources->renderTargetManager.setMultisampling(multisampling); + sharedQueueResources->renderTargetManager.setUsesHDR(usesHDR); WorkloadQueue::External workloadExt; workloadExt.device = device.get(); diff --git a/src/hle/rt64_application_window.cpp b/src/hle/rt64_application_window.cpp index 8db29fe..fdd60f8 100644 --- a/src/hle/rt64_application_window.cpp +++ b/src/hle/rt64_application_window.cpp @@ -126,7 +126,7 @@ namespace RT64 { # elif defined(__APPLE__) windowHandle.window = wmInfo.info.cocoa.window; # else - static_assert(false && "Android unimplemented"); + static_assert(false && "Unimplemented"); # endif usingSdl = true; 
#else diff --git a/src/hle/rt64_framebuffer.cpp b/src/hle/rt64_framebuffer.cpp index 7a28efc..0600f92 100644 --- a/src/hle/rt64_framebuffer.cpp +++ b/src/hle/rt64_framebuffer.cpp @@ -99,7 +99,7 @@ namespace RT64 { assert(worker != nullptr); assert(src != nullptr); - FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount); + FramebufferChange &changeUsed = fbChangePool.use(worker, (type == Type::Depth) ? FramebufferChange::Type::Depth : FramebufferChange::Type::Color, width, rowCount, shaderLibrary->usesHDR); uint32_t readPixels = copyRAMToNativeAndChanges(worker, changeUsed, src, rowStart, rowCount, fmt, true, shaderLibrary); if (readPixels > 0) { return &changeUsed; diff --git a/src/hle/rt64_framebuffer_changes.cpp b/src/hle/rt64_framebuffer_changes.cpp index afcedc3..1c847ed 100644 --- a/src/hle/rt64_framebuffer_changes.cpp +++ b/src/hle/rt64_framebuffer_changes.cpp @@ -33,7 +33,7 @@ namespace RT64 { } } - FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height) { + FramebufferChange &FramebufferChangePool::use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR) { // To increase the chances of reusing buffers, we extend the width and height to a multiple of 32. const uint32_t Alignment = 32; uint32_t alignedWidth = ((width / Alignment) + ((width % Alignment) ? 
1 : 0)) * Alignment; @@ -65,7 +65,7 @@ namespace RT64 { RenderFormat pixelFormat; switch (type) { case FramebufferChange::Type::Color: - pixelFormat = RenderTarget::ColorBufferFormat; + pixelFormat = RenderTarget::colorBufferFormat(usesHDR); break; case FramebufferChange::Type::Depth: pixelFormat = RenderFormat::R32_FLOAT; diff --git a/src/hle/rt64_framebuffer_changes.h b/src/hle/rt64_framebuffer_changes.h index 3429558..7b38b8e 100644 --- a/src/hle/rt64_framebuffer_changes.h +++ b/src/hle/rt64_framebuffer_changes.h @@ -45,7 +45,7 @@ namespace RT64 { FramebufferChangePool(); ~FramebufferChangePool(); void reset(); - FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height); + FramebufferChange &use(RenderWorker *renderWorker, FramebufferChange::Type type, uint32_t width, uint32_t height, bool usesHDR); const FramebufferChange *get(uint64_t id) const; void release(uint64_t id); }; diff --git a/src/hle/rt64_framebuffer_manager.cpp b/src/hle/rt64_framebuffer_manager.cpp index b9b02d3..269b9f8 100644 --- a/src/hle/rt64_framebuffer_manager.cpp +++ b/src/hle/rt64_framebuffer_manager.cpp @@ -143,7 +143,7 @@ namespace RT64 { RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS; textureFlags |= RenderTextureFlag::RENDER_TARGET; - const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags); + const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(tileCopy.textureWidth, tileCopy.textureHeight, 1, RenderTarget::colorBufferFormat(targetManager.usesHDR), textureFlags); tileCopy.texture = renderWorker->device->createTexture(textureDesc); } @@ -238,7 +238,7 @@ namespace RT64 { cmdListCopies.cmdListCopyRegions.push_back(copyRegion); } - void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 
resolutionScale) { + void FramebufferManager::reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR) { assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end()); // Source tile must exist. @@ -295,13 +295,13 @@ namespace RT64 { RenderTextureFlags textureFlags = RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS; textureFlags |= RenderTextureFlag::RENDER_TARGET; - const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::ColorBufferFormat, textureFlags); + const RenderTextureDesc textureDesc = RenderTextureDesc::Texture2D(dstTile.textureWidth, dstTile.textureHeight, 1, RenderTarget::colorBufferFormat(usesHDR), textureFlags); dstTile.texture = renderWorker->device->createTexture(textureDesc); } } void FramebufferManager::reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale, - uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations) + uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations) { assert(tileCopies.find(op.reinterpretTile.srcId) != tileCopies.end()); @@ -325,6 +325,7 @@ namespace RT64 { c.ditherOffset = dstTile.ditherOffset; c.ditherPattern = dstTile.ditherPattern; c.ditherRandomSeed = uint32_t(writeTimestamp) + op.reinterpretTile.dstId; + c.usesHDR = usesHDR; dispatch.srcTexture = srcTile.texture.get(); dispatch.dstTexture = dstTile.texture.get(); @@ -808,7 +809,7 @@ namespace RT64 { Framebuffer *fb = differentFbs[i]; const uint8_t *fbRAM = &RDRAM[fb->addressStart]; const FramebufferChange::Type fbChangeType = (fb->lastWriteFmt == G_IM_FMT_DEPTH) ? 
FramebufferChange::Type::Depth : FramebufferChange::Type::Color; - FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height); + FramebufferChange &fbChange = fbChangePool.use(renderWorker, fbChangeType, fb->width, fb->height, shaderLibrary->usesHDR); const uint32_t DifferenceFractionNum = 1; const uint32_t DifferenceFractionDiv = 4; const uint32_t differentPixels = fb->copyRAMToNativeAndChanges(renderWorker, fbChange, fbRAM, 0, fb->height, fb->lastWriteFmt, false, shaderLibrary); @@ -883,7 +884,7 @@ namespace RT64 { break; } case FramebufferOperation::Type::ReinterpretTile: { - reinterpretTileSetup(renderWorker, op, resolutionScale); + reinterpretTileSetup(renderWorker, op, resolutionScale, targetManager.usesHDR); break; } default: @@ -917,7 +918,7 @@ namespace RT64 { } case FramebufferOperation::Type::ReinterpretTile: { assert(textureCache != nullptr); - reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, cmdListReinterpretations); + reinterpretTileRecord(renderWorker, op, *textureCache, resolutionScale, submissionFrame, shaderLibrary->usesHDR, cmdListReinterpretations); break; } default: diff --git a/src/hle/rt64_framebuffer_manager.h b/src/hle/rt64_framebuffer_manager.h index edb8115..9b9aada 100644 --- a/src/hle/rt64_framebuffer_manager.h +++ b/src/hle/rt64_framebuffer_manager.h @@ -174,9 +174,9 @@ namespace RT64 { void createTileCopyRecord(RenderWorker *renderWorker, const FramebufferOperation &op, const FramebufferStorage &fbStorage, RenderTargetManager &targetManager, hlslpp::float2 resolutionScale, uint32_t maxFbPairIndex, CommandListCopies &cmdListCopies, const ShaderLibrary *shaderLibrary); - void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale); + void reinterpretTileSetup(RenderWorker *renderWorker, const FramebufferOperation &op, hlslpp::float2 resolutionScale, bool usesHDR); void 
reinterpretTileRecord(RenderWorker *renderWorker, const FramebufferOperation &op, TextureCache &textureCache, hlslpp::float2 resolutionScale, - uint64_t submissionFrame, CommandListReinterpretations &cmdListReinterpretations); + uint64_t submissionFrame, bool usesHDR, CommandListReinterpretations &cmdListReinterpretations); bool makeFramebufferTile(Framebuffer *fb, uint32_t addressStart, uint32_t addressEnd, uint32_t lineWidth, uint32_t tileHeight, FramebufferTile &outTile, bool RGBA32); diff --git a/src/hle/rt64_state.cpp b/src/hle/rt64_state.cpp index cae1165..a8a12a5 100644 --- a/src/hle/rt64_state.cpp +++ b/src/hle/rt64_state.cpp @@ -73,6 +73,7 @@ namespace RT64 { const RenderMultisampling multisampling = RasterShader::generateMultisamplingPattern(ext.userConfig->msaaSampleCount(), ext.device->getCapabilities().sampleLocations); renderTargetManager.setMultisampling(multisampling); + renderTargetManager.setUsesHDR(ext.shaderLibrary->usesHDR); } void State::reset() { @@ -779,6 +780,7 @@ namespace RT64 { // Fill out all the rendering data for the framebuffer pairs that will be uploaded. const UserConfiguration::Upscale2D upscale2D = ext.userConfig->upscale2D; const bool scaleLOD = ext.enhancementConfig->textureLOD.scale; + const bool usesHDR = ext.shaderLibrary->usesHDR; const std::vector &faceIndices = workload.drawData.faceIndices; const std::vector &posShorts = workload.drawData.posShorts; uint32_t faceIndex = uint32_t(workload.drawRanges.faceIndices.first); @@ -940,6 +942,7 @@ namespace RT64 { flags.usesTexture0 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 0, flags.oneCycleHardwareBug); flags.usesTexture1 = callDesc.colorCombiner.usesTexture(callDesc.otherMode, 1, flags.oneCycleHardwareBug); flags.blenderApproximation = static_cast(blenderEmuReqs.approximateEmulation); + flags.usesHDR = usesHDR; // Set whether the LOD should be scaled to the display resolution according to the configuration mode and the extended GBI flags. 
const bool usesLOD = (callDesc.otherMode.textLOD() == G_TL_LOD); @@ -1440,7 +1443,7 @@ namespace RT64 { // Set up the dummy target used for rendering the depth if no depth framebuffer is active. if (depthFb == nullptr) { if (dummyDepthTarget == nullptr) { - dummyDepthTarget = std::make_unique(0, Framebuffer::Type::Depth, renderTargetManager.multisampling); + dummyDepthTarget = std::make_unique(0, Framebuffer::Type::Depth, renderTargetManager.multisampling, renderTargetManager.usesHDR); dummyDepthTarget->setupDepth(ext.framebufferGraphicsWorker, rtWidth, rtHeight); } @@ -1883,7 +1886,8 @@ namespace RT64 { genConfigChanged = ImGui::InputInt("Downsample Multiplier", &userConfig.downsampleMultiplier) || genConfigChanged; ImGui::BeginDisabled(!ext.device->getCapabilities().sampleLocations); - const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::ColorBufferFormat) & ext.device->getSampleCountsSupported(RenderTarget::DepthBufferFormat); + const bool usesHDR = ext.shaderLibrary->usesHDR; + const RenderSampleCounts sampleCountsSupported = ext.device->getSampleCountsSupported(RenderTarget::colorBufferFormat(usesHDR)) & ext.device->getSampleCountsSupported(RenderTarget::depthBufferFormat()); const uint32_t antialiasingOptionCount = uint32_t(UserConfiguration::Antialiasing::OptionCount); const char *antialiasingNames[antialiasingOptionCount] = { "None", "MSAA 2X", "MSAA 4X", "MSAA 8X" }; if (ImGui::BeginCombo("Antialiasing", antialiasingNames[uint32_t(userConfig.antialiasing)])) { @@ -1929,6 +1933,17 @@ namespace RT64 { genConfigChanged = ImGui::InputInt("Refresh Rate Target", &userConfig.refreshRateTarget) || genConfigChanged; } + // Store the user configuration that was used during initialization the first time we check this. 
+ static UserConfiguration::InternalColorFormat configColorFormat = UserConfiguration::InternalColorFormat::OptionCount; + if (configColorFormat == UserConfiguration::InternalColorFormat::OptionCount) { + configColorFormat = userConfig.internalColorFormat; + } + + genConfigChanged = ImGui::Combo("Color Format", reinterpret_cast(&userConfig.internalColorFormat), "Standard\0High\0Automatic\0") || genConfigChanged; + if (userConfig.internalColorFormat != configColorFormat) { + ImGui::Text("You must restart the application for this change to be applied."); + } + genConfigChanged = ImGui::Checkbox("Three-Point Filtering", &userConfig.threePointFiltering) || genConfigChanged; genConfigChanged = ImGui::Checkbox("High Performance State", &userConfig.idleWorkActive) || genConfigChanged; @@ -2198,6 +2213,7 @@ namespace RT64 { ImGui::Text("Scalar Block Layout: %d", capabilities.scalarBlockLayout); ImGui::Text("Present Wait: %d", capabilities.presentWait); ImGui::Text("Display Timing: %d", capabilities.displayTiming); + ImGui::Text("Prefer HDR: %d", capabilities.preferHDR); ImGui::EndTabItem(); } diff --git a/src/hle/rt64_workload_queue.cpp b/src/hle/rt64_workload_queue.cpp index 3a176c2..4947e83 100644 --- a/src/hle/rt64_workload_queue.cpp +++ b/src/hle/rt64_workload_queue.cpp @@ -494,7 +494,7 @@ namespace RT64 { // Set up the dummy target used for rendering the depth if no depth framebuffer is active. if (depthFb == nullptr) { if (dummyDepthTarget == nullptr) { - dummyDepthTarget = std::make_unique(0, Framebuffer::Type::Depth, targetManager.multisampling); + dummyDepthTarget = std::make_unique(0, Framebuffer::Type::Depth, targetManager.multisampling, targetManager.usesHDR); dummyDepthTarget->setupDepth(ext.workloadGraphicsWorker, rtWidth, rtHeight); } @@ -958,12 +958,13 @@ namespace RT64 { // Create as many render targets as required to store the interpolated targets. 
auto &interpolatedTargets = ext.sharedResources->interpolatedColorTargets; const bool usingMSAA = (ext.sharedResources->renderTargetManager.multisampling.sampleCount > 1); + const bool usesHDR = ext.sharedResources->renderTargetManager.usesHDR; uint32_t requiredFrames = (usingMSAA && generateInterpolatedFrames) ? displayFrames : (displayFrames - 1); if ((requiredFrames > 0) && (interpolatedTargets.size() < requiredFrames)) { uint32_t previousSize = uint32_t(interpolatedTargets.size()); interpolatedTargets.resize(requiredFrames); for (uint32_t i = previousSize; i < requiredFrames; i++) { - interpolatedTargets[i] = std::make_unique(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling()); + interpolatedTargets[i] = std::make_unique(interpolationTargetKey.address, Framebuffer::Type::Color, RenderMultisampling(), usesHDR); } } diff --git a/src/render/rt64_buffer_uploader.cpp b/src/render/rt64_buffer_uploader.cpp index d0979ce..de997a0 100644 --- a/src/render/rt64_buffer_uploader.cpp +++ b/src/render/rt64_buffer_uploader.cpp @@ -2,8 +2,8 @@ // RT64 // -#include #include +#include #include "common/rt64_thread.h" @@ -92,7 +92,7 @@ namespace RT64 { // Recreate the buffer pair. 
const uint64_t BlockAlignment = 256; - bufferPair.allocatedSize = std::max(((uint64_t)requiredSize * 3) / 2, BlockAlignment); + bufferPair.allocatedSize = std::max(uint64_t((requiredSize * 3) / 2), BlockAlignment); bufferPair.allocatedSize = roundUp(bufferPair.allocatedSize, BlockAlignment); bufferPair.uploadBuffer = worker->device->createBuffer(RenderBufferDesc::UploadBuffer(bufferPair.allocatedSize)); bufferPair.defaultBuffer = worker->device->createBuffer(RenderBufferDesc::DefaultBuffer(bufferPair.allocatedSize, u.bufferFlags)); diff --git a/src/render/rt64_descriptor_sets.h b/src/render/rt64_descriptor_sets.h index d5a58af..e81b79a 100644 --- a/src/render/rt64_descriptor_sets.h +++ b/src/render/rt64_descriptor_sets.h @@ -456,7 +456,7 @@ namespace RT64 { RenderTargetCopyDescriptorSet(RenderDevice *device = nullptr) { builder.begin(); - gInput = builder.addTexture(0); + gInput = builder.addTexture(1); builder.end(); if (device != nullptr) { diff --git a/src/render/rt64_native_target.cpp b/src/render/rt64_native_target.cpp index 543d7c4..cb1416d 100644 --- a/src/render/rt64_native_target.cpp +++ b/src/render/rt64_native_target.cpp @@ -161,6 +161,7 @@ namespace RT64 { nativeCB.siz = siz; nativeCB.ditherPattern = 0; nativeCB.ditherRandomSeed = 0; + nativeCB.usesHDR = shaderLibrary->usesHDR; // Assert for formats that have not been implemented yet because hardware verification is pending. assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Readback mode."); @@ -247,6 +248,7 @@ namespace RT64 { nativeCB.siz = siz; nativeCB.ditherPattern = ditherPattern; nativeCB.ditherRandomSeed = ditherRandomSeed; + nativeCB.usesHDR = shaderLibrary->usesHDR; // Assert for formats that have not been implemented yet because hardware verification is pending. 
assert((nativeCB.siz != G_IM_SIZ_4b) && "Unimplemented 4 bits Writeback mode."); diff --git a/src/render/rt64_raster_shader.cpp b/src/render/rt64_raster_shader.cpp index 7d08fcb..5de9b14 100644 --- a/src/render/rt64_raster_shader.cpp +++ b/src/render/rt64_raster_shader.cpp @@ -147,6 +147,7 @@ namespace RT64 { creation.zDecal = !copyMode && (desc.otherMode.zMode() == ZMODE_DEC); creation.cvgAdd = (desc.otherMode.cvgDst() == CVG_DST_WRAP) || (desc.otherMode.cvgDst() == CVG_DST_SAVE); creation.NoN = desc.flags.NoN; + creation.usesHDR = desc.flags.usesHDR; creation.specConstants = specConstants; creation.multisampling = multisampling; pipeline = createPipeline(creation); @@ -260,7 +261,7 @@ namespace RT64 { RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(c.usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.cullMode = c.culling ? 
RenderCullMode::FRONT : RenderCullMode::NONE; pipelineDesc.depthClipEnabled = !c.NoN; @@ -418,6 +419,7 @@ namespace RT64 { creation.vertexShader = vertexShader.get(); creation.pixelShader = pixelShader.get(); creation.NoN = true; + creation.usesHDR = shaderLibrary->usesHDR; creation.multisampling = multisampling; uint32_t threadIndex = 0; diff --git a/src/render/rt64_raster_shader.h b/src/render/rt64_raster_shader.h index d23f561..b302b82 100644 --- a/src/render/rt64_raster_shader.h +++ b/src/render/rt64_raster_shader.h @@ -33,6 +33,7 @@ namespace RT64 { bool zUpd; bool zDecal; bool cvgAdd; + bool usesHDR; std::vector specConstants; RenderMultisampling multisampling; }; diff --git a/src/render/rt64_raster_shader_cache.cpp b/src/render/rt64_raster_shader_cache.cpp index ce13aa2..0823272 100644 --- a/src/render/rt64_raster_shader_cache.cpp +++ b/src/render/rt64_raster_shader_cache.cpp @@ -12,7 +12,7 @@ namespace RT64 { // RasterShaderCache::OfflineList static const uint32_t OfflineMagic = 0x43535452; - static const uint32_t OfflineVersion = 1; + static const uint32_t OfflineVersion = 2; RasterShaderCache::OfflineList::OfflineList() { entryIterator = entries.end(); @@ -174,8 +174,11 @@ namespace RT64 { shaderCache->offlineList.step(offlineListEntry); // Make sure the hash hasn't been submitted yet by the game. If it hasn't, mark it as such and use this entry of the list. + // Also make sure the internal color format used by the shader is compatible. 
uint64_t shaderHash = offlineListEntry.shaderDesc.hash(); - if (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end()) { + const bool matchesColorFormat = (offlineListEntry.shaderDesc.flags.usesHDR == shaderCache->usesHDR); + const bool hashMissing = (shaderCache->shaderHashes.find(shaderHash) == shaderCache->shaderHashes.end()); + if (matchesColorFormat && hashMissing) { shaderDesc = offlineListEntry.shaderDesc; shaderCache->shaderHashes[shaderHash] = true; fromOfflineList = true; @@ -213,6 +216,11 @@ namespace RT64 { const std::unique_lock lock(shaderCache->offlineDumperMutex); if (shaderCache->offlineDumper.isDumping()) { shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes); + + // Toggle the use of HDR and compile another shader. + shaderDesc.flags.usesHDR = (shaderDesc.flags.usesHDR == 0); + std::make_unique(shaderCache->device, shaderDesc, uberPipelineLayout, shaderCache->shaderFormat, multisampling, shaderCache->shaderCompiler.get(), shaderVsBytes, shaderPsBytes, useShaderBytes); + shaderCache->offlineDumper.stepDumping(shaderDesc, dumperVsBytes, dumperPsBytes); } } @@ -256,6 +264,7 @@ namespace RT64 { this->multisampling = multisampling; shaderUber = std::make_unique(device, shaderFormat, multisampling, shaderLibrary, threadCount); + usesHDR = shaderLibrary->usesHDR; } void RasterShaderCache::submit(const ShaderDescription &desc) { diff --git a/src/render/rt64_raster_shader_cache.h b/src/render/rt64_raster_shader_cache.h index 49d487d..5ec61a3 100644 --- a/src/render/rt64_raster_shader_cache.h +++ b/src/render/rt64_raster_shader_cache.h @@ -71,6 +71,7 @@ namespace RT64 { OfflineList offlineList; OfflineDumper offlineDumper; std::mutex offlineDumperMutex; + bool usesHDR = false; RasterShaderCache(uint32_t threadCount); ~RasterShaderCache(); diff --git a/src/render/rt64_render_target.cpp b/src/render/rt64_render_target.cpp index f2b45ba..e62bb48 100644 --- a/src/render/rt64_render_target.cpp +++ 
b/src/render/rt64_render_target.cpp @@ -8,6 +8,7 @@ #include "gbi/rt64_f3d.h" #include "shared/rt64_fb_common.h" +#include "shared/rt64_render_target_copy.h" #include "rt64_raster_shader.h" @@ -16,14 +17,13 @@ namespace RT64 { // RenderTarget - const RenderFormat RenderTarget::ColorBufferFormat = RenderFormat::R8G8B8A8_UNORM; - const RenderFormat RenderTarget::DepthBufferFormat = RenderFormat::D32_FLOAT; const long RenderTarget::MaxDimension = 0x4000L; - RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling) { + RenderTarget::RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR) { this->addressForName = addressForName; this->type = type; this->multisampling = multisampling; + this->usesHDR = usesHDR; #if PRINT_CONSTRUCTOR_DESTRUCTOR fprintf(stdout, "RenderTarget(0x%p)\n", this); @@ -80,7 +80,7 @@ namespace RT64 { this->height = height; downsampledTextureMultiplier = 0; - format = ColorBufferFormat; + format = colorBufferFormat(usesHDR); RenderClearValue clearValue = RenderClearValue::Color(RenderColor(), format); texture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, format, multisampling, &clearValue)); @@ -104,7 +104,7 @@ namespace RT64 { this->height = height; downsampledTextureMultiplier = 0; - format = DepthBufferFormat; + format = depthBufferFormat(); RenderClearValue clearValue = RenderClearValue::Depth(RenderDepth(), RenderFormat::D32_FLOAT); texture = worker->device->createTexture(RenderTextureDesc::DepthTarget(width, height, format, multisampling, &clearValue)); @@ -117,7 +117,7 @@ namespace RT64 { assert(worker != nullptr); if (dummyTexture == nullptr) { - dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, ColorBufferFormat, multisampling)); + dummyTexture = worker->device->createTexture(RenderTextureDesc::ColorTarget(width, height, colorBufferFormat(usesHDR), 
multisampling)); dummyTexture->setName("Render Target Dummy"); } } @@ -152,12 +152,12 @@ namespace RT64 { const ShaderRecord *shaderRecord = nullptr; bool useDummyTexture = false; const bool srcUsesMSAA = (src->multisampling.sampleCount > 1); - if ((format == ColorBufferFormat) && (src->format == DepthBufferFormat)) { + if ((format == colorBufferFormat(usesHDR)) && (src->format == depthBufferFormat())) { shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyDepthToColorMS : &shaderLibrary->rtCopyDepthToColor; requiredTextureLayout = RenderTextureLayout::COLOR_WRITE; setupColorFramebuffer(worker); } - else if ((format == DepthBufferFormat) && (src->format == ColorBufferFormat)) { + else if ((format == depthBufferFormat()) && (src->format == colorBufferFormat(usesHDR))) { useDummyTexture = true; shaderRecord = srcUsesMSAA ? &shaderLibrary->rtCopyColorToDepthMS : &shaderLibrary->rtCopyColorToDepth; requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE; @@ -183,6 +183,9 @@ namespace RT64 { worker->commandList->barriers(RenderBarrierStage::GRAPHICS, framebufferBarriers); worker->commandList->setFramebuffer(textureFramebuffer.get()); + interop::RenderTargetCopyCB copyCB; + copyCB.usesHDR = usesHDR; + // Record the drawing command. 
RenderViewport targetViewport = RenderViewport(float(x), float(y), float(width), float(height)); RenderRect targetRect(x, y, x + width, y + height); @@ -191,6 +194,7 @@ namespace RT64 { worker->commandList->setPipeline(shaderRecord->pipeline.get()); worker->commandList->setGraphicsPipelineLayout(shaderRecord->pipelineLayout.get()); worker->commandList->setGraphicsDescriptorSet(src->targetCopyDescSet->get(), 0); + worker->commandList->setGraphicsPushConstants(0, &copyCB); worker->commandList->setVertexBuffers(0, nullptr, 0, nullptr); worker->commandList->drawInstanced(3, 1, 0, 0); @@ -224,12 +228,12 @@ namespace RT64 { RenderTextureLayout requiredTextureLayout = RenderTextureLayout::UNKNOWN; const ShaderRecord *shaderRecord = nullptr; bool useDummyTexture = false; - if (format == ColorBufferFormat) { + if (format == colorBufferFormat(usesHDR)) { shaderRecord = &shaderLibrary->fbChangesDrawColor; requiredTextureLayout = RenderTextureLayout::COLOR_WRITE; setupColorFramebuffer(worker); } - else if (format == DepthBufferFormat) { + else if (format == depthBufferFormat()) { useDummyTexture = true; shaderRecord = &shaderLibrary->fbChangesDrawDepth; requiredTextureLayout = RenderTextureLayout::DEPTH_WRITE; @@ -341,7 +345,7 @@ namespace RT64 { uint32_t scaledWidth = std::max(width / downsampleMultiplier, 1U); uint32_t scaledHeight = std::max(height / downsampleMultiplier, 1U); if (downsampledTexture == nullptr) { - downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, ColorBufferFormat, RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS)); + downsampledTexture = worker->device->createTexture(RenderTextureDesc::Texture2D(scaledWidth, scaledHeight, 1, colorBufferFormat(usesHDR), RenderTextureFlag::STORAGE | RenderTextureFlag::UNORDERED_ACCESS)); downsampledTexture->setName("Render Target Downsampled"); downsampledTextureMultiplier = downsampleMultiplier; } @@ -433,4 +437,12 @@ namespace RT64 { resolutionScale.x
= float(expandedColorWidthClamped) / float(nativeWidth); return resolutionScale; } + + RenderFormat RenderTarget::colorBufferFormat(bool usesHDR) { + return usesHDR ? RenderFormat::R16G16B16A16_UNORM : RenderFormat::R8G8B8A8_UNORM; + } + + RenderFormat RenderTarget::depthBufferFormat() { + return RenderFormat::D32_FLOAT; + } }; \ No newline at end of file diff --git a/src/render/rt64_render_target.h b/src/render/rt64_render_target.h index dc585e0..fd63a29 100644 --- a/src/render/rt64_render_target.h +++ b/src/render/rt64_render_target.h @@ -17,8 +17,6 @@ namespace RT64 { struct RenderWorker; struct RenderTarget { - static const RenderFormat ColorBufferFormat; - static const RenderFormat DepthBufferFormat; static const long MaxDimension; std::unique_ptr texture; @@ -45,8 +43,9 @@ namespace RT64 { int32_t misalignX = 0; int32_t invMisalignX = 0; bool resolvedTextureDirty = false; + bool usesHDR = false; - RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling); + RenderTarget(uint32_t addressForName, Framebuffer::Type type, const RenderMultisampling &multisampling, bool usesHDR); ~RenderTarget(); void releaseTextures(); bool resize(RenderWorker *worker, uint32_t newWidth, uint32_t newHeight); @@ -69,5 +68,7 @@ namespace RT64 { bool isEmpty() const; static void computeScaledSize(uint32_t nativeWidth, uint32_t nativeHeight, hlslpp::float2 resolutionScale, uint32_t &scaledWidth, uint32_t &scaledHeight, uint32_t &misalignmentX); static hlslpp::float2 computeFixedResolutionScale(uint32_t nativeWidth, hlslpp::float2 resolutionScale); + static RenderFormat colorBufferFormat(bool usesHDR); + static RenderFormat depthBufferFormat(); }; }; \ No newline at end of file diff --git a/src/render/rt64_render_target_manager.cpp b/src/render/rt64_render_target_manager.cpp index 861954a..a1405a8 100644 --- a/src/render/rt64_render_target_manager.cpp +++ b/src/render/rt64_render_target_manager.cpp @@ -26,10 +26,16 @@ namespace RT64 { // 
RenderTargetManager + RenderTargetManager::RenderTargetManager() { } + void RenderTargetManager::setMultisampling(const RenderMultisampling &multisampling) { this->multisampling = multisampling; } + void RenderTargetManager::setUsesHDR(bool usesHDR) { + this->usesHDR = usesHDR; + } + RenderTarget &RenderTargetManager::get(const RenderTargetKey &key, bool ignoreOverrides) { const uint64_t keyHash = key.hash(); if (!ignoreOverrides) { @@ -44,7 +50,7 @@ namespace RT64 { return *target; } - target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling); + target = std::make_unique<RenderTarget>(key.address, key.fbType, multisampling, usesHDR); return *target; } diff --git a/src/render/rt64_render_target_manager.h b/src/render/rt64_render_target_manager.h index df23846..7368711 100644 --- a/src/render/rt64_render_target_manager.h +++ b/src/render/rt64_render_target_manager.h @@ -28,8 +28,11 @@ namespace RT64 { std::unordered_map<uint64_t, std::unique_ptr<RenderTarget>> targetMap; std::unordered_map<uint64_t, RenderTarget *> overrideMap; RenderMultisampling multisampling; + bool usesHDR = false; + RenderTargetManager(); void setMultisampling(const RenderMultisampling &multisampling); + void setUsesHDR(bool usesHDR); RenderTarget &get(const RenderTargetKey &key, bool ignoreOverrides = false); void destroyAll(); void setOverride(const RenderTargetKey &key, RenderTarget *target); diff --git a/src/render/rt64_shader_library.cpp b/src/render/rt64_shader_library.cpp index bc31af1..15fc362 100644 --- a/src/render/rt64_shader_library.cpp +++ b/src/render/rt64_shader_library.cpp @@ -5,6 +5,7 @@ #include "rt64_shader_library.h" #include "common/rt64_common.h" +#include "shared/rt64_render_target_copy.h" #include "shared/rt64_rsp_vertex_test_z.h" #include "shaders/FbChangesClearCS.hlsl.spirv.h" @@ -114,7 +115,9 @@ namespace RT64 { // ShaderLibrary - ShaderLibrary::ShaderLibrary() { } + ShaderLibrary::ShaderLibrary(bool usesHDR) { + this->usesHDR = usesHDR; + } ShaderLibrary::~ShaderLibrary() { } @@ -476,7 +479,7 @@ namespace RT64 {
RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.pipelineLayout = textureCopy.pipelineLayout.get(); pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.vertexShader = fullScreenVertexShader.get(); pipelineDesc.pixelShader = pixelShader.get(); @@ -562,7 +565,7 @@ namespace RT64 { RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.pipelineLayout = fbChangesDrawColor.pipelineLayout.get(); pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.vertexShader = fullScreenVertexShader.get(); pipelineDesc.pixelShader = colorShader.get(); @@ -584,6 +587,7 @@ namespace RT64 { RenderTargetCopyDescriptorSet descriptorSet; layoutBuilder.begin(); layoutBuilder.addDescriptorSet(descriptorSet); + layoutBuilder.addPushConstant(0, 0, sizeof(interop::RenderTargetCopyCB), RenderShaderStageFlag::PIXEL); layoutBuilder.end(); rtCopyDepthToColor.pipelineLayout = layoutBuilder.create(device); rtCopyDepthToColorMS.pipelineLayout = layoutBuilder.create(device); @@ -594,7 +598,7 @@ namespace RT64 { std::unique_ptr depthToColorMSShader = device->createShader(CREATE_SHADER_INPUTS(RtCopyDepthToColorPSMSBlobDXIL, RtCopyDepthToColorPSMSBlobSPIRV, "PSMain", shaderFormat)); RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.vertexShader = fullScreenVertexShader.get(); pipelineDesc.pipelineLayout = 
rtCopyDepthToColor.pipelineLayout.get(); @@ -633,7 +637,7 @@ namespace RT64 { RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.pipelineLayout = postProcess.pipelineLayout.get(); pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.vertexShader = fullScreenVertexShader.get(); pipelineDesc.pixelShader = pixelShader.get(); @@ -658,7 +662,7 @@ namespace RT64 { RenderGraphicsPipelineDesc pipelineDesc; pipelineDesc.pipelineLayout = debug.pipelineLayout.get(); pipelineDesc.renderTargetBlend[0] = RenderBlendDesc::AlphaBlend(); - pipelineDesc.renderTargetFormat[0] = RenderTarget::ColorBufferFormat; + pipelineDesc.renderTargetFormat[0] = RenderTarget::colorBufferFormat(usesHDR); pipelineDesc.renderTargetCount = 1; pipelineDesc.vertexShader = fullScreenVertexShader.get(); pipelineDesc.pixelShader = pixelShader.get(); diff --git a/src/render/rt64_shader_library.h b/src/render/rt64_shader_library.h index 18483e7..c8eed00 100644 --- a/src/render/rt64_shader_library.h +++ b/src/render/rt64_shader_library.h @@ -13,6 +13,8 @@ namespace RT64 { }; struct ShaderLibrary { + bool usesHDR = false; + std::unique_ptr nearestClampSampler; std::unique_ptr linearClampSampler; std::unique_ptr nearestBorderSampler; @@ -59,7 +61,7 @@ namespace RT64 { ShaderRecord videoInterfaceNearest; ShaderRecord videoInterfacePixel; - ShaderLibrary(); + ShaderLibrary(bool usesHDR); ~ShaderLibrary(); void setupCommonShaders(RenderInterface *rhi, RenderDevice *device); void setupMultisamplingShaders(RenderInterface *rhi, RenderDevice *device, const RenderMultisampling &multisampling); diff --git a/src/rhi/rt64_render_interface_types.h b/src/rhi/rt64_render_interface_types.h index a56e7c8..dd0259f 100644 --- a/src/rhi/rt64_render_interface_types.h +++ b/src/rhi/rt64_render_interface_types.h @@ 
-1544,6 +1544,9 @@ namespace RT64 { // Present. bool presentWait = false; bool displayTiming = false; + + // HDR. + bool preferHDR = false; }; struct RenderInterfaceCapabilities { diff --git a/src/shaders/FbCommon.hlsli b/src/shaders/FbCommon.hlsli index 28303b7..973ee67 100644 --- a/src/shaders/FbCommon.hlsli +++ b/src/shaders/FbCommon.hlsli @@ -103,10 +103,10 @@ uint Float4ToUINT8(float4 i, uint fmt, bool oddColumn) { } } -uint Float4ToUINT16(float4 i, uint fmt, uint dither) { +uint Float4ToUINT16(float4 i, uint fmt, uint dither, bool usesHDR) { switch (fmt) { case G_IM_FMT_RGBA: - return Float4ToRGBA16(i, dither); + return Float4ToRGBA16(i, dither, usesHDR); // TODO case G_IM_FMT_CI: return 0; @@ -141,7 +141,7 @@ uint Float4ToUINT32(float4 i, uint fmt) { } } -uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) { +uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither, bool usesHDR) { switch (siz) { // TODO case G_IM_SIZ_4b: @@ -149,7 +149,7 @@ uint Float4ToUINT(float4 i, uint siz, uint fmt, bool oddColumn, uint dither) { case G_IM_SIZ_8b: return Float4ToUINT8(i, fmt, oddColumn); case G_IM_SIZ_16b: - return Float4ToUINT16(i, fmt, dither); + return Float4ToUINT16(i, fmt, dither, usesHDR); case G_IM_SIZ_32b: return Float4ToUINT32(i, fmt); // Invalid pixel size. 
diff --git a/src/shaders/FbReinterpretCS.hlsl b/src/shaders/FbReinterpretCS.hlsl index 6cb99eb..6ac3969 100644 --- a/src/shaders/FbReinterpretCS.hlsl +++ b/src/shaders/FbReinterpretCS.hlsl @@ -20,7 +20,7 @@ float4 RGBA16toCI8(float4 inputColor, uint2 inputCoord, uint2 outputCoord) { uint2 ditherCoord = inputCoord + gConstants.ditherOffset; uint randomSeed = initRand(gConstants.ditherRandomSeed, ditherCoord.y * gConstants.resolution.x + ditherCoord.x, 16); uint ditherValue = DitherPatternValue(gConstants.ditherPattern, ditherCoord, randomSeed); - uint nativeColor = Float4ToRGBA16(inputColor, ditherValue); + uint nativeColor = Float4ToRGBA16(inputColor, ditherValue, gConstants.usesHDR); // Extract the lower or upper half of the value depending on the pixel misalignment. uint pixelMisalignment = 1 - (outputCoord.x % 2); diff --git a/src/shaders/FbWriteColorCS.hlsl b/src/shaders/FbWriteColorCS.hlsl index dd86c8a..cbc9418 100644 --- a/src/shaders/FbWriteColorCS.hlsl +++ b/src/shaders/FbWriteColorCS.hlsl @@ -18,7 +18,7 @@ void CSMain(uint2 coord : SV_DispatchThreadID) { bool oddColumn = (offsetCoord.x & 1); uint randomSeed = initRand(gConstants.ditherRandomSeed, dstIndex, 16); uint ditherValue = DitherPatternValue(gConstants.ditherPattern, offsetCoord, randomSeed); - uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue); + uint nativeUint = Float4ToUINT(color, gConstants.siz, gConstants.fmt, oddColumn, ditherValue, gConstants.usesHDR); gOutput[dstIndex] = EndianSwapUINT(nativeUint, gConstants.siz); } } \ No newline at end of file diff --git a/src/shaders/Formats.hlsli b/src/shaders/Formats.hlsli index ab3dd16..0e0f33a 100644 --- a/src/shaders/Formats.hlsli +++ b/src/shaders/Formats.hlsli @@ -92,11 +92,12 @@ float4 RGBA16ToFloat4(uint rgba16) { ); } -uint Float4ToRGBA16(float4 i, uint dither) { +uint Float4ToRGBA16(float4 i, uint dither, bool usesHDR) { + const float cvgRange = usesHDR ? 
65535.0f : 255.0f; uint r = round(clamp(i.r * 255.0f, 0.0f, 255.0f)); uint g = round(clamp(i.g * 255.0f, 0.0f, 255.0f)); uint b = round(clamp(i.b * 255.0f, 0.0f, 255.0f)); - int cvgModulo = round(i.a * 255.0f) % 8; + int cvgModulo = round(i.a * cvgRange) % 8; uint a = (cvgModulo & 0x4) ? 1 : 0; r = min(r + dither, 255) >> 3; g = min(g + dither, 255) >> 3; diff --git a/src/shaders/RasterPS.hlsl b/src/shaders/RasterPS.hlsl index 57982bb..e5d3e9b 100644 --- a/src/shaders/RasterPS.hlsl +++ b/src/shaders/RasterPS.hlsl @@ -192,10 +192,12 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl } // Compute coverage estimation. - float resultCvg = (8.0f / 255.0f) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f); + const bool usesHDR = renderFlagUsesHDR(rp.flags); + const float cvgRange = usesHDR ? 65535.0f : 255.0f; + float resultCvg = (8.0f / cvgRange) * (otherMode.cvgXAlpha() ? combinerColor.a : 1.0f); // Discard all pixels without coverage. - const float CoverageThreshold = 1.0f / 255.0f; + const float CoverageThreshold = 1.0f / cvgRange; if (resultCvg < CoverageThreshold) { discard; } @@ -220,11 +222,11 @@ void RasterPS(const RenderParams rp, bool outputDepth, float4 vertexPosition, fl } // Write a full coverage value regardless of the computed coverage. else if (otherMode.cvgDst() == CVG_DST_FULL) { - resultColor.a = 7.0f / 255.0f; + resultColor.a = 7.0f / cvgRange; } // Write the coverage value clamped to the full value allowed. else if (otherMode.cvgDst() == CVG_DST_CLAMP) { - resultColor.a = min(resultCvg, 7.0f / 255.0f); + resultColor.a = min(resultCvg, 7.0f / cvgRange); } // Write out the computed coverage. It'll be added on wrap mode. 
else { diff --git a/src/shaders/RtCopyColorToDepthPS.hlsl b/src/shaders/RtCopyColorToDepthPS.hlsl index a219651..a2330f1 100644 --- a/src/shaders/RtCopyColorToDepthPS.hlsl +++ b/src/shaders/RtCopyColorToDepthPS.hlsl @@ -2,13 +2,17 @@ // RT64 // +#include "shared/rt64_render_target_copy.h" + #include "Depth.hlsli" #include "Formats.hlsli" +[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0); + #ifdef MULTISAMPLING -Texture2DMS gInput : register(t0); +Texture2DMS gInput : register(t1); #else -Texture2D gInput : register(t0); +Texture2D gInput : register(t1); #endif float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex, out float resultDepth : SV_DEPTH) : SV_TARGET { @@ -17,7 +21,7 @@ float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sam #else float4 inputColor = gInput.Load(uint3(pos.xy, 0)); #endif - uint rgba16 = Float4ToRGBA16(inputColor, 0); + uint rgba16 = Float4ToRGBA16(inputColor, 0, gConstants.usesHDR); resultDepth = Depth16ToFloat(rgba16); return 0.0f; } \ No newline at end of file diff --git a/src/shaders/RtCopyDepthToColorPS.hlsl b/src/shaders/RtCopyDepthToColorPS.hlsl index 99edb09..6ce3e77 100644 --- a/src/shaders/RtCopyDepthToColorPS.hlsl +++ b/src/shaders/RtCopyDepthToColorPS.hlsl @@ -2,13 +2,17 @@ // RT64 // +#include "shared/rt64_render_target_copy.h" + #include "Depth.hlsli" #include "Formats.hlsli" +[[vk::push_constant]] ConstantBuffer<RenderTargetCopyCB> gConstants : register(b0); + #ifdef MULTISAMPLING -Texture2DMS gInput : register(t0); +Texture2DMS gInput : register(t1); #else -Texture2D gInput : register(t0); +Texture2D gInput : register(t1); #endif float4 PSMain(in float4 pos : SV_Position, in float2 uv : TEXCOORD0, in uint sampleIndex : SV_SampleIndex) : SV_TARGET { diff --git a/src/shaders/TextureSampler.hlsli b/src/shaders/TextureSampler.hlsli index db6cc7b..43af553 100644 --- a/src/shaders/TextureSampler.hlsli +++ b/src/shaders/TextureSampler.hlsli @@ -59,7 +59,7 @@
void computeLOD(OtherMode otherMode, uint rdpTileCount, float2 primLOD, float re } } -float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM) { +float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 texelInt, uint textureIndex, uint tlut, bool canDecodeTMEM, bool usesHDR) { if (rdpTile.cms & G_TX_CLAMP) { texelInt.x = clamp(texelInt.x, 0, (round(gpuTile.tcScale.x * rdpTile.lrs) / 4) - (round(gpuTile.tcScale.x * rdpTile.uls) / 4) + round(gpuTile.tcScale.x - 1.0f)); } @@ -98,7 +98,8 @@ float4 clampWrapMirrorSample(const RDPTile rdpTile, const GPUTile gpuTile, int2 // Alpha channel in framebuffer textures represent the coverage. A modulo operation must be performed // to get the value that would correspond to the alpha channel when it's sampled. if (gpuTileFlagAlphaIsCvg(gpuTile.flags)) { - int cvgModulo = round(textureColor.a * 255.0f) % 8; + const float cvgRange = usesHDR ? 65535.0f : 255.0f; + int cvgModulo = round(textureColor.a * cvgRange) % 8; textureColor.a = (cvgModulo & 0x4) ? 
1.0f : 0.0f; } @@ -145,13 +146,14 @@ float4 sampleTexture(OtherMode otherMode, RenderFlags renderFlags, float2 inputU const uint tlut = otherMode.textLUT(); const bool canDecodeTMEM = renderFlagCanDecodeTMEM(renderFlags); + const bool usesHDR = renderFlagUsesHDR(renderFlags); int2 texelBaseInt = floor(uvCoord); - float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM); + float4 sample00 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 0), textureIndex, tlut, canDecodeTMEM, usesHDR); if (filterBilerp || linearFiltering) { float2 fracPart = uvCoord - texelBaseInt; - float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM); - float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM); - float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM); + float4 sample01 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(0, 1), textureIndex, tlut, canDecodeTMEM, usesHDR); + float4 sample10 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 0), textureIndex, tlut, canDecodeTMEM, usesHDR); + float4 sample11 = clampWrapMirrorSample(rdpTile, gpuTile, texelBaseInt + int2(1, 1), textureIndex, tlut, canDecodeTMEM, usesHDR); if (linearFiltering) { return lerp(lerp(sample00, sample10, fracPart.x), lerp(sample01, sample11, fracPart.x), fracPart.y); } diff --git a/src/shared/rt64_fb_common.h b/src/shared/rt64_fb_common.h index 3d2dfe0..7cc4909 100644 --- a/src/shared/rt64_fb_common.h +++ b/src/shared/rt64_fb_common.h @@ -18,6 +18,7 @@ namespace interop { uint siz; uint ditherPattern; uint ditherRandomSeed; + uint usesHDR; }; #ifdef HLSL_CPU }; diff --git a/src/shared/rt64_fb_reinterpret.h b/src/shared/rt64_fb_reinterpret.h index 3f39bfa..f49379b 100644 --- a/src/shared/rt64_fb_reinterpret.h +++ 
b/src/shared/rt64_fb_reinterpret.h @@ -20,6 +20,7 @@ namespace interop { uint ditherPattern; uint ditherRandomSeed; uint2 ditherOffset; + uint usesHDR; }; #ifdef HLSL_CPU }; diff --git a/src/shared/rt64_render_params.h b/src/shared/rt64_render_params.h index b4b33c0..ba45aa4 100644 --- a/src/shared/rt64_render_params.h +++ b/src/shared/rt64_render_params.h @@ -32,6 +32,7 @@ namespace interop { uint usesTexture1 : 1; uint upscale2D : 1; uint upscaleLOD : 1; + uint usesHDR : 1; }; uint value; @@ -119,6 +120,10 @@ namespace interop { bool renderFlagUpscaleLOD(RenderFlags flags) { return ((flags >> 24) & 0x1) != 0; } + + bool renderFlagUsesHDR(RenderFlags flags) { + return ((flags >> 25) & 0x1) != 0; + } #endif struct RenderParams { diff --git a/src/shared/rt64_render_target_copy.h b/src/shared/rt64_render_target_copy.h new file mode 100644 index 0000000..7e0ebbe --- /dev/null +++ b/src/shared/rt64_render_target_copy.h @@ -0,0 +1,17 @@ +// +// RT64 +// + +#pragma once + +#include "shared/rt64_hlsl.h" + +#ifdef HLSL_CPU +namespace interop { +#endif + struct RenderTargetCopyCB { + uint usesHDR; + }; +#ifdef HLSL_CPU +}; +#endif \ No newline at end of file diff --git a/src/vulkan/rt64_vulkan.cpp b/src/vulkan/rt64_vulkan.cpp index 685d2e1..448c2d1 100644 --- a/src/vulkan/rt64_vulkan.cpp +++ b/src/vulkan/rt64_vulkan.cpp @@ -3618,6 +3618,16 @@ namespace RT64 { return; } + // Find the biggest device local memory available on the device. + VkDeviceSize memoryHeapSize = 0; + const VkPhysicalDeviceMemoryProperties *memoryProps = nullptr; + vmaGetMemoryProperties(allocator, &memoryProps); + for (uint32_t i = 0; i < memoryProps->memoryHeapCount; i++) { + if (memoryProps->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { + memoryHeapSize = std::max(memoryProps->memoryHeaps[i].size, memoryHeapSize); + } + } + // Fill capabilities. 
capabilities.raytracing = rtSupported; capabilities.raytracingStateUpdate = false; @@ -3626,6 +3636,7 @@ namespace RT64 { capabilities.scalarBlockLayout = scalarBlockLayout; capabilities.presentWait = presentWait; capabilities.displayTiming = supportedOptionalExtensions.find(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME) != supportedOptionalExtensions.end(); + capabilities.preferHDR = memoryHeapSize > (512 * 1024 * 1024); // Fill Vulkan-only capabilities. loadStoreOpNoneSupported = supportedOptionalExtensions.find(VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME) != supportedOptionalExtensions.end();