// Mirror of https://github.com/rt64/rt64.git, synced 2025-04-16 05:42:29 +00:00.
// C++ source, 514 lines, 23 KiB.
//
|
|
// RT64
|
|
//
|
|
|
|
#include "rt64_present_queue.h"

#include <mutex>
#include <thread>

#include "common/rt64_thread.h"
#include "rhi/rt64_render_hooks.h"

#include "rt64_workload_queue.h"
|
|
|
|
namespace RT64 {
|
|
// PresentQueue
|
|
|
|
// Constructs the queue and puts its cursors in their initial state through reset().
PresentQueue::PresentQueue() {
    this->reset();
}
|
|
|
|
// Stops the present thread, joins it and wakes up anything still blocked on the queue.
//
// The shutdown flag is changed while holding cursorMutex because the present thread evaluates
// its wait predicate under that mutex. This guarantees the thread is either already blocked on
// the condition variable (and receives the notification) or observes the new value of the flag
// on its next predicate check. Changing the flag without the mutex leaves a window between the
// predicate check and the wait where the notification is lost and join() never returns.
PresentQueue::~PresentQueue() {
    {
        const std::scoped_lock lock(cursorMutex);
        presentThreadRunning = false;
    }

    cursorCondition.notify_all();

    if (presentThread != nullptr) {
        presentThread->join();
        delete presentThread;
        presentThread = nullptr;
    }

    // Synchronize with waitForPresentId() for the same reason: once this mutex has been acquired
    // and released, every waiter has either seen the flag or is already blocked on the condition.
    {
        const std::scoped_lock lock(presentIdMutex);
    }

    presentIdCondition.notify_all();
}
|
|
|
|
// Rewinds the thread, write and barrier cursors as well as the last presented ID back to zero.
void PresentQueue::reset() {
    threadCursor = 0;
    writeCursor = 0;
    barrierCursor = 0;
    presentId = 0;
}
|
|
|
|
void PresentQueue::advanceToNextPresent() {
|
|
int nextWriteCursor = (writeCursor + 1) % presents.size();
|
|
|
|
// Stall the thread until the barrier is lifted if we're trying to write on a present being used by the GPU.
|
|
bool waitForBarrier;
|
|
do {
|
|
const std::scoped_lock lock(cursorMutex);
|
|
waitForBarrier = (nextWriteCursor == barrierCursor);
|
|
} while (waitForBarrier);
|
|
|
|
// Modify the cursor and notify anything waiting on the queue.
|
|
{
|
|
const std::scoped_lock lock(cursorMutex);
|
|
writeCursor = nextWriteCursor;
|
|
}
|
|
|
|
cursorCondition.notify_all();
|
|
}
|
|
|
|
void PresentQueue::repeatLastPresent() {
|
|
{
|
|
const std::scoped_lock lock(cursorMutex);
|
|
threadCursor = previousWriteCursor();
|
|
}
|
|
|
|
cursorCondition.notify_all();
|
|
}
|
|
|
|
uint32_t PresentQueue::previousWriteCursor() const {
|
|
if (writeCursor > 0) {
|
|
return writeCursor - 1;
|
|
}
|
|
else {
|
|
return uint32_t(presents.size()) - 1;
|
|
}
|
|
}
|
|
|
|
void PresentQueue::waitForIdle() {
|
|
std::unique_lock<std::mutex> threadLock(threadMutex);
|
|
}
|
|
|
|
void PresentQueue::waitForPresentId(uint64_t waitId) {
|
|
std::unique_lock<std::mutex> presentLock(presentIdMutex);
|
|
presentIdCondition.wait(presentLock, [&]() {
|
|
return (waitId <= presentId) || !presentThreadRunning;
|
|
});
|
|
}
|
|
|
|
void PresentQueue::setup(const External &ext) {
|
|
this->ext = ext;
|
|
|
|
viRenderer = std::make_unique<VIRenderer>();
|
|
|
|
presentThreadRunning = true;
|
|
presentThread = new std::thread(&PresentQueue::threadLoop, this);
|
|
}
|
|
|
|
void PresentQueue::threadPresent(const Present &present) {
|
|
FramebufferManager &fbManager = ext.sharedResources->framebufferManager;
|
|
RenderTargetManager &targetManager = ext.sharedResources->renderTargetManager;
|
|
const bool usingMSAA = (targetManager.multisampling.sampleCount > 1);
|
|
hlslpp::float2 resolutionScale;
|
|
EnhancementConfiguration::Presentation::Mode presentationMode;
|
|
UserConfiguration::RefreshRate refreshRate;
|
|
UserConfiguration::Filtering filtering;
|
|
uint32_t viOriginalRate;
|
|
uint32_t targetRate;
|
|
{
|
|
std::scoped_lock<std::mutex> configurationLock(ext.sharedResources->configurationMutex);
|
|
resolutionScale = ext.sharedResources->resolutionScale;
|
|
presentationMode = ext.sharedResources->enhancementConfig.presentation.mode;
|
|
refreshRate = ext.sharedResources->userConfig.refreshRate;
|
|
filtering = ext.sharedResources->userConfig.filtering;
|
|
viOriginalRate = ext.sharedResources->viOriginalRate;
|
|
targetRate = ext.sharedResources->targetRate;
|
|
}
|
|
|
|
RenderTarget *colorTarget = nullptr;
|
|
int32_t framesToPresent = 1;
|
|
bool lockedWorkloadMutex = false;
|
|
InterpolatedFrameCounters &frameCounters = ext.sharedResources->interpolatedFrames[ext.sharedResources->interpolatedFramesIndex];
|
|
|
|
// TODO: There's a possible race condition interactions that can happen while the workload
|
|
// queue is rendering extra frames and the present event is processed while it's generating
|
|
// interpolated frames. When the framebuffer manager or the render target manager maps are
|
|
// modified while the present queue is retrieving the framebuffer or the target. These can
|
|
// likely be solved by locking the access to the managers during modification.
|
|
|
|
// Perform any external write operations indicated by the event.
|
|
if (!present.fbOperations.empty()) {
|
|
const std::scoped_lock lock(screenFbChangePoolMutex);
|
|
{
|
|
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
|
|
fbManager.performOperations(ext.presentGraphicsWorker, &screenFbChangePool, nullptr, ext.shaderLibrary, nullptr,
|
|
present.fbOperations, targetManager, resolutionScale, 0, 0, nullptr);
|
|
}
|
|
}
|
|
|
|
// Present the VI specified by the event.
|
|
// Attempt to find the matching framebuffer for the VI based on the origin address.
|
|
// If that fails, we look at the shared storage.
|
|
if (present.screenVI.visible()) {
|
|
Framebuffer *viFb = nullptr;
|
|
if (!viewRDRAM) {
|
|
viFb = fbManager.find(present.screenVI.fbAddress());
|
|
}
|
|
|
|
Framebuffer *presentFb = viFb;
|
|
|
|
// Show the framebuffer the debugger has requested instead.
|
|
if (present.debuggerFramebuffer.view) {
|
|
Framebuffer *candidateFb = fbManager.find(present.debuggerFramebuffer.address);
|
|
if (candidateFb != nullptr) {
|
|
presentFb = candidateFb;
|
|
}
|
|
}
|
|
|
|
if ((presentFb != nullptr) && (viFb != nullptr)) {
|
|
for (uint32_t colorAddress : ext.sharedResources->colorImageAddressVector) {
|
|
Framebuffer *colorFb = fbManager.find(colorAddress);
|
|
if (colorFb == nullptr) {
|
|
continue;
|
|
}
|
|
|
|
// Always default to interpolation being disabled for all modified framebuffers.
|
|
colorFb->interpolationEnabled = false;
|
|
|
|
// When the skip buffering option is on, we check the video history to find if any of the framebuffers that
|
|
// were drawn in this frame have been previously used for presentation. This is ignored when the debugger
|
|
// has forced viewing a particular framebuffer.
|
|
if (!present.debuggerFramebuffer.view && (presentationMode == EnhancementConfiguration::Presentation::Mode::SkipBuffering)) {
|
|
for (size_t h = 0; h < viHistory.history.size(); h++) {
|
|
const VIHistory::Present &entry = viHistory.history[h];
|
|
if ((colorFb->addressStart == entry.vi.fbAddress()) && (colorFb->width == entry.fbWidth) && (colorFb->siz == entry.vi.fbSiz()) && entry.vi.compatibleWith(present.screenVI)) {
|
|
presentFb = colorFb;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Present early (or games that behave like it) will make it so that the presented image is a color image
|
|
// that the workload modified. We run a basic check to see if that holds true to indicate it was presented
|
|
// so interpolation is possible.
|
|
if (colorFb == presentFb) {
|
|
presentFb->interpolationEnabled = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (presentFb->interpolationEnabled) {
|
|
framesToPresent = frameCounters.count;
|
|
}
|
|
else {
|
|
lockedWorkloadMutex = true;
|
|
ext.sharedResources->workloadMutex.lock();
|
|
}
|
|
|
|
RenderTargetKey colorTargetKey(presentFb->addressStart, presentFb->width, presentFb->siz, Framebuffer::Type::Color);
|
|
colorTarget = &targetManager.get(colorTargetKey, true);
|
|
if (!colorTarget->isEmpty()) {
|
|
// If a depth framebuffer is about to be shown, convert it to color.
|
|
if (presentFb->isLastWriteDifferent(Framebuffer::Type::Color)) {
|
|
RenderTargetKey otherColorTargetKey(presentFb->addressStart, presentFb->width, presentFb->siz, presentFb->lastWriteType);
|
|
RenderTarget &otherColorTarget = targetManager.get(otherColorTargetKey, true);
|
|
if (!otherColorTarget.isEmpty()) {
|
|
const FixedRect &r = presentFb->lastWriteRect;
|
|
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
|
|
colorTarget->copyFromTarget(ext.presentGraphicsWorker, &otherColorTarget, r.left(false), r.top(false), r.width(false, true), r.height(false, true), ext.shaderLibrary);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
colorTarget = nullptr;
|
|
}
|
|
|
|
if (!present.paused && (viHistory.top().vi != present.screenVI)) {
|
|
viHistory.pushVI(present.screenVI, viFb->width);
|
|
}
|
|
}
|
|
else {
|
|
uint32_t fbAddress = present.screenVI.fbAddress();
|
|
|
|
// Use a scratch framebuffer to upload the RAM to the render target.
|
|
hlslpp::uint2 fbSize = present.screenVI.fbSize();
|
|
scratchFb.addressStart = fbAddress;
|
|
scratchFb.width = fbSize.x;
|
|
scratchFb.height = fbSize.y;
|
|
scratchFb.siz = present.screenVI.fbSiz();
|
|
|
|
lockedWorkloadMutex = true;
|
|
ext.sharedResources->workloadMutex.lock();
|
|
|
|
RenderTargetKey colorTargetKey(fbAddress, scratchFb.width, scratchFb.siz, Framebuffer::Type::Color);
|
|
colorTarget = &targetManager.get(colorTargetKey, true);
|
|
colorTarget->resize(ext.presentGraphicsWorker, scratchFb.width, scratchFb.height);
|
|
colorTarget->resolutionScale = { 1.0f, 1.0f };
|
|
colorTarget->downsampleMultiplier = 1;
|
|
|
|
scratchFb.nativeTarget.resetBufferHistory();
|
|
|
|
{
|
|
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
|
|
colorTarget->clearColorTarget(ext.presentGraphicsWorker);
|
|
FramebufferChange *colorFbChange = scratchFb.readChangeFromBytes(ext.presentGraphicsWorker, scratchFbChangePool, Framebuffer::Type::Color,
|
|
G_IM_FMT_RGBA, present.storage.data(), 0, scratchFb.height, ext.shaderLibrary);
|
|
|
|
if (colorFbChange != nullptr) {
|
|
colorTarget->copyFromChanges(ext.presentGraphicsWorker, *colorFbChange, scratchFb.width, scratchFb.height, 0, ext.shaderLibrary);
|
|
}
|
|
}
|
|
|
|
scratchFbChangePool.reset();
|
|
|
|
if (!present.paused && (viHistory.top().vi != present.screenVI)) {
|
|
viHistory.pushVI(present.screenVI, fbSize.x);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create the framebuffers if necessary.
|
|
if (swapChainFramebuffers.empty()) {
|
|
uint32_t textureCount = ext.swapChain->getTextureCount();
|
|
swapChainFramebuffers.resize(textureCount);
|
|
for (uint32_t i = 0; i < textureCount; i++) {
|
|
const RenderTexture *swapChainTexture = ext.swapChain->getTexture(i);
|
|
swapChainFramebuffers[i] = ext.device->createFramebuffer(RenderFramebufferDesc(&swapChainTexture, 1));
|
|
}
|
|
}
|
|
|
|
bool swapChainValid = true;
|
|
for (int32_t i = 0; i < framesToPresent; i++) {
|
|
uint32_t frameCountersNextPresented = 0;
|
|
if ((framesToPresent > 1) && (usingMSAA || (i > 0))) {
|
|
// Stall until the interpolated color target is available.
|
|
const uint32_t targetIndex = usingMSAA ? i : (i - 1);
|
|
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
|
|
ext.sharedResources->interpolatedCondition.wait(interpolatedLock, [&]() {
|
|
return (frameCounters.available > targetIndex) || ((frameCounters.available == targetIndex) && frameCounters.skipped);
|
|
});
|
|
|
|
// Do not present any more frames after this one after reaching the last available frame if the workload was skipped.
|
|
if ((frameCounters.available == targetIndex) && frameCounters.skipped) {
|
|
framesToPresent = std::min(int(frameCounters.available), i + 1);
|
|
frameCountersNextPresented = frameCounters.count;
|
|
}
|
|
else {
|
|
frameCountersNextPresented = frameCounters.presented + 1;
|
|
}
|
|
|
|
if (i < framesToPresent) {
|
|
uint32_t targetIndex = usingMSAA ? i : (i - 1);
|
|
colorTarget = ext.sharedResources->interpolatedColorTargets[targetIndex].get();
|
|
}
|
|
else {
|
|
colorTarget = nullptr;
|
|
}
|
|
}
|
|
else if (framesToPresent == 1) {
|
|
frameCountersNextPresented = frameCounters.count;
|
|
}
|
|
|
|
const bool presentFrame = (i < framesToPresent) && swapChainValid;
|
|
if (presentFrame) {
|
|
// Draw the framebuffer with the VI renderer.
|
|
const uint32_t textureIndex = ext.swapChain->getTextureIndex();
|
|
RenderTexture *swapChainTexture = ext.swapChain->getTexture(textureIndex);
|
|
RenderFramebuffer *swapChainFramebuffer = swapChainFramebuffers[textureIndex].get();
|
|
RenderCommandList *commandList = ext.presentGraphicsWorker->commandList.get();
|
|
commandList->begin();
|
|
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::COLOR_WRITE));
|
|
commandList->setFramebuffer(swapChainFramebuffer);
|
|
commandList->clearColor();
|
|
|
|
if (colorTarget != nullptr) {
|
|
VIRenderer::RenderParams renderParams;
|
|
renderParams.device = ext.device;
|
|
renderParams.commandList = commandList;
|
|
renderParams.swapChain = ext.swapChain;
|
|
renderParams.shaderLibrary = ext.shaderLibrary;
|
|
renderParams.textureFormat = colorTarget->format;
|
|
renderParams.resolutionScale = colorTarget->resolutionScale;
|
|
renderParams.downsamplingScale = 1;
|
|
renderParams.filtering = filtering;
|
|
renderParams.vi = &present.screenVI;
|
|
|
|
const bool useDownsampling = (colorTarget->downsampleMultiplier > 1);
|
|
if (useDownsampling) {
|
|
colorTarget->downsampleTarget(ext.presentGraphicsWorker, ext.shaderLibrary);
|
|
renderParams.texture = colorTarget->downsampledTexture.get();
|
|
renderParams.textureWidth = colorTarget->width / colorTarget->downsampleMultiplier;
|
|
renderParams.textureHeight = colorTarget->height / colorTarget->downsampleMultiplier;
|
|
renderParams.downsamplingScale = colorTarget->downsampleMultiplier;
|
|
}
|
|
else {
|
|
colorTarget->resolveTarget(ext.presentGraphicsWorker);
|
|
renderParams.texture = colorTarget->getResolvedTexture();
|
|
renderParams.textureWidth = colorTarget->width;
|
|
renderParams.textureHeight = colorTarget->height;
|
|
}
|
|
|
|
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(renderParams.texture, RenderTextureLayout::SHADER_READ));
|
|
viRenderer->render(renderParams);
|
|
}
|
|
|
|
{
|
|
RenderHookDraw *drawHook = GetRenderHookDraw();
|
|
if (drawHook) {
|
|
drawHook(commandList, swapChainFramebuffer);
|
|
}
|
|
}
|
|
|
|
{
|
|
const std::scoped_lock lock(inspectorMutex);
|
|
if (inspector != nullptr) {
|
|
inspector->draw(commandList);
|
|
}
|
|
|
|
commandList->barriers(RenderBarrierStage::NONE, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::PRESENT));
|
|
commandList->end();
|
|
ext.presentGraphicsWorker->execute();
|
|
ext.presentGraphicsWorker->wait();
|
|
}
|
|
}
|
|
|
|
if (lockedWorkloadMutex) {
|
|
ext.sharedResources->workloadMutex.unlock();
|
|
lockedWorkloadMutex = false;
|
|
}
|
|
|
|
if (frameCountersNextPresented > 0) {
|
|
{
|
|
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
|
|
frameCounters.presented = frameCountersNextPresented;
|
|
}
|
|
|
|
ext.sharedResources->interpolatedCondition.notify_all();
|
|
}
|
|
|
|
// As soon as we're done with the first render target, we notify the workload queue it can proceed.
|
|
if (i == 0) {
|
|
notifyPresentId(present);
|
|
}
|
|
|
|
if (presentFrame) {
|
|
// Wait until the approximate time the next present should be at the current intended rate.
|
|
if ((presentTimestamp != Timestamp()) && (targetRate > 0) && (targetRate > viOriginalRate)) {
|
|
Timestamp currentTimestamp = Timer::current();
|
|
int64_t deltaMicro = Timer::deltaMicroseconds(presentTimestamp, currentTimestamp);
|
|
int64_t targetRateMicro = 1000000 / targetRate;
|
|
|
|
// The OS only provides about one millisecond of accuracy by default. Since we also
|
|
// want to wait slightly less than the target time, we substract from it and wait 1
|
|
// millisecond less than the floor of the micrseconds value.
|
|
int64_t msToWait = (targetRateMicro - deltaMicro - 500) / 1000;
|
|
if (msToWait > 1) {
|
|
Thread::sleepMilliseconds(msToWait - 1);
|
|
}
|
|
}
|
|
|
|
swapChainValid = ext.swapChain->present();
|
|
presentProfiler.logAndRestart();
|
|
presentTimestamp = Timer::current();
|
|
}
|
|
}
|
|
}
|
|
|
|
void PresentQueue::skipInterpolation() {
|
|
{
|
|
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
|
|
InterpolatedFrameCounters &frameCounters = ext.sharedResources->interpolatedFrames[ext.sharedResources->interpolatedFramesIndex];
|
|
frameCounters.presented = frameCounters.count;
|
|
}
|
|
|
|
ext.sharedResources->interpolatedCondition.notify_all();
|
|
}
|
|
|
|
void PresentQueue::notifyPresentId(const Present &present) {
|
|
{
|
|
std::scoped_lock<std::mutex> cursorLock(presentIdMutex);
|
|
presentId = present.presentId;
|
|
}
|
|
|
|
presentIdCondition.notify_all();
|
|
}
|
|
|
|
void PresentQueue::threadAdvanceBarrier() {
|
|
std::scoped_lock<std::mutex> cursorLock(cursorMutex);
|
|
barrierCursor = (barrierCursor + 1) % presents.size();
|
|
}
|
|
|
|
void PresentQueue::threadLoop() {
|
|
Thread::setCurrentThreadName("RT64 Present Queue");
|
|
|
|
int processCursor = -1;
|
|
bool skipPresent = false;
|
|
uint32_t displayTimingRate = UINT32_MAX;
|
|
const bool displayTiming = ext.device->getCapabilities().displayTiming;
|
|
while (presentThreadRunning) {
|
|
{
|
|
std::unique_lock<std::mutex> cursorLock(cursorMutex);
|
|
cursorCondition.wait(cursorLock, [&]() {
|
|
return (writeCursor != threadCursor) || !presentThreadRunning;
|
|
});
|
|
|
|
if (presentThreadRunning) {
|
|
processCursor = threadCursor;
|
|
threadCursor = (threadCursor + 1) % presents.size();
|
|
skipPresent = (writeCursor != threadCursor);
|
|
}
|
|
}
|
|
|
|
if (processCursor >= 0) {
|
|
std::unique_lock<std::mutex> threadLock(threadMutex);
|
|
const bool needsResize = ext.swapChain->needsResize();
|
|
if (needsResize) {
|
|
ext.presentGraphicsWorker->commandList->begin();
|
|
ext.presentGraphicsWorker->commandList->end();
|
|
ext.presentGraphicsWorker->execute();
|
|
ext.presentGraphicsWorker->wait();
|
|
ext.swapChain->resize();
|
|
swapChainFramebuffers.clear();
|
|
ext.appWindow->detectRefreshRate();
|
|
ext.sharedResources->setSwapChainSize(ext.swapChain->getWidth(), ext.swapChain->getHeight());
|
|
ext.sharedResources->setSwapChainRate(std::min(ext.appWindow->getRefreshRate(), displayTimingRate));
|
|
}
|
|
|
|
if (displayTiming) {
|
|
uint32_t newDisplayTimingRate = ext.swapChain->getRefreshRate();
|
|
if (newDisplayTimingRate == 0) {
|
|
newDisplayTimingRate = UINT32_MAX;
|
|
}
|
|
|
|
if (newDisplayTimingRate != displayTimingRate) {
|
|
ext.sharedResources->setSwapChainRate(std::min(ext.appWindow->getRefreshRate(), newDisplayTimingRate));
|
|
displayTimingRate = newDisplayTimingRate;
|
|
}
|
|
}
|
|
|
|
skipPresent = skipPresent || ext.swapChain->isEmpty();
|
|
|
|
const Present &present = presents[processCursor];
|
|
ext.workloadQueue->waitForWorkloadId(present.workloadId);
|
|
|
|
if (!presentThreadRunning) {
|
|
continue;
|
|
}
|
|
|
|
if (skipPresent) {
|
|
skipInterpolation();
|
|
notifyPresentId(present);
|
|
}
|
|
else {
|
|
threadPresent(present);
|
|
}
|
|
|
|
if (!present.fbOperations.empty()) {
|
|
const std::scoped_lock lock(screenFbChangePoolMutex);
|
|
screenFbChangePool.release(present.fbOperations.front().writeChanges.id);
|
|
}
|
|
|
|
if (!present.paused) {
|
|
threadAdvanceBarrier();
|
|
}
|
|
|
|
processCursor = -1;
|
|
}
|
|
}
|
|
|
|
// Transition the active swap chain render target out of the present state to avoid live references to the resource.
|
|
ext.presentGraphicsWorker->commandList->begin();
|
|
RenderTexture *swapChainTexture = ext.swapChain->getTexture(ext.swapChain->getTextureIndex());
|
|
ext.presentGraphicsWorker->commandList->barriers(RenderBarrierStage::NONE, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::COLOR_WRITE));
|
|
ext.presentGraphicsWorker->commandList->end();
|
|
ext.presentGraphicsWorker->execute();
|
|
ext.presentGraphicsWorker->wait();
|
|
}
|
|
}; |