rt64/src/hle/rt64_present_queue.cpp
2024-04-18 23:31:11 -03:00

514 lines
23 KiB
C++

//
// RT64
//
#include "rt64_present_queue.h"

#include <thread>

#include "common/rt64_thread.h"
#include "rhi/rt64_render_hooks.h"
#include "rt64_workload_queue.h"
namespace RT64 {
// PresentQueue
PresentQueue::PresentQueue() {
    // Start with every cursor and the present id zeroed; the thread itself is only created by setup().
    this->reset();
}
// Stops the present thread (if one was started), joins it and wakes up anything still blocked in
// waitForPresentId().
PresentQueue::~PresentQueue() {
    // Publish the stop request while owning the mutex the present thread waits with. If the flag were
    // changed without the mutex, the thread could evaluate its wait predicate, miss the change and then
    // block after the notification below was already delivered (lost wake-up).
    {
        const std::scoped_lock<std::mutex> cursorLock(cursorMutex);
        presentThreadRunning = false;
    }

    cursorCondition.notify_all();

    if (presentThread != nullptr) {
        presentThread->join();
        delete presentThread;
        presentThread = nullptr;
    }

    // Same reasoning for threads blocked in waitForPresentId(): briefly acquiring the mutex guarantees
    // that any waiter that saw the old flag value is already inside wait() by the time we notify.
    {
        const std::scoped_lock<std::mutex> presentIdLock(presentIdMutex);
    }

    presentIdCondition.notify_all();
}
// Returns the queue to its initial state: all cursors on the first slot and no present completed.
void PresentQueue::reset() {
    // Ring cursors.
    barrierCursor = 0;
    writeCursor = 0;
    threadCursor = 0;

    // Identifier of the last present that was notified.
    presentId = 0;
}
// Moves the write cursor to the next slot of the present ring and wakes up the present thread.
//
// Blocks the caller while the next slot is the one pointed at by the barrier cursor, which is only
// advanced by threadAdvanceBarrier().
void PresentQueue::advanceToNextPresent() {
    const int nextWriteCursor = (writeCursor + 1) % presents.size();

    // Stall the thread until the barrier is lifted if we're trying to write on a present being used by the GPU.
    // The check and the cursor update happen inside the same critical section, and the thread yields between
    // polls so it doesn't spin at full speed on the mutex the present thread needs to lift the barrier.
    for (;;) {
        {
            const std::scoped_lock lock(cursorMutex);
            if (nextWriteCursor != barrierCursor) {
                writeCursor = nextWriteCursor;
                break;
            }
        }

        std::this_thread::yield();
    }

    // Notify anything waiting on the queue.
    cursorCondition.notify_all();
}
// Rewinds the thread cursor to the slot before the write cursor and wakes up the present thread so that
// slot is processed again.
void PresentQueue::repeatLastPresent() {
    std::unique_lock<std::mutex> cursorGuard(cursorMutex);
    threadCursor = previousWriteCursor();
    cursorGuard.unlock();

    // Notify outside of the lock.
    cursorCondition.notify_all();
}
// Index of the slot that precedes the write cursor in the present ring, wrapping around to the last
// slot when the write cursor is on the first one.
uint32_t PresentQueue::previousWriteCursor() const {
    return (writeCursor > 0) ? (writeCursor - 1) : (uint32_t(presents.size()) - 1);
}
// Blocks until the thread mutex can be acquired and releases it right away. threadLoop() holds this
// mutex while it processes a present, so this returns once no present is being processed.
void PresentQueue::waitForIdle() {
    const std::lock_guard<std::mutex> idleGuard(threadMutex);
}
void PresentQueue::waitForPresentId(uint64_t waitId) {
std::unique_lock<std::mutex> presentLock(presentIdMutex);
presentIdCondition.wait(presentLock, [&]() {
return (waitId <= presentId) || !presentThreadRunning;
});
}
void PresentQueue::setup(const External &ext) {
this->ext = ext;
viRenderer = std::make_unique<VIRenderer>();
presentThreadRunning = true;
presentThread = new std::thread(&PresentQueue::threadLoop, this);
}
// Renders and presents one queued Present. Called from threadLoop().
//
// The steps are: snapshot the shared configuration, apply the framebuffer operations attached to the
// present, choose the color target to show for the VI (from a tracked framebuffer or from the RAM copy
// stored in the present) and then draw and present it one or more times. More than one frame is only
// presented when the chosen framebuffer has interpolation enabled.
//
// notifyPresentId() is called at the end of the first iteration of the presentation loop.
void PresentQueue::threadPresent(const Present &present) {
FramebufferManager &fbManager = ext.sharedResources->framebufferManager;
RenderTargetManager &targetManager = ext.sharedResources->renderTargetManager;
const bool usingMSAA = (targetManager.multisampling.sampleCount > 1);
// Local copies of the shared settings, filled in below while holding the configuration mutex.
hlslpp::float2 resolutionScale;
EnhancementConfiguration::Presentation::Mode presentationMode;
UserConfiguration::RefreshRate refreshRate;
UserConfiguration::Filtering filtering;
uint32_t viOriginalRate;
uint32_t targetRate;
{
std::scoped_lock<std::mutex> configurationLock(ext.sharedResources->configurationMutex);
resolutionScale = ext.sharedResources->resolutionScale;
presentationMode = ext.sharedResources->enhancementConfig.presentation.mode;
refreshRate = ext.sharedResources->userConfig.refreshRate;
filtering = ext.sharedResources->userConfig.filtering;
viOriginalRate = ext.sharedResources->viOriginalRate;
targetRate = ext.sharedResources->targetRate;
}
// Target that will be drawn to the swap chain. It stays null when nothing could be selected, in which
// case only the clear, the draw hook and the inspector are rendered.
RenderTarget *colorTarget = nullptr;
int32_t framesToPresent = 1;
// Tracks whether this function locked the workload mutex so it's only unlocked once (inside the loop below).
bool lockedWorkloadMutex = false;
InterpolatedFrameCounters &frameCounters = ext.sharedResources->interpolatedFrames[ext.sharedResources->interpolatedFramesIndex];
// TODO: There are possible race conditions that can happen while the workload queue is rendering
// extra frames and the present event is processed while it's generating interpolated frames.
// They occur when the framebuffer manager or the render target manager maps are modified while
// the present queue is retrieving the framebuffer or the target. These can likely be solved by
// locking the access to the managers during modification.
// Perform any external write operations indicated by the event.
if (!present.fbOperations.empty()) {
const std::scoped_lock lock(screenFbChangePoolMutex);
{
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
fbManager.performOperations(ext.presentGraphicsWorker, &screenFbChangePool, nullptr, ext.shaderLibrary, nullptr,
present.fbOperations, targetManager, resolutionScale, 0, 0, nullptr);
}
}
// Present the VI specified by the event.
// Attempt to find the matching framebuffer for the VI based on the origin address.
// If that fails, we look at the shared storage.
if (present.screenVI.visible()) {
Framebuffer *viFb = nullptr;
// When viewRDRAM is set the lookup is skipped, which leaves viFb null and selects the RAM upload path below.
if (!viewRDRAM) {
viFb = fbManager.find(present.screenVI.fbAddress());
}
Framebuffer *presentFb = viFb;
// Show the framebuffer the debugger has requested instead.
if (present.debuggerFramebuffer.view) {
Framebuffer *candidateFb = fbManager.find(present.debuggerFramebuffer.address);
if (candidateFb != nullptr) {
presentFb = candidateFb;
}
}
if ((presentFb != nullptr) && (viFb != nullptr)) {
for (uint32_t colorAddress : ext.sharedResources->colorImageAddressVector) {
Framebuffer *colorFb = fbManager.find(colorAddress);
if (colorFb == nullptr) {
continue;
}
// Always default to interpolation being disabled for all modified framebuffers.
colorFb->interpolationEnabled = false;
// When the skip buffering option is on, we check the video history to find if any of the framebuffers that
// were drawn in this frame have been previously used for presentation. This is ignored when the debugger
// has forced viewing a particular framebuffer.
if (!present.debuggerFramebuffer.view && (presentationMode == EnhancementConfiguration::Presentation::Mode::SkipBuffering)) {
for (size_t h = 0; h < viHistory.history.size(); h++) {
const VIHistory::Present &entry = viHistory.history[h];
if ((colorFb->addressStart == entry.vi.fbAddress()) && (colorFb->width == entry.fbWidth) && (colorFb->siz == entry.vi.fbSiz()) && entry.vi.compatibleWith(present.screenVI)) {
presentFb = colorFb;
break;
}
}
}
// Present early (or games that behave like it) will make it so that the presented image is a color image
// that the workload modified. We run a basic check to see if that holds true to indicate it was presented
// so interpolation is possible.
if (colorFb == presentFb) {
presentFb->interpolationEnabled = true;
break;
}
}
// With interpolation enabled, present as many frames as the counters indicate. Otherwise a single frame is
// presented and the workload mutex is held until that frame has been drawn (it is unlocked in the loop below).
if (presentFb->interpolationEnabled) {
framesToPresent = frameCounters.count;
}
else {
lockedWorkloadMutex = true;
ext.sharedResources->workloadMutex.lock();
}
RenderTargetKey colorTargetKey(presentFb->addressStart, presentFb->width, presentFb->siz, Framebuffer::Type::Color);
colorTarget = &targetManager.get(colorTargetKey, true);
if (!colorTarget->isEmpty()) {
// If a depth framebuffer is about to be shown, convert it to color.
if (presentFb->isLastWriteDifferent(Framebuffer::Type::Color)) {
RenderTargetKey otherColorTargetKey(presentFb->addressStart, presentFb->width, presentFb->siz, presentFb->lastWriteType);
RenderTarget &otherColorTarget = targetManager.get(otherColorTargetKey, true);
if (!otherColorTarget.isEmpty()) {
const FixedRect &r = presentFb->lastWriteRect;
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
colorTarget->copyFromTarget(ext.presentGraphicsWorker, &otherColorTarget, r.left(false), r.top(false), r.width(false, true), r.height(false, true), ext.shaderLibrary);
}
}
}
else {
// An empty target is not drawn.
colorTarget = nullptr;
}
// Record the VI in the history unless the present is paused or it matches the most recent entry.
if (!present.paused && (viHistory.top().vi != present.screenVI)) {
viHistory.pushVI(present.screenVI, viFb->width);
}
}
else {
uint32_t fbAddress = present.screenVI.fbAddress();
// Use a scratch framebuffer to upload the RAM to the render target.
hlslpp::uint2 fbSize = present.screenVI.fbSize();
scratchFb.addressStart = fbAddress;
scratchFb.width = fbSize.x;
scratchFb.height = fbSize.y;
scratchFb.siz = present.screenVI.fbSiz();
// This path always presents a single frame while holding the workload mutex (unlocked in the loop below).
lockedWorkloadMutex = true;
ext.sharedResources->workloadMutex.lock();
RenderTargetKey colorTargetKey(fbAddress, scratchFb.width, scratchFb.siz, Framebuffer::Type::Color);
colorTarget = &targetManager.get(colorTargetKey, true);
// The target is used at the framebuffer's own size, with no resolution scaling or downsampling.
colorTarget->resize(ext.presentGraphicsWorker, scratchFb.width, scratchFb.height);
colorTarget->resolutionScale = { 1.0f, 1.0f };
colorTarget->downsampleMultiplier = 1;
scratchFb.nativeTarget.resetBufferHistory();
{
RenderWorkerExecution workerExecution(ext.presentGraphicsWorker);
colorTarget->clearColorTarget(ext.presentGraphicsWorker);
// Build a change from the bytes stored in the present and copy it into the target if one was produced.
FramebufferChange *colorFbChange = scratchFb.readChangeFromBytes(ext.presentGraphicsWorker, scratchFbChangePool, Framebuffer::Type::Color,
G_IM_FMT_RGBA, present.storage.data(), 0, scratchFb.height, ext.shaderLibrary);
if (colorFbChange != nullptr) {
colorTarget->copyFromChanges(ext.presentGraphicsWorker, *colorFbChange, scratchFb.width, scratchFb.height, 0, ext.shaderLibrary);
}
}
scratchFbChangePool.reset();
// Record the VI in the history unless the present is paused or it matches the most recent entry.
if (!present.paused && (viHistory.top().vi != present.screenVI)) {
viHistory.pushVI(present.screenVI, fbSize.x);
}
}
}
// Create the framebuffers if necessary.
// The vector is cleared by threadLoop() when the swap chain is resized, so they are rebuilt here afterwards.
if (swapChainFramebuffers.empty()) {
uint32_t textureCount = ext.swapChain->getTextureCount();
swapChainFramebuffers.resize(textureCount);
for (uint32_t i = 0; i < textureCount; i++) {
const RenderTexture *swapChainTexture = ext.swapChain->getTexture(i);
swapChainFramebuffers[i] = ext.device->createFramebuffer(RenderFramebufferDesc(&swapChainTexture, 1));
}
}
// Becomes false when a swap chain present fails, which stops any further drawing in this call.
bool swapChainValid = true;
for (int32_t i = 0; i < framesToPresent; i++) {
// Value to store in frameCounters.presented after this iteration. Zero means the counter is left untouched.
uint32_t frameCountersNextPresented = 0;
// When multiple frames are presented, the interpolated targets are used for every frame with MSAA and for
// every frame but the first one without it (the first frame then uses the color target selected above).
if ((framesToPresent > 1) && (usingMSAA || (i > 0))) {
// Stall until the interpolated color target is available.
const uint32_t targetIndex = usingMSAA ? i : (i - 1);
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
ext.sharedResources->interpolatedCondition.wait(interpolatedLock, [&]() {
return (frameCounters.available > targetIndex) || ((frameCounters.available == targetIndex) && frameCounters.skipped);
});
// Do not present any more frames after this one after reaching the last available frame if the workload was skipped.
if ((frameCounters.available == targetIndex) && frameCounters.skipped) {
framesToPresent = std::min(int(frameCounters.available), i + 1);
frameCountersNextPresented = frameCounters.count;
}
else {
frameCountersNextPresented = frameCounters.presented + 1;
}
// framesToPresent may have been reduced above, so this frame might no longer need to be presented.
if (i < framesToPresent) {
uint32_t targetIndex = usingMSAA ? i : (i - 1);
colorTarget = ext.sharedResources->interpolatedColorTargets[targetIndex].get();
}
else {
colorTarget = nullptr;
}
}
else if (framesToPresent == 1) {
// A single frame marks all the frames of the counter as presented.
frameCountersNextPresented = frameCounters.count;
}
const bool presentFrame = (i < framesToPresent) && swapChainValid;
if (presentFrame) {
// Draw the framebuffer with the VI renderer.
const uint32_t textureIndex = ext.swapChain->getTextureIndex();
RenderTexture *swapChainTexture = ext.swapChain->getTexture(textureIndex);
RenderFramebuffer *swapChainFramebuffer = swapChainFramebuffers[textureIndex].get();
RenderCommandList *commandList = ext.presentGraphicsWorker->commandList.get();
commandList->begin();
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::COLOR_WRITE));
commandList->setFramebuffer(swapChainFramebuffer);
commandList->clearColor();
if (colorTarget != nullptr) {
VIRenderer::RenderParams renderParams;
renderParams.device = ext.device;
renderParams.commandList = commandList;
renderParams.swapChain = ext.swapChain;
renderParams.shaderLibrary = ext.shaderLibrary;
renderParams.textureFormat = colorTarget->format;
renderParams.resolutionScale = colorTarget->resolutionScale;
renderParams.downsamplingScale = 1;
renderParams.filtering = filtering;
renderParams.vi = &present.screenVI;
// Sample from the downsampled texture when the target uses a downsample multiplier, or from the resolved
// texture otherwise.
const bool useDownsampling = (colorTarget->downsampleMultiplier > 1);
if (useDownsampling) {
colorTarget->downsampleTarget(ext.presentGraphicsWorker, ext.shaderLibrary);
renderParams.texture = colorTarget->downsampledTexture.get();
renderParams.textureWidth = colorTarget->width / colorTarget->downsampleMultiplier;
renderParams.textureHeight = colorTarget->height / colorTarget->downsampleMultiplier;
renderParams.downsamplingScale = colorTarget->downsampleMultiplier;
}
else {
colorTarget->resolveTarget(ext.presentGraphicsWorker);
renderParams.texture = colorTarget->getResolvedTexture();
renderParams.textureWidth = colorTarget->width;
renderParams.textureHeight = colorTarget->height;
}
commandList->barriers(RenderBarrierStage::GRAPHICS, RenderTextureBarrier(renderParams.texture, RenderTextureLayout::SHADER_READ));
viRenderer->render(renderParams);
}
// Give the registered draw hook (if any) a chance to record commands on top of the frame.
{
RenderHookDraw *drawHook = GetRenderHookDraw();
if (drawHook) {
drawHook(commandList, swapChainFramebuffer);
}
}
// The inspector mutex is held while the inspector draws and until the command list has finished executing.
{
const std::scoped_lock lock(inspectorMutex);
if (inspector != nullptr) {
inspector->draw(commandList);
}
commandList->barriers(RenderBarrierStage::NONE, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::PRESENT));
commandList->end();
ext.presentGraphicsWorker->execute();
ext.presentGraphicsWorker->wait();
}
}
// Release the workload mutex taken during target selection now that the frame has been drawn.
if (lockedWorkloadMutex) {
ext.sharedResources->workloadMutex.unlock();
lockedWorkloadMutex = false;
}
// Publish the new presented count and wake up anything waiting on the interpolated frame counters.
if (frameCountersNextPresented > 0) {
{
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
frameCounters.presented = frameCountersNextPresented;
}
ext.sharedResources->interpolatedCondition.notify_all();
}
// As soon as we're done with the first render target, we notify the workload queue it can proceed.
if (i == 0) {
notifyPresentId(present);
}
if (presentFrame) {
// Wait until the approximate time the next present should be at the current intended rate.
// This only applies once a previous present has been timed and the target rate is above the original VI rate.
if ((presentTimestamp != Timestamp()) && (targetRate > 0) && (targetRate > viOriginalRate)) {
Timestamp currentTimestamp = Timer::current();
int64_t deltaMicro = Timer::deltaMicroseconds(presentTimestamp, currentTimestamp);
int64_t targetRateMicro = 1000000 / targetRate;
// The OS only provides about one millisecond of accuracy by default. Since we also
// want to wait slightly less than the target time, we subtract from it and wait 1
// millisecond less than the floor of the microseconds value.
int64_t msToWait = (targetRateMicro - deltaMicro - 500) / 1000;
if (msToWait > 1) {
Thread::sleepMilliseconds(msToWait - 1);
}
}
swapChainValid = ext.swapChain->present();
presentProfiler.logAndRestart();
presentTimestamp = Timer::current();
}
}
}
void PresentQueue::skipInterpolation() {
{
std::unique_lock<std::mutex> interpolatedLock(ext.sharedResources->interpolatedMutex);
InterpolatedFrameCounters &frameCounters = ext.sharedResources->interpolatedFrames[ext.sharedResources->interpolatedFramesIndex];
frameCounters.presented = frameCounters.count;
}
ext.sharedResources->interpolatedCondition.notify_all();
}
// Stores the id of the given present as the last notified one and wakes up the threads blocked in
// waitForPresentId().
void PresentQueue::notifyPresentId(const Present &present) {
    std::unique_lock<std::mutex> idLock(presentIdMutex);
    presentId = present.presentId;
    idLock.unlock();

    // Notify outside of the lock.
    presentIdCondition.notify_all();
}
// Moves the barrier cursor one slot forward in the present ring, wrapping around at the end.
// advanceToNextPresent() polls this cursor before it writes to the next slot.
void PresentQueue::threadAdvanceBarrier() {
    const std::lock_guard<std::mutex> barrierGuard(cursorMutex);
    barrierCursor = (barrierCursor + 1) % presents.size();
}
// Entry point of the present thread started by setup().
//
// Waits for presents to be queued, processes them one at a time (either presenting or skipping them) and,
// once presentThreadRunning is cleared, transitions the current swap chain texture out of the present layout
// before returning.
void PresentQueue::threadLoop() {
Thread::setCurrentThreadName("RT64 Present Queue");
// Slot to process in the current iteration. A negative value means there's nothing to process.
int processCursor = -1;
bool skipPresent = false;
// Last refresh rate reported by the swap chain. UINT32_MAX means none is known, so it never wins the std::min below.
uint32_t displayTimingRate = UINT32_MAX;
const bool displayTiming = ext.device->getCapabilities().displayTiming;
while (presentThreadRunning) {
{
// Sleep until there's a present to process or the thread has been asked to stop.
std::unique_lock<std::mutex> cursorLock(cursorMutex);
cursorCondition.wait(cursorLock, [&]() {
return (writeCursor != threadCursor) || !presentThreadRunning;
});
if (presentThreadRunning) {
processCursor = threadCursor;
threadCursor = (threadCursor + 1) % presents.size();
// If more presents are already queued after this one, this one is skipped instead of presented.
skipPresent = (writeCursor != threadCursor);
}
}
if (processCursor >= 0) {
// Held for as long as the present is being processed. waitForIdle() acquires the same mutex.
std::unique_lock<std::mutex> threadLock(threadMutex);
const bool needsResize = ext.swapChain->needsResize();
if (needsResize) {
// Submit an empty command list and wait for it before resizing.
// NOTE(review): presumably this ensures the worker is idle before the swap chain is recreated - confirm.
ext.presentGraphicsWorker->commandList->begin();
ext.presentGraphicsWorker->commandList->end();
ext.presentGraphicsWorker->execute();
ext.presentGraphicsWorker->wait();
ext.swapChain->resize();
// The framebuffers are recreated by threadPresent() when it finds the vector empty.
swapChainFramebuffers.clear();
ext.appWindow->detectRefreshRate();
ext.sharedResources->setSwapChainSize(ext.swapChain->getWidth(), ext.swapChain->getHeight());
ext.sharedResources->setSwapChainRate(std::min(ext.appWindow->getRefreshRate(), displayTimingRate));
}
// When the device reports display timing support, track the swap chain's refresh rate and update the
// shared rate whenever it changes. A reported rate of zero is treated as unknown.
if (displayTiming) {
uint32_t newDisplayTimingRate = ext.swapChain->getRefreshRate();
if (newDisplayTimingRate == 0) {
newDisplayTimingRate = UINT32_MAX;
}
if (newDisplayTimingRate != displayTimingRate) {
ext.sharedResources->setSwapChainRate(std::min(ext.appWindow->getRefreshRate(), newDisplayTimingRate));
displayTimingRate = newDisplayTimingRate;
}
}
// An empty swap chain also causes the present to be skipped.
skipPresent = skipPresent || ext.swapChain->isEmpty();
const Present &present = presents[processCursor];
// Wait for the workload this present depends on.
ext.workloadQueue->waitForWorkloadId(present.workloadId);
// If the thread was asked to stop during the wait, go back to the loop condition, which ends the loop.
if (!presentThreadRunning) {
continue;
}
if (skipPresent) {
// Nothing is drawn, but the interpolation counters and the present id are still updated.
skipInterpolation();
notifyPresentId(present);
}
else {
threadPresent(present);
}
// Release the change pool entry referenced by the first framebuffer operation of the present.
if (!present.fbOperations.empty()) {
const std::scoped_lock lock(screenFbChangePoolMutex);
screenFbChangePool.release(present.fbOperations.front().writeChanges.id);
}
// Paused presents do not advance the barrier.
if (!present.paused) {
threadAdvanceBarrier();
}
processCursor = -1;
}
}
// Transition the active swap chain render target out of the present state to avoid live references to the resource.
ext.presentGraphicsWorker->commandList->begin();
RenderTexture *swapChainTexture = ext.swapChain->getTexture(ext.swapChain->getTextureIndex());
ext.presentGraphicsWorker->commandList->barriers(RenderBarrierStage::NONE, RenderTextureBarrier(swapChainTexture, RenderTextureLayout::COLOR_WRITE));
ext.presentGraphicsWorker->commandList->end();
ext.presentGraphicsWorker->execute();
ext.presentGraphicsWorker->wait();
}
};