Fifo: Extract syncing loop

It's now a new helper class (BlockingLoop) within Common.
This commit is contained in:
degasus 2015-05-27 20:53:09 +02:00
parent ff29ffeb66
commit 02a3a063c3
9 changed files with 212 additions and 71 deletions

View File

@ -0,0 +1,164 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <chrono>
#include <mutex>
#include <thread>
#include "Common/Event.h"
#include "Common/Flag.h"
namespace Common
{
// This class provides a synchronized loop: one worker thread executes Run(), and other
// threads request further iterations of its payload through Wakeup().
// It's a thread-safe way to trigger a new iteration without busy loops.
// It's optimized for high-usage iterations which usually are already running while it's triggered often.
//
// Two modes are visible in Run():
// - busy mode (m_is_running set): the payload is called again right away, and Wakeup() is
//   just a flag test.
// - sleep mode (m_is_running clear): the worker waits on m_new_work_event (with a 100 ms
//   timeout) between payload calls.
// The loop only drops from busy to sleep mode once m_may_sleep has been set, which is done
// by AllowSleep(), Wait() and Prepare().
class BlockingLoop
{
public:
// Starts in the stopped state, so Wait() and Stop() return immediately until
// Prepare() or Run() has been called.
BlockingLoop()
{
m_stopped.Set();
}
// Stops the loop using the blocking variant of Stop().
// NOTE(review): m_stopped is only set again at the end of Run(), so if Prepare() was
// called but Run() never executes, this looks like it would block forever - confirm
// that callers always pair Prepare() with Run().
~BlockingLoop()
{
Stop();
}
// Triggers to rerun the payload of the Run() function at least once again.
// This function is designed to never block and to finish as fast as possible.
void Wakeup()
{
// Already in busy mode: the loop will call the payload again on its own,
// so there is no need to signal the event.
if (m_is_running.IsSet())
return;
// Switch to busy mode, mark work as outstanding for Wait(), then wake the worker
// in case it is waiting on the event.
m_is_running.Set();
m_is_pending.Set();
m_new_work_event.Set();
}
// Wait for a complete payload run after the last Wakeup() call.
// If stopped, this returns immediately.
void Wait()
{
// We have to give the loop a chance to leave busy mode: m_is_running is only
// cleared by Run() once m_may_sleep is set (or when the loop quits).
m_may_sleep.Set();
// Fast path: nothing to wait for, so skip the mutex entirely.
if (m_stopped.IsSet() || (!m_is_running.IsSet() && !m_is_pending.IsSet()))
return;
// Notifying m_done_event will only wake up one thread, so use a mutex here to
// allow only one waiting thread at a time. A thread that had to queue on the mutex
// re-checks the flags below once it owns the lock, so it can return without
// waiting on the event if the work has finished in the meantime.
std::lock_guard<std::mutex> lk(m_wait_lock);
while (!m_stopped.IsSet() && (m_is_running.IsSet() || m_is_pending.IsSet()))
{
// Set the flag again on every pass: Run() clears m_may_sleep whenever it leaves
// busy mode, and another Wakeup() may have re-entered busy mode since then.
m_may_sleep.Set();
m_done_event.Wait();
}
}
// Half-starts the worker.
// Afterwards this object is in the running state and Wait() will block until the worker calls Run().
// This may be called from any thread and is supposed to be called at least once before Wait() is used.
// Run() calls it as well; the call is a no-op when the object is not in the stopped state.
void Prepare()
{
// There is a race condition if the other threads call this function while
// the loop thread is initializing. Using this lock will ensure a valid state.
std::lock_guard<std::mutex> lk(m_prepare_lock);
// Only the caller that takes the object out of the stopped state initializes the flags.
// (Presumably TestAndClear() reports whether the flag was set - see Common/Flag.h.)
if (!m_stopped.TestAndClear())
return;
// Pending from the start, so Wait() blocks until the first payload run has finished.
m_is_pending.Set();
m_shutdown.Clear();
m_may_sleep.Set();
}
// Mainloop of this object. Returns after Stop() has requested a shutdown.
// The payload callback is called at least as often as it's needed to match the Wakeup() requirements.
// payload: callable invoked with no arguments; its return value (if any) is ignored.
template<class F> void Run(F payload)
{
Prepare();
while (!m_shutdown.IsSet())
{
payload();
// This iteration is complete: clear the pending mark and release a thread
// blocked in Wait() so it can re-evaluate the flags.
m_is_pending.Clear();
m_done_event.Set();
if (m_is_running.IsSet())
{
// Busy mode: loop again immediately, unless sleeping has been allowed.
if (m_may_sleep.IsSet())
{
// Leave busy mode, but set m_is_pending first so that Wait() keeps blocking
// until the extra iteration below has run. That iteration covers a Wakeup()
// that returned early because m_is_running was still set.
m_is_pending.Set();
m_is_running.Clear();
// We'll sleep after the next iteration,
// so clear this flag now to avoid going to sleep more than once.
m_may_sleep.Clear();
}
}
else
{
// Sleep mode: wait for Wakeup() to signal new work. The timeout makes the
// payload (and the m_shutdown check) run periodically even without a wakeup.
m_new_work_event.WaitFor(std::chrono::milliseconds(100));
}
}
// Shutdown was requested: reset to the stopped state and release a thread
// blocked in Wait(), which will then see m_stopped and return.
m_is_running.Clear();
m_is_pending.Clear();
m_stopped.Set();
m_done_event.Set();
}
// Quits the mainloop.
// block: if true (the default), wait until the mainloop has quit; if false, only request the shutdown.
// Be careful not to use the blocking variant within the payload of the Run() method:
// Wait() only returns once Run() has left its loop, which requires the payload to return first.
void Stop(bool block = true)
{
// Nothing to stop.
if (m_stopped.IsSet())
return;
m_shutdown.Set();
// Wake the worker so it notices the shutdown request without waiting for the event timeout.
Wakeup();
if (block)
Wait();
}
// Returns true while the object is neither stopped nor asked to shut down.
bool IsRunning() const
{
return !m_stopped.IsSet() && !m_shutdown.IsSet();
}
// Permits the loop to fall back from busy mode to event based waiting
// after its next payload run.
void AllowSleep()
{
m_may_sleep.Set();
}
private:
// Serializes threads inside Wait().
std::mutex m_wait_lock;
// Serializes Prepare() calls.
std::mutex m_prepare_lock;
Flag m_stopped; // If this one is set, Wait() shall not block.
Flag m_shutdown; // If this one is set, the loop shall be quit.
// Signalled by Wakeup(); the worker waits on it while in sleep mode.
Event m_new_work_event;
Flag m_is_running; // If this one is set, the loop will be called at least once again.
// Signalled by Run() after every payload call and when the loop quits; Wait() blocks on it.
Event m_done_event;
Flag m_is_pending; // If this one is set, there might still be work to do.
Flag m_may_sleep; // If this one is set, we fall back from the busy loop to an event based synchronization.
};
}

View File

@ -40,6 +40,7 @@
<ClInclude Include="Atomic_Win32.h" /> <ClInclude Include="Atomic_Win32.h" />
<ClInclude Include="BitField.h" /> <ClInclude Include="BitField.h" />
<ClInclude Include="BitSet.h" /> <ClInclude Include="BitSet.h" />
<ClInclude Include="BlockingLoop.h" />
<ClInclude Include="BreakPoints.h" /> <ClInclude Include="BreakPoints.h" />
<ClInclude Include="CDUtils.h" /> <ClInclude Include="CDUtils.h" />
<ClInclude Include="ChunkFile.h" /> <ClInclude Include="ChunkFile.h" />

View File

@ -14,6 +14,7 @@
<ClInclude Include="Atomic_Win32.h" /> <ClInclude Include="Atomic_Win32.h" />
<ClInclude Include="BitField.h" /> <ClInclude Include="BitField.h" />
<ClInclude Include="BitSet.h" /> <ClInclude Include="BitSet.h" />
<ClInclude Include="BlockingLoop.h" />
<ClInclude Include="BreakPoints.h" /> <ClInclude Include="BreakPoints.h" />
<ClInclude Include="CDUtils.h" /> <ClInclude Include="CDUtils.h" />
<ClInclude Include="ChunkFile.h" /> <ClInclude Include="ChunkFile.h" />
@ -126,4 +127,4 @@
<ItemGroup> <ItemGroup>
<Text Include="CMakeLists.txt" /> <Text Include="CMakeLists.txt" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -475,7 +475,7 @@ void Idle()
{ {
//DEBUG_LOG(POWERPC, "Idle"); //DEBUG_LOG(POWERPC, "Idle");
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
{ {
//When the FIFO is processing data we must not advance because in this way //When the FIFO is processing data we must not advance because in this way
//the VI will be desynchronized. So, We are waiting until the FIFO finish and //the VI will be desynchronized. So, We are waiting until the FIFO finish and

View File

@ -62,6 +62,7 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule:
#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoBackendBase.h"
@ -189,7 +190,7 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate)
static void ThrottleCallback(u64 last_time, int cyclesLate) static void ThrottleCallback(u64 last_time, int cyclesLate)
{ {
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
CommandProcessor::s_gpuMaySleep.Set(); GpuMaySleep();
u32 time = Common::Timer::GetTimeMs(); u32 time = Common::Timer::GetTimeMs();

View File

@ -49,8 +49,6 @@ static std::atomic<bool> s_interrupt_finish_waiting;
static std::atomic<u32> s_vi_ticks(CommandProcessor::m_cpClockOrigin); static std::atomic<u32> s_vi_ticks(CommandProcessor::m_cpClockOrigin);
Common::Flag s_gpuMaySleep;
static bool IsOnThread() static bool IsOnThread()
{ {
return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread; return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;

View File

@ -17,7 +17,6 @@ namespace CommandProcessor
{ {
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern Common::Flag s_gpuMaySleep;
// internal hardware addresses // internal hardware addresses
enum enum

View File

@ -5,6 +5,7 @@
#include <atomic> #include <atomic>
#include "Common/Atomic.h" #include "Common/Atomic.h"
#include "Common/BlockingLoop.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/Event.h" #include "Common/Event.h"
@ -26,11 +27,13 @@
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelEngine.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
bool g_bSkipCurrentFrame = false; bool g_bSkipCurrentFrame = false;
static std::atomic<bool> s_gpu_running_state; static Common::BlockingLoop s_gpu_mainloop;
static std::atomic<bool> s_emu_running_state; static std::atomic<bool> s_emu_running_state;
// Most of this array is unlikely to be faulted in... // Most of this array is unlikely to be faulted in...
@ -41,8 +44,6 @@ static u8* s_fifo_aux_read_ptr;
bool g_use_deterministic_gpu_thread; bool g_use_deterministic_gpu_thread;
// STATE_TO_SAVE // STATE_TO_SAVE
static std::mutex s_video_buffer_lock;
static std::condition_variable s_video_buffer_cond;
static u8* s_video_buffer; static u8* s_video_buffer;
static u8* s_video_buffer_read_ptr; static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr; static std::atomic<u8*> s_video_buffer_write_ptr;
@ -60,12 +61,6 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic. // polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again
static Common::Event s_gpu_new_work_event;
static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do
static Common::Event s_gpu_done_event;
void Fifo_DoState(PointerWrap &p) void Fifo_DoState(PointerWrap &p)
{ {
p.DoArray(s_video_buffer, FIFO_SIZE); p.DoArray(s_video_buffer, FIFO_SIZE);
@ -102,13 +97,14 @@ void Fifo_Init()
// Padded so that SIMD overreads in the vertex loader are safe // Padded so that SIMD overreads in the vertex loader are safe
s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4); s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4);
ResetVideoBuffer(); ResetVideoBuffer();
s_gpu_running_state.store(false); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
s_gpu_mainloop.Prepare();
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin); CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
} }
void Fifo_Shutdown() void Fifo_Shutdown()
{ {
if (s_gpu_running_state.load()) if (s_gpu_mainloop.IsRunning())
PanicAlert("Fifo shutting down while active"); PanicAlert("Fifo shutting down while active");
FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4); FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
@ -135,27 +131,22 @@ void ExitGpuLoop()
FlushGpu(); FlushGpu();
// Terminate GPU thread loop // Terminate GPU thread loop
s_gpu_running_state.store(false);
s_emu_running_state.store(true); s_emu_running_state.store(true);
s_gpu_new_work_event.Set(); s_gpu_mainloop.Stop(false);
} }
void EmulatorState(bool running) void EmulatorState(bool running)
{ {
s_emu_running_state.store(running); s_emu_running_state.store(running);
s_gpu_new_work_event.Set(); s_gpu_mainloop.Wakeup();
} }
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{ {
if (g_use_deterministic_gpu_thread) if (g_use_deterministic_gpu_thread)
{ {
std::unique_lock<std::mutex> lk(s_video_buffer_lock); s_gpu_mainloop.Wait();
u8* write_ptr = s_video_buffer_write_ptr; if (!s_gpu_mainloop.IsRunning())
s_video_buffer_cond.wait(lk, [&]() {
return !s_gpu_running_state.load() || s_video_buffer_seen_ptr == write_ptr;
});
if (!s_gpu_running_state.load())
return; return;
// Opportunistically reset FIFOs so we don't wrap around. // Opportunistically reset FIFOs so we don't wrap around.
@ -168,6 +159,8 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
if (may_move_read_ptr) if (may_move_read_ptr)
{ {
u8* write_ptr = s_video_buffer_write_ptr;
// what's left over in the buffer // what's left over in the buffer
size_t size = write_ptr - s_video_buffer_pp_read_ptr; size_t size = write_ptr - s_video_buffer_pp_read_ptr;
@ -188,7 +181,7 @@ void PushFifoAuxBuffer(void* ptr, size_t size)
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr)) if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
{ {
SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false); SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
if (!s_gpu_running_state.load()) if (!s_gpu_mainloop.IsRunning())
{ {
// GPU is shutting down // GPU is shutting down
return; return;
@ -243,9 +236,9 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
// We can't wrap around while the GPU is working on the data. // We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU. // This should be very rare due to the reset in SyncGPU.
SyncGPU(SYNC_GPU_WRAPAROUND); SyncGPU(SYNC_GPU_WRAPAROUND);
if (!s_gpu_running_state.load()) if (!s_gpu_mainloop.IsRunning())
{ {
// GPU is shutting down // GPU is shutting down, so the next asserts may fail
return; return;
} }
@ -283,18 +276,19 @@ void ResetVideoBuffer()
// Purpose: Keep the Core HW updated about the CPU-GPU distance // Purpose: Keep the Core HW updated about the CPU-GPU distance
void RunGpuLoop() void RunGpuLoop()
{ {
s_gpu_running_state.store(true);
SCPFifoStruct &fifo = CommandProcessor::fifo;
u32 cyclesExecuted = 0;
AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false); AsyncRequests::GetInstance()->SetPassthrough(false);
while (s_gpu_running_state.load()) s_gpu_mainloop.Run(
{ [] {
g_video_backend->PeekMessages(); g_video_backend->PeekMessages();
if (g_use_deterministic_gpu_thread && s_emu_running_state.load()) // Do nothing while paused
if (!s_emu_running_state.load())
return;
if (g_use_deterministic_gpu_thread)
{ {
AsyncRequests::GetInstance()->PullEvents(); AsyncRequests::GetInstance()->PullEvents();
@ -305,16 +299,13 @@ void RunGpuLoop()
if (write_ptr > seen_ptr) if (write_ptr > seen_ptr)
{ {
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
s_video_buffer_seen_ptr = write_ptr;
{
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
s_video_buffer_seen_ptr = write_ptr;
s_video_buffer_cond.notify_all();
}
} }
} }
else if (s_emu_running_state.load()) else
{ {
SCPFifoStruct &fifo = CommandProcessor::fifo;
AsyncRequests::GetInstance()->PullEvents(); AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
@ -333,6 +324,7 @@ void RunGpuLoop()
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin) if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin)
{ {
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer; u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr); ReadDataFromFifo(readPtr);
@ -369,31 +361,15 @@ void RunGpuLoop()
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents(); AsyncRequests::GetInstance()->PullEvents();
} }
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
VertexManager::Flush();
// don't release the GPU running state on sync GPU waits // don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop; fifo.isGpuReadingData = !run_loop;
} }
});
s_gpu_is_pending.Clear();
s_gpu_done_event.Set();
if (s_gpu_is_running.IsSet())
{
if (CommandProcessor::s_gpuMaySleep.IsSet())
{
// Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop
s_gpu_is_pending.Set();
s_gpu_is_running.Clear();
CommandProcessor::s_gpuMaySleep.Clear();
}
}
else
{
s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100));
}
}
// wake up SyncGPU if we were interrupted
s_video_buffer_cond.notify_all();
AsyncRequests::GetInstance()->SetEnable(false); AsyncRequests::GetInstance()->SetEnable(false);
AsyncRequests::GetInstance()->SetPassthrough(true); AsyncRequests::GetInstance()->SetPassthrough(true);
} }
@ -403,11 +379,12 @@ void FlushGpu()
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
return; return;
while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) s_gpu_mainloop.Wait();
{ }
CommandProcessor::s_gpuMaySleep.Set();
s_gpu_done_event.Wait(); void GpuMaySleep()
} {
s_gpu_mainloop.AllowSleep();
} }
bool AtBreakpoint() bool AtBreakpoint()
@ -429,6 +406,7 @@ void RunGpu()
if (g_use_deterministic_gpu_thread) if (g_use_deterministic_gpu_thread)
{ {
ReadDataFromFifoOnCPU(fifo.CPReadPointer); ReadDataFromFifoOnCPU(fifo.CPReadPointer);
s_gpu_mainloop.Wakeup();
} }
else else
{ {
@ -460,11 +438,9 @@ void RunGpu()
} }
// wake up GPU thread // wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
{ {
s_gpu_is_pending.Set(); s_gpu_mainloop.Wakeup();
s_gpu_is_running.Set();
s_gpu_new_work_event.Set();
} }
} }

View File

@ -43,6 +43,7 @@ void* PopFifoAuxBuffer(size_t size);
void FlushGpu(); void FlushGpu();
void RunGpu(); void RunGpu();
void GpuMaySleep();
void RunGpuLoop(); void RunGpuLoop();
void ExitGpuLoop(); void ExitGpuLoop();
void EmulatorState(bool running); void EmulatorState(bool running);