mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-21 03:41:18 +00:00
Merge pull request #9710 from JosJuice/volatile-begone
Remove all remaining volatile qualifiers
This commit is contained in:
commit
8b81481920
@ -32,11 +32,6 @@
|
|||||||
#include "Common/Inline.h"
|
#include "Common/Inline.h"
|
||||||
#include "Common/Logging/Log.h"
|
#include "Common/Logging/Log.h"
|
||||||
|
|
||||||
// XXX: Replace this with std::is_trivially_copyable<T> once we stop using volatile
|
|
||||||
// on things that are put in savestates, as volatile types are not trivially copyable.
|
|
||||||
template <typename T>
|
|
||||||
constexpr bool IsTriviallyCopyable = std::is_trivially_copyable<std::remove_volatile_t<T>>::value;
|
|
||||||
|
|
||||||
// Wrapper class
|
// Wrapper class
|
||||||
class PointerWrap
|
class PointerWrap
|
||||||
{
|
{
|
||||||
@ -181,13 +176,13 @@ public:
|
|||||||
DoArray(x.data(), static_cast<u32>(x.size()));
|
DoArray(x.data(), static_cast<u32>(x.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if_t<IsTriviallyCopyable<T>, int> = 0>
|
template <typename T, typename std::enable_if_t<std::is_trivially_copyable_v<T>, int> = 0>
|
||||||
void DoArray(T* x, u32 count)
|
void DoArray(T* x, u32 count)
|
||||||
{
|
{
|
||||||
DoVoid(x, count * sizeof(T));
|
DoVoid(x, count * sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if_t<!IsTriviallyCopyable<T>, int> = 0>
|
template <typename T, typename std::enable_if_t<!std::is_trivially_copyable_v<T>, int> = 0>
|
||||||
void DoArray(T* x, u32 count)
|
void DoArray(T* x, u32 count)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < count; ++i)
|
for (u32 i = 0; i < count; ++i)
|
||||||
@ -230,7 +225,7 @@ public:
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void Do(T& x)
|
void Do(T& x)
|
||||||
{
|
{
|
||||||
static_assert(IsTriviallyCopyable<T>, "Only sane for trivially copyable types");
|
static_assert(std::is_trivially_copyable_v<T>, "Only sane for trivially copyable types");
|
||||||
// Note:
|
// Note:
|
||||||
// Usually we can just use x = **ptr, etc. However, this doesn't work
|
// Usually we can just use x = **ptr, etc. However, this doesn't work
|
||||||
// for unions containing BitFields (long story, stupid language rules)
|
// for unions containing BitFields (long story, stupid language rules)
|
||||||
|
@ -101,20 +101,10 @@ ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask)
|
|||||||
return new DirectHandlingMethod<T>(const_cast<T*>(addr), mask);
|
return new DirectHandlingMethod<T>(const_cast<T*>(addr), mask);
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask)
|
|
||||||
{
|
|
||||||
return new DirectHandlingMethod<T>((T*)addr, mask);
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask)
|
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask)
|
||||||
{
|
{
|
||||||
return new DirectHandlingMethod<T>(addr, mask);
|
return new DirectHandlingMethod<T>(addr, mask);
|
||||||
}
|
}
|
||||||
template <typename T>
|
|
||||||
WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask)
|
|
||||||
{
|
|
||||||
return new DirectHandlingMethod<T>((T*)addr, mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Complex: holds a lambda that is called when a read or a write is executed.
|
// Complex: holds a lambda that is called when a read or a write is executed.
|
||||||
// This gives complete control to the user as to what is going to happen during
|
// This gives complete control to the user as to what is going to happen during
|
||||||
|
@ -46,11 +46,7 @@ WriteHandlingMethod<T>* Nop();
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask = 0xFFFFFFFF);
|
ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask = 0xFFFFFFFF);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask = 0xFFFFFFFF);
|
|
||||||
template <typename T>
|
|
||||||
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask = 0xFFFFFFFF);
|
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask = 0xFFFFFFFF);
|
||||||
template <typename T>
|
|
||||||
WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask = 0xFFFFFFFF);
|
|
||||||
|
|
||||||
// Complex: use when no other handling method fits your needs. These allow you
|
// Complex: use when no other handling method fits your needs. These allow you
|
||||||
// to directly provide a function that will be called when a read/write needs
|
// to directly provide a function that will be called when a read/write needs
|
||||||
@ -204,9 +200,7 @@ private:
|
|||||||
MaybeExtern template ReadHandlingMethod<T>* Constant<T>(T value); \
|
MaybeExtern template ReadHandlingMethod<T>* Constant<T>(T value); \
|
||||||
MaybeExtern template WriteHandlingMethod<T>* Nop<T>(); \
|
MaybeExtern template WriteHandlingMethod<T>* Nop<T>(); \
|
||||||
MaybeExtern template ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask); \
|
MaybeExtern template ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask); \
|
||||||
MaybeExtern template ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask); \
|
|
||||||
MaybeExtern template WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask); \
|
MaybeExtern template WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask); \
|
||||||
MaybeExtern template WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask); \
|
|
||||||
MaybeExtern template ReadHandlingMethod<T>* ComplexRead<T>(std::function<T(u32)>); \
|
MaybeExtern template ReadHandlingMethod<T>* ComplexRead<T>(std::function<T(u32)>); \
|
||||||
MaybeExtern template WriteHandlingMethod<T>* ComplexWrite<T>(std::function<void(u32, T)>); \
|
MaybeExtern template WriteHandlingMethod<T>* ComplexWrite<T>(std::function<void(u32, T)>); \
|
||||||
MaybeExtern template ReadHandlingMethod<T>* InvalidRead<T>(); \
|
MaybeExtern template ReadHandlingMethod<T>* InvalidRead<T>(); \
|
||||||
|
@ -27,11 +27,13 @@ PerfQuery::~PerfQuery() = default;
|
|||||||
|
|
||||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
||||||
{
|
{
|
||||||
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
// Is this sane?
|
// Is this sane?
|
||||||
if (m_query_count > m_query_buffer.size() / 2)
|
if (query_count > m_query_buffer.size() / 2)
|
||||||
WeakFlush();
|
WeakFlush();
|
||||||
|
|
||||||
if (m_query_buffer.size() == m_query_count)
|
if (m_query_buffer.size() == query_count)
|
||||||
{
|
{
|
||||||
// TODO
|
// TODO
|
||||||
FlushOne();
|
FlushOne();
|
||||||
@ -41,12 +43,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
D3D::context->Begin(entry.query.Get());
|
D3D::context->Begin(entry.query.Get());
|
||||||
entry.query_type = type;
|
entry.query_type = type;
|
||||||
|
|
||||||
++m_query_count;
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,7 +57,8 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||||||
// stop query
|
// stop query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count + m_query_buffer.size() - 1) %
|
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
|
||||||
|
m_query_buffer.size() - 1) %
|
||||||
m_query_buffer.size()];
|
m_query_buffer.size()];
|
||||||
D3D::context->End(entry.query.Get());
|
D3D::context->End(entry.query.Get());
|
||||||
}
|
}
|
||||||
@ -63,8 +66,9 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||||||
|
|
||||||
void PerfQuery::ResetQuery()
|
void PerfQuery::ResetQuery()
|
||||||
{
|
{
|
||||||
m_query_count = 0;
|
m_query_count.store(0, std::memory_order_relaxed);
|
||||||
std::fill(std::begin(m_results), std::end(m_results), 0);
|
for (size_t i = 0; i < m_results.size(); ++i)
|
||||||
|
m_results[i].store(0, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
||||||
@ -72,13 +76,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|||||||
u32 result = 0;
|
u32 result = 0;
|
||||||
|
|
||||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
||||||
result = m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
||||||
result = m_results[PQG_ZCOMP];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_BLEND_INPUT)
|
else if (type == PQ_BLEND_INPUT)
|
||||||
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
|
||||||
|
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_EFB_COPY_CLOCKS)
|
else if (type == PQ_EFB_COPY_CLOCKS)
|
||||||
result = m_results[PQG_EFB_COPY_CLOCKS];
|
{
|
||||||
|
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -98,11 +111,13 @@ void PerfQuery::FlushOne()
|
|||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
// TODO: Dropping the lower 2 bits from this count should be closer to actual
|
// TODO: Dropping the lower 2 bits from this count should be closer to actual
|
||||||
// hardware behavior when drawing triangles.
|
// hardware behavior when drawing triangles.
|
||||||
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
|
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
EFB_HEIGHT / g_renderer->GetTargetHeight());
|
g_renderer->GetTargetHeight();
|
||||||
|
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
--m_query_count;
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: could selectively flush things, but I don't think that will do much
|
// TODO: could selectively flush things, but I don't think that will do much
|
||||||
@ -125,11 +140,13 @@ void PerfQuery::WeakFlush()
|
|||||||
if (hr == S_OK)
|
if (hr == S_OK)
|
||||||
{
|
{
|
||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
|
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
EFB_HEIGHT / g_renderer->GetTargetHeight());
|
g_renderer->GetTargetHeight();
|
||||||
|
m_results[entry.query_type].store(static_cast<u32>(native_res_result),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
--m_query_count;
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -140,7 +157,7 @@ void PerfQuery::WeakFlush()
|
|||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
{
|
{
|
||||||
return 0 == m_query_count;
|
return m_query_count.load(std::memory_order_relaxed) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace DX11
|
} // namespace DX11
|
||||||
|
@ -52,10 +52,11 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
{
|
{
|
||||||
// Block if there are no free slots.
|
// Block if there are no free slots.
|
||||||
// Otherwise, try to keep half of them available.
|
// Otherwise, try to keep half of them available.
|
||||||
if (m_query_count > m_query_buffer.size() / 2)
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
if (query_count > m_query_buffer.size() / 2)
|
||||||
{
|
{
|
||||||
const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
|
const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
|
||||||
const bool blocking = m_query_count == PERF_QUERY_BUFFER_SIZE;
|
const bool blocking = query_count == PERF_QUERY_BUFFER_SIZE;
|
||||||
PartialFlush(do_resolve, blocking);
|
PartialFlush(do_resolve, blocking);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,19 +84,20 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||||||
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
||||||
m_query_next_pos);
|
m_query_next_pos);
|
||||||
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
||||||
m_query_count++;
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
m_unresolved_queries++;
|
m_unresolved_queries++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::ResetQuery()
|
void PerfQuery::ResetQuery()
|
||||||
{
|
{
|
||||||
m_query_count = 0;
|
m_query_count.store(0, std::memory_order_relaxed);
|
||||||
m_unresolved_queries = 0;
|
m_unresolved_queries = 0;
|
||||||
m_query_resolve_pos = 0;
|
m_query_resolve_pos = 0;
|
||||||
m_query_readback_pos = 0;
|
m_query_readback_pos = 0;
|
||||||
m_query_next_pos = 0;
|
m_query_next_pos = 0;
|
||||||
std::fill(std::begin(m_results), std::end(m_results), 0);
|
for (size_t i = 0; i < m_results.size(); ++i)
|
||||||
|
m_results[i].store(0, std::memory_order_relaxed);
|
||||||
for (auto& entry : m_query_buffer)
|
for (auto& entry : m_query_buffer)
|
||||||
{
|
{
|
||||||
entry.fence_value = 0;
|
entry.fence_value = 0;
|
||||||
@ -108,13 +110,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|||||||
{
|
{
|
||||||
u32 result = 0;
|
u32 result = 0;
|
||||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
||||||
result = m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
||||||
result = m_results[PQG_ZCOMP];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_BLEND_INPUT)
|
else if (type == PQ_BLEND_INPUT)
|
||||||
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
|
||||||
|
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_EFB_COPY_CLOCKS)
|
else if (type == PQ_EFB_COPY_CLOCKS)
|
||||||
result = m_results[PQG_EFB_COPY_CLOCKS];
|
{
|
||||||
|
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
return result / 4;
|
return result / 4;
|
||||||
}
|
}
|
||||||
@ -127,7 +138,7 @@ void PerfQuery::FlushResults()
|
|||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
{
|
{
|
||||||
return m_query_count == 0;
|
return m_query_count.load(std::memory_order_relaxed) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::ResolveQueries()
|
void PerfQuery::ResolveQueries()
|
||||||
@ -165,7 +176,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
|
|||||||
u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();
|
u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();
|
||||||
|
|
||||||
// Need to save these since ProcessResults will modify them.
|
// Need to save these since ProcessResults will modify them.
|
||||||
const u32 outstanding_queries = m_query_count;
|
const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
|
||||||
u32 readback_count = 0;
|
u32 readback_count = 0;
|
||||||
for (u32 i = 0; i < outstanding_queries; i++)
|
for (u32 i = 0; i < outstanding_queries; i++)
|
||||||
{
|
{
|
||||||
@ -203,7 +214,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
|
|||||||
void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
|
void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
|
||||||
{
|
{
|
||||||
// Should be at maximum query_count queries pending.
|
// Should be at maximum query_count queries pending.
|
||||||
ASSERT(query_count <= m_query_count &&
|
ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
|
||||||
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
||||||
|
|
||||||
const D3D12_RANGE read_range = {m_query_readback_pos * sizeof(PerfQueryDataType),
|
const D3D12_RANGE read_range = {m_query_readback_pos * sizeof(PerfQueryDataType),
|
||||||
@ -231,16 +242,18 @@ void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
|
|||||||
std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));
|
std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));
|
||||||
|
|
||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
m_results[entry.query_type] +=
|
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
|
||||||
static_cast<u32>(static_cast<u64>(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
|
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
EFB_HEIGHT / g_renderer->GetTargetHeight());
|
g_renderer->GetTargetHeight();
|
||||||
|
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
||||||
|
std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr D3D12_RANGE write_range = {0, 0};
|
constexpr D3D12_RANGE write_range = {0, 0};
|
||||||
m_query_readback_buffer->Unmap(0, &write_range);
|
m_query_readback_buffer->Unmap(0, &write_range);
|
||||||
|
|
||||||
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
||||||
m_query_count -= query_count;
|
m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::PartialFlush(bool resolve, bool blocking)
|
void PerfQuery::PartialFlush(bool resolve, bool blocking)
|
||||||
|
@ -43,7 +43,7 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
{
|
{
|
||||||
return 0 == m_query_count;
|
return m_query_count.load(std::memory_order_relaxed) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: could selectively flush things, but I don't think that will do much
|
// TODO: could selectively flush things, but I don't think that will do much
|
||||||
@ -54,8 +54,9 @@ void PerfQuery::FlushResults()
|
|||||||
|
|
||||||
void PerfQuery::ResetQuery()
|
void PerfQuery::ResetQuery()
|
||||||
{
|
{
|
||||||
m_query_count = 0;
|
m_query_count.store(0, std::memory_order_relaxed);
|
||||||
std::fill(std::begin(m_results), std::end(m_results), 0);
|
for (size_t i = 0; i < m_results.size(); ++i)
|
||||||
|
m_results[i].store(0, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
||||||
@ -64,19 +65,20 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|||||||
|
|
||||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
||||||
{
|
{
|
||||||
result = m_results[PQG_ZCOMP_ZCOMPLOC];
|
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
||||||
{
|
{
|
||||||
result = m_results[PQG_ZCOMP];
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
else if (type == PQ_BLEND_INPUT)
|
else if (type == PQ_BLEND_INPUT)
|
||||||
{
|
{
|
||||||
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
|
||||||
|
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
else if (type == PQ_EFB_COPY_CLOCKS)
|
else if (type == PQ_EFB_COPY_CLOCKS)
|
||||||
{
|
{
|
||||||
result = m_results[PQG_EFB_COPY_CLOCKS];
|
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -97,11 +99,13 @@ PerfQueryGL::~PerfQueryGL()
|
|||||||
|
|
||||||
void PerfQueryGL::EnableQuery(PerfQueryGroup type)
|
void PerfQueryGL::EnableQuery(PerfQueryGroup type)
|
||||||
{
|
{
|
||||||
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
// Is this sane?
|
// Is this sane?
|
||||||
if (m_query_count > m_query_buffer.size() / 2)
|
if (query_count > m_query_buffer.size() / 2)
|
||||||
WeakFlush();
|
WeakFlush();
|
||||||
|
|
||||||
if (m_query_buffer.size() == m_query_count)
|
if (m_query_buffer.size() == query_count)
|
||||||
{
|
{
|
||||||
FlushOne();
|
FlushOne();
|
||||||
// ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
|
// ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
|
||||||
@ -110,12 +114,12 @@ void PerfQueryGL::EnableQuery(PerfQueryGroup type)
|
|||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
glBeginQuery(m_query_type, entry.query_id);
|
glBeginQuery(m_query_type, entry.query_id);
|
||||||
entry.query_type = type;
|
entry.query_type = type;
|
||||||
|
|
||||||
++m_query_count;
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
|
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
|
||||||
@ -164,10 +168,10 @@ void PerfQueryGL::FlushOne()
|
|||||||
if (g_ActiveConfig.iMultisamples > 1)
|
if (g_ActiveConfig.iMultisamples > 1)
|
||||||
result /= g_ActiveConfig.iMultisamples;
|
result /= g_ActiveConfig.iMultisamples;
|
||||||
|
|
||||||
m_results[entry.query_type] += result;
|
m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
--m_query_count;
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: could selectively flush things, but I don't think that will do much
|
// TODO: could selectively flush things, but I don't think that will do much
|
||||||
@ -191,11 +195,12 @@ PerfQueryGLESNV::~PerfQueryGLESNV()
|
|||||||
|
|
||||||
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
|
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
|
||||||
{
|
{
|
||||||
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
// Is this sane?
|
// Is this sane?
|
||||||
if (m_query_count > m_query_buffer.size() / 2)
|
if (query_count > m_query_buffer.size() / 2)
|
||||||
WeakFlush();
|
WeakFlush();
|
||||||
|
|
||||||
if (m_query_buffer.size() == m_query_count)
|
if (m_query_buffer.size() == query_count)
|
||||||
{
|
{
|
||||||
FlushOne();
|
FlushOne();
|
||||||
// ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
|
// ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
|
||||||
@ -204,12 +209,12 @@ void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
|
|||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
glBeginOcclusionQueryNV(entry.query_id);
|
glBeginOcclusionQueryNV(entry.query_id);
|
||||||
entry.query_type = type;
|
entry.query_type = type;
|
||||||
|
|
||||||
++m_query_count;
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
|
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
|
||||||
@ -251,11 +256,13 @@ void PerfQueryGLESNV::FlushOne()
|
|||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
// TODO: Dropping the lower 2 bits from this count should be closer to actual
|
// TODO: Dropping the lower 2 bits from this count should be closer to actual
|
||||||
// hardware behavior when drawing triangles.
|
// hardware behavior when drawing triangles.
|
||||||
m_results[entry.query_type] += static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
|
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
|
||||||
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
||||||
|
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
--m_query_count;
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: could selectively flush things, but I don't think that will do much
|
// TODO: could selectively flush things, but I don't think that will do much
|
||||||
|
@ -43,8 +43,9 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
{
|
{
|
||||||
// Block if there are no free slots.
|
// Block if there are no free slots.
|
||||||
// Otherwise, try to keep half of them available.
|
// Otherwise, try to keep half of them available.
|
||||||
if (m_query_count > m_query_buffer.size() / 2)
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);
|
if (query_count > m_query_buffer.size() / 2)
|
||||||
|
PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE);
|
||||||
|
|
||||||
// Ensure command buffer is ready to go before beginning the query, that way we don't submit
|
// Ensure command buffer is ready to go before beginning the query, that way we don't submit
|
||||||
// a buffer with open queries.
|
// a buffer with open queries.
|
||||||
@ -73,16 +74,17 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||||||
{
|
{
|
||||||
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
|
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
|
||||||
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
||||||
m_query_count++;
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::ResetQuery()
|
void PerfQuery::ResetQuery()
|
||||||
{
|
{
|
||||||
m_query_count = 0;
|
m_query_count.store(0, std::memory_order_relaxed);
|
||||||
m_query_readback_pos = 0;
|
m_query_readback_pos = 0;
|
||||||
m_query_next_pos = 0;
|
m_query_next_pos = 0;
|
||||||
std::fill(std::begin(m_results), std::end(m_results), 0);
|
for (size_t i = 0; i < m_results.size(); ++i)
|
||||||
|
m_results[i].store(0, std::memory_order_relaxed);
|
||||||
|
|
||||||
// Reset entire query pool, ensuring all queries are ready to write to.
|
// Reset entire query pool, ensuring all queries are ready to write to.
|
||||||
StateTracker::GetInstance()->EndRenderPass();
|
StateTracker::GetInstance()->EndRenderPass();
|
||||||
@ -96,13 +98,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|||||||
{
|
{
|
||||||
u32 result = 0;
|
u32 result = 0;
|
||||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
||||||
result = m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
||||||
result = m_results[PQG_ZCOMP];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_BLEND_INPUT)
|
else if (type == PQ_BLEND_INPUT)
|
||||||
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
|
{
|
||||||
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
|
||||||
|
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
else if (type == PQ_EFB_COPY_CLOCKS)
|
else if (type == PQ_EFB_COPY_CLOCKS)
|
||||||
result = m_results[PQG_EFB_COPY_CLOCKS];
|
{
|
||||||
|
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
return result / 4;
|
return result / 4;
|
||||||
}
|
}
|
||||||
@ -115,7 +126,7 @@ void PerfQuery::FlushResults()
|
|||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
{
|
{
|
||||||
return m_query_count == 0;
|
return m_query_count.load(std::memory_order_relaxed) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PerfQuery::CreateQueryPool()
|
bool PerfQuery::CreateQueryPool()
|
||||||
@ -144,7 +155,7 @@ void PerfQuery::ReadbackQueries()
|
|||||||
const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
|
const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
|
||||||
|
|
||||||
// Need to save these since ProcessResults will modify them.
|
// Need to save these since ProcessResults will modify them.
|
||||||
const u32 outstanding_queries = m_query_count;
|
const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
|
||||||
u32 readback_count = 0;
|
u32 readback_count = 0;
|
||||||
for (u32 i = 0; i < outstanding_queries; i++)
|
for (u32 i = 0; i < outstanding_queries; i++)
|
||||||
{
|
{
|
||||||
@ -171,7 +182,7 @@ void PerfQuery::ReadbackQueries()
|
|||||||
void PerfQuery::ReadbackQueries(u32 query_count)
|
void PerfQuery::ReadbackQueries(u32 query_count)
|
||||||
{
|
{
|
||||||
// Should be at maximum query_count queries pending.
|
// Should be at maximum query_count queries pending.
|
||||||
ASSERT(query_count <= m_query_count &&
|
ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
|
||||||
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
||||||
|
|
||||||
// Read back from the GPU.
|
// Read back from the GPU.
|
||||||
@ -194,13 +205,15 @@ void PerfQuery::ReadbackQueries(u32 query_count)
|
|||||||
entry.has_value = false;
|
entry.has_value = false;
|
||||||
|
|
||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
m_results[entry.query_type] +=
|
const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
|
||||||
static_cast<u32>(static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
|
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
|
g_renderer->GetTargetHeight();
|
||||||
|
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
||||||
|
std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
||||||
m_query_count -= query_count;
|
m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::PartialFlush(bool blocking)
|
void PerfQuery::PartialFlush(bool blocking)
|
||||||
|
@ -119,11 +119,11 @@ void Init()
|
|||||||
m_tokenReg = 0;
|
m_tokenReg = 0;
|
||||||
|
|
||||||
memset(&fifo, 0, sizeof(fifo));
|
memset(&fifo, 0, sizeof(fifo));
|
||||||
fifo.bFF_Breakpoint = 0;
|
fifo.bFF_Breakpoint.store(0, std::memory_order_relaxed);
|
||||||
fifo.bFF_HiWatermark = 0;
|
fifo.bFF_HiWatermark.store(0, std::memory_order_relaxed);
|
||||||
fifo.bFF_HiWatermarkInt = 0;
|
fifo.bFF_HiWatermarkInt.store(0, std::memory_order_relaxed);
|
||||||
fifo.bFF_LoWatermark = 0;
|
fifo.bFF_LoWatermark.store(0, std::memory_order_relaxed);
|
||||||
fifo.bFF_LoWatermarkInt = 0;
|
fifo.bFF_LoWatermarkInt.store(0, std::memory_order_relaxed);
|
||||||
|
|
||||||
s_interrupt_set.Clear();
|
s_interrupt_set.Clear();
|
||||||
s_interrupt_waiting.Clear();
|
s_interrupt_waiting.Clear();
|
||||||
@ -368,7 +368,7 @@ void GatherPipeBursted()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If the game is running close to overflowing, make the exception checking more frequent.
|
// If the game is running close to overflowing, make the exception checking more frequent.
|
||||||
if (fifo.bFF_HiWatermark)
|
if (fifo.bFF_HiWatermark.load(std::memory_order_relaxed) != 0)
|
||||||
CoreTiming::ForceExceptionCheck(0);
|
CoreTiming::ForceExceptionCheck(0);
|
||||||
|
|
||||||
fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst);
|
fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst);
|
||||||
@ -427,47 +427,53 @@ bool IsInterruptWaiting()
|
|||||||
void SetCPStatusFromGPU()
|
void SetCPStatusFromGPU()
|
||||||
{
|
{
|
||||||
// breakpoint
|
// breakpoint
|
||||||
if (fifo.bFF_BPEnable)
|
const bool breakpoint = fifo.bFF_Breakpoint.load(std::memory_order_relaxed);
|
||||||
|
if (fifo.bFF_BPEnable.load(std::memory_order_relaxed) != 0)
|
||||||
{
|
{
|
||||||
if (fifo.CPBreakpoint.load(std::memory_order_relaxed) ==
|
if (fifo.CPBreakpoint.load(std::memory_order_relaxed) ==
|
||||||
fifo.CPReadPointer.load(std::memory_order_relaxed))
|
fifo.CPReadPointer.load(std::memory_order_relaxed))
|
||||||
{
|
{
|
||||||
if (!fifo.bFF_Breakpoint)
|
if (!breakpoint)
|
||||||
{
|
{
|
||||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}",
|
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}",
|
||||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||||
fifo.bFF_Breakpoint = true;
|
fifo.bFF_Breakpoint.store(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (fifo.bFF_Breakpoint)
|
if (breakpoint)
|
||||||
{
|
{
|
||||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
||||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||||
|
fifo.bFF_Breakpoint.store(0, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
fifo.bFF_Breakpoint = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (fifo.bFF_Breakpoint)
|
if (breakpoint)
|
||||||
{
|
{
|
||||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
||||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||||
|
fifo.bFF_Breakpoint = false;
|
||||||
}
|
}
|
||||||
fifo.bFF_Breakpoint = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// overflow & underflow check
|
// overflow & underflow check
|
||||||
fifo.bFF_HiWatermark =
|
fifo.bFF_HiWatermark.store(
|
||||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
|
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark),
|
||||||
fifo.bFF_LoWatermark =
|
std::memory_order_relaxed);
|
||||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
|
fifo.bFF_LoWatermark.store(
|
||||||
|
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
|
bool bpInt = fifo.bFF_Breakpoint.load(std::memory_order_relaxed) &&
|
||||||
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
|
fifo.bFF_BPInt.load(std::memory_order_relaxed);
|
||||||
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
|
bool ovfInt = fifo.bFF_HiWatermark.load(std::memory_order_relaxed) &&
|
||||||
|
fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed);
|
||||||
|
bool undfInt = fifo.bFF_LoWatermark.load(std::memory_order_relaxed) &&
|
||||||
|
fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
|
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
|
||||||
|
|
||||||
@ -493,14 +499,19 @@ void SetCPStatusFromGPU()
|
|||||||
void SetCPStatusFromCPU()
|
void SetCPStatusFromCPU()
|
||||||
{
|
{
|
||||||
// overflow & underflow check
|
// overflow & underflow check
|
||||||
fifo.bFF_HiWatermark =
|
fifo.bFF_HiWatermark.store(
|
||||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
|
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark),
|
||||||
fifo.bFF_LoWatermark =
|
std::memory_order_relaxed);
|
||||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
|
fifo.bFF_LoWatermark.store(
|
||||||
|
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
|
bool bpInt = fifo.bFF_Breakpoint.load(std::memory_order_relaxed) &&
|
||||||
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
|
fifo.bFF_BPInt.load(std::memory_order_relaxed);
|
||||||
bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
|
bool ovfInt = fifo.bFF_HiWatermark.load(std::memory_order_relaxed) &&
|
||||||
|
fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed);
|
||||||
|
bool undfInt = fifo.bFF_LoWatermark.load(std::memory_order_relaxed) &&
|
||||||
|
fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
|
bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;
|
||||||
|
|
||||||
@ -526,14 +537,15 @@ void SetCPStatusFromCPU()
|
|||||||
void SetCpStatusRegister()
|
void SetCpStatusRegister()
|
||||||
{
|
{
|
||||||
// Here always there is one fifo attached to the GPU
|
// Here always there is one fifo attached to the GPU
|
||||||
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
|
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint.load(std::memory_order_relaxed);
|
||||||
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
||||||
(fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
(fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
||||||
fifo.CPWritePointer.load(std::memory_order_relaxed));
|
fifo.CPWritePointer.load(std::memory_order_relaxed));
|
||||||
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
||||||
Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
|
Fifo::AtBreakpoint() ||
|
||||||
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
|
!fifo.bFF_GPReadEnable.load(std::memory_order_relaxed);
|
||||||
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
|
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark.load(std::memory_order_relaxed);
|
||||||
|
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t Read from STATUS_REGISTER : {:04x}", m_CPStatusReg.Hex);
|
DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t Read from STATUS_REGISTER : {:04x}", m_CPStatusReg.Hex);
|
||||||
DEBUG_LOG_FMT(
|
DEBUG_LOG_FMT(
|
||||||
@ -545,15 +557,15 @@ void SetCpStatusRegister()
|
|||||||
|
|
||||||
void SetCpControlRegister()
|
void SetCpControlRegister()
|
||||||
{
|
{
|
||||||
fifo.bFF_BPInt = m_CPCtrlReg.BPInt;
|
fifo.bFF_BPInt.store(m_CPCtrlReg.BPInt, std::memory_order_relaxed);
|
||||||
fifo.bFF_BPEnable = m_CPCtrlReg.BPEnable;
|
fifo.bFF_BPEnable.store(m_CPCtrlReg.BPEnable, std::memory_order_relaxed);
|
||||||
fifo.bFF_HiWatermarkInt = m_CPCtrlReg.FifoOverflowIntEnable;
|
fifo.bFF_HiWatermarkInt.store(m_CPCtrlReg.FifoOverflowIntEnable, std::memory_order_relaxed);
|
||||||
fifo.bFF_LoWatermarkInt = m_CPCtrlReg.FifoUnderflowIntEnable;
|
fifo.bFF_LoWatermarkInt.store(m_CPCtrlReg.FifoUnderflowIntEnable, std::memory_order_relaxed);
|
||||||
fifo.bFF_GPLinkEnable = m_CPCtrlReg.GPLinkEnable;
|
fifo.bFF_GPLinkEnable.store(m_CPCtrlReg.GPLinkEnable, std::memory_order_relaxed);
|
||||||
|
|
||||||
if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
|
if (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && !m_CPCtrlReg.GPReadEnable)
|
||||||
{
|
{
|
||||||
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
|
fifo.bFF_GPReadEnable.store(m_CPCtrlReg.GPReadEnable, std::memory_order_relaxed);
|
||||||
Fifo::FlushGpu();
|
Fifo::FlushGpu();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -562,8 +574,10 @@ void SetCpControlRegister()
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t GPREAD {} | BP {} | Int {} | OvF {} | UndF {} | LINK {}",
|
DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t GPREAD {} | BP {} | Int {} | OvF {} | UndF {} | LINK {}",
|
||||||
fifo.bFF_GPReadEnable ? "ON" : "OFF", fifo.bFF_BPEnable ? "ON" : "OFF",
|
fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) ? "ON" : "OFF",
|
||||||
fifo.bFF_BPInt ? "ON" : "OFF", m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF",
|
fifo.bFF_BPEnable.load(std::memory_order_relaxed) ? "ON" : "OFF",
|
||||||
|
fifo.bFF_BPInt.load(std::memory_order_relaxed) ? "ON" : "OFF",
|
||||||
|
m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF",
|
||||||
m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF",
|
m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF",
|
||||||
m_CPCtrlReg.GPLinkEnable ? "ON" : "OFF");
|
m_CPCtrlReg.GPLinkEnable ? "ON" : "OFF");
|
||||||
}
|
}
|
||||||
@ -588,32 +602,35 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
|
|||||||
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
|
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
|
||||||
|
|
||||||
{
|
{
|
||||||
PanicAlertFmt(
|
PanicAlertFmt("Illegal command {:02x}\n"
|
||||||
"Illegal command {:02x}\n"
|
"CPBase: {:#010x}\n"
|
||||||
"CPBase: {:#010x}\n"
|
"CPEnd: {:#010x}\n"
|
||||||
"CPEnd: {:#010x}\n"
|
"CPHiWatermark: {:#010x}\n"
|
||||||
"CPHiWatermark: {:#010x}\n"
|
"CPLoWatermark: {:#010x}\n"
|
||||||
"CPLoWatermark: {:#010x}\n"
|
"CPReadWriteDistance: {:#010x}\n"
|
||||||
"CPReadWriteDistance: {:#010x}\n"
|
"CPWritePointer: {:#010x}\n"
|
||||||
"CPWritePointer: {:#010x}\n"
|
"CPReadPointer: {:#010x}\n"
|
||||||
"CPReadPointer: {:#010x}\n"
|
"CPBreakpoint: {:#010x}\n"
|
||||||
"CPBreakpoint: {:#010x}\n"
|
"bFF_GPReadEnable: {}\n"
|
||||||
"bFF_GPReadEnable: {}\n"
|
"bFF_BPEnable: {}\n"
|
||||||
"bFF_BPEnable: {}\n"
|
"bFF_BPInt: {}\n"
|
||||||
"bFF_BPInt: {}\n"
|
"bFF_Breakpoint: {}\n"
|
||||||
"bFF_Breakpoint: {}\n"
|
"bFF_GPLinkEnable: {}\n"
|
||||||
"bFF_GPLinkEnable: {}\n"
|
"bFF_HiWatermarkInt: {}\n"
|
||||||
"bFF_HiWatermarkInt: {}\n"
|
"bFF_LoWatermarkInt: {}\n",
|
||||||
"bFF_LoWatermarkInt: {}\n",
|
cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
|
||||||
cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
|
fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark,
|
||||||
fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark,
|
fifo.CPLoWatermark, fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
|
||||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
|
fifo.CPWritePointer.load(std::memory_order_relaxed),
|
||||||
fifo.CPWritePointer.load(std::memory_order_relaxed),
|
fifo.CPReadPointer.load(std::memory_order_relaxed),
|
||||||
fifo.CPReadPointer.load(std::memory_order_relaxed),
|
fifo.CPBreakpoint.load(std::memory_order_relaxed),
|
||||||
fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false",
|
fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false",
|
fifo.bFF_BPEnable.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false",
|
fifo.bFF_BPInt.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false");
|
fifo.bFF_Breakpoint.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
|
fifo.bFF_GPLinkEnable.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
|
fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed) ? "true" : "false",
|
||||||
|
fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed) ? "true" : "false");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,17 +29,17 @@ struct SCPFifoStruct
|
|||||||
std::atomic<u32> CPBreakpoint;
|
std::atomic<u32> CPBreakpoint;
|
||||||
std::atomic<u32> SafeCPReadPointer;
|
std::atomic<u32> SafeCPReadPointer;
|
||||||
|
|
||||||
volatile u32 bFF_GPLinkEnable;
|
std::atomic<u32> bFF_GPLinkEnable;
|
||||||
volatile u32 bFF_GPReadEnable;
|
std::atomic<u32> bFF_GPReadEnable;
|
||||||
volatile u32 bFF_BPEnable;
|
std::atomic<u32> bFF_BPEnable;
|
||||||
volatile u32 bFF_BPInt;
|
std::atomic<u32> bFF_BPInt;
|
||||||
volatile u32 bFF_Breakpoint;
|
std::atomic<u32> bFF_Breakpoint;
|
||||||
|
|
||||||
volatile u32 bFF_LoWatermarkInt;
|
std::atomic<u32> bFF_LoWatermarkInt;
|
||||||
volatile u32 bFF_HiWatermarkInt;
|
std::atomic<u32> bFF_HiWatermarkInt;
|
||||||
|
|
||||||
volatile u32 bFF_LoWatermark;
|
std::atomic<u32> bFF_LoWatermark;
|
||||||
volatile u32 bFF_HiWatermark;
|
std::atomic<u32> bFF_HiWatermark;
|
||||||
|
|
||||||
void DoState(PointerWrap& p);
|
void DoState(PointerWrap& p);
|
||||||
};
|
};
|
||||||
|
@ -139,7 +139,7 @@ void Shutdown()
|
|||||||
void ExitGpuLoop()
|
void ExitGpuLoop()
|
||||||
{
|
{
|
||||||
// This should break the wait loop in CPU thread
|
// This should break the wait loop in CPU thread
|
||||||
CommandProcessor::fifo.bFF_GPReadEnable = false;
|
CommandProcessor::fifo.bFF_GPReadEnable.store(0, std::memory_order_relaxed);
|
||||||
FlushGpu();
|
FlushGpu();
|
||||||
|
|
||||||
// Terminate GPU thread loop
|
// Terminate GPU thread loop
|
||||||
@ -327,7 +327,8 @@ void RunGpuLoop()
|
|||||||
CommandProcessor::SetCPStatusFromGPU();
|
CommandProcessor::SetCPStatusFromGPU();
|
||||||
|
|
||||||
// check if we are able to run this buffer
|
// check if we are able to run this buffer
|
||||||
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
|
while (!CommandProcessor::IsInterruptWaiting() &&
|
||||||
|
fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
|
||||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
|
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
|
||||||
{
|
{
|
||||||
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
|
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
|
||||||
@ -415,8 +416,9 @@ void GpuMaySleep()
|
|||||||
bool AtBreakpoint()
|
bool AtBreakpoint()
|
||||||
{
|
{
|
||||||
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||||
return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
return fifo.bFF_BPEnable.load(std::memory_order_relaxed) &&
|
||||||
fifo.CPBreakpoint.load(std::memory_order_relaxed));
|
(fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
||||||
|
fifo.CPBreakpoint.load(std::memory_order_relaxed));
|
||||||
}
|
}
|
||||||
|
|
||||||
void RunGpu()
|
void RunGpu()
|
||||||
@ -446,8 +448,9 @@ static int RunGpuOnCpu(int ticks)
|
|||||||
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||||
bool reset_simd_state = false;
|
bool reset_simd_state = false;
|
||||||
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
|
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
|
||||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) &&
|
while (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
|
||||||
!AtBreakpoint() && available_ticks >= 0)
|
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint() &&
|
||||||
|
available_ticks >= 0)
|
||||||
{
|
{
|
||||||
if (s_use_deterministic_gpu_thread)
|
if (s_use_deterministic_gpu_thread)
|
||||||
{
|
{
|
||||||
|
@ -4,7 +4,10 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <atomic>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
|
||||||
enum PerfQueryType
|
enum PerfQueryType
|
||||||
@ -61,9 +64,8 @@ public:
|
|||||||
virtual bool IsFlushed() const { return true; }
|
virtual bool IsFlushed() const { return true; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// TODO: sloppy
|
std::atomic<u32> m_query_count;
|
||||||
volatile u32 m_query_count;
|
std::array<std::atomic<u32>, PQG_NUM_MEMBERS> m_results;
|
||||||
volatile u32 m_results[PQG_NUM_MEMBERS];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::unique_ptr<PerfQueryBase> g_perf_query;
|
extern std::unique_ptr<PerfQueryBase> g_perf_query;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user