vm::spu max address was overflowing, resulting in issues, so cast to u64 where needed. Fixes #6145.
    Use vm::get_addr instead of manually subtracting vm::base(0) from pointers in texture cache code.
    Prefer std::atomic_thread_fence over _mm_?fence() and adjust usage to be more correct (see the sketch below).
    Use sequentially consistent ordering in semaphore_release for the TSX path as well.
    Improve memory ordering for sys_rsx_context_iounmap/map.
    Fix sync bugs in HLE gcm caused by not using atomic instructions.
    Use a release memory barrier for lwsync in PPU LLVM; according to the Xbox 360 programming guide, lwsync is a hardware release memory barrier.
    Also use a release barrier where lwsync was originally used in liblv2 sys_lwmutex and cellSync.
    Use an acquire barrier for the isync instruction; see https://devblogs.microsoft.com/oldnewthing/20180814-00/?p=99485
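
For reference, a minimal sketch (not part of the commit) of how these PPU barriers map onto C++11 fences after this change; the helper names are hypothetical:

#include <atomic>

// Hypothetical helpers illustrating the mapping described above:
// sync (hwsync) -> full barrier, lwsync -> release, isync -> acquire.
inline void emu_sync()   { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void emu_lwsync() { std::atomic_thread_fence(std::memory_order_release); }
inline void emu_isync()  { std::atomic_thread_fence(std::memory_order_acquire); }

int main()
{
    emu_lwsync(); // e.g. after publishing data, before releasing a lock word
    emu_isync();  // e.g. after acquiring a lock word, before reading data
}

On x86, the release and acquire fences compile down to a compiler barrier only, so this is also cheaper than the former _mm_mfence().
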
Eladash 2019-06-29 18:48:42 +03:00, committed by Ivan
parent 1ee7b91646
commit 43f919c04b
20 changed files with 85 additions and 65 deletions

View File

@@ -12,6 +12,7 @@
 #include "sysPrxForUser.h"
 #include <thread>
+#include <atomic>
 LOG_CHANNEL(cellGcmSys);
@@ -40,10 +41,11 @@ struct CellGcmSysConfig {
 };
 u64 system_mode = 0;
-u32 reserved_size = 0;
 u32 local_size = 0;
 u32 local_addr = 0;
+atomic_t<u32> reserved_size = 0;
 // Auxiliary functions
 /*
@@ -72,7 +74,7 @@ u32 gcmGetLocalMemorySize(u32 sdk_version)
 }
 CellGcmOffsetTable offsetTable;
-u16 IoMapTable[0xC00];
+atomic_t<u16> IoMapTable[0xC00]{};
 void InitOffsetTable()
 {
@@ -1013,8 +1015,6 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict)
 ea >>= 20, io >>= 20, size >>= 20;
-IoMapTable[ea] = size;
 // Fill the offset table
 for (u32 i = 0; i < size; i++)
 {
@@ -1022,6 +1022,7 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict)
 offsetTable.eaAddress[io + i] = ea + i;
 }
+IoMapTable[ea] = size;
 return CELL_OK;
 }
@@ -1079,8 +1080,6 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr<u32> offset)
 ea >>= 20, size >>= 20;
-IoMapTable[ea] = size;
 // Fill the offset table
 for (u32 i = 0; i < size; i++)
 {
@@ -1088,6 +1087,8 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr<u32> offset)
 offsetTable.eaAddress[io + i] = ea + i;
 }
+IoMapTable[ea] = size;
 *offset = io << 20;
 return CELL_OK;
 }
@@ -1127,15 +1128,17 @@ s32 cellGcmUnmapEaIoAddress(u32 ea)
 {
 cellGcmSys.trace("cellGcmUnmapEaIoAddress(ea=0x%x)", ea);
-if (const u32 size = std::exchange(IoMapTable[ea >>= 20], 0))
+if (const u32 size = IoMapTable[ea >>= 20].exchange(0))
 {
 const u32 io = offsetTable.ioAddress[ea];
 for (u32 i = 0; i < size; i++)
 {
-RSXIOMem.io[ea + i].release(offsetTable.ioAddress[ea + i] = 0xFFFF);
-RSXIOMem.ea[io + i].release(offsetTable.eaAddress[io + i] = 0xFFFF);
+RSXIOMem.io[ea + i].raw() = offsetTable.ioAddress[ea + i] = 0xFFFF;
+RSXIOMem.ea[io + i].raw() = offsetTable.eaAddress[io + i] = 0xFFFF;
 }
+std::atomic_thread_fence(std::memory_order_seq_cst);
 }
 else
 {
@@ -1150,15 +1153,17 @@ s32 cellGcmUnmapIoAddress(u32 io)
 {
 cellGcmSys.trace("cellGcmUnmapIoAddress(io=0x%x)", io);
-if (u32 size = std::exchange(IoMapTable[RSXIOMem.ea[io >>= 20]], 0))
+if (u32 size = IoMapTable[RSXIOMem.ea[io >>= 20]].exchange(0))
 {
 const u32 ea = offsetTable.eaAddress[io];
 for (u32 i = 0; i < size; i++)
 {
-RSXIOMem.io[ea + i].release(offsetTable.ioAddress[ea + i] = 0xFFFF);
-RSXIOMem.ea[io + i].release(offsetTable.eaAddress[io + i] = 0xFFFF);
+RSXIOMem.io[ea + i].raw() = offsetTable.ioAddress[ea + i] = 0xFFFF;
+RSXIOMem.ea[io + i].raw() = offsetTable.eaAddress[io + i] = 0xFFFF;
 }
+std::atomic_thread_fence(std::memory_order_seq_cst);
 }
 else
 {
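
A minimal standalone sketch (hypothetical names, not the emulator's actual types) of the exchange-based unmap pattern these gcm changes adopt: claim the mapping atomically so concurrent unmaps cannot race, then publish the table resets with a trailing fence.

#include <atomic>
#include <cstdint>

// Hypothetical stand-in for IoMapTable; entries hold a mapping size in MB.
std::atomic<uint16_t> io_map_table[0xC00];

bool unmap(uint32_t ea_mb)
{
    // Atomically claim the entry so two threads can't unmap the same range
    if (const uint16_t size = io_map_table[ea_mb].exchange(0))
    {
        // ... reset the offset/IO table entries for [ea_mb, ea_mb + size) ...
        std::atomic_thread_fence(std::memory_order_seq_cst); // publish the resets
        return true;
    }
    return false; // lost the race: another thread already unmapped this range
}

int main()
{
    io_map_table[1].store(4);
    return unmap(1) ? 0 : 1; // a second unmap(1) would return false
}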

View File

@@ -15,6 +15,8 @@
 #include "sysPrxForUser.h"
 #include "cellSpurs.h"
+#include <atomic>
 LOG_CHANNEL(cellSpurs);
 error_code sys_spu_image_close(vm::ptr<sys_spu_image> img);
@@ -2575,7 +2577,7 @@ s32 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set
 return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 if (s32 res = spurs->wklFlag.flag.atomic_op([spurs, wid, is_set](be_t<u32>& flag) -> s32
 {

View File

@@ -6,6 +6,8 @@
 #include "Emu/Cell/lv2/sys_process.h"
 #include "cellSync.h"
+#include <atomic>
 LOG_CHANNEL(cellSync);
 template<>
@@ -85,8 +87,7 @@ error_code cellSyncMutexLock(ppu_thread& ppu, vm::ptr<CellSyncMutex> mutex)
 }
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 return CELL_OK;
 }
@@ -195,7 +196,7 @@ error_code cellSyncBarrierTryNotify(vm::ptr<CellSyncBarrier> barrier)
 return CELL_SYNC_ERROR_ALIGN;
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 if (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_notify>())
 {
@@ -219,7 +220,7 @@ error_code cellSyncBarrierWait(ppu_thread& ppu, vm::ptr<CellSyncBarrier> barrier
 return CELL_SYNC_ERROR_ALIGN;
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>())
 {
@@ -246,7 +247,7 @@ error_code cellSyncBarrierTryWait(vm::ptr<CellSyncBarrier> barrier)
 return CELL_SYNC_ERROR_ALIGN;
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 if (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>())
 {
@@ -280,7 +281,7 @@ error_code cellSyncRwmInitialize(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer,
 rwm->size = buffer_size;
 rwm->buffer = buffer;
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 return CELL_OK;
 }
@@ -452,7 +453,7 @@ error_code cellSyncQueueInitialize(vm::ptr<CellSyncQueue> queue, vm::ptr<u8> buf
 queue->depth = depth;
 queue->buffer = buffer;
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 return CELL_OK;
 }
@@ -865,7 +866,7 @@ error_code cellSyncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::cptr<vo
 }
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 }
 else
 {
@@ -894,7 +895,7 @@ error_code _cellSyncLFQueueGetPushPointer(ppu_thread& ppu, vm::ptr<CellSyncLFQue
 {
 while (true)
 {
-const auto old = queue->push1.load(); _mm_lfence();
+const auto old = queue->push1.load();
 auto push = old;
 if (var1)
@@ -989,9 +990,10 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptr<CellSync
 while (true)
 {
-const auto old = queue->push2.load(); _mm_lfence();
+const auto old = queue->push2.load();
 auto push2 = old;
+// Loads must be in this order
 const auto old2 = queue->push3.load();
 auto push3 = old2;
@@ -1192,7 +1194,7 @@ error_code _cellSyncLFQueueGetPopPointer(ppu_thread& ppu, vm::ptr<CellSyncLFQueu
 {
 while (true)
 {
-const auto old = queue->pop1.load(); _mm_lfence();
+const auto old = queue->pop1.load();
 auto pop = old;
 if (var1)
@@ -1288,9 +1290,10 @@ error_code _cellSyncLFQueueCompletePopPointer(ppu_thread& ppu, vm::ptr<CellSyncL
 while (true)
 {
-const auto old = queue->pop2.load(); _mm_lfence();
+const auto old = queue->pop2.load();
 auto pop2 = old;
+// Loads must be in this order
 const auto old2 = queue->pop3.load();
 auto pop3 = old2;
@@ -1489,9 +1492,10 @@ error_code cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
 while (true)
 {
-const auto old = queue->pop1.load(); _mm_lfence();
+const auto old = queue->pop1.load();
 auto pop = old;
+// Loads must be in this order
 const auto push = queue->push1.load();
 s32 var1, var2;
@@ -1540,8 +1544,9 @@ error_code cellSyncLFQueueSize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u32> size
 while (true)
 {
-const auto old = queue->pop3.load(); _mm_lfence();
+const auto old = queue->pop3.load();
+// Loads must be in this order
 u32 var1 = (u16)queue->pop1.load().m_h1;
 u32 var2 = (u16)queue->push1.load().m_h5;
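
The removed _mm_lfence() calls above relied on x86 semantics where loads are never reordered with earlier loads anyway; the "Loads must be in this order" comments now document the required ordering instead. A minimal sketch (hypothetical variables) of the pattern with standard atomics:

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> pop2{0}, pop3{0}; // hypothetical stand-ins for the queue fields

void read_in_order()
{
    // Seq-cst atomic loads keep the compiler from reordering these reads,
    // and x86 never reorders a load with an earlier load, so no fence is needed.
    const uint32_t a = pop2.load(); // must be read before pop3
    const uint32_t b = pop3.load();
    (void)a; (void)b;
}

int main() { read_in_order(); }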

View File

@@ -8,6 +8,8 @@
 #include "Emu/Cell/lv2/sys_mutex.h"
 #include "sysPrxForUser.h"
+#include <atomic>
 extern logs::channel sysPrxForUser;
 error_code sys_lwmutex_create(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, vm::ptr<sys_lwmutex_attribute_t> attr)
@@ -128,7 +130,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64
 // recursive locking succeeded
 lwmutex->recursive_count++;
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 return CELL_OK;
 }
@@ -288,7 +290,7 @@ error_code sys_lwmutex_trylock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex)
 // recursive locking succeeded
 lwmutex->recursive_count++;
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_release);
 return CELL_OK;
 }

View File

@@ -8,6 +8,7 @@
 #include "Emu/Cell/Common.h"
 #include <cmath>
+#include <atomic>
 #if !defined(_MSC_VER) && !defined(__SSSE3__)
 #define _mm_shuffle_epi8(opa, opb) opb
@@ -2966,7 +2967,7 @@ bool ppu_interpreter::CRANDC(ppu_thread& ppu, ppu_opcode_t op)
 bool ppu_interpreter::ISYNC(ppu_thread& ppu, ppu_opcode_t op)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_acquire);
 return true;
 }
@@ -4046,7 +4047,7 @@ bool ppu_interpreter::LFSUX(ppu_thread& ppu, ppu_opcode_t op)
 bool ppu_interpreter::SYNC(ppu_thread& ppu, ppu_opcode_t op)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }
@@ -4280,7 +4281,7 @@ bool ppu_interpreter::SRADI(ppu_thread& ppu, ppu_opcode_t op)
 bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }

View File

@@ -1848,7 +1848,7 @@ void PPUTranslator::CRANDC(ppu_opcode_t op)
 void PPUTranslator::ISYNC(ppu_opcode_t op)
 {
-m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent);
+m_ir->CreateFence(AtomicOrdering::Acquire);
 }
 void PPUTranslator::CRXOR(ppu_opcode_t op)
@@ -3105,7 +3105,9 @@ void PPUTranslator::LFSUX(ppu_opcode_t op)
 void PPUTranslator::SYNC(ppu_opcode_t op)
 {
-m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent);
+// sync: Full seq cst barrier
+// lwsync: Release barrier
+m_ir->CreateFence(op.l10 ? AtomicOrdering::Release : AtomicOrdering::SequentiallyConsistent);
 }
 void PPUTranslator::LFDX(ppu_opcode_t op)
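
op.l10 above is the decoded L field of the sync opcode (1 means lwsync). A standalone sketch (assumed setup, not the translator's real scaffolding) of emitting the same fence selection with LLVM's IRBuilder:

#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/raw_ostream.h>

using namespace llvm;

int main()
{
    LLVMContext ctx;
    Module mod("sketch", ctx);
    Function* fn = Function::Create(FunctionType::get(Type::getVoidTy(ctx), false),
                                    Function::ExternalLinkage, "barrier_demo", &mod);
    IRBuilder<> ir(BasicBlock::Create(ctx, "entry", fn));

    const bool l10 = true; // pretend the decoded L field says lwsync
    // Same selection as PPUTranslator::SYNC above
    ir.CreateFence(l10 ? AtomicOrdering::Release : AtomicOrdering::SequentiallyConsistent);
    ir.CreateRetVoid();

    mod.print(outs(), nullptr); // the printed body contains: fence release
}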

View File

@@ -6,6 +6,8 @@
 #include "Emu/Cell/RawSPUThread.h"
+#include <atomic>
 // Originally, SPU MFC registers are accessed externally in a concurrent manner (don't mix with channels, SPU MFC channels are isolated)
 thread_local spu_mfc_cmd g_tls_mfc[8] = {};
@@ -173,7 +175,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
 case MFC_SYNC_CMD:
 {
 g_tls_mfc[index] = {};
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }
 }

View File

@@ -140,14 +140,14 @@ bool spu_interpreter::LNOP(spu_thread& spu, spu_opcode_t op)
 // This instruction must be used following a store instruction that modifies the instruction stream.
 bool spu_interpreter::SYNC(spu_thread& spu, spu_opcode_t op)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }
 // This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
 bool spu_interpreter::DSYNC(spu_thread& spu, spu_opcode_t op)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }

View File

@@ -1662,7 +1662,7 @@ void spu_thread::do_mfc(bool wait)
 if (&args - mfc_queue <= removed)
 {
 // Remove barrier-class command if it's the first in the queue
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 removed++;
 return true;
 }
@@ -2086,7 +2086,7 @@ bool spu_thread::process_mfc_cmd()
 {
 if (mfc_size == 0)
 {
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 }
 else
 {
@@ -3025,12 +3025,13 @@ bool spu_thread::stop_and_signal(u32 code)
 case 0x100:
 {
+// SPU thread group yield (TODO)
 if (ch_out_mbox.get_count())
 {
 fmt::throw_exception("STOP code 0x100: Out_MBox is not empty" HERE);
 }
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return true;
 }

View File

@@ -284,7 +284,7 @@ public:
 // push unconditionally (overwriting latest value), returns true if needs signaling
 void push(cpu_thread& spu, u32 value)
 {
-value3 = value; _mm_sfence();
+value3.store(value);
 if (values.atomic_op([=](sync_var_t& data) -> bool
 {
@@ -325,7 +325,6 @@ public:
 data.value0 = data.value1;
 data.value1 = data.value2;
-_mm_lfence();
 data.value2 = this->value3;
 }
 else

View File

@@ -1,6 +1,7 @@
 #include "stdafx.h"
 #include "sys_rsx.h"
+#include <atomic>
 #include "Emu/System.h"
 #include "Emu/Cell/PPUModule.h"
 #include "Emu/RSX/GSRender.h"
@@ -175,7 +176,7 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f
 {
 sys_rsx.warning("sys_rsx_context_iomap(context_id=0x%x, io=0x%x, ea=0x%x, size=0x%x, flags=0x%llx)", context_id, io, ea, size, flags);
-if (!size || io & 0xFFFFF || ea + u64{size} >= rsx::constants::local_mem_base || ea & 0xFFFFF || size & 0xFFFFF ||
+if (!size || io & 0xFFFFF || ea + u64{size} > rsx::constants::local_mem_base || ea & 0xFFFFF || size & 0xFFFFF ||
 rsx::get_current_renderer()->main_mem_size < io + u64{size})
 {
 return CELL_EINVAL;
@@ -195,8 +196,8 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f
 for (u32 i = 0; i < size; i++)
 {
-RSXIOMem.io[ea + i].release(io + i);
-RSXIOMem.ea[io + i].release(ea + i);
+RSXIOMem.io[ea + i].raw() = io + i;
+RSXIOMem.ea[io + i].raw() = ea + i;
 }
 return CELL_OK;
@@ -220,10 +221,11 @@ error_code sys_rsx_context_iounmap(u32 context_id, u32 io, u32 size)
 const u32 end = (io >>= 20) + (size >>= 20);
 for (u32 ea = RSXIOMem.ea[io]; io < end;)
 {
-RSXIOMem.io[ea++].release(0xFFFF);
-RSXIOMem.ea[io++].release(0xFFFF);
+RSXIOMem.io[ea++].raw() = 0xFFFF;
+RSXIOMem.ea[io++].raw() = 0xFFFF;
 }
+std::atomic_thread_fence(std::memory_order_seq_cst);
 return CELL_OK;
 }

View File

@@ -750,7 +750,7 @@ namespace vm
 const u32 size = ::align(orig_size, min_page_size);
 // return if addr or size is invalid
-if (!size || addr < this->addr || addr + u64{size} > this->addr + this->size || flags & 0x10)
+if (!size || addr < this->addr || addr + u64{size} > this->addr + u64{this->size} || flags & 0x10)
 {
 return 0;
 }
@@ -823,7 +823,7 @@ namespace vm
 std::pair<u32, std::shared_ptr<utils::shm>> block_t::get(u32 addr, u32 size)
 {
-if (addr < this->addr || addr + u64{size} > this->addr + this->size)
+if (addr < this->addr || addr + u64{size} > this->addr + u64{this->size})
 {
 return {addr, nullptr};
 }
@@ -852,7 +852,7 @@ namespace vm
 }
 // Range check
-if (std::max<u32>(size, addr - found->first + size) > found->second.second->size())
+if (addr + u64{size} > found->first + u64{found->second.second->size()})
 {
 return {addr, nullptr};
 }
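
These vm changes widen one operand to u64 so the range checks cannot wrap around in 32-bit arithmetic. A minimal sketch of the failure mode and the fix:

#include <cassert>
#include <cstdint>

// With pure u32 math, addr + size can overflow and pass a bounds test:
// 0xFFFF0000u + 0x20000u wraps to 0x10000. Widening forces 64-bit math.
bool in_range(uint32_t addr, uint32_t size, uint32_t base, uint32_t len)
{
    return addr >= base && addr + uint64_t{size} <= base + uint64_t{len};
}

int main()
{
    assert(!in_range(0xFFFF0000u, 0x20000u, 0x0u, 0x100000u)); // wraps in u32, rejected in u64
    assert(in_range(0x10000u, 0x10000u, 0x0u, 0x100000u));
}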

View File

@@ -8,6 +8,7 @@
 #include "Emu/RSX/GSRender.h"
 #include <map>
+#include <atomic>
 #include <exception>
 namespace rsx
@@ -179,7 +180,7 @@ namespace rsx
 {
 // Load registers while the RSX is still idle
 method_registers = frame->reg_state;
-_mm_mfence();
+std::atomic_thread_fence(std::memory_order_seq_cst);
 // start up fifo buffer by dumping the put ptr to first stop
 sys_rsx_context_attribute(context_id, 0x001, 0x10000000, fifo_stops[0], 0, 0);

View File

@@ -2279,8 +2279,8 @@ namespace rsx
 image_resource_type vram_texture = 0;
 image_resource_type dest_texture = 0;
-const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
-u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
+const u32 dst_address = vm::get_addr(dst.pixels);
+u32 src_address = vm::get_addr(src.pixels);
 const f32 scale_x = fabsf(dst.scale_x);
 const f32 scale_y = fabsf(dst.scale_y);
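
vm::get_addr recovers the 32-bit guest address from a host pointer, which is what the replaced code computed by hand. Conceptually (a sketch, not the emulator's actual implementation):

#include <cstdint>

// The guest address is the host pointer's offset from the base of the
// emulated address space, i.e. (u32)((u64)ptr - (u64)vm::base(0)) spelled out.
inline uint32_t get_addr_sketch(const void* host_ptr, const unsigned char* vm_base)
{
    return static_cast<uint32_t>(static_cast<const unsigned char*>(host_ptr) - vm_base);
}

int main()
{
    static unsigned char fake_vm[0x1000]; // hypothetical mapped region
    return get_addr_sketch(fake_vm + 0x42, fake_vm) == 0x42 ? 0 : 1;
}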

View File

@@ -46,7 +46,6 @@ struct work_item
 {
 while (!processed)
 {
-_mm_lfence();
 std::this_thread::yield();
 }

View File

@@ -491,7 +491,7 @@ void GLGSRender::read_buffers()
 continue;
 rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
-u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
+u32 texaddr = vm::get_addr(color_buffer.ptr);
 const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height);
 bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i]));

View File

@@ -123,7 +123,7 @@ namespace rsx
 }
 while (m_enqueued_count.load() != m_processed_count)
-_mm_lfence();
+_mm_pause();
 }
 void dma_manager::join()
void dma_manager::join() void dma_manager::join()

View File

@@ -1,4 +1,4 @@
 #include "stdafx.h"
 #include "Emu/Memory/vm.h"
 #include "Emu/System.h"
 #include "Emu/IdManager.h"

View File

@@ -275,7 +275,6 @@ struct flush_request_task
 {
 while (num_waiters.load() != 0)
 {
-_mm_lfence();
 _mm_pause();
 }
 }
@@ -284,7 +283,6 @@ struct flush_request_task
 {
 while (pending_state.load())
 {
-_mm_lfence();
 std::this_thread::yield();
 }
 }

View File

@@ -1,4 +1,4 @@
 #include "stdafx.h"
 #include "rsx_methods.h"
 #include "RSXThread.h"
 #include "Emu/Memory/vm_reservation.h"
@@ -10,6 +10,7 @@
 #include "Capture/rsx_capture.h"
 #include <thread>
+#include <atomic>
 template <>
 void fmt_class_string<frame_limit_type>::format(std::string& out, u64 arg)
@@ -66,13 +67,13 @@ namespace rsx
 // Get raw BE value
 arg = be_t<u32>{arg}.raw();
-const auto& sema = vm::_ref<nse_t<u32>>(addr);
+const auto& sema = vm::_ref<atomic_t<nse_t<u32>>>(addr);
 // TODO: Remove vblank semaphore hack
-if (sema == arg || addr == rsx->ctxt_addr + 0x30) return;
+if (sema.load() == arg || addr == rsx->ctxt_addr + 0x30) return;
 u64 start = get_system_time();
-while (sema != arg)
+while (sema.load() != arg)
 {
 if (Emu.IsStopped())
 return;
@@ -107,7 +108,7 @@ namespace rsx
 rsx->performance_counters.idle_time += (get_system_time() - start);
 }
-void semaphore_release(thread* rsx, u32 _reg, u32 arg)
+void semaphore_release(thread* rsx, u32 /*_reg*/, u32 arg)
 {
 rsx->sync();
 rsx->sync_point_request = true;
@@ -115,7 +116,7 @@ namespace rsx
 if (LIKELY(g_use_rtm))
 {
-vm::write32(addr, arg);
+vm::_ref<atomic_t<u32>>(addr) = arg;
 }
 else
 {