diff --git a/rpcs3/Emu/Memory/atomic_type.h b/rpcs3/Emu/Memory/atomic_type.h index 3334e748fa..35ebf639bd 100644 --- a/rpcs3/Emu/Memory/atomic_type.h +++ b/rpcs3/Emu/Memory/atomic_type.h @@ -103,6 +103,36 @@ public: } } + // perform atomic operation on data with additional memory barrier + template __forceinline void atomic_op_sync(const FT atomic_proc) volatile + { + T old = read_sync(); + while (true) + { + T _new = old; + atomic_proc(_new); // function should accept reference to T type + const T val = compare_and_swap(old, _new); + if ((atomic_type&)val == (atomic_type&)old) return; + old = val; + } + } + + // perform atomic operation on data with additional memory barrier and special exit condition (if intermediate result != proceed_value) + template __forceinline RT atomic_op_sync(const RT proceed_value, const FT atomic_proc) volatile + { + T old = read_sync(); + while (true) + { + T _new = old; + RT res = (RT)atomic_proc(_new); // function should accept reference to T type and return some value + if (res != proceed_value) return res; + const T val = compare_and_swap(old, _new); + if ((atomic_type&)val == (atomic_type&)old) return proceed_value; + old = val; + } + } + + // perform non-atomic operation on data directly without memory barriers template __forceinline void direct_op(const FT direct_proc) volatile { direct_proc((T&)data); @@ -122,6 +152,13 @@ public: return (T&)res; } + // atomic bitwise AND NOT (inverts right argument), returns previous data + __forceinline const T _and_not(const T& right) volatile + { + const atomic_type res = InterlockedAnd(&data, ~(atomic_type&)(right)); + return (T&)res; + } + // atomic bitwise XOR, returns previous data __forceinline const T _xor(const T& right) volatile { diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 6751a5c548..efc7c91bf3 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -92,7 +92,7 @@ s64 spursInit( if (!isSecond) { - spurs->m.wklMask.write_relaxed(be_t::make(0xffff)); + spurs->m.wklMsk1.write_relaxed(be_t::make(0xffff)); } spurs->m.unk6[0xC] = 0; spurs->m.unk6[0xD] = 0; @@ -762,7 +762,7 @@ s64 cellSpursGetInfo(vm::ptr spurs, vm::ptr info) s64 spursWakeUp(vm::ptr spurs) { -#ifdef PRX_DEBUG +#ifdef PRX_DEBUG_XXX return cb_call>(GetCurrentPPUThread(), libsre + 0x84D8, libsre_rtoc, spurs); #endif if (!spurs) @@ -809,7 +809,7 @@ s32 spursAddWorkload( vm::ptr hook, vm::ptr hookArg) { -#ifdef PRX_DEBUG +#ifdef PRX_DEBUG_XXX return cb_call, vm::ptr, vm::ptr, u32, u64, u32, u32, u32, u32, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x96EC, libsre_rtoc, spurs, wid, pm, size, data, Memory.RealToVirtualAddr(priorityTable), minContention, maxContention, nameClass.addr(), nameInstance.addr(), hook.addr(), hookArg.addr()); @@ -833,7 +833,7 @@ s32 spursAddWorkload( u32 wnum; const u32 wmax = spurs->m.x70.read_relaxed().flags1 & 0x40 ? 0x20 : 0x10; // check isSecond (TODO: check if can be changed) - spurs->m.wklMask.atomic_op([spurs, wmax, &wnum](be_t& value) + spurs->m.wklMsk1.atomic_op([spurs, wmax, &wnum](be_t& value) { wnum = cntlz32(~(u32)value); // found empty position if (wnum < wmax) @@ -848,10 +848,11 @@ s32 spursAddWorkload( return CELL_SPURS_POLICY_MODULE_ERROR_AGAIN; } + u32 index = wnum % 0x10; if (wnum <= 15) { - assert((spurs->m.wklA1[wnum] & 0xf) == 0); - assert((spurs->m.wklB1[wnum] & 0xf) == 0); + assert((spurs->m.wklA[wnum] & 0xf) == 0); + assert((spurs->m.wklB[wnum] & 0xf) == 0); spurs->m.wklC1[wnum] = 1; spurs->m.wklD1[wnum] = 0; spurs->m.wklE1[wnum] = 0; @@ -861,29 +862,111 @@ s32 spursAddWorkload( spurs->m.wklG1[wnum].wklPriority = *(be_t*)priorityTable; spurs->m.wklH1[wnum].nameClass = nameClass; spurs->m.wklH1[wnum].nameInstance = nameInstance; - memset(spurs->m.wklF1[wnum].unk0, 0, 0x18); - // (preserve semaphore id) + memset(spurs->m.wklF1[wnum].unk0, 0, 0x18); // clear struct preserving semaphore id memset(spurs->m.wklF1[wnum].unk1, 0, 0x60); if (hook) { spurs->m.wklF1[wnum].hook = hook; spurs->m.wklF1[wnum].hookArg = hookArg; + spurs->m.wklE1[wnum] |= 2; } - spurs->m.wklY1[wnum] = 0; - if (spurs->m.x70.read_relaxed().flags1 & 0x40) + spurs->m.wklZ1[wnum] = 0; + if ((spurs->m.x70.read_relaxed().flags1 & 0x40) == 0) { - } - else - { - spurs->m.wklZ1[wnum] = 0; - spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : 0; + spurs->m.wklZ2[wnum] = 0; + spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : minContention; } } else { - + assert((spurs->m.wklA[index] & 0xf0) == 0); + assert((spurs->m.wklB[index] & 0xf0) == 0); + spurs->m.wklC2[index] = 1; + spurs->m.wklD2[index] = 0; + spurs->m.wklE2[index] = 0; + spurs->m.wklG2[index].wklPm = pm; + spurs->m.wklG2[index].wklArg = data; + spurs->m.wklG2[index].wklSize = size; + spurs->m.wklG2[index].wklPriority = *(be_t*)priorityTable; + spurs->m.wklH2[index].nameClass = nameClass; + spurs->m.wklH2[index].nameInstance = nameInstance; + memset(spurs->m.wklF2[index].unk0, 0, 0x18); // clear struct preserving semaphore id + memset(spurs->m.wklF2[index].unk1, 0, 0x60); + if (hook) + { + spurs->m.wklF2[index].hook = hook; + spurs->m.wklF2[index].hookArg = hookArg; + spurs->m.wklE2[index] |= 2; + } + spurs->m.wklZ2[index] = 0; } + u32 pos = ((~wnum * 8) | (wnum / 4)) & 0x1c; + spurs->m.wklMaxCnt[index / 4].atomic_op([pos, maxContention](be_t& v) + { + v &= ~(0xf << pos); + v |= (maxContention > 8 ? 8 : maxContention) << pos; + }); + + if (wnum <= 15) + { + spurs->m.x70._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag1 + } + else + { + spurs->m.x78._and_not({ be_t::make(0x8000 >> index) }); // clear bit in wklFlag2 + } + + spurs->m.x70.atomic_op([wnum](CellSpurs::_sub_x70& x70) + { + if (x70.unk7 == wnum) + { + x70.unk7 = 0xff; + } + }); + + u32 res_wkl; + spurs->m.wklMsk2.atomic_op_sync([spurs, wnum, &res_wkl](be_t& v) + { + CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf]; + const u32 mask = v.ToLE() & ~(0x80000000 >> wnum); + res_wkl = 0; + + for (u32 i = 0, m = 0x80000000, k = 0; i < 32; i++, m >>= 1) + { + if (mask & m) + { + CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf]; + if (current.wklPm.addr() == wkl.wklPm.addr()) + { + // if a workload with identical policy module found + res_wkl = current.wklCopy.read_relaxed(); + break; + } + else + { + k |= 0x80000000 >> current.wklCopy.read_relaxed(); + res_wkl = cntlz32(~k); + } + } + } + + wkl.wklCopy.exchange((u8)res_wkl); + }); + assert(res_wkl <= 31); + + if (wnum <= 15) + { + spurs->m.wklC1[wnum] = 2; + } + else + { + spurs->m.wklC2[index] = 2; + } + + spurs->m.unk23[5].exchange(-1); // write 0xff byte at 0xbd + spurs->m.x70._and_not({ {}, -1 }); // clear byte at 0x72 + return CELL_OK; } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index db8e50735d..47ab8d975f 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -177,7 +177,6 @@ struct CellSpurs u8 unk0[0x20]; be_t sem; // 0x20 u8 unk1[0x8]; - u32 pad; vm::bptr hook; // 0x30 vm::bptr hookArg; // 0x38 u8 unk2[0x40]; @@ -202,6 +201,7 @@ struct CellSpurs vm::bptr wklPm; // policy module be_t wklArg; // spu argument be_t wklSize; + atomic_t wklCopy; be_t wklPriority; }; @@ -215,9 +215,8 @@ struct CellSpurs struct _sub_x70 { - u8 unk0; - u8 unk1; - u8 unk2; + be_t wklFlag1; // 0x70 + u8 unk2; // 0x72 u8 unk3; u8 flags1; u8 unk5; @@ -227,7 +226,8 @@ struct CellSpurs struct _sub_x78 { - u64 unk; + be_t wklFlag2; + u8 unk[6]; }; union @@ -239,22 +239,26 @@ struct CellSpurs // real data struct { - u8 wklY1[0x10]; - u8 wklZ1[0x10]; // 0x10 - u8 wklA1[0x10]; // 0x20 - u8 wklB1[0x10]; // 0x30 + u8 wklZ1[0x10]; // 0x0 + u8 wklZ2[0x10]; // 0x10 + u8 wklA[0x10]; // 0x20 + u8 wklB[0x10]; // 0x30 u8 wklMinCnt[0x10]; // 0x40 - u8 unknown0[0x6C - 0x50]; + atomic_t wklMaxCnt[4]; // 0x50 + u8 unknown0[0x6C - 0x60]; be_t unk18; // 0x6C atomic_t<_sub_x70> x70; // 0x70 atomic_t<_sub_x78> x78; // 0x78 u8 wklC1[0x10]; // 0x80 u8 wklD1[0x10]; // 0x90 u8 wklE1[0x10]; // 0xA0 - atomic_t wklMask;// 0xB0 - u8 unknown2[0xC0 - 0xB4]; + atomic_t wklMsk1;// 0xB0 + atomic_t wklMsk2;// 0xB4 + atomic_t unk23[8];// 0xB8 u8 unk6[0x10]; // 0xC0 (SPU port at 0xc9) - u8 unknown1[0x100 - 0x0D0]; + u8 wklC2[0x10]; // 0xD0 + u8 wklD2[0x10]; // 0xE0 + u8 wklE2[0x10]; // 0xF0 _sub_str1 wklF1[0x10];// 0x100 be_t unk22; // 0x900 u8 unknown7[0x980 - 0x908]; @@ -294,9 +298,10 @@ struct CellSpurs u8 unknown9[0xE00 - 0xDD0]; _sub_str4 wklH1[0x10];// 0xE00 _sub_str2 sub3; // 0xF00 - u8 unknown6[0x1200 - 0xF80]; + u8 unknown6[0x1000 - 0xF80]; + _sub_str3 wklG2[0x10];// 0x1000 _sub_str1 wklF2[0x10];// 0x1200 - // ... + _sub_str4 wklH2[0x10];// 0x1A00 } m; // alternative implementation diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp index 1c5f4a773a..5592521d18 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp @@ -112,8 +112,7 @@ s32 cellSyncMutexUnlock(vm::ptr mutex) return CELL_SYNC_ERROR_ALIGN; } - mutex->data.read_sync(); - mutex->data.atomic_op([](CellSyncMutex::data_t& mutex) + mutex->data.atomic_op_sync([](CellSyncMutex::data_t& mutex) { mutex.m_rel++; }); @@ -178,8 +177,7 @@ s32 cellSyncBarrierNotify(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - barrier->data.read_sync(); - while (barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp)) + while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp)) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack if (Emu.IsStopped()) @@ -204,8 +202,7 @@ s32 cellSyncBarrierTryNotify(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - barrier->data.read_sync(); - return barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp); + return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp); } s32 syncBarrierTryWaitOp(CellSyncBarrier::data_t& barrier) @@ -239,8 +236,7 @@ s32 cellSyncBarrierWait(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - barrier->data.read_sync(); - while (barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp)) + while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp)) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack if (Emu.IsStopped()) @@ -265,8 +261,7 @@ s32 cellSyncBarrierTryWait(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - barrier->data.read_sync(); - return barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp); + return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp); } s32 syncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer_size)