mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-12 04:14:35 +00:00
spursAddWorkload
This commit is contained in:
parent
ae17ef4d68
commit
c53a822c4e
@ -103,6 +103,36 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// perform atomic operation on data with additional memory barrier
// Read-modify-write retry loop: atomic_proc is applied to a local copy of the last
// observed value, and the modified copy is then offered to compare_and_swap().
// The loop ends once the value returned by compare_and_swap() is bitwise equal
// (compared as atomic_type) to the value the copy was made from; otherwise the
// returned value becomes the new starting point and atomic_proc is invoked again.
// atomic_proc can therefore be called more than once per atomic_op_sync() call.
// NOTE(review): assumes compare_and_swap() returns the previously stored value - confirm.
|
||||
template<typename FT> __forceinline void atomic_op_sync(const FT atomic_proc) volatile
|
||||
{
|
||||
// initial snapshot is taken with read_sync()
T old = read_sync();
|
||||
while (true)
|
||||
{
|
||||
// work on a copy so that `old` stays available as the comparand
T _new = old;
|
||||
atomic_proc(_new); // function should accept reference to T type
|
||||
const T val = compare_and_swap(old, _new);
|
||||
// done when the value reported by compare_and_swap matches the snapshot
if ((atomic_type&)val == (atomic_type&)old) return;
|
||||
// otherwise retry starting from the value that was just reported
old = val;
|
||||
}
|
||||
}
|
||||
|
||||
// perform atomic operation on data with additional memory barrier and special exit condition (if intermediate result != proceed_value)
// Same retry loop as the single-argument atomic_op_sync(), except that atomic_proc
// returns a value (converted to RT) which is checked on every iteration:
//  - if it differs from proceed_value, it is returned immediately and
//    compare_and_swap() is not called for that iteration (the modified copy is dropped);
//  - if it equals proceed_value, the modified copy is offered to compare_and_swap(),
//    and proceed_value is returned once the reported value matches the snapshot.
// atomic_proc can be called more than once per atomic_op_sync() call.
// NOTE(review): assumes compare_and_swap() returns the previously stored value - confirm.
|
||||
template<typename RT, typename FT> __forceinline RT atomic_op_sync(const RT proceed_value, const FT atomic_proc) volatile
|
||||
{
|
||||
// initial snapshot is taken with read_sync()
T old = read_sync();
|
||||
while (true)
|
||||
{
|
||||
// work on a copy so that `old` stays available as the comparand
T _new = old;
|
||||
RT res = (RT)atomic_proc(_new); // function should accept reference to T type and return some value
|
||||
// special exit: leave without attempting the swap
if (res != proceed_value) return res;
|
||||
const T val = compare_and_swap(old, _new);
|
||||
// done when the value reported by compare_and_swap matches the snapshot
if ((atomic_type&)val == (atomic_type&)old) return proceed_value;
|
||||
// otherwise retry starting from the value that was just reported
old = val;
|
||||
}
|
||||
}
|
||||
|
||||
// perform non-atomic operation on data directly without memory barriers
|
||||
template<typename FT> __forceinline void direct_op(const FT direct_proc) volatile
|
||||
{
|
||||
direct_proc((T&)data);
|
||||
@ -122,6 +152,13 @@ public:
|
||||
return (T&)res;
|
||||
}
|
||||
|
||||
// atomic bitwise AND NOT (inverts right argument), returns previous data
// `right` is reinterpreted as atomic_type, bitwise-inverted and passed to
// InterlockedAnd() together with the address of `data`, so the bits that are set
// in `right` are the ones masked out of `data`.
// The result of InterlockedAnd() is reinterpreted back as T and returned by value.
|
||||
__forceinline const T _and_not(const T& right) volatile
|
||||
{
|
||||
// the mask is the complement of the raw representation of `right`
const atomic_type res = InterlockedAnd(&data, ~(atomic_type&)(right));
|
||||
return (T&)res;
|
||||
}
|
||||
|
||||
// atomic bitwise XOR, returns previous data
|
||||
__forceinline const T _xor(const T& right) volatile
|
||||
{
|
||||
|
@ -92,7 +92,7 @@ s64 spursInit(
|
||||
|
||||
if (!isSecond)
|
||||
{
|
||||
spurs->m.wklMask.write_relaxed(be_t<u32>::make(0xffff));
|
||||
spurs->m.wklMsk1.write_relaxed(be_t<u32>::make(0xffff));
|
||||
}
|
||||
spurs->m.unk6[0xC] = 0;
|
||||
spurs->m.unk6[0xD] = 0;
|
||||
@ -762,7 +762,7 @@ s64 cellSpursGetInfo(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursInfo> info)
|
||||
|
||||
s64 spursWakeUp(vm::ptr<CellSpurs> spurs)
|
||||
{
|
||||
#ifdef PRX_DEBUG
|
||||
#ifdef PRX_DEBUG_XXX
|
||||
return cb_call<s32, vm::ptr<CellSpurs>>(GetCurrentPPUThread(), libsre + 0x84D8, libsre_rtoc, spurs);
|
||||
#endif
|
||||
if (!spurs)
|
||||
@ -809,7 +809,7 @@ s32 spursAddWorkload(
|
||||
vm::ptr<CellSpursShutdownCompletionEventHook> hook,
|
||||
vm::ptr<void> hookArg)
|
||||
{
|
||||
#ifdef PRX_DEBUG
|
||||
#ifdef PRX_DEBUG_XXX
|
||||
return cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<u32>, vm::ptr<const void>, u32, u64, u32, u32, u32, u32, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x96EC, libsre_rtoc,
|
||||
spurs, wid, pm, size, data, Memory.RealToVirtualAddr(priorityTable), minContention, maxContention,
|
||||
nameClass.addr(), nameInstance.addr(), hook.addr(), hookArg.addr());
|
||||
@ -833,7 +833,7 @@ s32 spursAddWorkload(
|
||||
|
||||
u32 wnum;
|
||||
const u32 wmax = spurs->m.x70.read_relaxed().flags1 & 0x40 ? 0x20 : 0x10; // check isSecond (TODO: check if can be changed)
|
||||
spurs->m.wklMask.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
|
||||
spurs->m.wklMsk1.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
|
||||
{
|
||||
wnum = cntlz32(~(u32)value); // found empty position
|
||||
if (wnum < wmax)
|
||||
@ -848,10 +848,11 @@ s32 spursAddWorkload(
|
||||
return CELL_SPURS_POLICY_MODULE_ERROR_AGAIN;
|
||||
}
|
||||
|
||||
u32 index = wnum % 0x10;
|
||||
if (wnum <= 15)
|
||||
{
|
||||
assert((spurs->m.wklA1[wnum] & 0xf) == 0);
|
||||
assert((spurs->m.wklB1[wnum] & 0xf) == 0);
|
||||
assert((spurs->m.wklA[wnum] & 0xf) == 0);
|
||||
assert((spurs->m.wklB[wnum] & 0xf) == 0);
|
||||
spurs->m.wklC1[wnum] = 1;
|
||||
spurs->m.wklD1[wnum] = 0;
|
||||
spurs->m.wklE1[wnum] = 0;
|
||||
@ -861,29 +862,111 @@ s32 spursAddWorkload(
|
||||
spurs->m.wklG1[wnum].wklPriority = *(be_t<u64>*)priorityTable;
|
||||
spurs->m.wklH1[wnum].nameClass = nameClass;
|
||||
spurs->m.wklH1[wnum].nameInstance = nameInstance;
|
||||
memset(spurs->m.wklF1[wnum].unk0, 0, 0x18);
|
||||
// (preserve semaphore id)
|
||||
memset(spurs->m.wklF1[wnum].unk0, 0, 0x18); // clear struct preserving semaphore id
|
||||
memset(spurs->m.wklF1[wnum].unk1, 0, 0x60);
|
||||
if (hook)
|
||||
{
|
||||
spurs->m.wklF1[wnum].hook = hook;
|
||||
spurs->m.wklF1[wnum].hookArg = hookArg;
|
||||
spurs->m.wklE1[wnum] |= 2;
|
||||
}
|
||||
spurs->m.wklY1[wnum] = 0;
|
||||
if (spurs->m.x70.read_relaxed().flags1 & 0x40)
|
||||
spurs->m.wklZ1[wnum] = 0;
|
||||
if ((spurs->m.x70.read_relaxed().flags1 & 0x40) == 0)
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
spurs->m.wklZ1[wnum] = 0;
|
||||
spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : 0;
|
||||
spurs->m.wklZ2[wnum] = 0;
|
||||
spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : minContention;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
assert((spurs->m.wklA[index] & 0xf0) == 0);
|
||||
assert((spurs->m.wklB[index] & 0xf0) == 0);
|
||||
spurs->m.wklC2[index] = 1;
|
||||
spurs->m.wklD2[index] = 0;
|
||||
spurs->m.wklE2[index] = 0;
|
||||
spurs->m.wklG2[index].wklPm = pm;
|
||||
spurs->m.wklG2[index].wklArg = data;
|
||||
spurs->m.wklG2[index].wklSize = size;
|
||||
spurs->m.wklG2[index].wklPriority = *(be_t<u64>*)priorityTable;
|
||||
spurs->m.wklH2[index].nameClass = nameClass;
|
||||
spurs->m.wklH2[index].nameInstance = nameInstance;
|
||||
memset(spurs->m.wklF2[index].unk0, 0, 0x18); // clear struct preserving semaphore id
|
||||
memset(spurs->m.wklF2[index].unk1, 0, 0x60);
|
||||
if (hook)
|
||||
{
|
||||
spurs->m.wklF2[index].hook = hook;
|
||||
spurs->m.wklF2[index].hookArg = hookArg;
|
||||
spurs->m.wklE2[index] |= 2;
|
||||
}
|
||||
spurs->m.wklZ2[index] = 0;
|
||||
}
|
||||
|
||||
u32 pos = ((~wnum * 8) | (wnum / 4)) & 0x1c;
|
||||
spurs->m.wklMaxCnt[index / 4].atomic_op([pos, maxContention](be_t<u32>& v)
|
||||
{
|
||||
v &= ~(0xf << pos);
|
||||
v |= (maxContention > 8 ? 8 : maxContention) << pos;
|
||||
});
|
||||
|
||||
if (wnum <= 15)
|
||||
{
|
||||
spurs->m.x70._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
|
||||
}
|
||||
else
|
||||
{
|
||||
spurs->m.x78._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
|
||||
}
|
||||
|
||||
spurs->m.x70.atomic_op([wnum](CellSpurs::_sub_x70& x70)
|
||||
{
|
||||
if (x70.unk7 == wnum)
|
||||
{
|
||||
x70.unk7 = 0xff;
|
||||
}
|
||||
});
|
||||
|
||||
u32 res_wkl;
|
||||
spurs->m.wklMsk2.atomic_op_sync([spurs, wnum, &res_wkl](be_t<u32>& v)
|
||||
{
|
||||
CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf];
|
||||
const u32 mask = v.ToLE() & ~(0x80000000 >> wnum);
|
||||
res_wkl = 0;
|
||||
|
||||
for (u32 i = 0, m = 0x80000000, k = 0; i < 32; i++, m >>= 1)
|
||||
{
|
||||
if (mask & m)
|
||||
{
|
||||
CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf];
|
||||
if (current.wklPm.addr() == wkl.wklPm.addr())
|
||||
{
|
||||
// if a workload with identical policy module found
|
||||
res_wkl = current.wklCopy.read_relaxed();
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
k |= 0x80000000 >> current.wklCopy.read_relaxed();
|
||||
res_wkl = cntlz32(~k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wkl.wklCopy.exchange((u8)res_wkl);
|
||||
});
|
||||
assert(res_wkl <= 31);
|
||||
|
||||
if (wnum <= 15)
|
||||
{
|
||||
spurs->m.wklC1[wnum] = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
spurs->m.wklC2[index] = 2;
|
||||
}
|
||||
|
||||
spurs->m.unk23[5].exchange(-1); // write 0xff byte at 0xbd
|
||||
spurs->m.x70._and_not({ {}, -1 }); // clear byte at 0x72
|
||||
|
||||
return CELL_OK;
|
||||
}
|
||||
|
||||
|
@ -177,7 +177,6 @@ struct CellSpurs
|
||||
u8 unk0[0x20];
|
||||
be_t<u64> sem; // 0x20
|
||||
u8 unk1[0x8];
|
||||
u32 pad;
|
||||
vm::bptr<CellSpursShutdownCompletionEventHook, 1, u64> hook; // 0x30
|
||||
vm::bptr<void, 1, u64> hookArg; // 0x38
|
||||
u8 unk2[0x40];
|
||||
@ -202,6 +201,7 @@ struct CellSpurs
|
||||
vm::bptr<const void, 1, u64> wklPm; // policy module
|
||||
be_t<u64> wklArg; // spu argument
|
||||
be_t<u32> wklSize;
|
||||
atomic_t<u8> wklCopy;
|
||||
be_t<u64> wklPriority;
|
||||
};
|
||||
|
||||
@ -215,9 +215,8 @@ struct CellSpurs
|
||||
|
||||
struct _sub_x70
|
||||
{
|
||||
u8 unk0;
|
||||
u8 unk1;
|
||||
u8 unk2;
|
||||
be_t<u16> wklFlag1; // 0x70
|
||||
u8 unk2; // 0x72
|
||||
u8 unk3;
|
||||
u8 flags1;
|
||||
u8 unk5;
|
||||
@ -227,7 +226,8 @@ struct CellSpurs
|
||||
|
||||
struct _sub_x78
|
||||
{
|
||||
u64 unk;
|
||||
be_t<u16> wklFlag2;
|
||||
u8 unk[6];
|
||||
};
|
||||
|
||||
union
|
||||
@ -239,22 +239,26 @@ struct CellSpurs
|
||||
// real data
|
||||
struct
|
||||
{
|
||||
u8 wklY1[0x10];
|
||||
u8 wklZ1[0x10]; // 0x10
|
||||
u8 wklA1[0x10]; // 0x20
|
||||
u8 wklB1[0x10]; // 0x30
|
||||
u8 wklZ1[0x10]; // 0x0
|
||||
u8 wklZ2[0x10]; // 0x10
|
||||
u8 wklA[0x10]; // 0x20
|
||||
u8 wklB[0x10]; // 0x30
|
||||
u8 wklMinCnt[0x10]; // 0x40
|
||||
u8 unknown0[0x6C - 0x50];
|
||||
atomic_t<u32> wklMaxCnt[4]; // 0x50
|
||||
u8 unknown0[0x6C - 0x60];
|
||||
be_t<u32> unk18; // 0x6C
|
||||
atomic_t<_sub_x70> x70; // 0x70
|
||||
atomic_t<_sub_x78> x78; // 0x78
|
||||
u8 wklC1[0x10]; // 0x80
|
||||
u8 wklD1[0x10]; // 0x90
|
||||
u8 wklE1[0x10]; // 0xA0
|
||||
atomic_t<u32> wklMask;// 0xB0
|
||||
u8 unknown2[0xC0 - 0xB4];
|
||||
atomic_t<u32> wklMsk1;// 0xB0
|
||||
atomic_t<u32> wklMsk2;// 0xB4
|
||||
atomic_t<u8> unk23[8];// 0xB8
|
||||
u8 unk6[0x10]; // 0xC0 (SPU port at 0xc9)
|
||||
u8 unknown1[0x100 - 0x0D0];
|
||||
u8 wklC2[0x10]; // 0xD0
|
||||
u8 wklD2[0x10]; // 0xE0
|
||||
u8 wklE2[0x10]; // 0xF0
|
||||
_sub_str1 wklF1[0x10];// 0x100
|
||||
be_t<u64> unk22; // 0x900
|
||||
u8 unknown7[0x980 - 0x908];
|
||||
@ -294,9 +298,10 @@ struct CellSpurs
|
||||
u8 unknown9[0xE00 - 0xDD0];
|
||||
_sub_str4 wklH1[0x10];// 0xE00
|
||||
_sub_str2 sub3; // 0xF00
|
||||
u8 unknown6[0x1200 - 0xF80];
|
||||
u8 unknown6[0x1000 - 0xF80];
|
||||
_sub_str3 wklG2[0x10];// 0x1000
|
||||
_sub_str1 wklF2[0x10];// 0x1200
|
||||
// ...
|
||||
_sub_str4 wklH2[0x10];// 0x1A00
|
||||
} m;
|
||||
|
||||
// alternative implementation
|
||||
|
@ -112,8 +112,7 @@ s32 cellSyncMutexUnlock(vm::ptr<CellSyncMutex> mutex)
|
||||
return CELL_SYNC_ERROR_ALIGN;
|
||||
}
|
||||
|
||||
mutex->data.read_sync();
|
||||
mutex->data.atomic_op([](CellSyncMutex::data_t& mutex)
|
||||
mutex->data.atomic_op_sync([](CellSyncMutex::data_t& mutex)
|
||||
{
|
||||
mutex.m_rel++;
|
||||
});
|
||||
@ -178,8 +177,7 @@ s32 cellSyncBarrierNotify(vm::ptr<CellSyncBarrier> barrier)
|
||||
return CELL_SYNC_ERROR_ALIGN;
|
||||
}
|
||||
|
||||
barrier->data.read_sync();
|
||||
while (barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp))
|
||||
while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp))
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack
|
||||
if (Emu.IsStopped())
|
||||
@ -204,8 +202,7 @@ s32 cellSyncBarrierTryNotify(vm::ptr<CellSyncBarrier> barrier)
|
||||
return CELL_SYNC_ERROR_ALIGN;
|
||||
}
|
||||
|
||||
barrier->data.read_sync();
|
||||
return barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp);
|
||||
return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp);
|
||||
}
|
||||
|
||||
s32 syncBarrierTryWaitOp(CellSyncBarrier::data_t& barrier)
|
||||
@ -239,8 +236,7 @@ s32 cellSyncBarrierWait(vm::ptr<CellSyncBarrier> barrier)
|
||||
return CELL_SYNC_ERROR_ALIGN;
|
||||
}
|
||||
|
||||
barrier->data.read_sync();
|
||||
while (barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp))
|
||||
while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp))
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack
|
||||
if (Emu.IsStopped())
|
||||
@ -265,8 +261,7 @@ s32 cellSyncBarrierTryWait(vm::ptr<CellSyncBarrier> barrier)
|
||||
return CELL_SYNC_ERROR_ALIGN;
|
||||
}
|
||||
|
||||
barrier->data.read_sync();
|
||||
return barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp);
|
||||
return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp);
|
||||
}
|
||||
|
||||
s32 syncRwmInitialize(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer, u32 buffer_size)
|
||||
|
Loading…
x
Reference in New Issue
Block a user