spursAddWorkload

Nekotekina 2014-09-28 23:10:13 +04:00
parent ae17ef4d68
commit c53a822c4e
4 changed files with 161 additions and 41 deletions

View File

@@ -103,6 +103,36 @@ public:
}
}
// perform atomic operation on data with additional memory barrier
template<typename FT> __forceinline void atomic_op_sync(const FT atomic_proc) volatile
{
T old = read_sync();
while (true)
{
T _new = old;
atomic_proc(_new); // function should accept reference to T type
const T val = compare_and_swap(old, _new);
if ((atomic_type&)val == (atomic_type&)old) return;
old = val;
}
}
// perform atomic operation on data with additional memory barrier and special exit condition (if intermediate result != proceed_value)
template<typename RT, typename FT> __forceinline RT atomic_op_sync(const RT proceed_value, const FT atomic_proc) volatile
{
T old = read_sync();
while (true)
{
T _new = old;
RT res = (RT)atomic_proc(_new); // function should accept reference to T type and return some value
if (res != proceed_value) return res;
const T val = compare_and_swap(old, _new);
if ((atomic_type&)val == (atomic_type&)old) return proceed_value;
old = val;
}
}
// perform non-atomic operation on data directly without memory barriers
template<typename FT> __forceinline void direct_op(const FT direct_proc) volatile
{
direct_proc((T&)data);
@@ -122,6 +152,13 @@ public:
return (T&)res;
}
// atomic bitwise AND NOT (inverts right argument), returns previous data
__forceinline const T _and_not(const T& right) volatile
{
const atomic_type res = InterlockedAnd(&data, ~(atomic_type&)(right));
return (T&)res;
}
// atomic bitwise XOR, returns previous data
__forceinline const T _xor(const T& right) volatile
{
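
A note on the two helpers added above, since the later files rely on them: a minimal usage sketch, assuming the emulator's u32/be_t/atomic_t types are in scope. The struct and field names below are made up for illustration; only the calling pattern mirrors how this commit uses wklMsk1 and x70 in CellSpurs.

struct example_t
{
	atomic_t<u32> mask; // bitmask of used slots, stored big-endian like wklMsk1
};

void example_usage(example_t& ex)
{
	// void overload of atomic_op_sync: read with a barrier, mutate a local copy,
	// then retry compare_and_swap until it lands
	ex.mask.atomic_op_sync([](be_t<u32>& value)
	{
		value |= 0x80000000 >> 3; // e.g. mark slot 3 as used
	});

	// _and_not: atomically clear the same bit and get the previous value back
	const be_t<u32> previous = ex.mask._and_not(be_t<u32>::make(0x80000000 >> 3));
	(void)previous;
}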

View File

@@ -92,7 +92,7 @@ s64 spursInit(
if (!isSecond)
{
spurs->m.wklMask.write_relaxed(be_t<u32>::make(0xffff));
spurs->m.wklMsk1.write_relaxed(be_t<u32>::make(0xffff));
}
spurs->m.unk6[0xC] = 0;
spurs->m.unk6[0xD] = 0;
@@ -762,7 +762,7 @@ s64 cellSpursGetInfo(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursInfo> info)
s64 spursWakeUp(vm::ptr<CellSpurs> spurs)
{
#ifdef PRX_DEBUG
#ifdef PRX_DEBUG_XXX
return cb_call<s32, vm::ptr<CellSpurs>>(GetCurrentPPUThread(), libsre + 0x84D8, libsre_rtoc, spurs);
#endif
if (!spurs)
@@ -809,7 +809,7 @@ s32 spursAddWorkload(
vm::ptr<CellSpursShutdownCompletionEventHook> hook,
vm::ptr<void> hookArg)
{
#ifdef PRX_DEBUG
#ifdef PRX_DEBUG_XXX
return cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<u32>, vm::ptr<const void>, u32, u64, u32, u32, u32, u32, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x96EC, libsre_rtoc,
spurs, wid, pm, size, data, Memory.RealToVirtualAddr(priorityTable), minContention, maxContention,
nameClass.addr(), nameInstance.addr(), hook.addr(), hookArg.addr());
@@ -833,7 +833,7 @@ s32 spursAddWorkload(
u32 wnum;
const u32 wmax = spurs->m.x70.read_relaxed().flags1 & 0x40 ? 0x20 : 0x10; // check isSecond (TODO: check if can be changed)
spurs->m.wklMask.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
spurs->m.wklMsk1.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
{
wnum = cntlz32(~(u32)value); // found empty position
if (wnum < wmax)
@@ -848,10 +848,11 @@ s32 spursAddWorkload(
return CELL_SPURS_POLICY_MODULE_ERROR_AGAIN;
}
u32 index = wnum % 0x10;
if (wnum <= 15)
{
assert((spurs->m.wklA1[wnum] & 0xf) == 0);
assert((spurs->m.wklB1[wnum] & 0xf) == 0);
assert((spurs->m.wklA[wnum] & 0xf) == 0);
assert((spurs->m.wklB[wnum] & 0xf) == 0);
spurs->m.wklC1[wnum] = 1;
spurs->m.wklD1[wnum] = 0;
spurs->m.wklE1[wnum] = 0;
@@ -861,29 +862,111 @@ s32 spursAddWorkload(
spurs->m.wklG1[wnum].wklPriority = *(be_t<u64>*)priorityTable;
spurs->m.wklH1[wnum].nameClass = nameClass;
spurs->m.wklH1[wnum].nameInstance = nameInstance;
memset(spurs->m.wklF1[wnum].unk0, 0, 0x18);
// (preserve semaphore id)
memset(spurs->m.wklF1[wnum].unk0, 0, 0x18); // clear struct preserving semaphore id
memset(spurs->m.wklF1[wnum].unk1, 0, 0x60);
if (hook)
{
spurs->m.wklF1[wnum].hook = hook;
spurs->m.wklF1[wnum].hookArg = hookArg;
spurs->m.wklE1[wnum] |= 2;
}
spurs->m.wklY1[wnum] = 0;
if (spurs->m.x70.read_relaxed().flags1 & 0x40)
spurs->m.wklZ1[wnum] = 0;
if ((spurs->m.x70.read_relaxed().flags1 & 0x40) == 0)
{
}
else
{
spurs->m.wklZ1[wnum] = 0;
spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : 0;
spurs->m.wklZ2[wnum] = 0;
spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : minContention;
}
}
else
{
assert((spurs->m.wklA[index] & 0xf0) == 0);
assert((spurs->m.wklB[index] & 0xf0) == 0);
spurs->m.wklC2[index] = 1;
spurs->m.wklD2[index] = 0;
spurs->m.wklE2[index] = 0;
spurs->m.wklG2[index].wklPm = pm;
spurs->m.wklG2[index].wklArg = data;
spurs->m.wklG2[index].wklSize = size;
spurs->m.wklG2[index].wklPriority = *(be_t<u64>*)priorityTable;
spurs->m.wklH2[index].nameClass = nameClass;
spurs->m.wklH2[index].nameInstance = nameInstance;
memset(spurs->m.wklF2[index].unk0, 0, 0x18); // clear struct preserving semaphore id
memset(spurs->m.wklF2[index].unk1, 0, 0x60);
if (hook)
{
spurs->m.wklF2[index].hook = hook;
spurs->m.wklF2[index].hookArg = hookArg;
spurs->m.wklE2[index] |= 2;
}
spurs->m.wklZ2[index] = 0;
}
u32 pos = ((~wnum * 8) | (wnum / 4)) & 0x1c;
spurs->m.wklMaxCnt[index / 4].atomic_op([pos, maxContention](be_t<u32>& v)
{
v &= ~(0xf << pos);
v |= (maxContention > 8 ? 8 : maxContention) << pos;
});
if (wnum <= 15)
{
spurs->m.x70._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
}
else
{
spurs->m.x78._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
}
spurs->m.x70.atomic_op([wnum](CellSpurs::_sub_x70& x70)
{
if (x70.unk7 == wnum)
{
x70.unk7 = 0xff;
}
});
u32 res_wkl;
spurs->m.wklMsk2.atomic_op_sync([spurs, wnum, &res_wkl](be_t<u32>& v)
{
CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf];
const u32 mask = v.ToLE() & ~(0x80000000 >> wnum);
res_wkl = 0;
for (u32 i = 0, m = 0x80000000, k = 0; i < 32; i++, m >>= 1)
{
if (mask & m)
{
CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf];
if (current.wklPm.addr() == wkl.wklPm.addr())
{
// if a workload with identical policy module found
res_wkl = current.wklCopy.read_relaxed();
break;
}
else
{
k |= 0x80000000 >> current.wklCopy.read_relaxed();
res_wkl = cntlz32(~k);
}
}
}
wkl.wklCopy.exchange((u8)res_wkl);
});
assert(res_wkl <= 31);
if (wnum <= 15)
{
spurs->m.wklC1[wnum] = 2;
}
else
{
spurs->m.wklC2[index] = 2;
}
spurs->m.unk23[5].exchange(-1); // write 0xff byte at 0xbd
spurs->m.x70._and_not({ {}, -1 }); // clear byte at 0x72
return CELL_OK;
}
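
A standalone sketch of the slot search performed in the wklMsk1.atomic_op above, using plain integers and a stand-in for the emulator's cntlz32 helper (clz32 below is made up for illustration): bit (0x80000000 >> i) marks workload slot i as used, so counting the leading zeros of the inverted mask yields the first free slot.

#include <cassert>
#include <cstdint>

// Stand-in for cntlz32: count leading zero bits, 32 for a zero input.
static uint32_t clz32(uint32_t v)
{
	uint32_t n = 0;
	while (n < 32 && !(v & (0x80000000u >> n))) n++;
	return n;
}

int main()
{
	uint32_t mask = 0xE0000000u;  // slots 0..2 already taken
	uint32_t wnum = clz32(~mask); // first clear bit from the top
	assert(wnum == 3);            // slot 3 is the first free one

	mask |= 0x80000000u >> wnum;  // claim it, as the atomic_op does
	assert(clz32(~mask) == 4);    // the next caller would get slot 4

	// Once all wmax slots are taken, wnum reaches wmax and spursAddWorkload
	// returns CELL_SPURS_POLICY_MODULE_ERROR_AGAIN instead.
	return 0;
}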

View File

@@ -177,7 +177,6 @@ struct CellSpurs
u8 unk0[0x20];
be_t<u64> sem; // 0x20
u8 unk1[0x8];
u32 pad;
vm::bptr<CellSpursShutdownCompletionEventHook, 1, u64> hook; // 0x30
vm::bptr<void, 1, u64> hookArg; // 0x38
u8 unk2[0x40];
@@ -202,6 +201,7 @@ struct CellSpurs
vm::bptr<const void, 1, u64> wklPm; // policy module
be_t<u64> wklArg; // spu argument
be_t<u32> wklSize;
atomic_t<u8> wklCopy;
be_t<u64> wklPriority;
};
@@ -215,9 +215,8 @@ struct CellSpurs
struct _sub_x70
{
u8 unk0;
u8 unk1;
u8 unk2;
be_t<u16> wklFlag1; // 0x70
u8 unk2; // 0x72
u8 unk3;
u8 flags1;
u8 unk5;
@@ -227,7 +226,8 @@ struct CellSpurs
struct _sub_x78
{
u64 unk;
be_t<u16> wklFlag2;
u8 unk[6];
};
union
@@ -239,22 +239,26 @@ struct CellSpurs
// real data
struct
{
u8 wklY1[0x10];
u8 wklZ1[0x10]; // 0x10
u8 wklA1[0x10]; // 0x20
u8 wklB1[0x10]; // 0x30
u8 wklZ1[0x10]; // 0x0
u8 wklZ2[0x10]; // 0x10
u8 wklA[0x10]; // 0x20
u8 wklB[0x10]; // 0x30
u8 wklMinCnt[0x10]; // 0x40
u8 unknown0[0x6C - 0x50];
atomic_t<u32> wklMaxCnt[4]; // 0x50
u8 unknown0[0x6C - 0x60];
be_t<u32> unk18; // 0x6C
atomic_t<_sub_x70> x70; // 0x70
atomic_t<_sub_x78> x78; // 0x78
u8 wklC1[0x10]; // 0x80
u8 wklD1[0x10]; // 0x90
u8 wklE1[0x10]; // 0xA0
atomic_t<u32> wklMask;// 0xB0
u8 unknown2[0xC0 - 0xB4];
atomic_t<u32> wklMsk1;// 0xB0
atomic_t<u32> wklMsk2;// 0xB4
atomic_t<u8> unk23[8];// 0xB8
u8 unk6[0x10]; // 0xC0 (SPU port at 0xc9)
u8 unknown1[0x100 - 0x0D0];
u8 wklC2[0x10]; // 0xD0
u8 wklD2[0x10]; // 0xE0
u8 wklE2[0x10]; // 0xF0
_sub_str1 wklF1[0x10];// 0x100
be_t<u64> unk22; // 0x900
u8 unknown7[0x980 - 0x908];
@@ -294,9 +298,10 @@ struct CellSpurs
u8 unknown9[0xE00 - 0xDD0];
_sub_str4 wklH1[0x10];// 0xE00
_sub_str2 sub3; // 0xF00
u8 unknown6[0x1200 - 0xF80];
u8 unknown6[0x1000 - 0xF80];
_sub_str3 wklG2[0x10];// 0x1000
_sub_str1 wklF2[0x10];// 0x1200
// ...
_sub_str4 wklH2[0x10];// 0x1A00
} m;
// alternative implementation
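
A side note on the new wklMaxCnt[4] words at 0x50: spursAddWorkload above packs one 4-bit contention cap per workload into them, low nibbles for workloads 0-15 and high nibbles for 16-31. A standalone sketch of that bit arithmetic on plain integers (the nibble_pos helper name is made up for illustration):

#include <cassert>
#include <cstdint>

// Same expression as "((~wnum * 8) | (wnum / 4)) & 0x1c" in spursAddWorkload.
static uint32_t nibble_pos(uint32_t wnum)
{
	return ((~wnum * 8) | (wnum / 4)) & 0x1c;
}

int main()
{
	assert(nibble_pos(0)  == 24); // workload 0:  low nibble of the top byte of word 0
	assert(nibble_pos(3)  == 0);  // workload 3:  low nibble of the bottom byte of word 0
	assert(nibble_pos(16) == 28); // workload 16: high nibble of the top byte of word 0
	assert(nibble_pos(19) == 4);  // workload 19: high nibble of the bottom byte of word 0

	// Storing a capped maxContention of 6 for workload 0, as the atomic_op does:
	uint32_t v = 0;
	v &= ~(0xfu << nibble_pos(0));
	v |= 6u << nibble_pos(0);
	assert(v == 0x06000000);
	return 0;
}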

View File

@@ -112,8 +112,7 @@ s32 cellSyncMutexUnlock(vm::ptr<CellSyncMutex> mutex)
return CELL_SYNC_ERROR_ALIGN;
}
mutex->data.read_sync();
mutex->data.atomic_op([](CellSyncMutex::data_t& mutex)
mutex->data.atomic_op_sync([](CellSyncMutex::data_t& mutex)
{
mutex.m_rel++;
});
@@ -178,8 +177,7 @@ s32 cellSyncBarrierNotify(vm::ptr<CellSyncBarrier> barrier)
return CELL_SYNC_ERROR_ALIGN;
}
barrier->data.read_sync();
while (barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp))
while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp))
{
std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack
if (Emu.IsStopped())
@@ -204,8 +202,7 @@ s32 cellSyncBarrierTryNotify(vm::ptr<CellSyncBarrier> barrier)
return CELL_SYNC_ERROR_ALIGN;
}
barrier->data.read_sync();
return barrier->data.atomic_op(CELL_OK, syncBarrierTryNotifyOp);
return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp);
}
s32 syncBarrierTryWaitOp(CellSyncBarrier::data_t& barrier)
@@ -239,8 +236,7 @@ s32 cellSyncBarrierWait(vm::ptr<CellSyncBarrier> barrier)
return CELL_SYNC_ERROR_ALIGN;
}
barrier->data.read_sync();
while (barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp))
while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp))
{
std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack
if (Emu.IsStopped())
@@ -265,8 +261,7 @@ s32 cellSyncBarrierTryWait(vm::ptr<CellSyncBarrier> barrier)
return CELL_SYNC_ERROR_ALIGN;
}
barrier->data.read_sync();
return barrier->data.atomic_op(CELL_OK, syncBarrierTryWaitOp);
return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp);
}
s32 syncRwmInitialize(vm::ptr<CellSyncRwm> rwm, vm::ptr<void> buffer, u32 buffer_size)
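
For reference, the atomic_op -> atomic_op_sync substitutions above all use the two-argument overload added in the atomic header: the functor's change is committed by compare-and-swap only when it returns the proceed value (CELL_OK here); any other return aborts the operation without writing back. A minimal sketch with a hypothetical counter type; counter_t, counterTryNotifyOp and the -1 "busy" code are illustrative and not part of this commit.

struct counter_t
{
	struct data_t
	{
		be_t<u32> value;
	};

	atomic_t<data_t> data;
};

static s32 counterTryNotifyOp(counter_t::data_t& c)
{
	if (c.value.ToLE() >= 8)
	{
		return -1; // anything other than CELL_OK aborts: nothing is written back
	}
	c.value++; // same pattern as mutex.m_rel++ above
	return CELL_OK; // CELL_OK lets atomic_op_sync attempt the compare-and-swap
}

s32 counterTryNotify(counter_t& ctr)
{
	// mirrors cellSyncBarrierTryNotify above: the functor either commits its
	// change atomically or reports failure without modifying the shared data
	return ctr.data.atomic_op_sync(CELL_OK, counterTryNotifyOp);
}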