SPU: Utilize Operating System sleep in detected RCHCNT loop

This commit is contained in:
Elad Ashkenazi 2024-07-13 10:48:41 +03:00
parent f3ceebabd9
commit 77e8f9a8ab
7 changed files with 271 additions and 100 deletions

View File

@ -244,7 +244,14 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
case SPU_In_MBox_offs:
{
ch_in_mbox.push(value);
if (!ch_in_mbox.push(value).op_done)
{
if (auto cpu = cpu_thread::get_current())
{
cpu->state += cpu_flag::again;
}
}
return true;
}

View File

@ -6253,10 +6253,15 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
rchcnt_loop.ch_state = vregs[op.rt];
invalidate = false;
}
else if (rchcnt_loop.active && it != rchcnt_loop_all.end())
else if (rchcnt_loop.active)
{
// Success
it->second.active = false;
rchcnt_loop.active = false;
if (it == rchcnt_loop_all.end())
{
rchcnt_loop_all.emplace(pos, rchcnt_loop);
}
}
break;
@ -7167,7 +7172,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
{
//add_pattern(false, inst_attr::ch_lop, get_pc - result.entry_point);
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
}

View File

@ -3487,9 +3487,28 @@ public:
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
}
// Emits IR that puts the SPU thread into an OS-level sleep on the channel at
// byte offset `off` within spu_thread until it becomes ready, then yields the
// channel's current count. The result is XORed with `inv`, matching RCHCNT
// semantics where write-channels report remaining capacity rather than
// occupancy (presumably — confirm against the RCHCNT cases that pass inv).
llvm::Value* wait_rchcnt(u32 off, u64 inv = 0)
{
// Host callback invoked from JIT-compiled code. `is_read` selects the wait
// direction; neither branch consumes or produces a value (pop=false /
// push=false), it only blocks until the channel state changes.
auto wait_on_channel = [](spu_thread* _spu, spu_channel* ch, u32 is_read) -> u32
{
if (is_read)
{
// Read channel: sleep until a value is present, without popping it.
ch->pop_wait(*_spu, false);
}
else
{
// Write channel: sleep until there is room, without pushing anything.
ch->push_wait(*_spu, 0, false);
}
return ch->get_count();
};
// is_read is derived as !inv: channels queried with inv set are write
// channels, which use the push_wait path above.
return m_ir->CreateXor(call("wait_on_spu_channel", +wait_on_channel, m_thread, _ptr<u64>(m_thread, off), m_ir->getInt32(!inv)), m_ir->getInt32(inv));
}
void RCHCNT(spu_opcode_t op) //
{
value_t<u32> res;
value_t<u32> res{};
if (m_interp_magn)
{
@ -3532,6 +3551,50 @@ public:
}
}
if (m_inst_attrs[(m_pos - m_base) / 4] == inst_attr::rchcnt_loop)
{
switch (op.ra)
{
case SPU_WrOutMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
break;
}
case SPU_WrOutIntrMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
break;
}
case SPU_RdSigNotify1:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr1));
break;
}
case SPU_RdSigNotify2:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr2));
break;
}
case SPU_RdInMbox:
{
auto wait_inbox = [](spu_thread* _spu, spu_channel_4_t* ch) -> u32
{
return ch->pop_wait(*_spu, false), ch->get_count();
};
res.value = call("wait_spu_inbox", +wait_inbox, m_thread, spu_ptr<void*>(&spu_thread::ch_in_mbox));
break;
}
default: break;
}
if (res.value)
{
set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
return;
}
}
switch (op.ra)
{
case SPU_WrOutMbox:

View File

@ -390,6 +390,7 @@ protected:
omit,
putllc16,
putllc0,
rchcnt_loop,
};
std::vector<inst_attr> m_inst_attrs;

View File

@ -2235,7 +2235,7 @@ void spu_thread::push_snr(u32 number, u32 value)
const bool bitor_bit = !!((snr_config >> number) & 1);
// Redundant, g_use_rtm is checked inside tx_start now.
if (g_use_rtm)
if (g_use_rtm && false)
{
bool channel_notify = false;
bool thread_notify = false;
@ -2295,8 +2295,21 @@ void spu_thread::push_snr(u32 number, u32 value)
});
// Check corresponding SNR register settings
if (channel->push(value, bitor_bit))
auto push_state = channel->push(value, bitor_bit);
if (push_state.old_count < push_state.count)
{
set_events(event_bit);
}
else if (!push_state.op_done)
{
ensure(is_stopped());
if (auto cpu = cpu_thread::get_current())
{
cpu->state += cpu_flag::again;
}
}
ch_events.atomic_op([](ch_events_t& ev)
{
@ -6846,8 +6859,13 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
return false;
}
data = bit_wait;
jostling_value.release(bit_wait);
data = (pop ? bit_occupy : 0) | bit_wait;
if (pop)
{
jostling_value.release(bit_occupy);
}
return true;
}).first;
@ -6862,29 +6880,39 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
if (!(data & bit_wait))
{
return static_cast<u32>(jostling_value);
return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
}
}
const u32 wait_on_val = static_cast<u32>(((pop ? bit_occupy : 0) | bit_wait) >> 32);
while (true)
{
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32});
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], wait_on_val);
old = data;
if (!(old & bit_wait))
{
return static_cast<u32>(jostling_value);
return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
}
if (spu.is_stopped())
{
// Abort waiting and test if a value has been received
if (u64 v = jostling_value.exchange(0); !(v & bit_wait))
if (pop)
{
return static_cast<u32>(v);
if (u64 v = jostling_value.exchange(0); !(v & bit_occupy))
{
return static_cast<u32>(v);
}
ensure(data.fetch_and(~(bit_wait | bit_occupy)) & bit_wait);
}
else
{
data.bit_test_reset(off_wait);
}
ensure(data.bit_test_reset(off_wait));
return -1;
}
}
@ -6898,8 +6926,8 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{
if (data & bit_count) [[unlikely]]
{
jostling_value.release(push ? value : static_cast<u32>(data));
data |= bit_wait;
jostling_value.release(push ? (bit_occupy | value) : static_cast<u32>(data));
data |= (push ? bit_occupy : 0) | bit_wait;
}
else if (push)
{
@ -6919,11 +6947,6 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{
if (!(state & bit_wait))
{
if (!push)
{
data &= ~bit_count;
}
return true;
}
@ -6935,18 +6958,12 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{
if (!(state & bit_wait))
{
if (!push)
{
data &= ~bit_count;
}
return true;
}
if (spu.is_stopped())
{
data &= ~bit_wait;
return false;
return !data.bit_test_reset(off_wait);
}
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
@ -6954,12 +6971,17 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
}
}
std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu, bool pop_value)
{
auto old = values.fetch_op([&](sync_var_t& data)
{
if (data.count != 0)
{
if (!pop_value)
{
return;
}
data.waiting = 0;
data.count--;
@ -6969,8 +6991,8 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
}
else
{
data.waiting = 1;
jostling_value.release(bit_wait);
data.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
jostling_value.release(pop_value ? jostling_flag : 0);
}
});
@ -6979,7 +7001,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
return {old.count, old.value0};
}
old.waiting = 1;
old.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
for (int i = 0; i < 10; i++)
{
@ -6987,7 +7009,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
if (!atomic_storage<u8>::load(values.raw().waiting))
{
return {1, static_cast<u32>(jostling_value)};
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
}
}
@ -6996,26 +7018,91 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
old = values;
if (!old.waiting)
if (~old.waiting & bit_wait)
{
// Count of 1 because a value has been inserted and popped in the same step.
return {1, static_cast<u32>(jostling_value)};
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
}
if (spu.is_stopped())
{
// Abort waiting and test if a value has been received
if (u64 v = jostling_value.exchange(0); !(v & bit_wait))
if (pop_value)
{
return {1, static_cast<u32>(v)};
// Abort waiting and test if a value has been received
if (u64 v = jostling_value.exchange(0); !(v & jostling_flag))
{
return {1, static_cast<u32>(v)};
}
}
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
{
// Count of 1 because a value has been inserted and popped in the same step.
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
}
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
return {};
}
}
}
// Push a value into the 4-deep channel unconditionally (overwriting the
// newest slot when already full). Lock-free: retries on contention with a
// waiting consumer. Returns the channel state before/after the push; callers
// must check op_done (false means the wait bit vanished unexpectedly, e.g.
// at emulation stop) and notify when requested via postpone_notify.
spu_channel_op_state spu_channel_4_t::push(u32 value, bool postpone_notify)
{
while (true)
{
// Publish value3 first so a full-queue overwrite is observable via
// value3_inval below.
value3.release(value);
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
{
// A consumer has claimed the next value (bit_occupy): hand the value
// over through jostling_value instead of the data slots.
if (data.waiting & bit_occupy)
{
return false;
}
switch (data.count++)
{
case 0: data.value0 = value; break;
case 1: data.value1 = value; break;
case 2: data.value2 = value; break;
default:
{
// Queue full: count saturates at 4, newest value lives in value3.
data.count = 4;
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
break;
}
}
return true;
});
if (!pushed_to_data)
{
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(jostling_flag, value))
{
// Other thread has inserted a value through jostling_value, retry
continue;
}
}
if (old.waiting & bit_wait)
{
// Turn off waiting bit manually (must succeed because the waiting bit can only be reset by the thread pushing to jostling_value)
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
{
// Could be fatal or at emulation stopping, to be checked by the caller
return { old.count, old.count, false, false };
}
if (!postpone_notify)
{
// Wake the sleeping consumer now; otherwise the caller invokes
// notify() itself after releasing its locks.
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
}
}
// New count saturates at 4; notify reports whether a waiter was present.
return { old.count, std::min<u8>(static_cast<u8>(old.count + 1), 4), !!(old.waiting & bit_wait), true };
}
}
template <>
void fmt_class_string<spu_channel>::format(std::string& out, u64 arg)
{

View File

@ -176,6 +176,14 @@ enum : u32
SPU_FAKE_BASE_ADDR = 0xE8000000,
};
// Result of a channel push operation.
struct spu_channel_op_state
{
// Channel count before the push
u8 old_count;
// Channel count after the push
u8 count;
// True if a waiter was observed and the caller deferred notification
// (postpone_notify); the caller is expected to invoke notify() itself
bool notify;
// False if the operation did not complete (e.g. aborted at emulation
// stop); callers respond by setting cpu_flag::again and retrying later
bool op_done;
};
struct alignas(16) spu_channel
{
// Low 32 bits contain value
@ -186,8 +194,10 @@ struct alignas(16) spu_channel
public:
static constexpr u32 off_wait = 32;
static constexpr u32 off_occupy = 32;
static constexpr u32 off_count = 63;
static constexpr u64 bit_wait = 1ull << off_wait;
static constexpr u64 bit_occupy = 1ull << off_occupy;
static constexpr u64 bit_count = 1ull << off_count;
// Returns true on success
@ -207,20 +217,21 @@ public:
// Push unconditionally, may require notification
// Performing bitwise OR with previous value if specified, otherwise overwriting it
bool push(u32 value, bool to_or = false)
// Returns old count and new count
spu_channel_op_state push(u32 value, bool to_or = false, bool postpone_notify = false)
{
while (true)
{
const auto [old, pushed_to_data] = data.fetch_op([&](u64& data)
{
if (data == bit_wait)
if (data & bit_occupy)
{
return false;
}
if (to_or)
{
data |= bit_count | value;
data = bit_count | (static_cast<u32>(data) | value);
}
else
{
@ -233,26 +244,42 @@ public:
if (!pushed_to_data)
{
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(bit_wait, value))
if (!jostling_value.compare_and_swap_test(bit_occupy, value))
{
// Other thread has inserted a value through jostling_value, retry
continue;
}
}
if (old & bit_wait)
{
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread that pushed to jostling_value)
ensure(this->data.bit_test_reset(off_wait));
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
if (!this->data.bit_test_reset(off_wait))
{
// Could be fatal or at emulation stopping, to be checked by the caller
return { (old & bit_count) == 0, 0, false, false };
}
if (!postpone_notify)
{
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
}
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
return !pushed_to_data || (old & bit_count) == 0;
return { (old & bit_count) == 0, 1, (old & bit_wait) != 0, true };
}
}
// Wake one thread sleeping on this channel's wait bit (upper 32 bits of
// data). Used by callers that pushed with postpone_notify and must signal
// the waiter after releasing their own locks.
void notify()
{
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
// Returns true on success
bool try_pop(u32& out)
{
return data.fetch_op([&](u64& data)
return data.fetch_op([&out](u64& data)
{
if (data & bit_count) [[likely]]
{
@ -284,7 +311,7 @@ public:
u32 pop()
{
// Value is not cleared and may be read again
constexpr u64 mask = bit_count | bit_wait;
constexpr u64 mask = bit_count | bit_occupy;
const u64 old = data.fetch_op([&](u64& data)
{
@ -295,10 +322,10 @@ public:
return;
}
data &= ~mask;
data &= ~(mask | bit_wait);
});
if ((old & mask) == mask)
if (old & bit_wait)
{
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
@ -324,7 +351,7 @@ public:
u32 get_count() const
{
return static_cast<u32>(data >> off_count);
return (data & bit_count) ? 1 : 0;
}
};
@ -344,59 +371,26 @@ struct spu_channel_4_t
atomic_t<u64> jostling_value;
atomic_t<u32> value3;
static constexpr u32 off_wait = 32;
static constexpr u32 off_wait = 0;
static constexpr u32 off_occupy = 7;
static constexpr u64 bit_wait = 1ull << off_wait;
static constexpr u64 bit_occupy = 1ull << off_occupy;
static constexpr u64 jostling_flag = 1ull << 63;
// Reset the channel to the empty state: no queued values, no pending
// jostling value, and a zeroed value3 slot. Not synchronized with
// concurrent pushers/poppers — caller must ensure exclusivity.
void clear()
{
values.release({});
jostling_value.release(0);
value3.release(0);
}
// push unconditionally (overwriting latest value), returns true if needs signaling
void push(u32 value)
// returning if could be aborted (operation failed unexpectedly)
spu_channel_op_state push(u32 value, bool postpone_notify = false);
void notify()
{
while (true)
{
value3.release(value);
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
{
if (data.waiting)
{
return false;
}
switch (data.count++)
{
case 0: data.value0 = value; break;
case 1: data.value1 = value; break;
case 2: data.value2 = value; break;
default:
{
data.count = 4;
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
break;
}
}
return true;
});
if (!pushed_to_data)
{
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(bit_wait, value))
{
// Other thread has inserted a value through jostling_value, retry
continue;
}
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value)
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
}
return;
}
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
}
// returns non-zero value on success: queue size before removal
@ -422,7 +416,7 @@ struct spu_channel_4_t
}
// Returns [previous count, value] (if aborted 0 count is returned)
std::pair<u32, u32> pop_wait(cpu_thread& spu);
std::pair<u32, u32> pop_wait(cpu_thread& spu, bool pop_value = true);
// returns current queue size without modification
uint try_read(u32 (&out)[4]) const
@ -443,7 +437,7 @@ struct spu_channel_4_t
u32 get_count() const
{
return std::as_const(values).raw().count;
return atomic_storage<u8>::load(std::as_const(values).raw().count);
}
void set_values(u32 count, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0)

View File

@ -1749,9 +1749,23 @@ error_code sys_spu_thread_write_spu_mb(ppu_thread& ppu, u32 id, u32 value)
return CELL_ESRCH;
}
std::lock_guard lock(group->mutex);
spu_channel_op_state state{};
{
std::lock_guard lock(group->mutex);
thread->ch_in_mbox.push(value);
state = thread->ch_in_mbox.push(value, true);
}
if (!state.op_done)
{
ppu.state += cpu_flag::again;
return {};
}
if (state.notify)
{
thread->ch_in_mbox.notify();
}
return CELL_OK;
}