mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-29 00:33:01 +00:00
[WIP] SPU: Channel Loop Pattern Detection
This commit is contained in:
parent
a1d2a72a78
commit
aa5284c8f1
@ -2493,6 +2493,12 @@ bool reg_state_t::is_const() const
|
||||
return !!(flag & vf::is_const);
|
||||
}
|
||||
|
||||
bool reg_state_t::compare_tags(const reg_state_t& rhs) const
|
||||
{
|
||||
// Compare by tag, address of instruction origin
|
||||
return tag == rhs.tag && origin == rhs.origin && is_instruction == rhs.is_instruction;
|
||||
}
|
||||
|
||||
bool reg_state_t::operator&(vf to_test) const
|
||||
{
|
||||
return this->flag.all_of(to_test);
|
||||
@ -2521,7 +2527,7 @@ bool reg_state_t::operator==(const reg_state_t& r) const
|
||||
return false;
|
||||
}
|
||||
|
||||
return (flag & vf::is_const ? value == r.value : (tag == r.tag && known_ones == r.known_ones && known_zeroes == r.known_zeroes));
|
||||
return (flag & vf::is_const ? value == r.value : (compare_tags(r) && known_ones == r.known_ones && known_zeroes == r.known_zeroes));
|
||||
}
|
||||
|
||||
bool reg_state_t::operator==(u32 imm) const
|
||||
@ -2558,7 +2564,7 @@ bool reg_state_t::compare_with_mask_indifference(const reg_state_t& r, u32 mask_
|
||||
const auto _this = this->downgrade();
|
||||
const auto _r = r.downgrade();
|
||||
|
||||
const bool is_mask_equal = (_this.tag == _r.tag && _this.flag == _r.flag && !((_this.known_ones ^ _r.known_ones) & mask_bits) && !((_this.known_zeroes ^ _r.known_zeroes) & mask_bits));
|
||||
const bool is_mask_equal = (_this.compare_tags(_r) && _this.flag == _r.flag && !((_this.known_ones ^ _r.known_ones) & mask_bits) && !((_this.known_zeroes ^ _r.known_zeroes) & mask_bits));
|
||||
|
||||
return is_mask_equal;
|
||||
}
|
||||
@ -2611,7 +2617,7 @@ bool reg_state_t::unequal_with_mask_indifference(const reg_state_t& r, u32 mask_
|
||||
const auto _this = this->downgrade();
|
||||
const auto _r = r.downgrade();
|
||||
|
||||
const bool is_base_value_equal = (_this.tag == _r.tag && _this.flag == _r.flag);
|
||||
const bool is_base_value_equal = (_this.compare_tags(_r) && _this.flag == _r.flag);
|
||||
|
||||
if (!is_base_value_equal)
|
||||
{
|
||||
@ -2662,6 +2668,7 @@ reg_state_t reg_state_t::merge(const reg_state_t& rhs, u32 current_pc) const
|
||||
// Success (create new value tag)
|
||||
res.tag = reg_state_t::alloc_tag();
|
||||
res.origin = current_pc;
|
||||
res.is_instruction = false;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@ -4747,29 +4754,38 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
auto sort_breakig_reasons = [](const std::array<atomic_t<u64>, 128>& breaking_reason)
|
||||
{
|
||||
std::vector<std::pair<u32, u64>> map;
|
||||
for (usz i = 0; i < breaking_reason.size(); i++)
|
||||
{
|
||||
if (u64 v = breaking_reason[i])
|
||||
{
|
||||
map.emplace_back(i, v);
|
||||
}
|
||||
}
|
||||
|
||||
std::stable_sort(map.begin(), map.end(), FN(x.second > y.second));
|
||||
return map;
|
||||
};
|
||||
|
||||
struct putllc16_statistics_t
|
||||
{
|
||||
atomic_t<u64> all = 0;
|
||||
atomic_t<u64> single = 0;
|
||||
atomic_t<u64> nowrite = 0;
|
||||
std::array<atomic_t<u64>, 128> breaking_reason{};
|
||||
|
||||
std::vector<std::pair<u32, u64>> get_reasons()
|
||||
{
|
||||
std::vector<std::pair<u32, u64>> map;
|
||||
for (usz i = 0; i < breaking_reason.size(); i++)
|
||||
{
|
||||
if (u64 v = breaking_reason[i])
|
||||
{
|
||||
map.emplace_back(i, v);
|
||||
}
|
||||
}
|
||||
|
||||
std::stable_sort(map.begin(), map.end(), FN(x.second > y.second));
|
||||
return map;
|
||||
}
|
||||
};
|
||||
|
||||
struct rchcnt_statistics_t
|
||||
{
|
||||
atomic_t<u64> all = 0;
|
||||
atomic_t<u64> single = 0;
|
||||
std::array<atomic_t<u64>, 128> breaking_reason{};
|
||||
};
|
||||
|
||||
// Pattern structures
|
||||
|
||||
struct atomic16_t
|
||||
{
|
||||
bool active = false; // GETLLAR happened
|
||||
@ -4824,6 +4840,29 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
};
|
||||
|
||||
struct rchcnt_loop_t
|
||||
{
|
||||
bool active = false; // RDCH/RCHCNT happened
|
||||
bool failed = false; // needc this flag to distinguish start of the pattern vs failed pattern (they begin and end of the same address)
|
||||
bool conditioned = false; // needc this flag to distinguish start of the pattern vs failed pattern (they begin and end of the same address)
|
||||
u32 channel = 128;
|
||||
u32 read_pc = SPU_LS_SIZE; // PC of RDCH or RCHCNT (that encloses the loop)
|
||||
reg_state_t ch_state{+vf::is_null}; // Channel stat, example: RCNCNT ch_state, MFC_Cmd
|
||||
reg_state_t ch_product{+vf::is_null}; // Optional comparison state for channl state, example: CEQI ch_product, ch_state, 1
|
||||
bool product_test_negate = false; // Compare the opposite way, such as: CEQI ch_product, ch_state, 0 which turns 0 t -1 and 1 to 0
|
||||
std::basic_string<u32> origins;
|
||||
u32 branch_pc = SPU_LS_SIZE; // Where the loop branch is located
|
||||
u32 branch_target = SPU_LS_SIZE; // The target of the loop branch
|
||||
|
||||
// Return old state for error reporting
|
||||
rchcnt_loop_t discard()
|
||||
{
|
||||
const rchcnt_loop_t old = *this;
|
||||
*this = rchcnt_loop_t{};
|
||||
return old;
|
||||
}
|
||||
};
|
||||
|
||||
// Reset tags
|
||||
reg_state_t::alloc_tag(true);
|
||||
|
||||
@ -4843,6 +4882,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
// PUTLLC16 optimization analysis tracker
|
||||
atomic16_t atomic16{};
|
||||
|
||||
// RDCH/RCHCNT Loop analysis tracker
|
||||
rchcnt_loop_t rchcnt_loop{};
|
||||
|
||||
block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept
|
||||
: pc(_pc)
|
||||
, parent_iterator_index(_parent_iterator_index)
|
||||
@ -4854,6 +4896,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
std::vector<block_reg_state_iterator> reg_state_it;
|
||||
|
||||
std::map<u32, atomic16_t> atomic16_all; // RdAtomicStat location -> atomic loop optimization state
|
||||
std::map<u32, rchcnt_loop_t> rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state
|
||||
std::map<u32, bool> getllar_starts; // True for failed loops
|
||||
std::map<u32, bool> run_on_block;
|
||||
std::map<u32, bool> logged_block;
|
||||
@ -4861,6 +4904,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
std::array<reg_state_t, s_reg_max>* true_state_walkby = nullptr;
|
||||
|
||||
atomic16_t dummy16{};
|
||||
rchcnt_loop_t dummy_loop{};
|
||||
|
||||
bool likely_putllc_loop = false;
|
||||
bool had_putllc_evaluation = false;
|
||||
@ -4913,6 +4957,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const bool is_pattern_match = wf == 1;
|
||||
|
||||
dummy16.active = false;
|
||||
dummy_loop.active = false;
|
||||
|
||||
if (!is_form_block && wa == bpc)
|
||||
{
|
||||
@ -4966,7 +5011,6 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
if (f.flag & vf::is_null)
|
||||
{
|
||||
// Evaluate locally
|
||||
//f.tag = reg_state_t::alloc_tag();
|
||||
f.flag -= vf::is_null;
|
||||
}
|
||||
}
|
||||
@ -4974,6 +5018,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby;
|
||||
auto& atomic16 = is_pattern_match ? ::at32(reg_state_it, wi).atomic16 : dummy16;
|
||||
auto& rchcnt_loop = is_pattern_match ? ::at32(reg_state_it, wi).rchcnt_loop : dummy_loop;
|
||||
|
||||
const u32 pos = wa;
|
||||
|
||||
@ -5001,7 +5046,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", pos, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc);
|
||||
|
||||
const auto values = g_fxo->get<putllc16_statistics_t>().get_reasons();
|
||||
const auto values = sort_breakig_reasons(g_fxo->get<putllc16_statistics_t>().breaking_reason);
|
||||
|
||||
std::string tracing = "Top Breaking Reasons:";
|
||||
|
||||
@ -5035,14 +5080,72 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
fmt::append(tracing, " of %d failures", fail_count);
|
||||
spu_log.notice("%s\n%s", break_error, tracing);
|
||||
|
||||
if (cause == 17 || cause == 20)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const auto break_channel_pattern = [&](u32 cause, rchcnt_loop_t previous)
|
||||
{
|
||||
if (previous.active && rchcnt_loop_all.contains(previous.read_pc))
|
||||
{
|
||||
const bool is_first = !std::exchange(rchcnt_loop_all[previous.read_pc].failed, true);
|
||||
|
||||
if (!is_first)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
g_fxo->get<rchcnt_statistics_t>().breaking_reason[cause]++;
|
||||
|
||||
if (!spu_log.notice)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
std::string break_error = fmt::format("Channel pattern breakage [%x cause=%u] (read_pc=0x%x)", pos, cause, previous.read_pc);
|
||||
|
||||
const auto values = sort_breakig_reasons(g_fxo->get<rchcnt_statistics_t>().breaking_reason);
|
||||
|
||||
std::string tracing = "Top Breaking Reasons:";
|
||||
|
||||
usz i = 0;
|
||||
usz fail_count = 0;
|
||||
bool switched_to_minimal = false;
|
||||
|
||||
for (auto it = values.begin(); it != values.end(); i++, it++)
|
||||
{
|
||||
fail_count += it->second;
|
||||
|
||||
if (i >= 12)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i < 8 && it->second > 1)
|
||||
{
|
||||
fmt::append(tracing, " [cause=%u, n=%d]", it->first, it->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!std::exchange(switched_to_minimal, true))
|
||||
{
|
||||
fmt::append(tracing, "; More:");
|
||||
}
|
||||
|
||||
fmt::append(tracing, " %u", it->first);
|
||||
}
|
||||
}
|
||||
|
||||
fmt::append(tracing, " of %d failures", fail_count);
|
||||
spu_log.notice("%s\n%s", break_error, tracing);
|
||||
}
|
||||
};
|
||||
|
||||
const auto break_all_patterns = [&](u32 cause)
|
||||
{
|
||||
break_putllc16(cause, atomic16.discard());
|
||||
break_channel_pattern(cause, rchcnt_loop.discard());
|
||||
};
|
||||
|
||||
const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2)
|
||||
{
|
||||
addr1 &= SPU_LS_MASK_1;
|
||||
@ -5056,6 +5159,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
bool called_next = false;
|
||||
|
||||
u32 data{};
|
||||
|
||||
const auto next_block = [&]()
|
||||
{
|
||||
if (called_next)
|
||||
@ -5279,6 +5384,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
break_putllc16(40, reg_state_it[stackframe_it].atomic16.discard());
|
||||
}
|
||||
|
||||
if (reg_state_it[stackframe_it].rchcnt_loop.active)
|
||||
{
|
||||
break_channel_pattern(40, reg_state_it[stackframe_it].rchcnt_loop.discard());
|
||||
}
|
||||
|
||||
// Allow the block to run only once, to avoid unnecessary iterations
|
||||
run_on_block[target_pc / 4] = true;
|
||||
}
|
||||
@ -5294,8 +5404,23 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
const u32 previous_pc = m_bbs.at(reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4;
|
||||
|
||||
bool may_return = previous_pc + 4 != entry_point + result.data.size() * 4 && (m_ret_info[(previous_pc / 4) + 1] || m_entry_info[previous_pc / 4]);
|
||||
|
||||
if (!may_return)
|
||||
{
|
||||
const u32 branch_target = op_branch_targets(previous_pc, spu_opcode_t{data})[0];
|
||||
|
||||
if (branch_target == umax || branch_target >= entry_point + result.data.size() * 4 || branch_target < entry_point)
|
||||
{
|
||||
may_return = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Backup analyser information
|
||||
const auto atomic16_info = reg_state_it[stackframe_it].atomic16;
|
||||
const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop;
|
||||
|
||||
// Clean from the back possible because it does not affect old indices
|
||||
// Technically should always do a full cleanup at the moment
|
||||
@ -5314,6 +5439,18 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
if (wi != stackframe_it || may_return || !insert_entry)
|
||||
{
|
||||
// Possible function end
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
// Does not post-dominate channel reads
|
||||
auto& pair = rchcnt_loop_all[rchcnt_loop.read_pc];
|
||||
pair.failed = true;
|
||||
pair.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (insert_entry)
|
||||
{
|
||||
const usz target_size = get_block_targets(stackframe_pc).size();
|
||||
@ -5325,10 +5462,24 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
{
|
||||
// Restore analyser information (if not an entry)
|
||||
next.atomic16 = atomic16_info;
|
||||
|
||||
if (previous_pc != rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target)
|
||||
next.rchcnt_loop = rchcnt_loop_info;
|
||||
}
|
||||
else if (atomic16_info.active)
|
||||
else
|
||||
{
|
||||
break_putllc16(39, atomic16_info);
|
||||
if (atomic16_info.active)
|
||||
{
|
||||
break_putllc16(39, atomic16_info);
|
||||
}
|
||||
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
// Does not post-dominate channel read
|
||||
auto& pair = rchcnt_loop_all[rchcnt_loop.read_pc];
|
||||
pair.failed = true;
|
||||
pair.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
next.iterator_id = iterator_id_alloc++;
|
||||
@ -5431,7 +5582,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
const auto unconst = [&](u32 reg, u32 pc)
|
||||
{
|
||||
vregs[reg] = reg_state_t::make_unknown(pc);
|
||||
vregs[reg] = reg_state_t::make_unknown(pc, pos);
|
||||
};
|
||||
|
||||
const auto add_block = [&](u32 target)
|
||||
@ -5482,7 +5633,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
if (!is_form_block)
|
||||
{
|
||||
// Call for external code
|
||||
break_putllc16(25, atomic16.discard());
|
||||
break_all_patterns(25);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5494,7 +5645,16 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
const u32 data = std::bit_cast<be_t<u32>>(::at32(result.data, (pos - lsa) / 4));
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
if (rchcnt_loop.origins.find_first_of(pos) != umax)
|
||||
{
|
||||
rchcnt_loop.failed = true;
|
||||
rchcnt_loop.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
data = std::bit_cast<be_t<u32>>(::at32(result.data, (pos - lsa) / 4));
|
||||
const auto op = spu_opcode_t{data};
|
||||
const auto type = g_spu_itype.decode(data);
|
||||
|
||||
@ -5604,7 +5764,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
{
|
||||
if (op.e || op.d)
|
||||
{
|
||||
break_putllc16(27, atomic16.discard());
|
||||
break_all_patterns(27);
|
||||
}
|
||||
|
||||
break;
|
||||
@ -5621,9 +5781,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
break;
|
||||
}
|
||||
|
||||
case spu_itype::BR:
|
||||
case spu_itype::BRZ:
|
||||
case spu_itype::BRNZ:
|
||||
{
|
||||
const u32 next_pc = spu_branch_target(pos, 1);
|
||||
const u32 target = spu_branch_target(pos, op.i16);
|
||||
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
const reg_state_t& rt = vregs[op.rt];
|
||||
|
||||
if (rt.is_instruction && (rchcnt_loop.ch_state.origin == rt.origin || rchcnt_loop.ch_product.origin == rt.origin))
|
||||
{
|
||||
if (rchcnt_loop.conditioned)
|
||||
{
|
||||
// Let's not make it complicated, have a single branch determining the condition
|
||||
break_channel_pattern(54, rchcnt_loop.discard());
|
||||
break;
|
||||
}
|
||||
|
||||
rchcnt_loop.conditioned = true;
|
||||
rchcnt_loop.branch_pc = pos;
|
||||
rchcnt_loop.branch_target = rchcnt_loop.product_test_negate != (type == spu_itype::BRZ) ? target : next_pc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case spu_itype::BR:
|
||||
case spu_itype::BRHZ:
|
||||
case spu_itype::BRHNZ:
|
||||
{
|
||||
@ -5648,6 +5834,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
case spu_itype::WRCH:
|
||||
{
|
||||
break_channel_pattern(56, rchcnt_loop.discard());
|
||||
|
||||
switch (op.ra)
|
||||
{
|
||||
case MFC_EAL:
|
||||
@ -5677,7 +5865,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
case MFC_Cmd:
|
||||
{
|
||||
const auto [af, av, atagg, _3, _5, apc] = get_reg(op.rt);
|
||||
const auto [af, av, atagg, _3, _5, apc, ainst] = get_reg(op.rt);
|
||||
|
||||
if (!is_pattern_match)
|
||||
{
|
||||
@ -5896,7 +6084,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
break;
|
||||
default:
|
||||
{
|
||||
break_putllc16(6, atomic16.discard());
|
||||
break_all_patterns(6);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -5908,7 +6096,34 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
case spu_itype::RDCH:
|
||||
{
|
||||
const bool is_read = type == spu_itype::RDCH;
|
||||
bool invalidate = is_read;
|
||||
bool invalidate = true;
|
||||
|
||||
const auto it = rchcnt_loop_all.find(pos);
|
||||
|
||||
if (it != rchcnt_loop_all.end())
|
||||
{
|
||||
if (rchcnt_loop.failed || !rchcnt_loop.conditioned || rchcnt_loop.read_pc != pos)
|
||||
{
|
||||
// Propagate failure
|
||||
it->second.failed = true;
|
||||
it->second.active = false;
|
||||
it->second.conditioned = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
it->second.active = false;
|
||||
}
|
||||
|
||||
rchcnt_loop.active = false;
|
||||
}
|
||||
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
if (rchcnt_loop.read_pc != pos)
|
||||
{
|
||||
break_channel_pattern(53, rchcnt_loop.discard());
|
||||
}
|
||||
}
|
||||
|
||||
switch (op.ra)
|
||||
{
|
||||
@ -6025,6 +6240,25 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
break_putllc16(29, atomic16.discard());
|
||||
}
|
||||
|
||||
if (!is_pattern_match || is_read)
|
||||
{
|
||||
//
|
||||
}
|
||||
else if (!rchcnt_loop.active && it == rchcnt_loop_all.end())
|
||||
{
|
||||
rchcnt_loop.read_pc = pos;
|
||||
rchcnt_loop.channel = op.ra;
|
||||
rchcnt_loop.active = true;
|
||||
unconst(op.rt, pos);
|
||||
rchcnt_loop.ch_state = vregs[op.rt];
|
||||
invalidate = false;
|
||||
}
|
||||
else if (rchcnt_loop.active && it != rchcnt_loop_all.end())
|
||||
{
|
||||
// Success
|
||||
it->second.active = false;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -6424,7 +6658,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
case spu_itype::HBR:
|
||||
{
|
||||
hbr_loc = spu_branch_target(pos, op.roh << 7 | op.rt);
|
||||
const auto [af, av, at, ao, az, apc] = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra);
|
||||
hbr_tg = af & vf::is_const && !op.c ? av & 0x3fffc : -1;
|
||||
break;
|
||||
}
|
||||
@ -6492,8 +6726,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, av | bv, pos);
|
||||
break;
|
||||
@ -6508,7 +6742,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
const auto ra = get_reg(op.ra);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, av ^ op.si10, pos);
|
||||
break;
|
||||
@ -6524,8 +6758,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, bv ^ av, pos);
|
||||
break;
|
||||
@ -6535,8 +6769,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, ~(bv | av), pos);
|
||||
break;
|
||||
@ -6558,8 +6792,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, bv & av, pos);
|
||||
break;
|
||||
@ -6573,7 +6807,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, av + op.si10, pos);
|
||||
|
||||
@ -6590,8 +6824,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, bv + av, pos);
|
||||
|
||||
@ -6606,7 +6840,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
case spu_itype::SFI:
|
||||
{
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc] = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra);
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, op.si10 - av, pos);
|
||||
break;
|
||||
@ -6616,8 +6850,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto rb = get_reg(op.rb);
|
||||
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc] = rb;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;
|
||||
|
||||
inherit_const_value(op.rt, ra, rb, bv - av, pos);
|
||||
|
||||
@ -6656,7 +6890,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc] = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra);
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, av >> ((0 - op.i7) & 0x1f), pos);
|
||||
break;
|
||||
@ -6676,7 +6910,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc] = ra;
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, av << (op.i7 & 0x1f), pos);
|
||||
break;
|
||||
@ -6690,6 +6924,30 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
vregs[op.rt4] = ra.merge(rb, pos);
|
||||
break;
|
||||
}
|
||||
case spu_itype::CEQI:
|
||||
{
|
||||
const auto ra = get_reg(op.ra);
|
||||
const auto [af, av, at, ao, az, apc, ainst] = ra;
|
||||
|
||||
inherit_const_value(op.rt, ra, ra, av == op.si10, pos);
|
||||
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
if (ra.is_instruction && ra.origin == rchcnt_loop.ch_state.origin)
|
||||
{
|
||||
if (op.si10 != 0 && op.si10 != 1)
|
||||
{
|
||||
break_channel_pattern(55, rchcnt_loop.discard());
|
||||
break;
|
||||
}
|
||||
|
||||
rchcnt_loop.ch_product = vregs[op.rt];
|
||||
rchcnt_loop.product_test_negate = op.si10 == 1;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case spu_itype::SHLQBYI:
|
||||
{
|
||||
if (op.i7 & 0x10)
|
||||
@ -6755,6 +7013,14 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
|
||||
unconst(op_rt, reg_pos == SPU_LS_SIZE ? pos : reg_pos);
|
||||
|
||||
if (rchcnt_loop.active)
|
||||
{
|
||||
if (rchcnt_loop.origins.find_first_of(vregs[op_rt].origin) == umax)
|
||||
{
|
||||
rchcnt_loop.origins.push_back(vregs[op_rt].origin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
@ -6881,6 +7147,27 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
, pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, +stats.nowrite, ++stats.single, +stats.all);
|
||||
}
|
||||
|
||||
for (const auto& [read_pc, pattern] : rchcnt_loop_all)
|
||||
{
|
||||
if (pattern.failed || pattern.read_pc == SPU_LS_SIZE)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pattern.active)
|
||||
{
|
||||
spu_log.error("Channel loop error! (get_pc=0x%x, 0x%x-%s)", read_pc, entry_point, func_hash);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
|
||||
{
|
||||
//add_pattern(false, inst_attr::ch_lop, get_pc - result.entry_point);
|
||||
|
||||
spu_log.success("Channel Loop Pattern Detected! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
|
||||
}
|
||||
}
|
||||
|
||||
if (likely_putllc_loop && !had_putllc_evaluation)
|
||||
{
|
||||
spu_log.notice("Likely missed PUTLLC16 patterns. (entry=0x%x)", entry_point);
|
||||
|
@ -209,6 +209,7 @@ public:
|
||||
u32 known_ones{};
|
||||
u32 known_zeroes{};
|
||||
u32 origin = SPU_LS_SIZE;
|
||||
bool is_instruction = false;
|
||||
|
||||
bool is_const() const;
|
||||
|
||||
@ -242,7 +243,7 @@ public:
|
||||
void invalidate_if_created(u32 current_pc);
|
||||
|
||||
template <usz Count = 1>
|
||||
static std::conditional_t<Count == 1, reg_state_t, std::array<reg_state_t, Count>> make_unknown(u32 pc) noexcept
|
||||
static std::conditional_t<Count == 1, reg_state_t, std::array<reg_state_t, Count>> make_unknown(u32 pc, u32 current_pc = SPU_LS_SIZE) noexcept
|
||||
{
|
||||
if constexpr (Count == 1)
|
||||
{
|
||||
@ -250,6 +251,7 @@ public:
|
||||
v.tag = alloc_tag();
|
||||
v.flag = {};
|
||||
v.origin = pc;
|
||||
v.is_instruction = pc == current_pc;
|
||||
return v;
|
||||
}
|
||||
else
|
||||
@ -258,13 +260,15 @@ public:
|
||||
|
||||
for (reg_state_t& state : result)
|
||||
{
|
||||
state = make_unknown<1>(pc);
|
||||
state = make_unknown<1>(pc, current_pc);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
bool compare_tags(const reg_state_t& rhs) const;
|
||||
|
||||
static reg_state_t from_value(u32 value) noexcept;
|
||||
static u32 alloc_tag(bool reset = false) noexcept;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user