SPU: Function discovery fix

Do not detect a branch to the next instruction as a function call.
This commit is contained in:
Eladash 2023-08-31 09:54:45 +03:00 committed by Elad Ashkenazi
parent a597368c46
commit e851c044b5
2 changed files with 24 additions and 6 deletions

View File

@ -2122,16 +2122,16 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
// Discover functions
// Use the simplest method: search for instructions that call them
// And then filter invalid cases (does not detect tail calls)
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62) : v128::from32p(umax);
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
{
// Search for BRSL and BRASL
// Search for BRSL LR and BRASL LR
// TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 shifted = gv_shr32(inst, 23);
const v128 eq_brsl = gv_eq32(shifted, v128::from32p(0x66));
const v128 eq_brasl = gv_eq32(shifted, brasl_mask);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 result = eq_brsl | eq_brasl;
if (!gv_testz(result))
@ -2160,7 +2160,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
const u32 func = op_branch_targets(addr, op)[0];
if (func == umax || std::count(addrs.begin(), addrs.end(), func))
if (func == umax || addr + 4 == func || func == addr || std::count(addrs.begin(), addrs.end(), func))
{
continue;
}

View File

@ -2988,6 +2988,24 @@ inline v128 gv_rol32(const v128& a, const v128& b)
#endif
}
// For each 32-bit element, r = rotate a left by count (count is reduced modulo 32)
inline v128 gv_rol32(const v128& a, u32 count)
{
	// Keep the rotation amount in [0, 31]
	count %= 32;
#if defined(ARCH_X64)
	// Combine the two shifted halves with the SSE2 OR (_mm_or_si128).
	// _mm_or_epi32 is an AVX-512F/VL intrinsic; the bitwise result is identical,
	// but it would require AVX-512 support for what is otherwise SSE2-only code.
	// For count == 0 the right-shift amount is 32, which yields zero, so r == a.
	return _mm_or_si128(_mm_srli_epi32(a, 32 - count), _mm_slli_epi32(a, count));
#elif defined(ARCH_ARM64)
	// vshlq_u32 shifts left for positive per-lane amounts and right for negative ones,
	// so (count - 32) produces the logical right shift by (32 - count)
	const auto amt1 = vdupq_n_s32(count);
	const auto amt2 = vdupq_n_s32(count - 32);
	return vorrq_u32(vshlq_u32(a, amt1), vshlq_u32(a, amt2));
#else
	// Portable fallback: rotate each of the four lanes individually
	v128 r;
	for (u32 i = 0; i < 4; i++)
		r._u32[i] = utils::rol32(a._u32[i], count);
	return r;
#endif
}
// For each 8-bit element, r = (a << (c & 7)) | (b >> (~c & 7) >> 1)
template <typename A, typename B, typename C>
inline auto gv_fshl8(A&& a, B&& b, C&& c)