u128 renamed to v128

Since it's a vector union type
This commit is contained in:
Nekotekina 2015-08-06 16:31:13 +03:00
parent f8afee3325
commit 6f3c50eba2
23 changed files with 388 additions and 388 deletions

View File

@ -8,7 +8,7 @@
#define IS_LE_MACHINE // only draft
union u128
union v128
{
u64 _u64[2];
s64 _s64[2];
@ -171,22 +171,22 @@ union u128
} _bit;
// Builds a 128-bit value from two 64-bit halves: _0 is written to _u64[0] and
// _1 (default 0) to _u64[1].
// Diff view: each u128 line is immediately followed by its v128 replacement.
static u128 from64(u64 _0, u64 _1 = 0)
static v128 from64(u64 _0, u64 _1 = 0)
{
u128 ret;
v128 ret;
ret._u64[0] = _0;
ret._u64[1] = _1;
return ret;
}
// Reversed-argument form of from64: the first argument ends up in _u64[1] and the
// optional second argument (default 0) in _u64[0].
// Diff view: the u128 signature is followed by its v128 replacement.
static u128 from64r(u64 _1, u64 _0 = 0)
static v128 from64r(u64 _1, u64 _0 = 0)
{
return from64(_0, _1);
}
static u128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
{
u128 ret;
v128 ret;
ret._u32[0] = _0;
ret._u32[1] = _1;
ret._u32[2] = _2;
@ -194,141 +194,141 @@ union u128
return ret;
}
// Reversed-argument form of from32: the first argument is forwarded as from32's
// fourth parameter, the second as its third, and so on. Omitted arguments are 0.
// Diff view: the u128 signature is followed by its v128 replacement.
static u128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
{
return from32(_0, _1, _2, _3);
}
// Returns a value whose integer vector member vi is produced by _mm_set1_epi32,
// i.e. the same 32-bit value replicated in every 32-bit lane.
// The argument is cast to s32 before being handed to the intrinsic.
static u128 from32p(u32 value)
static v128 from32p(u32 value)
{
u128 ret;
v128 ret;
ret.vi = _mm_set1_epi32(static_cast<s32>(value));
return ret;
}
// Returns a value whose integer vector member vi is produced by _mm_set1_epi16,
// i.e. the same 16-bit value replicated in every 16-bit lane.
// The argument is cast to s16 before being handed to the intrinsic.
static u128 from16p(u16 value)
static v128 from16p(u16 value)
{
u128 ret;
v128 ret;
ret.vi = _mm_set1_epi16(static_cast<s16>(value));
return ret;
}
// Returns a value whose integer vector member vi is produced by _mm_set1_epi8,
// i.e. the same byte replicated in all sixteen byte lanes.
// The argument is cast to s8 before being handed to the intrinsic.
static u128 from8p(u8 value)
static v128 from8p(u8 value)
{
u128 ret;
v128 ret;
ret.vi = _mm_set1_epi8(static_cast<s8>(value));
return ret;
}
// Returns a value with the given bit set through the _bit accessor.
// ret starts from empty-brace initialization (presumably all zero bits - the full
// union definition is not visible here) and then _bit[bit] is assigned true.
// No range check on `bit` is performed in this function.
static u128 fromBit(u32 bit)
static v128 fromBit(u32 bit)
{
u128 ret = {};
v128 ret = {};
ret._bit[bit] = true;
return ret;
}
// Wraps a raw __m128i by storing it in the integer vector member vi.
static u128 fromV(__m128i value)
static v128 fromV(__m128i value)
{
u128 ret;
v128 ret;
ret.vi = value;
return ret;
}
// Wraps a raw __m128 (packed single-precision floats) by storing it in the member vf.
static u128 fromF(__m128 value)
static v128 fromF(__m128 value)
{
u128 ret;
v128 ret;
ret.vf = value;
return ret;
}
// Wraps a raw __m128d (packed double-precision floats) by storing it in the member vd.
static u128 fromD(__m128d value)
static v128 fromD(__m128d value)
{
u128 ret;
v128 ret;
ret.vd = value;
return ret;
}
// Lane-wise addition of the sixteen 8-bit integer lanes of left and right (_mm_add_epi8).
static force_inline u128 add8(const u128& left, const u128& right)
static force_inline v128 add8(const v128& left, const v128& right)
{
return fromV(_mm_add_epi8(left.vi, right.vi));
}
// Lane-wise addition of the eight 16-bit integer lanes of left and right (_mm_add_epi16).
static force_inline u128 add16(const u128& left, const u128& right)
static force_inline v128 add16(const v128& left, const v128& right)
{
return fromV(_mm_add_epi16(left.vi, right.vi));
}
// Lane-wise addition of the four 32-bit integer lanes of left and right (_mm_add_epi32).
static force_inline u128 add32(const u128& left, const u128& right)
static force_inline v128 add32(const v128& left, const v128& right)
{
return fromV(_mm_add_epi32(left.vi, right.vi));
}
// Lane-wise addition of packed single-precision floats, using the vf members (_mm_add_ps).
static force_inline u128 addfs(const u128& left, const u128& right)
static force_inline v128 addfs(const v128& left, const v128& right)
{
return fromF(_mm_add_ps(left.vf, right.vf));
}
// Lane-wise addition of packed double-precision floats, using the vd members (_mm_add_pd).
static force_inline u128 addfd(const u128& left, const u128& right)
static force_inline v128 addfd(const v128& left, const v128& right)
{
return fromD(_mm_add_pd(left.vd, right.vd));
}
// Lane-wise subtraction (left - right) on the sixteen 8-bit integer lanes (_mm_sub_epi8).
static force_inline u128 sub8(const u128& left, const u128& right)
static force_inline v128 sub8(const v128& left, const v128& right)
{
return fromV(_mm_sub_epi8(left.vi, right.vi));
}
// Lane-wise subtraction (left - right) on the eight 16-bit integer lanes (_mm_sub_epi16).
static force_inline u128 sub16(const u128& left, const u128& right)
static force_inline v128 sub16(const v128& left, const v128& right)
{
return fromV(_mm_sub_epi16(left.vi, right.vi));
}
// Lane-wise subtraction (left - right) on the four 32-bit integer lanes (_mm_sub_epi32).
static force_inline u128 sub32(const u128& left, const u128& right)
static force_inline v128 sub32(const v128& left, const v128& right)
{
return fromV(_mm_sub_epi32(left.vi, right.vi));
}
// Lane-wise subtraction (left - right) of packed single-precision floats (_mm_sub_ps).
static force_inline u128 subfs(const u128& left, const u128& right)
static force_inline v128 subfs(const v128& left, const v128& right)
{
return fromF(_mm_sub_ps(left.vf, right.vf));
}
// Lane-wise subtraction (left - right) of packed double-precision floats (_mm_sub_pd).
static force_inline u128 subfd(const u128& left, const u128& right)
static force_inline v128 subfd(const v128& left, const v128& right)
{
return fromD(_mm_sub_pd(left.vd, right.vd));
}
// Lane-wise maximum of the sixteen unsigned 8-bit lanes of left and right (_mm_max_epu8).
static force_inline u128 maxu8(const u128& left, const u128& right)
static force_inline v128 maxu8(const v128& left, const v128& right)
{
return fromV(_mm_max_epu8(left.vi, right.vi));
}
// Lane-wise minimum of the sixteen unsigned 8-bit lanes of left and right (_mm_min_epu8).
static force_inline u128 minu8(const u128& left, const u128& right)
static force_inline v128 minu8(const v128& left, const v128& right)
{
return fromV(_mm_min_epu8(left.vi, right.vi));
}
// Lane-wise equality test on 8-bit lanes (_mm_cmpeq_epi8): a lane of the result is
// all ones where left and right match and all zeros where they differ.
static force_inline u128 eq8(const u128& left, const u128& right)
static force_inline v128 eq8(const v128& left, const v128& right)
{
return fromV(_mm_cmpeq_epi8(left.vi, right.vi));
}
// Lane-wise equality test on 16-bit lanes (_mm_cmpeq_epi16): a lane of the result is
// all ones where left and right match and all zeros where they differ.
static force_inline u128 eq16(const u128& left, const u128& right)
static force_inline v128 eq16(const v128& left, const v128& right)
{
return fromV(_mm_cmpeq_epi16(left.vi, right.vi));
}
// Lane-wise equality test on 32-bit lanes (_mm_cmpeq_epi32): a lane of the result is
// all ones where left and right match and all zeros where they differ.
static force_inline u128 eq32(const u128& left, const u128& right)
static force_inline v128 eq32(const v128& left, const v128& right)
{
return fromV(_mm_cmpeq_epi32(left.vi, right.vi));
}
// Whole-value equality: true only when both 64-bit halves (_u64[0] and _u64[1]) match.
bool operator == (const u128& right) const
bool operator == (const v128& right) const
{
return (_u64[0] == right._u64[0]) && (_u64[1] == right._u64[1]);
}
// Whole-value inequality: true when either 64-bit half (_u64[0] or _u64[1]) differs.
bool operator != (const u128& right) const
bool operator != (const v128& right) const
{
return (_u64[0] != right._u64[0]) || (_u64[1] != right._u64[1]);
}
@ -344,7 +344,7 @@ union u128
}
// result = (~left) & (right)
// Note the operand roles: it is `left` that gets complemented, matching the argument
// order of _mm_andnot_si128.
static force_inline u128 andnot(const u128& left, const u128& right)
static force_inline v128 andnot(const v128& left, const v128& right)
{
return fromV(_mm_andnot_si128(left.vi, right.vi));
}
@ -358,46 +358,46 @@ union u128
std::string to_xyzw() const;
// Reverses the order of all sixteen bytes of val with a single _mm_shuffle_epi8.
// _mm_set_epi8 lists its arguments from the highest byte down to the lowest, so the
// index vector below makes result byte 0 take source byte 15, ..., and result byte 15
// take source byte 0.
static force_inline u128 byteswap(const u128 val)
static force_inline v128 byteswap(const v128 val)
{
return fromV(_mm_shuffle_epi8(val.vi, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)));
}
};
CHECK_SIZE_ALIGN(u128, 16, 16);
CHECK_SIZE_ALIGN(v128, 16, 16);
// Bitwise OR of all 128 bits (_mm_or_si128 on the vi members).
// Diff view: each u128 line is immediately followed by its v128 replacement.
inline u128 operator |(const u128& left, const u128& right)
inline v128 operator |(const v128& left, const v128& right)
{
return u128::fromV(_mm_or_si128(left.vi, right.vi));
return v128::fromV(_mm_or_si128(left.vi, right.vi));
}
// Bitwise AND of all 128 bits (_mm_and_si128 on the vi members).
// Diff view: each u128 line is immediately followed by its v128 replacement.
inline u128 operator &(const u128& left, const u128& right)
inline v128 operator &(const v128& left, const v128& right)
{
return u128::fromV(_mm_and_si128(left.vi, right.vi));
return v128::fromV(_mm_and_si128(left.vi, right.vi));
}
// Bitwise XOR of all 128 bits (_mm_xor_si128 on the vi members).
// Diff view: each u128 line is immediately followed by its v128 replacement.
inline u128 operator ^(const u128& left, const u128& right)
inline v128 operator ^(const v128& left, const v128& right)
{
return u128::fromV(_mm_xor_si128(left.vi, right.vi));
return v128::fromV(_mm_xor_si128(left.vi, right.vi));
}
// Bitwise complement of all 128 bits, done on the two 64-bit halves with scalar `~`
// and reassembled through from64 (no SSE intrinsic is used here).
// Diff view: each u128 line is immediately followed by its v128 replacement.
inline u128 operator ~(const u128& other)
inline v128 operator ~(const v128& other)
{
return u128::from64(~other._u64[0], ~other._u64[1]);
return v128::from64(~other._u64[0], ~other._u64[1]);
}
// 128-bit compare-and-swap that reports the value observed in *dest.
//   dest - location to update
//   comp - expected current value
//   exch - value to store if *dest matches comp
// Diff view: each u128 line is immediately followed by its v128 replacement.
static force_inline u128 sync_val_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
static force_inline v128 sync_val_compare_and_swap(volatile v128* dest, v128 comp, v128 exch)
{
#if !defined(_MSC_VER)
// GCC/Clang path: the operands are reinterpreted as __int128_t for the builtin, and
// the builtin's return value is reinterpreted back as the union type.
auto res = __sync_val_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch);
return (u128&)res;
return (v128&)res;
#else
// MSVC path: the intrinsic takes the exchange value as (high half, low half) and
// writes the value it found at dest back into the comparand buffer, so `comp` is
// returned after the call. The intrinsic's own success flag is ignored.
_InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp);
return comp;
#endif
}
static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
static force_inline bool sync_bool_compare_and_swap(volatile v128* dest, v128 comp, v128 exch)
{
#if !defined(_MSC_VER)
return __sync_bool_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch);
@ -406,38 +406,38 @@ static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 co
#endif
}
// Exchange built on compare-and-swap: stores `value` into *dest and returns the
// snapshot that the successful swap was compared against.
// The snapshot is read through a cast that drops the volatile qualifier; presumably a
// stale snapshot just makes sync_bool_compare_and_swap fail so the loop retries.
// Diff view: each u128 line is immediately followed by its v128 replacement.
static force_inline u128 sync_lock_test_and_set(volatile u128* dest, u128 value)
static force_inline v128 sync_lock_test_and_set(volatile v128* dest, v128 value)
{
while (true)
{
const u128 old = *(u128*)dest;
const v128 old = *(v128*)dest;
if (sync_bool_compare_and_swap(dest, old, value)) return old;
}
}
// Fetch-and-OR built on compare-and-swap: retries until it manages to replace the
// snapshot `old` with `value | old`, then returns `old` (the pre-update snapshot).
// The snapshot is read through a cast that drops the volatile qualifier.
// Diff view: each u128 line is immediately followed by its v128 replacement.
static force_inline u128 sync_fetch_and_or(volatile u128* dest, u128 value)
static force_inline v128 sync_fetch_and_or(volatile v128* dest, v128 value)
{
while (true)
{
const u128 old = *(u128*)dest;
const v128 old = *(v128*)dest;
if (sync_bool_compare_and_swap(dest, old, value | old)) return old;
}
}
// Fetch-and-AND built on compare-and-swap: retries until it manages to replace the
// snapshot `old` with `value & old`, then returns `old` (the pre-update snapshot).
// The snapshot is read through a cast that drops the volatile qualifier.
// Diff view: each u128 line is immediately followed by its v128 replacement.
static force_inline u128 sync_fetch_and_and(volatile u128* dest, u128 value)
static force_inline v128 sync_fetch_and_and(volatile v128* dest, v128 value)
{
while (true)
{
const u128 old = *(u128*)dest;
const v128 old = *(v128*)dest;
if (sync_bool_compare_and_swap(dest, old, value & old)) return old;
}
}
// Fetch-and-XOR built on compare-and-swap: retries until it manages to replace the
// snapshot `old` with `value ^ old`, then returns `old` (the pre-update snapshot).
// The snapshot is read through a cast that drops the volatile qualifier.
// Diff view: each u128 line is immediately followed by its v128 replacement.
static force_inline u128 sync_fetch_and_xor(volatile u128* dest, u128 value)
static force_inline v128 sync_fetch_and_xor(volatile v128* dest, v128 value)
{
while (true)
{
const u128 old = *(u128*)dest;
const v128 old = *(v128*)dest;
if (sync_bool_compare_and_swap(dest, old, value ^ old)) return old;
}
}
@ -488,14 +488,14 @@ template<typename T> struct se_t<T, 8>
// se_t specialization for the second template argument 16 (presumably sizeof(T) - the
// primary template is not visible here). Both directions go through the 128-bit
// union's byteswap, which reverses all sixteen bytes.
// Diff view: each u128 line is immediately followed by its v128 replacement.
template<typename T> struct se_t<T, 16>
{
// Reinterprets src as the 128-bit union and returns its byte-reversed form.
static force_inline u128 to(const T& src)
static force_inline v128 to(const T& src)
{
return u128::byteswap((u128&)src);
return v128::byteswap((v128&)src);
}
// Byte-reverses src and reinterprets the result as T.
static force_inline T from(const u128& src)
static force_inline T from(const v128& src)
{
const u128 res = u128::byteswap(src);
const v128 res = v128::byteswap(src);
return (T&)res;
}
};
@ -553,7 +553,7 @@ template<typename T> struct be_storage<T, 8>
// be_storage specialization for the second template argument 16 (presumably sizeof(T)):
// selects the 128-bit union as the storage type.
// Diff view: the u128 alias line is followed by its v128 replacement.
template<typename T> struct be_storage<T, 16>
{
using type = u128;
using type = v128;
};
template<typename T> using be_storage_t = typename be_storage<T>::type;
@ -602,7 +602,7 @@ public:
#endif
static_assert(!std::is_class<type>::value, "be_t<> error: invalid type (class or structure)");
static_assert(!std::is_union<type>::value || std::is_same<type, u128>::value, "be_t<> error: invalid type (union)");
static_assert(!std::is_union<type>::value || std::is_same<type, v128>::value, "be_t<> error: invalid type (union)");
static_assert(!std::is_pointer<type>::value, "be_t<> error: invalid type (pointer)");
static_assert(!std::is_reference<type>::value, "be_t<> error: invalid type (reference)");
static_assert(!std::is_array<type>::value, "be_t<> error: invalid type (array)");
@ -748,7 +748,7 @@ template<typename T> struct is_be_t<volatile T> : public std::integral_constant<
// to_be_t helper struct
// `type` is be_t<T> when T is an arithmetic type, an enum, or the 128-bit union;
// for any other T it is T itself.
// Diff view: the u128 alias line is followed by its v128 replacement.
template<typename T> struct to_be
{
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, u128>::value, be_t<T>, T>;
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, v128>::value, be_t<T>, T>;
};
// be_t<T> if possible, T otherwise
@ -781,7 +781,7 @@ public:
type m_data; // don't access directly
static_assert(!std::is_class<type>::value, "le_t<> error: invalid type (class or structure)");
static_assert(!std::is_union<type>::value || std::is_same<type, u128>::value, "le_t<> error: invalid type (union)");
static_assert(!std::is_union<type>::value || std::is_same<type, v128>::value, "le_t<> error: invalid type (union)");
static_assert(!std::is_pointer<type>::value, "le_t<> error: invalid type (pointer)");
static_assert(!std::is_reference<type>::value, "le_t<> error: invalid type (reference)");
static_assert(!std::is_array<type>::value, "le_t<> error: invalid type (array)");
@ -863,7 +863,7 @@ template<typename T> struct is_le_t<volatile T> : public std::integral_constant<
// Helper for mapping a type to its le_t wrapper: `type` is le_t<T> when T is an
// arithmetic type, an enum, or the 128-bit union; for any other T it is T itself.
// Diff view: the u128 alias line is followed by its v128 replacement.
template<typename T> struct to_le
{
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, u128>::value, le_t<T>, T>;
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, v128>::value, le_t<T>, T>;
};
// le_t<T> if possible, T otherwise

View File

@ -5,12 +5,12 @@
#include <wx/string.h>
#pragma warning(pop)
// Formats the value as 32 lowercase hex digits: _u64[1] first, then _u64[0], each
// zero-padded to 16 digits.
// Diff view: the u128 signature is followed by its v128 replacement.
std::string u128::to_hex() const
std::string v128::to_hex() const
{
return fmt::format("%016llx%016llx", _u64[1], _u64[0]);
}
// Formats the four float elements with x/y/z/w labels. The labels run from the
// highest index down: x is _f[3], y is _f[2], z is _f[1], w is _f[0].
// NOTE(review): this calls fmt::Format (capital F) while to_hex calls fmt::format -
// confirm both spellings exist in the fmt helper and that the difference is intended.
// Diff view: the u128 signature is followed by its v128 replacement.
std::string u128::to_xyzw() const
std::string v128::to_xyzw() const
{
return fmt::Format("x: %g y: %g z: %g w: %g", _f[3], _f[2], _f[1], _f[0]);
}

View File

@ -492,7 +492,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
typedef CONTEXT x64_context;
#define X64REG(context, reg) (&(&(context)->Rax)[reg])
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(&(context)->Xmm0)[reg]))
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(&(context)->Xmm0)[reg]))
#define EFLAGS(context) ((context)->EFlags)
#else
@ -502,7 +502,7 @@ typedef ucontext_t x64_context;
#ifdef __APPLE__
#define X64REG(context, reg) (darwin_x64reg(context, reg))
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg]))
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg]))
#define EFLAGS(context) ((context)->uc_mcontext->__ss.__rflags)
uint64_t* darwin_x64reg(x64_context *context, int reg)
@ -560,7 +560,7 @@ static const reg_table_t reg_table[17] =
};
#define X64REG(context, reg) (&(context)->uc_mcontext.gregs[reg_table[reg]])
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(context)->uc_mcontext.fpregs->_xmm[reg]))
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(context)->uc_mcontext.fpregs->_xmm[reg]))
#define EFLAGS(context) ((context)->uc_mcontext.gregs[REG_EFL])
#endif // __APPLE__

View File

@ -136,7 +136,7 @@ namespace psv_func_detail
struct bind_arg<T, ARG_VECTOR, g_count, f_count, v_count>
{
static_assert(v_count <= 0, "TODO: Unsupported argument type (vector)");
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function argument type for ARG_VECTOR");
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function argument type for ARG_VECTOR");
force_inline static T get_arg(ARMv7Context& context)
{
@ -294,7 +294,7 @@ namespace psv_func_detail
//template<typename T>
//struct bind_result<T, ARG_VECTOR>
//{
// static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function result type for ARG_VECTOR");
// static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function result type for ARG_VECTOR");
// static force_inline void put_result(ARMv7Context& context, const T& result)
// {
@ -307,7 +307,7 @@ namespace psv_func_detail
static_assert(!std::is_pointer<RT>::value, "Invalid function result type (pointer)");
static_assert(!std::is_reference<RT>::value, "Invalid function result type (reference)");
static const bool is_float = std::is_floating_point<RT>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
};
@ -316,7 +316,7 @@ namespace psv_func_detail
{
// TODO: check calculations
static const bool is_float = std::is_floating_point<T>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<T>, u128>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<T>, v128>::value;
static const bool is_context = std::is_same<T, ARMv7Context&>::value;
static const bool is_variadic = std::is_same<std::remove_cv_t<T>, armv7_va_args_t>::value;
static const bool is_general = !is_float && !is_vector && !is_context && !is_variadic;

View File

@ -91,7 +91,7 @@ void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] + VPR[vb], added as packed single-precision floats (addfs).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op)
@ -108,7 +108,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op)
{
const auto a = CPU.VPR[op.va];
const auto b = CPU.VPR[op.vb];
const auto s = u128::add32(a, b); // a + b
const auto s = v128::add32(a, b); // a + b
const auto m = (a ^ s) & (b ^ s); // overflow bit
const auto x = _mm_srai_epi32(m.vi, 31); // saturation mask
const auto y = _mm_srai_epi32(_mm_and_si128(s.vi, m.vi), 31); // positive saturation mask
@ -117,7 +117,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] + VPR[vb], added per 8-bit lane (add8).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op)
@ -127,7 +127,7 @@ void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] + VPR[vb], added per 16-bit lane (add16).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op)
@ -137,7 +137,7 @@ void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] + VPR[vb], added per 32-bit lane (add32).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op)
@ -160,30 +160,30 @@ void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op)
// Per-byte average of VPR[va] and VPR[vb] (presumably the signed rounded average
// (a + b + 1) >> 1 - confirm against the instruction definition).
// The result is assembled from two parts: the wrapped 8-bit sum a + (b + 1) shifted
// right by one, and a separately computed top bit for each byte.
// Diff view: the four u128 lines are followed by their four v128 replacements.
void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op)
{
const auto a = CPU.VPR[op.va];
const auto b = u128::add8(CPU.VPR[op.vb], u128::from8p(1)); // add 1
// bit 0 of every byte is cleared (0xfe) because the final shift below works on 64-bit
// lanes: without the mask, bit 0 of one byte would slide into bit 7 of its neighbour
const auto summ = u128::add8(a, b) & u128::from8p(0xfe);
const auto sign = u128::from8p(0x80);
// `& sign` keeps only bit 7 of each byte
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq8(b, sign)) & sign; // calculate msb
const auto b = v128::add8(CPU.VPR[op.vb], v128::from8p(1)); // add 1
const auto summ = v128::add8(a, b) & v128::from8p(0xfe);
const auto sign = v128::from8p(0x80);
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq8(b, sign)) & sign; // calculate msb
// halve the sum and OR the computed top bit into each byte
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi64(summ.vi, 1));
}
// Per-16-bit-lane average of VPR[va] and VPR[vb] (presumably the signed rounded
// average (a + b + 1) >> 1 - confirm against the instruction definition).
// The wrapped 16-bit sum a + (b + 1) is logically shifted right by one per lane and
// the separately computed top bit of each lane is ORed in.
// Diff view: the four u128 lines are followed by their four v128 replacements.
void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op)
{
const auto a = CPU.VPR[op.va];
const auto b = u128::add16(CPU.VPR[op.vb], u128::from16p(1)); // add 1
const auto summ = u128::add16(a, b);
const auto sign = u128::from16p(0x8000);
// `& sign` keeps only bit 15 of each lane
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq16(b, sign)) & sign; // calculate msb
const auto b = v128::add16(CPU.VPR[op.vb], v128::from16p(1)); // add 1
const auto summ = v128::add16(a, b);
const auto sign = v128::from16p(0x8000);
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq16(b, sign)) & sign; // calculate msb
// the logical shift leaves bit 15 of each lane zero, so the OR supplies it
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi16(summ.vi, 1));
}
// Per-32-bit-lane average of VPR[va] and VPR[vb] (presumably the signed rounded
// average (a + b + 1) >> 1 - confirm against the instruction definition).
// The wrapped 32-bit sum a + (b + 1) is logically shifted right by one per lane and
// the separately computed top bit of each lane is ORed in.
// Diff view: the four u128 lines are followed by their four v128 replacements.
void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op)
{
const auto a = CPU.VPR[op.va];
const auto b = u128::add32(CPU.VPR[op.vb], u128::from32p(1)); // add 1
const auto summ = u128::add32(a, b);
const auto sign = u128::from32p(0x80000000);
// `& sign` keeps only bit 31 of each lane
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq32(b, sign)) & sign; // calculate msb
const auto b = v128::add32(CPU.VPR[op.vb], v128::from32p(1)); // add 1
const auto summ = v128::add32(a, b);
const auto sign = v128::from32p(0x80000000);
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq32(b, sign)) & sign; // calculate msb
// the logical shift leaves bit 31 of each lane zero, so the OR supplies it
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi32(summ.vi, 1));
}
@ -201,7 +201,7 @@ void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op)
{
const auto a = CPU.VPR[op.va];
const auto b = CPU.VPR[op.vb];
const auto summ = u128::add32(u128::add32(a, b), u128::from32p(1));
const auto summ = v128::add32(v128::add32(a, b), v128::from32p(1));
const auto carry = _mm_xor_si128(_mm_slli_epi32(sse_cmpgt_epu32(summ.vi, a.vi), 31), _mm_set1_epi32(0x80000000));
CPU.VPR[op.vd].vi = _mm_or_si128(carry, _mm_srli_epi32(summ.vi, 1));
}
@ -248,7 +248,7 @@ void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = per-byte equality mask of VPR[va] and VPR[vb] (eq8).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op)
@ -260,7 +260,7 @@ void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = per-16-bit-lane equality mask of VPR[va] and VPR[vb] (eq16).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op)
@ -272,7 +272,7 @@ void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = per-32-bit-lane equality mask of VPR[va] and VPR[vb] (eq32).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op)
@ -727,8 +727,8 @@ void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 4; h++)
{
u16 bb7 = VB._u8[15 - (h * 4 + 0)] & 0x1;
@ -764,8 +764,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op)
{
//CPU.VPR[op.vd].vi = _mm_packus_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi);
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 4; h++)
{
s32 result = VA._s32[h];
@ -798,8 +798,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint b = 0; b < 8; b++)
{
CPU.VPR[op.vd]._u8[b + 8] = VA._u8[b * 2];
@ -809,8 +809,8 @@ void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint b = 0; b < 8; b++)
{
u16 result = VA._u16[b];
@ -835,8 +835,8 @@ void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 4; h++)
{
CPU.VPR[op.vd]._u16[h + 4] = VA._u16[h * 2];
@ -846,8 +846,8 @@ void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
u128 VB = CPU.VPR[op.vb];
v128 VA = CPU.VPR[op.va];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 4; h++)
{
u32 result = VA._u32[h];
@ -949,7 +949,7 @@ void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
v128 VA = CPU.VPR[op.va];
u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7;
CPU.VPR[op.vd]._u8[0] = VA._u8[0] << sh;
@ -989,7 +989,7 @@ void ppu_interpreter::VSLH(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
v128 VA = CPU.VPR[op.va];
u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf;
CPU.VPR[op.vd].clear();
@ -1068,7 +1068,7 @@ void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
v128 VA = CPU.VPR[op.va];
u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7;
CPU.VPR[op.vd]._u8[15] = VA._u8[15] >> sh;
@ -1120,7 +1120,7 @@ void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op)
{
u128 VA = CPU.VPR[op.va];
v128 VA = CPU.VPR[op.va];
u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf;
CPU.VPR[op.vd].clear();
@ -1149,7 +1149,7 @@ void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] - VPR[vb], subtracted as packed single-precision floats (subfs).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op)
@ -1183,7 +1183,7 @@ void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] - VPR[vb], subtracted per 8-bit lane (sub8).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op)
@ -1193,7 +1193,7 @@ void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] - VPR[vb], subtracted per 16-bit lane (sub16).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op)
@ -1203,7 +1203,7 @@ void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op)
// VPR[vd] = VPR[va] - VPR[vb], subtracted per 32-bit lane (sub32).
// Diff view: the u128 line is followed by its v128 replacement.
void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op)
{
CPU.VPR[op.vd] = u128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]);
CPU.VPR[op.vd] = v128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]);
}
void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op)
@ -1334,7 +1334,7 @@ void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[8 + w * 2 + 1] >> 7; // signed shift sign extends
@ -1346,7 +1346,7 @@ void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[op.vd]._s16[h] = VB._s8[8 + h];
@ -1355,7 +1355,7 @@ void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[op.vd]._s32[w] = VB._s16[4 + w];
@ -1364,7 +1364,7 @@ void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[w * 2 + 1] >> 7; // signed shift sign extends
@ -1376,7 +1376,7 @@ void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[op.vd]._s16[h] = VB._s8[h];
@ -1385,7 +1385,7 @@ void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op)
void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op)
{
u128 VB = CPU.VPR[op.vb];
v128 VB = CPU.VPR[op.vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[op.vd]._s32[w] = VB._s16[w];

View File

@ -1055,8 +1055,8 @@ private:
}
void VMRGHB(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._u8[15 - h*2] = VA._u8[15 - h];
@ -1065,8 +1065,8 @@ private:
}
void VMRGHH(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._u16[7 - w*2] = VA._u16[7 - w];
@ -1075,8 +1075,8 @@ private:
}
void VMRGHW(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint d = 0; d < 2; d++)
{
CPU.VPR[vd]._u32[3 - d*2] = VA._u32[3 - d];
@ -1085,8 +1085,8 @@ private:
}
void VMRGLB(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h];
@ -1095,8 +1095,8 @@ private:
}
void VMRGLH(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w];
@ -1105,8 +1105,8 @@ private:
}
void VMRGLW(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint d = 0; d < 2; d++)
{
CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d];
@ -1339,8 +1339,8 @@ private:
}
void VPKPX(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++)
{
u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1;
@ -1358,8 +1358,8 @@ private:
}
void VPKSHSS(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++)
{
s16 result = VA._s16[b];
@ -1395,8 +1395,8 @@ private:
}
void VPKSHUS(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++)
{
s16 result = VA._s16[b];
@ -1432,8 +1432,8 @@ private:
}
void VPKSWSS(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++)
{
s32 result = VA._s32[h];
@ -1469,8 +1469,8 @@ private:
}
void VPKSWUS(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++)
{
s32 result = VA._s32[h];
@ -1506,8 +1506,8 @@ private:
}
void VPKUHUM(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++)
{
CPU.VPR[vd]._u8[b+8] = VA._u8[b*2];
@ -1516,8 +1516,8 @@ private:
}
void VPKUHUS(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++)
{
u16 result = VA._u16[b];
@ -1543,8 +1543,8 @@ private:
}
void VPKUWUM(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++)
{
CPU.VPR[vd]._u16[h+4] = VA._u16[h*2];
@ -1553,8 +1553,8 @@ private:
}
void VPKUWUS(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
v128 VA = CPU.VPR[va];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++)
{
u32 result = VA._u32[h];
@ -1684,7 +1684,7 @@ private:
}
void VSL(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
v128 VA = CPU.VPR[va];
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
CPU.VPR[vd]._u8[0] = VA._u8[0] << sh;
@ -1720,7 +1720,7 @@ private:
}
void VSLO(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
v128 VA = CPU.VPR[va];
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
CPU.VPR[vd].clear();
@ -1791,7 +1791,7 @@ private:
}
void VSR(u32 vd, u32 va, u32 vb) //nf
{
u128 VA = CPU.VPR[va];
v128 VA = CPU.VPR[va];
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh;
@ -1837,7 +1837,7 @@ private:
}
void VSRO(u32 vd, u32 va, u32 vb)
{
u128 VA = CPU.VPR[va];
v128 VA = CPU.VPR[va];
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
CPU.VPR[vd].clear();
@ -2121,7 +2121,7 @@ private:
}
void VUPKHPX(u32 vd, u32 vb)
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends
@ -2132,7 +2132,7 @@ private:
}
void VUPKHSB(u32 vd, u32 vb)
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._s16[h] = VB._s8[8 + h];
@ -2140,7 +2140,7 @@ private:
}
void VUPKHSH(u32 vd, u32 vb)
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._s32[w] = VB._s16[4 + w];
@ -2148,7 +2148,7 @@ private:
}
void VUPKLPX(u32 vd, u32 vb)
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends
@ -2159,7 +2159,7 @@ private:
}
void VUPKLSB(u32 vd, u32 vb) //nf
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._s16[h] = VB._s8[h];
@ -2167,7 +2167,7 @@ private:
}
void VUPKLSH(u32 vd, u32 vb)
{
u128 VB = CPU.VPR[vb];
v128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._s32[w] = VB._s16[w];

View File

@ -2121,7 +2121,7 @@ void Compiler::TW(u32 to, u32 ra, u32 rb) {
}
void Compiler::LVSL(u32 vd, u32 ra, u32 rb) {
static const u128 s_lvsl_values[] = {
static const v128 s_lvsl_values[] = {
{ 0x08090A0B0C0D0E0F, 0x0001020304050607 },
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
@ -2350,7 +2350,7 @@ void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) {
}
void Compiler::LVSR(u32 vd, u32 ra, u32 rb) {
static const u128 s_lvsr_values[] = {
static const v128 s_lvsr_values[] = {
{ 0x18191A1B1C1D1E1F, 0x1011121314151617 },
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },

View File

@ -51,7 +51,7 @@ struct ppu_recompiler_llvm::PPUState {
u64 GPR[32];
/// Vector purpose registers
u128 VPR[32];
v128 VPR[32];
/// Condition register
CRhdr CR;

View File

@ -462,7 +462,7 @@ public:
PPCdouble FPR[32]{}; //Floating Point Register
FPSCRhdr FPSCR{}; //Floating Point Status and Control Register
u64 GPR[32]{}; //General-Purpose Register
u128 VPR[32]{};
v128 VPR[32]{};
u32 vpcr = 0;
CRhdr CR{}; //Condition Register

View File

@ -2,5 +2,5 @@
struct SPUContext
{
u128 gpr[128];
v128 gpr[128];
};

View File

@ -69,17 +69,17 @@ void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_value(op.ra));
CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_value(op.ra));
}
void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_count(op.ra));
CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_count(op.ra));
}
void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]);
CPU.GPR[op.rt] = v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]);
}
void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op)
@ -94,7 +94,7 @@ void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]);
CPU.GPR[op.rt] = v128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]);
}
void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op)
@ -106,7 +106,7 @@ void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op)
{
const auto a = CPU.GPR[op.ra];
const auto b = CPU.GPR[op.rb];
CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b));
CPU.GPR[op.rt] = v128::sub8(v128::maxu8(a, b), v128::minu8(a, b));
}
void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op)
@ -249,7 +249,7 @@ void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op)
@ -266,7 +266,7 @@ void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op)
@ -343,7 +343,7 @@ void spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op)
{
const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0);
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
CPU.PC = target - 4;
set_interrupt_status(CPU, op);
}
@ -364,17 +364,17 @@ void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7)));
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7)));
}
void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7)));
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7)));
}
void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7)));
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7)));
}
void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op)
@ -426,28 +426,28 @@ void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u8[t] = 0x03;
}
void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u16[t] = 0x0203;
}
void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u32[t] = 0x00010203;
}
void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull;
}
@ -489,34 +489,34 @@ void spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]);
CPU.GPR[op.rt] = v128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]);
}
void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u8[t] = 0x03;
}
void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u16[t] = 0x0203;
}
void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u32[t] = 0x00010203;
}
void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op)
{
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3;
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull;
}
@ -640,7 +640,7 @@ void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]);
CPU.GPR[op.rt] = v128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]);
}
void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op)
@ -655,12 +655,12 @@ void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op)
@ -691,12 +691,12 @@ void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
CPU.GPR[op.rt] = v128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
}
void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op)
@ -751,12 +751,12 @@ void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::add32(u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & u128::from32p(1));
CPU.GPR[op.rt] = v128::add32(v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & v128::from32p(1));
}
void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op)
{
CPU.GPR[op.rt] = u128::sub32(u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), u128::andnot(CPU.GPR[op.rt], u128::from32p(1)));
CPU.GPR[op.rt] = v128::sub32(v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), v128::andnot(CPU.GPR[op.rt], v128::from32p(1)));
}
void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op)
@ -976,7 +976,7 @@ void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op)
{
const u32 target = SPUOpcodes::branchTarget(0, op.i16);
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
CPU.PC = target - 4;
}
@ -993,7 +993,7 @@ void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op)
{
const u32 target = SPUOpcodes::branchTarget(CPU.PC, op.i16);
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
CPU.PC = target - 4;
}
@ -1197,7 +1197,7 @@ void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op)
void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op)
{
// rt <> rc
CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | u128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]);
CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | v128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]);
}
void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op)

View File

@ -116,11 +116,11 @@ private:
}
void RDCH(u32 rt, u32 ra)
{
CPU.GPR[rt] = u128::from32r(CPU.get_ch_value(ra));
CPU.GPR[rt] = v128::from32r(CPU.get_ch_value(ra));
}
void RCHCNT(u32 rt, u32 ra)
{
CPU.GPR[rt] = u128::from32r(CPU.get_ch_count(ra));
CPU.GPR[rt] = v128::from32r(CPU.get_ch_count(ra));
}
void SF(u32 rt, u32 ra, u32 rb)
{
@ -424,7 +424,7 @@ private:
void BISL(u32 intr, u32 rt, u32 ra)
{
u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
LOG5_OPCODE("branch (0x%x)", target);
CPU.PC = target - 4;
@ -539,14 +539,14 @@ private:
void ROTQBYBI(u32 rt, u32 ra, u32 rb)
{
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
}
void ROTQMBYBI(u32 rt, u32 ra, u32 rb)
{
const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
@ -554,7 +554,7 @@ private:
void SHLQBYBI(u32 rt, u32 ra, u32 rb)
{
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
@ -620,7 +620,7 @@ private:
const int t = CPU.GPR[rb]._u32[3] & 0x7;
if (t) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
@ -636,7 +636,7 @@ private:
const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7;
if (t) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t));
@ -652,7 +652,7 @@ private:
const int t = CPU.GPR[rb]._u32[3] & 0x7;
if (t) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
@ -666,14 +666,14 @@ private:
void ROTQBY(u32 rt, u32 ra, u32 rb)
{
const int s = CPU.GPR[rb]._u32[3] & 0xf;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
for (int b = 0; b < 16; ++b)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
}
void ROTQMBY(u32 rt, u32 ra, u32 rb)
{
const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
@ -681,7 +681,7 @@ private:
void SHLQBY(u32 rt, u32 ra, u32 rb)
{
const int s = CPU.GPR[rb]._u32[3] & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
@ -753,7 +753,7 @@ private:
const int s = i7 & 0x7;
if (s) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
@ -769,7 +769,7 @@ private:
const int s = (0 - i7) & 0x7;
if (s) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s));
@ -785,7 +785,7 @@ private:
const int s = i7 & 0x7;
if (s) // not an optimization, it fixes shifts
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
@ -799,14 +799,14 @@ private:
void ROTQBYI(u32 rt, u32 ra, s32 i7)
{
const int s = i7 & 0xf;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
}
void ROTQMBYI(u32 rt, u32 ra, s32 i7)
{
const int s = (0 - i7) & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
@ -814,7 +814,7 @@ private:
void SHLQBYI(u32 rt, u32 ra, s32 i7)
{
const int s = i7 & 0x1f;
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
@ -849,8 +849,8 @@ private:
}
void SUMB(u32 rt, u32 ra, u32 rb)
{
const u128 _a = CPU.GPR[ra];
const u128 _b = CPU.GPR[rb];
const v128 _a = CPU.GPR[ra];
const v128 _b = CPU.GPR[rb];
for (int w = 0; w < 4; w++)
{
CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3];
@ -890,7 +890,7 @@ private:
}
void CNTB(u32 rt, u32 ra)
{
const u128 temp = CPU.GPR[ra];
const v128 temp = CPU.GPR[ra];
CPU.GPR[rt].clear();
for (int b = 0; b < 16; b++)
for (int i = 0; i < 8; i++)
@ -1621,7 +1621,7 @@ private:
void BRASL(u32 rt, s32 i16)
{
u32 target = branchTarget(0, i16);
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
LOG5_OPCODE("branch (0x%x)", target);
CPU.PC = target - 4;
}
@ -1650,7 +1650,7 @@ private:
void BRSL(u32 rt, s32 i16)
{
u32 target = branchTarget(CPU.PC, i16);
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
LOG5_OPCODE("branch (0x%x)", target);
CPU.PC = target - 4;
}
@ -1873,8 +1873,8 @@ private:
}
void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
{
const u128 _a = CPU.GPR[ra];
const u128 _b = CPU.GPR[rb];
const v128 _a = CPU.GPR[ra];
const v128 _b = CPU.GPR[rb];
for (int i = 0; i < 16; i++)
{
u8 b = CPU.GPR[rc]._u8[i];

View File

@ -34,7 +34,7 @@ public:
std::array<SPURecEntry, 0x10000> entry = {};
std::vector<u128> imm_table;
std::vector<v128> imm_table;
SPURecompilerCore(SPUThread& cpu);
@ -105,7 +105,7 @@ public:
void XmmInvalidate(const s8 reg);
void XmmFinalize(const XmmLink& var, s8 reg = -1);
void XmmRelease();
asmjit::X86Mem XmmConst(u128 data);
asmjit::X86Mem XmmConst(v128 data);
private:

View File

@ -486,7 +486,7 @@ void SPURecompiler::XmmRelease()
}
}
X86Mem SPURecompiler::XmmConst(u128 data)
X86Mem SPURecompiler::XmmConst(v128 data)
{
s32 shift = 0;
@ -494,12 +494,12 @@ X86Mem SPURecompiler::XmmConst(u128 data)
{
if (rec.imm_table[shift] == data)
{
return oword_ptr(*imm_var, shift * sizeof(u128));
return oword_ptr(*imm_var, shift * sizeof(v128));
}
}
rec.imm_table.push_back(data);
return oword_ptr(*imm_var, shift * sizeof(u128));
return oword_ptr(*imm_var, shift * sizeof(v128));
}
@ -553,7 +553,7 @@ void SPURecompiler::RDCH(u32 rt, u32 ra)
{
c.mov(cpu_dword(PC), CPU.PC);
WRAPPER_BEGIN(rt, ra, zz);
CPU->GPR[rt] = u128::from32r(CPU->get_ch_value(ra));
CPU->GPR[rt] = v128::from32r(CPU->get_ch_value(ra));
WRAPPER_END(rt, ra, 0);
// TODO
}
@ -562,7 +562,7 @@ void SPURecompiler::RCHCNT(u32 rt, u32 ra)
{
c.mov(cpu_dword(PC), CPU.PC);
WRAPPER_BEGIN(rt, ra, zz);
CPU->GPR[rt] = u128::from32r(CPU->get_ch_count(ra));
CPU->GPR[rt] = v128::from32r(CPU->get_ch_count(ra));
WRAPPER_END(rt, ra, 0);
// TODO
}
@ -603,7 +603,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb)
// compare if-greater-than
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
c.pxor(va.get(), vi.get());
if (const XmmLink* vb = XmmRead(rb))
{
@ -614,7 +614,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb)
c.pxor(vi.get(), cpu_xmm(GPR[rb]));
}
c.pcmpgtd(va.get(), vi.get());
c.paddd(va.get(), XmmConst(u128::from32p(1)));
c.paddd(va.get(), XmmConst(v128::from32p(1)));
XmmFinalize(va, rt);
XmmFinalize(vi);
LOG_OPCODE();
@ -650,7 +650,7 @@ void SPURecompiler::NOR(u32 rt, u32 ra, u32 rb)
c.por(va.get(), cpu_xmm(GPR[rb]));
}
}
c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff)));
c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -901,7 +901,7 @@ void SPURecompiler::CG(u32 rt, u32 ra, u32 rb)
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vb = XmmGet(rb);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
c.paddd(vb.get(), va.get());
c.pxor(va.get(), vi.get());
c.pxor(vb.get(), vi.get());
@ -940,7 +940,7 @@ void SPURecompiler::NAND(u32 rt, u32 ra, u32 rb)
{
c.pand(va.get(), cpu_xmm(GPR[rb]));
}
c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff)));
c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -1178,7 +1178,7 @@ void SPURecompiler::HBR(u32 p, u32 ro, u32 ra)
void SPURecompiler::GB(u32 rt, u32 ra)
{
const XmmLink& va = XmmGet(ra, rt);
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0))));
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0))));
c.psllq(va.get(), 7);
c.pmovmskb(*addr, va.get());
c.pxor(va.get(), va.get());
@ -1190,7 +1190,7 @@ void SPURecompiler::GB(u32 rt, u32 ra)
void SPURecompiler::GBH(u32 rt, u32 ra)
{
const XmmLink& va = XmmGet(ra, rt);
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0))));
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0))));
c.psllq(va.get(), 7);
c.pmovmskb(*addr, va.get());
c.pxor(va.get(), va.get());
@ -1254,7 +1254,7 @@ void SPURecompiler::FREST(u32 rt, u32 ra)
void SPURecompiler::FRSQEST(u32 rt, u32 ra)
{
const XmmLink& va = XmmGet(ra, rt);
c.andps(va.get(), XmmConst(u128::from32p(0x7fffffff))); // abs
c.andps(va.get(), XmmConst(v128::from32p(0x7fffffff))); // abs
c.rsqrtps(va.get(), va.get());
XmmFinalize(va, rt);
LOG_OPCODE();
@ -1343,7 +1343,7 @@ void SPURecompiler::CBX(u32 rt, u32 ra, u32 rb)
c.not_(*addr);
c.and_(*addr, 0xf);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03);
@ -1368,7 +1368,7 @@ void SPURecompiler::CHX(u32 rt, u32 ra, u32 rb)
c.not_(*addr);
c.and_(*addr, 0xe);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203);
@ -1393,7 +1393,7 @@ void SPURecompiler::CWX(u32 rt, u32 ra, u32 rb)
c.not_(*addr);
c.and_(*addr, 0xc);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203);
@ -1419,10 +1419,10 @@ void SPURecompiler::CDX(u32 rt, u32 ra, u32 rb)
const XmmLink& vr = XmmAlloc(rt);
Label p1(c), p2(c);
c.jnz(p1);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
c.jmp(p2);
c.bind(p1);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
c.bind(p2);
XmmFinalize(vr, rt);
LOG_OPCODE();
@ -1527,7 +1527,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7)
{
// assuming that SP % 16 is always zero
const XmmLink& vr = XmmAlloc(rt);
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
value.u8r[i7 & 0xf] = 0x03;
c.movdqa(vr.get(), XmmConst(value));
XmmFinalize(vr, rt);
@ -1539,7 +1539,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7)
c.not_(*addr);
c.and_(*addr, 0xf);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03);
@ -1553,7 +1553,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7)
{
// assuming that SP % 16 is always zero
const XmmLink& vr = XmmAlloc(rt);
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
value.u16r[(i7 >> 1) & 0x7] = 0x0203;
c.movdqa(vr.get(), XmmConst(value));
XmmFinalize(vr, rt);
@ -1565,7 +1565,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7)
c.not_(*addr);
c.and_(*addr, 0xe);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203);
@ -1579,7 +1579,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7)
{
// assuming that SP % 16 is always zero
const XmmLink& vr = XmmAlloc(rt);
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
value.u32r[(i7 >> 2) & 0x3] = 0x00010203;
c.movdqa(vr.get(), XmmConst(value));
XmmFinalize(vr, rt);
@ -1591,7 +1591,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7)
c.not_(*addr);
c.and_(*addr, 0xc);
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
XmmFinalize(vr, rt);
XmmInvalidate(rt);
c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203);
@ -1605,7 +1605,7 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7)
{
// assuming that SP % 16 is always zero
const XmmLink& vr = XmmAlloc(rt);
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
value.u64r[(i7 >> 3) & 0x1] = 0x0001020304050607ull;
c.movdqa(vr.get(), XmmConst(value));
XmmFinalize(vr, rt);
@ -1618,10 +1618,10 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7)
const XmmLink& vr = XmmAlloc(rt);
Label p1(c), p2(c);
c.jnz(p1);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
c.jmp(p2);
c.bind(p1);
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
c.bind(p2);
XmmFinalize(vr, rt);
}
@ -1746,7 +1746,7 @@ void SPURecompiler::CGTH(u32 rt, u32 ra, u32 rb)
void SPURecompiler::EQV(u32 rt, u32 ra, u32 rb)
{
const XmmLink& vb = XmmGet(rb, rt);
c.pxor(vb.get(), XmmConst(u128::from32p(0xffffffff)));
c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff)));
if (const XmmLink* va = XmmRead(ra))
{
c.pxor(vb.get(), va->read());
@ -1779,11 +1779,11 @@ void SPURecompiler::SUMB(u32 rt, u32 ra, u32 rb)
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from8p(1)));
c.movdqa(vi.get(), XmmConst(v128::from8p(1)));
c.pmaddubsw(va.get(), vi.get());
c.pmaddubsw(vb.get(), vi.get());
c.phaddw(va.get(), vb.get());
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0))));
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0))));
XmmFinalize(va, rt);
XmmFinalize(vb);
XmmFinalize(vi);
@ -1842,12 +1842,12 @@ void SPURecompiler::CNTB(u32 rt, u32 ra)
const XmmLink& v1 = XmmCopy(va);
const XmmLink& vm = XmmAlloc();
c.psrlq(v1.get(), 4);
c.movdqa(vm.get(), XmmConst(u128::from8p(0xf)));
c.movdqa(vm.get(), XmmConst(v128::from8p(0xf)));
c.pand(va.get(), vm.get());
c.pand(v1.get(), vm.get());
c.movdqa(vm.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
c.movdqa(vm.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
c.pshufb(vm.get(), va.get());
c.movdqa(va.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
c.movdqa(va.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
c.pshufb(va.get(), v1.get());
c.paddb(va.get(), vm.get());
XmmFinalize(va, rt);
@ -1870,7 +1870,7 @@ void SPURecompiler::CLGT(u32 rt, u32 ra, u32 rb)
// compare if-greater-than
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
c.pxor(va.get(), vi.get());
if (const XmmLink* vb = XmmRead(rb))
{
@ -1973,7 +1973,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb)
// compare if-greater-than
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from16p(0x8000)));
c.movdqa(vi.get(), XmmConst(v128::from16p(0x8000)));
c.pxor(va.get(), vi.get());
if (const XmmLink* vb = XmmRead(rb))
{
@ -1992,7 +1992,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb)
void SPURecompiler::ORC(u32 rt, u32 ra, u32 rb)
{
const XmmLink& vb = XmmGet(rb, rt);
c.pxor(vb.get(), XmmConst(u128::from32p(0xffffffff)));
c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff)));
if (const XmmLink* va = XmmRead(ra))
{
c.por(vb.get(), va->read());
@ -2010,7 +2010,7 @@ void SPURecompiler::FCMGT(u32 rt, u32 ra, u32 rb)
// reverted less-than
const XmmLink& vb = XmmGet(rb, rt);
const XmmLink& vi = XmmAlloc();
c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff)));
c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff)));
c.andps(vb.get(), vi.get()); // abs
if (const XmmLink* va = XmmRead(ra))
{
@ -2081,7 +2081,7 @@ void SPURecompiler::CLGTB(u32 rt, u32 ra, u32 rb)
// compare if-greater-than
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from8p(0x80)));
c.movdqa(vi.get(), XmmConst(v128::from8p(0x80)));
c.pxor(va.get(), vi.get());
if (const XmmLink* vb = XmmRead(rb))
{
@ -2177,7 +2177,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb)
const XmmLink& va2 = XmmCopy(va);
c.pmulhuw(va.get(), vb.get());
c.pmullw(va2.get(), vb.get());
c.pand(va.get(), XmmConst(u128::from32p(0xffff0000)));
c.pand(va.get(), XmmConst(v128::from32p(0xffff0000)));
c.psrld(va2.get(), 16);
c.por(va.get(), va2.get());
XmmFinalize(va, rt);
@ -2189,7 +2189,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb)
void SPURecompiler::ADDX(u32 rt, u32 ra, u32 rb)
{
const XmmLink& vt = XmmGet(rt);
c.pand(vt.get(), XmmConst(u128::from32p(1)));
c.pand(vt.get(), XmmConst(v128::from32p(1)));
c.paddd(vt.get(), cpu_xmm(GPR[ra]));
c.paddd(vt.get(), cpu_xmm(GPR[rb]));
XmmFinalize(vt, rt);
@ -2200,7 +2200,7 @@ void SPURecompiler::SFX(u32 rt, u32 ra, u32 rb)
{
const XmmLink& vt = XmmGet(rt);
const XmmLink& vb = XmmGet(rb, rt);
c.pandn(vt.get(), XmmConst(u128::from32p(1)));
c.pandn(vt.get(), XmmConst(v128::from32p(1)));
c.psubd(vb.get(), cpu_xmm(GPR[ra]));
c.psubd(vb.get(), vt.get());
XmmFinalize(vb, rt);
@ -2252,7 +2252,7 @@ void SPURecompiler::MPYHHAU(u32 rt, u32 ra, u32 rb)
const XmmLink& va2 = XmmCopy(va);
c.pmulhuw(va.get(), vb.get());
c.pmullw(va2.get(), vb.get());
c.pand(va.get(), XmmConst(u128::from32p(0xffff0000)));
c.pand(va.get(), XmmConst(v128::from32p(0xffff0000)));
c.psrld(va2.get(), 16);
c.paddd(vt.get(), va.get());
c.paddd(vt.get(), va2.get());
@ -2327,7 +2327,7 @@ void SPURecompiler::MPY(u32 rt, u32 ra, u32 rb)
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff)));
c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff)));
c.pand(va.get(), vi.get());
c.pand(vb.get(), vi.get());
c.pmaddwd(va.get(), vb.get());
@ -2392,7 +2392,7 @@ void SPURecompiler::FCMEQ(u32 rt, u32 ra, u32 rb)
{
const XmmLink& vb = XmmGet(rb, rt);
const XmmLink& vi = XmmAlloc();
c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff)));
c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff)));
c.andps(vb.get(), vi.get()); // abs
if (const XmmLink* va = XmmRead(ra))
{
@ -2421,7 +2421,7 @@ void SPURecompiler::MPYU(u32 rt, u32 ra, u32 rb)
c.pmulhuw(va.get(), vb.get());
c.pmullw(va2.get(), vb.get());
c.pslld(va.get(), 16);
c.pand(va2.get(), XmmConst(u128::from32p(0xffff)));
c.pand(va2.get(), XmmConst(v128::from32p(0xffff)));
c.por(va.get(), va2.get());
XmmFinalize(va, rt);
XmmFinalize(vb);
@ -2468,10 +2468,10 @@ void SPURecompiler::CFLTS(u32 rt, u32 ra, s32 i8)
const XmmLink& va = XmmGet(ra, rt);
if (i8 != 173)
{
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
}
const XmmLink& vi = XmmAlloc();
c.movaps(vi.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31)))));
c.movaps(vi.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31)))));
c.cmpps(vi.get(), va.get(), 2);
c.cvttps2dq(va.get(), va.get()); // convert to ints with truncation
c.pxor(va.get(), vi.get()); // fix result saturation (0x80000000 -> 0x7fffffff)
@ -2485,18 +2485,18 @@ void SPURecompiler::CFLTU(u32 rt, u32 ra, s32 i8)
const XmmLink& va = XmmGet(ra, rt);
if (i8 != 173)
{
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
}
c.maxps(va.get(), XmmConst({})); // saturate
const XmmLink& vs = XmmCopy(va); // copy scaled value
const XmmLink& vs2 = XmmCopy(va);
const XmmLink& vs3 = XmmAlloc();
c.movaps(vs3.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31)))));
c.movaps(vs3.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31)))));
c.subps(vs2.get(), vs3.get());
c.cmpps(vs3.get(), vs.get(), 2);
c.andps(vs2.get(), vs3.get());
c.cvttps2dq(va.get(), va.get());
c.cmpps(vs.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(32)))), 5);
c.cmpps(vs.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(32)))), 5);
c.cvttps2dq(vs2.get(), vs2.get());
c.por(va.get(), vs.get());
c.por(va.get(), vs2.get());
@ -2513,7 +2513,7 @@ void SPURecompiler::CSFLT(u32 rt, u32 ra, s32 i8)
c.cvtdq2ps(va.get(), va.get()); // convert to floats
if (i8 != 155)
{
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
}
XmmFinalize(va, rt);
LOG_OPCODE();
@ -2523,14 +2523,14 @@ void SPURecompiler::CUFLT(u32 rt, u32 ra, s32 i8)
{
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& v1 = XmmCopy(va);
c.pand(va.get(), XmmConst(u128::from32p(0x7fffffff)));
c.pand(va.get(), XmmConst(v128::from32p(0x7fffffff)));
c.cvtdq2ps(va.get(), va.get()); // convert to floats
c.psrad(v1.get(), 31); // generate mask from sign bit
c.andps(v1.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component
c.andps(v1.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component
c.addps(va.get(), v1.get()); // add correction component
if (i8 != 155)
{
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
}
XmmFinalize(va, rt);
XmmFinalize(v1);
@ -2746,7 +2746,7 @@ void SPURecompiler::IL(u32 rt, s32 i16)
}
else
{
c.movdqa(vr.get(), XmmConst(u128::from32p(i16)));
c.movdqa(vr.get(), XmmConst(v128::from32p(i16)));
}
XmmFinalize(vr, rt);
LOG_OPCODE();
@ -2755,7 +2755,7 @@ void SPURecompiler::IL(u32 rt, s32 i16)
// ILHU: emits code that loads a constant into the XMM link allocated for rt.
// The constant is (i16 << 16) replicated into all four 32-bit lanes
// (from32p wraps _mm_set1_epi32).
// NOTE(review): i16 is s32; if it can be negative here, `i16 << 16` is a left
// shift of a negative signed value - confirm the decoder's range for i16.
void SPURecompiler::ILHU(u32 rt, s32 i16)
{
const XmmLink& vr = XmmAlloc(rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.movdqa(vr.get(), XmmConst(u128::from32p(i16 << 16)));
c.movdqa(vr.get(), XmmConst(v128::from32p(i16 << 16)));
// Presumably commits vr as the new value of register rt (helper not shown).
XmmFinalize(vr, rt);
LOG_OPCODE();
}
@ -2763,7 +2763,7 @@ void SPURecompiler::ILHU(u32 rt, s32 i16)
// ILH: emits code that loads a constant into the XMM link allocated for rt.
// The constant is i16 replicated into all four 32-bit lanes via from32p
// (_mm_set1_epi32).
// NOTE(review): the sibling halfword opcodes (ORHI, ANDHI, XORHI, ...) use
// from16p; this one uses the 32-bit broadcast. Confirm whether from16p was
// intended or whether the caller already passes a replicated immediate.
void SPURecompiler::ILH(u32 rt, s32 i16)
{
const XmmLink& vr = XmmAlloc(rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.movdqa(vr.get(), XmmConst(u128::from32p(i16)));
c.movdqa(vr.get(), XmmConst(v128::from32p(i16)));
// Presumably commits vr as the new value of register rt (helper not shown).
XmmFinalize(vr, rt);
LOG_OPCODE();
}
@ -2771,7 +2771,7 @@ void SPURecompiler::ILH(u32 rt, s32 i16)
// IOHL: emits a `por` of rt with a constant whose four 32-bit lanes each hold
// the low 16 bits of i16 (upper 16 bits of every lane are zero because of the
// `& 0xffff` mask). rt is both source and destination.
void SPURecompiler::IOHL(u32 rt, s32 i16)
{
const XmmLink& vt = XmmGet(rt, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.por(vt.get(), XmmConst(u128::from32p(i16 & 0xffff)));
c.por(vt.get(), XmmConst(v128::from32p(i16 & 0xffff)));
// Presumably commits vt as the new value of register rt (helper not shown).
XmmFinalize(vt, rt);
LOG_OPCODE();
}
@ -2798,7 +2798,7 @@ void SPURecompiler::ORI(u32 rt, u32 ra, s32 i10)
else
{
const XmmLink& va = XmmGet(ra, rt);
c.por(va.get(), XmmConst(u128::from32p(i10)));
c.por(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
}
LOG_OPCODE();
@ -2807,7 +2807,7 @@ void SPURecompiler::ORI(u32 rt, u32 ra, s32 i10)
// ORHI: emits a `por` of ra's value with i10 replicated into every 16-bit lane
// (from16p wraps _mm_set1_epi16; i10 is narrowed to u16 by the call).
// The result is finalized into rt.
void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.por(va.get(), XmmConst(u128::from16p(i10)));
c.por(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2815,7 +2815,7 @@ void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10)
// ORBI: emits a `por` of ra's value with i10 replicated into every byte lane
// (from8p wraps _mm_set1_epi8; i10 is narrowed to u8 by the call).
// The result is finalized into rt.
void SPURecompiler::ORBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.por(va.get(), XmmConst(u128::from8p(i10)));
c.por(va.get(), XmmConst(v128::from8p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2841,7 +2841,7 @@ void SPURecompiler::SFI(u32 rt, u32 ra, s32 i10)
else
{
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::from32p(i10)));
c.movdqa(vr.get(), XmmConst(v128::from32p(i10)));
c.psubd(vr.get(), cpu_xmm(GPR[ra]));
XmmFinalize(vr, rt);
}
@ -2869,7 +2869,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10)
else
{
const XmmLink& vr = XmmAlloc(rt);
c.movdqa(vr.get(), XmmConst(u128::from16p(i10)));
c.movdqa(vr.get(), XmmConst(v128::from16p(i10)));
c.psubw(vr.get(), cpu_xmm(GPR[ra]));
XmmFinalize(vr, rt);
}
@ -2879,7 +2879,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10)
// ANDI: emits a `pand` of ra's value with i10 replicated into all four 32-bit
// lanes (from32p wraps _mm_set1_epi32). The result is finalized into rt.
void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pand(va.get(), XmmConst(u128::from32p(i10)));
c.pand(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2887,7 +2887,7 @@ void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10)
// ANDHI: emits a `pand` of ra's value with i10 replicated into every 16-bit
// lane (from16p wraps _mm_set1_epi16; i10 is narrowed to u16 by the call).
// The result is finalized into rt.
void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pand(va.get(), XmmConst(u128::from16p(i10)));
c.pand(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2895,7 +2895,7 @@ void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 i10)
// ANDBI: emits a `pand` of ra's value with i10 replicated into every byte lane
// (from8p wraps _mm_set1_epi8; i10 is narrowed to u8 by the call).
// The result is finalized into rt.
void SPURecompiler::ANDBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pand(va.get(), XmmConst(u128::from8p(i10)));
c.pand(va.get(), XmmConst(v128::from8p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2904,7 +2904,7 @@ void SPURecompiler::AI(u32 rt, u32 ra, s32 i10)
{
// add
const XmmLink& va = XmmGet(ra, rt);
c.paddd(va.get(), XmmConst(u128::from32p(i10)));
c.paddd(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2913,7 +2913,7 @@ void SPURecompiler::AHI(u32 rt, u32 ra, s32 i10)
{
// add
const XmmLink& va = XmmGet(ra, rt);
c.paddw(va.get(), XmmConst(u128::from16p(i10)));
c.paddw(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2965,7 +2965,7 @@ void SPURecompiler::LQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 whi
// XORI: emits a `pxor` of ra's value with i10 replicated into all four 32-bit
// lanes (from32p wraps _mm_set1_epi32). The result is finalized into rt.
void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10)
{
// Single-argument XmmGet here (ANDI/ORHI pass rt as a second argument).
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pxor(va.get(), XmmConst(u128::from32p(i10)));
c.pxor(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2973,7 +2973,7 @@ void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10)
// XORHI: emits a `pxor` of ra's value with i10 replicated into every 16-bit
// lane (from16p wraps _mm_set1_epi16; i10 is narrowed to u16 by the call).
// The result is finalized into rt.
void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pxor(va.get(), XmmConst(u128::from16p(i10)));
c.pxor(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2981,7 +2981,7 @@ void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10)
// XORBI: emits a `pxor` of ra's value with i10 replicated into every byte lane
// (from8p wraps _mm_set1_epi8; i10 is narrowed to u8 by the call).
// The result is finalized into rt.
void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pxor(va.get(), XmmConst(u128::from8p(i10)));
c.pxor(va.get(), XmmConst(v128::from8p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2989,7 +2989,7 @@ void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10)
// CGTI: emits a `pcmpgtd` (x86 packed signed 32-bit greater-than compare) of
// ra's value against i10 replicated into all four 32-bit lanes.
// The comparison mask is finalized into rt.
void SPURecompiler::CGTI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpgtd(va.get(), XmmConst(u128::from32p(i10)));
c.pcmpgtd(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -2997,7 +2997,7 @@ void SPURecompiler::CGTI(u32 rt, u32 ra, s32 i10)
// CGTHI: emits a `pcmpgtw` (x86 packed signed 16-bit greater-than compare) of
// ra's value against i10 replicated into every 16-bit lane (i10 is narrowed to
// u16 by from16p). The comparison mask is finalized into rt.
void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpgtw(va.get(), XmmConst(u128::from16p(i10)));
c.pcmpgtw(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3005,7 +3005,7 @@ void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10)
// CGTBI: emits a `pcmpgtb` (x86 packed signed 8-bit greater-than compare) of
// ra's value against i10 replicated into every byte lane (i10 is narrowed to
// u8 by from8p). The comparison mask is finalized into rt.
void SPURecompiler::CGTBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpgtb(va.get(), XmmConst(u128::from8p(i10)));
c.pcmpgtb(va.get(), XmmConst(v128::from8p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3025,8 +3025,8 @@ void SPURecompiler::HGTI(u32 rt, u32 ra, s32 i10)
// CLGTI: 32-bit greater-than compare of ra against the immediate, done with
// the signed `pcmpgtd` after biasing both operands by 0x80000000.
// Flipping the top bit of each lane (pxor) and subtracting 0x80000000 from the
// immediate (same effect modulo 2^32) presumably turns the signed compare into
// an unsigned ordering - confirm against the SPU CLGTI definition.
void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pairs: each u128 line (removed) is followed further down by its
// v128 replacement (added); the two u128 lines come first, then the two v128.
c.pxor(va.get(), XmmConst(u128::from32p(0x80000000)));
c.pcmpgtd(va.get(), XmmConst(u128::from32p((u32)i10 - 0x80000000)));
c.pxor(va.get(), XmmConst(v128::from32p(0x80000000)));
c.pcmpgtd(va.get(), XmmConst(v128::from32p((u32)i10 - 0x80000000)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3034,8 +3034,8 @@ void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10)
// CLGTHI: 16-bit greater-than compare of ra against the immediate, done with
// the signed `pcmpgtw` after biasing both operands by 0x8000.
// `(u16)i10 - 0x8000` is computed as int and narrowed back to u16 by from16p,
// so it equals the immediate with its top bit flipped. Presumably this yields
// an unsigned ordering - confirm against the SPU CLGTHI definition.
void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pairs: the two u128 lines (removed) come first, then their two
// v128 replacements (added).
c.pxor(va.get(), XmmConst(u128::from16p(0x8000)));
c.pcmpgtw(va.get(), XmmConst(u128::from16p((u16)i10 - 0x8000)));
c.pxor(va.get(), XmmConst(v128::from16p(0x8000)));
c.pcmpgtw(va.get(), XmmConst(v128::from16p((u16)i10 - 0x8000)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3043,8 +3043,8 @@ void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10)
// CLGTBI: 8-bit greater-than compare of ra against the immediate, done with
// the signed `pcmpgtb` after biasing both operands by 0x80.
// Unlike CLGTI/CLGTHI the register side is biased with `psubb` rather than
// `pxor`; per byte, subtracting 0x80 modulo 256 flips the top bit just the same.
// `(s8)i10 - 0x80` is computed as int and narrowed to u8 by from8p.
void SPURecompiler::CLGTBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pairs: the two u128 lines (removed) come first, then their two
// v128 replacements (added).
c.psubb(va.get(), XmmConst(u128::from8p(0x80)));
c.pcmpgtb(va.get(), XmmConst(u128::from8p((s8)i10 - 0x80)));
c.psubb(va.get(), XmmConst(v128::from8p(0x80)));
c.pcmpgtb(va.get(), XmmConst(v128::from8p((s8)i10 - 0x80)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3064,7 +3064,7 @@ void SPURecompiler::HLGTI(u32 rt, u32 ra, s32 i10)
// MPYI: emits a `pmaddwd` of ra's value with a constant whose 32-bit lanes
// each hold the low 16 bits of i10 (upper halfword zero due to `& 0xffff`).
// With x86 pmaddwd semantics (signed 16x16 products of adjacent halfwords,
// summed per 32-bit lane) the zero upper halfword cancels the second product,
// presumably leaving low-halfword(ra) * i10 in each lane.
void SPURecompiler::MPYI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra, rt);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pmaddwd(va.get(), XmmConst(u128::from32p(i10 & 0xffff)));
c.pmaddwd(va.get(), XmmConst(v128::from32p(i10 & 0xffff)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3074,7 +3074,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10)
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vi = XmmAlloc();
const XmmLink& va2 = XmmCopy(va);
c.movdqa(vi.get(), XmmConst(u128::from32p(i10 & 0xffff)));
c.movdqa(vi.get(), XmmConst(v128::from32p(i10 & 0xffff)));
c.pmulhuw(va.get(), vi.get());
c.pmullw(va2.get(), vi.get());
c.pslld(va.get(), 16);
@ -3088,7 +3088,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10)
// CEQI: emits a `pcmpeqd` (packed 32-bit equality compare) of ra's value
// against i10 replicated into all four 32-bit lanes.
// The comparison mask is finalized into rt.
void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpeqd(va.get(), XmmConst(u128::from32p(i10)));
c.pcmpeqd(va.get(), XmmConst(v128::from32p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3096,7 +3096,7 @@ void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10)
// CEQHI: emits a `pcmpeqw` (packed 16-bit equality compare) of ra's value
// against i10 replicated into every 16-bit lane (i10 is narrowed to u16 by
// from16p). The comparison mask is finalized into rt.
void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpeqw(va.get(), XmmConst(u128::from16p(i10)));
c.pcmpeqw(va.get(), XmmConst(v128::from16p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3104,7 +3104,7 @@ void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10)
// CEQBI: emits a `pcmpeqb` (packed 8-bit equality compare) of ra's value
// against i10 replicated into every byte lane (i10 is narrowed to u8 by
// from8p). The comparison mask is finalized into rt.
void SPURecompiler::CEQBI(u32 rt, u32 ra, s32 i10)
{
const XmmLink& va = XmmGet(ra);
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
c.pcmpeqb(va.get(), XmmConst(u128::from8p(i10)));
c.pcmpeqb(va.get(), XmmConst(v128::from8p(i10)));
XmmFinalize(va, rt);
LOG_OPCODE();
}
@ -3141,7 +3141,7 @@ void SPURecompiler::ILA(u32 rt, u32 i18)
}
else
{
c.movdqa(vr.get(), XmmConst(u128::from32p(i18 & 0x3ffff)));
c.movdqa(vr.get(), XmmConst(v128::from32p(i18 & 0x3ffff)));
}
XmmFinalize(vr, rt);
LOG_OPCODE();
@ -3168,11 +3168,11 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
const XmmLink& v4 = XmmAlloc();
const XmmLink& vFF = XmmAlloc(rt);
// generate specific values:
c.movdqa(v1.get(), XmmConst(u128::from8p(0xe0))); // v1 = 11100000
c.movdqa(v3.get(), XmmConst(u128::from8p(0x80))); // v3 = 10000000
c.movdqa(v1.get(), XmmConst(v128::from8p(0xe0))); // v1 = 11100000
c.movdqa(v3.get(), XmmConst(v128::from8p(0x80))); // v3 = 10000000
c.pand(v2.get(), v1.get()); // filter mask v2 = mask & 11100000
c.movdqa(vFF.get(), v2.get()); // and copy vFF = mask & 11100000
c.movdqa(v4.get(), XmmConst(u128::from8p(0xc0))); // v4 = 11000000
c.movdqa(v4.get(), XmmConst(v128::from8p(0xc0))); // v4 = 11000000
c.pcmpeqb(vFF.get(), v4.get()); // gen 0xff vFF = (mask & 11100000 == 11000000) ? 0xff : 0
c.movdqa(v4.get(), v2.get()); // copy again v4 = mask & 11100000
c.pand(v4.get(), v3.get()); // filter mask v4 = mask & 10000000
@ -3182,13 +3182,13 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
c.por(vFF.get(), v2.get()); // merge 0xff, 0x80 vFF = (mask & 11100000 == 11000000) ? 0xff : (mask & 11100000 == 11100000) ? 0x80 : 0
c.pandn(v1.get(), v0.get()); // filter mask v1 = mask & 00011111
// select bytes from [rb]:
c.movdqa(v2.get(), XmmConst(u128::from8p(0x0f))); // v2 = 00001111
c.pxor(v1.get(), XmmConst(u128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000
c.movdqa(v2.get(), XmmConst(v128::from8p(0x0f))); // v2 = 00001111
c.pxor(v1.get(), XmmConst(v128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000
c.psubb(v2.get(), v1.get()); // v2 = 00001111 - ((mask & 00011111) ^ 00010000)
c.movdqa(v1.get(), cpu_xmm(GPR[rb])); // v1 = rb
c.pshufb(v1.get(), v2.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000))
// select bytes from [ra]:
c.pxor(v2.get(), XmmConst(u128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000
c.pxor(v2.get(), XmmConst(v128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000
c.movdqa(v3.get(), cpu_xmm(GPR[ra])); // v3 = ra
c.pshufb(v3.get(), v2.get()); // v3 = select(ra, (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000)
c.por(v1.get(), v3.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000)) | (v3)
@ -3208,7 +3208,7 @@ void SPURecompiler::MPYA(u32 rt, u32 ra, u32 rb, u32 rc)
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& vb = XmmGet(rb);
const XmmLink& vi = XmmAlloc();
c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff)));
c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff)));
c.pand(va.get(), vi.get());
c.pand(vb.get(), vi.get());
c.pmaddwd(va.get(), vb.get());

View File

@ -360,13 +360,13 @@ struct spu_int_ctrl_t
struct g_spu_imm_table_t
{
u128 fsmb[65536]; // table for FSMB, FSMBI instructions
u128 fsmh[256]; // table for FSMH instruction
u128 fsm[16]; // table for FSM instruction
v128 fsmb[65536]; // table for FSMB, FSMBI instructions
v128 fsmh[256]; // table for FSMH instruction
v128 fsm[16]; // table for FSM instruction
u128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
u128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
u128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions
v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions
class scale_table_t
{
@ -475,7 +475,7 @@ public:
memset(this, 0, sizeof(*this));
}
//slice -> 0 - 1 (double-precision slice index)
//NOTE: slices follow u128 indexing, i.e. slice 0 is RIGHT end of register!
//NOTE: slices follow v128 indexing, i.e. slice 0 is RIGHT end of register!
//roundTo -> FPSCR_RN_*
void setSliceRounding(u8 slice, u8 roundTo)
{
@ -523,7 +523,7 @@ public:
}
// Write the FPSCR
void Write(const u128 & r)
void Write(const v128 & r)
{
_u32[3] = r._u32[3] & 0x00000F07;
_u32[2] = r._u32[2] & 0x00003F07;
@ -532,7 +532,7 @@ public:
}
// Read the FPSCR
void Read(u128 & r)
void Read(v128 & r)
{
r._u32[3] = _u32[3];
r._u32[2] = _u32[2];
@ -544,7 +544,7 @@ public:
class SPUThread : public CPUThread
{
public:
u128 GPR[128]; // General-Purpose Registers
v128 GPR[128]; // General-Purpose Registers
SPU_FPSCR FPSCR;
std::unordered_map<u32, std::function<bool(SPUThread& SPU)>> m_addr_to_hle_function_map;
@ -643,18 +643,18 @@ public:
// Memory accessors: each one forwards to the matching vm helper after adding
// this thread's `offset` to the given address `lsa`.
// Where a u128 line is immediately followed by the same line spelled with
// v128, the pair is the removed/added sides of the rename diff.
u16 read16(u32 lsa) const { return vm::ps3::read16(lsa + offset); }
u32 read32(u32 lsa) const { return vm::ps3::read32(lsa + offset); }
u64 read64(u32 lsa) const { return vm::ps3::read64(lsa + offset); }
u128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); }
v128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); }
// write8 goes through vm::write8 directly; the wider writes use vm::ps3::.
void write8(u32 lsa, u8 data) const { vm::write8(lsa + offset, data); }
void write16(u32 lsa, u16 data) const { vm::ps3::write16(lsa + offset, data); }
void write32(u32 lsa, u32 data) const { vm::ps3::write32(lsa + offset, data); }
void write64(u32 lsa, u64 data) const { vm::ps3::write64(lsa + offset, data); }
void write128(u32 lsa, u128 data) const { vm::ps3::write128(lsa + offset, data); }
void write128(u32 lsa, v128 data) const { vm::ps3::write128(lsa + offset, data); }
// Overloads taking be_t<> values, forwarded to the same vm::ps3 writers.
void write16(u32 lsa, be_t<u16> data) const { vm::ps3::write16(lsa + offset, data); }
void write32(u32 lsa, be_t<u32> data) const { vm::ps3::write32(lsa + offset, data); }
void write64(u32 lsa, be_t<u64> data) const { vm::ps3::write64(lsa + offset, data); }
void write128(u32 lsa, be_t<u128> data) const { vm::ps3::write128(lsa + offset, data); }
void write128(u32 lsa, be_t<v128> data) const { vm::ps3::write128(lsa + offset, data); }
void RegisterHleFunction(u32 addr, std::function<bool(SPUThread & SPU)> function)
{

View File

@ -27,7 +27,7 @@ template<typename T> struct _to_atomic_subtype<T, 8>
// Specialization selected when the second template argument is 16
// (presumably sizeof(T) in bytes - confirm against the primary template):
// the atomic storage type is the 128-bit vector union.
template<typename T> struct _to_atomic_subtype<T, 16>
{
// Rename diff pair: u128 spelling (removed) followed by v128 spelling (added).
using type = u128;
using type = v128;
};
template<typename T> using atomic_subtype_t = typename _to_atomic_subtype<T>::type;
@ -127,7 +127,7 @@ private:
data = value;
}
// 128-bit overload of write_relaxed: stores `value` into `data` by calling
// sync_lock_test_and_set and discarding its result.
// NOTE(review): presumably an atomic exchange, used because a plain 16-byte
// assignment is not atomic - confirm against sync_lock_test_and_set's definition.
// Rename diff pair: the u128 signature (removed) is followed by the v128 one (added).
force_inline static void write_relaxed(volatile u128& data, const u128& value)
force_inline static void write_relaxed(volatile v128& data, const v128& value)
{
sync_lock_test_and_set(&data, value);
}
@ -137,9 +137,9 @@ private:
return data;
}
// 128-bit overload of read_relaxed: returns the result of a compare-and-swap
// on `value` with both the comparand and the replacement value-initialized ({}).
// NOTE(review): presumably the CAS returns the previous contents and, with
// comparand == replacement, never changes the stored value - confirm against
// sync_val_compare_and_swap's definition.
// The const_cast is needed because the CAS helper takes a non-const pointer.
// Rename diff pairs: each u128 line (removed) is followed by its v128 form (added).
force_inline static u128 read_relaxed(const volatile u128& value)
force_inline static v128 read_relaxed(const volatile v128& value)
{
return sync_val_compare_and_swap(const_cast<volatile u128*>(&value), {}, {});
return sync_val_compare_and_swap(const_cast<volatile v128*>(&value), {}, {});
}
public:

View File

@ -339,14 +339,14 @@ namespace vm
get_ref<be_t<u64>>(addr) = value;
}
// Returns a const reference to the be_t-wrapped 128-bit value at guest
// address `addr`, obtained through get_ref.
// Rename diff pairs: each u128 line (removed) is followed by its v128 form (added).
inline const be_t<u128>& read128(u32 addr)
inline const be_t<v128>& read128(u32 addr)
{
return get_ref<const be_t<u128>>(addr);
return get_ref<const be_t<v128>>(addr);
}
// Assigns the be_t-wrapped 128-bit `value` to the location that get_ref
// resolves for guest address `addr`.
// Rename diff pairs: each u128 line (removed) is followed by its v128 form (added).
inline void write128(u32 addr, be_t<u128> value)
inline void write128(u32 addr, be_t<v128> value)
{
get_ref<be_t<u128>>(addr) = value;
get_ref<be_t<v128>>(addr) = value;
}
}
@ -384,14 +384,14 @@ namespace vm
get_ref<le_t<u64>>(addr) = value;
}
// Returns a const reference to the le_t-wrapped 128-bit value at guest
// address `addr`, obtained through get_ref.
// Rename diff pairs: each u128 line (removed) is followed by its v128 form (added).
inline const le_t<u128>& read128(u32 addr)
inline const le_t<v128>& read128(u32 addr)
{
return get_ref<const le_t<u128>>(addr);
return get_ref<const le_t<v128>>(addr);
}
// Assigns the le_t-wrapped 128-bit `value` to the location that get_ref
// resolves for guest address `addr`.
// Rename diff pairs: each u128 line (removed) is followed by its v128 form (added).
inline void write128(u32 addr, le_t<u128> value)
inline void write128(u32 addr, le_t<v128> value)
{
get_ref<le_t<u128>>(addr) = value;
get_ref<le_t<v128>>(addr) = value;
}
}

View File

@ -47,7 +47,7 @@ namespace cb_detail
template<typename T, int g_count, int f_count, int v_count>
struct _func_arg<T, ARG_VECTOR, g_count, f_count, v_count>
{
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid callback argument type for ARG_VECTOR");
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid callback argument type for ARG_VECTOR");
force_inline static void set_value(PPUThread& CPU, const T& arg)
{
@ -91,7 +91,7 @@ namespace cb_detail
force_inline static bool _bind_func_args(PPUThread& CPU, T1 arg1, T... args)
{
const bool is_float = std::is_floating_point<T1>::value;
const bool is_vector = std::is_same<std::remove_cv_t<T1>, u128>::value;
const bool is_vector = std::is_same<std::remove_cv_t<T1>, v128>::value;
const bool is_context = std::is_same<T1, PPUThread&>::value;
const bool is_general = !is_float && !is_vector && !is_context;
@ -138,7 +138,7 @@ namespace cb_detail
template<typename T>
struct _func_res<T, ARG_VECTOR>
{
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid callback result type for ARG_VECTOR");
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid callback result type for ARG_VECTOR");
force_inline static T get_value(const PPUThread& CPU)
{
@ -156,7 +156,7 @@ namespace cb_detail
static_assert(!std::is_pointer<RT>::value, "Invalid callback result type (pointer)");
static_assert(!std::is_reference<RT>::value, "Invalid callback result type (reference)");
const bool is_float = std::is_floating_point<RT>::value;
const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
const _func_arg_type t = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
return _func_res<RT, t>::get_value(CPU);

View File

@ -3536,7 +3536,7 @@ s32 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm:
alloc_ls_blocks = size > 0x3D400 ? 0x7A : ((size - 0x400) >> 11);
if (ls_pattern)
{
u128 ls_pattern_128 = u128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
v128 ls_pattern_128 = v128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
u32 ls_blocks = 0;
for (auto i = 0; i < 128; i++)
{
@ -3551,8 +3551,8 @@ s32 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm:
return CELL_SPURS_TASK_ERROR_INVAL;
}
u128 _0 = u128::from32(0);
if ((ls_pattern_128 & u128::from32r(0xFC000000)) != _0)
v128 _0 = v128::from32(0);
if ((ls_pattern_128 & v128::from32r(0xFC000000)) != _0)
{
// Prevent save/restore to SPURS management area
return CELL_SPURS_TASK_ERROR_INVAL;
@ -3666,7 +3666,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr<CellSpursTaskset> taskset, u32
return CELL_SPURS_TASK_ERROR_INVAL;
}
be_t<u128> _0(u128::from32(0));
be_t<v128> _0(v128::from32(0));
bool disabled = taskset->enabled.value()._bit[taskId];
auto invalid = (taskset->ready & taskset->pending_ready) != _0 || (taskset->running & taskset->waiting) != _0 || disabled ||
((taskset->running | taskset->ready | taskset->pending_ready | taskset->waiting | taskset->signalled) & ~taskset->enabled) != _0;
@ -3676,7 +3676,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr<CellSpursTaskset> taskset, u32
return CELL_SPURS_TASK_ERROR_SRCH;
}
auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t<u128>(u128::fromBit(taskId))) != _0 ? true : false;
auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t<v128>(v128::fromBit(taskId))) != _0 ? true : false;
auto signalled = taskset->signalled.value();
signalled._bit[taskId] = true;
taskset->signalled = signalled;

View File

@ -746,12 +746,12 @@ struct set_alignment(128) CellSpursTaskset
CHECK_SIZE(TaskInfo, 48);
be_t<u128> running; // 0x00
be_t<u128> ready; // 0x10
be_t<u128> pending_ready; // 0x20
be_t<u128> enabled; // 0x30
be_t<u128> signalled; // 0x40
be_t<u128> waiting; // 0x50
be_t<v128> running; // 0x00
be_t<v128> ready; // 0x10
be_t<v128> pending_ready; // 0x20
be_t<v128> enabled; // 0x30
be_t<v128> signalled; // 0x40
be_t<v128> waiting; // 0x50
vm::bptr<CellSpurs, u64> spurs; // 0x60
be_t<u64> args; // 0x68
u8 enable_clear_ls; // 0x70
@ -806,7 +806,7 @@ struct set_alignment(128) CellSpursTaskset2
u32 event_flag_id1; // 0x1898
u32 event_flag_id2; // 0x189C
u8 unk3[0x1980 - 0x18A0]; // 0x18A0
be_t<u128> task_exit_code[128]; // 0x1980
be_t<v128> task_exit_code[128]; // 0x1980
u8 unk4[0x2900 - 0x2180]; // 0x2180
};
@ -894,10 +894,10 @@ struct SpursTasksetContext
u8 x27D8[0x2840 - 0x27D8]; // 0x27D8
u8 moduleId[16]; // 0x2840
u8 stackArea[0x2C80 - 0x2850]; // 0x2850
be_t<u128> savedContextLr; // 0x2C80
be_t<u128> savedContextSp; // 0x2C90
be_t<u128> savedContextR80ToR127[48]; // 0x2CA0
be_t<u128> savedContextFpscr; // 0x2FA0
be_t<v128> savedContextLr; // 0x2C80
be_t<v128> savedContextSp; // 0x2C90
be_t<v128> savedContextR80ToR127[48]; // 0x2CA0
be_t<v128> savedContextFpscr; // 0x2FA0
be_t<u32> savedWriteTagGroupQueryMask; // 0x2FB0
be_t<u32> savedSpuWriteEventMask; // 0x2FB4
be_t<u32> tasksetMgmtAddr; // 0x2FB8

View File

@ -1162,7 +1162,7 @@ void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
auto taskset = vm::get_ptr<CellSpursTaskset>(spu.offset + 0x2700);
spu.GPR[2].clear();
spu.GPR[3] = u128::from64r(taskArgs._u64[0], taskArgs._u64[1]);
spu.GPR[3] = v128::from64r(taskArgs._u64[0], taskArgs._u64[1]);
spu.GPR[4]._u64[1] = taskset->args;
spu.GPR[4]._u64[0] = taskset->spurs.addr();
for (auto i = 5; i < 128; i++) {
@ -1183,7 +1183,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
auto taskset = ctxt->taskset.priv_ptr();
// Verify taskset state is valid
be_t<u128> _0(u128::from32(0));
be_t<v128> _0(v128::from32(0));
if ((taskset->waiting & taskset->running) != _0 || (taskset->ready & taskset->pending_ready) != _0 ||
((taskset->running | taskset->ready | taskset->pending_ready | taskset->signalled | taskset->waiting) & ~taskset->enabled) != _0) {
assert(!"Invalid taskset state");
@ -1199,13 +1199,13 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
}
}
u128 readyButNotRunning;
v128 readyButNotRunning;
u8 selectedTaskId;
u128 running = taskset->running.value();
u128 waiting = taskset->waiting.value();
u128 enabled = taskset->enabled.value();
u128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready));
u128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready);
v128 running = taskset->running.value();
v128 waiting = taskset->waiting.value();
v128 enabled = taskset->enabled.value();
v128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready));
v128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready);
switch (request) {
case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
@ -1235,7 +1235,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
case SPURS_TASKSET_REQUEST_POLL:
readyButNotRunning = ready & ~running;
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task));
readyButNotRunning = readyButNotRunning & ~(v128::fromBit(taskset->wkl_flag_wait_task));
}
rc = readyButNotRunning != _0 ? 1 : 0;
@ -1260,7 +1260,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
case SPURS_TASKSET_REQUEST_SELECT_TASK:
readyButNotRunning = ready & ~running;
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task));
readyButNotRunning = readyButNotRunning & ~(v128::fromBit(taskset->wkl_flag_wait_task));
}
// Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness.
@ -1402,7 +1402,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) {
u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
u32 lsBlocks = 0;
u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
for (auto i = 0; i < 128; i++) {
if (ls_pattern._bit[i]) {
lsBlocks++;
@ -1421,7 +1421,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) {
}
// Get the processor context
u128 r;
v128 r;
spu.FPSCR.Read(r);
ctxt->savedContextFpscr = r;
ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask);
@ -1486,7 +1486,7 @@ void spursTasksetDispatch(SPUThread & spu) {
//spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
ctxt->savedContextLr = u128::from32r(entryPoint);
ctxt->savedContextLr = v128::from32r(entryPoint);
ctxt->guidAddr = lowestLoadAddr;
ctxt->tasksetMgmtAddr = 0x2700;
ctxt->x2FC0 = 0;
@ -1516,8 +1516,8 @@ void spursTasksetDispatch(SPUThread & spu) {
}
// If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well
u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
if (ls_pattern != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
if (ls_pattern != v128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
// Load the ELF
u32 entryPoint;
if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf.addr(), true) != CELL_OK) {

View File

@ -53,7 +53,7 @@ namespace ppu_func_detail
template<typename T, u32 g_count, u32 f_count, u32 v_count>
struct bind_arg<T, ARG_VECTOR, g_count, f_count, v_count>
{
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function argument type for ARG_VECTOR");
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function argument type for ARG_VECTOR");
static force_inline T get_arg(PPUThread& CPU)
{
@ -124,7 +124,7 @@ namespace ppu_func_detail
template<typename T>
struct bind_result<T, ARG_VECTOR>
{
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function result type for ARG_VECTOR");
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function result type for ARG_VECTOR");
static force_inline void put_result(PPUThread& CPU, const T& result)
{
@ -176,7 +176,7 @@ namespace ppu_func_detail
// TODO: check calculations
const bool is_float = std::is_floating_point<T>::value;
const bool is_vector = std::is_same<std::remove_cv_t<T>, u128>::value;
const bool is_vector = std::is_same<std::remove_cv_t<T>, v128>::value;
const bool is_context = std::is_same<T, PPUThread&>::value;
const bool is_variadic = std::is_same<std::remove_cv_t<T>, ppu_va_args_t>::value;
const bool is_general = !is_float && !is_vector && !is_context && !is_variadic;
@ -201,7 +201,7 @@ namespace ppu_func_detail
static_assert(!std::is_pointer<RT>::value, "Invalid function result type (pointer)");
static_assert(!std::is_reference<RT>::value, "Invalid function result type (reference)");
static const bool is_float = std::is_floating_point<RT>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
};

View File

@ -318,10 +318,10 @@ s32 sys_spu_thread_group_start(u32 id)
t->PC = image->entry_point;
t->run();
t->GPR[3] = u128::from64(0, args.arg1);
t->GPR[4] = u128::from64(0, args.arg2);
t->GPR[5] = u128::from64(0, args.arg3);
t->GPR[6] = u128::from64(0, args.arg4);
t->GPR[3] = v128::from64(0, args.arg1);
t->GPR[4] = v128::from64(0, args.arg2);
t->GPR[5] = v128::from64(0, args.arg3);
t->GPR[6] = v128::from64(0, args.arg4);
t->status.exchange(SPU_STATUS_RUNNING);
}