LLVM DSL / SPU LLVM: implement infinite precision shifts

Remove old make_*** helpers in favor of matchable expressions.
This commit is contained in:
Nekotekina 2021-09-06 21:07:06 +03:00
parent 67b3fc70f8
commit 543fb7a9cb
2 changed files with 118 additions and 60 deletions

View File

@ -3250,6 +3250,84 @@ public:
});
}
// Infinite-precision shift left.
// Builds the DSL expression select(b < esz, a << b, splat<CT>(0)): where the shift
// count is below esz the result is a << b, otherwise it is zero.
// esz is llvm_value_t<CT>::esize (presumably the element width in bits -- confirm).
// The lambda passed as the second argument of expr() is the matcher for this
// expression: it tries to recognise the same select/shift shape in `value` and
// returns the combined match results of the T and U operand expressions. On any
// failure it sets `value` to nullptr and returns a default-constructed tuple.
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_shl(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
// Generic placeholder of the common type, used for every operand position in the pattern
static const auto M = match<CT>();
// Bindings follow the placeholder order in the pattern: b (compared count), a (shifted
// value), b2 (shift count). The compared count and the shift count must be equal.
if (auto [ok, b, a, b2] = match_expr(value, _m, select(M < esz, M << M, splat<CT>(0))); ok && b.eq(b2))
{
// Let the original operand expression types match their captured sub-values.
// NOTE(review): eq() with no arguments presumably reports whether the sub-match
// left the captured value valid -- confirm against the DSL definition.
if (auto r1 = llvm_expr_t<T>{}.match(a.value, _m); a.eq())
{
if (auto r2 = llvm_expr_t<U>{}.match(b.value, _m); b.eq())
{
return std::tuple_cat(r1, r2);
}
}
}
// No match: signal failure through the in/out value pointer
value = nullptr;
return {};
});
}
// Infinite-precision logical shift right (unsigned).
// Builds the DSL expression select(b < esz, a >> b, splat<CT>(0)): where the shift
// count is below esz the result is a >> b, otherwise it is zero.
// esz is llvm_value_t<CT>::esize (presumably the element width in bits -- confirm).
// The lambda passed as the second argument of expr() is the matcher for this
// expression: it tries to recognise the same select/shift shape in `value` and
// returns the combined match results of the T and U operand expressions. On any
// failure it sets `value` to nullptr and returns a default-constructed tuple.
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_lshr(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
// Generic placeholder of the common type, used for every operand position in the pattern
static const auto M = match<CT>();
// Bindings follow the placeholder order in the pattern: b (compared count), a (shifted
// value), b2 (shift count). The compared count and the shift count must be equal.
if (auto [ok, b, a, b2] = match_expr(value, _m, select(M < esz, M >> M, splat<CT>(0))); ok && b.eq(b2))
{
// Let the original operand expression types match their captured sub-values.
// NOTE(review): eq() with no arguments presumably reports whether the sub-match
// left the captured value valid -- confirm against the DSL definition.
if (auto r1 = llvm_expr_t<T>{}.match(a.value, _m); a.eq())
{
if (auto r2 = llvm_expr_t<U>{}.match(b.value, _m); b.eq())
{
return std::tuple_cat(r1, r2);
}
}
}
// No match: signal failure through the in/out value pointer
value = nullptr;
return {};
});
}
// Infinite-precision arithmetic shift right (signed).
// Builds the DSL expression a >> select(b > (esz - 1), splat<CT>(esz - 1), b): the
// shift count is clamped to esz - 1, so larger counts behave like a shift by esz - 1.
// esz is llvm_value_t<CT>::esize (presumably the element width in bits -- confirm).
// The lambda passed as the second argument of expr() is the matcher for this
// expression: it tries to recognise the same shift/select shape in `value` and
// returns the combined match results of the T and U operand expressions. On any
// failure it sets `value` to nullptr and returns a default-constructed tuple.
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_ashr(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
// Generic placeholder of the common type, used for every operand position in the pattern
static const auto M = match<CT>();
// Bindings follow the placeholder order in the pattern: a (shifted value), b (compared
// count), b2 (unclamped count in the select). Both count captures must be equal.
if (auto [ok, a, b, b2] = match_expr(value, _m, M >> select(M > (esz - 1), splat<CT>(esz - 1), M)); ok && b.eq(b2))
{
// Let the original operand expression types match their captured sub-values.
// NOTE(review): eq() with no arguments presumably reports whether the sub-match
// left the captured value valid -- confirm against the DSL definition.
if (auto r1 = llvm_expr_t<T>{}.match(a.value, _m); a.eq())
{
if (auto r2 = llvm_expr_t<U>{}.match(b.value, _m); b.eq())
{
return std::tuple_cat(r1, r2);
}
}
}
// No match: signal failure through the in/out value pointer
value = nullptr;
return {};
});
}
template <typename... Types>
llvm::Function* get_intrinsic(llvm::Intrinsic::ID id)
{

View File

@ -6420,128 +6420,108 @@ public:
set_vr(op.rt, absd(a, b));
}
// Reads register ra as T, applies rol() with the supplied amount and stores the result in rt.
template <typename T>
void make_spu_rol(spu_opcode_t op, value_t<T> by)
{
	const auto src = get_vr<T>(op.ra);

	set_vr(op.rt, rol(src, by));
}
// Stores ra >> count in rt, where count = (-by) & (2 * esize - 1); elements whose
// count is not below esize are set to zero instead.
template <typename R, typename T>
void make_spu_rotate_mask(spu_opcode_t op, value_t<T> by)
{
	value_t<R> count;
	static_assert(count.esize == by.esize);

	// Negate the amount and keep only the bits covered by 2 * esize - 1
	const auto count_mask = by.esize * 2 - 1;
	count.value = m_ir->CreateAnd(m_ir->CreateNeg(by.value), count_mask);

	if constexpr (!by.is_vector)
	{
		// Non-vector amount: splat it (count.is_vector is passed as the element count)
		count.value = m_ir->CreateVectorSplat(count.is_vector, count.value);
	}

	const auto src = get_vr<R>(op.ra);

	set_vr(op.rt, select(count < by.esize, src >> count, splat<R>(0)));
}
// Stores ra >> count in rt, where count = (-by) & (2 * esize - 1) clamped to at
// most esize - 1 (unsigned comparison).
template <typename R, typename T>
void make_spu_rotate_sext(spu_opcode_t op, value_t<T> by)
{
	value_t<R> count;
	static_assert(count.esize == by.esize);

	// Negate the amount and keep only the bits covered by 2 * esize - 1
	const auto count_mask = by.esize * 2 - 1;
	count.value = m_ir->CreateAnd(m_ir->CreateNeg(by.value), count_mask);

	if constexpr (!by.is_vector)
	{
		// Non-vector amount: splat it (count.is_vector is passed as the element count)
		count.value = m_ir->CreateVectorSplat(count.is_vector, count.value);
	}

	// Clamp: keep the count while limit > count, otherwise use the limit
	value_t<R> limit = eval(splat<R>(by.esize - 1));
	const auto below_limit = m_ir->CreateICmpUGT(limit.value, count.value);
	count.value = m_ir->CreateSelect(below_limit, count.value, limit.value);

	const auto src = get_vr<R>(op.ra);

	set_vr(op.rt, src >> count);
}
// Stores ra << count in rt, where count = by & (2 * esize - 1); elements whose
// count is not below esize are set to zero instead.
template <typename R, typename T>
void make_spu_shift_left(spu_opcode_t op, value_t<T> by)
{
	value_t<R> count;
	static_assert(count.esize == by.esize);

	// Keep only the bits of the amount covered by 2 * esize - 1
	const auto count_mask = by.esize * 2 - 1;
	count.value = m_ir->CreateAnd(by.value, count_mask);

	if constexpr (!by.is_vector)
	{
		// Non-vector amount: splat it (count.is_vector is passed as the element count)
		count.value = m_ir->CreateVectorSplat(count.is_vector, count.value);
	}

	const auto src = get_vr<R>(op.ra);

	set_vr(op.rt, select(count < by.esize, src << count, splat<R>(0)));
}
// ROT: reads ra and rb as u32[4] and stores rol(ra, rb) in rt.
// The result is computed and written exactly once.
void ROT(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, rol(a, b));
}
// ROTM: reads ra and rb as u32[4] and stores inf_lshr(ra, -rb & 63) in rt.
// The count is the negated rb masked to 6 bits; inf_lshr produces zero for
// counts that are not below the element size.
void ROTM(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, inf_lshr(a, -b & 63));
}
// ROTMA: reads ra and rb as s32[4] and stores inf_ashr(ra, -rb & 63) in rt.
// The count is the negated rb masked to 6 bits; inf_ashr clamps it to the
// element size minus one.
void ROTMA(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);
	set_vr(op.rt, inf_ashr(a, -b & 63));
}
// SHL: reads ra and rb as u32[4] and stores inf_shl(ra, rb & 63) in rt.
// The count is rb masked to 6 bits; inf_shl produces zero for counts that
// are not below the element size.
void SHL(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, inf_shl(a, b & 63));
}
// ROTH: reads ra and rb as u16[8] and stores rol(ra, rb) in rt.
// The result is computed and written exactly once.
void ROTH(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
	set_vr(op.rt, rol(a, b));
}
// ROTHM: reads ra and rb as u16[8] and stores inf_lshr(ra, -rb & 31) in rt.
// The count is the negated rb masked to 5 bits; inf_lshr produces zero for
// counts that are not below the element size.
void ROTHM(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
	set_vr(op.rt, inf_lshr(a, -b & 31));
}
// ROTMAH: reads ra and rb as s16[8] and stores inf_ashr(ra, -rb & 31) in rt.
// The count is the negated rb masked to 5 bits; inf_ashr clamps it to the
// element size minus one.
void ROTMAH(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);
	set_vr(op.rt, inf_ashr(a, -b & 31));
}
// SHLH: reads ra and rb as u16[8] and stores inf_shl(ra, rb & 31) in rt.
// The count is rb masked to 5 bits; inf_shl produces zero for counts that
// are not below the element size.
void SHLH(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
	set_vr(op.rt, inf_shl(a, b & 31));
}
// ROTI: reads ra as u32[4], takes the amount from the i7 immediate (as u32[4])
// and stores rol(ra, imm) in rt. The result is computed and written exactly once.
void ROTI(spu_opcode_t op)
{
	const auto a = get_vr<u32[4]>(op.ra);
	const auto i = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, rol(a, i));
}
// ROTMI: reads ra as u32[4], takes the amount from the i7 immediate (as u32[4])
// and stores inf_lshr(ra, -imm & 63) in rt. inf_lshr produces zero for counts
// that are not below the element size.
void ROTMI(spu_opcode_t op)
{
	const auto a = get_vr<u32[4]>(op.ra);
	const auto i = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, inf_lshr(a, -i & 63));
}
// ROTMAI: reads ra as s32[4], takes the amount from the i7 immediate (as s32[4])
// and stores inf_ashr(ra, -imm & 63) in rt. inf_ashr clamps the count to the
// element size minus one.
void ROTMAI(spu_opcode_t op)
{
	const auto a = get_vr<s32[4]>(op.ra);
	const auto i = get_imm<s32[4]>(op.i7, false);
	set_vr(op.rt, inf_ashr(a, -i & 63));
}
// SHLI: reads ra as u32[4], takes the amount from the i7 immediate (as u32[4])
// and stores inf_shl(ra, imm & 63) in rt. inf_shl produces zero for counts
// that are not below the element size.
void SHLI(spu_opcode_t op)
{
	const auto a = get_vr<u32[4]>(op.ra);
	const auto i = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, inf_shl(a, i & 63));
}
// ROTHI: reads ra as u16[8], takes the amount from the i7 immediate (as u16[8])
// and stores rol(ra, imm) in rt. The result is computed and written exactly once.
void ROTHI(spu_opcode_t op)
{
	const auto a = get_vr<u16[8]>(op.ra);
	const auto i = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, rol(a, i));
}
// ROTHMI: reads ra as u16[8], takes the amount from the i7 immediate (as u16[8])
// and stores inf_lshr(ra, -imm & 31) in rt. inf_lshr produces zero for counts
// that are not below the element size.
void ROTHMI(spu_opcode_t op)
{
	const auto a = get_vr<u16[8]>(op.ra);
	const auto i = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, inf_lshr(a, -i & 31));
}
// ROTMAHI: reads ra as s16[8], takes the amount from the i7 immediate (as s16[8])
// and stores inf_ashr(ra, -imm & 31) in rt. inf_ashr clamps the count to the
// element size minus one.
void ROTMAHI(spu_opcode_t op)
{
	const auto a = get_vr<s16[8]>(op.ra);
	const auto i = get_imm<s16[8]>(op.i7, false);
	set_vr(op.rt, inf_ashr(a, -i & 31));
}
// SHLHI: reads ra as u16[8], takes the amount from the i7 immediate (as u16[8])
// and stores inf_shl(ra, imm & 31) in rt. inf_shl produces zero for counts
// that are not below the element size.
void SHLHI(spu_opcode_t op)
{
	const auto a = get_vr<u16[8]>(op.ra);
	const auto i = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, inf_shl(a, i & 31));
}
void A(spu_opcode_t op)