SPU LLVM: Expand branch optimizations for more instructions

This commit is contained in:
Malcolm Jestadt 2021-05-27 02:55:49 -04:00 committed by Ivan
parent 99af70ae59
commit 7c2b08b9b6

View File

@ -8578,6 +8578,28 @@ public:
void BIZ(spu_opcode_t op) //
{
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
// Check sign bit instead (optimization)
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) >= 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(cond.value, target, add_block_next());
return true;
}
return false;
}))
{
return;
}
const auto cond = eval(extract(get_vr(op.rt), 3) == 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
@ -8587,6 +8609,28 @@ public:
void BINZ(spu_opcode_t op) //
{
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
// Check sign bit instead (optimization)
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) < 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(cond.value, target, add_block_next());
return true;
}
return false;
}))
{
return;
}
const auto cond = eval(extract(get_vr(op.rt), 3) != 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
@ -8596,6 +8640,28 @@ public:
void BIHZ(spu_opcode_t op) //
{
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
// Check sign bits of 2 vector elements (optimization)
if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(cond.value, target, add_block_next());
return true;
}
return false;
}))
{
return;
}
const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) == 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
@ -8605,6 +8671,28 @@ public:
void BIHNZ(spu_opcode_t op) //
{
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
// Check sign bits of 2 vector elements (optimization)
if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) != 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(cond.value, target, add_block_next());
return true;
}
return false;
}))
{
return;
}
const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) != 0);
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
const auto target = add_block_indirect(op, addr);
@ -8830,6 +8918,30 @@ public:
const u32 target = spu_branch_target(m_pos, op.i16);
// Check sign bits of 2 vector elements (optimization)
if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
if (target != m_pos + 4)
{
m_block->block_end = m_ir->GetInsertBlock();
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
//const auto cond = eval((m & 0x3000) == 0);
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
return true;
}
}
return false;
}))
{
return;
}
if (target != m_pos + 4)
{
m_block->block_end = m_ir->GetInsertBlock();
@ -8851,6 +8963,29 @@ public:
const u32 target = spu_branch_target(m_pos, op.i16);
// Check sign bits of 2 vector elements (optimization)
if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
{
using VT = typename decltype(MP)::type;
if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
{
if (target != m_pos + 4)
{
m_block->block_end = m_ir->GetInsertBlock();
const auto a = get_vr<s8[16]>(op.rt);
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) != 0);
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
return true;
}
}
return false;
}))
{
return;
}
if (target != m_pos + 4)
{
m_block->block_end = m_ir->GetInsertBlock();