PPU LLVM: New "stack" strategy

Minor pessimization: all memory accesses are volatile
Special handling of prologues and epilogues
Minor optimizing assumption for SP
This commit is contained in:
Nekotekina 2016-07-24 20:56:14 +03:00
parent 712c04b2ad
commit 5c9f83c3a8
3 changed files with 47 additions and 65 deletions

View File

@ -585,6 +585,32 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
}
}
continue;
}
if (const auto li = dyn_cast<LoadInst>(inst))
{
// TODO: more careful check
if (li->getNumUses() == 0)
{
// Remove unreferenced volatile loads
li->eraseFromParent();
}
continue;
}
if (const auto si = dyn_cast<StoreInst>(inst))
{
// TODO: more careful check
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
{
// Remove undef volatile stores
si->eraseFromParent();
}
continue;
}
}
}
@ -595,6 +621,7 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
// Remove unused functions, structs, global variables, etc
mpm.add(createStripDeadPrototypesPass());
//mpm.add(createFunctionInliningPass());
mpm.add(createDeadInstEliminationPass());
mpm.run(*module);
std::string result;

View File

@ -116,7 +116,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
m_start_addr = info.addr;
m_end_addr = info.addr + info.size;
m_blocks.clear();
m_value_usage.clear();
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
@ -129,9 +128,10 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
// Non-volatile registers with special meaning (TODO)
if (info.attr & ppu_attr::uses_r0) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g");
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp");
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".spg");
m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc");
m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls");
m_gpr[1] = m_ir->CreateAlloca(GetType<u64>(), nullptr, ".sp");
// Registers used for args or results (TODO)
for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i));
@ -139,9 +139,9 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i));
/* Create local variables */
for (u32 i = 0; i < 32; i++) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
for (u32 i = 0; i < 32; i++) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
for (u32 i = 0; i < 32; i++) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
for (u32 i = 0; i < 32; i++) if (!m_gpr[i]) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
for (u32 i = 0; i < 32; i++) if (!m_fpr[i]) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
for (u32 i = 0; i < 32; i++) if (!m_vr[i]) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
for (u32 i = 0; i < 32; i++)
{
@ -201,6 +201,7 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
//m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType<bool>(), nullptr, "fpscr.rn.lsb");
/* Initialize local variables */
m_ir->CreateStore(m_ir->CreateLoad(m_g_gpr[1]), m_gpr[1]); // SP
m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO
m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT
m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj);
@ -275,23 +276,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
CallFunction(0, true, _ctr);
}
//for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != end;)
//{
// const auto inst = &*i++;
// // Remove unnecessary stores of global variables created by PrepareGlobalArguments() and similar functions
// if (const auto si = dyn_cast<StoreInst>(inst))
// {
// const auto g = dyn_cast<GlobalVariable>(si->getOperand(1));
// if (g && m_value_usage[g] == 0)
// {
// si->eraseFromParent();
// continue;
// }
// }
//}
return m_function;
}
@ -572,12 +556,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{
// Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, !IsStackAddr(addr));
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, true);
return m_ir->CreateBitCast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
}
// Read normally
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, !IsStackAddr(addr));
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, true);
}
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
@ -593,7 +577,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
}
// Write
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, !IsStackAddr(addr));
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, true);
}
void PPUTranslator::CompilationError(const std::string& error)
@ -3942,20 +3926,27 @@ void PPUTranslator::UNK(ppu_opcode_t op)
// Read general-purpose register `r`: loads the value stored in the slot m_gpr[r]
// and truncates it to an integer of `num_bits` bits.
// Returns the truncated IR value.
Value* PPUTranslator::GetGpr(u32 r, u32 num_bits)
{
// Bump the per-slot usage counter, keyed by the slot's IR value.
// NOTE(review): this listing looks like a diff rendered without +/- markers; this
// statement may belong to the removed side of the change - confirm.
m_value_usage[m_gpr[r]]++;
// Load the register slot, then narrow to the requested width
return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits));
}
// Write `value` to general-purpose register `r`. The value is zero-extended to u64
// before being stored into the slot m_gpr[r]; for r == 1 it is additionally stored
// into m_g_gpr[1].
// NOTE(review): this listing looks like a diff rendered without +/- markers. The first
// two statements and the `i64_val` sequence after them both store to m_gpr[r] and may be
// the old and new versions of the same body shown together - confirm before treating
// the double store as intentional.
void PPUTranslator::SetGpr(u32 r, Value* value)
{
// Zero-extend and store into the register slot, then bump the slot's usage counter
m_ir->CreateStore(m_ir->CreateZExt(value, GetType<u64>()), m_gpr[r]);
m_value_usage[m_gpr[r]]++;
// Zero-extended value reused by the stores below
const auto i64_val = m_ir->CreateZExt(value, GetType<u64>());
// Unconditional: every register write goes to its m_gpr slot
if (true) // Update local: all regs
{
m_ir->CreateStore(i64_val, m_gpr[r]);
}
// r1 is mirrored into m_g_gpr[1] on every write
if (r == 1) // Update global: SP
{
m_ir->CreateStore(i64_val, m_g_gpr[r]);
}
}
Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
{
const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8);
m_value_usage[m_fpr[r]]++;
if (!as_int && bits == 64)
{
@ -3979,13 +3970,11 @@ void PPUTranslator::SetFpr(u32 r, Value* val)
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);
m_value_usage[m_fpr[r]]++;
}
Value* PPUTranslator::GetVr(u32 vr, VrType type)
{
const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16);
m_value_usage[m_vr[vr]]++;
switch (type)
{
@ -4019,7 +4008,6 @@ void PPUTranslator::SetVr(u32 vr, Value* value)
}
m_ir->CreateAlignedStore(m_ir->CreateBitCast(value, GetType<u32[4]>()), m_vr[vr], 16);
m_value_usage[m_vr[vr]]++;
}
Value* PPUTranslator::GetCrb(u32 crb)
@ -4230,31 +4218,4 @@ Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
return use_ctr ? use_ctr : use_cond;
}
// Heuristically decide whether the address expression `addr` is derived from the
// r1 slot (m_gpr[1]).
//
// addr: IR value holding an address expression.
// Returns true when `addr` is
//   - a load whose pointer operand is m_gpr[1], or
//   - an add/and/or/xor with at least one operand that satisfies this predicate, or
//   - a sub whose left-hand operand satisfies this predicate.
// Any other value (including other binary operators) yields false.
bool PPUTranslator::IsStackAddr(Value* addr)
{
	// Analyse various binary ops
	if (const auto bin_op = dyn_cast<BinaryOperator>(addr))
	{
		// Compare the actual opcode. Instruction::isBinaryOp(unsigned) is a static
		// opcode-class predicate: it is true for every binary opcode, so calling it
		// through bin_op cannot distinguish Add from Sub (or from Mul, Shl, ...).
		switch (bin_op->getOpcode())
		{
		case Instruction::Add:
		case Instruction::And:
		case Instruction::Or:
		case Instruction::Xor:
		{
			// Either operand may carry the base
			return IsStackAddr(bin_op->getOperand(0)) || IsStackAddr(bin_op->getOperand(1));
		}
		case Instruction::Sub:
		{
			// Only the minuend is inspected; the subtrahend is treated as an offset
			return IsStackAddr(bin_op->getOperand(0));
		}
		default:
		{
			// TODO
			break;
		}
		}
	}

	// Detect load instruction
	if (const auto load_op = dyn_cast<LoadInst>(addr))
	{
		return load_op->getOperand(0) == m_gpr[1];
	}

	return false;
}
#endif

View File

@ -149,9 +149,6 @@ class PPUTranslator final //: public CPUTranslator
/* Variables */
// Explicit register usage counter
std::unordered_map<llvm::Value*, u64> m_value_usage;
// Memory base
llvm::Value* m_base;
@ -386,9 +383,6 @@ public:
// Branch to next instruction if condition failed, never branch on nullptr
void UseCondition(llvm::Value* = nullptr);
// Check whether the address is stack
bool IsStackAddr(llvm::Value* addr);
// Get memory pointer
llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type);