llvm: update code to new API (#13500)

* llvm: update code to new API

* llvm: remove OLDLLVM define
This commit is contained in:
oltolm 2023-03-10 23:57:21 +01:00 committed by GitHub
parent ec8cb1668c
commit 520524285a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 187 additions and 162 deletions

View File

@ -1304,7 +1304,7 @@ std::string jit_compiler::cpu(const std::string& _cpu)
if (m_cpu.empty()) if (m_cpu.empty())
{ {
m_cpu = llvm::sys::getHostCPUName().operator std::string(); m_cpu = llvm::sys::getHostCPUName().str();
if (m_cpu == "sandybridge" || if (m_cpu == "sandybridge" ||
m_cpu == "ivybridge" || m_cpu == "ivybridge" ||

View File

@ -299,7 +299,7 @@ llvm::Constant* cpu_translator::make_const_vector<v128>(v128 v, llvm::Type* t, u
{ {
if (const auto ct = llvm::dyn_cast<llvm::IntegerType>(t); ct && ct->getBitWidth() == 128) if (const auto ct = llvm::dyn_cast<llvm::IntegerType>(t); ct && ct->getBitWidth() == 128)
{ {
return llvm::ConstantInt::get(t, llvm::APInt(128, llvm::makeArrayRef(reinterpret_cast<const u64*>(v._bytes), 2))); return llvm::ConstantInt::get(t, llvm::APInt(128, llvm::ArrayRef(reinterpret_cast<const u64*>(v._bytes), 2)));
} }
ensure(t->isVectorTy()); ensure(t->isVectorTy());
@ -309,27 +309,27 @@ llvm::Constant* cpu_translator::make_const_vector<v128>(v128 v, llvm::Type* t, u
if (sct->isIntegerTy(8)) if (sct->isIntegerTy(8))
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(v._bytes), 16)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(v._bytes), 16));
} }
if (sct->isIntegerTy(16)) if (sct->isIntegerTy(16))
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u16*>(v._bytes), 8)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u16*>(v._bytes), 8));
} }
if (sct->isIntegerTy(32)) if (sct->isIntegerTy(32))
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u32*>(v._bytes), 4)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u32*>(v._bytes), 4));
} }
if (sct->isIntegerTy(64)) if (sct->isIntegerTy(64))
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u64*>(v._bytes), 2)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u64*>(v._bytes), 2));
} }
if (sct->isFloatTy()) if (sct->isFloatTy())
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const f32*>(v._bytes), 4)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const f32*>(v._bytes), 4));
} }
if (sct->isDoubleTy()) if (sct->isDoubleTy())
{ {
return llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const f64*>(v._bytes), 2)); return llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const f64*>(v._bytes), 2));
} }
fmt::throw_exception("[line %u] No supported constant type", _line); fmt::throw_exception("[line %u] No supported constant type", _line);

View File

@ -3429,7 +3429,7 @@ public:
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c)) if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
{ {
result.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask), 16)); result.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>()); result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
result.value = m_ir->CreateShuffleVector(data0, zeros, result.value); result.value = m_ir->CreateShuffleVector(data0, zeros, result.value);
return result; return result;
@ -3472,7 +3472,7 @@ public:
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c)) if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
{ {
result.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask), 16)); result.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>()); result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
result.value = m_ir->CreateShuffleVector(data0, data1, result.value); result.value = m_ir->CreateShuffleVector(data0, data1, result.value);
return result; return result;
@ -3512,7 +3512,7 @@ public:
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c)) if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
{ {
result.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask), 16)); result.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>()); result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
result.value = m_ir->CreateShuffleVector(data0, data1, result.value); result.value = m_ir->CreateShuffleVector(data0, data1, result.value);
return result; return result;
@ -3530,7 +3530,7 @@ public:
u8 mask16[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; u8 mask16[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
// insert the second source operand into the same vector as the first source operand and expand to 256 bit width // insert the second source operand into the same vector as the first source operand and expand to 256 bit width
shuffle.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask32), 32)); shuffle.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask32), 32));
shuffle.value = m_ir->CreateZExt(shuffle.value, get_type<u32[32]>()); shuffle.value = m_ir->CreateZExt(shuffle.value, get_type<u32[32]>());
intermediate.value = m_ir->CreateShuffleVector(data0, data1, shuffle.value); intermediate.value = m_ir->CreateShuffleVector(data0, data1, shuffle.value);
@ -3541,22 +3541,23 @@ public:
intermediate.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_permvar_qi_256), {intermediate.value, shuffleindex.value}); intermediate.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_permvar_qi_256), {intermediate.value, shuffleindex.value});
// convert the 256 bit vector back to 128 bits // convert the 256 bit vector back to 128 bits
result.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask16), 16)); result.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask16), 16));
result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>()); result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
result.value = m_ir->CreateShuffleVector(intermediate.value, zeroes32, result.value); result.value = m_ir->CreateShuffleVector(intermediate.value, zeroes32, result.value);
return result; return result;
} }
template <typename T>
llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i) llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i)
{ {
return m_ir->CreateLoad(m_ir->CreateGEP(g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type<u64>())})); return m_ir->CreateLoad(get_type<T>(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type<u64>())}));
} }
template <typename T, typename I> template <typename T, typename I>
value_t<T> load_const(llvm::GlobalVariable* g, I i) value_t<T> load_const(llvm::GlobalVariable* g, I i)
{ {
value_t<T> result; value_t<T> result;
result.value = load_const(g, i.eval(m_ir)); result.value = load_const<T>(g, i.eval(m_ir));
return result; return result;
} }
@ -3635,7 +3636,7 @@ public:
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c)) if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
{ {
llvm::Value* r = nullptr; llvm::Value* r = nullptr;
r = llvm::ConstantDataVector::get(ir->getContext(), llvm::makeArrayRef(reinterpret_cast<const u8*>(&mask), 16)); r = llvm::ConstantDataVector::get(ir->getContext(), llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
r = ir->CreateZExt(r, llvm_value_t<u32[16]>::get_type(ir->getContext())); r = ir->CreateZExt(r, llvm_value_t<u32[16]>::get_type(ir->getContext()));
r = ir->CreateShuffleVector(args[0], zeros, r); r = ir->CreateShuffleVector(args[0], zeros, r);
return r; return r;

View File

@ -42,7 +42,9 @@
#include "llvm/Support/FormattedStream.h" #include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h" #include "llvm/Support/Host.h"
#include "llvm/Object/ObjectFile.h" #include "llvm/Object/ObjectFile.h"
#if LLVM_VERSION_MAJOR < 17
#include "llvm/ADT/Triple.h" #include "llvm/ADT/Triple.h"
#endif
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstIterator.h"
#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassManager.h"
@ -3970,7 +3972,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{ {
const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee()); const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
f->setCallingConv(CallingConv::GHC); f->setCallingConv(CallingConv::GHC);
f->addAttribute(2, Attribute::NoAlias); f->addParamAttr(1, llvm::Attribute::NoAlias);
f->addFnAttr(Attribute::NoUnwind); f->addFnAttr(Attribute::NoUnwind);
} }
} }

View File

@ -1,3 +1,4 @@
#include <bit>
#ifdef LLVM_AVAILABLE #ifdef LLVM_AVAILABLE
#include "Emu/system_config.h" #include "Emu/system_config.h"
@ -176,14 +177,14 @@ Function* PPUTranslator::Translate(const ppu_function& info)
} }
} }
m_thread = &*(m_function->arg_begin() + 1); m_thread = m_function->getArg(1);
m_base = &*(m_function->arg_begin() + 3); m_base = m_function->getArg(3);
m_exec = &*(m_function->arg_begin() + 0); m_exec = m_function->getArg(0);
m_seg0 = &*(m_function->arg_begin() + 2); m_seg0 = m_function->getArg(2);
m_gpr[0] = &*(m_function->arg_begin() + 4); m_gpr[0] = m_function->getArg(4);
m_gpr[1] = &*(m_function->arg_begin() + 5); m_gpr[1] = m_function->getArg(5);
m_gpr[2] = &*(m_function->arg_begin() + 6); m_gpr[2] = m_function->getArg(6);
const auto body = BasicBlock::Create(m_context, "__body", m_function); const auto body = BasicBlock::Create(m_context, "__body", m_function);
@ -191,7 +192,9 @@ Function* PPUTranslator::Translate(const ppu_function& info)
if (need_check) if (need_check)
{ {
// Check status register in the entry block // Check status register in the entry block
const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(m_thread, 1), true); auto ptr = llvm::dyn_cast<GetElementPtrInst>(m_ir->CreateStructGEP(m_thread_type, m_thread, 1));
assert(ptr->getResultElementType() == GetType<u32>());
const auto vstate = m_ir->CreateLoad(ptr->getResultElementType(), ptr, true);
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function); const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely); m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely);
@ -369,7 +372,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
if (indirect) if (indirect)
{ {
m_ir->CreateStore(Trunc(indirect, GetType<u32>()), m_ir->CreateStructGEP(m_thread, static_cast<uint>(&m_cia - m_locals)), true); m_ir->CreateStore(Trunc(indirect, GetType<u32>()), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)), true);
// Try to optimize // Try to optimize
if (auto inst = dyn_cast_or_null<Instruction>(indirect)) if (auto inst = dyn_cast_or_null<Instruction>(indirect))
@ -381,8 +384,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
} }
const auto pos = m_ir->CreateShl(indirect, 1); const auto pos = m_ir->CreateShl(indirect, 1);
const auto ptr = m_ir->CreateGEP(m_exec, pos); const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
const auto val = m_ir->CreateLoad(m_ir->CreateBitCast(ptr, get_type<u64*>())); const auto val = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateBitCast(ptr, get_type<u64*>()));
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo())); callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo()));
// Load new segment address // Load new segment address
@ -406,7 +409,7 @@ Value* PPUTranslator::RegInit(Value*& local)
} }
// (Re)Initialize global, will be written in FlushRegisters // (Re)Initialize global, will be written in FlushRegisters
m_globals[index] = m_ir->CreateStructGEP(m_thread, index); m_globals[index] = m_ir->CreateStructGEP(m_thread_type, m_thread, index);
return m_globals[index]; return m_globals[index];
} }
@ -422,7 +425,8 @@ Value* PPUTranslator::RegLoad(Value*& local)
} }
// Load from the global value // Load from the global value
local = m_ir->CreateLoad(m_ir->CreateStructGEP(m_thread, index)); auto ptr = llvm::dyn_cast<llvm::GetElementPtrInst>(m_ir->CreateStructGEP(m_thread_type, m_thread, index));
local = m_ir->CreateLoad(ptr->getResultElementType(), ptr);
return local; return local;
} }
@ -510,7 +514,7 @@ Value* PPUTranslator::Broadcast(Value* value, u32 count)
{ {
if (const auto cv = dyn_cast<Constant>(value)) if (const auto cv = dyn_cast<Constant>(value))
{ {
return ConstantVector::getSplat({count, false}, cv); return ConstantVector::getSplat(llvm::ElementCount::get(count, false), cv);
} }
return m_ir->CreateVectorSplat(count, value); return m_ir->CreateVectorSplat(count, value);
@ -589,7 +593,7 @@ void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type) llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type)
{ {
return bitcast(m_ir->CreateGEP(m_base, addr), type->getPointerTo()); return bitcast(m_ir->CreateGEP(get_type<u8>(), m_base, addr), type->getPointerTo());
} }
Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align) Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
@ -600,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{ {
// Read, byteswap, bitcast // Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size); const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), llvm::MaybeAlign{align}, true); const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true);
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
} }
// Read normally // Read normally
return m_ir->CreateAlignedLoad(GetMemory(addr, type), llvm::MaybeAlign{align}, true); return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true);
} }
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align) void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
@ -1914,7 +1918,7 @@ void PPUTranslator::BC(ppu_opcode_t op)
if (op.lk) if (op.lk)
{ {
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread, static_cast<uint>(&m_lr - m_locals))); m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
} }
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi)); UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
@ -1984,7 +1988,7 @@ void PPUTranslator::BCLR(ppu_opcode_t op)
if (op.lk) if (op.lk)
{ {
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread, static_cast<uint>(&m_lr - m_locals))); m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
} }
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi)); UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
@ -2047,7 +2051,7 @@ void PPUTranslator::BCCTR(ppu_opcode_t op)
if (op.lk) if (op.lk)
{ {
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread, static_cast<uint>(&m_lr - m_locals))); m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
} }
UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi)); UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi));
@ -2448,7 +2452,11 @@ void PPUTranslator::MFOCRF(ppu_opcode_t op)
{ {
// MFOCRF // MFOCRF
const u64 pos = countLeadingZeros<u32>(op.crm, ZB_Width) - 24; #if LLVM_VERSION_MAJOR < 17
const u64 pos = countLeadingZeros<u32>(op.crm) - 24;
#else
const u64 pos = countl_zero<u32>(op.crm) - 24;
#endif
if (pos >= 8 || 0x80u >> pos != op.crm) if (pos >= 8 || 0x80u >> pos != op.crm)
{ {
@ -2459,11 +2467,11 @@ void PPUTranslator::MFOCRF(ppu_opcode_t op)
else if (std::none_of(m_cr + 0, m_cr + 32, [](auto* p) { return p; })) else if (std::none_of(m_cr + 0, m_cr + 32, [](auto* p) { return p; }))
{ {
// MFCR (optimized) // MFCR (optimized)
Value* ln0 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread, 99), GetType<uptr>()), GetType<u8[16]>()->getPointerTo()); Value* ln0 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread_type, m_thread, 99), GetType<uptr>()), GetType<u8[16]>()->getPointerTo());
Value* ln1 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread, 115), GetType<uptr>()), GetType<u8[16]>()->getPointerTo()); Value* ln1 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread_type, m_thread, 115), GetType<uptr>()), GetType<u8[16]>()->getPointerTo());
ln0 = m_ir->CreateLoad(ln0); ln0 = m_ir->CreateLoad(GetType<u8[16]>(), ln0);
ln1 = m_ir->CreateLoad(ln1); ln1 = m_ir->CreateLoad(GetType<u8[16]>(), ln1);
if (!m_is_be) if (!m_is_be)
{ {
ln0 = Shuffle(ln0, nullptr, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); ln0 = Shuffle(ln0, nullptr, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
@ -2724,7 +2732,11 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
if (op.l11) if (op.l11)
{ {
// MTOCRF // MTOCRF
const u64 pos = countLeadingZeros<u32>(op.crm, ZB_Width) - 24; #if LLVM_VERSION_MAJOR < 17
const u64 pos = countLeadingZeros<u32>(op.crm) - 24;
#else
const u64 pos = countl_zero<u32>(op.crm) - 24;
#endif
if (pos >= 8 || 0x80u >> pos != op.crm) if (pos >= 8 || 0x80u >> pos != op.crm)
{ {
@ -2772,9 +2784,13 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
std::fill_n(m_g_cr + i * 4, 4, nullptr); std::fill_n(m_g_cr + i * 4, 4, nullptr);
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15); const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
const auto src = m_ir->CreateGEP(m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)}); const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>()); const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
#if LLVM_VERSION_MAJOR < 15
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse()); Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#else
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#endif
} }
} }
} }
@ -3993,7 +4009,9 @@ void PPUTranslator::FRES(ppu_opcode_t op)
const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN
const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp
const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index
const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_ir->CreateGEP(m_fres_table, {m_ir->getInt64(0), i}))), 29); const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_fres_table)->getValueType(), m_fres_table, {m_ir->getInt64(0), i}));
assert(ptr->getResultElementType() == get_type<u32>());
const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(ptr->getResultElementType(), ptr)), 29);
const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF
const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52); const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52);
const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m)); const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m));
@ -4364,7 +4382,9 @@ void PPUTranslator::FRSQRTE(ppu_opcode_t op)
} }
const auto b = m_ir->CreateBitCast(GetFpr(op.frb), GetType<u64>()); const auto b = m_ir->CreateBitCast(GetFpr(op.frb), GetType<u64>());
const auto v = m_ir->CreateLoad(m_ir->CreateGEP(m_frsqrte_table, {m_ir->getInt64(0), m_ir->CreateLShr(b, 49)})); const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_frsqrte_table)->getValueType(), m_frsqrte_table, {m_ir->getInt64(0), m_ir->CreateLShr(b, 49)}));
assert(ptr->getResultElementType() == get_type<u32>());
const auto v = m_ir->CreateLoad(ptr->getResultElementType(), ptr);
const auto result = m_ir->CreateBitCast(m_ir->CreateShl(ZExt(v), 32), GetType<f64>()); const auto result = m_ir->CreateBitCast(m_ir->CreateShl(ZExt(v), 32), GetType<f64>());
SetFpr(op.frd, result); SetFpr(op.frd, result);

View File

@ -3911,7 +3911,9 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
#pragma GCC diagnostic ignored "-Weffc++" #pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wmissing-noreturn" #pragma GCC diagnostic ignored "-Wmissing-noreturn"
#endif #endif
#if LLVM_VERSION_MAJOR < 17
#include "llvm/ADT/Triple.h" #include "llvm/ADT/Triple.h"
#endif
#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InlineAsm.h"
@ -4069,8 +4071,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Set parameters // Set parameters
result->setLinkage(llvm::GlobalValue::InternalLinkage); result->setLinkage(llvm::GlobalValue::InternalLinkage);
result->addAttribute(1, llvm::Attribute::NoAlias); result->addParamAttr(0, llvm::Attribute::NoAlias);
result->addAttribute(2, llvm::Attribute::NoAlias); result->addParamAttr(1, llvm::Attribute::NoAlias);
#if 1 #if 1
result->setCallingConv(llvm::CallingConv::GHC); result->setCallingConv(llvm::CallingConv::GHC);
#endif #endif
@ -4093,8 +4095,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee()); llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee());
fn->setLinkage(llvm::GlobalValue::InternalLinkage); fn->setLinkage(llvm::GlobalValue::InternalLinkage);
fn->addAttribute(1, llvm::Attribute::NoAlias); fn->addParamAttr(0, llvm::Attribute::NoAlias);
fn->addAttribute(2, llvm::Attribute::NoAlias); fn->addParamAttr(1, llvm::Attribute::NoAlias);
#if 1 #if 1
fn->setCallingConv(llvm::CallingConv::GHC); fn->setCallingConv(llvm::CallingConv::GHC);
#endif #endif
@ -4164,9 +4166,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (!m_finfo->fn && !m_block) if (!m_finfo->fn && !m_block)
{ {
lr = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::gpr, +s_reg_lr, &v128::_u32, 3)); lr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::gpr, +s_reg_lr, &v128::_u32, 3));
sp = m_ir->CreateLoad(spu_ptr<u32[4]>(&spu_thread::gpr, +s_reg_sp)); sp = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(&spu_thread::gpr, +s_reg_sp));
r3 = m_ir->CreateLoad(spu_ptr<u32[4]>(&spu_thread::gpr, 3)); r3 = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(&spu_thread::gpr, 3));
} }
else else
{ {
@ -4199,7 +4201,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{ {
if (i != s_reg_lr && i != s_reg_sp && (i < s_reg_80 || i > s_reg_127)) if (i != s_reg_lr && i != s_reg_sp && (i < s_reg_80 || i > s_reg_127))
{ {
m_block->reg[i] = m_ir->CreateLoad(init_reg_fixed(i)); m_block->reg[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i));
} }
} }
@ -4217,9 +4219,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void set_function(llvm::Function* func) void set_function(llvm::Function* func)
{ {
m_function = func; m_function = func;
m_thread = &*func->arg_begin(); m_thread = func->getArg(0);
m_lsptr = &*(func->arg_begin() + 1); m_lsptr = func->getArg(1);
m_base_pc = &*(func->arg_begin() + 2); m_base_pc = func->getArg(2);
m_reg_addr.fill(nullptr); m_reg_addr.fill(nullptr);
m_block = nullptr; m_block = nullptr;
@ -4227,7 +4229,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_blocks.clear(); m_blocks.clear();
m_block_queue.clear(); m_block_queue.clear();
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function)); m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function));
m_memptr = m_ir->CreateLoad(spu_ptr<u8*>(&spu_thread::memory_base_addr)); m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
} }
// Add block with current block as a predecessor // Add block with current block as a predecessor
@ -4246,11 +4248,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_finfo->fn = fn; m_finfo->fn = fn;
m_function = fn; m_function = fn;
m_thread = &*fn->arg_begin(); m_thread = fn->getArg(0);
m_lsptr = &*(fn->arg_begin() + 1); m_lsptr = fn->getArg(1);
m_base_pc = &*(fn->arg_begin() + 2); m_base_pc = fn->getArg(2);
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", fn)); m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", fn));
m_memptr = m_ir->CreateLoad(spu_ptr<u8*>(&spu_thread::memory_base_addr)); m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
// Load registers at the entry chunk // Load registers at the entry chunk
for (u32 i = 0; i < s_reg_max; i++) for (u32 i = 0; i < s_reg_max; i++)
@ -4261,14 +4263,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
//m_finfo->load[i] = llvm::UndefValue::get(get_reg_type(i)); //m_finfo->load[i] = llvm::UndefValue::get(get_reg_type(i));
} }
m_finfo->load[i] = m_ir->CreateLoad(init_reg_fixed(i)); m_finfo->load[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i));
} }
// Load $SP // Load $SP
m_finfo->load[s_reg_sp] = &*(fn->arg_begin() + 3); m_finfo->load[s_reg_sp] = fn->getArg(3);
// Load first args // Load first args
m_finfo->load[3] = &*(fn->arg_begin() + 4); m_finfo->load[3] = fn->getArg(4);
} }
} }
else if (m_block_info[target / 4] && m_entry_info[target / 4] && !(pred_found && m_entry == target) && (!m_finfo->fn || !m_ret_info[target / 4])) else if (m_block_info[target / 4] && m_entry_info[target / 4] && !(pred_found && m_entry == target) && (!m_finfo->fn || !m_ret_info[target / 4]))
@ -4367,7 +4369,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
template <typename T = u8> template <typename T = u8>
llvm::Value* _ptr(llvm::Value* base, u32 offset) llvm::Value* _ptr(llvm::Value* base, u32 offset)
{ {
const auto off = m_ir->CreateGEP(base, m_ir->getInt64(offset)); const auto off = m_ir->CreateGEP(get_type<u8>(), base, m_ir->getInt64(offset));
const auto ptr = m_ir->CreateBitCast(off, get_type<T*>()); const auto ptr = m_ir->CreateBitCast(off, get_type<T*>());
return ptr; return ptr;
} }
@ -4381,7 +4383,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
template <typename T, typename... Args> template <typename T, typename... Args>
llvm::Value* spu_ptr(value_t<u64> add, Args... offset_args) llvm::Value* spu_ptr(value_t<u64> add, Args... offset_args)
{ {
const auto off = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(offset_args...))); const auto off = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->getInt64(::offset32(offset_args...)));
const auto ptr = m_ir->CreateBitCast(m_ir->CreateAdd(off, add.value), get_type<T*>()); const auto ptr = m_ir->CreateBitCast(m_ir->CreateAdd(off, add.value), get_type<T*>());
return ptr; return ptr;
} }
@ -4466,7 +4468,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto idx = m_ir->CreateAnd(I > 4 ? isr : isl, m_interp_7f0); const auto idx = m_ir->CreateAnd(I > 4 ? isr : isl, m_interp_7f0);
// Pointer to the register // Pointer to the register
return m_ir->CreateBitCast(m_ir->CreateGEP(m_interp_regs, m_ir->CreateZExt(idx, get_type<u64>())), get_type<T*>()); return m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_interp_regs, m_ir->CreateZExt(idx, get_type<u64>())), get_type<T*>());
} }
llvm::Value* double_as_uint64(llvm::Value* val) llvm::Value* double_as_uint64(llvm::Value* val)
@ -4548,7 +4550,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (!reg) if (!reg)
{ {
// Load register value if necessary // Load register value if necessary
reg = m_finfo && m_finfo->load[index] ? m_finfo->load[index] : m_ir->CreateLoad(init_reg_fixed(index)); reg = m_finfo && m_finfo->load[index] ? m_finfo->load[index] : m_ir->CreateLoad(get_reg_type(index), init_reg_fixed(index));
} }
if (reg->getType() == get_type<f64[4]>()) if (reg->getType() == get_type<f64[4]>())
@ -4593,11 +4595,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Load reg // Load reg
if (get_type<T>() == get_type<f64[4]>()) if (get_type<T>() == get_type<f64[4]>())
{ {
r.value = xfloat_to_double(m_ir->CreateLoad(init_vr<u32[4]>(index))); r.value = xfloat_to_double(m_ir->CreateLoad(get_type<u32[4]>(), init_vr<u32[4]>(index)));
} }
else else
{ {
r.value = m_ir->CreateLoad(init_vr<T>(index)); r.value = m_ir->CreateLoad(get_type<T>(), init_vr<T>(index));
} }
} }
else else
@ -4763,7 +4765,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
} }
// Write register to the context // Write register to the context
_store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, addr->getType()->getPointerElementType()), addr); _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_reg_type(index)), addr);
} }
template <typename T, uint I> template <typename T, uint I>
@ -4879,7 +4881,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto pstate = spu_ptr<u32>(&spu_thread::state); const auto pstate = spu_ptr<u32>(&spu_thread::state);
const auto _body = llvm::BasicBlock::Create(m_context, "", m_function); const auto _body = llvm::BasicBlock::Create(m_context, "", m_function);
const auto check = llvm::BasicBlock::Create(m_context, "", m_function); const auto check = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely); m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(get_type<u32>(), pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely);
m_ir->SetInsertPoint(check); m_ir->SetInsertPoint(check);
update_pc(addr); update_pc(addr);
@ -5017,7 +5019,7 @@ public:
// Add entry function (contains only state/code check) // Add entry function (contains only state/code check)
const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(m_hash, get_ftype<void, u8*, u8*, u64>()).getCallee()); const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(m_hash, get_ftype<void, u8*, u8*, u64>()).getCallee());
const auto main_arg2 = &*(main_func->arg_begin() + 2); const auto main_arg2 = main_func->getArg(2);
main_func->setCallingConv(CallingConv::GHC); main_func->setCallingConv(CallingConv::GHC);
set_function(main_func); set_function(main_func);
@ -5028,11 +5030,11 @@ public:
const auto label_stop = BasicBlock::Create(m_context, "", m_function); const auto label_stop = BasicBlock::Create(m_context, "", m_function);
// Load PC, which will be the actual value of 'm_base' // Load PC, which will be the actual value of 'm_base'
m_base_pc = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::pc)); m_base_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
// Emit state check // Emit state check
const auto pstate = spu_ptr<u32>(&spu_thread::state); const auto pstate = spu_ptr<u32>(&spu_thread::state);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(pstate, true), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), pstate, true), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely);
// Emit code check // Emit code check
u32 check_iterations = 0; u32 check_iterations = 0;
@ -5049,14 +5051,14 @@ public:
} }
else if (func.data.size() == 1) else if (func.data.size() == 1)
{ {
const auto pu32 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u32*>()); const auto pu32 = m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc), get_type<u32*>());
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu32), m_ir->getInt32(func.data[0])); const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), pu32), m_ir->getInt32(func.data[0]));
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely); m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
} }
else if (func.data.size() == 2) else if (func.data.size() == 2)
{ {
const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_base_pc), get_type<u64*>()); const auto pu64 = m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc), get_type<u64*>());
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(pu64), m_ir->getInt64(static_cast<u64>(func.data[1]) << 32 | func.data[0])); const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u64>(), pu64), m_ir->getInt64(static_cast<u64>(func.data[1]) << 32 | func.data[0]));
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely); m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
} }
else else
@ -5101,7 +5103,7 @@ public:
// Get actual pc corresponding to the found beginning of the data // Get actual pc corresponding to the found beginning of the data
llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), 0x3fffc); llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), 0x3fffc);
llvm::Value* data_addr = m_ir->CreateGEP(m_lsptr, starta_pc); llvm::Value* data_addr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, starta_pc);
llvm::Value* acc = nullptr; llvm::Value* acc = nullptr;
@ -5138,21 +5140,21 @@ public:
// Load unaligned code block from LS // Load unaligned code block from LS
if (m_use_avx512 && g_cfg.core.full_width_avx512) if (m_use_avx512 && g_cfg.core.full_width_avx512)
{ {
vls = m_ir->CreateAlignedLoad(_ptr<u32[16]>(data_addr, j - starta), llvm::MaybeAlign{4}); vls = m_ir->CreateAlignedLoad(get_type<u32[16]>(), _ptr<u32[16]>(data_addr, j - starta), llvm::MaybeAlign{4});
} }
else if (m_use_avx) else if (m_use_avx)
{ {
vls = m_ir->CreateAlignedLoad(_ptr<u32[8]>(data_addr, j - starta), llvm::MaybeAlign{4}); vls = m_ir->CreateAlignedLoad(get_type<u32[8]>(), _ptr<u32[8]>(data_addr, j - starta), llvm::MaybeAlign{4});
} }
else else
{ {
vls = m_ir->CreateAlignedLoad(_ptr<u32[4]>(data_addr, j - starta), llvm::MaybeAlign{4}); vls = m_ir->CreateAlignedLoad(get_type<u32[4]>(), _ptr<u32[4]>(data_addr, j - starta), llvm::MaybeAlign{4});
} }
// Mask if necessary // Mask if necessary
if (holes) if (holes)
{ {
vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::makeArrayRef(indices, elements)); vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::ArrayRef(indices, elements));
} }
// Perform bitwise comparison and accumulate // Perform bitwise comparison and accumulate
@ -5164,7 +5166,7 @@ public:
words[i] = k >= start && k < end ? func.data[(k - start) / 4] : 0; words[i] = k >= start && k < end ? func.data[(k - start) / 4] : 0;
} }
vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, llvm::makeArrayRef(words, elements))); vls = m_ir->CreateXor(vls, ConstantDataVector::get(m_context, llvm::ArrayRef(words, elements)));
acc = acc ? m_ir->CreateOr(acc, vls) : vls; acc = acc ? m_ir->CreateOr(acc, vls) : vls;
check_iterations++; check_iterations++;
} }
@ -5198,7 +5200,7 @@ public:
// Increase block counter with statistics // Increase block counter with statistics
m_ir->SetInsertPoint(label_body); m_ir->SetInsertPoint(label_body);
const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter); const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter);
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbcount), m_ir->getInt64(check_iterations)), pbcount); m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbcount), m_ir->getInt64(check_iterations)), pbcount);
// Call the entry function chunk // Call the entry function chunk
const auto entry_chunk = add_function(m_pos); const auto entry_chunk = add_function(m_pos);
@ -5232,7 +5234,7 @@ public:
if (g_cfg.core.spu_verification) if (g_cfg.core.spu_verification)
{ {
const auto pbfail = spu_ptr<u64>(&spu_thread::block_failure); const auto pbfail = spu_ptr<u64>(&spu_thread::block_failure);
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbfail), m_ir->getInt64(1)), pbfail); m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbfail), m_ir->getInt64(1)), pbfail);
const auto dispci = call("spu_dispatch", spu_runtime::tr_dispatch, m_thread, m_lsptr, main_arg2); const auto dispci = call("spu_dispatch", spu_runtime::tr_dispatch, m_thread, m_lsptr, main_arg2);
dispci->setCallingConv(CallingConv::GHC); dispci->setCallingConv(CallingConv::GHC);
dispci->setTailCall(); dispci->setTailCall();
@ -5273,9 +5275,9 @@ public:
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", m_test_state)); m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", m_test_state));
const auto escape_yes = BasicBlock::Create(m_context, "", m_test_state); const auto escape_yes = BasicBlock::Create(m_context, "", m_test_state);
const auto escape_no = BasicBlock::Create(m_context, "", m_test_state); const auto escape_no = BasicBlock::Create(m_context, "", m_test_state);
m_ir->CreateCondBr(call("spu_exec_check_state", &exec_check_state, &*m_test_state->arg_begin()), escape_yes, escape_no); m_ir->CreateCondBr(call("spu_exec_check_state", &exec_check_state, m_test_state->getArg(0)), escape_yes, escape_no);
m_ir->SetInsertPoint(escape_yes); m_ir->SetInsertPoint(escape_yes);
call("spu_escape", spu_runtime::g_escape, &*m_test_state->arg_begin()); call("spu_escape", spu_runtime::g_escape, m_test_state->getArg(0));
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
m_ir->SetInsertPoint(escape_no); m_ir->SetInsertPoint(escape_no);
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
@ -5341,7 +5343,7 @@ public:
if (!value) if (!value)
{ {
// Value hasn't been loaded yet // Value hasn't been loaded yet
value = m_finfo && m_finfo->load[i] ? m_finfo->load[i] : m_ir->CreateLoad(regptr); value = m_finfo && m_finfo->load[i] ? m_finfo->load[i] : m_ir->CreateLoad(get_reg_type(i), regptr);
} }
if (value->getType() == get_type<f64[4]>() && type != get_type<f64[4]>()) if (value->getType() == get_type<f64[4]>() && type != get_type<f64[4]>())
@ -5372,7 +5374,7 @@ public:
const auto regptr = init_reg_fixed(i); const auto regptr = init_reg_fixed(i);
const auto cblock = m_ir->GetInsertBlock(); const auto cblock = m_ir->GetInsertBlock();
m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator()); m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator());
const auto value = m_finfo && m_finfo->load[i] ? m_finfo->load[i] : m_ir->CreateLoad(regptr); const auto value = m_finfo && m_finfo->load[i] ? m_finfo->load[i] : m_ir->CreateLoad(get_reg_type(i), regptr);
m_ir->SetInsertPoint(cblock); m_ir->SetInsertPoint(cblock);
_phi->addIncoming(value, &m_function->getEntryBlock()); _phi->addIncoming(value, &m_function->getEntryBlock());
} }
@ -5541,7 +5543,7 @@ public:
CallInst* ci{}; CallInst* ci{};
if (si->getOperand(0) == m_ir->getFalse()) if (si->getOperand(0) == m_ir->getFalse())
{ {
ci = m_ir->CreateCall(m_test_state, {&*f->arg_begin()}); ci = m_ir->CreateCall(m_test_state, {f->getArg(0)});
ci->setCallingConv(m_test_state->getCallingConv()); ci->setCallingConv(m_test_state->getCallingConv());
} }
else else
@ -5697,8 +5699,8 @@ public:
ret_func->setCallingConv(CallingConv::GHC); ret_func->setCallingConv(CallingConv::GHC);
ret_func->setLinkage(GlobalValue::InternalLinkage); ret_func->setLinkage(GlobalValue::InternalLinkage);
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", ret_func)); m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", ret_func));
m_thread = &*(ret_func->arg_begin() + 1); m_thread = ret_func->getArg(1);
m_interp_pc = &*(ret_func->arg_begin() + 2); m_interp_pc = ret_func->getArg(2);
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
// Add entry function, serves as a trampoline // Add entry function, serves as a trampoline
@ -5709,12 +5711,12 @@ public:
set_function(main_func); set_function(main_func);
// Load pc and opcode // Load pc and opcode
m_interp_pc = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::pc)); m_interp_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
m_interp_op = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_ir->CreateZExt(m_interp_pc, get_type<u64>())), get_type<u32*>())); m_interp_op = m_ir->CreateLoad(get_type<u32>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_ir->CreateZExt(m_interp_pc, get_type<u64>())), get_type<u32*>()));
m_interp_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {m_interp_op}); m_interp_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {m_interp_op});
// Pinned constant, address of interpreter table // Pinned constant, address of interpreter table
m_interp_table = m_ir->CreateBitCast(m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->getInt64(0)}), get_type<u8*>()); m_interp_table = m_ir->CreateBitCast(m_ir->CreateGEP(m_function_table->getValueType(), m_function_table, {m_ir->getInt64(0), m_ir->getInt64(0)}), get_type<u8*>());
// Pinned constant, mask for shifted register index // Pinned constant, mask for shifted register index
m_interp_7f0 = m_ir->getInt32(0x7f0); m_interp_7f0 = m_ir->getInt32(0x7f0);
@ -5732,7 +5734,7 @@ public:
m_ir->CreateStore(m_ir->CreateCall(get_intrinsic<u64>(Intrinsic::read_register), {rsp_name}), native_sp); m_ir->CreateStore(m_ir->CreateCall(get_intrinsic<u64>(Intrinsic::read_register), {rsp_name}), native_sp);
// Decode (shift) and load function pointer // Decode (shift) and load function pointer
const auto first = m_ir->CreateLoad(m_ir->CreateGEP(m_ir->CreateBitCast(m_interp_table, if_pptr), m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn))); const auto first = m_ir->CreateLoad(if_type->getPointerTo(), m_ir->CreateGEP(if_type->getPointerTo(), m_ir->CreateBitCast(m_interp_table, if_pptr), m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn)));
const auto call0 = m_ir->CreateCall({if_type, first}, {m_lsptr, m_thread, m_interp_pc, m_interp_op, m_interp_table, m_interp_7f0, m_interp_regs}); const auto call0 = m_ir->CreateCall({if_type, first}, {m_lsptr, m_thread, m_interp_pc, m_interp_op, m_interp_table, m_interp_7f0, m_interp_regs});
call0->setCallingConv(CallingConv::GHC); call0->setCallingConv(CallingConv::GHC);
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
@ -5800,16 +5802,16 @@ public:
f->setCallingConv(CallingConv::GHC); f->setCallingConv(CallingConv::GHC);
m_function = f; m_function = f;
m_lsptr = &*(f->arg_begin() + 0); m_lsptr = f->getArg(0);
m_thread = &*(f->arg_begin() + 1); m_thread = f->getArg(1);
m_interp_pc = &*(f->arg_begin() + 2); m_interp_pc = f->getArg(2);
m_interp_op = &*(f->arg_begin() + 3); m_interp_op = f->getArg(3);
m_interp_table = &*(f->arg_begin() + 4); m_interp_table = f->getArg(4);
m_interp_7f0 = &*(f->arg_begin() + 5); m_interp_7f0 = f->getArg(5);
m_interp_regs = &*(f->arg_begin() + 6); m_interp_regs = f->getArg(6);
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", f)); m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", f));
m_memptr = m_ir->CreateLoad(spu_ptr<u8*>(&spu_thread::memory_base_addr)); m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
switch (itype) switch (itype)
{ {
@ -5874,9 +5876,9 @@ public:
// Decode next instruction. // Decode next instruction.
const auto next_pc = itype & spu_itype::branch ? m_interp_pc : m_interp_pc_next; const auto next_pc = itype & spu_itype::branch ? m_interp_pc : m_interp_pc_next;
const auto be32_op = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, m_ir->CreateZExt(next_pc, get_type<u64>())), get_type<u32*>())); const auto be32_op = m_ir->CreateLoad(get_type<u32>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_ir->CreateZExt(next_pc, get_type<u64>())), get_type<u32*>()));
const auto next_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {be32_op}); const auto next_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {be32_op});
const auto next_if = m_ir->CreateLoad(m_ir->CreateGEP(m_ir->CreateBitCast(m_interp_table, if_pptr), m_ir->CreateLShr(next_op, 32u - m_interp_magn))); const auto next_if = m_ir->CreateLoad(if_type->getPointerTo(), m_ir->CreateGEP(if_type->getPointerTo(), m_ir->CreateBitCast(m_interp_table, if_pptr), m_ir->CreateLShr(next_op, 32u - m_interp_magn)));
llvm::cast<LoadInst>(next_if)->setVolatile(true); llvm::cast<LoadInst>(next_if)->setVolatile(true);
if (!(itype & spu_itype::branch)) if (!(itype & spu_itype::branch))
@ -5906,7 +5908,7 @@ public:
{ {
const auto _stop = BasicBlock::Create(m_context, "", f); const auto _stop = BasicBlock::Create(m_context, "", f);
const auto _next = BasicBlock::Create(m_context, "", f); const auto _next = BasicBlock::Create(m_context, "", f);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely); m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely);
m_ir->SetInsertPoint(_stop); m_ir->SetInsertPoint(_stop);
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc)); m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
@ -5962,7 +5964,7 @@ public:
// Call next instruction. // Call next instruction.
const auto _stop = BasicBlock::Create(m_context, "", f); const auto _stop = BasicBlock::Create(m_context, "", f);
const auto _next = BasicBlock::Create(m_context, "", f); const auto _next = BasicBlock::Create(m_context, "", f);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely); m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely);
m_ir->SetInsertPoint(_next); m_ir->SetInsertPoint(_next);
if (itype == spu_itype::WRCH || if (itype == spu_itype::WRCH ||
@ -6227,7 +6229,7 @@ public:
} }
else else
{ {
const auto val = m_ir->CreateLoad(ptr, true); const auto val = m_ir->CreateLoad(get_type<u64>(), ptr, true);
m_ir->CreateStore(m_ir->getInt64(0), ptr, true); m_ir->CreateStore(m_ir->getInt64(0), ptr, true);
val0 = val; val0 = val;
} }
@ -6264,7 +6266,7 @@ public:
{ {
case SPU_RdSRR0: case SPU_RdSRR0:
{ {
res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::srr0)); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
break; break;
} }
case SPU_RdInMbox: case SPU_RdInMbox:
@ -6281,7 +6283,7 @@ public:
} }
case MFC_RdTagMask: case MFC_RdTagMask:
{ {
res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_mask)); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_mask));
break; break;
} }
case SPU_RdSigNotify1: case SPU_RdSigNotify1:
@ -6313,7 +6315,7 @@ public:
} }
case SPU_RdEventMask: case SPU_RdEventMask:
{ {
res.value = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>()); res.value = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
break; break;
} }
case SPU_RdEventStat: case SPU_RdEventStat:
@ -6324,8 +6326,8 @@ public:
} }
case SPU_RdMachStat: case SPU_RdMachStat:
{ {
res.value = m_ir->CreateZExt(m_ir->CreateLoad(spu_ptr<u8>(&spu_thread::interrupts_enabled)), get_type<u32>()); res.value = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::interrupts_enabled)), get_type<u32>());
res.value = m_ir->CreateOr(res.value, m_ir->CreateAnd(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::thread_type)), m_ir->getInt32(2))); res.value = m_ir->CreateOr(res.value, m_ir->CreateAnd(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::thread_type)), m_ir->getInt32(2)));
break; break;
} }
@ -6353,7 +6355,7 @@ public:
llvm::Value* get_rchcnt(u32 off, u64 inv = 0) llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
{ {
const auto val = m_ir->CreateLoad(_ptr<u64>(m_thread, off), true); const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off), true);
const auto shv = m_ir->CreateLShr(val, spu_channel::off_count); const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>()); return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
} }
@ -6413,20 +6415,20 @@ public:
} }
case MFC_Cmd: case MFC_Cmd:
{ {
res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size), true); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size), true);
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value); res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
break; break;
} }
case SPU_RdInMbox: case SPU_RdInMbox:
{ {
res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_in_mbox), true); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox), true);
res.value = m_ir->CreateLShr(res.value, 8); res.value = m_ir->CreateLShr(res.value, 8);
res.value = m_ir->CreateAnd(res.value, 7); res.value = m_ir->CreateAnd(res.value, 7);
break; break;
} }
case SPU_RdEventStat: case SPU_RdEventStat:
{ {
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>()); const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
res.value = call("spu_get_events", &exec_get_events, m_thread, mask); res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
break; break;
} }
@ -6532,7 +6534,7 @@ public:
m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_tag_mask)); m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_tag_mask));
const auto next = llvm::BasicBlock::Create(m_context, "", m_function); const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
const auto _mfc = llvm::BasicBlock::Create(m_context, "", m_function); const auto _mfc = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_upd)), m_ir->getInt32(MFC_TAG_UPDATE_IMMEDIATE)), _mfc, next); m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_upd)), m_ir->getInt32(MFC_TAG_UPDATE_IMMEDIATE)), _mfc, next);
m_ir->SetInsertPoint(_mfc); m_ir->SetInsertPoint(_mfc);
update_pc(); update_pc();
call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value); call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value);
@ -6544,8 +6546,8 @@ public:
{ {
if (true) if (true)
{ {
const auto tag_mask = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_mask)); const auto tag_mask = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_mask));
const auto mfc_fence = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_fence)); const auto mfc_fence = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_fence));
const auto completed = m_ir->CreateAnd(tag_mask, m_ir->CreateNot(mfc_fence)); const auto completed = m_ir->CreateAnd(tag_mask, m_ir->CreateNot(mfc_fence));
const auto upd_ptr = spu_ptr<u32>(&spu_thread::ch_tag_upd); const auto upd_ptr = spu_ptr<u32>(&spu_thread::ch_tag_upd);
const auto stat_ptr = spu_ptr<u64>(&spu_thread::ch_tag_stat); const auto stat_ptr = spu_ptr<u64>(&spu_thread::ch_tag_stat);
@ -6722,19 +6724,19 @@ public:
csize = -1; csize = -1;
} }
llvm::Value* src = m_ir->CreateGEP(m_lsptr, zext<u64>(lsa).eval(m_ir)); llvm::Value* src = m_ir->CreateGEP(get_type<u8>(), m_lsptr, zext<u64>(lsa).eval(m_ir));
llvm::Value* dst = m_ir->CreateGEP(m_memptr, zext<u64>(eal).eval(m_ir)); llvm::Value* dst = m_ir->CreateGEP(get_type<u8>(), m_memptr, zext<u64>(eal).eval(m_ir));
if (cmd & MFC_GET_CMD) if (cmd & MFC_GET_CMD)
{ {
std::swap(src, dst); std::swap(src, dst);
} }
llvm::Value* barrier = m_ir->CreateLoad(pb); llvm::Value* barrier = m_ir->CreateLoad(get_type<u32>(), pb);
if (cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK)) if (cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK))
{ {
barrier = m_ir->CreateOr(barrier, m_ir->CreateLoad(pf)); barrier = m_ir->CreateOr(barrier, m_ir->CreateLoad(get_type<u32>(), pf));
} }
const auto cond = m_ir->CreateIsNull(m_ir->CreateAnd(mask, barrier)); const auto cond = m_ir->CreateIsNull(m_ir->CreateAnd(mask, barrier));
@ -6756,7 +6758,7 @@ public:
m_ir->CreateBr(next); m_ir->CreateBr(next);
m_ir->SetInsertPoint(copy); m_ir->SetInsertPoint(copy);
llvm::Type* vtype = get_type<u8(*)[16]>(); llvm::Type* vtype = get_type<u8[16]>();
switch (csize) switch (csize)
{ {
@ -6767,22 +6769,22 @@ public:
} }
case 1: case 1:
{ {
vtype = get_type<u8*>(); vtype = get_type<u8>();
break; break;
} }
case 2: case 2:
{ {
vtype = get_type<u16*>(); vtype = get_type<u16>();
break; break;
} }
case 4: case 4:
{ {
vtype = get_type<u32*>(); vtype = get_type<u32>();
break; break;
} }
case 8: case 8:
{ {
vtype = get_type<u64*>(); vtype = get_type<u64>();
break; break;
} }
default: default:
@ -6806,29 +6808,29 @@ public:
if (m_use_avx && csize >= 32 && !(clsa % 32)) if (m_use_avx && csize >= 32 && !(clsa % 32))
{ {
vtype = get_type<u8(*)[32]>(); vtype = get_type<u8[32]>();
stride = 32; stride = 32;
} }
if (csize > 0 && csize <= 16) if (csize > 0 && csize <= 16)
{ {
// Generate single copy operation // Generate single copy operation
m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateBitCast(src, vtype), true), m_ir->CreateBitCast(dst, vtype), true); m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true);
} }
else if (csize <= stride * 16 && !(csize % 32)) else if (csize <= stride * 16 && !(csize % 32))
{ {
// Generate fixed sequence of copy operations // Generate fixed sequence of copy operations
for (u32 i = 0; i < csize; i += stride) for (u32 i = 0; i < csize; i += stride)
{ {
const auto _src = m_ir->CreateGEP(src, m_ir->getInt32(i)); const auto _src = m_ir->CreateGEP(get_type<u8>(), src, m_ir->getInt32(i));
const auto _dst = m_ir->CreateGEP(dst, m_ir->getInt32(i)); const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
if (csize - i < stride) if (csize - i < stride)
{ {
m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true); m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true);
} }
else else
{ {
m_ir->CreateAlignedStore(m_ir->CreateAlignedLoad(m_ir->CreateBitCast(_src, vtype), llvm::MaybeAlign{16}), m_ir->CreateBitCast(_dst, vtype), llvm::MaybeAlign{16}); m_ir->CreateAlignedStore(m_ir->CreateAlignedLoad(vtype, m_ir->CreateBitCast(_src, vtype->getPointerTo()), llvm::MaybeAlign{16}), m_ir->CreateBitCast(_dst, vtype->getPointerTo()), llvm::MaybeAlign{16});
} }
} }
} }
@ -6851,7 +6853,7 @@ public:
case MFC_EIEIO_CMD: case MFC_EIEIO_CMD:
case MFC_SYNC_CMD: case MFC_SYNC_CMD:
{ {
const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size))); const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size)));
m_ir->CreateCondBr(cond, exec, fail, m_md_likely); m_ir->CreateCondBr(cond, exec, fail, m_md_likely);
m_ir->SetInsertPoint(exec); m_ir->SetInsertPoint(exec);
m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
@ -6873,10 +6875,10 @@ public:
m_ir->SetInsertPoint(fail); m_ir->SetInsertPoint(fail);
// Get MFC slot, redirect to invalid memory address // Get MFC slot, redirect to invalid memory address
const auto slot = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size)); const auto slot = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&spu_thread::mfc_queue))); const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&spu_thread::mfc_queue)));
const auto ptr0 = m_ir->CreateGEP(m_thread, m_ir->CreateZExt(off0, get_type<u64>())); const auto ptr0 = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->CreateZExt(off0, get_type<u64>()));
const auto ptr1 = m_ir->CreateGEP(m_memptr, m_ir->getInt64(0xffdeadf0)); const auto ptr1 = m_ir->CreateGEP(get_type<u8>(), m_memptr, m_ir->getInt64(0xffdeadf0));
const auto pmfc = m_ir->CreateSelect(m_ir->CreateICmpULT(slot, m_ir->getInt32(16)), ptr0, ptr1); const auto pmfc = m_ir->CreateSelect(m_ir->CreateICmpULT(slot, m_ir->getInt32(16)), ptr0, ptr1);
m_ir->CreateStore(ci, _ptr<u8>(pmfc, ::offset32(&spu_mfc_cmd::cmd))); m_ir->CreateStore(ci, _ptr<u8>(pmfc, ::offset32(&spu_mfc_cmd::cmd)));
@ -6922,9 +6924,9 @@ public:
m_ir->CreateStore(size.value, _ptr<u16>(pmfc, ::offset32(&spu_mfc_cmd::size))); m_ir->CreateStore(size.value, _ptr<u16>(pmfc, ::offset32(&spu_mfc_cmd::size)));
m_ir->CreateStore(lsa.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::lsa))); m_ir->CreateStore(lsa.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::lsa)));
m_ir->CreateStore(eal.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::eal))); m_ir->CreateStore(eal.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::eal)));
m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(pf), mask), pf); m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(get_type<u32>(), pf), mask), pf);
if (cmd & MFC_BARRIER_MASK) if (cmd & MFC_BARRIER_MASK)
m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(pb), mask), pb); m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(get_type<u32>(), pb), mask), pb);
break; break;
} }
case MFC_BARRIER_CMD: case MFC_BARRIER_CMD:
@ -6932,7 +6934,7 @@ public:
case MFC_SYNC_CMD: case MFC_SYNC_CMD:
{ {
m_ir->CreateStore(m_ir->getInt32(-1), pb); m_ir->CreateStore(m_ir->getInt32(-1), pb);
m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(pf), mask), pf); m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(get_type<u32>(), pf), mask), pf);
break; break;
} }
default: default:
@ -6956,7 +6958,7 @@ public:
{ {
const auto mask = eval(splat<u32>(1) << (val & 0x1f)); const auto mask = eval(splat<u32>(1) << (val & 0x1f));
const auto _ptr = spu_ptr<u32>(&spu_thread::ch_stall_mask); const auto _ptr = spu_ptr<u32>(&spu_thread::ch_stall_mask);
const auto _old = m_ir->CreateLoad(_ptr); const auto _old = m_ir->CreateLoad(get_type<u32>(), _ptr);
const auto _new = m_ir->CreateAnd(_old, m_ir->CreateNot(mask.value)); const auto _new = m_ir->CreateAnd(_old, m_ir->CreateNot(mask.value));
m_ir->CreateStore(_new, _ptr); m_ir->CreateStore(_new, _ptr);
const auto next = llvm::BasicBlock::Create(m_context, "", m_function); const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
@ -9609,13 +9611,13 @@ public:
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data) void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{ {
const auto bswapped = byteswap(data); const auto bswapped = byteswap(data);
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()), true); m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
} }
auto make_load_ls(value_t<u64> addr) auto make_load_ls(value_t<u64> addr)
{ {
value_t<u8[16]> data; value_t<u8[16]> data;
data.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()), true); data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
return byteswap(data); return byteswap(data);
} }
@ -9730,7 +9732,7 @@ public:
m_ir->CreateCondBr(cond.value, halt, next, m_md_unlikely); m_ir->CreateCondBr(cond.value, halt, next, m_md_unlikely);
m_ir->SetInsertPoint(halt); m_ir->SetInsertPoint(halt);
if (m_interp_magn) if (m_interp_magn)
m_ir->CreateStore(&*(m_function->arg_begin() + 2), spu_ptr<u32>(&spu_thread::pc))->setVolatile(true); m_ir->CreateStore(m_function->getArg(2), spu_ptr<u32>(&spu_thread::pc))->setVolatile(true);
else else
update_pc(); update_pc();
const auto ptr = _ptr<u32>(m_memptr, 0xffdead00); const auto ptr = _ptr<u32>(m_memptr, 0xffdead00);
@ -9873,7 +9875,7 @@ public:
} }
else else
{ {
sp.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::gpr, 1, &v128::_u32, 3)); sp.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::gpr, 1, &v128::_u32, 3));
} }
} }
@ -9898,19 +9900,19 @@ public:
// Compare address stored in stack mirror with addr // Compare address stored in stack mirror with addr
const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror)); const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
const auto stack1 = eval(stack0 + 8); const auto stack1 = eval(stack0 + 8);
const auto _ret = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64*>())); const auto _ret = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value), get_type<u64*>()));
const auto link = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>())); const auto link = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_thread, stack1.value), get_type<u64*>()));
const auto fail = llvm::BasicBlock::Create(m_context, "", m_function); const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
const auto done = llvm::BasicBlock::Create(m_context, "", m_function); const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
const auto next = llvm::BasicBlock::Create(m_context, "", m_function); const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(addr.value, m_ir->CreateTrunc(link, get_type<u32>())), next, fail, m_md_likely); m_ir->CreateCondBr(m_ir->CreateICmpEQ(addr.value, m_ir->CreateTrunc(link, get_type<u32>())), next, fail, m_md_likely);
m_ir->SetInsertPoint(next); m_ir->SetInsertPoint(next);
const auto cmp2 = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u32*>())); const auto cmp2 = m_ir->CreateLoad(get_type<u32>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u32*>()));
m_ir->CreateCondBr(m_ir->CreateICmpEQ(cmp2, m_ir->CreateTrunc(_ret, get_type<u32>())), done, fail, m_md_likely); m_ir->CreateCondBr(m_ir->CreateICmpEQ(cmp2, m_ir->CreateTrunc(_ret, get_type<u32>())), done, fail, m_md_likely);
m_ir->SetInsertPoint(done); m_ir->SetInsertPoint(done);
// Clear stack mirror and return by tail call to the provided return address // Clear stack mirror and return by tail call to the provided return address
m_ir->CreateStore(splat<u64[2]>(-1).eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64(*)[2]>())); m_ir->CreateStore(splat<u64[2]>(-1).eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value), get_type<u64(*)[2]>()));
const auto targ = m_ir->CreateAdd(m_ir->CreateLShr(_ret, 32), get_segment_base()); const auto targ = m_ir->CreateAdd(m_ir->CreateLShr(_ret, 32), get_segment_base());
const auto type = m_finfo->chunk->getFunctionType(); const auto type = m_finfo->chunk->getFunctionType();
const auto fval = m_ir->CreateIntToPtr(targ, type->getPointerTo()); const auto fval = m_ir->CreateIntToPtr(targ, type->getPointerTo());
@ -9928,8 +9930,8 @@ public:
m_ir->SetInsertPoint(done); m_ir->SetInsertPoint(done);
const auto ad64 = m_ir->CreateZExt(ad32, get_type<u64>()); const auto ad64 = m_ir->CreateZExt(ad32, get_type<u64>());
const auto pptr = m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(ad64, 2, "", true)}); const auto pptr = m_ir->CreateGEP(get_type<u8>(), m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(ad64, 2, "", true)});
tail_chunk({m_dispatch->getFunctionType(), m_ir->CreateLoad(pptr)}); tail_chunk({m_dispatch->getFunctionType(), m_ir->CreateLoad(get_type<u8*>(), pptr)});
m_ir->SetInsertPoint(fail); m_ir->SetInsertPoint(fail);
} }
@ -10242,7 +10244,7 @@ public:
{ {
if (m_block) m_block->block_end = m_ir->GetInsertBlock(); if (m_block) m_block->block_end = m_ir->GetInsertBlock();
value_t<u32> srr0; value_t<u32> srr0;
srr0.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::srr0)); srr0.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
m_ir->CreateBr(add_block_indirect(op, srr0)); m_ir->CreateBr(add_block_indirect(op, srr0));
} }
@ -10251,7 +10253,7 @@ public:
if (m_block) m_block->block_end = m_ir->GetInsertBlock(); if (m_block) m_block->block_end = m_ir->GetInsertBlock();
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc); const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
set_link(op); set_link(op);
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>()); const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
const auto res = call("spu_get_events", &exec_get_events, m_thread, mask); const auto res = call("spu_get_events", &exec_get_events, m_thread, mask);
const auto target = add_block_indirect(op, addr); const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(m_ir->CreateICmpNE(res, m_ir->getInt32(0)), target, add_block_next()); m_ir->CreateCondBr(m_ir->CreateICmpNE(res, m_ir->getInt32(0)), target, add_block_next());
@ -10585,8 +10587,8 @@ public:
const auto rel_ptr = m_ir->CreateSub(m_ir->CreatePtrToInt(pfunc->chunk, get_type<u64>()), get_segment_base()); const auto rel_ptr = m_ir->CreateSub(m_ir->CreatePtrToInt(pfunc->chunk, get_type<u64>()), get_segment_base());
const auto ptr_plus_op = m_ir->CreateOr(m_ir->CreateShl(rel_ptr, 32), m_ir->getInt64(m_next_op)); const auto ptr_plus_op = m_ir->CreateOr(m_ir->CreateShl(rel_ptr, 32), m_ir->getInt64(m_next_op));
const auto base_plus_pc = m_ir->CreateOr(m_ir->CreateShl(m_ir->CreateZExt(m_base_pc, get_type<u64>()), 32), m_ir->getInt64(m_pos + 4)); const auto base_plus_pc = m_ir->CreateOr(m_ir->CreateShl(m_ir->CreateZExt(m_base_pc, get_type<u64>()), 32), m_ir->getInt64(m_pos + 4));
m_ir->CreateStore(ptr_plus_op, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64*>())); m_ir->CreateStore(ptr_plus_op, m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value), get_type<u64*>()));
m_ir->CreateStore(base_plus_pc, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>())); m_ir->CreateStore(base_plus_pc, m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_thread, stack1.value), get_type<u64*>()));
} }
} }