From 1365c830b232f98fcc21db87291e7dcc2379725f Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 16 Feb 2015 23:07:22 +0530 Subject: [PATCH] PPUJIT: Fix EOL issues --- rpcs3/Emu/Cell/PPULLVMRecompiler.cpp | 11924 ++++++++++++------------- 1 file changed, 5962 insertions(+), 5962 deletions(-) diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 33994f0227..7bcb074901 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -1,2474 +1,2474 @@ -#include "stdafx.h" -#include "Utilities/Log.h" -#include "Emu/Cell/PPULLVMRecompiler.h" -#include "Emu/Memory/Memory.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/CodeGen/MachineCodeInfo.h" -#include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/IR/Dominators.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Vectorize.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/IR/Verifier.h" - -using namespace llvm; -using namespace ppu_recompiler_llvm; - -u64 Compiler::s_rotate_mask[64][64]; -bool Compiler::s_rotate_mask_inited = false; - -Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block) - : m_recompilation_engine(recompilation_engine) { - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetDisassembler(); - - m_llvm_context = new LLVMContext(); - m_ir_builder = new IRBuilder<>(*m_llvm_context); - m_module = new llvm::Module("Module", *m_llvm_context); - m_fpm = new FunctionPassManager(m_module); - - EngineBuilder engine_builder(m_module); - engine_builder.setMCPU(sys::getHostCPUName()); - engine_builder.setEngineKind(EngineKind::JIT); - engine_builder.setOptLevel(CodeGenOpt::Default); - m_execution_engine = engine_builder.create(); - - m_fpm->add(new DataLayoutPass(m_module)); - m_fpm->add(createNoAAPass()); - m_fpm->add(createBasicAliasAnalysisPass()); - m_fpm->add(createNoTargetTransformInfoPass()); - m_fpm->add(createEarlyCSEPass()); - m_fpm->add(createTailCallEliminationPass()); - m_fpm->add(createReassociatePass()); - m_fpm->add(createInstructionCombiningPass()); - m_fpm->add(new DominatorTreeWrapperPass()); - m_fpm->add(new MemoryDependenceAnalysis()); - m_fpm->add(createGVNPass()); - m_fpm->add(createInstructionCombiningPass()); - m_fpm->add(new MemoryDependenceAnalysis()); - m_fpm->add(createDeadStoreEliminationPass()); - m_fpm->add(new LoopInfo()); - m_fpm->add(new ScalarEvolution()); - m_fpm->add(createSLPVectorizerPass()); - m_fpm->add(createInstructionCombiningPass()); - m_fpm->add(createCFGSimplificationPass()); - m_fpm->doInitialization(); - - std::vector arg_types; - arg_types.push_back(m_ir_builder->getInt8PtrTy()); - arg_types.push_back(m_ir_builder->getInt8PtrTy()); - arg_types.push_back(m_ir_builder->getInt64Ty()); - m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); - - m_execute_unknown_function = (Function *)m_module->getOrInsertFunction("execute_unknown_function", m_compiled_function_type); - m_execute_unknown_function->setCallingConv(CallingConv::X86_64_Win64); - m_execution_engine->addGlobalMapping(m_execute_unknown_function, (void *)execute_unknown_function); - - m_execute_unknown_block = (Function *)m_module->getOrInsertFunction("execute_unknown_block", m_compiled_function_type); - m_execute_unknown_block->setCallingConv(CallingConv::X86_64_Win64); - m_execution_engine->addGlobalMapping(m_execute_unknown_block, (void *)execute_unknown_block); - - if (!s_rotate_mask_inited) { - InitRotateMask(); - s_rotate_mask_inited = true; - } -} - -Compiler::~Compiler() { - delete m_execution_engine; - delete m_fpm; - delete m_ir_builder; - delete m_llvm_context; -} - -Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all, bool generate_linkable_exits) { - auto compilation_start = std::chrono::high_resolution_clock::now(); - - m_state.cfg = &cfg; - m_state.inline_all = inline_all; - m_state.generate_linkable_exits = generate_linkable_exits; - - // Create the function - m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); - m_state.function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_state.function->arg_begin(); - arg_i->setName("ppu_state"); - m_state.args[CompileTaskState::Args::State] = arg_i; - (++arg_i)->setName("context"); - m_state.args[CompileTaskState::Args::Context] = arg_i; - - // Create the entry block and add code to branch to the first instruction - m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); - - // Convert each instruction in the CFG to LLVM IR - std::vector exit_instr_list; - for (auto instr_i = cfg.instruction_addresses.begin(); instr_i != cfg.instruction_addresses.end(); instr_i++) { - m_state.hit_branch_instruction = false; - m_state.current_instruction_address = *instr_i; - auto instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); - m_ir_builder->SetInsertPoint(instr_bb); - - if (!inline_all && *instr_i != cfg.start_address) { - // Use an already compiled implementation of this block if available - auto ordinal = m_recompilation_engine.GetOrdinal(*instr_i); - if (ordinal != 0xFFFFFFFF) { - auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); - exit_instr_list.push_back(exit_instr_i32); - - auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - auto ret_i32 = IndirectCall(*instr_i, context_i64, false); - - auto switch_instr = m_ir_builder->CreateSwitch(ret_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); - auto branch_i = cfg.branches.find(*instr_i); - if (branch_i != cfg.branches.end()) { - for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { - switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); - } - } - } - } - - if (instr_bb->empty()) { - u32 instr = re32(vm::get_ref(m_state.current_instruction_address)); - Decode(instr); - if (!m_state.hit_branch_instruction) { - m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); - } - } - } - - // Generate exit logic for all empty blocks - auto default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); - for (auto block_i = m_state.function->begin(); block_i != m_state.function->end(); block_i++) { - if (!block_i->getInstList().empty() || block_i->getName() == default_exit_block_name) { - continue; - } - - // Found an empty block - m_ir_builder->SetInsertPoint(block_i); - auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); - exit_instr_list.push_back(exit_instr_i32); - - auto instr_address = GetAddressFromBasicBlockName(block_i->getName()); - SetPc(m_ir_builder->getInt32(instr_address)); - - if (generate_linkable_exits) { - auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - auto ret_i32 = IndirectCall(instr_address, context_i64, false); - auto cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); - auto then_bb = GetBasicBlockFromAddress(instr_address, "then"); - auto merge_bb = GetBasicBlockFromAddress(instr_address, "merge"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - - m_ir_builder->SetInsertPoint(then_bb); - context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); - m_ir_builder->CreateBr(merge_bb); - - m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } else { - m_ir_builder->CreateRet(exit_instr_i32); - } - } - - // If the function has a default exit block then generate code for it - auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "", false); - if (default_exit_bb) { - m_ir_builder->SetInsertPoint(default_exit_bb); - auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); - exit_instr_list.push_back(exit_instr_i32); - - if (generate_linkable_exits) { - auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); - auto then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then"); - auto merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - - m_ir_builder->SetInsertPoint(then_bb); - auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); - m_ir_builder->CreateBr(merge_bb); - - m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } else { - m_ir_builder->CreateRet(exit_instr_i32); - } - } - - // Add incoming values for all exit instr PHI nodes - for (auto exit_instr_i = exit_instr_list.begin(); exit_instr_i != exit_instr_list.end(); exit_instr_i++) { - auto block = (*exit_instr_i)->getParent(); - for (auto pred_i = pred_begin(block); pred_i != pred_end(block); pred_i++) { - auto pred_address = GetAddressFromBasicBlockName((*pred_i)->getName()); - (*exit_instr_i)->addIncoming(m_ir_builder->getInt32(pred_address), *pred_i); - } - } - -#ifdef _DEBUG - m_recompilation_engine.Log() << *m_state.function; - - std::string verify; - raw_string_ostream verify_ostream(verify); - if (verifyFunction(*m_state.function, &verify_ostream)) { - m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; - } -#endif - - auto ir_build_end = std::chrono::high_resolution_clock::now(); - m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); - - // Optimize this function - m_fpm->run(*m_state.function); - auto optimize_end = std::chrono::high_resolution_clock::now(); - m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); - - // Translate to machine code - MachineCodeInfo mci; - m_execution_engine->runJITOnFunction(m_state.function, &mci); - auto translate_end = std::chrono::high_resolution_clock::now(); - m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); - -#ifdef _DEBUG - m_recompilation_engine.Log() << "\nDisassembly:\n"; - auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - for (size_t pc = 0; pc < mci.size();) { - char str[1024]; - - auto size = LLVMDisasmInstruction(disassembler, ((u8 *)mci.address()) + pc, mci.size() - pc, (uint64_t)(((u8 *)mci.address()) + pc), str, sizeof(str)); - m_recompilation_engine.Log() << fmt::Format("0x%08X: ", (u64)(((u8 *)mci.address()) + pc)) << str << '\n'; - pc += size; - } - - LLVMDisasmDispose(disassembler); -#endif - - auto compilation_end = std::chrono::high_resolution_clock::now(); - m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); - - return (Executable)mci.address(); -} - -void Compiler::FreeExecutable(const std::string & name) { - auto function = m_module->getFunction(name); - if (function) { - m_execution_engine->freeMachineCodeForFunction(function); - function->eraseFromParent(); - } -} - -Compiler::Stats Compiler::GetStats() { - return m_stats; -} - -void Compiler::Decode(const u32 code) { - (*PPU_instr::main_list)(this, code); -} - -void Compiler::NULL_OP() { - CompilationError("NULL_OP"); -} - -void Compiler::NOP() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::TDI(u32 to, u32 ra, s32 simm16) { - CompilationError("TDI"); -} - -void Compiler::TWI(u32 to, u32 ra, s32 simm16) { - CompilationError("TWI"); -} - -void Compiler::MFVSCR(u32 vd) { - auto vscr_i32 = GetVscr(); - auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128)); - SetVr(vd, vscr_i128); -} - -void Compiler::MTVSCR(u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0)); - vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001); - SetVscr(vscr_i32); -} - -void Compiler::VADDCUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - va_v4i32 = m_ir_builder->CreateNot(va_v4i32); - auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32); - auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmpv4i32); -} - -void Compiler::VADDFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32); - SetVr(vd, sum_v4f32); -} - -void Compiler::VADDSBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); - - // TODO: Set VSCR.SAT -} - -void Compiler::VADDSHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); - - // TODO: Set VSCR.SAT -} - -void Compiler::VADDSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - // It looks like x86 does not have an instruction to add 32 bit intergers with signed/unsigned saturation. - // To implement add with saturation, we first determine what the result would be if the operation were to cause - // an overflow. If two -ve numbers are being added and cause an overflow, the result would be 0x80000000. - // If two +ve numbers are being added and cause an overflow, the result would be 0x7FFFFFFF. Addition of a -ve - // number and a +ve number cannot cause overflow. So the result in case of an overflow is 0x7FFFFFFF + sign bit - // of any one of the operands. - auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands - // have the same sign, we bitwise XOR both the operands. If the sign bit of the result is 0 then the operands have the - // same sign and so may cause an overflow. We invert the result so that the sign bit is 1 when the operands have the - // same sign. - auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); - tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); - - // Perform the sum. - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // If an overflow occurs, then the sign of the sum will be different from the sign of the operands. So, we xor the - // result with one of the operands. The sign bit of the result will be 1 if the sign bit of the sum and the sign bit of the - // result is different. This result is again ANDed with tmp3 (the sign bit of tmp3 is 1 only if the operands have the same - // sign and so can cause an overflow). - auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, sum_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); - - // TODO: Set SAT -} - -void Compiler::VADDUBM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); -} - -void Compiler::VADDUBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); - - // TODO: Set SAT -} - -void Compiler::VADDUHM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); -} - -void Compiler::VADDUHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); - - // TODO: Set SAT -} - -void Compiler::VADDUWM(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - SetVr(vd, sum_v4i32); -} - -void Compiler::VADDUWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - auto cmp_v4i1 = m_ir_builder->CreateICmpULT(sum_v4i32, va_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateOr(sum_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Set SAT -} - -void Compiler::VAND(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VANDC(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateNot(vb_v4i32); - auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VAVGSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16); - sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt16(1))); - auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1); - auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, avg_v16i8); -} - -void Compiler::VAVGSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32); - sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(1))); - auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1); - auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, avg_v8i16); -} - -void Compiler::VAVGSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); - sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); - auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1); - auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, avg_v4i32); -} - -void Compiler::VAVGUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8); - SetVr(vd, avg_v16i8); -} - -void Compiler::VAVGUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16); - SetVr(vd, avg_v8i16); -} - -void Compiler::VAVGUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto va_v4i64 = m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); - sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); - auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1); - auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, avg_v4i32); -} - -void Compiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); - - if (uimm5) { - float scale = (float)((u64)1 << uimm5); - res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); - } - - SetVr(vd, res_v4f32); -} - -void Compiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); - - if (uimm5) { - float scale = (float)((u64)1 << uimm5); - res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); - } - - SetVr(vd, res_v4f32); -} - -void Compiler::VCMPBFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); - vb_v4f32 = m_ir_builder->CreateFNeg(vb_v4f32); - auto cmp_lt_v4i1 = m_ir_builder->CreateFCmpOLT(va_v4f32, vb_v4f32); - auto cmp_gt_v4i32 = m_ir_builder->CreateZExt(cmp_gt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto cmp_lt_v4i32 = m_ir_builder->CreateZExt(cmp_lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - cmp_gt_v4i32 = m_ir_builder->CreateShl(cmp_gt_v4i32, 31); - cmp_lt_v4i32 = m_ir_builder->CreateShl(cmp_lt_v4i32, 30); - auto res_v4i32 = m_ir_builder->CreateOr(cmp_gt_v4i32, cmp_lt_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Implement NJ mode -} - -void Compiler::VCMPBFP_(u32 vd, u32 va, u32 vb) { - VCMPBFP(vd, va, vb); - - auto vd_v16i8 = GetVrAsIntVec(vd, 8); - u32 mask_v16i32[16] = {3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - vd_v16i8 = m_ir_builder->CreateShuffleVector(vd_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0)); - auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0)); - SetCrField(6, nullptr, nullptr, cmp_i1, nullptr); -} - -void Compiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) { - VCMPEQFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); -} - -void Compiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) { - VCMPEQUB(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); -} - -void Compiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) { - VCMPEQUH(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) { - VCMPEQUW(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGEFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) { - VCMPGEFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) { - VCMPGTFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); -} - -void Compiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) { - VCMPGTSB(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); -} - -void Compiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) { - VCMPGTSH(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) { - VCMPGTSW(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); -} - -void Compiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) { - VCMPGTUB(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); -} - -void Compiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) { - VCMPGTUH(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); -} - -void Compiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { - VCMPGTUW(vd, va, vb); - SetCr6AfterVectorCompare(vd); -} - -void Compiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - if (uimm5) { - vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); - } - - auto res_v4i32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_cvtps2dq), vb_v4f32); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0x7FFFFFFF))); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateXor(cmp_v4i32, res_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - if (uimm5) { - vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); - } - - auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0))); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0xFFFFFFFFu))); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateFPToUI(res_v4f32, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VEXPTEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::pow, VectorType::get(m_ir_builder->getFloatTy(), 4)), - m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 2.0f)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VLOGEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::log2, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto vc_v4f32 = GetVrAsFloatVec(vc); - auto res_v4f32 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VMAXFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VMAXSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VMAXSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMAXSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMAXUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VMAXUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMAXUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); - - u32 mask1_v4i32[4] = {0, 1, 2, 3}; - auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = {4, 5, 6, 7}; - auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); - SetVr(vd, res_v8i16); - - // TODO: Set VSCR.SAT -} - -void Compiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(0x4000))); - res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); - - u32 mask1_v4i32[4] = {0, 1, 2, 3}; - auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = {4, 5, 6, 7}; - auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); - SetVr(vd, res_v8i16); - - // TODO: Set VSCR.SAT -} - -void Compiler::VMINFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VMINSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VMINSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMINSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMINUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VMINUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMINUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - res_v8i16 = m_ir_builder->CreateAdd(res_v8i16, vc_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMRGHB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v16i32[16] = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); -} - -void Compiler::VMRGHH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = {12, 4, 13, 5, 14, 6, 15, 7}; - auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, vd_v8i16); -} - -void Compiler::VMRGHW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask_v4i32[4] = {6, 2, 7, 3}; - auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - SetVr(vd, vd_v4i32); -} - -void Compiler::VMRGLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v16i32[16] = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); -} - -void Compiler::VMRGLH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = {8, 0, 9, 1, 10, 2, 11, 3}; - auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, vd_v8i16); -} - -void Compiler::VMRGLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask_v4i32[4] = {4, 0, 5, 1}; - auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - SetVr(vd, vd_v4i32); -} - -void Compiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); - - auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); - u32 mask1_v4i32[4] = {0, 4, 8, 12}; - auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp1_v4i32 = m_ir_builder->CreateSExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask2_v4i32[4] = {1, 5, 9, 13}; - auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto tmp2_v4i32 = m_ir_builder->CreateSExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask3_v4i32[4] = {2, 6, 10, 14}; - auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto tmp3_v4i32 = m_ir_builder->CreateSExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask4_v4i32[4] = {3, 7, 11, 15}; - auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto tmp4_v4i32 = m_ir_builder->CreateSExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - - SetVr(vd, res_v4i32); - - // TODO: Try to optimize with horizontal add -} - -void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); - - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32); - tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); - auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32); - auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); - - // TODO: Set VSCR.SAT -} - -void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); - - auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); - u32 mask1_v4i32[4] = {0, 4, 8, 12}; - auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp1_v4i32 = m_ir_builder->CreateZExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask2_v4i32[4] = {1, 5, 9, 13}; - auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto tmp2_v4i32 = m_ir_builder->CreateZExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask3_v4i32[4] = {2, 6, 10, 14}; - auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto tmp3_v4i32 = m_ir_builder->CreateZExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask4_v4i32[4] = {3, 7, 11, 15}; - auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto tmp4_v4i32 = m_ir_builder->CreateZExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - - SetVr(vd, res_v4i32); - - // TODO: Try to optimize with horizontal add -} - -void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - - auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); - u32 mask1_v4i32[4] = {0, 2, 4, 6}; - auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = {1, 3, 5, 7}; - auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - - SetVr(vd, res_v4i32); - - // TODO: Try to optimize with horizontal add -} - -void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8)); - - u32 mask1_v4i32[4] = {0, 2, 4, 6}; - u32 mask2_v4i32[4] = {1, 3, 5, 7}; - auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF))); - auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMULESH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMULEUB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMULEUH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMULOSB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); - va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMULOSH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); - va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VMULOUB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); - va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VMULOUH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); - va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto vc_v4f32 = GetVrAsFloatVec(vc); - vc_v4f32 = m_ir_builder->CreateFNeg(vc_v4f32); - auto res_v4f32 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VNOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); - res_v8i16 = m_ir_builder->CreateNot(res_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto vc_v16i8 = GetVrAsIntVec(vc, 8); - - auto thrity_one_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(31)); - vc_v16i8 = m_ir_builder->CreateAnd(vc_v16i8, thrity_one_v16i8); - - auto fifteen_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(15)); - auto vc_le15_v16i8 = m_ir_builder->CreateSub(fifteen_v16i8, vc_v16i8); - auto res_va_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), va_v16i8, vc_le15_v16i8); - - auto vc_gt15_v16i8 = m_ir_builder->CreateSub(thrity_one_v16i8, vc_v16i8); - auto cmp_i1 = m_ir_builder->CreateICmpUGT(vc_gt15_v16i8, fifteen_v16i8); - auto cmp_i8 = m_ir_builder->CreateSExt(cmp_i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - vc_gt15_v16i8 = m_ir_builder->CreateOr(cmp_i8, vc_gt15_v16i8); - auto res_vb_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vb_v16i8, vc_gt15_v16i8); - - auto res_v16i8 = m_ir_builder->CreateOr(res_vb_v16i8, res_va_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); - tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); - tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); - tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); - tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); - auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - - auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); - tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); - tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); - tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); - tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); - auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - - u32 mask_v8i32[8] = {1, 3, 5, 7, 9, 11, 13, 15}; - auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - - SetVr(vd, res_v8i16); - - // TODO: Implement with pext on CPUs with BMI -} - -void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packsswb_128), vb_v8i16, va_v8i16); - SetVr(vd, res_v16i8); - - // TODO: VSCR.SAT -} - -void Compiler::VPKSHUS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packuswb_128), vb_v8i16, va_v8i16); - SetVr(vd, res_v16i8); - - // TODO: VSCR.SAT -} - -void Compiler::VPKSWSS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), vb_v4i32, va_v4i32); - SetVr(vd, res_v8i16); - - // TODO: VSCR.SAT -} - -void Compiler::VPKSWUS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_packusdw), vb_v4i32, va_v4i32); - SetVr(vd, res_v8i16); - - // TODO: VSCR.SAT -} - -void Compiler::VPKUHUM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - - u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, res_v16i8); -} - -void Compiler::VPKUHUS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); - vb_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); - auto va_v16i8 = m_ir_builder->CreateBitCast(va_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto vb_v16i8 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, res_v16i8); - - // TODO: Set VSCR.SAT -} - -void Compiler::VPKUWUM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - - u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, res_v8i16); -} - -void Compiler::VPKUWUS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); - vb_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); - auto va_v8i16 = m_ir_builder->CreateBitCast(va_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - auto vb_v8i16 = m_ir_builder->CreateBitCast(vb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - - u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, res_v8i16); - - // TODO: Set VSCR.SAT -} - -void Compiler::VREFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VRFIM(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::floor, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VRFIN(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::nearbyint, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VRFIP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::ceil, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VRFIZ(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::trunc, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VRLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(7))); - auto tmp1_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); - vb_v16i8 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(8)), vb_v16i8); - auto tmp2_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); - auto res_v16i8 = m_ir_builder->CreateOr(tmp1_v16i8, tmp2_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VRLH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto tmp1_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); - vb_v8i16 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0x10)), vb_v8i16); - auto tmp2_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); - auto res_v8i16 = m_ir_builder->CreateOr(tmp1_v8i16, tmp2_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VRLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto tmp1_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); - vb_v4i32 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x20)), vb_v4i32); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); - auto res_v4i32 = m_ir_builder->CreateOr(tmp1_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VRSQRTEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), res_v4f32); - SetVr(vd, res_v4f32); -} - -void Compiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, vc_v4i32); - vc_v4i32 = m_ir_builder->CreateNot(vc_v4i32); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vc_v4i32); - auto vd_v4i32 = m_ir_builder->CreateOr(va_v4i32, vb_v4i32); - SetVr(vd, vd_v4i32); -} - -void Compiler::VSL(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); - SetVr(vd, va_i128); -} - -void Compiler::VSLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - sh = 16 - sh; - u32 mask_v16i32[16] = {sh, sh + 1, sh + 2, sh + 3, sh + 4, sh + 5, sh + 6, sh + 7, sh + 8, sh + 9, sh + 10, sh + 11, sh + 12, sh + 13, sh + 14, sh + 15}; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); -} - -void Compiler::VSLH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VSLO(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); - SetVr(vd, va_i128); -} - -void Compiler::VSLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5)); - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, undef_v16i8, mask_v16i32); - SetVr(vd, res_v16i8); -} - -void Compiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)); - auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5)); - auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, undef_v8i16, mask_v8i32); - SetVr(vd, res_v8i16); -} - -void Compiler::VSPLTISB(u32 vd, s32 simm5) { - auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5)); - SetVr(vd, vd_v16i8); -} - -void Compiler::VSPLTISH(u32 vd, s32 simm5) { - auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5)); - SetVr(vd, vd_v8i16); -} - -void Compiler::VSPLTISW(u32 vd, s32 simm5) { - auto vd_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5)); - SetVr(vd, vd_v4i32); -} - -void Compiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5)); - auto res_v4i32 = m_ir_builder->CreateShuffleVector(vb_v4i32, undef_v4i32, mask_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VSR(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); - SetVr(vd, va_i128); -} - -void Compiler::VSRAB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateAShr(va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VSRAH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateAShr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VSRAW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateAShr(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VSRB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); -} - -void Compiler::VSRH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::VSRO(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); - SetVr(vd, va_i128); -} - -void Compiler::VSRW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VSUBCUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - auto cmpv4i1 = m_ir_builder->CreateICmpUGE(va_v4i32, vb_v4i32); - auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmpv4i32); -} - -void Compiler::VSUBFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32); - SetVr(vd, diff_v4f32); -} - -void Compiler::VSUBSBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUBSHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUBSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - // See the comments for VADDSWS for a detailed description of how this works - - // Find the result in case of an overflow - auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // Find the elements that can overflow (elements with opposite sign bits) - auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); - - // Perform the sub - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - auto diff_v16i8 = m_ir_builder->CreateBitCast(diff_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // Find the elements that overflowed - auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, diff_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - - // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), diff_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); - - // TODO: Set SAT -} - -void Compiler::VSUBUBM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); -} - -void Compiler::VSUBUBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); - - // TODO: Set SAT -} - -void Compiler::VSUBUHM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); -} - -void Compiler::VSUBUHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); - - // TODO: Set SAT -} - -void Compiler::VSUBUWM(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - SetVr(vd, diff_v4i32); -} - -void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - auto cmp_v4i1 = m_ir_builder->CreateICmpULE(diff_v4i32, va_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateAnd(diff_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Set SAT -} - -void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3)); - auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty()); - for (auto i = 0; i < 4; i++) { - auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i)); - auto va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64); - } - - auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull)); - auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)); - res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64); - res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64); - auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128)); - - SetVr(vd, res_i128); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - u32 mask1_v2i32[2] = { 0, 2 }; - u32 mask2_v2i32[2] = { 1, 3 }; - auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto va1_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); - auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); - - auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64); - res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64); - auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64); - res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x80000000ull)), res_v2i64); - SetVr(vd, res_v2i64); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16)); - auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - - auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); - res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; - u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; - auto va_v8i64 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8)); - auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - - auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); - res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va1_v4i32 = m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - - auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32); - auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32); - auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32); - SetVr(vd, res_v4i32); - - // TODO: Set VSCR.SAT -} - -void Compiler::VUPKHPX(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 }; - vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - - auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); - tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); - auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VUPKHSB(u32 vd, u32 vb) { - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; - auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, res_v8i16); -} - -void Compiler::VUPKHSH(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v4i32[4] = { 4, 5, 6, 7 }; - auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); -} - -void Compiler::VUPKLPX(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; - vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - - auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); - tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); - auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); -} - -void Compiler::VUPKLSB(u32 vd, u32 vb) { - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, res_v8i16); -} - -void Compiler::VUPKLSH(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v4i32[4] = { 0, 1, 2, 3 }; - auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); -} - -void Compiler::VXOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); -} - -void Compiler::MULLI(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); - SetGpr(rd, res_i64); -} - -void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNeg(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16)); - auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, diff_i64); - SetXerCa(carry_i1); -} - -void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { - Value * ra_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - } else { - ra_i64 = GetGpr(ra); - } - - SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16)); -} - -void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { - Value * ra_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - } else { - ra_i64 = GetGpr(ra); - } - - SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16)); -} - -void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); -} - -void Compiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { - ADDIC(rd, ra, simm16); - SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); -} - -void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { - if (ra == 0) { - SetGpr(rd, m_ir_builder->getInt64((s64)simm16)); - } else { - auto ra_i64 = GetGpr(ra); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16)); - SetGpr(rd, sum_i64); - } -} - -void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { - if (ra == 0) { - SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16)); - } else { - auto ra_i64 = GetGpr(ra); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16)); - SetGpr(rd, sum_i64); - } -} - -void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); - auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? true : false); -} - -void Compiler::SC(u32 lev) { - switch (lev) { - case 0: - Call("SysCalls.DoSyscall", SysCalls::DoSyscall, m_state.args[CompileTaskState::Args::State], GetGpr(11)); - break; - case 2: - Call("StaticFuncManager.StaticExecute", &StaticFuncManager::StaticExecute, - m_ir_builder->getInt64((u64)&Emu.GetSFuncManager()), m_state.args[CompileTaskState::Args::State], GetGpr(11, 32)); - break; - case 3: - Call("PPUThread.FastStop", &PPUThread::FastStop, m_state.args[CompileTaskState::Args::State]); - break; - default: - CompilationError(fmt::Format("SC %u", lev)); - break; - } -} - -void Compiler::B(s32 ll, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); - auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); - CreateBranch(nullptr, target_i32, lk ? true : false); -} - -void Compiler::MCRF(u32 crfd, u32 crfs) { - if (crfd != crfs) { - auto cr_i32 = GetCr(); - auto crf_i32 = GetNibble(cr_i32, crfs); - cr_i32 = SetNibble(cr_i32, crfd, crf_i32); - SetCr(cr_i32); - } -} - -void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { - auto lr_i64 = GetLr(); - lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); - auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? true : false, true); -} - -void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); - res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::ISYNC() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); -} - -void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::DCBI(u32 ra, u32 rb) { - // TODO: See if this can be translated to cache flush - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); - res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); -} - -void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { - auto ctr_i64 = GetCtr(); - ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); - auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? true : false); -} - -void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto ra_i64 = GetGpr(ra); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - u64 mask = s_rotate_mask[32 + mb][32 + me]; - res_i64 = m_ir_builder->CreateAnd(res_i64, mask); - ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); - res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto rb_i64 = GetGpr(rb); - auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x1F); - auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(32), shl_i64); - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); - auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::ORI(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); - SetGpr(ra, res_i64); -} - -void Compiler::ORIS(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); -} - -void Compiler::XORI(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); - SetGpr(ra, res_i64); -} - -void Compiler::XORIS(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); -} - -void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); - SetGpr(ra, res_i64); - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); -} - -void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); -} - -void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63]); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][me]); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63 - sh]); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto ra_i64 = GetGpr(ra); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } - - u64 mask = s_rotate_mask[mb][63 - sh]; - res_i64 = m_ir_builder->CreateAnd(res_i64, mask); - ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); - res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F); - auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(64), shl_i64); - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); - auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - - if (is_r) { - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][m_eb]); - } else { - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[m_eb][63]); - } - - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { - Value * ra_i64; - Value * rb_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - rb_i64 = m_ir_builder->CreateSExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); - } else { - ra_i64 = GetGpr(ra); - rb_i64 = GetGpr(rb); - } - - SetCrFieldSignedCmp(crfd, ra_i64, rb_i64); -} - -void Compiler::TW(u32 to, u32 ra, u32 rb) { - CompilationError("TW"); -} - -void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsl_values[] = { - {0x08090A0B0C0D0E0F, 0x0001020304050607}, - {0x090A0B0C0D0E0F10, 0x0102030405060708}, - {0x0A0B0C0D0E0F1011, 0x0203040506070809}, - {0x0B0C0D0E0F101112, 0x030405060708090A}, - {0x0C0D0E0F10111213, 0x0405060708090A0B}, - {0x0D0E0F1011121314, 0x05060708090A0B0C}, - {0x0E0F101112131415, 0x060708090A0B0C0D}, - {0x0F10111213141516, 0x0708090A0B0C0D0E}, - {0x1011121314151617, 0x08090A0B0C0D0E0F}, - {0x1112131415161718, 0x090A0B0C0D0E0F10}, - {0x1213141516171819, 0x0A0B0C0D0E0F1011}, - {0x131415161718191A, 0x0B0C0D0E0F101112}, - {0x1415161718191A1B, 0x0C0D0E0F10111213}, - {0x15161718191A1B1C, 0x0D0E0F1011121314}, - {0x161718191A1B1C1D, 0x0E0F101112131415}, - {0x1718191A1B1C1D1E, 0x0F10111213141516}, - }; - - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - auto lvsl_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsl_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); - lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64); - auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16); - SetVr(vd, val_v16i8); -} - -void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto val_i8 = ReadMemory(addr_i64, 8); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); - auto vd_v16i8 = GetVrAsIntVec(vd, 8); - vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64); - SetVr(vd, vd_v16i8); -} - -void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNeg(ra_i64); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); - auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, diff_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("SUBFCO"); - } -} - -void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - } -} - -void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128)); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i64, m_ir_builder->getIntNTy(128)); - auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); - prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); - auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - prod_i64 = m_ir_builder->CreateLShr(prod_i64, 32); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::MFOCRF(u32 a, u32 rd, u32 crm) { - auto cr_i32 = GetCr(); - auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, cr_i64); -} - -void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { +#include "stdafx.h" +#include "Utilities/Log.h" +#include "Emu/Cell/PPULLVMRecompiler.h" +#include "Emu/Memory/Memory.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/IR/Verifier.h" + +using namespace llvm; +using namespace ppu_recompiler_llvm; + +u64 Compiler::s_rotate_mask[64][64]; +bool Compiler::s_rotate_mask_inited = false; + +Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block) + : m_recompilation_engine(recompilation_engine) { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetDisassembler(); + + m_llvm_context = new LLVMContext(); + m_ir_builder = new IRBuilder<>(*m_llvm_context); + m_module = new llvm::Module("Module", *m_llvm_context); + m_fpm = new FunctionPassManager(m_module); + + EngineBuilder engine_builder(m_module); + engine_builder.setMCPU(sys::getHostCPUName()); + engine_builder.setEngineKind(EngineKind::JIT); + engine_builder.setOptLevel(CodeGenOpt::Default); + m_execution_engine = engine_builder.create(); + + m_fpm->add(new DataLayoutPass(m_module)); + m_fpm->add(createNoAAPass()); + m_fpm->add(createBasicAliasAnalysisPass()); + m_fpm->add(createNoTargetTransformInfoPass()); + m_fpm->add(createEarlyCSEPass()); + m_fpm->add(createTailCallEliminationPass()); + m_fpm->add(createReassociatePass()); + m_fpm->add(createInstructionCombiningPass()); + m_fpm->add(new DominatorTreeWrapperPass()); + m_fpm->add(new MemoryDependenceAnalysis()); + m_fpm->add(createGVNPass()); + m_fpm->add(createInstructionCombiningPass()); + m_fpm->add(new MemoryDependenceAnalysis()); + m_fpm->add(createDeadStoreEliminationPass()); + m_fpm->add(new LoopInfo()); + m_fpm->add(new ScalarEvolution()); + m_fpm->add(createSLPVectorizerPass()); + m_fpm->add(createInstructionCombiningPass()); + m_fpm->add(createCFGSimplificationPass()); + m_fpm->doInitialization(); + + std::vector arg_types; + arg_types.push_back(m_ir_builder->getInt8PtrTy()); + arg_types.push_back(m_ir_builder->getInt8PtrTy()); + arg_types.push_back(m_ir_builder->getInt64Ty()); + m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); + + m_execute_unknown_function = (Function *)m_module->getOrInsertFunction("execute_unknown_function", m_compiled_function_type); + m_execute_unknown_function->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_execute_unknown_function, (void *)execute_unknown_function); + + m_execute_unknown_block = (Function *)m_module->getOrInsertFunction("execute_unknown_block", m_compiled_function_type); + m_execute_unknown_block->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(m_execute_unknown_block, (void *)execute_unknown_block); + + if (!s_rotate_mask_inited) { + InitRotateMask(); + s_rotate_mask_inited = true; + } +} + +Compiler::~Compiler() { + delete m_execution_engine; + delete m_fpm; + delete m_ir_builder; + delete m_llvm_context; +} + +Executable Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool inline_all, bool generate_linkable_exits) { + auto compilation_start = std::chrono::high_resolution_clock::now(); + + m_state.cfg = &cfg; + m_state.inline_all = inline_all; + m_state.generate_linkable_exits = generate_linkable_exits; + + // Create the function + m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); + m_state.function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_state.function->arg_begin(); + arg_i->setName("ppu_state"); + m_state.args[CompileTaskState::Args::State] = arg_i; + (++arg_i)->setName("context"); + m_state.args[CompileTaskState::Args::Context] = arg_i; + + // Create the entry block and add code to branch to the first instruction + m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); + + // Convert each instruction in the CFG to LLVM IR + std::vector exit_instr_list; + for (auto instr_i = cfg.instruction_addresses.begin(); instr_i != cfg.instruction_addresses.end(); instr_i++) { + m_state.hit_branch_instruction = false; + m_state.current_instruction_address = *instr_i; + auto instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->SetInsertPoint(instr_bb); + + if (!inline_all && *instr_i != cfg.start_address) { + // Use an already compiled implementation of this block if available + auto ordinal = m_recompilation_engine.GetOrdinal(*instr_i); + if (ordinal != 0xFFFFFFFF) { + auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); + + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + auto ret_i32 = IndirectCall(*instr_i, context_i64, false); + + auto switch_instr = m_ir_builder->CreateSwitch(ret_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); + auto branch_i = cfg.branches.find(*instr_i); + if (branch_i != cfg.branches.end()) { + for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { + switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); + } + } + } + } + + if (instr_bb->empty()) { + u32 instr = re32(vm::get_ref(m_state.current_instruction_address)); + Decode(instr); + if (!m_state.hit_branch_instruction) { + m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); + } + } + } + + // Generate exit logic for all empty blocks + auto default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); + for (auto block_i = m_state.function->begin(); block_i != m_state.function->end(); block_i++) { + if (!block_i->getInstList().empty() || block_i->getName() == default_exit_block_name) { + continue; + } + + // Found an empty block + m_ir_builder->SetInsertPoint(block_i); + auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); + + auto instr_address = GetAddressFromBasicBlockName(block_i->getName()); + SetPc(m_ir_builder->getInt32(instr_address)); + + if (generate_linkable_exits) { + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + auto ret_i32 = IndirectCall(instr_address, context_i64, false); + auto cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); + auto then_bb = GetBasicBlockFromAddress(instr_address, "then"); + auto merge_bb = GetBasicBlockFromAddress(instr_address, "merge"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + + m_ir_builder->SetInsertPoint(then_bb); + context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } else { + m_ir_builder->CreateRet(exit_instr_i32); + } + } + + // If the function has a default exit block then generate code for it + auto default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "", false); + if (default_exit_bb) { + m_ir_builder->SetInsertPoint(default_exit_bb); + auto exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); + + if (generate_linkable_exits) { + auto cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); + auto then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then"); + auto merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + + m_ir_builder->SetInsertPoint(then_bb); + auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); + m_ir_builder->CreateBr(merge_bb); + + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } else { + m_ir_builder->CreateRet(exit_instr_i32); + } + } + + // Add incoming values for all exit instr PHI nodes + for (auto exit_instr_i = exit_instr_list.begin(); exit_instr_i != exit_instr_list.end(); exit_instr_i++) { + auto block = (*exit_instr_i)->getParent(); + for (auto pred_i = pred_begin(block); pred_i != pred_end(block); pred_i++) { + auto pred_address = GetAddressFromBasicBlockName((*pred_i)->getName()); + (*exit_instr_i)->addIncoming(m_ir_builder->getInt32(pred_address), *pred_i); + } + } + +#ifdef _DEBUG + m_recompilation_engine.Log() << *m_state.function; + + std::string verify; + raw_string_ostream verify_ostream(verify); + if (verifyFunction(*m_state.function, &verify_ostream)) { + m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; + } +#endif + + auto ir_build_end = std::chrono::high_resolution_clock::now(); + m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); + + // Optimize this function + m_fpm->run(*m_state.function); + auto optimize_end = std::chrono::high_resolution_clock::now(); + m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); + + // Translate to machine code + MachineCodeInfo mci; + m_execution_engine->runJITOnFunction(m_state.function, &mci); + auto translate_end = std::chrono::high_resolution_clock::now(); + m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); + +#ifdef _DEBUG + m_recompilation_engine.Log() << "\nDisassembly:\n"; + auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); + for (size_t pc = 0; pc < mci.size();) { + char str[1024]; + + auto size = LLVMDisasmInstruction(disassembler, ((u8 *)mci.address()) + pc, mci.size() - pc, (uint64_t)(((u8 *)mci.address()) + pc), str, sizeof(str)); + m_recompilation_engine.Log() << fmt::Format("0x%08X: ", (u64)(((u8 *)mci.address()) + pc)) << str << '\n'; + pc += size; + } + + LLVMDisasmDispose(disassembler); +#endif + + auto compilation_end = std::chrono::high_resolution_clock::now(); + m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); + + return (Executable)mci.address(); +} + +void Compiler::FreeExecutable(const std::string & name) { + auto function = m_module->getFunction(name); + if (function) { + m_execution_engine->freeMachineCodeForFunction(function); + function->eraseFromParent(); + } +} + +Compiler::Stats Compiler::GetStats() { + return m_stats; +} + +void Compiler::Decode(const u32 code) { + (*PPU_instr::main_list)(this, code); +} + +void Compiler::NULL_OP() { + CompilationError("NULL_OP"); +} + +void Compiler::NOP() { + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::TDI(u32 to, u32 ra, s32 simm16) { + CompilationError("TDI"); +} + +void Compiler::TWI(u32 to, u32 ra, s32 simm16) { + CompilationError("TWI"); +} + +void Compiler::MFVSCR(u32 vd) { + auto vscr_i32 = GetVscr(); + auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128)); + SetVr(vd, vscr_i128); +} + +void Compiler::MTVSCR(u32 vb) { + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0)); + vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001); + SetVscr(vscr_i32); +} + +void Compiler::VADDCUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + va_v4i32 = m_ir_builder->CreateNot(va_v4i32); + auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32); + auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmpv4i32); +} + +void Compiler::VADDFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32); + SetVr(vd, sum_v4f32); +} + +void Compiler::VADDSBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); + + // TODO: Set VSCR.SAT +} + +void Compiler::VADDSHS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); + + // TODO: Set VSCR.SAT +} + +void Compiler::VADDSWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + // It looks like x86 does not have an instruction to add 32 bit intergers with signed/unsigned saturation. + // To implement add with saturation, we first determine what the result would be if the operation were to cause + // an overflow. If two -ve numbers are being added and cause an overflow, the result would be 0x80000000. + // If two +ve numbers are being added and cause an overflow, the result would be 0x7FFFFFFF. Addition of a -ve + // number and a +ve number cannot cause overflow. So the result in case of an overflow is 0x7FFFFFFF + sign bit + // of any one of the operands. + auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands + // have the same sign, we bitwise XOR both the operands. If the sign bit of the result is 0 then the operands have the + // same sign and so may cause an overflow. We invert the result so that the sign bit is 1 when the operands have the + // same sign. + auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); + tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); + + // Perform the sum. + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // If an overflow occurs, then the sign of the sum will be different from the sign of the operands. So, we xor the + // result with one of the operands. The sign bit of the result will be 1 if the sign bit of the sum and the sign bit of the + // result is different. This result is again ANDed with tmp3 (the sign bit of tmp3 is 1 only if the operands have the same + // sign and so can cause an overflow). + auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, sum_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. + auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); + + // TODO: Set SAT +} + +void Compiler::VADDUBM(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); +} + +void Compiler::VADDUBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); + + // TODO: Set SAT +} + +void Compiler::VADDUHM(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); +} + +void Compiler::VADDUHS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); + + // TODO: Set SAT +} + +void Compiler::VADDUWM(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + SetVr(vd, sum_v4i32); +} + +void Compiler::VADDUWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + auto cmp_v4i1 = m_ir_builder->CreateICmpULT(sum_v4i32, va_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateOr(sum_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set SAT +} + +void Compiler::VAND(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VANDC(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateNot(vb_v4i32); + auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VAVGSB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16); + sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt16(1))); + auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1); + auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, avg_v16i8); +} + +void Compiler::VAVGSH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32); + sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(1))); + auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1); + auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, avg_v8i16); +} + +void Compiler::VAVGSW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); + sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); + auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1); + auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, avg_v4i32); +} + +void Compiler::VAVGUB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8); + SetVr(vd, avg_v16i8); +} + +void Compiler::VAVGUH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16); + SetVr(vd, avg_v8i16); +} + +void Compiler::VAVGUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i64 = m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); + sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); + auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1); + auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, avg_v4i32); +} + +void Compiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); + + if (uimm5) { + float scale = (float)((u64)1 << uimm5); + res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); + } + + SetVr(vd, res_v4f32); +} + +void Compiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); + + if (uimm5) { + float scale = (float)((u64)1 << uimm5); + res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); + } + + SetVr(vd, res_v4f32); +} + +void Compiler::VCMPBFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); + vb_v4f32 = m_ir_builder->CreateFNeg(vb_v4f32); + auto cmp_lt_v4i1 = m_ir_builder->CreateFCmpOLT(va_v4f32, vb_v4f32); + auto cmp_gt_v4i32 = m_ir_builder->CreateZExt(cmp_gt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto cmp_lt_v4i32 = m_ir_builder->CreateZExt(cmp_lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + cmp_gt_v4i32 = m_ir_builder->CreateShl(cmp_gt_v4i32, 31); + cmp_lt_v4i32 = m_ir_builder->CreateShl(cmp_lt_v4i32, 30); + auto res_v4i32 = m_ir_builder->CreateOr(cmp_gt_v4i32, cmp_lt_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Implement NJ mode +} + +void Compiler::VCMPBFP_(u32 vd, u32 va, u32 vb) { + VCMPBFP(vd, va, vb); + + auto vd_v16i8 = GetVrAsIntVec(vd, 8); + u32 mask_v16i32[16] = {3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + vd_v16i8 = m_ir_builder->CreateShuffleVector(vd_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0)); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0)); + SetCrField(6, nullptr, nullptr, cmp_i1, nullptr); +} + +void Compiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) { + VCMPEQFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); +} + +void Compiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) { + VCMPEQUB(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); +} + +void Compiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) { + VCMPEQUH(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) { + VCMPEQUW(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGEFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) { + VCMPGEFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) { + VCMPGTFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); +} + +void Compiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) { + VCMPGTSB(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); +} + +void Compiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) { + VCMPGTSH(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) { + VCMPGTSW(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); +} + +void Compiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) { + VCMPGTUB(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); +} + +void Compiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) { + VCMPGTUH(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); +} + +void Compiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { + VCMPGTUW(vd, va, vb); + SetCr6AfterVectorCompare(vd); +} + +void Compiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } + + auto res_v4i32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_cvtps2dq), vb_v4f32); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0x7FFFFFFF))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateXor(cmp_v4i32, res_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } + + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0))); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0xFFFFFFFFu))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateFPToUI(res_v4f32, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VEXPTEFP(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::pow, VectorType::get(m_ir_builder->getFloatTy(), 4)), + m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 2.0f)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VLOGEFP(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::log2, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto vc_v4f32 = GetVrAsFloatVec(vc); + auto res_v4f32 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VMAXFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VMAXSB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VMAXSH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMAXSW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMAXUB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VMAXUH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMAXUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + + u32 mask1_v4i32[4] = {0, 1, 2, 3}; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {4, 5, 6, 7}; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT +} + +void Compiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(0x4000))); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + + u32 mask1_v4i32[4] = {0, 1, 2, 3}; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {4, 5, 6, 7}; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT +} + +void Compiler::VMINFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VMINSB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VMINSH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMINSW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMINUB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VMINUH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMINUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + res_v8i16 = m_ir_builder->CreateAdd(res_v8i16, vc_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMRGHB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v16i32[16] = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); +} + +void Compiler::VMRGHH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = {12, 4, 13, 5, 14, 6, 15, 7}; + auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, vd_v8i16); +} + +void Compiler::VMRGHW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + u32 mask_v4i32[4] = {6, 2, 7, 3}; + auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + SetVr(vd, vd_v4i32); +} + +void Compiler::VMRGLB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v16i32[16] = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); +} + +void Compiler::VMRGLH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = {8, 0, 9, 1, 10, 2, 11, 3}; + auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, vd_v8i16); +} + +void Compiler::VMRGLW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + u32 mask_v4i32[4] = {4, 0, 5, 1}; + auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + SetVr(vd, vd_v4i32); +} + +void Compiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); + + auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); + u32 mask1_v4i32[4] = {0, 4, 8, 12}; + auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp1_v4i32 = m_ir_builder->CreateSExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask2_v4i32[4] = {1, 5, 9, 13}; + auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto tmp2_v4i32 = m_ir_builder->CreateSExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask3_v4i32[4] = {2, 6, 10, 14}; + auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto tmp3_v4i32 = m_ir_builder->CreateSExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask4_v4i32[4] = {3, 7, 11, 15}; + auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto tmp4_v4i32 = m_ir_builder->CreateSExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + + SetVr(vd, res_v4i32); + + // TODO: Try to optimize with horizontal add +} + +void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); + + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32); + tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); + auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32); + auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); + + // TODO: Set VSCR.SAT +} + +void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); + + auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); + u32 mask1_v4i32[4] = {0, 4, 8, 12}; + auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp1_v4i32 = m_ir_builder->CreateZExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask2_v4i32[4] = {1, 5, 9, 13}; + auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto tmp2_v4i32 = m_ir_builder->CreateZExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask3_v4i32[4] = {2, 6, 10, 14}; + auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto tmp3_v4i32 = m_ir_builder->CreateZExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask4_v4i32[4] = {3, 7, 11, 15}; + auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto tmp4_v4i32 = m_ir_builder->CreateZExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + + SetVr(vd, res_v4i32); + + // TODO: Try to optimize with horizontal add +} + +void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + + auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); + u32 mask1_v4i32[4] = {0, 2, 4, 6}; + auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = {1, 3, 5, 7}; + auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + + SetVr(vd, res_v4i32); + + // TODO: Try to optimize with horizontal add +} + +void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8)); + + u32 mask1_v4i32[4] = {0, 2, 4, 6}; + u32 mask2_v4i32[4] = {1, 3, 5, 7}; + auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF))); + auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMULESH(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMULEUB(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMULEUH(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMULOSB(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMULOSH(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VMULOUB(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VMULOUH(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto vc_v4f32 = GetVrAsFloatVec(vc); + vc_v4f32 = m_ir_builder->CreateFNeg(vc_v4f32); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VNOR(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); + res_v8i16 = m_ir_builder->CreateNot(res_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VOR(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto vc_v16i8 = GetVrAsIntVec(vc, 8); + + auto thrity_one_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(31)); + vc_v16i8 = m_ir_builder->CreateAnd(vc_v16i8, thrity_one_v16i8); + + auto fifteen_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(15)); + auto vc_le15_v16i8 = m_ir_builder->CreateSub(fifteen_v16i8, vc_v16i8); + auto res_va_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), va_v16i8, vc_le15_v16i8); + + auto vc_gt15_v16i8 = m_ir_builder->CreateSub(thrity_one_v16i8, vc_v16i8); + auto cmp_i1 = m_ir_builder->CreateICmpUGT(vc_gt15_v16i8, fifteen_v16i8); + auto cmp_i8 = m_ir_builder->CreateSExt(cmp_i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + vc_gt15_v16i8 = m_ir_builder->CreateOr(cmp_i8, vc_gt15_v16i8); + auto res_vb_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vb_v16i8, vc_gt15_v16i8); + + auto res_v16i8 = m_ir_builder->CreateOr(res_vb_v16i8, res_va_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + u32 mask_v8i32[8] = {1, 3, 5, 7, 9, 11, 13, 15}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + SetVr(vd, res_v8i16); + + // TODO: Implement with pext on CPUs with BMI +} + +void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packsswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); + + // TODO: VSCR.SAT +} + +void Compiler::VPKSHUS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packuswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); + + // TODO: VSCR.SAT +} + +void Compiler::VPKSWSS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); + + // TODO: VSCR.SAT +} + +void Compiler::VPKSWUS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_packusdw), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); + + // TODO: VSCR.SAT +} + +void Compiler::VPKUHUM(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + + u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); +} + +void Compiler::VPKUHUS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + vb_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + auto va_v16i8 = m_ir_builder->CreateBitCast(va_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto vb_v16i8 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + u32 mask_v16i32[16] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); + + // TODO: Set VSCR.SAT +} + +void Compiler::VPKUWUM(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + + u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); +} + +void Compiler::VPKUWUS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + vb_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + auto va_v8i16 = m_ir_builder->CreateBitCast(va_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto vb_v8i16 = m_ir_builder->CreateBitCast(vb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + + u32 mask_v8i32[8] = {0, 2, 4, 6, 8, 10, 12, 14}; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); + + // TODO: Set VSCR.SAT +} + +void Compiler::VREFP(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VRFIM(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::floor, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VRFIN(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::nearbyint, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VRFIP(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::ceil, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VRFIZ(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::trunc, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VRLB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(7))); + auto tmp1_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); + vb_v16i8 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(8)), vb_v16i8); + auto tmp2_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); + auto res_v16i8 = m_ir_builder->CreateOr(tmp1_v16i8, tmp2_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VRLH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto tmp1_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); + vb_v8i16 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0x10)), vb_v8i16); + auto tmp2_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); + auto res_v8i16 = m_ir_builder->CreateOr(tmp1_v8i16, tmp2_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VRLW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto tmp1_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); + vb_v4i32 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x20)), vb_v4i32); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); + auto res_v4i32 = m_ir_builder->CreateOr(tmp1_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VRSQRTEFP(u32 vd, u32 vb) { + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), res_v4f32); + SetVr(vd, res_v4f32); +} + +void Compiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, vc_v4i32); + vc_v4i32 = m_ir_builder->CreateNot(vc_v4i32); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vc_v4i32); + auto vd_v4i32 = m_ir_builder->CreateOr(va_v4i32, vb_v4i32); + SetVr(vd, vd_v4i32); +} + +void Compiler::VSL(u32 vd, u32 va, u32 vb) { + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); + SetVr(vd, va_i128); +} + +void Compiler::VSLB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + sh = 16 - sh; + u32 mask_v16i32[16] = {sh, sh + 1, sh + 2, sh + 3, sh + 4, sh + 5, sh + 6, sh + 7, sh + 8, sh + 9, sh + 10, sh + 11, sh + 12, sh + 13, sh + 14, sh + 15}; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); +} + +void Compiler::VSLH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VSLO(u32 vd, u32 va, u32 vb) { + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); + SetVr(vd, va_i128); +} + +void Compiler::VSLW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5)); + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, undef_v16i8, mask_v16i32); + SetVr(vd, res_v16i8); +} + +void Compiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5)); + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, undef_v8i16, mask_v8i32); + SetVr(vd, res_v8i16); +} + +void Compiler::VSPLTISB(u32 vd, s32 simm5) { + auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5)); + SetVr(vd, vd_v16i8); +} + +void Compiler::VSPLTISH(u32 vd, s32 simm5) { + auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5)); + SetVr(vd, vd_v8i16); +} + +void Compiler::VSPLTISW(u32 vd, s32 simm5) { + auto vd_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5)); + SetVr(vd, vd_v4i32); +} + +void Compiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5)); + auto res_v4i32 = m_ir_builder->CreateShuffleVector(vb_v4i32, undef_v4i32, mask_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VSR(u32 vd, u32 va, u32 vb) { + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); + SetVr(vd, va_i128); +} + +void Compiler::VSRAB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateAShr(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VSRAH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateAShr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VSRAW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateAShr(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VSRB(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); +} + +void Compiler::VSRH(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::VSRO(u32 vd, u32 va, u32 vb) { + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); + SetVr(vd, va_i128); +} + +void Compiler::VSRW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VSUBCUW(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + auto cmpv4i1 = m_ir_builder->CreateICmpUGE(va_v4i32, vb_v4i32); + auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmpv4i32); +} + +void Compiler::VSUBFP(u32 vd, u32 va, u32 vb) { + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32); + SetVr(vd, diff_v4f32); +} + +void Compiler::VSUBSBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUBSHS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUBSWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + // See the comments for VADDSWS for a detailed description of how this works + + // Find the result in case of an overflow + auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // Find the elements that can overflow (elements with opposite sign bits) + auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); + + // Perform the sub + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + auto diff_v16i8 = m_ir_builder->CreateBitCast(diff_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // Find the elements that overflowed + auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, diff_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + + // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. + auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), diff_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); + + // TODO: Set SAT +} + +void Compiler::VSUBUBM(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); +} + +void Compiler::VSUBUBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); + + // TODO: Set SAT +} + +void Compiler::VSUBUHM(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); +} + +void Compiler::VSUBUHS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); + + // TODO: Set SAT +} + +void Compiler::VSUBUWM(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + SetVr(vd, diff_v4i32); +} + +void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + auto cmp_v4i1 = m_ir_builder->CreateICmpULE(diff_v4i32, va_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateAnd(diff_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set SAT +} + +void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3)); + auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty()); + for (auto i = 0; i < 4; i++) { + auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i)); + auto va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64); + } + + auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull)); + auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)); + res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64); + res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64); + auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128)); + + SetVr(vd, res_i128); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v2i32[2] = { 0, 2 }; + u32 mask2_v2i32[2] = { 1, 3 }; + auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto va1_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + + auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64); + res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64); + auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64); + res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x80000000ull)), res_v2i64); + SetVr(vd, res_v2i64); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; + u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; + auto va_v8i64 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va1_v4i32 = m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + + auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32); + auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32); + auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32); + SetVr(vd, res_v4i32); + + // TODO: Set VSCR.SAT +} + +void Compiler::VUPKHPX(u32 vd, u32 vb) { + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VUPKHSB(u32 vd, u32 vb) { + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); +} + +void Compiler::VUPKHSH(u32 vd, u32 vb) { + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 4, 5, 6, 7 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); +} + +void Compiler::VUPKLPX(u32 vd, u32 vb) { + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); +} + +void Compiler::VUPKLSB(u32 vd, u32 vb) { + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); +} + +void Compiler::VUPKLSH(u32 vd, u32 vb) { + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 0, 1, 2, 3 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); +} + +void Compiler::VXOR(u32 vd, u32 va, u32 vb) { + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); +} + +void Compiler::MULLI(u32 rd, u32 ra, s32 simm16) { + auto ra_i64 = GetGpr(ra); + auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); + SetGpr(rd, res_i64); +} + +void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNeg(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16)); + auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, diff_i64); + SetXerCa(carry_i1); +} + +void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { + Value * ra_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + } else { + ra_i64 = GetGpr(ra); + } + + SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16)); +} + +void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { + Value * ra_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + } else { + ra_i64 = GetGpr(ra); + } + + SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16)); +} + +void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { + auto ra_i64 = GetGpr(ra); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, sum_i64); + SetXerCa(carry_i1); +} + +void Compiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { + ADDIC(rd, ra, simm16); + SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); +} + +void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { + if (ra == 0) { + SetGpr(rd, m_ir_builder->getInt64((s64)simm16)); + } else { + auto ra_i64 = GetGpr(ra); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16)); + SetGpr(rd, sum_i64); + } +} + +void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { + if (ra == 0) { + SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16)); + } else { + auto ra_i64 = GetGpr(ra); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16)); + SetGpr(rd, sum_i64); + } +} + +void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? true : false); +} + +void Compiler::SC(u32 lev) { + switch (lev) { + case 0: + Call("SysCalls.DoSyscall", SysCalls::DoSyscall, m_state.args[CompileTaskState::Args::State], GetGpr(11)); + break; + case 2: + Call("StaticFuncManager.StaticExecute", &StaticFuncManager::StaticExecute, + m_ir_builder->getInt64((u64)&Emu.GetSFuncManager()), m_state.args[CompileTaskState::Args::State], GetGpr(11, 32)); + break; + case 3: + Call("PPUThread.FastStop", &PPUThread::FastStop, m_state.args[CompileTaskState::Args::State]); + break; + default: + CompilationError(fmt::Format("SC %u", lev)); + break; + } +} + +void Compiler::B(s32 ll, u32 aa, u32 lk) { + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(nullptr, target_i32, lk ? true : false); +} + +void Compiler::MCRF(u32 crfd, u32 crfs) { + if (crfd != crfs) { + auto cr_i32 = GetCr(); + auto crf_i32 = GetNibble(cr_i32, crfs); + cr_i32 = SetNibble(cr_i32, crfd, crf_i32); + SetCr(cr_i32); + } +} + +void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { + auto lr_i64 = GetLr(); + lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); + auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? true : false, true); +} + +void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); + res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::ISYNC() { + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); +} + +void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::DCBI(u32 ra, u32 rb) { + // TODO: See if this can be translated to cache flush + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); + res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); +} + +void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { + auto ctr_i64 = GetCtr(); + ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); + auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? true : false); +} + +void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto ra_i64 = GetGpr(ra); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + u64 mask = s_rotate_mask[32 + mb][32 + me]; + res_i64 = m_ir_builder->CreateAnd(res_i64, mask); + ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); + res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto rb_i64 = GetGpr(rb); + auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x1F); + auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(32), shl_i64); + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); + auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::ORI(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); + SetGpr(ra, res_i64); +} + +void Compiler::ORIS(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); +} + +void Compiler::XORI(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); + SetGpr(ra, res_i64); +} + +void Compiler::XORIS(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); +} + +void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); + SetGpr(ra, res_i64); + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); +} + +void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); +} + +void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63]); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][me]); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63 - sh]); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto ra_i64 = GetGpr(ra); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } + + u64 mask = s_rotate_mask[mb][63 - sh]; + res_i64 = m_ir_builder->CreateAnd(res_i64, mask); + ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); + res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F); + auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(64), shl_i64); + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); + auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + + if (is_r) { + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][m_eb]); + } else { + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[m_eb][63]); + } + + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { + Value * ra_i64; + Value * rb_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + rb_i64 = m_ir_builder->CreateSExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); + } else { + ra_i64 = GetGpr(ra); + rb_i64 = GetGpr(rb); + } + + SetCrFieldSignedCmp(crfd, ra_i64, rb_i64); +} + +void Compiler::TW(u32 to, u32 ra, u32 rb) { + CompilationError("TW"); +} + +void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { + static const u128 s_lvsl_values[] = { + {0x08090A0B0C0D0E0F, 0x0001020304050607}, + {0x090A0B0C0D0E0F10, 0x0102030405060708}, + {0x0A0B0C0D0E0F1011, 0x0203040506070809}, + {0x0B0C0D0E0F101112, 0x030405060708090A}, + {0x0C0D0E0F10111213, 0x0405060708090A0B}, + {0x0D0E0F1011121314, 0x05060708090A0B0C}, + {0x0E0F101112131415, 0x060708090A0B0C0D}, + {0x0F10111213141516, 0x0708090A0B0C0D0E}, + {0x1011121314151617, 0x08090A0B0C0D0E0F}, + {0x1112131415161718, 0x090A0B0C0D0E0F10}, + {0x1213141516171819, 0x0A0B0C0D0E0F1011}, + {0x131415161718191A, 0x0B0C0D0E0F101112}, + {0x1415161718191A1B, 0x0C0D0E0F10111213}, + {0x15161718191A1B1C, 0x0D0E0F1011121314}, + {0x161718191A1B1C1D, 0x0E0F101112131415}, + {0x1718191A1B1C1D1E, 0x0F10111213141516}, + }; + auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - + + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + auto lvsl_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsl_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); + lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64); + auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16); + SetVr(vd, val_v16i8); +} + +void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto val_i8 = ReadMemory(addr_i64, 8); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); + auto vd_v16i8 = GetVrAsIntVec(vd, 8); + vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64); + SetVr(vd, vd_v16i8); +} + +void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNeg(ra_i64); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); + auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, diff_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("SUBFCO"); + } +} + +void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, sum_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + } +} + +void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128)); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i64, m_ir_builder->getIntNTy(128)); + auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); + prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); + auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + prod_i64 = m_ir_builder->CreateLShr(prod_i64, 32); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::MFOCRF(u32 a, u32 rd, u32 crm) { + auto cr_i32 = GetCr(); + auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, cr_i64); +} + +void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); auto val_i32_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt32Ty()); val_i32_ptr->setAlignment(4); @@ -2477,269 +2477,269 @@ void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { val_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_i32); auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); SetGpr(rd, val_i64); -} - -void Compiler::LDX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); -} - -void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateShl(rs_i64, rb_i64); - auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::CNTLZW(u32 ra, u32 rs, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false)); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateShl(rs_i128, rb_i128); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { - Value * ra_i64; - Value * rb_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - rb_i64 = m_ir_builder->CreateZExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); - } else { - ra_i64 = GetGpr(ra); - rb_i64 = GetGpr(rb); - } - - SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64); -} - -void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsr_values[] = { - {0x18191A1B1C1D1E1F, 0x1011121314151617}, - {0x1718191A1B1C1D1E, 0x0F10111213141516}, - {0x161718191A1B1C1D, 0x0E0F101112131415}, - {0x15161718191A1B1C, 0x0D0E0F1011121314}, - {0x1415161718191A1B, 0x0C0D0E0F10111213}, - {0x131415161718191A, 0x0B0C0D0E0F101112}, - {0x1213141516171819, 0x0A0B0C0D0E0F1011}, - {0x1112131415161718, 0x090A0B0C0D0E0F10}, - {0x1011121314151617, 0x08090A0B0C0D0E0F}, - {0x0F10111213141516, 0x0708090A0B0C0D0E}, - {0x0E0F101112131415, 0x060708090A0B0C0D}, - {0x0D0E0F1011121314, 0x05060708090A0B0C}, - {0x0C0D0E0F10111213, 0x0405060708090A0B}, - {0x0B0C0D0E0F101112, 0x030405060708090A}, - {0x0A0B0C0D0E0F1011, 0x0203040506070809}, - {0x090A0B0C0D0E0F10, 0x0102030405060708}, - }; - - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - auto lvsr_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsr_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); - lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64); - auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16); - SetVr(vd, val_v16i8); -} - -void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); - auto val_i16 = ReadMemory(addr_i64, 16, 2); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 1); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); - auto vd_v8i16 = GetVrAsIntVec(vd, 16); - vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64); - SetVr(vd, vd_v8i16); -} - -void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto diff_i64 = m_ir_builder->CreateSub(rb_i64, ra_i64); - SetGpr(rd, diff_i64); - - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("SUBFO"); - } -} - -void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::DCBST(u32 ra, u32 rb) { - // TODO: Implement this - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false)); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - rb_i64 = m_ir_builder->CreateNot(rb_i64); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::TD(u32 to, u32 ra, u32 rb) { - CompilationError("TD"); -} - -void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); - auto val_i32 = ReadMemory(addr_i64, 32, 4); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 2); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); - auto vd_v4i32 = GetVrAsIntVec(vd, 32); - vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64); - SetVr(vd, vd_v4i32); -} - -void Compiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128)); - auto rb_i128 = m_ir_builder->CreateSExt(rb_i64, m_ir_builder->getIntNTy(128)); - auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); - prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); - auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - prod_i64 = m_ir_builder->CreateAShr(prod_i64, 32); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { +} + +void Compiler::LDX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); +} + +void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateShl(rs_i64, rb_i64); + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::CNTLZW(u32 ra, u32 rs, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false)); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateShl(rs_i128, rb_i128); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::AND(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { + Value * ra_i64; + Value * rb_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + rb_i64 = m_ir_builder->CreateZExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); + } else { + ra_i64 = GetGpr(ra); + rb_i64 = GetGpr(rb); + } + + SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64); +} + +void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { + static const u128 s_lvsr_values[] = { + {0x18191A1B1C1D1E1F, 0x1011121314151617}, + {0x1718191A1B1C1D1E, 0x0F10111213141516}, + {0x161718191A1B1C1D, 0x0E0F101112131415}, + {0x15161718191A1B1C, 0x0D0E0F1011121314}, + {0x1415161718191A1B, 0x0C0D0E0F10111213}, + {0x131415161718191A, 0x0B0C0D0E0F101112}, + {0x1213141516171819, 0x0A0B0C0D0E0F1011}, + {0x1112131415161718, 0x090A0B0C0D0E0F10}, + {0x1011121314151617, 0x08090A0B0C0D0E0F}, + {0x0F10111213141516, 0x0708090A0B0C0D0E}, + {0x0E0F101112131415, 0x060708090A0B0C0D}, + {0x0D0E0F1011121314, 0x05060708090A0B0C}, + {0x0C0D0E0F10111213, 0x0405060708090A0B}, + {0x0B0C0D0E0F101112, 0x030405060708090A}, + {0x0A0B0C0D0E0F1011, 0x0203040506070809}, + {0x090A0B0C0D0E0F10, 0x0102030405060708}, + }; + + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + auto lvsr_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsr_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); + lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64); + auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16); + SetVr(vd, val_v16i8); +} + +void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); + auto val_i16 = ReadMemory(addr_i64, 16, 2); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 1); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); + auto vd_v8i16 = GetVrAsIntVec(vd, 16); + vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64); + SetVr(vd, vd_v8i16); +} + +void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto diff_i64 = m_ir_builder->CreateSub(rb_i64, ra_i64); + SetGpr(rd, diff_i64); + + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("SUBFO"); + } +} + +void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::DCBST(u32 ra, u32 rb) { + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::CNTLZD(u32 ra, u32 rs, bool rc) { + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false)); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::TD(u32 to, u32 ra, u32 rb) { + CompilationError("TD"); +} + +void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); + auto val_i32 = ReadMemory(addr_i64, 32, 4); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 2); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); + auto vd_v4i32 = GetVrAsIntVec(vd, 32); + vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64); + SetVr(vd, vd_v4i32); +} + +void Compiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128)); + auto rb_i128 = m_ir_builder->CreateSExt(rb_i64, m_ir_builder->getIntNTy(128)); + auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); + prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); + auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) { + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + prod_i64 = m_ir_builder->CreateAShr(prod_i64, 32); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); auto val_i64_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt64Ty()); val_i64_ptr->setAlignment(8); @@ -2747,175 +2747,175 @@ void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { auto val_i64 = (Value *)m_ir_builder->CreateLoad(val_i64_ptr); val_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), val_i64); SetGpr(rd, val_i64); -} - -void Compiler::DCBF(u32 ra, u32 rb) { - // TODO: Implement this - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LVX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - SetVr(vd, mem_i128); -} - -void Compiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64); - SetGpr(rd, diff_i64); - - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("NEGO"); - } -} - -void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateXor(res_i64, (s64)-1); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); - auto vs_v16i8 = GetVrAsIntVec(vs, 8); - auto val_i8 = m_ir_builder->CreateExtractElement(vs_v16i8, index_i64); - WriteMemory(addr_i64, val_i8); -} - -void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); - res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("SUBFEO"); - } -} - -void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); - res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("ADDEO"); - } -} - -void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { - auto rs_i32 = GetGpr(rs, 32); - auto cr_i32 = GetCr(); - u32 mask = 0; - - for (u32 i = 0; i < 8; i++) { - if (crm & (1 << i)) { - mask |= 0xF << ((7 - i) * 4); - if (l) { - break; - } - } - } - - cr_i32 = m_ir_builder->CreateAnd(cr_i32, ~mask); - rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask); - cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32); - SetCr(cr_i32); -} - -void Compiler::STDX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 64)); -} - -void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { +} + +void Compiler::DCBF(u32 ra, u32 rb) { + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - + + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LVX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 128, 16); + SetVr(vd, mem_i128); +} + +void Compiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64); + SetGpr(rd, diff_i64); + + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("NEGO"); + } +} + +void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateXor(res_i64, (s64)-1); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); + auto vs_v16i8 = GetVrAsIntVec(vs, 8); + auto val_i8 = m_ir_builder->CreateExtractElement(vs_v16i8, index_i64); + WriteMemory(addr_i64, val_i8); +} + +void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("SUBFEO"); + } +} + +void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("ADDEO"); + } +} + +void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { + auto rs_i32 = GetGpr(rs, 32); + auto cr_i32 = GetCr(); + u32 mask = 0; + + for (u32 i = 0; i < 8; i++) { + if (crm & (1 << i)) { + mask |= 0xF << ((7 - i) * 4); + if (l) { + break; + } + } + } + + cr_i32 = m_ir_builder->CreateAnd(cr_i32, ~mask); + rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask); + cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32); + SetCr(cr_i32); +} + +void Compiler::STDX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 64)); +} + +void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); auto rs_i32 = GetGpr(rs, 32); rs_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), rs_i32); @@ -2923,118 +2923,118 @@ void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { rs_i32_ptr->setAlignment(4); m_ir_builder->CreateStore(rs_i32, rs_i32_ptr); auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, m_ir_builder->CreateBitCast(rs_i32_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(4)); - + auto cr_i32 = GetCr(); cr_i32 = SetBit(cr_i32, 2, success_i1); SetCr(cr_i32); -} - -void Compiler::STWX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 32)); -} - -void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 1); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); - auto vs_v8i16 = GetVrAsIntVec(vs, 16); - auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64); - WriteMemory(addr_i64, val_i16, 2); -} - -void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 64)); - SetGpr(ra, addr_i64); -} - -void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 32)); - SetGpr(ra, addr_i64); -} - -void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 2); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); - auto vs_v4i32 = GetVrAsIntVec(vs, 32); - auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64); - WriteMemory(addr_i64, val_i32, 4); -} - -void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto ca_i64 = GetXerCa(); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("ADDZEO"); - } -} - -void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto ca_i64 = GetXerCa(); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("SUBFZEO"); - } -} - -void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { +} + +void Compiler::STWX(u32 rs, u32 ra, u32 rb) { auto addr_i64 = GetGpr(rb); if (ra) { auto ra_i64 = GetGpr(ra); addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); } - + + WriteMemory(addr_i64, GetGpr(rs, 32)); +} + +void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 1); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); + auto vs_v8i16 = GetVrAsIntVec(vs, 16); + auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64); + WriteMemory(addr_i64, val_i16, 2); +} + +void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 64)); + SetGpr(ra, addr_i64); +} + +void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 32)); + SetGpr(ra, addr_i64); +} + +void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 2); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); + auto vs_v4i32 = GetVrAsIntVec(vs, 32); + auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64); + WriteMemory(addr_i64, val_i32, 4); +} + +void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto ca_i64 = GetXerCa(); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, sum_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("ADDZEO"); + } +} + +void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto ca_i64 = GetXerCa(); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("SUBFZEO"); + } +} + +void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); auto rs_i64 = GetGpr(rs); rs_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), rs_i64); @@ -3042,2996 +3042,2996 @@ void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { rs_i64_ptr->setAlignment(8); m_ir_builder->CreateStore(rs_i64, rs_i64_ptr); auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, m_ir_builder->CreateBitCast(rs_i64_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(8)); - + auto cr_i32 = GetCr(); cr_i32 = SetBit(cr_i32, 2, success_i1); SetCr(cr_i32); -} - -void Compiler::STBX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 8)); -} - -void Compiler::STVX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - WriteMemory(addr_i64, GetVr(vs), 16); -} - -void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); - res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("SUBFMEO"); - } -} - -void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("MULLDO"); - } -} - -void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); - res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("ADDMEO"); - } -} - -void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - SetGpr(rd, prod_i64); - - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("MULLWO"); - } -} - -void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { - // TODO: Implement this - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 8)); - SetGpr(ra, addr_i64); -} - -void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64); - SetGpr(rd, sum_i64); - - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO: Implement this - CompilationError("ADDO"); - } -} - -void Compiler::DCBT(u32 ra, u32 rb, u32 th) { - // TODO: Implement this using prefetch - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateNot(res_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { +} + +void Compiler::STBX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 8)); +} + +void Compiler::STVX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + WriteMemory(addr_i64, GetVr(vs), 16); +} + +void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("SUBFMEO"); + } +} + +void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("MULLDO"); + } +} + +void Compiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) { + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, {0}); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, {1}); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("ADDMEO"); + } +} + +void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + SetGpr(rd, prod_i64); + + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("MULLWO"); + } +} + +void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 8)); + SetGpr(ra, addr_i64); +} + +void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64); + SetGpr(rd, sum_i64); + + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO: Implement this + CompilationError("ADDO"); + } +} + +void Compiler::DCBT(u32 ra, u32 rb, u32 th) { + // TODO: Implement this using prefetch + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { CompilationError("ECIWX"); - //auto addr_i64 = GetGpr(rb); - //if (ra) { - // auto ra_i64 = GetGpr(ra); - // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - //} - - //auto mem_i32 = ReadMemory(addr_i64, 32); - //auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - //SetGpr(rd, mem_i64); -} - -void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::MFSPR(u32 rd, u32 spr) { - Value * rd_i64; - auto n = (spr >> 5) | ((spr & 0x1f) << 5); - - switch (n) { - case 0x001: - rd_i64 = GetXer(); - break; - case 0x008: - rd_i64 = GetLr(); - break; - case 0x009: - rd_i64 = GetCtr(); - break; - case 0x100: - rd_i64 = GetVrsave(); - break; - case 0x10C: - rd_i64 = Call("get_time", get_time); - break; - case 0x10D: - rd_i64 = Call("get_time", get_time); - rd_i64 = m_ir_builder->CreateLShr(rd_i64, 32); - break; - default: - assert(0); - break; - } - - SetGpr(rd, rd_i64); -} - -void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LVXL(u32 vd, u32 ra, u32 rb) { - LVX(vd, ra, rb); -} - -void Compiler::MFTB(u32 rd, u32 spr) { - auto tb_i64 = Call("get_time", get_time); - - u32 n = (spr >> 5) | ((spr & 0x1f) << 5); - if (n == 0x10D) { - tb_i64 = m_ir_builder->CreateLShr(tb_i64, 32); - } - - SetGpr(rd, tb_i64); -} - -void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::STHX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 16)); -} - -void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - rb_i64 = m_ir_builder->CreateNot(rb_i64); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { + //auto addr_i64 = GetGpr(rb); + //if (ra) { + // auto ra_i64 = GetGpr(ra); + // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + //} + + //auto mem_i32 = ReadMemory(addr_i64, 32); + //auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + //SetGpr(rd, mem_i64); +} + +void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::MFSPR(u32 rd, u32 spr) { + Value * rd_i64; + auto n = (spr >> 5) | ((spr & 0x1f) << 5); + + switch (n) { + case 0x001: + rd_i64 = GetXer(); + break; + case 0x008: + rd_i64 = GetLr(); + break; + case 0x009: + rd_i64 = GetCtr(); + break; + case 0x100: + rd_i64 = GetVrsave(); + break; + case 0x10C: + rd_i64 = Call("get_time", get_time); + break; + case 0x10D: + rd_i64 = Call("get_time", get_time); + rd_i64 = m_ir_builder->CreateLShr(rd_i64, 32); + break; + default: + assert(0); + break; + } + + SetGpr(rd, rd_i64); +} + +void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LVXL(u32 vd, u32 ra, u32 rb) { + LVX(vd, ra, rb); +} + +void Compiler::MFTB(u32 rd, u32 spr) { + auto tb_i64 = Call("get_time", get_time); + + u32 n = (spr >> 5) | ((spr & 0x1f) << 5); + if (n == 0x10D) { + tb_i64 = m_ir_builder->CreateLShr(tb_i64, 32); + } + + SetGpr(rd, tb_i64); +} + +void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::STHX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 16)); +} + +void Compiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { CompilationError("ECOWX"); - //auto addr_i64 = GetGpr(rb); - //if (ra) { - // auto ra_i64 = GetGpr(ra); - // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - //} - - //WriteMemory(addr_i64, GetGpr(rs, 32)); -} - -void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 16)); - SetGpr(ra, addr_i64); -} - -void Compiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64); - SetGpr(rd, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("DIVDUO"); - } - - // TODO make sure an exception does not occur on divide by 0 and overflow -} - -void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("DIVWUO"); - } - - // TODO make sure an exception does not occur on divide by 0 and overflow -} - -void Compiler::MTSPR(u32 spr, u32 rs) { - auto rs_i64 = GetGpr(rs); - auto n = (spr >> 5) | ((spr & 0x1f) << 5); - - switch (n) { - case 0x001: - SetXer(rs_i64); - break; - case 0x008: - SetLr(rs_i64); - break; - case 0x009: - SetCtr(rs_i64); - break; - case 0x100: - SetVrsave(rs_i64); - break; - default: - assert(0); - break; - } - -} - -void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateNot(res_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { - STVX(vs, ra, rb); -} - -void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64); - SetGpr(rd, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("DIVDO"); - } - - // TODO make sure an exception does not occur on divide by 0 and overflow -} - -void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } - - if (oe) { - // TODO implement oe - CompilationError("DIVWO"); - } - - // TODO make sure an exception does not occur on divide by 0 and overflow -} - -void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto eb_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); - auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128); - SetVr(vd, mem_i128); -} - -void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i64 = ReadMemory(addr_i64, 64, 0, false); - SetGpr(rd, mem_i64); -} - -void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { - CompilationError("LSWX"); -} - -void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32, 0, false); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); -} - -void Compiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateLShr(rs_i64, rb_i64); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateLShr(rs_i128, rb_i128); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); - - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto eb_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), addr_i64); - eb_i64 = m_ir_builder->CreateAnd(eb_i64, 0xF); - eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); - auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - mem_i128 = m_ir_builder->CreateLShr(mem_i128, eb_i128); - auto cmp_i1 = m_ir_builder->CreateICmpNE(eb_i64, m_ir_builder->getInt64(0)); - auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128)); - mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128); - SetVr(vd, mem_i128); -} - -void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { - auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); - - nb = nb ? nb : 32; - for (u32 i = 0; i < nb; i += 4) { - auto val_i32 = ReadMemory(addr_i64, 32, 0, true, false); - - if (i + 4 <= nb) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } else { - u32 mask = 0xFFFFFFFF << ((4 - (nb - i)) * 8); - val_i32 = m_ir_builder->CreateAnd(val_i32, mask); - } - - auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, val_i64); - rd = (rd + 1) % 32; - } -} - -void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); - SetGpr(ra, addr_i64); -} - -void Compiler::SYNC(u32 l) { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); -} - -void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); -} - -void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - - auto vs_i128 = GetVr(vs); - vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); - auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); - vs_i128_ptr->setAlignment(16); - m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); - auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); - - Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), - addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); -} - -void Compiler::STDBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs), 0, false); -} - -void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { - CompilationError("STSWX"); -} - -void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 32), 0, false); -} - -void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); -} - -void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - - auto vs_i128 = GetVr(vs); - vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); - auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); - vs_i128_ptr->setAlignment(16); - m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); - auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); - vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64); - - Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), - addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); -} - -void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); - SetGpr(ra, addr_i64); -} - -void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { - auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); - - nb = nb ? nb : 32; - for (u32 i = 0; i < nb; i += 4) { - auto val_i32 = GetGpr(rd, 32); - - if (i + 4 <= nb) { - WriteMemory(addr_i64, val_i32, 0, true, false); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - rd = (rd + 1) % 32; - } else { - u32 n = nb - i; - if (n >= 2) { - auto val_i16 = m_ir_builder->CreateLShr(val_i32, 16); - val_i16 = m_ir_builder->CreateTrunc(val_i16, m_ir_builder->getInt16Ty()); - WriteMemory(addr_i64, val_i16); - - if (n == 3) { - auto val_i8 = m_ir_builder->CreateLShr(val_i32, 8); - val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(2)); - WriteMemory(addr_i64, val_i8); - } - } else { - auto val_i8 = m_ir_builder->CreateLShr(val_i32, 24); - val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); - WriteMemory(addr_i64, val_i8); - } - } - } -} - -void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); -} - -void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::LVLXL(u32 vd, u32 ra, u32 rb) { - LVLX(vd, ra, rb); -} - -void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i16 = ReadMemory(addr_i64, 16, 0, false); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateAShr(rs_i64, rb_i64); - auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); - SetGpr(ra, ra_i64); - - auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); - - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateAShr(rs_i128, rb_i128); - auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); - auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, ra_i64); - - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); - - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { - LVRX(vd, ra, rb); -} - -void Compiler::DSS(u32 strm, u32 a) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); - auto res_i64 = m_ir_builder->CreateAShr(rs_i64, sh); - auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); - SetGpr(ra, ra_i64); - - auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); - - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); - auto res_i128 = m_ir_builder->CreateAShr(rs_i128, sh); - auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); - auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, ra_i64); - - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); - - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { - SRADI1(ra, rs, sh, rc); -} - -void Compiler::EIEIO() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); -} - -void Compiler::STVLXL(u32 vs, u32 ra, u32 rb) { - STVLX(vs, ra, rb); -} - -void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 16), 0, false); -} - -void Compiler::EXTSH(u32 ra, u32 rs, bool rc) { - auto rs_i16 = GetGpr(rs, 16); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); - - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::STVRXL(u32 vs, u32 ra, u32 rb) { - STVRX(vs, ra, rb); -} - -void Compiler::EXTSB(u32 ra, u32 rs, bool rc) { - auto rs_i8 = GetGpr(rs, 8); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); - - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); -} - -void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); - - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } -} - -void Compiler::ICBI(u32 ra, u32 rs) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); -} - -void Compiler::DCBZ(u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - addr_i64 = m_ir_builder->CreateAnd(addr_i64, ~(127ULL)); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - - std::vector types = {(Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty()}; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types), - addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true)); -} - -void Compiler::LWZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LWZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::LBZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LBZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::STW(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 32)); -} - -void Compiler::STWU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 32)); - SetGpr(ra, addr_i64); -} - -void Compiler::STB(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 8)); -} - -void Compiler::STBU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 8)); - SetGpr(ra, addr_i64); -} - -void Compiler::LHZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LHZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::LHA(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::LHAU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::STH(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 16)); -} - -void Compiler::STHU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 16)); - SetGpr(ra, addr_i64); -} - -void Compiler::LMW(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); - } - - for (u32 i = rd; i < 32; i++) { - auto val_i32 = ReadMemory(addr_i64, 32); - auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); - SetGpr(i, val_i64); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } -} - -void Compiler::STMW(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); - } - - for (u32 i = rs; i < 32; i++) { - auto val_i32 = GetGpr(i, 32); - WriteMemory(addr_i64, val_i32); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } -} - -void Compiler::LFS(u32 frd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); -} - -void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); - SetGpr(ra, addr_i64); -} - -void Compiler::LFD(u32 frd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); -} - -void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::STFS(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); -} - -void Compiler::STFSU(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); - SetGpr(ra, addr_i64); -} - -void Compiler::STFD(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); -} - -void Compiler::STFDU(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::LD(u32 rd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); -} - -void Compiler::LDU(u32 rd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); -} - -void Compiler::LWA(u32 rd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); -} - -void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FDIVS."); - } - - // TODO: Set flags -} - -void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FSUBS."); - } - - // TODO: Set flags -} - -void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FADDS."); - } - - // TODO: Set flags -} - -void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FSQRTS."); - } - - // TODO: Set flags -} - -void Compiler::FRES(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FRES."); - } - - // TODO: Set flags -} - -void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FMULS."); - } - - // TODO: Set flags -} - -void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FMADDS."); - } - - // TODO: Set flags -} - -void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FMSUBS."); - } - - // TODO: Set flags -} - -void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FNMSUBS."); - } - - // TODO: Set flags -} - -void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); - - if (rc) { - // TODO: Implement this - CompilationError("FNMADDS."); - } - - // TODO: Set flags -} - -void Compiler::STD(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } - - WriteMemory(addr_i64, GetGpr(rs, 64)); -} - -void Compiler::STDU(u32 rs, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - - WriteMemory(addr_i64, GetGpr(rs, 64)); - SetGpr(ra, addr_i64); -} - -void Compiler::MTFSB1(u32 crbd, bool rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); - SetFpscr(fpscr_i32); - - if (rc) { - // TODO: Implement this - CompilationError("MTFSB1."); - } -} - -void Compiler::MCRFS(u32 crbd, u32 crbs) { - auto fpscr_i32 = GetFpscr(); - auto val_i32 = GetNibble(fpscr_i32, crbs); - SetCrField(crbd, val_i32); - - switch (crbs) { - case 0: - fpscr_i32 = ClrBit(fpscr_i32, 0); - fpscr_i32 = ClrBit(fpscr_i32, 3); - break; - case 1: - fpscr_i32 = ClrNibble(fpscr_i32, 1); - break; - case 2: - fpscr_i32 = ClrNibble(fpscr_i32, 2); - break; - case 3: - fpscr_i32 = ClrBit(fpscr_i32, 12); - break; - case 5: - fpscr_i32 = ClrBit(fpscr_i32, 21); - fpscr_i32 = ClrBit(fpscr_i32, 22); - fpscr_i32 = ClrBit(fpscr_i32, 23); - break; - default: - break; - } - - SetFpscr(fpscr_i32); -} - -void Compiler::MTFSB0(u32 crbd, bool rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = ClrBit(fpscr_i32, crbd); - SetFpscr(fpscr_i32); - - if (rc) { - // TODO: Implement this - CompilationError("MTFSB0."); - } -} - -void Compiler::MTFSFI(u32 crfd, u32 i, bool rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = SetNibble(fpscr_i32, crfd, m_ir_builder->getInt32(i & 0xF)); - SetFpscr(fpscr_i32); - - if (rc) { - // TODO: Implement this - CompilationError("MTFSFI."); - } -} - -void Compiler::MFFS(u32 frd, bool rc) { - auto fpscr_i32 = GetFpscr(); - auto fpscr_i64 = m_ir_builder->CreateZExt(fpscr_i32, m_ir_builder->getInt64Ty()); - SetFpr(frd, fpscr_i64); - - if (rc) { - // TODO: Implement this - CompilationError("MFFS."); - } -} - -void Compiler::MTFSF(u32 flm, u32 frb, bool rc) { - u32 mask = 0; - for(u32 i = 0; i < 8; i++) { - if (flm & (1 << i)) { - mask |= 0xF << (i * 4); - } - } - - auto rb_i32 = GetFpr(frb, 32, true); - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = m_ir_builder->CreateAnd(fpscr_i32, ~mask); - rb_i32 = m_ir_builder->CreateAnd(rb_i32, mask); - fpscr_i32 = m_ir_builder->CreateOr(fpscr_i32, rb_i32); - SetFpscr(fpscr_i32); - - if (rc) { - // TODO: Implement this - CompilationError("MTFSF."); - } -} - -void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); - auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); - auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); - SetCr(cr_i32); - - // TODO: Set flags / Handle NaN -} - -void Compiler::FRSP(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f32 = m_ir_builder->CreateFPTrunc(rb_f64, m_ir_builder->getFloatTy()); - auto res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FRSP."); - } - - // TODO: Revisit this - // TODO: Set flags -} - -void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); - auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); - SetFpr(frd, res_i64); - - if (rc) { - // TODO: Implement this - CompilationError("FCTIW."); - } - - // TODO: Set flags / Implement rounding modes -} - -void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); - auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); - SetFpr(frd, res_i64); - - if (rc) { - // TODO: Implement this - CompilationError("FCTIWZ."); - } - - // TODO: Set flags -} - -void Compiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FDIV."); - } - - // TODO: Set flags -} - -void Compiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FSUB."); - } - - // TODO: Set flags -} - -void Compiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FADD."); - } - - // TODO: Set flags -} - -void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FSQRT."); - } - - // TODO: Set flags -} - -void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto cmp_i1 = m_ir_builder->CreateFCmpOGE(ra_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0.0)); - auto res_f64 = m_ir_builder->CreateSelect(cmp_i1, rc_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FSEL."); - } - - // TODO: Set flags -} - -void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FMUL."); - } - - // TODO: Set flags -} - -void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FRSQRTE."); - } -} - -void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FMSUB."); - } - - // TODO: Set flags -} - -void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FMADD."); - } - - // TODO: Set flags -} - -void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rc_f64 = m_ir_builder->CreateFNeg(rc_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FNMSUB."); - } - - // TODO: Set flags -} - -void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - rc_f64 = m_ir_builder->CreateFNeg(rc_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FNMADD."); - } - - // TODO: Set flags -} - -void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); - auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); - auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); - SetCr(cr_i32); - - // TODO: Set flags / Handle NaN -} - -void Compiler::FNEG(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - SetFpr(frd, rb_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FNEG."); - } - - // TODO: Set flags -} - -void Compiler::FMR(u32 frd, u32 frb, bool rc) { - SetFpr(frd, GetFpr(frb)); - - if (rc) { - // TODO: Implement this - CompilationError("FMR."); - } - - // TODO: Set flags -} - -void Compiler::FNABS(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FNABS."); - } - - // TODO: Set flags -} - -void Compiler::FABS(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FABS."); - } - - // TODO: Set flags -} - -void Compiler::FCTID(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); - auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); - SetFpr(frd, res_i64); - - if (rc) { - // TODO: Implement this - CompilationError("FCTID."); - } - - // TODO: Set flags / Implement rounding modes -} - -void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); - auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); - SetFpr(frd, res_i64); - - if (rc) { - // TODO: Implement this - CompilationError("FCTIDZ."); - } - - // TODO: Set flags -} - -void Compiler::FCFID(u32 frd, u32 frb, bool rc) { - auto rb_i64 = GetFpr(frb, 64, true); - auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); - - if (rc) { - // TODO: Implement this - CompilationError("FCFID."); - } - - // TODO: Set flags -} - -void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { - CompilationError(fmt::Format("Unknown/Illegal opcode! (0x%08x : 0x%x : 0x%x)", code, opcode, gcode)); -} - -std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) const { - std::string name; - - if (address == 0) { - name = "entry"; - } else if (address == 0xFFFFFFFF) { - name = "default_exit"; - } else { - name = fmt::Format("instr_0x%08X", address); - } - - if (suffix != "") { - name += "_" + suffix; - } - - return name; -} - -u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) const { - if (name.compare(0, 6, "instr_") == 0) { - return strtoul(name.c_str() + 6, nullptr, 0); - } else if (name == GetBasicBlockNameFromAddress(0)) { - return 0; - } else if (name == GetBasicBlockNameFromAddress(0xFFFFFFFF)) { - return 0xFFFFFFFF; - } - - return 0; -} - -BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, const std::string & suffix, bool create_if_not_exist) { - auto block_name = GetBasicBlockNameFromAddress(address, suffix); - BasicBlock * block = nullptr; - BasicBlock * next_block = nullptr; - for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { - if (i->getName() == block_name) { - block = &(*i); - break; - } - -#ifdef _DEBUG - auto block_address = GetAddressFromBasicBlockName(i->getName()); - if (block_address > address) { - next_block = &(*i); - break; - } -#endif - } - - if (!block && create_if_not_exist) { - block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function, next_block); - } - - return block; -} - -Value * Compiler::GetBit(Value * val, u32 n) { - Value * bit; - -#ifdef PPU_LLVM_RECOMPILER_USE_BMI - if (val->getType()->isIntegerTy(32)) { - bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32(1 << (31- n))); - } else if (val->getType()->isIntegerTy(64)) { - bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)1 << (63 - n))); - } else { -#endif - if (val->getType()->getIntegerBitWidth() != (n + 1)) { - bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1); - } - - bit = m_ir_builder->CreateAnd(bit, 1); -#ifdef PPU_LLVM_RECOMPILER_USE_BMI - } -#endif - - return bit; -} - -Value * Compiler::ClrBit(Value * val, u32 n) { - return m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1))); -} - -Value * Compiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) { - if (doClear) { - val = ClrBit(val, n); - } - - if (bit->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { - bit = m_ir_builder->CreateZExt(bit, val->getType()); - } else if (bit->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { - bit = m_ir_builder->CreateTrunc(bit, val->getType()); - } - - if (val->getType()->getIntegerBitWidth() != (n + 1)) { - bit = m_ir_builder->CreateShl(bit, bit->getType()->getIntegerBitWidth() - n - 1); - } - - return m_ir_builder->CreateOr(val, bit); -} - -Value * Compiler::GetNibble(Value * val, u32 n) { - Value * nibble; - -#ifdef PPU_LLVM_RECOMPILER_USE_BMI - if (val->getType()->isIntegerTy(32)) { - nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32((u64)0xF << ((7 - n) * 4))); - } else if (val->getType()->isIntegerTy(64)) { - nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)0xF << ((15 - n) * 4))); - } else { -#endif - if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { - val = m_ir_builder->CreateLShr(val, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); - } - - nibble = m_ir_builder->CreateAnd(val, 0xF); -#ifdef PPU_LLVM_RECOMPILER_USE_BMI - } -#endif - - return nibble; -} - -Value * Compiler::ClrNibble(Value * val, u32 n) { - return m_ir_builder->CreateAnd(val, ~((u64)0xF << ((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4))); -} - -Value * Compiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) { - if (doClear) { - val = ClrNibble(val, n); - } - - if (nibble->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { - nibble = m_ir_builder->CreateZExt(nibble, val->getType()); - } else if (nibble->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { - nibble = m_ir_builder->CreateTrunc(nibble, val->getType()); - } - - if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { - nibble = m_ir_builder->CreateShl(nibble, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); - } - - return m_ir_builder->CreateOr(val, nibble); -} - -Value * Compiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) { - if (doClear) { - val = ClrNibble(val, n); - } - - if (b0) { - val = SetBit(val, n * 4, b0, false); - } - - if (b1) { - val = SetBit(val, (n * 4) + 1, b1, false); - } - - if (b2) { - val = SetBit(val, (n * 4) + 2, b2, false); - } - - if (b3) { - val = SetBit(val, (n * 4) + 3, b3, false); - } - - return val; -} - -Value * Compiler::GetPc() { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); - auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); -} - -void Compiler::SetPc(Value * val_ix) { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); - auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); - m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); -} - -Value * Compiler::GetGpr(u32 r, u32 num_bits) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); - auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); -} - -void Compiler::SetGpr(u32 r, Value * val_x64) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); - auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); -} - -Value * Compiler::GetCr() { - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); - auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); -} - -Value * Compiler::GetCrField(u32 n) { - return GetNibble(GetCr(), n); -} - -void Compiler::SetCr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); - auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); -} - -void Compiler::SetCrField(u32 n, Value * field) { - SetCr(SetNibble(GetCr(), n, field)); -} - -void Compiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) { - SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3)); -} - -void Compiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { - auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b); - auto gt_i1 = m_ir_builder->CreateICmpSGT(a, b); - auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); - SetCr(cr_i32); -} - -void Compiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { - auto lt_i1 = m_ir_builder->CreateICmpULT(a, b); - auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b); - auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); - SetCr(cr_i32); -} - -void Compiler::SetCr6AfterVectorCompare(u32 vr) { - auto vr_v16i8 = GetVrAsIntVec(vr, 8); - auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8); - auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0)); - auto cmp1_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0xFFFF)); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, 6, cmp1_i1, nullptr, cmp0_i1, nullptr); - SetCr(cr_i32); -} - -Value * Compiler::GetLr() { - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); - auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); -} - -void Compiler::SetLr(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); - auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); -} - -Value * Compiler::GetCtr() { - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); - auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); -} - -void Compiler::SetCtr(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); - auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); -} - -Value * Compiler::GetXer() { - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); - auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); -} - -Value * Compiler::GetXerCa() { - return GetBit(GetXer(), 34); -} - -Value * Compiler::GetXerSo() { - return GetBit(GetXer(), 32); -} - -void Compiler::SetXer(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); - auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); -} - -void Compiler::SetXerCa(Value * ca) { - auto xer_i64 = GetXer(); - xer_i64 = SetBit(xer_i64, 34, ca); - SetXer(xer_i64); -} - -void Compiler::SetXerSo(Value * so) { - auto xer_i64 = GetXer(); - xer_i64 = SetBit(xer_i64, 32, so); - SetXer(xer_i64); -} - -Value * Compiler::GetVrsave() { - auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); - auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - auto val_i32 = m_ir_builder->CreateAlignedLoad(vrsave_i32_ptr, 4); - return m_ir_builder->CreateZExtOrTrunc(val_i32, m_ir_builder->getInt64Ty()); -} - -void Compiler::SetVrsave(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_i64, m_ir_builder->getInt32Ty()); - auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); - auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, vrsave_i32_ptr, 8); -} - -Value * Compiler::GetFpscr() { - auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); - auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(fpscr_i32_ptr, 4); -} - -void Compiler::SetFpscr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); - auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, fpscr_i32_ptr, 4); -} - -Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); - if (!as_int) { - auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); - auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); - if (bits == 32) { - return m_ir_builder->CreateFPTrunc(r_f64, m_ir_builder->getFloatTy()); - } else { - return r_f64; - } - } else { - auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - auto r_i64 = m_ir_builder->CreateAlignedLoad(r_i64_ptr, 8); - if (bits == 32) { - return m_ir_builder->CreateTrunc(r_i64, m_ir_builder->getInt32Ty()); - } else { - return r_i64; - } - } -} - -void Compiler::SetFpr(u32 r, Value * val) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); - auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); - - Value* val_f64; - if (val->getType()->isDoubleTy() || val->getType()->isIntegerTy(64)) { - val_f64 = m_ir_builder->CreateBitCast(val, m_ir_builder->getDoubleTy()); - } else if (val->getType()->isFloatTy() || val->getType()->isIntegerTy(32)) { - auto val_f32 = m_ir_builder->CreateBitCast(val, m_ir_builder->getFloatTy()); - val_f64 = m_ir_builder->CreateFPExt(val_f32, m_ir_builder->getDoubleTy()); - } else { - assert(0); - } - - m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8); -} - -Value * Compiler::GetVscr() { - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); - auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); -} - -void Compiler::SetVscr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); - auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); -} - -Value * Compiler::GetVr(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); -} - -Value * Compiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); -} - -Value * Compiler::GetVrAsFloatVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); -} - -Value * Compiler::GetVrAsDoubleVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); -} - -void Compiler::SetVr(u32 vr, Value * val_x128) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); - m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); -} - -Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { - bool bo0 = bo & 0x10 ? true : false; - bool bo1 = bo & 0x08 ? true : false; - bool bo2 = bo & 0x04 ? true : false; - bool bo3 = bo & 0x02 ? true : false; - - auto ctr_i64 = GetCtr(); - if (!bo2) { - ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1)); - SetCtr(ctr_i64); - } - - Value * ctr_ok_i1 = nullptr; - if (!bo2) { - // TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3. - ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0)); - if (bo3) { - ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3)); - } - } - - Value * cond_ok_i1 = nullptr; - if (!bo0) { - auto cr_bi_i32 = GetBit(GetCr(), bi); - cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty()); - if (!bo1) { - cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1)); - } - } - - Value * cmp_i1 = nullptr; - if (ctr_ok_i1 && cond_ok_i1) { - cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1); - } else if (ctr_ok_i1) { - cmp_i1 = ctr_ok_i1; - } else if (cond_ok_i1) { - cmp_i1 = cond_ok_i1; - } - - return cmp_i1; -} - -void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr) { - if (lk) { - SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); - } - - auto current_block = m_ir_builder->GetInsertBlock(); - - BasicBlock * target_block = nullptr; - if (dyn_cast(target_i32)) { - // Target address is an immediate value. - u32 target_address = (u32)(dyn_cast(target_i32)->getLimitedValue()); - if (lk) { - // Function call - if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); - m_ir_builder->SetInsertPoint(target_block); - } - - SetPc(target_i32); - IndirectCall(target_address, m_ir_builder->getInt64(0), true); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); - } else { - // Local branch - target_block = GetBasicBlockFromAddress(target_address); - } - } else { - // Target address is in a register - if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); - m_ir_builder->SetInsertPoint(target_block); - } - - SetPc(target_i32); - if (target_is_lr && !lk) { - // Return from this function - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } else if (lk) { - auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); - auto unknown_function_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "unknown_function"); - - auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); - m_ir_builder->SetInsertPoint(unknown_function_block); - m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0)); - m_ir_builder->CreateBr(next_block); - - auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); - if (call_i != m_state.cfg->calls.end()) { - for (auto function_i = call_i->second.begin(); function_i != call_i->second.end(); function_i++) { - auto block = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("0x%08X", *function_i)); - m_ir_builder->SetInsertPoint(block); - IndirectCall(*function_i, m_ir_builder->getInt64(0), true); - m_ir_builder->CreateBr(next_block); - switch_instr->addCase(m_ir_builder->getInt32(*function_i), block); - } - } - } else { - auto switch_instr = m_ir_builder->CreateSwitch(target_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); - auto branch_i = m_state.cfg->branches.find(m_state.current_instruction_address); - if (branch_i != m_state.cfg->branches.end()) { - for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { - switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); - } - } - } - } - - if (cmp_i1) { - // Conditional branch - auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); - m_ir_builder->SetInsertPoint(current_block); - m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block); - } else { - // Unconditional branch - if (target_block) { - m_ir_builder->SetInsertPoint(current_block); - m_ir_builder->CreateBr(target_block); - } - } - - m_state.hit_branch_instruction = true; -} - -Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { + //auto addr_i64 = GetGpr(rb); + //if (ra) { + // auto ra_i64 = GetGpr(ra); + // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + //} + + //WriteMemory(addr_i64, GetGpr(rs, 32)); +} + +void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 16)); + SetGpr(ra, addr_i64); +} + +void Compiler::OR(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64); + SetGpr(rd, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("DIVDUO"); + } + + // TODO make sure an exception does not occur on divide by 0 and overflow +} + +void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("DIVWUO"); + } + + // TODO make sure an exception does not occur on divide by 0 and overflow +} + +void Compiler::MTSPR(u32 spr, u32 rs) { + auto rs_i64 = GetGpr(rs); + auto n = (spr >> 5) | ((spr & 0x1f) << 5); + + switch (n) { + case 0x001: + SetXer(rs_i64); + break; + case 0x008: + SetLr(rs_i64); + break; + case 0x009: + SetCtr(rs_i64); + break; + case 0x100: + SetVrsave(rs_i64); + break; + default: + assert(0); + break; + } + +} + +void Compiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { + STVX(vs, ra, rb); +} + +void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64); + SetGpr(rd, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("DIVDO"); + } + + // TODO make sure an exception does not occur on divide by 0 and overflow +} + +void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } + + if (oe) { + // TODO implement oe + CompilationError("DIVWO"); + } + + // TODO make sure an exception does not occur on divide by 0 and overflow +} + +void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto eb_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); + auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 128, 16); + mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128); + SetVr(vd, mem_i128); +} + +void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i64 = ReadMemory(addr_i64, 64, 0, false); + SetGpr(rd, mem_i64); +} + +void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { + CompilationError("LSWX"); +} + +void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32, 0, false); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); +} + +void Compiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateLShr(rs_i64, rb_i64); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateLShr(rs_i128, rb_i128); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); + + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto eb_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), addr_i64); + eb_i64 = m_ir_builder->CreateAnd(eb_i64, 0xF); + eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); + auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 128, 16); + mem_i128 = m_ir_builder->CreateLShr(mem_i128, eb_i128); + auto cmp_i1 = m_ir_builder->CreateICmpNE(eb_i64, m_ir_builder->getInt64(0)); + auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128)); + mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128); + SetVr(vd, mem_i128); +} + +void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { + auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); + + nb = nb ? nb : 32; + for (u32 i = 0; i < nb; i += 4) { + auto val_i32 = ReadMemory(addr_i64, 32, 0, true, false); + + if (i + 4 <= nb) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } else { + u32 mask = 0xFFFFFFFF << ((4 - (nb - i)) * 8); + val_i32 = m_ir_builder->CreateAnd(val_i32, mask); + } + + auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, val_i64); + rd = (rd + 1) % 32; + } +} + +void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); + SetGpr(ra, addr_i64); +} + +void Compiler::SYNC(u32 l) { + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); +} + +void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); +} + +void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); +} + +void Compiler::STDBRX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs), 0, false); +} + +void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { + CompilationError("STSWX"); +} + +void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 32), 0, false); +} + +void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); +} + +void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64); + + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); +} + +void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); + SetGpr(ra, addr_i64); +} + +void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { + auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); + + nb = nb ? nb : 32; + for (u32 i = 0; i < nb; i += 4) { + auto val_i32 = GetGpr(rd, 32); + + if (i + 4 <= nb) { + WriteMemory(addr_i64, val_i32, 0, true, false); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + rd = (rd + 1) % 32; + } else { + u32 n = nb - i; + if (n >= 2) { + auto val_i16 = m_ir_builder->CreateLShr(val_i32, 16); + val_i16 = m_ir_builder->CreateTrunc(val_i16, m_ir_builder->getInt16Ty()); + WriteMemory(addr_i64, val_i16); + + if (n == 3) { + auto val_i8 = m_ir_builder->CreateLShr(val_i32, 8); + val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(2)); + WriteMemory(addr_i64, val_i8); + } + } else { + auto val_i8 = m_ir_builder->CreateLShr(val_i32, 24); + val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); + WriteMemory(addr_i64, val_i8); + } + } + } +} + +void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); +} + +void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::LVLXL(u32 vd, u32 ra, u32 rb) { + LVLX(vd, ra, rb); +} + +void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i16 = ReadMemory(addr_i64, 16, 0, false); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateAShr(rs_i64, rb_i64); + auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); + SetGpr(ra, ra_i64); + + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); + + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateAShr(rs_i128, rb_i128); + auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); + auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, ra_i64); + + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); + + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { + LVRX(vd, ra, rb); +} + +void Compiler::DSS(u32 strm, u32 a) { + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); + auto res_i64 = m_ir_builder->CreateAShr(rs_i64, sh); + auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); + SetGpr(ra, ra_i64); + + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); + + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); + auto res_i128 = m_ir_builder->CreateAShr(rs_i128, sh); + auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); + auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, ra_i64); + + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); + + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { + SRADI1(ra, rs, sh, rc); +} + +void Compiler::EIEIO() { + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); +} + +void Compiler::STVLXL(u32 vs, u32 ra, u32 rb) { + STVLX(vs, ra, rb); +} + +void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 16), 0, false); +} + +void Compiler::EXTSH(u32 ra, u32 rs, bool rc) { + auto rs_i16 = GetGpr(rs, 16); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); + + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::STVRXL(u32 vs, u32 ra, u32 rb) { + STVRX(vs, ra, rb); +} + +void Compiler::EXTSB(u32 ra, u32 rs, bool rc) { + auto rs_i8 = GetGpr(rs, 8); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); + + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); +} + +void Compiler::EXTSW(u32 ra, u32 rs, bool rc) { + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); + + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } +} + +void Compiler::ICBI(u32 ra, u32 rs) { + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); +} + +void Compiler::DCBZ(u32 ra, u32 rb) { + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + addr_i64 = m_ir_builder->CreateAnd(addr_i64, ~(127ULL)); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + + std::vector types = {(Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty()}; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types), + addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true)); +} + +void Compiler::LWZ(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LWZU(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::LBZ(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LBZU(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::STW(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 32)); +} + +void Compiler::STWU(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 32)); + SetGpr(ra, addr_i64); +} + +void Compiler::STB(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 8)); +} + +void Compiler::STBU(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 8)); + SetGpr(ra, addr_i64); +} + +void Compiler::LHZ(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LHZU(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::LHA(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::LHAU(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::STH(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 16)); +} + +void Compiler::STHU(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 16)); + SetGpr(ra, addr_i64); +} + +void Compiler::LMW(u32 rd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); + } + + for (u32 i = rd; i < 32; i++) { + auto val_i32 = ReadMemory(addr_i64, 32); + auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); + SetGpr(i, val_i64); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } +} + +void Compiler::STMW(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); + } + + for (u32 i = rs; i < 32; i++) { + auto val_i32 = GetGpr(i, 32); + WriteMemory(addr_i64, val_i32); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } +} + +void Compiler::LFS(u32 frd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); +} + +void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); + SetGpr(ra, addr_i64); +} + +void Compiler::LFD(u32 frd, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); +} + +void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::STFS(u32 frs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); +} + +void Compiler::STFSU(u32 frs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); + SetGpr(ra, addr_i64); +} + +void Compiler::STFD(u32 frs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); +} + +void Compiler::STFDU(u32 frs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::LD(u32 rd, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); +} + +void Compiler::LDU(u32 rd, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); +} + +void Compiler::LWA(u32 rd, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); +} + +void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FDIVS."); + } + + // TODO: Set flags +} + +void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FSUBS."); + } + + // TODO: Set flags +} + +void Compiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FADDS."); + } + + // TODO: Set flags +} + +void Compiler::FSQRTS(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FSQRTS."); + } + + // TODO: Set flags +} + +void Compiler::FRES(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FRES."); + } + + // TODO: Set flags +} + +void Compiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FMULS."); + } + + // TODO: Set flags +} + +void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FMADDS."); + } + + // TODO: Set flags +} + +void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FMSUBS."); + } + + // TODO: Set flags +} + +void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FNMSUBS."); + } + + // TODO: Set flags +} + +void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); + + if (rc) { + // TODO: Implement this + CompilationError("FNMADDS."); + } + + // TODO: Set flags +} + +void Compiler::STD(u32 rs, u32 ra, s32 d) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } + + WriteMemory(addr_i64, GetGpr(rs, 64)); +} + +void Compiler::STDU(u32 rs, u32 ra, s32 ds) { + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + + WriteMemory(addr_i64, GetGpr(rs, 64)); + SetGpr(ra, addr_i64); +} + +void Compiler::MTFSB1(u32 crbd, bool rc) { + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSB1."); + } +} + +void Compiler::MCRFS(u32 crbd, u32 crbs) { + auto fpscr_i32 = GetFpscr(); + auto val_i32 = GetNibble(fpscr_i32, crbs); + SetCrField(crbd, val_i32); + + switch (crbs) { + case 0: + fpscr_i32 = ClrBit(fpscr_i32, 0); + fpscr_i32 = ClrBit(fpscr_i32, 3); + break; + case 1: + fpscr_i32 = ClrNibble(fpscr_i32, 1); + break; + case 2: + fpscr_i32 = ClrNibble(fpscr_i32, 2); + break; + case 3: + fpscr_i32 = ClrBit(fpscr_i32, 12); + break; + case 5: + fpscr_i32 = ClrBit(fpscr_i32, 21); + fpscr_i32 = ClrBit(fpscr_i32, 22); + fpscr_i32 = ClrBit(fpscr_i32, 23); + break; + default: + break; + } + + SetFpscr(fpscr_i32); +} + +void Compiler::MTFSB0(u32 crbd, bool rc) { + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = ClrBit(fpscr_i32, crbd); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSB0."); + } +} + +void Compiler::MTFSFI(u32 crfd, u32 i, bool rc) { + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = SetNibble(fpscr_i32, crfd, m_ir_builder->getInt32(i & 0xF)); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSFI."); + } +} + +void Compiler::MFFS(u32 frd, bool rc) { + auto fpscr_i32 = GetFpscr(); + auto fpscr_i64 = m_ir_builder->CreateZExt(fpscr_i32, m_ir_builder->getInt64Ty()); + SetFpr(frd, fpscr_i64); + + if (rc) { + // TODO: Implement this + CompilationError("MFFS."); + } +} + +void Compiler::MTFSF(u32 flm, u32 frb, bool rc) { + u32 mask = 0; + for(u32 i = 0; i < 8; i++) { + if (flm & (1 << i)) { + mask |= 0xF << (i * 4); + } + } + + auto rb_i32 = GetFpr(frb, 32, true); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = m_ir_builder->CreateAnd(fpscr_i32, ~mask); + rb_i32 = m_ir_builder->CreateAnd(rb_i32, mask); + fpscr_i32 = m_ir_builder->CreateOr(fpscr_i32, rb_i32); + SetFpscr(fpscr_i32); + + if (rc) { + // TODO: Implement this + CompilationError("MTFSF."); + } +} + +void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); + + // TODO: Set flags / Handle NaN +} + +void Compiler::FRSP(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f32 = m_ir_builder->CreateFPTrunc(rb_f64, m_ir_builder->getFloatTy()); + auto res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FRSP."); + } + + // TODO: Revisit this + // TODO: Set flags +} + +void Compiler::FCTIW(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); + auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); + + if (rc) { + // TODO: Implement this + CompilationError("FCTIW."); + } + + // TODO: Set flags / Implement rounding modes +} + +void Compiler::FCTIWZ(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); + auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); + + if (rc) { + // TODO: Implement this + CompilationError("FCTIWZ."); + } + + // TODO: Set flags +} + +void Compiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FDIV."); + } + + // TODO: Set flags +} + +void Compiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FSUB."); + } + + // TODO: Set flags +} + +void Compiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FADD."); + } + + // TODO: Set flags +} + +void Compiler::FSQRT(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FSQRT."); + } + + // TODO: Set flags +} + +void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto cmp_i1 = m_ir_builder->CreateFCmpOGE(ra_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0.0)); + auto res_f64 = m_ir_builder->CreateSelect(cmp_i1, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FSEL."); + } + + // TODO: Set flags +} + +void Compiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FMUL."); + } + + // TODO: Set flags +} + +void Compiler::FRSQRTE(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FRSQRTE."); + } +} + +void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FMSUB."); + } + + // TODO: Set flags +} + +void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FMADD."); + } + + // TODO: Set flags +} + +void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rc_f64 = m_ir_builder->CreateFNeg(rc_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FNMSUB."); + } + + // TODO: Set flags +} + +void Compiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + rc_f64 = m_ir_builder->CreateFNeg(rc_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FNMADD."); + } + + // TODO: Set flags +} + +void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); + + // TODO: Set flags / Handle NaN +} + +void Compiler::FNEG(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + SetFpr(frd, rb_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FNEG."); + } + + // TODO: Set flags +} + +void Compiler::FMR(u32 frd, u32 frb, bool rc) { + SetFpr(frd, GetFpr(frb)); + + if (rc) { + // TODO: Implement this + CompilationError("FMR."); + } + + // TODO: Set flags +} + +void Compiler::FNABS(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FNABS."); + } + + // TODO: Set flags +} + +void Compiler::FABS(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FABS."); + } + + // TODO: Set flags +} + +void Compiler::FCTID(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); + auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + SetFpr(frd, res_i64); + + if (rc) { + // TODO: Implement this + CompilationError("FCTID."); + } + + // TODO: Set flags / Implement rounding modes +} + +void Compiler::FCTIDZ(u32 frd, u32 frb, bool rc) { + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); + auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + SetFpr(frd, res_i64); + + if (rc) { + // TODO: Implement this + CompilationError("FCTIDZ."); + } + + // TODO: Set flags +} + +void Compiler::FCFID(u32 frd, u32 frb, bool rc) { + auto rb_i64 = GetFpr(frb, 64, true); + auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); + SetFpr(frd, res_f64); + + if (rc) { + // TODO: Implement this + CompilationError("FCFID."); + } + + // TODO: Set flags +} + +void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { + CompilationError(fmt::Format("Unknown/Illegal opcode! (0x%08x : 0x%x : 0x%x)", code, opcode, gcode)); +} + +std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) const { + std::string name; + + if (address == 0) { + name = "entry"; + } else if (address == 0xFFFFFFFF) { + name = "default_exit"; + } else { + name = fmt::Format("instr_0x%08X", address); + } + + if (suffix != "") { + name += "_" + suffix; + } + + return name; +} + +u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) const { + if (name.compare(0, 6, "instr_") == 0) { + return strtoul(name.c_str() + 6, nullptr, 0); + } else if (name == GetBasicBlockNameFromAddress(0)) { + return 0; + } else if (name == GetBasicBlockNameFromAddress(0xFFFFFFFF)) { + return 0xFFFFFFFF; + } + + return 0; +} + +BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, const std::string & suffix, bool create_if_not_exist) { + auto block_name = GetBasicBlockNameFromAddress(address, suffix); + BasicBlock * block = nullptr; + BasicBlock * next_block = nullptr; + for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { + if (i->getName() == block_name) { + block = &(*i); + break; + } + +#ifdef _DEBUG + auto block_address = GetAddressFromBasicBlockName(i->getName()); + if (block_address > address) { + next_block = &(*i); + break; + } +#endif + } + + if (!block && create_if_not_exist) { + block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function, next_block); + } + + return block; +} + +Value * Compiler::GetBit(Value * val, u32 n) { + Value * bit; + +#ifdef PPU_LLVM_RECOMPILER_USE_BMI + if (val->getType()->isIntegerTy(32)) { + bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32(1 << (31- n))); + } else if (val->getType()->isIntegerTy(64)) { + bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)1 << (63 - n))); + } else { +#endif + if (val->getType()->getIntegerBitWidth() != (n + 1)) { + bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1); + } + + bit = m_ir_builder->CreateAnd(bit, 1); +#ifdef PPU_LLVM_RECOMPILER_USE_BMI + } +#endif + + return bit; +} + +Value * Compiler::ClrBit(Value * val, u32 n) { + return m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1))); +} + +Value * Compiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) { + if (doClear) { + val = ClrBit(val, n); + } + + if (bit->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { + bit = m_ir_builder->CreateZExt(bit, val->getType()); + } else if (bit->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { + bit = m_ir_builder->CreateTrunc(bit, val->getType()); + } + + if (val->getType()->getIntegerBitWidth() != (n + 1)) { + bit = m_ir_builder->CreateShl(bit, bit->getType()->getIntegerBitWidth() - n - 1); + } + + return m_ir_builder->CreateOr(val, bit); +} + +Value * Compiler::GetNibble(Value * val, u32 n) { + Value * nibble; + +#ifdef PPU_LLVM_RECOMPILER_USE_BMI + if (val->getType()->isIntegerTy(32)) { + nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32((u64)0xF << ((7 - n) * 4))); + } else if (val->getType()->isIntegerTy(64)) { + nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)0xF << ((15 - n) * 4))); + } else { +#endif + if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { + val = m_ir_builder->CreateLShr(val, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); + } + + nibble = m_ir_builder->CreateAnd(val, 0xF); +#ifdef PPU_LLVM_RECOMPILER_USE_BMI + } +#endif + + return nibble; +} + +Value * Compiler::ClrNibble(Value * val, u32 n) { + return m_ir_builder->CreateAnd(val, ~((u64)0xF << ((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4))); +} + +Value * Compiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) { + if (doClear) { + val = ClrNibble(val, n); + } + + if (nibble->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { + nibble = m_ir_builder->CreateZExt(nibble, val->getType()); + } else if (nibble->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { + nibble = m_ir_builder->CreateTrunc(nibble, val->getType()); + } + + if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { + nibble = m_ir_builder->CreateShl(nibble, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); + } + + return m_ir_builder->CreateOr(val, nibble); +} + +Value * Compiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) { + if (doClear) { + val = ClrNibble(val, n); + } + + if (b0) { + val = SetBit(val, n * 4, b0, false); + } + + if (b1) { + val = SetBit(val, (n * 4) + 1, b1, false); + } + + if (b2) { + val = SetBit(val, (n * 4) + 2, b2, false); + } + + if (b3) { + val = SetBit(val, (n * 4) + 3, b3, false); + } + + return val; +} + +Value * Compiler::GetPc() { + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); + auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); +} + +void Compiler::SetPc(Value * val_ix) { + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); + auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); + m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); +} + +Value * Compiler::GetGpr(u32 r, u32 num_bits) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); +} + +void Compiler::SetGpr(u32 r, Value * val_x64) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); +} + +Value * Compiler::GetCr() { + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); + auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); +} + +Value * Compiler::GetCrField(u32 n) { + return GetNibble(GetCr(), n); +} + +void Compiler::SetCr(Value * val_x32) { + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); + auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); +} + +void Compiler::SetCrField(u32 n, Value * field) { + SetCr(SetNibble(GetCr(), n, field)); +} + +void Compiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) { + SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3)); +} + +void Compiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { + auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b); + auto gt_i1 = m_ir_builder->CreateICmpSGT(a, b); + auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); + SetCr(cr_i32); +} + +void Compiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { + auto lt_i1 = m_ir_builder->CreateICmpULT(a, b); + auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b); + auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); + SetCr(cr_i32); +} + +void Compiler::SetCr6AfterVectorCompare(u32 vr) { + auto vr_v16i8 = GetVrAsIntVec(vr, 8); + auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8); + auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0)); + auto cmp1_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0xFFFF)); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, 6, cmp1_i1, nullptr, cmp0_i1, nullptr); + SetCr(cr_i32); +} + +Value * Compiler::GetLr() { + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); + auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); +} + +void Compiler::SetLr(Value * val_x64) { + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); + auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); +} + +Value * Compiler::GetCtr() { + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); +} + +void Compiler::SetCtr(Value * val_x64) { + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); +} + +Value * Compiler::GetXer() { + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); + auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); +} + +Value * Compiler::GetXerCa() { + return GetBit(GetXer(), 34); +} + +Value * Compiler::GetXerSo() { + return GetBit(GetXer(), 32); +} + +void Compiler::SetXer(Value * val_x64) { + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); + auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); +} + +void Compiler::SetXerCa(Value * ca) { + auto xer_i64 = GetXer(); + xer_i64 = SetBit(xer_i64, 34, ca); + SetXer(xer_i64); +} + +void Compiler::SetXerSo(Value * so) { + auto xer_i64 = GetXer(); + xer_i64 = SetBit(xer_i64, 32, so); + SetXer(xer_i64); +} + +Value * Compiler::GetVrsave() { + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto val_i32 = m_ir_builder->CreateAlignedLoad(vrsave_i32_ptr, 4); + return m_ir_builder->CreateZExtOrTrunc(val_i32, m_ir_builder->getInt64Ty()); +} + +void Compiler::SetVrsave(Value * val_x64) { + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_i64, m_ir_builder->getInt32Ty()); + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, vrsave_i32_ptr, 8); +} + +Value * Compiler::GetFpscr() { + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(fpscr_i32_ptr, 4); +} + +void Compiler::SetFpscr(Value * val_x32) { + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, fpscr_i32_ptr, 4); +} + +Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); + if (!as_int) { + auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); + auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); + if (bits == 32) { + return m_ir_builder->CreateFPTrunc(r_f64, m_ir_builder->getFloatTy()); + } else { + return r_f64; + } + } else { + auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto r_i64 = m_ir_builder->CreateAlignedLoad(r_i64_ptr, 8); + if (bits == 32) { + return m_ir_builder->CreateTrunc(r_i64, m_ir_builder->getInt32Ty()); + } else { + return r_i64; + } + } +} + +void Compiler::SetFpr(u32 r, Value * val) { + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); + auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); + + Value* val_f64; + if (val->getType()->isDoubleTy() || val->getType()->isIntegerTy(64)) { + val_f64 = m_ir_builder->CreateBitCast(val, m_ir_builder->getDoubleTy()); + } else if (val->getType()->isFloatTy() || val->getType()->isIntegerTy(32)) { + auto val_f32 = m_ir_builder->CreateBitCast(val, m_ir_builder->getFloatTy()); + val_f64 = m_ir_builder->CreateFPExt(val_f32, m_ir_builder->getDoubleTy()); + } else { + assert(0); + } + + m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8); +} + +Value * Compiler::GetVscr() { + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); +} + +void Compiler::SetVscr(Value * val_x32) { + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); +} + +Value * Compiler::GetVr(u32 vr) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); +} + +Value * Compiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); +} + +Value * Compiler::GetVrAsFloatVec(u32 vr) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); +} + +Value * Compiler::GetVrAsDoubleVec(u32 vr) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); +} + +void Compiler::SetVr(u32 vr, Value * val_x128) { + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); + m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); +} + +Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { + bool bo0 = bo & 0x10 ? true : false; + bool bo1 = bo & 0x08 ? true : false; + bool bo2 = bo & 0x04 ? true : false; + bool bo3 = bo & 0x02 ? true : false; + + auto ctr_i64 = GetCtr(); + if (!bo2) { + ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1)); + SetCtr(ctr_i64); + } + + Value * ctr_ok_i1 = nullptr; + if (!bo2) { + // TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3. + ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0)); + if (bo3) { + ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3)); + } + } + + Value * cond_ok_i1 = nullptr; + if (!bo0) { + auto cr_bi_i32 = GetBit(GetCr(), bi); + cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty()); + if (!bo1) { + cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1)); + } + } + + Value * cmp_i1 = nullptr; + if (ctr_ok_i1 && cond_ok_i1) { + cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1); + } else if (ctr_ok_i1) { + cmp_i1 = ctr_ok_i1; + } else if (cond_ok_i1) { + cmp_i1 = cond_ok_i1; + } + + return cmp_i1; +} + +void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr) { + if (lk) { + SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); + } + + auto current_block = m_ir_builder->GetInsertBlock(); + + BasicBlock * target_block = nullptr; + if (dyn_cast(target_i32)) { + // Target address is an immediate value. + u32 target_address = (u32)(dyn_cast(target_i32)->getLimitedValue()); + if (lk) { + // Function call + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); + m_ir_builder->SetInsertPoint(target_block); + } + + SetPc(target_i32); + IndirectCall(target_address, m_ir_builder->getInt64(0), true); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); + } else { + // Local branch + target_block = GetBasicBlockFromAddress(target_address); + } + } else { + // Target address is in a register + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); + m_ir_builder->SetInsertPoint(target_block); + } + + SetPc(target_i32); + if (target_is_lr && !lk) { + // Return from this function + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } else if (lk) { + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + auto unknown_function_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "unknown_function"); + + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); + m_ir_builder->SetInsertPoint(unknown_function_block); + m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0)); + m_ir_builder->CreateBr(next_block); + + auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); + if (call_i != m_state.cfg->calls.end()) { + for (auto function_i = call_i->second.begin(); function_i != call_i->second.end(); function_i++) { + auto block = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("0x%08X", *function_i)); + m_ir_builder->SetInsertPoint(block); + IndirectCall(*function_i, m_ir_builder->getInt64(0), true); + m_ir_builder->CreateBr(next_block); + switch_instr->addCase(m_ir_builder->getInt32(*function_i), block); + } + } + } else { + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); + auto branch_i = m_state.cfg->branches.find(m_state.current_instruction_address); + if (branch_i != m_state.cfg->branches.end()) { + for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { + switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); + } + } + } + } + + if (cmp_i1) { + // Conditional branch + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + m_ir_builder->SetInsertPoint(current_block); + m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block); + } else { + // Unconditional branch + if (target_block) { + m_ir_builder->SetInsertPoint(current_block); + m_ir_builder->CreateBr(target_block); + } + } + + m_state.hit_branch_instruction = true; +} + +Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getIntNTy(bits)->getPointerTo()); auto val_ix = (Value *)m_ir_builder->CreateLoad(eaddr_ix_ptr, alignment); if (bits > 8 && bswap) { val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getIntNTy(bits)), val_ix); - } + } return val_ix; -} - -void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool bswap, bool could_be_mmio) { +} + +void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool bswap, bool could_be_mmio) { if (val_ix->getType()->getIntegerBitWidth() > 8 && bswap) { val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, val_ix->getType()), val_ix); - } + } addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo()); m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); -} - -template -Type * Compiler::CppToLlvmType() { - if (std::is_void::value) { - return m_ir_builder->getVoidTy(); - } else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt64Ty(); - } else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt32Ty(); - } else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt16Ty(); - } else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt8Ty(); - } else if (std::is_same::value) { - return m_ir_builder->getFloatTy(); - } else if (std::is_same::value) { - return m_ir_builder->getDoubleTy(); +} + +template +Type * Compiler::CppToLlvmType() { + if (std::is_void::value) { + return m_ir_builder->getVoidTy(); + } else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt64Ty(); + } else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt32Ty(); + } else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt16Ty(); + } else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt8Ty(); + } else if (std::is_same::value) { + return m_ir_builder->getFloatTy(); + } else if (std::is_same::value) { + return m_ir_builder->getDoubleTy(); } else if (std::is_same::value) { return m_ir_builder->getInt1Ty(); - } else if (std::is_pointer::value) { - return m_ir_builder->getInt8PtrTy(); - } else { - assert(0); - } - - return nullptr; -} - -template -Value * Compiler::Call(const char * name, Func function, Args... args) { - auto fn = m_module->getFunction(name); - if (!fn) { - std::vector fn_args_type = {args->getType()...}; - auto fn_type = FunctionType::get(CppToLlvmType(), fn_args_type, false); - fn = cast(m_module->getOrInsertFunction(name, fn_type)); - fn->setCallingConv(CallingConv::X86_64_Win64); - m_execution_engine->addGlobalMapping(fn, (void *&)function); - } - - std::vector fn_args = {args...}; - return m_ir_builder->CreateCall(fn, fn_args); -} - -llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) { - auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); - auto location_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); - auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); - auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); - auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); - return m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64); -} - -void Compiler::CompilationError(const std::string & error) { - LOG_ERROR(PPU, "[0x%08X] %s", m_state.current_instruction_address, error.c_str()); - Emu.Pause(); -} - -void Compiler::InitRotateMask() { - for (u32 mb = 0; mb < 64; mb++) { - for (u32 me = 0; me < 64; me++) { - u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 0 : (u64)-1 >> (me + 1)); - s_rotate_mask[mb][me] = mb > me ? ~mask : mask; - } - } -} - -std::mutex RecompilationEngine::s_mutex; -std::shared_ptr RecompilationEngine::s_the_instance = nullptr; - -RecompilationEngine::RecompilationEngine() - : ThreadBase("PPU Recompilation Engine") - , m_log(nullptr) - , m_next_ordinal(0) - , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) { - m_compiler.RunAllTests(); -} - -RecompilationEngine::~RecompilationEngine() { - Stop(); -} - -u32 RecompilationEngine::AllocateOrdinal(u32 address, bool is_function) { - std::lock_guard lock(m_address_to_ordinal_lock); - - auto i = m_address_to_ordinal.find(address); - if (i == m_address_to_ordinal.end()) { - assert(m_next_ordinal < (sizeof(m_executable_lookup) / sizeof(m_executable_lookup[0]))); - - m_executable_lookup[m_next_ordinal] = is_function ? ExecutionEngine::ExecuteFunction : ExecutionEngine::ExecuteTillReturn; - std::atomic_thread_fence(std::memory_order_release); - i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, m_next_ordinal++)); - } - - return i->second; -} - -u32 RecompilationEngine::GetOrdinal(u32 address) const { - std::lock_guard lock(m_address_to_ordinal_lock); - - auto i = m_address_to_ordinal.find(address); - if (i != m_address_to_ordinal.end()) { - return i->second; - } else { - return 0xFFFFFFFF; - } -} - -const Executable RecompilationEngine::GetExecutable(u32 ordinal) const { - std::atomic_thread_fence(std::memory_order_acquire); - return m_executable_lookup[ordinal]; -} - -u64 RecompilationEngine::GetAddressOfExecutableLookup() const { - return (u64)m_executable_lookup; -} - -void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { - { - std::lock_guard lock(m_pending_execution_traces_lock); - m_pending_execution_traces.push_back(execution_trace); - } - - if (!IsAlive()) { - Start(); - } - - Notify(); - // TODO: Increase the priority of the recompilation engine thread -} - -raw_fd_ostream & RecompilationEngine::Log() { - if (!m_log) { - std::string error; - m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); - m_log->SetUnbuffered(); - } - - return *m_log; -} - -void RecompilationEngine::Task() { - bool is_idling = false; - std::chrono::nanoseconds idling_time(0); - std::chrono::nanoseconds recompiling_time(0); - - auto start = std::chrono::high_resolution_clock::now(); - while (!TestDestroy() && !Emu.IsStopped()) { - bool work_done_this_iteration = false; - ExecutionTrace * execution_trace = nullptr; - - { - std::lock_guard lock(m_pending_execution_traces_lock); - - auto i = m_pending_execution_traces.begin(); - if (i != m_pending_execution_traces.end()) { - execution_trace = *i; - m_pending_execution_traces.erase(i); - } - } - - if (execution_trace) { - ProcessExecutionTrace(*execution_trace); - delete execution_trace; - work_done_this_iteration = true; - } - - if (!work_done_this_iteration) { - // TODO: Reduce the priority of the recompilation engine thread if its set to high priority - } else { - is_idling = false; - } - - if (is_idling) { - auto recompiling_start = std::chrono::high_resolution_clock::now(); - - // Recompile the function whose CFG has changed the most since the last time it was compiled - auto candidate = (BlockEntry *)nullptr; - size_t max_diff = 0; - for (auto block : m_block_table) { - if (block->IsFunction() && block->is_compiled) { - auto diff = block->cfg.GetSize() - block->last_compiled_cfg_size; - if (diff > max_diff) { - candidate = block; - max_diff = diff; - } - } - } - - if (candidate != nullptr) { - Log() << "Recompiling: " << candidate->ToString() << "\n"; - CompileBlock(*candidate); - work_done_this_iteration = true; - } - - auto recompiling_end = std::chrono::high_resolution_clock::now(); - recompiling_time += std::chrono::duration_cast(recompiling_end - recompiling_start); - } - - if (!work_done_this_iteration) { - is_idling = true; - - // Wait a few ms for something to happen - auto idling_start = std::chrono::high_resolution_clock::now(); - WaitForAnySignal(250); - auto idling_end = std::chrono::high_resolution_clock::now(); - idling_time += std::chrono::duration_cast(idling_end - idling_start); - } - } - - std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - auto total_time = std::chrono::duration_cast(end - start); - auto compiler_stats = m_compiler.GetStats(); - - Log() << "Total time = " << total_time.count() / 1000000 << "ms\n"; - Log() << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; - Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; - Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; - Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; - Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n"; - Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; - Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; - Log() << "Ordinals allocated = " << m_next_ordinal << "\n"; - - LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); - s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. -} - -void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { - auto execution_trace_id = execution_trace.GetId(); - auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); - if (processed_execution_trace_i == m_processed_execution_traces.end()) { -#ifdef _DEBUG - Log() << "Trace: " << execution_trace.ToString() << "\n"; -#endif - // Find the function block - BlockEntry key(execution_trace.function_address, execution_trace.function_address); - auto block_i = m_block_table.find(&key); - if (block_i == m_block_table.end()) { - block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - } - - auto function_block = *block_i; - block_i = m_block_table.end(); - auto split_trace = false; - std::vector tmp_block_list; - for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { - if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { - block_i = m_block_table.end(); - split_trace = true; - } - - if (block_i == m_block_table.end()) { - BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); - block_i = m_block_table.find(&key); - if (block_i == m_block_table.end()) { - block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - } - - tmp_block_list.push_back(*block_i); - } - - const ExecutionTraceEntry * next_trace = nullptr; - if (trace_i + 1 != execution_trace.entries.end()) { - next_trace = &(*(trace_i + 1)); - } else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { - next_trace = &(*(execution_trace.entries.begin())); - } - - UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); - if (*block_i != function_block) { - UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace); - } - } - - processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); - } - - for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { - if (!(*i)->is_compiled) { - (*i)->num_hits++; - if ((*i)->num_hits >= 1000) { // TODO: Make this configurable - CompileBlock(*(*i)); - } - } - } - - std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; }); -} - -void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) { - if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { - cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); - - if (next_entry) { - if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { - if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { - cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); - } - } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); - } - } - } else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { - if (next_entry) { - if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { - cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); - } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); - } - } - } -} - -void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { -#ifdef _DEBUG - Log() << "Compile: " << block_entry.ToString() << "\n"; - Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; -#endif - - auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction()); - auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, true, - block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); - m_executable_lookup[ordinal] = executable; - block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize(); - block_entry.is_compiled = true; -} - -std::shared_ptr RecompilationEngine::GetInstance() { - std::lock_guard lock(s_mutex); - - if (s_the_instance == nullptr) { - s_the_instance = std::shared_ptr(new RecompilationEngine()); - } - - return s_the_instance; -} - -Tracer::Tracer() - : m_recompilation_engine(RecompilationEngine::GetInstance()) { - m_stack.reserve(100); -} - -Tracer::~Tracer() { - Terminate(); -} - -void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { - ExecutionTrace * execution_trace = nullptr; - - switch (trace_type) { - case TraceType::CallFunction: - // arg1 is address of the function - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1)); - break; - case TraceType::EnterFunction: - // arg1 is address of the function - m_stack.push_back(new ExecutionTrace(arg1)); - break; - case TraceType::ExitFromCompiledFunction: - // arg1 is address of function. - // arg2 is the address of the exit instruction. - if (arg2) { - m_stack.push_back(new ExecutionTrace(arg1)); - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); - } - break; - case TraceType::Return: - // No args used - execution_trace = m_stack.back(); - execution_trace->type = ExecutionTrace::Type::Linear; - m_stack.pop_back(); - break; - case TraceType::Instruction: - // arg1 is the address of the instruction - for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { - if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].data.instruction.address == arg1) || - (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].data.compiled_block.entry_address == arg1)) { - // Found a loop - execution_trace = new ExecutionTrace(m_stack.back()->function_address); - execution_trace->type = ExecutionTrace::Type::Loop; - std::copy(m_stack.back()->entries.begin() + i, m_stack.back()->entries.end(), std::back_inserter(execution_trace->entries)); - m_stack.back()->entries.erase(m_stack.back()->entries.begin() + i + 1, m_stack.back()->entries.end()); - break; - } - } - - if (!execution_trace) { - // A loop was not found - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1)); - } - break; - case TraceType::ExitFromCompiledBlock: - // arg1 is address of the compiled block. - // arg2 is the address of the exit instruction. - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); - - if (arg2 == 0) { - // Return from function - execution_trace = m_stack.back(); - execution_trace->type = ExecutionTrace::Type::Linear; - m_stack.pop_back(); - } - break; - default: - assert(0); - break; - } - - if (execution_trace) { - m_recompilation_engine->NotifyTrace(execution_trace); - } -} - -void Tracer::Terminate() { - // TODO: Notify recompilation engine -} - -ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) - : m_ppu(ppu) - , m_interpreter(new PPUInterpreter(ppu)) - , m_decoder(m_interpreter) - , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) - , m_recompilation_engine(RecompilationEngine::GetInstance()) { -} - -ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { + } else if (std::is_pointer::value) { + return m_ir_builder->getInt8PtrTy(); + } else { + assert(0); + } -} - -u32 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { - ExecuteFunction(&m_ppu, 0); - return 0; -} - -void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() const { - auto now = std::chrono::high_resolution_clock::now(); - if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { - for (auto i = m_address_to_ordinal.begin(); i != m_address_to_ordinal.end();) { - auto tmp = i; - i++; - if (tmp->second.second == 0) { - m_address_to_ordinal.erase(tmp); - } else { - tmp->second.second = 0; - } - } - - m_last_cache_clear_time = now; - } -} - -Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Executable default_executable) const { - // Find the ordinal for the specified address and insert it to the cache - auto i = m_address_to_ordinal.find(address); - if (i == m_address_to_ordinal.end()) { - auto ordinal = m_recompilation_engine->GetOrdinal(address); - if (ordinal != 0xFFFFFFFF) { - i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, std::make_pair(ordinal, 0))); - } - } - - Executable executable = default_executable; - if (i != m_address_to_ordinal.end()) { - i->second.second++; - executable = m_recompilation_engine->GetExecutable(i->second.first); - } - - RemoveUnusedEntriesFromCache(); - return executable; -} - -u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, u64 context) { - auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); - execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); - return ExecuteTillReturn(ppu_state, 0); -} - -u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, u64 context) { - auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); - auto terminate = false; - auto branch_type = BranchType::NonBranch; - - if (context) { - execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF); - } - - while (!terminate && !Emu.IsStopped()) { - if (Emu.IsPaused()) { - std::this_thread::sleep_for(std::chrono::milliseconds(50)); - continue; - } - - auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn); - if (executable != ExecuteTillReturn && executable != ExecuteFunction) { - auto entry = ppu_state->PC; - auto exit = (u32)executable(ppu_state, 0); - execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); - if (exit == 0) { - terminate = true; - } - } else { - execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0); - auto instruction = re32(vm::get_ref(ppu_state->PC)); - execution_engine->m_decoder.Decode(instruction); - branch_type = ppu_state->m_is_branch ? GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; - ppu_state->NextPc(4); - - switch (branch_type) { - case BranchType::Return: - execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0); - terminate = true; - break; - case BranchType::FunctionCall: - execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); - executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); - executable(ppu_state, 0); - break; - case BranchType::LocalBranch: - break; - case BranchType::NonBranch: - break; - default: - assert(0); - break; - } - } - } - - return 0; -} - -BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { - auto type = BranchType::NonBranch; - auto field1 = instruction >> 26; - auto lk = instruction & 1; - - if (field1 == 16 || field1 == 18) { - type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; - } else if (field1 == 19) { - u32 field2 = (instruction >> 1) & 0x3FF; - if (field2 == 16) { - type = lk ? BranchType::FunctionCall : BranchType::Return; - } else if (field2 == 528) { - type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; - } - } - - return type; -} + return nullptr; +} + +template +Value * Compiler::Call(const char * name, Func function, Args... args) { + auto fn = m_module->getFunction(name); + if (!fn) { + std::vector fn_args_type = {args->getType()...}; + auto fn_type = FunctionType::get(CppToLlvmType(), fn_args_type, false); + fn = cast(m_module->getOrInsertFunction(name, fn_type)); + fn->setCallingConv(CallingConv::X86_64_Win64); + m_execution_engine->addGlobalMapping(fn, (void *&)function); + } + + std::vector fn_args = {args...}; + return m_ir_builder->CreateCall(fn, fn_args); +} + +llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) { + auto ordinal = m_recompilation_engine.AllocateOrdinal(address, is_function); + auto location_i64 = m_ir_builder->getInt64(m_recompilation_engine.GetAddressOfExecutableLookup() + (ordinal * sizeof(u64))); + auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); + auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); + auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); + return m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64); +} + +void Compiler::CompilationError(const std::string & error) { + LOG_ERROR(PPU, "[0x%08X] %s", m_state.current_instruction_address, error.c_str()); + Emu.Pause(); +} + +void Compiler::InitRotateMask() { + for (u32 mb = 0; mb < 64; mb++) { + for (u32 me = 0; me < 64; me++) { + u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 0 : (u64)-1 >> (me + 1)); + s_rotate_mask[mb][me] = mb > me ? ~mask : mask; + } + } +} + +std::mutex RecompilationEngine::s_mutex; +std::shared_ptr RecompilationEngine::s_the_instance = nullptr; + +RecompilationEngine::RecompilationEngine() + : ThreadBase("PPU Recompilation Engine") + , m_log(nullptr) + , m_next_ordinal(0) + , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) { + m_compiler.RunAllTests(); +} + +RecompilationEngine::~RecompilationEngine() { + Stop(); +} + +u32 RecompilationEngine::AllocateOrdinal(u32 address, bool is_function) { + std::lock_guard lock(m_address_to_ordinal_lock); + + auto i = m_address_to_ordinal.find(address); + if (i == m_address_to_ordinal.end()) { + assert(m_next_ordinal < (sizeof(m_executable_lookup) / sizeof(m_executable_lookup[0]))); + + m_executable_lookup[m_next_ordinal] = is_function ? ExecutionEngine::ExecuteFunction : ExecutionEngine::ExecuteTillReturn; + std::atomic_thread_fence(std::memory_order_release); + i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, m_next_ordinal++)); + } + + return i->second; +} + +u32 RecompilationEngine::GetOrdinal(u32 address) const { + std::lock_guard lock(m_address_to_ordinal_lock); + + auto i = m_address_to_ordinal.find(address); + if (i != m_address_to_ordinal.end()) { + return i->second; + } else { + return 0xFFFFFFFF; + } +} + +const Executable RecompilationEngine::GetExecutable(u32 ordinal) const { + std::atomic_thread_fence(std::memory_order_acquire); + return m_executable_lookup[ordinal]; +} + +u64 RecompilationEngine::GetAddressOfExecutableLookup() const { + return (u64)m_executable_lookup; +} + +void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { + { + std::lock_guard lock(m_pending_execution_traces_lock); + m_pending_execution_traces.push_back(execution_trace); + } + + if (!IsAlive()) { + Start(); + } + + Notify(); + // TODO: Increase the priority of the recompilation engine thread +} + +raw_fd_ostream & RecompilationEngine::Log() { + if (!m_log) { + std::string error; + m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); + m_log->SetUnbuffered(); + } + + return *m_log; +} + +void RecompilationEngine::Task() { + bool is_idling = false; + std::chrono::nanoseconds idling_time(0); + std::chrono::nanoseconds recompiling_time(0); + + auto start = std::chrono::high_resolution_clock::now(); + while (!TestDestroy() && !Emu.IsStopped()) { + bool work_done_this_iteration = false; + ExecutionTrace * execution_trace = nullptr; + + { + std::lock_guard lock(m_pending_execution_traces_lock); + + auto i = m_pending_execution_traces.begin(); + if (i != m_pending_execution_traces.end()) { + execution_trace = *i; + m_pending_execution_traces.erase(i); + } + } + + if (execution_trace) { + ProcessExecutionTrace(*execution_trace); + delete execution_trace; + work_done_this_iteration = true; + } + + if (!work_done_this_iteration) { + // TODO: Reduce the priority of the recompilation engine thread if its set to high priority + } else { + is_idling = false; + } + + if (is_idling) { + auto recompiling_start = std::chrono::high_resolution_clock::now(); + + // Recompile the function whose CFG has changed the most since the last time it was compiled + auto candidate = (BlockEntry *)nullptr; + size_t max_diff = 0; + for (auto block : m_block_table) { + if (block->IsFunction() && block->is_compiled) { + auto diff = block->cfg.GetSize() - block->last_compiled_cfg_size; + if (diff > max_diff) { + candidate = block; + max_diff = diff; + } + } + } + + if (candidate != nullptr) { + Log() << "Recompiling: " << candidate->ToString() << "\n"; + CompileBlock(*candidate); + work_done_this_iteration = true; + } + + auto recompiling_end = std::chrono::high_resolution_clock::now(); + recompiling_time += std::chrono::duration_cast(recompiling_end - recompiling_start); + } + + if (!work_done_this_iteration) { + is_idling = true; + + // Wait a few ms for something to happen + auto idling_start = std::chrono::high_resolution_clock::now(); + WaitForAnySignal(250); + auto idling_end = std::chrono::high_resolution_clock::now(); + idling_time += std::chrono::duration_cast(idling_end - idling_start); + } + } + + std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + auto total_time = std::chrono::duration_cast(end - start); + auto compiler_stats = m_compiler.GetStats(); + + Log() << "Total time = " << total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; + Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; + Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; + Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n"; + Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; + Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; + Log() << "Ordinals allocated = " << m_next_ordinal << "\n"; + + LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); + s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. +} + +void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { + auto execution_trace_id = execution_trace.GetId(); + auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); + if (processed_execution_trace_i == m_processed_execution_traces.end()) { +#ifdef _DEBUG + Log() << "Trace: " << execution_trace.ToString() << "\n"; +#endif + // Find the function block + BlockEntry key(execution_trace.function_address, execution_trace.function_address); + auto block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + } + + auto function_block = *block_i; + block_i = m_block_table.end(); + auto split_trace = false; + std::vector tmp_block_list; + for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { + if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { + block_i = m_block_table.end(); + split_trace = true; + } + + if (block_i == m_block_table.end()) { + BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); + block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + } + + tmp_block_list.push_back(*block_i); + } + + const ExecutionTraceEntry * next_trace = nullptr; + if (trace_i + 1 != execution_trace.entries.end()) { + next_trace = &(*(trace_i + 1)); + } else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { + next_trace = &(*(execution_trace.entries.begin())); + } + + UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); + if (*block_i != function_block) { + UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace); + } + } + + processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); + } + + for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { + if (!(*i)->is_compiled) { + (*i)->num_hits++; + if ((*i)->num_hits >= 1000) { // TODO: Make this configurable + CompileBlock(*(*i)); + } + } + } + + std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; }); +} + +void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) { + if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { + cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); + + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { + cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); + } + } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); + } + } + } else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } + } + } +} + +void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { +#ifdef _DEBUG + Log() << "Compile: " << block_entry.ToString() << "\n"; + Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; +#endif + + auto ordinal = AllocateOrdinal(block_entry.cfg.start_address, block_entry.IsFunction()); + auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, true, + block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); + m_executable_lookup[ordinal] = executable; + block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize(); + block_entry.is_compiled = true; +} + +std::shared_ptr RecompilationEngine::GetInstance() { + std::lock_guard lock(s_mutex); + + if (s_the_instance == nullptr) { + s_the_instance = std::shared_ptr(new RecompilationEngine()); + } + + return s_the_instance; +} + +Tracer::Tracer() + : m_recompilation_engine(RecompilationEngine::GetInstance()) { + m_stack.reserve(100); +} + +Tracer::~Tracer() { + Terminate(); +} + +void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { + ExecutionTrace * execution_trace = nullptr; + + switch (trace_type) { + case TraceType::CallFunction: + // arg1 is address of the function + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1)); + break; + case TraceType::EnterFunction: + // arg1 is address of the function + m_stack.push_back(new ExecutionTrace(arg1)); + break; + case TraceType::ExitFromCompiledFunction: + // arg1 is address of function. + // arg2 is the address of the exit instruction. + if (arg2) { + m_stack.push_back(new ExecutionTrace(arg1)); + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); + } + break; + case TraceType::Return: + // No args used + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; + m_stack.pop_back(); + break; + case TraceType::Instruction: + // arg1 is the address of the instruction + for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { + if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].data.instruction.address == arg1) || + (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].data.compiled_block.entry_address == arg1)) { + // Found a loop + execution_trace = new ExecutionTrace(m_stack.back()->function_address); + execution_trace->type = ExecutionTrace::Type::Loop; + std::copy(m_stack.back()->entries.begin() + i, m_stack.back()->entries.end(), std::back_inserter(execution_trace->entries)); + m_stack.back()->entries.erase(m_stack.back()->entries.begin() + i + 1, m_stack.back()->entries.end()); + break; + } + } + + if (!execution_trace) { + // A loop was not found + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1)); + } + break; + case TraceType::ExitFromCompiledBlock: + // arg1 is address of the compiled block. + // arg2 is the address of the exit instruction. + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); + + if (arg2 == 0) { + // Return from function + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; + m_stack.pop_back(); + } + break; + default: + assert(0); + break; + } + + if (execution_trace) { + m_recompilation_engine->NotifyTrace(execution_trace); + } +} + +void Tracer::Terminate() { + // TODO: Notify recompilation engine +} + +ppu_recompiler_llvm::ExecutionEngine::ExecutionEngine(PPUThread & ppu) + : m_ppu(ppu) + , m_interpreter(new PPUInterpreter(ppu)) + , m_decoder(m_interpreter) + , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) + , m_recompilation_engine(RecompilationEngine::GetInstance()) { +} + +ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() { + +} + +u32 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) { + ExecuteFunction(&m_ppu, 0); + return 0; +} + +void ppu_recompiler_llvm::ExecutionEngine::RemoveUnusedEntriesFromCache() const { + auto now = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { + for (auto i = m_address_to_ordinal.begin(); i != m_address_to_ordinal.end();) { + auto tmp = i; + i++; + if (tmp->second.second == 0) { + m_address_to_ordinal.erase(tmp); + } else { + tmp->second.second = 0; + } + } + + m_last_cache_clear_time = now; + } +} + +Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Executable default_executable) const { + // Find the ordinal for the specified address and insert it to the cache + auto i = m_address_to_ordinal.find(address); + if (i == m_address_to_ordinal.end()) { + auto ordinal = m_recompilation_engine->GetOrdinal(address); + if (ordinal != 0xFFFFFFFF) { + i = m_address_to_ordinal.insert(m_address_to_ordinal.end(), std::make_pair(address, std::make_pair(ordinal, 0))); + } + } + + Executable executable = default_executable; + if (i != m_address_to_ordinal.end()) { + i->second.second++; + executable = m_recompilation_engine->GetExecutable(i->second.first); + } + + RemoveUnusedEntriesFromCache(); + return executable; +} + +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, u64 context) { + auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); + execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); + return ExecuteTillReturn(ppu_state, 0); +} + +u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, u64 context) { + auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder(); + auto terminate = false; + auto branch_type = BranchType::NonBranch; + + if (context) { + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF); + } + + while (!terminate && !Emu.IsStopped()) { + if (Emu.IsPaused()) { + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + continue; + } + + auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn); + if (executable != ExecuteTillReturn && executable != ExecuteFunction) { + auto entry = ppu_state->PC; + auto exit = (u32)executable(ppu_state, 0); + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); + if (exit == 0) { + terminate = true; + } + } else { + execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0); + auto instruction = re32(vm::get_ref(ppu_state->PC)); + execution_engine->m_decoder.Decode(instruction); + branch_type = ppu_state->m_is_branch ? GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; + ppu_state->NextPc(4); + + switch (branch_type) { + case BranchType::Return: + execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0); + terminate = true; + break; + case BranchType::FunctionCall: + execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); + executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction); + executable(ppu_state, 0); + break; + case BranchType::LocalBranch: + break; + case BranchType::NonBranch: + break; + default: + assert(0); + break; + } + } + } + + return 0; +} + +BranchType ppu_recompiler_llvm::GetBranchTypeFromInstruction(u32 instruction) { + auto type = BranchType::NonBranch; + auto field1 = instruction >> 26; + auto lk = instruction & 1; + + if (field1 == 16 || field1 == 18) { + type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; + } else if (field1 == 19) { + u32 field2 = (instruction >> 1) & 0x3FF; + if (field2 == 16) { + type = lk ? BranchType::FunctionCall : BranchType::Return; + } else if (field2 == 528) { + type = lk ? BranchType::FunctionCall : BranchType::LocalBranch; + } + } + + return type; +}