PPU: Implemented more instructions

This commit is contained in:
S Gopal Rajagopal 2014-12-01 02:09:48 +05:30
parent ce0f713f03
commit 6cc6ca9f3c
4 changed files with 308 additions and 73 deletions

View File

@ -893,7 +893,7 @@ private:
// and between different executions on the same implementation.
for (uint w = 0; w < 4; w++)
{
CPU.VPR[vd]._f[w] = log2(CPU.VPR[vb]._f[w]);
CPU.VPR[vd]._f[w] = log2f(CPU.VPR[vb]._f[w]);
}
}
void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb)
@ -2824,7 +2824,7 @@ private:
return;
}
const u8 eb = (addr & 0xf) >> 1;
vm::write16((u32)addr & 0xFFFFFFFE, CPU.VPR[vs]._u16[7 - eb]);
vm::write16((u32)addr, CPU.VPR[vs]._u16[7 - eb]);
}
void STDUX(u32 rs, u32 ra, u32 rb)
{
@ -2860,7 +2860,7 @@ private:
return;
}
const u8 eb = (addr & 0xf) >> 2;
vm::write32((u32)addr & 0xFFFFFFFC, CPU.VPR[vs]._u32[3 - eb]);
vm::write32((u32)addr, CPU.VPR[vs]._u32[3 - eb]);
}
void ADDZE(u32 rd, u32 ra, u32 oe, bool rc)
{

View File

@ -104,8 +104,6 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph &
auto arg_i = m_state.function->arg_begin();
arg_i->setName("ppu_state");
m_state.args[CompileTaskState::Args::State] = arg_i;
(++arg_i)->setName("interpreter");
m_state.args[CompileTaskState::Args::Interpreter] = arg_i;
(++arg_i)->setName("context");
m_state.args[CompileTaskState::Args::Context] = arg_i;
@ -178,7 +176,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph &
m_ir_builder->SetInsertPoint(then_bb);
context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32);
m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb);
m_ir_builder->SetInsertPoint(merge_bb);
@ -204,7 +202,7 @@ Executable Compiler::Compile(const std::string & name, const ControlFlowGraph &
m_ir_builder->SetInsertPoint(then_bb);
auto context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32);
m_ir_builder->CreateCall3(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb);
m_ir_builder->SetInsertPoint(merge_bb);
@ -1098,23 +1096,21 @@ void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) {
auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32);
auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8));
auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8));
u32 mask1_v4i32[4] = {0, 2, 4, 6};
auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
u32 mask2_v4i32[4] = {1, 3, 5, 7};
auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto vc_v4i32 = GetVrAsIntVec(vc, 32);
auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
auto cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, tmp1_v4i32);
auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vc_v4i32);
cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64);
res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64);
auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF)));
auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4));
res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64);
auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
SetVr(vd, res_v4i32);
// TODO: Set VSCR.SAT
@ -1722,23 +1718,130 @@ void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) {
}
// Emits IR for VSUMSWS: element 3 of vb and all four 32-bit elements of va are
// sign-extended to 64 bits and summed, the sum is clamped to the signed 32-bit
// range, zero-extended to 128 bits and written to vd.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback and the
// remaining statements the added implementation - confirm against the real file.
void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) {
InterpreterCall("VSUMSWS", &PPUInterpreter::VSUMSWS, vd, va, vb);
auto va_v4i32 = GetVrAsIntVec(va, 32);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
// The accumulator starts from vb element 3, widened so the additions cannot wrap.
auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3));
auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty());
for (auto i = 0; i < 4; i++) {
auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i));
auto va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty());
res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64);
}
// Saturate: sums above 0x7FFFFFFF or below the sign-extended 0x80000000 are replaced by the bound.
auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull));
auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull));
res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64);
res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64);
// NOTE(review): a negative sum still has bits 32..63 set when it is zero-extended
// here, so those bits end up in the 128-bit value - confirm this is intended.
auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128));
SetVr(vd, res_i128);
// TODO: Set VSCR.SAT
}
// Emits IR for VSUM2SWS: va and vb are sign-extended to 64-bit lanes, then for each
// of the two lane pairs the code adds va elements {0,2} to va elements {1,3} and to
// vb elements {0,2}, clamps the two sums and writes the <2 x i64> result to vd.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) {
InterpreterCall("VSUM2SWS", &PPUInterpreter::VSUM2SWS, vd, va, vb);
auto va_v4i32 = GetVrAsIntVec(va, 32);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
// Shuffle masks selecting the even-indexed and odd-indexed elements respectively.
u32 mask1_v2i32[2] = { 0, 2 };
u32 mask2_v2i32[2] = { 1, 3 };
auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
auto va1_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32));
auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32));
auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32));
auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64);
res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64);
auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)));
auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)));
res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64);
// NOTE(review): the lower-bound replacement is 0x80000000 with a zero upper half,
// whereas the comparison constant (and any unsaturated negative sum) has the upper
// 32 bits set - confirm which upper-half contents the 64-bit lanes should carry.
res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x80000000ull)), res_v2i64);
SetVr(vd, res_v2i64);
// TODO: Set VSCR.SAT
}
// Emits IR for VSUM4SBS: the sixteen bytes of va are sign-extended to 64 bits and
// split into four stride-4 groups, which are added lane-wise together with the
// sign-extended 32-bit elements of vb; each sum is clamped, truncated to 32 bits
// and the <4 x i32> result is written to vd.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) {
InterpreterCall("VSUM4SBS", &PPUInterpreter::VSUM4SBS, vd, va, vb);
auto va_v16i8 = GetVrAsIntVec(va, 8);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
// mask k picks byte k of every group of four consecutive bytes.
u32 mask1_v4i32[4] = { 0, 4, 8, 12 };
u32 mask2_v4i32[4] = { 1, 5, 9, 13 };
u32 mask3_v4i32[4] = { 2, 6, 10, 14 };
u32 mask4_v4i32[4] = { 3, 7, 11, 15 };
auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16));
auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32));
auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32));
auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64);
res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64);
res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64);
res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64);
// Clamp in 64 bits; the upper half is discarded by the truncation below, so the
// zero-upper-half 0x80000000 replacement yields the same low 32 bits as the bound.
auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)));
auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)));
res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64);
res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64);
auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
SetVr(vd, res_v4i32);
// TODO: Set VSCR.SAT
}
// Emits IR for VSUM4SHS: the eight 16-bit elements of va are sign-extended to
// 64 bits, even- and odd-indexed elements are added pairwise together with the
// sign-extended 32-bit elements of vb, and each sum is clamped, truncated to
// 32 bits and written to vd as <4 x i32>.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) {
InterpreterCall("VSUM4SHS", &PPUInterpreter::VSUM4SHS, vd, va, vb);
auto va_v8i16 = GetVrAsIntVec(va, 16);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
// Even-indexed and odd-indexed halfword selectors.
u32 mask1_v4i32[4] = { 0, 2, 4, 6 };
u32 mask2_v4i32[4] = { 1, 3, 5, 7 };
auto va_v8i64 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8));
auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64);
res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64);
// Clamp in 64 bits; only the low 32 bits survive the truncation below.
auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)));
auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)));
res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64);
res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64);
auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
SetVr(vd, res_v4i32);
// TODO: Set VSCR.SAT
}
// Emits IR for VSUM4UBS: the sixteen bytes of va are split into four stride-4
// groups, zero-extended to 32 bits and added lane-wise together with the 32-bit
// elements of vb; lanes whose final sum is below vb are forced to all ones before
// the <4 x i32> result is written to vd.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) {
InterpreterCall("VSUM4UBS", &PPUInterpreter::VSUM4UBS, vd, va, vb);
auto va_v16i8 = GetVrAsIntVec(va, 8);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
// mask k picks byte k of every group of four consecutive bytes.
u32 mask1_v4i32[4] = { 0, 4, 8, 12 };
u32 mask2_v4i32[4] = { 1, 5, 9, 13 };
u32 mask3_v4i32[4] = { 2, 6, 10, 14 };
u32 mask4_v4i32[4] = { 3, 7, 11, 15 };
auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
auto va1_v4i32 = m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32));
auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32));
auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32);
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32);
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32);
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32);
// A sum smaller than vb means the 32-bit addition wrapped; the sign-extended
// compare result is an all-ones mask that saturates such lanes via the OR.
auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32);
auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32);
SetVr(vd, res_v4i32);
// TODO: Set VSCR.SAT
}
void Compiler::VUPKHPX(u32 vd, u32 vb) {
@ -2816,7 +2919,45 @@ void Compiler::STDX(u32 rs, u32 ra, u32 rb) {
}
// Emits IR for STWCX_: compares the effective address (rb, plus ra when ra is
// non-zero) with the 64-bit value stored at offsetof(PPUThread, R_ADDR) in the
// state argument. On a match it byte-swaps the 32-bit rs value, performs an atomic
// compare-exchange against the 32-bit value at offsetof(PPUThread, R_VALUE), puts
// the success flag into CR bit 2 and stores 0 to R_ADDR; otherwise it clears CR bit 2.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) {
InterpreterCall("STWCX_", &PPUInterpreter::STWCX_, rs, ra, rb);
auto addr_i64 = GetGpr(rb);
if (ra) {
auto ra_i64 = GetGpr(ra);
addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
}
// Load the recorded reservation address from the PPU state block.
auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR));
auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
auto resv_addr_i64 = (Value *)m_ir_builder->CreateAlignedLoad(resv_addr_i64_ptr, 8);
auto cmp_i1 = m_ir_builder->CreateICmpEQ(addr_i64, resv_addr_i64);
auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then");
auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else");
auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge");
m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
// then: the address matches the reservation, attempt the conditional store.
m_ir_builder->SetInsertPoint(then_bb);
auto rs_i32 = GetGpr(rs, 32);
rs_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, rs_i32->getType()), rs_i32);
// Offset the reservation address by the address vm::get_ptr<u8>(0) returns to form a host pointer.
resv_addr_i64 = m_ir_builder->CreateAdd(resv_addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr<u8>(0)));
auto resv_addr_val_i32_ptr = m_ir_builder->CreateIntToPtr(resv_addr_i64, m_ir_builder->getInt32Ty()->getPointerTo());
auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE));
auto resv_val_i32_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
// NOTE(review): an alignment of 8 is passed for this 32-bit load - confirm R_VALUE is 8-byte aligned.
auto resv_val_i32 = m_ir_builder->CreateAlignedLoad(resv_val_i32_ptr, 8);
auto res_s = m_ir_builder->CreateAtomicCmpXchg(resv_addr_val_i32_ptr, resv_val_i32, rs_i32, AtomicOrdering::AcquireRelease, AtomicOrdering::Monotonic);
// Element 1 of the cmpxchg result is the success flag.
auto success_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
auto cr_i32 = GetCr();
cr_i32 = SetBit(cr_i32, 2, success_i1);
SetCr(cr_i32);
// Zero the stored reservation address whether or not the exchange succeeded.
m_ir_builder->CreateAlignedStore(m_ir_builder->getInt64(0), resv_addr_i64_ptr, 8);
m_ir_builder->CreateBr(merge_bb);
// else: no matching reservation, only report failure in CR bit 2.
m_ir_builder->SetInsertPoint(else_bb);
cr_i32 = GetCr();
cr_i32 = ClrBit(cr_i32, 2);
SetCr(cr_i32);
m_ir_builder->CreateBr(merge_bb);
m_ir_builder->SetInsertPoint(merge_bb);
}
void Compiler::STWX(u32 rs, u32 ra, u32 rb) {
@ -2919,7 +3060,45 @@ void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) {
}
// Emits IR for STDCX_: compares the effective address (rb, plus ra when ra is
// non-zero) with the 64-bit value stored at offsetof(PPUThread, R_ADDR) in the
// state argument. On a match it byte-swaps the 64-bit rs value, performs an atomic
// compare-exchange against the 64-bit value at offsetof(PPUThread, R_VALUE), puts
// the success flag into CR bit 2 and stores 0 to R_ADDR; otherwise it clears CR bit 2.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) {
InterpreterCall("STDCX_", &PPUInterpreter::STDCX_, rs, ra, rb);
auto addr_i64 = GetGpr(rb);
if (ra) {
auto ra_i64 = GetGpr(ra);
addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
}
// Load the recorded reservation address from the PPU state block.
auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_ADDR));
auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
auto resv_addr_i64 = (Value *)m_ir_builder->CreateAlignedLoad(resv_addr_i64_ptr, 8);
auto cmp_i1 = m_ir_builder->CreateICmpEQ(addr_i64, resv_addr_i64);
auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then");
auto else_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "else");
auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge");
m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
// then: the address matches the reservation, attempt the conditional store.
m_ir_builder->SetInsertPoint(then_bb);
auto rs_i64 = GetGpr(rs, 64);
rs_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, rs_i64->getType()), rs_i64);
// Offset the reservation address by the address vm::get_ptr<u8>(0) returns to form a host pointer.
resv_addr_i64 = m_ir_builder->CreateAdd(resv_addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr<u8>(0)));
auto resv_addr_val_i64_ptr = m_ir_builder->CreateIntToPtr(resv_addr_i64, m_ir_builder->getInt64Ty()->getPointerTo());
auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, R_VALUE));
auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
auto resv_val_i64 = m_ir_builder->CreateAlignedLoad(resv_val_i64_ptr, 8);
auto res_s = m_ir_builder->CreateAtomicCmpXchg(resv_addr_val_i64_ptr, resv_val_i64, rs_i64, AtomicOrdering::AcquireRelease, AtomicOrdering::Monotonic);
// Element 1 of the cmpxchg result is the success flag.
auto success_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
auto cr_i32 = GetCr();
cr_i32 = SetBit(cr_i32, 2, success_i1);
SetCr(cr_i32);
// Zero the stored reservation address whether or not the exchange succeeded.
m_ir_builder->CreateAlignedStore(m_ir_builder->getInt64(0), resv_addr_i64_ptr, 8);
m_ir_builder->CreateBr(merge_bb);
// else: no matching reservation, only report failure in CR bit 2.
m_ir_builder->SetInsertPoint(else_bb);
cr_i32 = GetCr();
cr_i32 = ClrBit(cr_i32, 2);
SetCr(cr_i32);
m_ir_builder->CreateBr(merge_bb);
m_ir_builder->SetInsertPoint(merge_bb);
}
void Compiler::STBX(u32 rs, u32 ra, u32 rb) {
@ -3414,7 +3593,7 @@ void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) {
}
// LSWX has no IR implementation here: the visible statements are an interpreter
// fallback call and a CompilationError("LSWX") report.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; presumably the InterpreterCall line was removed and CompilationError
// added in its place - confirm against the real file.
void Compiler::LSWX(u32 rd, u32 ra, u32 rb) {
InterpreterCall("LSWX", &PPUInterpreter::LSWX, rd, ra, rb);
CompilationError("LSWX");
}
void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) {
@ -3543,11 +3722,32 @@ void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) {
}
// Emits IR for STVLX: computes the effective address (rb, plus ra when ra is
// non-zero), byte-swaps the 128-bit vs value into a 16-byte-aligned stack slot and
// memcpy's the first (16 - (addr & 0xf)) bytes of that slot to the target address.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::STVLX(u32 vs, u32 ra, u32 rb) {
InterpreterCall("STVLX", &PPUInterpreter::STVLX, vs, ra, rb);
auto addr_i64 = GetGpr(rb);
if (ra) {
auto ra_i64 = GetGpr(ra);
addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
}
// Number of bytes from the address up to the next 16-byte boundary.
auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64);
// Keep the low 32 bits of the address, then offset it by the address vm::get_ptr<u8>(0) returns.
addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF);
addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr<u8>(0)));
auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy());
auto vs_i128 = GetVr(vs);
vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128);
// Spill the swapped value to a temporary so a variable-length copy can read from it.
auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType());
vs_i128_ptr->setAlignment(16);
m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16);
auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy());
// NOTE(review): the trailing i32 1 / i1 false arguments are presumably the
// alignment and volatile flag of the memcpy intrinsic - confirm for the LLVM version in use.
Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() };
m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types),
addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false));
}
// STSWX has no IR implementation here: the visible statements are an interpreter
// fallback call and a CompilationError("STSWX") report.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; presumably the InterpreterCall line was removed and CompilationError
// added in its place - confirm against the real file.
void Compiler::STSWX(u32 rs, u32 ra, u32 rb) {
InterpreterCall("STSWX", &PPUInterpreter::STSWX, rs, ra, rb);
CompilationError("STSWX");
}
void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) {
@ -3572,7 +3772,29 @@ void Compiler::STFSX(u32 frs, u32 ra, u32 rb) {
}
// Emits IR for STVRX: computes the effective address (rb, plus ra when ra is
// non-zero), byte-swaps the 128-bit vs value into a 16-byte-aligned stack slot and
// memcpy's the last (addr & 0xf) bytes of that slot to the address rounded down
// to a 16-byte boundary.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the InterpreterCall line is presumably the removed fallback - confirm.
void Compiler::STVRX(u32 vs, u32 ra, u32 rb) {
InterpreterCall("STVRX", &PPUInterpreter::STVRX, vs, ra, rb);
auto addr_i64 = GetGpr(rb);
if (ra) {
auto ra_i64 = GetGpr(ra);
addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
}
// size = misalignment of the address; index = offset in the temporary where the copied tail begins.
auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64);
// Keep the low 32 bits rounded down to 16 bytes, then offset by the address vm::get_ptr<u8>(0) returns.
addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0);
addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr<u8>(0)));
auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy());
auto vs_i128 = GetVr(vs);
vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128);
// Spill the swapped value to a temporary so a variable-length copy can read from it.
auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType());
vs_i128_ptr->setAlignment(16);
m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16);
auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy());
vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64);
// NOTE(review): the trailing i32 1 / i1 false arguments are presumably the
// alignment and volatile flag of the memcpy intrinsic - confirm for the LLVM version in use.
Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() };
m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types),
addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false));
}
void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) {
@ -4306,7 +4528,7 @@ void Compiler::STDU(u32 rs, u32 ra, s32 ds) {
void Compiler::MTFSB1(u32 crbd, bool rc) {
auto fpscr_i32 = GetFpscr();
SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false);
fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false);
SetFpscr(fpscr_i32);
if (rc) {
@ -5241,7 +5463,7 @@ void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool
auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block);
m_ir_builder->SetInsertPoint(unknown_function_block);
m_ir_builder->CreateCall3(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt64(0));
m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0));
m_ir_builder->CreateBr(next_block);
auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address);
@ -5370,18 +5592,6 @@ void Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool
}
}
// Emits a call to an interpreter routine and counts how often each one is used.
// The per-name counter in m_stats.interpreter_fallback_stats is created at 0 on
// first use and incremented on every call; the emitted call receives the
// Interpreter function argument followed by each of args as a 32-bit constant.
// Returns whatever Call<void>(...) returns.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; per the hunk header this whole definition is presumably being removed - confirm.
template<class Func, class... Args>
Value * Compiler::InterpreterCall(const char * name, Func function, Args... args) {
auto i = m_stats.interpreter_fallback_stats.find(name);
if (i == m_stats.interpreter_fallback_stats.end()) {
i = m_stats.interpreter_fallback_stats.insert(m_stats.interpreter_fallback_stats.end(), std::make_pair<std::string, u64>(name, 0));
}
i->second++;
return Call<void>(name, function, m_state.args[CompileTaskState::Args::Interpreter], m_ir_builder->getInt32(args)...);
}
template<class T>
Type * Compiler::CppToLlvmType() {
if (std::is_void<T>::value) {
@ -5428,7 +5638,7 @@ llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_f
auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo());
auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr);
auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo());
return m_ir_builder->CreateCall3(executable_ptr, m_state.args[CompileTaskState::Args::State], m_state.args[CompileTaskState::Args::Interpreter], context_i64);
return m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64);
}
void Compiler::CompilationError(const std::string & error) {
@ -5601,10 +5811,6 @@ void RecompilationEngine::Task() {
Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n";
Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n";
Log() << "Ordinals allocated = " << m_next_ordinal << "\n";
Log() << "\nInterpreter fallback stats:\n";
for (auto i = compiler_stats.interpreter_fallback_stats.begin(); i != compiler_stats.interpreter_fallback_stats.end(); i++) {
Log() << i->first << " = " << i->second << "\n";
}
LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting.");
s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this.
@ -5813,7 +6019,7 @@ ppu_recompiler_llvm::ExecutionEngine::~ExecutionEngine() {
}
// Runs ExecuteFunction on the engine's m_ppu and returns 0; the address parameter
// is not used by the visible statements.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the two ExecuteFunction calls are presumably the old three-argument
// and new two-argument form of the same line - confirm against the real file.
u8 ppu_recompiler_llvm::ExecutionEngine::DecodeMemory(const u32 address) {
ExecuteFunction(&m_ppu, m_interpreter, 0);
ExecuteFunction(&m_ppu, 0);
return 0;
}
@ -5854,13 +6060,13 @@ Executable ppu_recompiler_llvm::ExecutionEngine::GetExecutable(u32 address, Exec
return executable;
}
// Records an EnterFunction trace for the current PC on the execution engine
// obtained from ppu_state->GetDecoder(), then returns the result of ExecuteTillReturn.
// The context parameter is not forwarded; a literal 0 is passed instead.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped; the duplicated signature and return lines are presumably the old
// (with interpreter) and new (without) versions - confirm against the real file.
u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) {
u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteFunction(PPUThread * ppu_state, u64 context) {
auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder();
execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0);
return ExecuteTillReturn(ppu_state, interpreter, 0);
return ExecuteTillReturn(ppu_state, 0);
}
u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context) {
u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_state, u64 context) {
auto execution_engine = (ExecutionEngine *)ppu_state->GetDecoder();
auto terminate = false;
auto branch_type = BranchType::NonBranch;
@ -5878,7 +6084,7 @@ u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_stat
auto executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteTillReturn);
if (executable != ExecuteTillReturn && executable != ExecuteFunction) {
auto entry = ppu_state->PC;
auto exit = (u32)executable(ppu_state, interpreter, 0);
auto exit = (u32)executable(ppu_state, 0);
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit);
if (exit == 0) {
terminate = true;
@ -5898,7 +6104,7 @@ u32 ppu_recompiler_llvm::ExecutionEngine::ExecuteTillReturn(PPUThread * ppu_stat
case BranchType::FunctionCall:
execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0);
executable = execution_engine->GetExecutable(ppu_state->PC, ExecuteFunction);
executable(ppu_state, interpreter, 0);
executable(ppu_state, 0);
break;
case BranchType::LocalBranch:
break;

View File

@ -253,7 +253,7 @@ namespace ppu_recompiler_llvm {
};
/// Pointer to an executable
typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context);
typedef u32(*Executable)(PPUThread * ppu_state, u64 context);
/// PPU compiler that uses LLVM for code generation and optimization
class Compiler : protected PPUOpcodes, protected PPCDecoder {
@ -270,9 +270,6 @@ namespace ppu_recompiler_llvm {
/// Total time
std::chrono::nanoseconds total_time;
/// Contains the number of times interpreter fallback was used
std::map<std::string, u64> interpreter_fallback_stats;
};
Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, const Executable execute_unknown_block);
@ -705,7 +702,6 @@ namespace ppu_recompiler_llvm {
struct CompileTaskState {
enum Args {
State,
Interpreter,
Context,
MaxArgs,
};
@ -914,10 +910,6 @@ namespace ppu_recompiler_llvm {
/// Write to memory
void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true);
/// Call an interpreter function
template<class Func, class... Args>
llvm::Value * InterpreterCall(const char * name, Func function, Args... args);
/// Convert a C++ type to an LLVM type
template<class T>
llvm::Type * CppToLlvmType();
@ -1166,10 +1158,10 @@ namespace ppu_recompiler_llvm {
Executable GetExecutable(u32 address, Executable default_executable) const;
/// Execute a function
static u32 ExecuteFunction(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context);
static u32 ExecuteFunction(PPUThread * ppu_state, u64 context);
/// Execute till the current function returns
static u32 ExecuteTillReturn(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context);
static u32 ExecuteTillReturn(PPUThread * ppu_state, u64 context);
};
/// Get the branch type from a branch instruction

View File

@ -243,8 +243,6 @@ void Compiler::RunTest(const char * name, std::function<void()> test_case, std::
auto arg_i = m_state.function->arg_begin();
arg_i->setName("ppu_state");
m_state.args[CompileTaskState::Args::State] = arg_i;
(++arg_i)->setName("interpreter");
m_state.args[CompileTaskState::Args::Interpreter] = arg_i;
(++arg_i)->setName("context");
m_state.args[CompileTaskState::Args::Context] = arg_i;
m_state.current_instruction_address = s_ppu_state->PC;
@ -265,7 +263,7 @@ void Compiler::RunTest(const char * name, std::function<void()> test_case, std::
std::string verify_results;
raw_string_ostream verify_results_ostream(verify_results);
if (verifyFunction(*m_state.function, &verify_results_ostream)) {
m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results;
m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results << '\n';
return;
}
@ -298,7 +296,7 @@ void Compiler::RunTest(const char * name, std::function<void()> test_case, std::
// Run the test
input();
auto executable = (Executable)m_execution_engine->getPointerToFunction(m_state.function);
executable(s_ppu_state, s_interpreter, 0);
executable(s_ppu_state, 0);
// Verify results
std::string msg;
@ -497,6 +495,11 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUMSWS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM2SWS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SBS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SHS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4UBS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0, 1);
@ -672,6 +675,27 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0, 1, 2, 3, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0, 1, 2, 3, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0, 1, 2, 3, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 0, 5, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 5, 5, 3, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 10, 5, 25, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 15, 5, 31, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 0, 5, 0, 7);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 5, 5, 7, 0);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 10, 5, 5, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 15, 5, 5, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 0, 5, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 5, 5, 3, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 10, 5, 25, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 15, 5, 31, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 0, 5, 0, 1, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 5, 5, 2, 6, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 10, 5, 5, 11, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 15, 5, 7, 14, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFFS, 0, 5, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 0, 5, 0, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 5, 5, 2, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 10, 5, 5, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 15, 5, 7, 0, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPU, 0, 5, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSP, 0, 5, 0, 1, false);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0, 1, false);
@ -697,9 +721,12 @@ void Compiler::RunAllTests() {
PPUState input;
input.SetRandom(0x10000);
input.GPR[14] = 10;
input.GPR[21] = 15;
input.GPR[23] = 0x10000;
input.GPR[14] = 10;
input.GPR[21] = 15;
input.GPR[23] = 0x10000;
input.R_ADDR = 0x10000;
input.R_VALUE = 0x1122334455667788;
input.mem_block[0] = 0x8877665544332211;
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 0, input, 5, 0, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 1, input, 5, 14, 0x10000);
@ -792,6 +819,8 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 0, input, 3, 0, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 1, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBU, 0, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBUX, 0, input, 3, 14, 23);
@ -810,18 +839,26 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWUX, 0, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 0, input, 0, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 1, input, 0, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 2, input, 0, 21, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWBRX, 0, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 0, input, 3, 0, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 1, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDU, 0, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDUX, 0, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 0, input, 3, 0, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 1, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSU, 0, input, 3, 14, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 0, input, 3, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 1, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 0, input, 0, 0, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 1, input, 0, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 2, input, 0, 21, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSUX, 0, input, 3, 14, 23);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 0, input, 3, 0, 0x10000);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 1, input, 3, 14, 0x10000);