Added support for 3 more instructions

This commit is contained in:
S Gopal Rajagopal 2014-09-02 15:11:32 +05:30
parent 659a3d88c6
commit d8d0c0d2fe
3 changed files with 169 additions and 36 deletions

View File

@ -22,11 +22,13 @@ PPULLVMRecompiler::PPULLVMRecompiler(PPUThread & ppu)
m_module = new Module("Module", m_llvm_context);
m_gpr = new GlobalVariable(*m_module, ArrayType::get(Type::getInt64Ty(m_llvm_context), 32), false, GlobalValue::ExternalLinkage, nullptr, "gpr");
m_cr = new GlobalVariable(*m_module, Type::getInt32Ty(m_llvm_context), false, GlobalValue::ExternalLinkage, nullptr, "cr");
m_vpr = new GlobalVariable(*m_module, ArrayType::get(Type::getIntNTy(m_llvm_context, 128), 32), false, GlobalValue::ExternalLinkage, nullptr, "vpr");
m_vscr = new GlobalVariable(*m_module, Type::getInt32Ty(m_llvm_context), false, GlobalValue::ExternalLinkage, nullptr, "vscr");
m_execution_engine = EngineBuilder(m_module).create();
m_execution_engine->addGlobalMapping(m_gpr, m_ppu.GPR);
m_execution_engine->addGlobalMapping(m_cr, &m_ppu.CR);
m_execution_engine->addGlobalMapping(m_vpr, m_ppu.VPR);
m_execution_engine->addGlobalMapping(m_vscr, &m_ppu.VSCR);
@ -333,57 +335,62 @@ void PPULLVMRecompiler::VAVGUW(u32 vd, u32 va, u32 vb) {
}
void PPULLVMRecompiler::VCFSX(u32 vd, u32 uimm5, u32 vb) {
u32 scale = 1 << uimm5;
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
auto res_v4f32 = m_ir_builder.CreateSIToFP(vb_v4i32, VectorType::get(Type::getFloatTy(m_llvm_context), 4));
for (uint w = 0; w < 4; w++) {
m_ppu.VPR[vd]._f[w] = ((float)m_ppu.VPR[vb]._s32[w]) / scale;
if (uimm5) {
float scale = (float)(1 << uimm5);
float scale_v4f32[4] = {scale, scale, scale, scale};
res_v4f32 = m_ir_builder.CreateFDiv(res_v4f32, ConstantDataVector::get(m_llvm_context, scale_v4f32));
}
SetVr(vd, res_v4f32);
}
void PPULLVMRecompiler::VCFUX(u32 vd, u32 uimm5, u32 vb) {
u32 scale = 1 << uimm5;
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
auto res_v4f32 = m_ir_builder.CreateUIToFP(vb_v4i32, VectorType::get(Type::getFloatTy(m_llvm_context), 4));
for (uint w = 0; w < 4; w++) {
m_ppu.VPR[vd]._f[w] = ((float)m_ppu.VPR[vb]._u32[w]) / scale;
if (uimm5) {
float scale = (float)(1 << uimm5);
float scale_v4f32[4] = {scale, scale, scale, scale};
res_v4f32 = m_ir_builder.CreateFDiv(res_v4f32, ConstantDataVector::get(m_llvm_context, scale_v4f32));
}
SetVr(vd, res_v4f32);
}
void PPULLVMRecompiler::VCMPBFP(u32 vd, u32 va, u32 vb) {
//for (uint w = 0; w < 4; w++)
//{
// u32 mask = 0;
auto va_v4f32 = GetVrAsFloatVec(va);
auto vb_v4f32 = GetVrAsFloatVec(vb);
auto cmp_gt_v4i1 = m_ir_builder.CreateFCmpOGT(va_v4f32, vb_v4f32);
vb_v4f32 = m_ir_builder.CreateFNeg(vb_v4f32);
auto cmp_lt_v4i1 = m_ir_builder.CreateFCmpOLT(va_v4f32, vb_v4f32);
auto cmp_gt_v4i32 = m_ir_builder.CreateZExt(cmp_gt_v4i1, VectorType::get(Type::getInt32Ty(m_llvm_context), 4));
auto cmp_lt_v4i32 = m_ir_builder.CreateZExt(cmp_lt_v4i1, VectorType::get(Type::getInt32Ty(m_llvm_context), 4));
cmp_gt_v4i32 = m_ir_builder.CreateShl(cmp_gt_v4i32, 31);
cmp_lt_v4i32 = m_ir_builder.CreateShl(cmp_lt_v4i32, 30);
auto res_v4i32 = m_ir_builder.CreateOr(cmp_gt_v4i32, cmp_lt_v4i32);
SetVr(vd, res_v4i32);
// const float A = CheckVSCR_NJ(m_ppu.VPR[va]._f[w]);
// const float B = CheckVSCR_NJ(m_ppu.VPR[vb]._f[w]);
// if (A > B) mask |= 1 << 31;
// if (A < -B) mask |= 1 << 30;
// m_ppu.VPR[vd]._u32[w] = mask;
//}
// TODO: Implement NJ mode
}
void PPULLVMRecompiler::VCMPBFP_(u32 vd, u32 va, u32 vb) {
//bool allInBounds = true;
VCMPBFP(vd, va, vb);
//for (uint w = 0; w < 4; w++)
//{
// u32 mask = 0;
// const float A = CheckVSCR_NJ(m_ppu.VPR[va]._f[w]);
// const float B = CheckVSCR_NJ(m_ppu.VPR[vb]._f[w]);
// if (A > B) mask |= 1 << 31;
// if (A < -B) mask |= 1 << 30;
// m_ppu.VPR[vd]._u32[w] = mask;
// if (mask)
// allInBounds = false;
//}
//// Bit n°2 of CR6
//m_ppu.SetCRBit(6, 0x2, allInBounds);
auto vd_v16i8 = GetVrAsIntVec(vd, 8);
u8 mask_v16i8[16] = {3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
vd_v16i8 = m_ir_builder.CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vd_v16i8, ConstantDataVector::get(m_llvm_context, mask_v16i8));
auto vd_v4i32 = m_ir_builder.CreateBitCast(vd_v16i8, VectorType::get(Type::getInt32Ty(m_llvm_context), 4));
auto vd_mask_i32 = m_ir_builder.CreateExtractElement(vd_v4i32, m_ir_builder.getInt32(0));
auto cmp_i1 = m_ir_builder.CreateICmpEQ(vd_mask_i32, m_ir_builder.getInt32(0));
auto cmp_i32 = m_ir_builder.CreateZExt(cmp_i1, Type::getInt32Ty(m_llvm_context));
cmp_i32 = m_ir_builder.CreateShl(cmp_i32, 5);
auto cr_i32 = (Value *)m_ir_builder.CreateLoad(m_cr);
cr_i32 = m_ir_builder.CreateAnd(cr_i32, 0xFFFFFF0F);
cr_i32 = m_ir_builder.CreateOr(cr_i32, cmp_i32);
m_ir_builder.CreateStore(cr_i32, m_cr);
}
void PPULLVMRecompiler::VCMPEQFP(u32 vd, u32 va, u32 vb) {

View File

@ -451,6 +451,9 @@ private:
/// Global variable in m_module that corresponds to m_ppu.GPR
llvm::GlobalVariable * m_gpr;
/// Global variable in m_module that corresponds to m_ppu.CR
llvm::GlobalVariable * m_cr;
/// Global variable in m_module that corresponds to m_ppu.VPR
llvm::GlobalVariable * m_vpr;

View File

@ -438,4 +438,127 @@ void PPULLVMRecompiler::RunAllTests() {
return m_ppu.VPR[0].Equals((u32)0x4DF01234, (u32)0x4DF01234, (u32)0x4DF01234, (u32)0x4DF01234);
};
RunTest("VAVGUW.1", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCFSX(0, 0, 1);
};
input = [this]() {
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._u32[0] = m_ppu.VPR[1]._u32[1] = m_ppu.VPR[1]._u32[2] = m_ppu.VPR[1]._u32[3] = 0x99999999;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s",
m_ppu.VPR[0].ToString().c_str(),
m_ppu.VPR[1].ToString().c_str());
return m_ppu.VPR[0].Equals(-1717986944.0f, -1717986944.0f, -1717986944.0f, -1717986944.0f);
};
RunTest("VCFSX.1", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCFSX(0, 3, 1);
};
input = [this]() {
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._u32[0] = m_ppu.VPR[1]._u32[1] = m_ppu.VPR[1]._u32[2] = m_ppu.VPR[1]._u32[3] = 0x99999999;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s",
m_ppu.VPR[0].ToString().c_str(),
m_ppu.VPR[1].ToString().c_str());
return m_ppu.VPR[0].Equals(-214748368.0f, -214748368.0f, -214748368.0f, -214748368.0f);
};
RunTest("VCFSX.2", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCFUX(0, 0, 1);
};
input = [this]() {
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._u32[0] = m_ppu.VPR[1]._u32[1] = m_ppu.VPR[1]._u32[2] = m_ppu.VPR[1]._u32[3] = 0x99999999;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s",
m_ppu.VPR[0].ToString().c_str(),
m_ppu.VPR[1].ToString().c_str());
return m_ppu.VPR[0].Equals(2576980480.0f, 2576980480.0f, 2576980480.0f, 2576980480.0f);
};
RunTest("VCFUX.1", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCFUX(0, 3, 1);
};
input = [this]() {
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._u32[0] = m_ppu.VPR[1]._u32[1] = m_ppu.VPR[1]._u32[2] = m_ppu.VPR[1]._u32[3] = 0x99999999;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s",
m_ppu.VPR[0].ToString().c_str(),
m_ppu.VPR[1].ToString().c_str());
return m_ppu.VPR[0].Equals(322122560.0f, 322122560.0f, 322122560.0f, 322122560.0f);
};
RunTest("VCFUX.2", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCMPBFP(0, 1, 2);
};
input = [this]() {
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._f[0] = m_ppu.VPR[1]._f[1] = 150.0f;
m_ppu.VPR[1]._f[2] = m_ppu.VPR[1]._f[3] = 50.0f;
m_ppu.VPR[2]._f[0] = m_ppu.VPR[2]._f[1] = m_ppu.VPR[2]._f[2] = m_ppu.VPR[2]._f[3] = 100.0f;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s, VPR[2]=%s",
m_ppu.VPR[0].ToString(true).c_str(),
m_ppu.VPR[1].ToString().c_str(),
m_ppu.VPR[2].ToString().c_str());
return m_ppu.VPR[0].Equals((u32)0x80000000, (u32)0x80000000, (u32)0x00000000, (u32)0x00000000);
};
RunTest("VCMPBFP.1", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCMPBFP_(0, 1, 2);
};
input = [this]() {
m_ppu.SetCR(6, 0xF);
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._f[0] = m_ppu.VPR[1]._f[1] = 150.0f;
m_ppu.VPR[1]._f[2] = m_ppu.VPR[1]._f[3] = 50.0f;
m_ppu.VPR[2]._f[0] = m_ppu.VPR[2]._f[1] = m_ppu.VPR[2]._f[2] = m_ppu.VPR[2]._f[3] = 100.0f;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s, VPR[2]=%s, CR=0x%X",
m_ppu.VPR[0].ToString(true).c_str(),
m_ppu.VPR[1].ToString().c_str(),
m_ppu.VPR[2].ToString().c_str(), m_ppu.CR.CR);
return m_ppu.VPR[0].Equals((u32)0x80000000, (u32)0x80000000, (u32)0x00000000, (u32)0x00000000) && (m_ppu.GetCR(6) == 0);
};
RunTest("VCMPBFP_.1", test_case, input, check_result);
///////////////////////////////////////////////////////////////////////////
test_case = [this]() {
VCMPBFP_(0, 1, 2);
};
input = [this]() {
m_ppu.SetCR(6, 0xF);
m_ppu.VPR[0]._u32[0] = m_ppu.VPR[0]._u32[1] = m_ppu.VPR[0]._u32[2] = m_ppu.VPR[0]._u32[3] = 0x00000000;
m_ppu.VPR[1]._f[0] = m_ppu.VPR[1]._f[1] = 50.0f;
m_ppu.VPR[1]._f[2] = m_ppu.VPR[1]._f[3] = 50.0f;
m_ppu.VPR[2]._f[0] = m_ppu.VPR[2]._f[1] = m_ppu.VPR[2]._f[2] = m_ppu.VPR[2]._f[3] = 100.0f;
};
check_result = [this](std::string & msg) {
msg = fmt::Format("VPR[0]=%s, VPR[1]=%s, VPR[2]=%s, CR=0x%X",
m_ppu.VPR[0].ToString(true).c_str(),
m_ppu.VPR[1].ToString().c_str(),
m_ppu.VPR[2].ToString().c_str(), m_ppu.CR.CR);
return m_ppu.VPR[0].Equals((u32)0x00000000, (u32)0x00000000, (u32)0x00000000, (u32)0x00000000) && (m_ppu.GetCR(6) == 2);
};
RunTest("VCMPBFP_.2", test_case, input, check_result);
}