From 251fb86ab0f16633a021479085c41719ea5d041b Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 21 Jun 2016 18:01:45 +0300 Subject: [PATCH] PPU LLVM: Some instructions simplified --- rpcs3/Emu/Cell/PPUThread.cpp | 16 +----- rpcs3/Emu/Cell/PPUTranslator.cpp | 92 ++++++++++++++------------------ rpcs3/Emu/Cell/PPUTranslator.h | 13 +++-- 3 files changed, 49 insertions(+), 72 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 7a52b5ad08..ef1655b7a4 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -535,16 +535,6 @@ struct MemoryManager final : llvm::RTDyldMemoryManager return vm::reservation_update(addr, &data, sizeof(data)); } - static bool sraw_carry(s32 arg, u64 shift) - { - return (arg < 0) && (shift > 31 || (arg >> shift << shift) != arg); - } - - static bool srad_carry(s64 arg, u64 shift) - { - return (arg < 0) && (shift > 63 || (arg >> shift << shift) != arg); - } - static bool adde_carry(u64 a, u64 b, bool c) { return _addcarry_u64(c, a, b, nullptr) != 0; @@ -704,8 +694,6 @@ struct MemoryManager final : llvm::RTDyldMemoryManager { "__ldarx", (u64)&ldarx }, { "__stwcx", (u64)&stwcx }, { "__stdcx", (u64)&stdcx }, - { "__sraw_get_ca", (u64)&sraw_carry }, - { "__srad_get_ca", (u64)&srad_carry }, { "__adde_get_ca", (u64)&adde_carry }, { "__vexptefp", (u64)&sse_exp2_ps }, { "__vlogefp", (u64)&sse_log2_ps }, @@ -761,7 +749,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC)) #endif { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s', error %u", sec_name.data(), GetLastError()); + LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s'", sec_name.data()); return nullptr; } @@ -780,7 +768,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager if (::mprotect(ptr, size, PROT_READ | PROT_WRITE)) #endif { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s', error %u", sec_name.data(), GetLastError()); + LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s'", sec_name.data()); return nullptr; } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index ceb0487640..41db86b163 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -609,8 +609,8 @@ void PPUTranslator::MTVSCR(ppu_opcode_t op) void PPUTranslator::VADDCUW(ppu_opcode_t op) { - const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); - SetVr(op.vd, m_ir->CreateLShr(m_ir->CreateAdd(ab[0], ab[1]), 32)); + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, ZExt(m_ir->CreateICmpULT(m_ir->CreateAdd(ab[0], ab[1]), ab[0]), GetType())); } void PPUTranslator::VADDFP(ppu_opcode_t op) @@ -867,10 +867,10 @@ void PPUTranslator::VCTSXS(ppu_opcode_t op) { const auto b = GetVr(op.vb, VrType::vf); const auto scaled = Scale(b, op.vuimm); - const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 + //const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 const auto sat_l = m_ir->CreateFCmpOLT(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), -std::pow(2, 31)))); // TODO ??? const auto sat_h = m_ir->CreateFCmpOGE(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), std::pow(2, 31)))); - const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, FP_SAT_OP(is_nan, scaled)), GetType()); + const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, scaled), GetType()); SetVr(op.vd, m_ir->CreateSelect(sat_h, ConstantVector::getSplat(4, m_ir->getInt32(0x7fffffff)), converted)); SetSat(IsNotZero(m_ir->CreateOr(sat_l, sat_h))); } @@ -879,10 +879,10 @@ void PPUTranslator::VCTUXS(ppu_opcode_t op) { const auto b = GetVr(op.vb, VrType::vf); const auto scaled = Scale(b, op.vuimm); - const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 + //const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 const auto sat_l = m_ir->CreateFCmpOLT(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); const auto sat_h = m_ir->CreateFCmpOGE(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), std::pow(2, 32)))); // TODO ??? - const auto converted = m_ir->CreateFPToUI(FP_SAT_OP(sat_l, FP_SAT_OP(is_nan, scaled)), GetType()); + const auto converted = m_ir->CreateFPToUI(FP_SAT_OP(sat_l, scaled), GetType()); SetVr(op.vd, m_ir->CreateSelect(sat_h, ConstantVector::getSplat(4, m_ir->getInt32(0xffffffff)), converted)); SetSat(IsNotZero(m_ir->CreateOr(sat_l, sat_h))); } @@ -901,7 +901,6 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) { const auto acb = GetVrs(VrType::vf, op.va, op.vc, op.vb); SetVr(op.vd, m_ir->CreateFAdd(m_ir->CreateFMul(acb[0], acb[1]), acb[2])); - //SetVr(op.vd, Call(GetType(), "llvm.fmuladd.v4f32", acb[0], acb[1], acb[2])); } void PPUTranslator::VMAXFP(ppu_opcode_t op) @@ -1118,65 +1117,56 @@ void PPUTranslator::VMSUMUHS(ppu_opcode_t op) void PPUTranslator::VMULESB(ppu_opcode_t op) { - // TODO - const auto ab = SExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 0, 2, 4, 6, 8, 10, 12, 14 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateAShr(ab[0], 8), m_ir->CreateAShr(ab[1], 8))); } void PPUTranslator::VMULESH(ppu_opcode_t op) { - // TODO - const auto ab = SExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 0, 2, 4, 6 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateAShr(ab[0], 16), m_ir->CreateAShr(ab[1], 16))); } void PPUTranslator::VMULEUB(ppu_opcode_t op) { - // TODO - const auto ab = ZExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 0, 2, 4, 6, 8, 10, 12, 14 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateLShr(ab[0], 8), m_ir->CreateLShr(ab[1], 8))); } void PPUTranslator::VMULEUH(ppu_opcode_t op) { - // TODO - const auto ab = ZExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 0, 2, 4, 6 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateLShr(ab[0], 16), m_ir->CreateLShr(ab[1], 16))); } void PPUTranslator::VMULOSB(ppu_opcode_t op) { - // TODO - const auto ab = SExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 1, 3, 5, 7, 9, 11, 13, 15 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateAShr(m_ir->CreateShl(ab[0], 8), 8), m_ir->CreateAShr(m_ir->CreateShl(ab[1], 8), 8))); } void PPUTranslator::VMULOSH(ppu_opcode_t op) { - // TODO - const auto ab = SExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 1, 3, 5, 7 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateAShr(m_ir->CreateShl(ab[0], 16), 16), m_ir->CreateAShr(m_ir->CreateShl(ab[1], 16), 16))); } void PPUTranslator::VMULOUB(ppu_opcode_t op) { - // TODO - const auto ab = ZExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 1, 3, 5, 7, 9, 11, 13, 15 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateLShr(m_ir->CreateShl(ab[0], 8), 8), m_ir->CreateLShr(m_ir->CreateShl(ab[1], 8), 8))); } void PPUTranslator::VMULOUH(ppu_opcode_t op) { - // TODO - const auto ab = ZExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 1, 3, 5, 7 })); - SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateMul(m_ir->CreateLShr(m_ir->CreateShl(ab[0], 16), 16), m_ir->CreateLShr(m_ir->CreateShl(ab[1], 16), 16))); } void PPUTranslator::VNMSUBFP(ppu_opcode_t op) { const auto acb = GetVrs(VrType::vf, op.va, op.vc, op.vb); SetVr(op.vd, m_ir->CreateFSub(acb[2], m_ir->CreateFMul(acb[0], acb[1]))); - //SetVr(op.vd, m_ir->CreateFNeg(Call(GetType(), "llvm.fmuladd.v4f32", acb[0], acb[1], m_ir->CreateFNeg(acb[2])))); } void PPUTranslator::VNOR(ppu_opcode_t op) @@ -2445,7 +2435,6 @@ void PPUTranslator::LVEWX(ppu_opcode_t op) const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -4); const auto pos = m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 2); SetVr(op.vd, m_ir->CreateInsertElement(ConstantVector::getSplat(4, m_ir->getInt32(0)), ReadMemory(addr, GetType(), true, 4), pos)); - //Call(GetType(), "__trace", m_ir->getInt64(0)); } void PPUTranslator::MULHD(ppu_opcode_t op) @@ -2481,7 +2470,8 @@ void PPUTranslator::LBZX(ppu_opcode_t op) void PPUTranslator::LVX(ppu_opcode_t op) { - SetVr(op.vd, ReadMemory(m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), GetType(), true, 16)); + const auto data = ReadMemory(m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), GetType(), m_is_be, 16); + SetVr(op.vd, m_is_be ? data : Shuffle(data, nullptr, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 })); } void PPUTranslator::NEG(ppu_opcode_t op) @@ -2520,7 +2510,7 @@ void PPUTranslator::SUBFE(ppu_opcode_t op) const auto c = GetCarry(); const auto result = m_ir->CreateAdd(m_ir->CreateAdd(a, b), ZExt(c, GetType())); SetGpr(op.rd, result); - SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); + SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); // TODO if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subfe_get_ov", a, b, c)); } @@ -2532,7 +2522,7 @@ void PPUTranslator::ADDE(ppu_opcode_t op) const auto c = GetCarry(); const auto result = m_ir->CreateAdd(m_ir->CreateAdd(a, b), ZExt(c, GetType())); SetGpr(op.rd, result); - SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); + SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); // TODO if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__adde_get_ov", a, b, c)); } @@ -2648,7 +2638,9 @@ void PPUTranslator::STBX(ppu_opcode_t op) void PPUTranslator::STVX(ppu_opcode_t op) { - WriteMemory(m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), GetVr(op.vs, VrType::vi32), true, 16); + const auto value = GetVr(op.vs, VrType::vi8); + const auto data = m_is_be ? value : Shuffle(value, nullptr, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }); + WriteMemory(m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), data, m_is_be, 16); } void PPUTranslator::SUBFME(ppu_opcode_t op) @@ -3084,23 +3076,22 @@ void PPUTranslator::SRAW(ppu_opcode_t op) { const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f); const auto shift_arg = GetGpr(op.rs, 32); - const auto result = m_ir->CreateAShr(SExt(shift_arg), shift_num); + const auto arg_ext = SExt(shift_arg); + const auto result = m_ir->CreateAShr(arg_ext, shift_num); SetGpr(op.ra, result); - //const auto carry_mask = m_ir->CreateNot(m_ir->CreateShl(m_ir->getInt64(-1), shift_num)); - //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, carry_mask), m_ir->getInt64(0)))); - SetCarry(Call(GetType(), m_pure_attr, "__sraw_get_ca", shift_arg, shift_num)); + SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(result, shift_num)))); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } void PPUTranslator::SRAD(ppu_opcode_t op) { - const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f); + const auto shift_num = ZExt(m_ir->CreateAnd(GetGpr(op.rb), 0x7f)); // i128 const auto shift_arg = GetGpr(op.rs); - const auto result = Trunc(m_ir->CreateAShr(SExt(shift_arg), ZExt(shift_num))); + const auto arg_ext = SExt(shift_arg); // i128 + const auto res_128 = m_ir->CreateAShr(arg_ext, shift_num); // i128 + const auto result = Trunc(res_128); SetGpr(op.ra, result); - //const auto carry_mask = m_ir->CreateNot(m_ir->CreateShl(m_ir->getInt64(-1), shift_num)); - //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateOr(overshift, m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, carry_mask), m_ir->getInt64(0))))); - SetCarry(Call(GetType(), m_pure_attr, "__srad_get_ca", shift_arg, shift_num)); + SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(result, shift_num)))); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } @@ -3116,10 +3107,10 @@ void PPUTranslator::DSS(ppu_opcode_t op) void PPUTranslator::SRAWI(ppu_opcode_t op) { const auto shift_arg = GetGpr(op.rs, 32); - const auto result = m_ir->CreateAShr(SExt(shift_arg), op.sh32); + const auto res_32 = m_ir->CreateAShr(shift_arg, op.sh32); + const auto result = SExt(res_32); SetGpr(op.ra, result); - //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, ~(~0ull << op.sh32)), m_ir->getInt32(0)))); - SetCarry(Call(GetType(), m_pure_attr, "__sraw_get_ca", shift_arg, m_ir->getInt64(op.sh32))); + SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(shift_arg, m_ir->CreateShl(res_32, op.sh32)))); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } @@ -3128,8 +3119,7 @@ void PPUTranslator::SRADI(ppu_opcode_t op) const auto shift_arg = GetGpr(op.rs); const auto result = m_ir->CreateAShr(shift_arg, op.sh64); SetGpr(op.ra, result); - //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, ~(~0ull << op.sh64)), m_ir->getInt64(0)))); - SetCarry(Call(GetType(), m_pure_attr, "__srad_get_ca", shift_arg, m_ir->getInt64(op.sh32))); + SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(shift_arg, m_ir->CreateShl(result, op.sh64)))); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 253d73a0cc..10f8969fbe 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -294,13 +294,6 @@ public: // Create shuffle instruction with constant args llvm::Value* Shuffle(llvm::Value* left, llvm::Value* right, std::initializer_list indices); - template - std::array Shuffle(std::array left, std::array right, std::initializer_list indices) - { - for (std::size_t i = 0; i < N; i++) left[i] = Shuffle(left[i], right[i], indices); - return left; - } - // Create sign extension (with double size if type is nullptr) llvm::Value* SExt(llvm::Value* value, llvm::Type* = nullptr); @@ -406,6 +399,12 @@ public: return TypeGen::get(m_context); } + template + llvm::PointerType* GetPtrType() + { + return TypeGen::get(m_context)->getPointerTo(); + } + // Get an undefined value with specified type template llvm::Value* GetUndef()