From d28b0ba2faf72ebc41a53059de17f3285b374e53 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Tue, 7 Sep 2021 19:42:05 +0300
Subject: [PATCH] SPU LLVM: implement spu_re, spu_rsqrte

Improve matching with peek_through_bitcasts() helper.
Implement erase_stores() helper.
---
 rpcs3/Emu/CPU/CPUTranslator.cpp  | 35 +++++++++++++++++++++
 rpcs3/Emu/CPU/CPUTranslator.h    | 15 ++++++++-
 rpcs3/Emu/Cell/SPURecompiler.cpp | 53 ++++++++++++++++++++++++++++----
 3 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp
index 4262a87b94..52f67e7569 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.cpp
+++ b/rpcs3/Emu/CPU/CPUTranslator.cpp
@@ -7,6 +7,18 @@

 llvm::LLVMContext g_llvm_ctx;

+llvm::Value* peek_through_bitcasts(llvm::Value* arg)
+{
+	llvm::CastInst* i;
+
+	while ((i = llvm::dyn_cast_or_null<llvm::CastInst>(arg)) && i->getOpcode() == llvm::Instruction::BitCast)
+	{
+		arg = i->getOperand(0);
+	}
+
+	return arg;
+}
+
 cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
 	: m_context(g_llvm_ctx)
 	, m_module(_module)
@@ -312,4 +324,27 @@ void cpu_translator::replace_intrinsics(llvm::Function& f)
 	}
 }

+void cpu_translator::erase_stores(llvm::ArrayRef<llvm::Value*> args)
+{
+	for (auto v : args)
+	{
+		for (auto it = v->use_begin(); it != v->use_end(); ++it)
+		{
+			llvm::Value* i = *it;
+			llvm::CastInst* bci = nullptr;
+
+			// Walk through bitcasts
+			while (i && (bci = llvm::dyn_cast<llvm::CastInst>(i)) && bci->getOpcode() == llvm::Instruction::BitCast)
+			{
+				i = *bci->use_begin();
+			}
+
+			if (auto si = llvm::dyn_cast_or_null<llvm::StoreInst>(i))
+			{
+				si->eraseFromParent();
+			}
+		}
+	}
+}
+
 #endif
diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
index ee644ce0c4..52d3f690d0 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -427,6 +427,9 @@ using llvm_common_t = std::enable_if_t<(is_llvm_expr_of::ok && ...), t
 template
 using llvm_match_tuple = decltype(std::tuple_cat(std::declval&>().match(std::declval(), nullptr)...));

+// Helper function
+llvm::Value* peek_through_bitcasts(llvm::Value*);
+
 template >>
 struct llvm_match_t
 {
@@ -442,7 +445,8 @@ struct llvm_match_t
 	template
 	bool eq(const Args&... args) const
 	{
-		return value && ((value == args.value) && ...);
+		llvm::Value* lhs = nullptr;
+		return value && (lhs = peek_through_bitcasts(value)) && ((lhs == peek_through_bitcasts(args.value)) && ...);
 	}

 	llvm::Value* eval(llvm::IRBuilder<>*) const
@@ -3491,6 +3495,15 @@ public:
 	// Finalize processing custom intrinsics
 	void replace_intrinsics(llvm::Function&);

+	// Erase store instructions of provided
+	void erase_stores(llvm::ArrayRef<llvm::Value*> args);
+
+	template
+	void erase_stores(Args... args)
+	{
+		erase_stores({args.value...});
+	}
+
 	template
 	static auto pshufb(T&& a, U&& b)
 	{
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 79410b3fa4..b3236a3031 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -7778,12 +7778,9 @@ public:

 	bool is_input_positive(value_t a)
 	{
-		if (auto [ok, v0, v1] = match_expr(a, match() * match()); ok)
+		if (auto [ok, v0, v1] = match_expr(a, match() * match()); ok && v0.eq(v1))
 		{
-			if (v0.value == v1.value)
-			{
-				return true;
-			}
+			return true;
 		}

 		return false;
@@ -8496,6 +8493,18 @@ public:
 		return {"spu_fi", {std::forward(a), std::forward(b)}};
 	}

+	template
+	static llvm_calli spu_re(T&& a)
+	{
+		return {"spu_re", {std::forward(a)}};
+	}
+
+	template
+	static llvm_calli spu_rsqrte(T&& a)
+	{
+		return {"spu_rsqrte", {std::forward(a)}};
+	}
+
 	void FI(spu_opcode_t op)
 	{
 		// TODO
@@ -8527,7 +8536,39 @@ public:
 			return bitcast((b & 0xff800000u) | (bitcast(fpcast(bnew)) & ~0xff800000u)); // Inject old sign and exponent
 		});

-		set_vr(op.rt, fi(get_vr(op.ra), get_vr(op.rb)));
+		register_intrinsic("spu_re", [&](llvm::CallInst* ci)
+		{
+			const auto a = value(ci->getOperand(0));
+			return fre(a);
+		});
+
+		register_intrinsic("spu_rsqrte", [&](llvm::CallInst* ci)
+		{
+			const auto a = value(ci->getOperand(0));
+			return frsqe(fabs(a));
+		});
+
+		const auto [a, b] = get_vrs(op.ra, op.rb);
+
+		if (const auto [ok, mb] = match_expr(b, frest(match())); ok && mb.eq(a))
+		{
+			erase_stores(b);
+			set_vr(op.rt, spu_re(a));
+			return;
+		}
+
+		if (const auto [ok, mb] = match_expr(b, frsqest(match())); ok && mb.eq(a))
+		{
+			erase_stores(b);
+			set_vr(op.rt, spu_rsqrte(a));
+			return;
+		}
+
+		const auto r = eval(fi(a, b));
+		if (!m_interp_magn)
+			spu_log.todo("[%s:0x%05x] Unmatched spu_fi found", m_hash, m_pos);
+
+		set_vr(op.rt, r);
 	}

 	void CFLTS(spu_opcode_t op)