arm64: implement pshufb intrinsic

This commit is contained in:
sguo35 2022-07-19 00:21:38 -07:00 committed by Megamouse
parent cc1e4d2194
commit 84a785ea67
2 changed files with 15 additions and 5 deletions

View File

@ -32,7 +32,21 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
if (m_use_ssse3)
{
#if defined(ARCH_X64)
return m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_ssse3_pshuf_b_128), {data0, index});
#elif defined(ARCH_ARM64)
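// Modified from sse2neon. PSHUFB selects a source byte with index bits 0-3 and writes zero
// when index bit 7 is set. Masking each index with 0x8F (143) keeps exactly those bits, so an
// index with bit 7 set stays out of range for a single-register TBL, which then yields zero: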
// movi v2.16b, #143
// and v1.16b, v1.16b, v2.16b
// tbl v0.16b, { v0.16b }, v1.16b
auto mask = llvm::ConstantInt::get(get_type<u8[16]>(), 0x8F);
auto and_mask = llvm::ConstantInt::get(get_type<bool[16]>(), true);
auto vec_len = llvm::ConstantInt::get(get_type<u32>(), 16);
auto index_masked = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::vp_and), {index, mask, and_mask, vec_len});
return m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl1), {data0, index_masked});
#else
#error "Unimplemented"
#endif
}
else
{

View File

@ -2896,12 +2896,8 @@ protected:
bool m_is_be;
// Allow PSHUFB intrinsic
#ifdef ARCH_X64
bool m_use_ssse3 = true;
#else
// TODO: fix the pshufb arm64 native impl using TBL instruction
bool m_use_ssse3 = false;
#endif
// Allow FMA
bool m_use_fma = false;