mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-16 23:17:29 +00:00
simd_builder: fixups
Fix resetting vmask in reduce() step. Fix AVX-512 loads in vec_load_unaligned(). Fix bzhi reg size in build_loop().
This commit is contained in:
parent
5d91caebe9
commit
a9437d69ab
@ -420,8 +420,9 @@ void asmjit::simd_builder::_init(uint new_vsize)
|
||||
vsize = new_vsize ? new_vsize : 16;
|
||||
}
|
||||
|
||||
if (!new_vsize && utils::has_avx512())
|
||||
if (utils::has_avx512())
|
||||
{
|
||||
if (!new_vsize)
|
||||
vmask = -1;
|
||||
}
|
||||
else
|
||||
@ -604,7 +605,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
||||
this->emit(x86::Inst::kIdVpinsrw, x86::Xmm(v.id()), x86::Xmm(v.id()), src, Imm(0));
|
||||
else if (vsize == 2)
|
||||
this->emit(x86::Inst::kIdPinsrw, v, src, Imm(0));
|
||||
else if (vmask && vmask < 8)
|
||||
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||
this->emit(x86::Inst::kIdVmovdqu16, v, src);
|
||||
else
|
||||
return vec_load_unaligned(vsize, v, src);
|
||||
@ -616,7 +617,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
||||
this->emit(x86::Inst::kIdVmovd, x86::Xmm(v.id()), src);
|
||||
else if (vsize == 4)
|
||||
this->emit(x86::Inst::kIdMovd, v, src);
|
||||
else if (vmask && vmask < 8)
|
||||
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||
this->emit(x86::Inst::kIdVmovdqu32, v, src);
|
||||
else
|
||||
return vec_load_unaligned(vsize, v, src);
|
||||
@ -628,7 +629,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
||||
this->emit(x86::Inst::kIdVmovq, x86::Xmm(v.id()), src);
|
||||
else if (vsize == 8)
|
||||
this->emit(x86::Inst::kIdMovq, v, src);
|
||||
else if (vmask && vmask < 8)
|
||||
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||
this->emit(x86::Inst::kIdVmovdqu64, v, src);
|
||||
else
|
||||
return vec_load_unaligned(vsize, v, src);
|
||||
@ -636,7 +637,9 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
||||
else if (esize >= 16)
|
||||
{
|
||||
ensure(vsize >= 16);
|
||||
if (utils::has_avx())
|
||||
if ((vmask && vmask < 8) || vsize >= 64)
|
||||
this->emit(x86::Inst::kIdVmovdqu64, v, src); // Not really needed
|
||||
else if (utils::has_avx())
|
||||
this->emit(x86::Inst::kIdVmovdqu, v, src);
|
||||
else
|
||||
this->emit(x86::Inst::kIdMovups, v, src);
|
||||
|
@ -323,11 +323,20 @@ namespace asmjit
|
||||
// Build single last iteration (masked)
|
||||
this->test(reg_cnt, reg_cnt);
|
||||
this->jz(exit);
|
||||
this->bzhi(reg_cnt, x86::Mem(consts[~u128()], 0), reg_cnt);
|
||||
this->kmovq(x86::k7, reg_cnt);
|
||||
|
||||
if (esize == 1 && vsize == 64)
|
||||
{
|
||||
this->bzhi(reg_cnt.r64(), x86::Mem(consts[~u128()], 0), reg_cnt.r64());
|
||||
this->kmovq(x86::k7, reg_cnt.r64());
|
||||
}
|
||||
else
|
||||
{
|
||||
this->bzhi(reg_cnt.r32(), x86::Mem(consts[~u128()], 0), reg_cnt.r32());
|
||||
this->kmovd(x86::k7, reg_cnt.r32());
|
||||
}
|
||||
|
||||
vmask = 7;
|
||||
build();
|
||||
vmask = -1;
|
||||
|
||||
// Rollout reduction step
|
||||
this->bind(exit);
|
||||
|
Loading…
Reference in New Issue
Block a user