Mirror of https://github.com/RPCS3/rpcs3.git

Replace utils::cntlz{32,64} with std::countl_zero

parent d0c199d455
commit 032e7c0491
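Note: the removed helpers wrapped compiler intrinsics for counting leading zeros; C++20's <bit> header exposes the same operation as std::countl_zero (and std::countl_one for the complemented uses further down), with a zero argument well defined. The sketch below is not part of the commit; it assumes RPCS3's u32/u64 aliases stand for std::uint32_t/std::uint64_t and simply spells out the mapping the diff relies on:

// Illustrative only: mapping from the dropped utils helpers to C++20 <bit>.
#include <bit>
#include <cstdint>
#include <cassert>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

int main()
{
	// utils::cntlz32(arg) returned 32 for arg == 0; std::countl_zero matches that.
	assert(std::countl_zero<u32>(0) == 32);
	assert(std::countl_zero<u32>(0x00010000u) == 15);

	// utils::cntlz32(~x) (leading zeros of the complement) becomes std::countl_one(x).
	const u32 x = 0xfff00000u;
	assert(std::countl_zero<u32>(~x) == std::countl_one(x));

	// utils::cntlz64(arg, true) assumed arg != 0; call sites keep that guarantee
	// (e.g. by or-ing in a low bit), so plain std::countl_zero is a drop-in.
	assert(std::countl_zero<u64>(1) == 63);
}

The only call sites that needed care are those that passed nonzero = true; each of them already guarantees a nonzero argument, as the individual hunks show.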
@@ -4,30 +4,6 @@
 
 namespace utils
 {
-	inline u32 cntlz32(u32 arg, bool nonzero = false)
-	{
-#ifdef _MSC_VER
-		ulong res;
-		return _BitScanReverse(&res, arg) || nonzero ? res ^ 31 : 32;
-#elif __LZCNT__
-		return _lzcnt_u32(arg);
-#else
-		return arg || nonzero ? __builtin_clz(arg) : 32;
-#endif
-	}
-
-	inline u64 cntlz64(u64 arg, bool nonzero = false)
-	{
-#ifdef _MSC_VER
-		ulong res;
-		return _BitScanReverse64(&res, arg) || nonzero ? res ^ 63 : 64;
-#elif __LZCNT__
-		return _lzcnt_u64(arg);
-#else
-		return arg || nonzero ? __builtin_clzll(arg) : 64;
-#endif
-	}
-
 	inline u8 popcnt32(u32 arg)
 	{
 #ifdef _MSC_VER
@@ -59,7 +59,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
 
 	const auto write_octal = [&](u64 value, u64 min_num)
 	{
-		out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (utils::cntlz64(value | 1, true) + 2) / 3), '0');
+		out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (std::countl_zero<u64>(value | 1) + 2) / 3), '0');
 
 		// Write in reversed order
 		for (auto i = out.rbegin(); value; i++, value /= 8)
@@ -70,7 +70,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
 
 	const auto write_hex = [&](u64 value, bool upper, u64 min_num)
 	{
-		out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - utils::cntlz64(value | 1, true) / 4), '0');
+		out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - std::countl_zero<u64>(value | 1) / 4), '0');
 
 		// Write in reversed order
 		for (auto i = out.rbegin(); value; i++, value /= 16)
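Note: in both formatter lambdas the `value | 1` keeps the argument nonzero, which is why the old nonzero = true flag can simply be dropped: the width formula still yields at least one digit for value == 0. A compile-time restatement of the hex variant (a sketch, not the cfmt code):

// Sketch: digit-width computation used by write_hex, made safe by `value | 1`.
#include <bit>
#include <cstdint>

constexpr std::uint64_t hex_digits(std::uint64_t value)
{
	// 64/4 total nibbles minus the all-zero leading nibbles; `| 1` avoids countl_zero(0).
	return 64 / 4 - std::countl_zero<std::uint64_t>(value | 1) / 4;
}

static_assert(hex_digits(0) == 1);      // "0"
static_assert(hex_digits(0xff) == 2);   // "ff"
static_assert(hex_digits(0x100) == 3);  // "100"
static_assert(hex_digits(~0ull) == 16); // full width

int main() {}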
@@ -2114,7 +2114,7 @@ s32 _spurs::add_workload(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::cptr<vo
 	const u32 wmax = spurs->flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed
 	spurs->wklEnabled.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
 	{
-		wnum = utils::cntlz32(~value); // found empty position
+		wnum = std::countl_one<u32>(value); // found empty position
 		if (wnum < wmax)
 		{
 			value |= (0x80000000 >> wnum); // set workload bit
@@ -2237,7 +2237,7 @@ s32 _spurs::add_workload(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::cptr<vo
 				else
 				{
 					k |= 0x80000000 >> current->uniqueId;
-					res_wkl = utils::cntlz32(~k);
+					res_wkl = std::countl_one<u32>(k);
 				}
 			}
 		}
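Note: both SPURS spots scan a bitmap whose occupied bits grow from the MSB down, so counting leading zeros of the complement is the same as counting leading ones of the value itself, which is exactly what std::countl_one does. An illustrative sketch of that allocation pattern (not the SPURS code):

// Illustrative: allocating the first free slot in an MSB-first bitmask,
// the pattern behind `wnum = std::countl_one<u32>(value)`.
#include <bit>
#include <cstdint>
#include <cassert>

int main()
{
	std::uint32_t mask = 0b1110'0000'0000'0000'0000'0000'0000'0000u; // slots 0..2 taken

	// Index of the first clear bit, counting from the MSB.
	const int slot = std::countl_one(mask);
	assert(slot == 3);
	assert(slot == std::countl_zero(~mask)); // the old cntlz32(~value) formulation

	mask |= 0x80000000u >> slot; // claim it, as the diff does with the workload bit
	assert(std::countl_one(mask) == 4);
}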
@@ -1017,7 +1017,7 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptr<CellSync
 		{
 			var9_ = 1 << var9_;
 		}
-		s32 var9 = utils::cntlz32(static_cast<u16>(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16
+		s32 var9 = std::countl_zero<u32>(static_cast<u16>(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16
 
 		s32 var5 = push3.m_h6 | var9_;
 		if (var9 & 0x30)
@@ -1317,7 +1317,8 @@ error_code _cellSyncLFQueueCompletePopPointer(ppu_thread& ppu, vm::ptr<CellSyncL
 		{
 			var9_ = 1 << var9_;
 		}
-		s32 var9 = utils::cntlz32(static_cast<u16>(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16
+
+		s32 var9 = std::countl_zero<u32>(static_cast<u16>(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16
 
 		s32 var5 = pop3.m_h2 | var9_;
 		if (var9 & 0x30)
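Note: in both queue helpers the argument is a u16 zero-extended to u32, so std::countl_zero<u32> reports 16 extra leading zeros and the code subtracts them back out; the result is the leading-zero count of the 16-bit complement, i.e. the number of leading ones in the low half-word. A small check of that identity (illustrative only, not the cellSync code):

// Illustrative: countl_zero on a zero-extended u16 vs. the 16-bit count the code wants.
#include <bit>
#include <cstdint>
#include <cassert>

int main()
{
	const std::uint16_t h = 0xe007; // some queue bitfield value
	const std::uint16_t inv = static_cast<std::uint16_t>(~h);

	// What the diff computes: leading zeros of the 32-bit zero-extension, minus 16.
	const int var9 = std::countl_zero<std::uint32_t>(inv) - 16;

	// The same quantity computed directly on 16 bits.
	assert(var9 == std::countl_zero(inv));
	assert(var9 == std::countl_one(h));
}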
@@ -1238,7 +1238,7 @@ struct ppu_acontext
 		if (min < max)
 		{
 			// Inverted constant MSB mask
-			const u64 mix = ~0ull >> utils::cntlz64(min ^ max, true);
+			const u64 mix = ~0ull >> std::countl_zero(min ^ max);
 			r.bmin |= min & ~mix;
 			r.bmax &= max | mix;
 
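Note: dropping the old nonzero flag is safe here because the enclosing `if (min < max)` guarantees `min ^ max != 0`, so std::countl_zero returns at most 63 and the right shift of ~0ull stays well defined. A compile-time restatement of the mask construction (a sketch under that precondition, not the analyser code):

// Sketch: the "inverted constant MSB mask" trick under the min < max precondition.
#include <bit>
#include <cstdint>

constexpr std::uint64_t msb_mix(std::uint64_t min, std::uint64_t max)
{
	// Precondition: min < max, hence min ^ max != 0 and the shift count is <= 63.
	return ~0ull >> std::countl_zero(min ^ max);
}

// Bits above the first differing bit are constant; bits below may vary.
static_assert(msb_mix(0x1200, 0x12ff) == 0xff);
static_assert(msb_mix(0, 1) == 1);

int main() {}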
@ -3250,7 +3250,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op)
|
|||||||
if (op.l11)
|
if (op.l11)
|
||||||
{
|
{
|
||||||
// MFOCRF
|
// MFOCRF
|
||||||
const u32 n = utils::cntlz32(op.crm) & 7;
|
const u32 n = std::countl_zero<u32>(op.crm) & 7;
|
||||||
const u32 p = n * 4;
|
const u32 p = n * 4;
|
||||||
const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0;
|
const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0;
|
||||||
|
|
||||||
@@ -3299,7 +3299,7 @@ bool ppu_interpreter::SLW(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::CNTLZW(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.gpr[op.ra] = utils::cntlz32(static_cast<u32>(ppu.gpr[op.rs]));
+	ppu.gpr[op.ra] = std::countl_zero(static_cast<u32>(ppu.gpr[op.rs]));
 	if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
 	return true;
 }
@@ -3377,7 +3377,7 @@ bool ppu_interpreter::LWZUX(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::CNTLZD(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.gpr[op.ra] = utils::cntlz64(ppu.gpr[op.rs]);
+	ppu.gpr[op.ra] = std::countl_zero(ppu.gpr[op.rs]);
 	if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
 	return true;
 }
@@ -3537,7 +3537,7 @@ bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op)
 	{
 		// MTOCRF
 
-		const u32 n = utils::cntlz32(op.crm) & 7;
+		const u32 n = std::countl_zero<u32>(op.crm) & 7;
 		const u64 v = (s >> ((n * 4) ^ 0x1c)) & 0xf;
 		ppu.cr.fields[n] = *reinterpret_cast<const u32*>(s_table + v);
 	}
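Note: in the MFOCRF/MTOCRF forms the 8-bit CRM mask is expected to be one-hot, and `std::countl_zero<u32>(op.crm) & 7` turns that single set bit into a CR field index 0..7. The explicit <u32> template argument appears to be there to force a conversion from the opcode's bitfield wrapper before template deduction. An illustrative check of the index mapping (the opcode type itself is not reproduced here):

// Illustrative: one-hot 8-bit CRM mask to CR field index, as in
// `std::countl_zero<u32>(op.crm) & 7`.
#include <bit>
#include <cstdint>
#include <cassert>

int main()
{
	for (int field = 0; field < 8; field++)
	{
		const std::uint32_t crm = 0x80u >> field; // one-hot mask, MSB selects CR0
		assert((std::countl_zero<std::uint32_t>(crm) & 7) == field);
	}
}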
@@ -675,7 +675,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
 				}
 
 				// Determine which value will be duplicated at hole positions
-				const u32 w3 = func.data.at((j - start + ~utils::cntlz32(cmask, true) % 4 * 4) / 4);
+				const u32 w3 = func.data.at((j - start + ~std::countl_zero(cmask) % 4 * 4) / 4);
 				words.push_back(cmask & 1 ? func.data[(j - start + 0) / 4] : w3);
 				words.push_back(cmask & 2 ? func.data[(j - start + 4) / 4] : w3);
 				words.push_back(cmask & 4 ? func.data[(j - start + 8) / 4] : w3);
@@ -903,7 +903,7 @@ bool spu_interpreter::CLZ(spu_thread& spu, spu_opcode_t op)
 {
 	for (u32 i = 0; i < 4; i++)
 	{
-		spu.gpr[op.rt]._u32[i] = utils::cntlz32(spu.gpr[op.ra]._u32[i]);
+		spu.gpr[op.rt]._u32[i] = std::countl_zero(spu.gpr[op.ra]._u32[i]);
 	}
 	return true;
 }
@@ -697,7 +697,7 @@ namespace vm
 		const u32 size = ::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0);
 
 		// Check alignment (it's page allocation, so passing small values there is just silly)
-		if (align < min_page_size || align != (0x80000000u >> utils::cntlz32(align, true)))
+		if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align)))
 		{
 			fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align);
 		}
@@ -992,7 +992,7 @@ namespace vm
 		const u32 size = ::align(orig_size, 0x10000);
 
 		// Check alignment
-		if (align < 0x10000 || align != (0x80000000u >> utils::cntlz32(align, true)))
+		if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align)))
 		{
 			fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align);
 		}
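Note: in both vm checks `align == (0x80000000u >> std::countl_zero(align))` is a power-of-two test; the short-circuited `align < min_page_size` (or `align < 0x10000`) clause already rejects zero, so the shift count never reaches 32. A sketch of the equivalence with std::has_single_bit (illustrative; the commit keeps the shift form):

// Sketch: the alignment test is a power-of-two check for nonzero values.
#include <bit>
#include <cstdint>
#include <cassert>

bool is_pow2_by_shift(std::uint32_t align)
{
	// Caller must reject align == 0 first (the diff does so via the min-size clause).
	return align == (0x80000000u >> std::countl_zero(align));
}

int main()
{
	for (std::uint32_t v : {1u, 2u, 0x10000u, 0x8000'0000u})
		assert(is_pow2_by_shift(v) && std::has_single_bit(v));

	for (std::uint32_t v : {3u, 0x10001u, 0xffff'ffffu})
		assert(!is_pow2_by_shift(v) && !std::has_single_bit(v));
}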
@@ -3,7 +3,6 @@
 #include "../system_config.h"
 #include "Utilities/address_range.h"
 #include "Utilities/geometry.h"
-#include "Utilities/asm.h"
 #include "gcm_enums.h"
 
 #include <memory>
@@ -239,19 +238,19 @@ namespace rsx
 	//
 	static inline u32 floor_log2(u32 value)
 	{
-		return value <= 1 ? 0 : utils::cntlz32(value, true) ^ 31;
+		return value <= 1 ? 0 : std::countl_zero(value) ^ 31;
 	}
 
 	static inline u32 ceil_log2(u32 value)
 	{
-		return value <= 1 ? 0 : utils::cntlz32((value - 1) << 1, true) ^ 31;
+		return value <= 1 ? 0 : std::countl_zero((value - 1) << 1) ^ 31;
 	}
 
 	static inline u32 next_pow2(u32 x)
 	{
 		if (x <= 2) return x;
 
-		return static_cast<u32>((1ULL << 32) >> utils::cntlz32(x - 1, true));
+		return static_cast<u32>((1ULL << 32) >> std::countl_zero(x - 1));
 	}
 
 	static inline bool fcmp(float a, float b, float epsilon = 0.000001f)
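Note: the rsx helpers could also be phrased with the other C++20 <bit> functions; the commit keeps the countl_zero form, but the relationships below (a sketch, not the repository code) show what the expressions compute. One edge case to keep in mind: next_pow2(0) stays 0 through the early return, whereas std::bit_ceil(0) is 1.

// Sketch: cross-checking the rsx log2 helpers against std::bit_width / std::bit_ceil.
#include <bit>
#include <cstdint>
#include <cassert>

using u32 = std::uint32_t;

static u32 floor_log2(u32 value) { return value <= 1 ? 0 : std::countl_zero(value) ^ 31; }
static u32 ceil_log2(u32 value)  { return value <= 1 ? 0 : std::countl_zero((value - 1) << 1) ^ 31; }
static u32 next_pow2(u32 x)      { return x <= 2 ? x : static_cast<u32>((1ULL << 32) >> std::countl_zero(x - 1)); }

int main()
{
	for (u32 v = 1; v < 100000; v++)
	{
		assert(floor_log2(v) == static_cast<u32>(std::bit_width(v)) - 1);
		assert(next_pow2(v) == std::bit_ceil(v));
		if (v > 1)
			assert(ceil_log2(v) == static_cast<u32>(std::bit_width(v - 1)));
	}
}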
@@ -5,7 +5,6 @@
 #endif
 
 #include "Utilities/sync.h"
-#include "Utilities/asm.h"
 
 #ifdef USE_POSIX
 #include <semaphore.h>
@@ -141,7 +140,7 @@ static sync_var* slot_get(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
 	}
 
 	// Get the number of leading equal bits to determine subslot
-	const u64 eq_bits = utils::cntlz64((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true);
+	const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);
 
 	// Proceed recursively, increment level
 	return slot_get(iptr, s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits, eq_bits + 1);
@@ -166,7 +165,7 @@ static void slot_free(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
 	}
 
 	// Get the number of leading equal bits to determine subslot
-	const u64 eq_bits = utils::cntlz64((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true);
+	const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);
 
 	// Proceed recursively, to deallocate deepest branch first
 	slot_free(iptr, s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits, eq_bits + 1);
@@ -445,7 +444,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
 		}
 
 		// Get the number of leading equal bits (between iptr and slot owner)
-		const u64 eq_bits = utils::cntlz64((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true);
+		const u64 eq_bits = std::countl_zero<u64>((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);
 
 		// Collision; need to go deeper
 		ptr = s_slot_list[(ok & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits;