Mirror of https://github.com/RPCS3/rpcs3.git
vm: refactor vm::range_lock again
Move the flag bits to the highest bits of the lock word, in RWX order. Use only one reserved value (W = locked). Assume a lock size of 128 for range_locked. Add a new "Size" template argument that, when non-zero, replaces the normal size argument.
This commit is contained in:
parent bacfa9be19
commit b68bdafadc
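For orientation before the diff: the refactored g_range_lock word packs the locked address into the low 32 bits, the lock size above it, and the three access flags into the top bits (range_pos = 61). Below is a minimal sketch of that layout as inferred from the hunks that follow; the pack_/unpack_ helpers are illustrative and not part of the codebase.

#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr u64 range_pos  = 61; // top 3 bits hold the access flags
constexpr u64 range_bits = 3;

// Mirrors "g_range_lock = addr | u64{size} << 32 | flags;" from the diff.
constexpr u64 pack_range_lock(u32 addr, u32 size, u64 flags)
{
    return addr | u64{size} << 32 | flags;
}

constexpr u32 unpack_addr(u64 lock_val)
{
    return static_cast<u32>(lock_val); // low 32 bits
}

constexpr u32 unpack_size(u64 lock_val)
{
    // Shift the flag bits off the top, then drop the address off the bottom.
    return static_cast<u32>(lock_val << range_bits >> (range_bits + 32));
}

static_assert(unpack_addr(pack_range_lock(0x10000, 128, 1ull << range_pos)) == 0x10000);
static_assert(unpack_size(pack_range_lock(0x10000, 128, 1ull << range_pos)) == 128);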
@@ -2167,28 +2167,28 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 {
 case 1:
 {
-    vm::range_lock(range_lock, eal, 1);
+    vm::range_lock<true, 1>(range_lock, eal, 1);
     *reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
     range_lock->release(0);
     break;
 }
 case 2:
 {
-    vm::range_lock(range_lock, eal, 2);
+    vm::range_lock<true, 2>(range_lock, eal, 2);
     *reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
     range_lock->release(0);
     break;
 }
 case 4:
 {
-    vm::range_lock(range_lock, eal, 4);
+    vm::range_lock<true, 4>(range_lock, eal, 4);
     *reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
     range_lock->release(0);
     break;
 }
 case 8:
 {
-    vm::range_lock(range_lock, eal, 8);
+    vm::range_lock<true, 8>(range_lock, eal, 8);
     *reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
     range_lock->release(0);
     break;
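Each case above follows the same lock/copy/release sequence, with the transfer size now also supplied as a compile-time template argument. A hedged sketch of that caller pattern follows; scalar_copy_locked is a hypothetical wrapper, not code from this commit, and it assumes the repo's u32/u64/atomic_t aliases.

// Hypothetical wrapper over the pattern used in do_dma_transfer above.
// The size is passed both as the Size template constant and as the runtime
// argument, which presumably lets the fast-path check fold to a constant.
template <typename T>
static void scalar_copy_locked(atomic_t<u64, 64>* range_lock, u32 eal, void* dst, const void* src)
{
    vm::range_lock<true, sizeof(T)>(range_lock, eal, sizeof(T));
    *static_cast<T*>(dst) = *static_cast<const T*>(src);
    range_lock->release(0); // announce that no range is held anymore
}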
@@ -167,17 +167,24 @@ namespace vm
 {
     const u64 lock_val = g_range_lock.load();
     const u64 is_shared = g_shareable[begin >> 16].load();
-    const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
-    const u32 lock_size = static_cast<u32>(lock_val >> 35);
+    u64 lock_addr = static_cast<u32>(lock_val); // -> u64
+    u32 lock_size = static_cast<u32>(lock_val << range_bits >> (range_bits + 32));

     u64 addr = begin;

-    if (is_shared)
+    if ((lock_val & range_full_mask) == range_locked) [[likely]]
     {
-        addr = addr & 0xffff;
+        lock_size = 128;
+
+        if (is_shared)
+        {
+            addr = addr & 0xffff;
+            lock_addr = lock_val << 3 >> 3;
+        }
     }

-    if ((lock_val & range_full_mask) != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
+    if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
     {
         range_lock->store(begin | (u64{size} << 32));
@@ -249,11 +256,11 @@ namespace vm
 }

 template <typename F>
-FORCE_INLINE static u64 for_all_range_locks(F func)
+FORCE_INLINE static u64 for_all_range_locks(u64 input, F func)
 {
-    u64 result = 0;
+    u64 result = input;

-    for (u64 bits = g_range_lock_bits.load(); bits; bits &= bits - 1)
+    for (u64 bits = input; bits; bits &= bits - 1)
     {
         const u32 id = std::countr_zero(bits);
@@ -263,8 +270,13 @@ namespace vm
         {
             const u32 addr = static_cast<u32>(lock_val);

-            result += func(addr, size);
+            if (func(addr, size)) [[unlikely]]
+            {
+                continue;
+            }
         }

+        result &= ~(1ull << id);
     }

     return result;
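for_all_range_locks now receives the set of live lock bits and returns the subset that still conflicts: entries that report a conflict keep their bit (the continue path above), everything else is cleared with result &= ~(1ull << id). A self-contained sketch of that prune-the-bitmask idiom follows, with illustrative names rather than the actual vm.cpp code.

#include <bit>
#include <cstdint>

// Walk every set bit; keep the bits whose check still fails so the caller
// only has to retry those on the next iteration.
template <typename F>
std::uint64_t prune_pending_bits(std::uint64_t input, F&& still_conflicts)
{
    std::uint64_t result = input;

    for (std::uint64_t bits = input; bits; bits &= bits - 1)
    {
        const int id = std::countr_zero(bits);

        if (still_conflicts(id))
        {
            continue; // keep this bit set, re-check it later
        }

        result &= ~(1ull << id);
    }

    return result;
}

As the following hunks show, the writer then loops only while the returned mask is non-zero instead of rescanning every slot on each pass.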
@@ -287,20 +299,20 @@ namespace vm
 }

 // Block or signal new range locks
-g_range_lock = addr | u64{size} << 35 | flags;
+g_range_lock = addr | u64{size} << 32 | flags;

+_mm_prefetch(g_range_lock_set + 0, _MM_HINT_T0);
+_mm_prefetch(g_range_lock_set + 2, _MM_HINT_T0);
+_mm_prefetch(g_range_lock_set + 4, _MM_HINT_T0);
+
 const auto range = utils::address_range::start_length(addr, size);

-while (true)
-{
-    const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
-    {
-        // TODO (currently not possible): handle 2 64K pages (inverse range), or more pages
-        if (g_shareable[addr2 >> 16])
-        {
-            addr2 &= 0xffff;
-        }
-
+u64 to_clear = g_range_lock_bits.load();
+
+while (to_clear)
+{
+    to_clear = for_all_range_locks(to_clear, [&](u32 addr2, u32 size2)
+    {
         ASSUME(size2);

         if (range.overlaps(utils::address_range::start_length(addr2, size2))) [[unlikely]]
@@ -311,7 +323,7 @@ namespace vm
         return 0;
     });

-    if (!bads) [[likely]]
+    if (!to_clear) [[likely]]
     {
         break;
     }
@@ -477,22 +489,28 @@ namespace vm
     }
 }

-if (g_shareable[addr >> 16])
+if (g_shareable[addr >> 16]) [[unlikely]]
 {
     // Reservation address in shareable memory range
     addr = addr & 0xffff;
 }

-g_range_lock = addr | (u64{128} << 35) | range_locked;
+g_range_lock = addr | range_locked;

+_mm_prefetch(g_range_lock_set + 0, _MM_HINT_T0);
+_mm_prefetch(g_range_lock_set + 2, _MM_HINT_T0);
+_mm_prefetch(g_range_lock_set + 4, _MM_HINT_T0);
+
 const auto range = utils::address_range::start_length(addr, 128);

+u64 to_clear = g_range_lock_bits.load();
+
 while (true)
 {
-    const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
+    to_clear = for_all_range_locks(to_clear, [&](u32 addr2, u32 size2)
     {
         // TODO (currently not possible): handle 2 64K pages (inverse range), or more pages
-        if (g_shareable[addr2 >> 16])
+        if (g_shareable[addr2 >> 16]) [[unlikely]]
         {
             addr2 &= 0xffff;
         }
@@ -507,7 +525,7 @@ namespace vm
         return 0;
     });

-    if (!bads) [[likely]]
+    if (!to_clear) [[likely]]
     {
         break;
     }
@@ -13,18 +13,20 @@ namespace vm

 enum range_lock_flags : u64
 {
-    /* flags (3 bits) */
+    /* flags (3 bits, RWX) */

-    range_readable = 1ull << 32,
-    range_writable = 2ull << 32,
-    range_executable = 4ull << 32,
-    range_full_mask = 7ull << 32,
+    range_readable = 4ull << 61,
+    range_writable = 2ull << 61,
+    range_executable = 1ull << 61,
+    range_full_mask = 7ull << 61,

     /* flag combinations with special meaning */

-    range_normal = 3ull << 32, // R+W, testing as mask for zero can check no access
-    range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
+    range_locked = 1ull << 61, // R+W as well, but being exclusively accessed (size extends addr)
     range_allocation = 0, // Allocation, no safe access, g_shareable may change at ANY location
+
+    range_pos = 61,
+    range_bits = 3,
 };

 extern atomic_t<u64> g_range_lock;
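Since the flags moved from bits 32-34 to the top of the word, the new layout can be restated with a few compile-time checks. These static_asserts are not part of the commit; they only re-derive the enum values shown above.

// Sanity checks on the new packing: R, W, X occupy the top three bits
// (in that order from the MSB), and range_full_mask covers exactly them.
static_assert(vm::range_readable == 4ull << 61);
static_assert(vm::range_writable == 2ull << 61);
static_assert(vm::range_executable == 1ull << 61);
static_assert(vm::range_full_mask == (vm::range_readable | vm::range_writable | vm::range_executable));
static_assert(vm::range_pos == 61 && vm::range_bits == 3);
static_assert((vm::range_full_mask >> vm::range_pos) == 7);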
@@ -40,27 +42,35 @@ namespace vm
 void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);

 // Lock memory range
-template <bool TouchMem = true>
-FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
+template <bool TouchMem = true, uint Size = 0>
+FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 _size)
 {
+    const u32 size = Size ? Size : _size;
     const u64 lock_val = g_range_lock.load();
 #ifndef _MSC_VER
     __asm__(""); // Tiny barrier
 #endif
     const u64 is_shared = g_shareable[begin >> 16].load();
-    const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
-    const u32 lock_size = static_cast<u32>(lock_val >> 35);
+    u64 lock_addr = static_cast<u32>(lock_val); // -> u64
+    u32 lock_size = static_cast<u32>(lock_val << range_bits >> (32 + range_bits));

     u64 addr = begin;

     // Optimization: if range_locked is not used, the addr check will always pass
     // Otherwise, g_shareable is unchanged and its value is reliable to read
-    if (is_shared)
+    if ((lock_val >> range_pos) == (range_locked >> range_pos)) [[likely]]
     {
-        addr = addr & 0xffff;
+        lock_size = 128;
+
+        if (TouchMem && is_shared) [[unlikely]]
+        {
+            addr = addr & 0xffff;
+            lock_addr = lock_val << range_bits >> range_bits;
+        }
     }

-    if (addr + size <= lock_addr || addr >= lock_addr + lock_size || (TouchMem && ((lock_val >> 32) ^ (range_locked >> 32)) & (range_full_mask >> 32))) [[likely]]
+    if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
     {
         // Optimistic locking.
         // Note that we store the range we will be accessing, without any clamping.
@@ -77,7 +87,7 @@ namespace vm
             range_lock->release(0);
         }
     }

     return;
 }
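The optimistic fast path in range_lock reduces to a disjointness test between the requested range and the currently blocked one, with the blocked size pinned to 128 whenever range_locked is set. Below is a standalone sketch of that check under those assumptions; it deliberately ignores TouchMem and the shared-page address rewrite.

#include <cstdint>

// Simplified model of the optimistic check in vm::range_lock:
// proceed only if [addr, addr + size) does not overlap [lock_addr, lock_addr + lock_size).
constexpr bool can_lock_optimistically(std::uint64_t addr, std::uint32_t size,
                                       std::uint64_t lock_addr, std::uint32_t lock_size)
{
    return addr + size <= lock_addr || addr >= lock_addr + lock_size;
}

// Example: a 128-byte reservation lock at 0x10000 blocks a 4-byte access at 0x10040,
// but not one at 0x10080.
static_assert(!can_lock_optimistically(0x10040, 4, 0x10000, 128));
static_assert(can_lock_optimistically(0x10080, 4, 0x10000, 128));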