rsx: Block-level reservation access

This commit is contained in:
kd-11 2020-09-14 23:38:17 +03:00 committed by kd-11
parent af6fcb19a8
commit 18ca3ed449
6 changed files with 133 additions and 53 deletions

View File

@ -1719,10 +1719,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
auto& cline_data = vm::_ref<spu_rdata_t>(addr);
data += 0;
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
rsx::reservation_lock rsx_lock(addr, 128);
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
const bool success = [&]()
@ -1742,7 +1739,6 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
return false;
}();
if (render) render->unpause();
return success;
}

View File

@ -2126,6 +2126,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
const auto& to_write = _ref<spu_rdata_t>(args.lsa & 0x3ff80);
auto& res = vm::reservation_acquire(addr, 128);
// TODO: Limit scope!!
rsx::reservation_lock rsx_lock(addr, 128);
if (!g_use_rtm && rtime != res)
{
return false;
@ -2143,10 +2146,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
{
case UINT32_MAX:
{
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
const bool ok = cpu_thread::suspend_all(this, [&]()
{
if ((res & -128) == rtime)
@ -2165,7 +2164,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
return false;
});
if (render) render->unpause();
return ok;
}
case 0: return false;
@ -2200,10 +2198,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
vm::_ref<atomic_t<u32>>(addr) += 0;
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
const bool success = [&]()
{
@ -2222,7 +2216,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
return false;
}();
if (render) render->unpause();
return success;
}())
{
@ -2258,15 +2251,12 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
perf_meter<"STORE128"_u64> perf0;
const auto cpu = get_current_cpu_thread();
rsx::reservation_lock rsx_lock(addr, 128);
if (g_use_rtm) [[likely]]
{
const u32 result = spu_putlluc_tx(addr, to_write, cpu);
const auto render = result != 1 ? rsx::get_rsx_if_needs_res_pause(addr) : nullptr;
if (render) render->pause();
if (result == 0)
{
// Execute with increased priority
@ -2281,7 +2271,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
perf_log.warning("STORE128: took too long: %u", result);
}
if (render) render->unpause();
static_cast<void>(cpu->test_stopped());
}
else
@ -2291,10 +2280,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
const auto render = rsx::get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
{
// Full lock (heavyweight)
@ -2303,8 +2288,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
mov_rdata(super_data, *static_cast<const spu_rdata_t*>(to_write));
res += 64;
}
if (render) render->unpause();
}
}
@ -2498,6 +2481,7 @@ bool spu_thread::process_mfc_cmd()
alignas(64) spu_rdata_t temp;
u64 ntime;
rsx::reservation_lock rsx_lock(addr, 128);
if (raddr)
{

View File

@ -314,6 +314,10 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f
sys_rsx.warning("sys_rsx_context_iomap(): RSX is not idle while mapping io");
}
// Wait until we have no active RSX locks and reserve iomap for use. Must do so before acquiring vm lock to avoid deadlocks
vm::temporary_unlock(*get_current_cpu_thread());
rsx::reservation_lock<true> rsx_lock(ea, size);
vm::reader_lock rlock;
for (u32 addr = ea, end = ea + size; addr < end; addr += 0x100000)
@ -326,6 +330,7 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f
io >>= 20, ea >>= 20, size >>= 20;
render->pause();
std::scoped_lock lock(g_fxo->get<lv2_rsx_config>()->mutex);
for (u32 i = 0; i < size; i++)
@ -339,6 +344,7 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f
table.io[ea + i].release((io + i) << 20);
}
render->unpause();
return CELL_OK;
}

View File

@ -2397,6 +2397,7 @@ namespace rsx
}
}
rsx::reservation_lock<true> lock(sink, 16);
vm::_ref<atomic_t<CellGcmReportData>>(sink).store({ timestamp(), value, 0});
}
@ -3258,6 +3259,7 @@ namespace rsx
break;
}
rsx::reservation_lock<true> lock(sink, 16);
vm::_ref<atomic_t<CellGcmReportData>>(sink).store({ timestamp, value, 0});
}

View File

@ -44,6 +44,7 @@ namespace rsx
{
std::array<atomic_t<u32>, 4096> ea;
std::array<atomic_t<u32>, 4096> io;
std::array<shared_mutex, 4096> rs;
rsx_iomap_table() noexcept
{
@ -57,6 +58,46 @@ namespace rsx
{
return this->ea[offs >> 20] | (offs & 0xFFFFF);
}
template<bool IsFullLock>
bool lock(u32 addr, u32 len) noexcept
{
if (len <= 1) return false;
const u32 end = addr + len - 1;
for (u32 block = (addr >> 20); block <= (end >> 20); ++block)
{
if constexpr (IsFullLock)
{
rs[block].lock();
}
else
{
rs[block].lock_shared();
}
}
return true;
}
template<bool IsFullLock>
void unlock(u32 addr, u32 len) noexcept
{
ASSERT(len >= 1);
const u32 end = addr + len - 1;
for (u32 block = (addr >> 20); block <= (end >> 20); ++block)
{
if constexpr (IsFullLock)
{
rs[block].unlock();
}
else
{
rs[block].unlock_shared();
}
}
}
};
enum framebuffer_creation_context : u8
@ -966,23 +1007,64 @@ namespace rsx
return g_fxo->get<rsx::thread>();
}
// Returns nullptr if rsx does not need pausing on reservations op, rsx ptr otherwise
inline thread* get_rsx_if_needs_res_pause(u32 addr)
template<bool IsFullLock = false>
class reservation_lock
{
if (!g_cfg.core.rsx_accurate_res_access) [[likely]]
u32 addr = 0, length = 0;
bool locked = false;
inline void lock_range(u32 addr, u32 length)
{
return {};
this->addr = addr;
this->length = length;
auto renderer = get_current_renderer();
this->locked = renderer->iomap_table.lock<IsFullLock>(addr, length);
}
const auto render = get_current_renderer();
ASSUME(render);
if (render->iomap_table.io[addr >> 20].load() == umax) [[likely]]
public:
reservation_lock(u32 addr, u32 length)
{
return {};
if (g_cfg.core.rsx_accurate_res_access &&
addr < constants::local_mem_base)
{
lock_range(addr, length);
}
}
return render;
}
// Multi-range lock. If ranges overlap, the combined range will be acquired.
// If ranges do not overlap, the first range that is in main memory will be acquired.
reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
{
if (g_cfg.core.rsx_accurate_res_access)
{
const auto range1 = utils::address_range::start_length(dst_addr, dst_length);
const auto range2 = utils::address_range::start_length(src_addr, src_length);
utils::address_range target_range;
if (!range1.overlaps(range2)) [[likely]]
{
target_range = (dst_addr < constants::local_mem_base) ? range1 : range2;
}
else
{
// Very unlikely
target_range = range1.get_min_max(range2);
}
if (target_range.start < constants::local_mem_base)
{
lock_range(target_range.start, target_range.length());
}
}
}
~reservation_lock()
{
if (locked)
{
get_current_renderer()->iomap_table.unlock<IsFullLock>(addr, length);
}
}
};
}

View File

@ -940,10 +940,13 @@ namespace rsx
{
// Bit cast - optimize to mem copy
const auto dst = vm::_ptr<u8>(get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, HERE));
const auto src = vm::_ptr<const u8>(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE));
const auto dst_address = get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, HERE);
const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE);
const auto dst = vm::_ptr<u8>(dst_address);
const auto src = vm::_ptr<const u8>(src_address);
const u32 data_length = count * 4;
auto res = rsx::reservation_lock<true>(dst_address, data_length, src_address, data_length);
if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]]
{
@ -971,8 +974,13 @@ namespace rsx
}
case blit_engine::transfer_destination_format::r5g6b5:
{
const auto dst = vm::_ptr<u16>(get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, HERE));
const auto src = vm::_ptr<const u32>(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE));
const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, HERE);
const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE);
const auto dst = vm::_ptr<u16>(dst_address);
const auto src = vm::_ptr<const u32>(src_address);
const auto data_length = count * 2;
auto res = rsx::reservation_lock<true>(dst_address, data_length, src_address, data_length);
auto convert = [](u32 input) -> u16
{
@ -1162,8 +1170,6 @@ namespace rsx
const u32 src_line_length = (in_w * in_bpp);
//auto res = vm::passive_lock(dst_address, dst_address + (in_pitch * (in_h - 1) + src_line_length));
if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch)))
{
const u32 nb_lines = std::min(clip_h, in_h);
@ -1223,6 +1229,9 @@ namespace rsx
return;
}
// Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers
auto res = ::rsx::reservation_lock<true>(dst_address, out_pitch * out_h, src_address, in_pitch * in_h);
if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER))
{
blit_src_info src_info = {};
@ -1522,29 +1531,30 @@ namespace rsx
const bool is_block_transfer = (in_pitch == out_pitch && out_pitch + 0u == line_length);
const auto read_address = get_address(src_offset, src_dma, HERE);
const auto write_address = get_address(dst_offset, dst_dma, HERE);
const auto data_length = in_pitch * (line_count - 1) + line_length;
const auto read_length = in_pitch * (line_count - 1) + line_length;
const auto write_length = out_pitch * (line_count - 1) + line_length;
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
rsx->invalidate_fragment_program(dst_dma, dst_offset, write_length);
if (const auto result = rsx->read_barrier(read_address, read_length, !is_block_transfer);
result == rsx::result_zcull_intr)
{
// This transfer overlaps with the zcull data pool
if (rsx->copy_zcull_stats(read_address, data_length, write_address) == data_length)
if (rsx->copy_zcull_stats(read_address, read_length, write_address) == write_length)
{
// All writes deferred
return;
}
}
//auto res = vm::passive_lock(write_address, data_length + write_address);
auto res = ::rsx::reservation_lock<true>(write_address, write_length, read_address, read_length);
u8 *dst = vm::_ptr<u8>(write_address);
const u8 *src = vm::_ptr<u8>(read_address);
const bool is_overlapping = dst_dma == src_dma && [&]() -> bool
{
const u32 src_max = src_offset + data_length;
const u32 src_max = src_offset + read_length;
const u32 dst_max = dst_offset + (out_pitch * (line_count - 1) + line_length);
return (src_offset >= dst_offset && src_offset < dst_max) ||
(dst_offset >= src_offset && dst_offset < src_max);
@ -1554,7 +1564,7 @@ namespace rsx
{
if (is_block_transfer)
{
std::memmove(dst, src, line_length * line_count);
std::memmove(dst, src, read_length);
}
else
{
@ -1582,7 +1592,7 @@ namespace rsx
{
if (is_block_transfer)
{
std::memcpy(dst, src, line_length * line_count);
std::memcpy(dst, src, read_length);
}
else
{