RSX/SPU: Accurate reservation access

This commit is contained in:
Eladash 2020-02-11 23:36:46 +02:00 committed by Ani
parent 57a9844279
commit df8d0cde4a
9 changed files with 129 additions and 36 deletions

View File

@ -5,6 +5,7 @@
#include "Emu/Memory/vm_reservation.h"
#include "Emu/IdManager.h"
#include "Emu/RSX/RSXThread.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/lv2/sys_spu.h"
@ -67,6 +68,26 @@ static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const declt
}
}
// Decides whether the RSX thread has to be paused around a reservation operation on addr.
// Yields the current renderer when a pause is required and nullptr when it is not.
static FORCE_INLINE rsx::thread* get_rsx_if_needs_res_pause(u32 addr)
{
	// The whole mechanism is opt-in through the core configuration
	if (g_cfg.core.rsx_accurate_res_access) [[unlikely]]
	{
		const auto renderer = rsx::get_current_renderer();
		ASSUME(renderer);

		// The table is indexed in 1 MiB units (addr >> 20).
		// NOTE(review): an entry of -1 presumably marks memory RSX has no mapping for - confirm
		if (renderer->iomap_table.io[addr >> 20] != -1) [[unlikely]]
		{
			return renderer;
		}
	}

	return nullptr;
}
extern u64 get_timebased_time();
extern u64 get_system_time();
@ -1655,12 +1676,20 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
if (g_cfg.core.spu_accurate_putlluc)
{
// Full lock (heavyweight)
// TODO: vm::check_addr
const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
vm::writer_lock lock(addr);
mov_rdata(super_data, to_write);
res.release(res.load() + 127);
{
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
mov_rdata(super_data, to_write);
res.release(res.load() + 127);
}
if (render) render->unpause();
}
else
{
@ -1868,15 +1897,23 @@ bool spu_thread::process_mfc_cmd()
if (g_cfg.core.spu_accurate_getllar)
{
*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
const auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
{
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
ntime = old_time;
mov_rdata(dst, super_data);
res.release(old_time);
}
ntime = old_time;
mov_rdata(dst, super_data);
res.release(old_time);
if (render) render->unpause();
}
else
{
@ -1969,22 +2006,29 @@ bool spu_thread::process_mfc_cmd()
{
*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
if (cmp_rdata(rdata, super_data))
{
mov_rdata(super_data, to_write);
res.release(old_time + 128);
result = 1;
}
else
{
res.release(old_time);
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
if (cmp_rdata(rdata, super_data))
{
mov_rdata(super_data, to_write);
res.release(old_time + 128);
result = 1;
}
else
{
res.release(old_time);
}
}
if (render) render->unpause();
}
else
{

View File

@ -615,9 +615,7 @@ namespace rsx
// Wait for external pause events
if (external_interrupt_lock)
{
external_interrupt_ack.store(true);
while (external_interrupt_lock) _mm_pause();
wait_pause();
}
// Note a possible rollback address
@ -2468,10 +2466,7 @@ namespace rsx
//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
void thread::pause()
{
while (external_interrupt_lock.exchange(true)) [[unlikely]]
{
_mm_pause();
}
external_interrupt_lock++;
while (!external_interrupt_ack)
{
@ -2480,14 +2475,34 @@ namespace rsx
_mm_pause();
}
external_interrupt_ack.store(false);
}
// Drops one pause request that was previously taken with pause().
// external_interrupt_lock is an unsigned counter of outstanding pause requests, so the only
// valid release is a single decrement: storing false/0 beforehand would throw away the
// requests of other callers and make the decrement wrap the counter around to 0xFFFFFFFF,
// leaving every waiter that spins on a non-zero lock stuck.
void thread::unpause()
{
	// TODO: Clean this up
	external_interrupt_lock--;
}
// Blocks the caller for as long as external_interrupt_lock is non-zero.
// Each round calls g_dma_manager.sync() when multithreaded RSX is configured, raises
// external_interrupt_ack, spins until the lock reads zero and lowers the ack again.
// The round is repeated if the lock turned non-zero again by the time the ack was lowered.
void thread::wait_pause()
{
	for (;;)
	{
		if (g_cfg.video.multithreaded_rsx)
		{
			g_dma_manager.sync();
		}

		// Signal that the pause has been observed
		external_interrupt_ack.store(true);

		// TODO: Investigate non busy-spinning method
		while (external_interrupt_lock)
		{
			_mm_pause();
		}

		external_interrupt_ack.store(false);

		// Leave only if nobody requested a new pause in the meantime
		if (!external_interrupt_lock)
		{
			break;
		}
	}
}
u32 thread::get_load()

View File

@ -599,7 +599,7 @@ namespace rsx
RsxDmaControl* ctrl = nullptr;
rsx_iomap_table iomap_table;
u32 restore_point = 0;
atomic_t<bool> external_interrupt_lock{ false };
atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false };
void flush_fifo();
void recover_fifo();
@ -899,6 +899,7 @@ namespace rsx
void pause();
void unpause();
void wait_pause();
// Get RSX approximate load in %
u32 get_load();

View File

@ -98,17 +98,26 @@ namespace rsx
if (Emu.IsStopped())
return;
// Wait for external pause events
if (rsx->external_interrupt_lock)
{
rsx->wait_pause();
continue;
}
if (const auto tdr = static_cast<u64>(g_cfg.video.driver_recovery_timeout))
{
if (Emu.IsPaused())
{
const u64 start0 = get_system_time();
while (Emu.IsPaused())
{
std::this_thread::sleep_for(1ms);
}
// Reset
start = get_system_time();
start += get_system_time() - start0;
}
else
{
@ -827,6 +836,8 @@ namespace rsx
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x * write_len);
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + (index * write_len), method_registers.blit_engine_output_location_nv3062(), HERE);
//auto res = vm::passive_lock(address, address + write_len);
switch (write_len)
{
case 4:
@ -839,6 +850,7 @@ namespace rsx
fmt::throw_exception("Unreachable" HERE);
}
//res->release(0);
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
}
};
@ -980,6 +992,9 @@ namespace rsx
const u32 dst_address = get_address(dst_offset, dst_dma, HERE);
const u32 src_line_length = (in_w * in_bpp);
//auto res = vm::passive_lock(dst_address, dst_address + (in_pitch * (in_h - 1) + src_line_length));
if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch)))
{
const u32 nb_lines = std::min(clip_h, in_h);
@ -1347,6 +1362,8 @@ namespace rsx
}
}
//auto res = vm::passive_lock(write_address, data_length + write_address);
u8 *dst = vm::_ptr<u8>(write_address);
const u8 *src = vm::_ptr<u8>(read_address);
@ -1402,6 +1419,8 @@ namespace rsx
}
}
}
//res->release(0);
}
}

View File

@ -38,6 +38,7 @@ struct cfg_root : cfg::node
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe };
cfg::_bool spu_accurate_getllar{ this, "Accurate GETLLAR", false };
cfg::_bool spu_accurate_putlluc{ this, "Accurate PUTLLUC", false };
cfg::_bool rsx_accurate_res_access{this, "Accurate RSX reservation access", false, true};
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
cfg::_bool spu_cache{ this, "SPU Cache", true };
cfg::_bool spu_prof{ this, "SPU Profiler", false };

View File

@ -41,6 +41,7 @@ public:
EnableTSX,
AccurateGETLLAR,
AccuratePUTLLUC,
AccurateRSXAccess,
AccurateXFloat,
SetDAZandFTZ,
SPUBlockSize,
@ -274,6 +275,7 @@ private:
{ EnableTSX, { "Core", "Enable TSX"}},
{ AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
{ AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}},
{ AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ AccurateXFloat, { "Core", "Accurate xfloat"}},
{ SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
{ SPUBlockSize, { "Core", "SPU Block Size"}},

View File

@ -1668,6 +1668,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> guiSettings, std:
xemu_settings->EnhanceCheckBox(ui->accuratePUTLLUC, emu_settings::AccuratePUTLLUC);
SubscribeTooltip(ui->accuratePUTLLUC, tooltips.settings.accurate_putlluc);
xemu_settings->EnhanceCheckBox(ui->accurateRSXAccess, emu_settings::AccurateRSXAccess);
SubscribeTooltip(ui->accurateRSXAccess, tooltips.settings.accurate_rsx_access);
xemu_settings->EnhanceCheckBox(ui->hookStFunc, emu_settings::HookStaticFuncs);
SubscribeTooltip(ui->hookStFunc, tooltips.settings.hook_static_functions);

View File

@ -3137,6 +3137,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="accurateRSXAccess">
<property name="text">
<string>Accurate RSX reservation access</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="hookStFunc">
<property name="text">

View File

@ -78,6 +78,7 @@ public:
const QString set_daz_and_ftz = tr("Never use this.");
const QString accurate_getllar = tr("Never use this.");
const QString accurate_putlluc = tr("Never use this.");
const QString accurate_rsx_access = tr("Never use this.");
const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.");
const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option.");
const QString force_high_pz = tr("Only useful when debugging differences in GPU hardware.\nNot necessary for average users.\nIf unsure, don't use this option.");