rsx/vk: Implement asynchronous host memory management.

This commit is contained in:
kd-11 2024-11-25 23:10:29 +03:00 committed by kd-11
parent 64ec19f018
commit 83764fbbb4
10 changed files with 174 additions and 4 deletions

View File

@ -2490,7 +2490,7 @@ namespace rsx
// Invalidate
const address_range tex_range = address_range::start_length(attributes.address, tex_size);
invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, {}, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, tex_range, invalidation_cause::cause_is_read | invalidation_cause::cause_uses_strict_data_bounds, {}, std::forward<Args>(extras)...);
// Upload from CPU. Note that sRGB conversion is handled in the FS
auto uploaded = upload_image_from_cpu(cmd, tex_range, attributes.width, attributes.height, attributes.depth, tex.get_exact_mipmap_count(), attributes.pitch, attributes.gcm_format,

View File

@ -5,6 +5,7 @@
#include "TextureUtils.h"
#include "Emu/Memory/vm.h"
#include "Emu/RSX/Host/MM.h"
#include "util/vm.hpp"
#include <list>
@ -29,8 +30,7 @@ namespace rsx
{
ensure(range.is_page_range());
//rsx_log.error("memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
utils::memory_protect(vm::base(range.start), range.length(), prot);
rsx::mm_protect(vm::base(range.start), range.length(), prot);
#ifdef TEXTURE_CACHE_DEBUG
tex_cache_checker.set_protection(range, prot);

104
rpcs3/Emu/RSX/Host/MM.cpp Normal file
View File

@ -0,0 +1,104 @@
#include "stdafx.h"
#include "MM.h"
#include <Emu/RSX/Common/simple_array.hpp>
#include <Emu/RSX/RSXOffload.h>
#include <Emu/Memory/vm.h>
#include <Emu/IdManager.h>
#include <Emu/system_config.h>
#include <Utilities/address_range.h>
#include <Utilities/mutex.h>
namespace rsx
{
// Protection requests that mm_protect() chose to postpone. Entries are appended by
// mm_defer_mprotect_internal() and applied, then discarded, by mm_flush_mprotect_queue_internal().
rsx::simple_array<MM_block> g_deferred_mprotect_queue;
// Guards g_deferred_mprotect_queue. mm_protect() (async path) and both mm_flush() overloads
// acquire it before reading or modifying the queue.
shared_mutex g_mprotect_queue_lock;
// Applies every queued protection request to host memory and then empties the queue.
// This function does no locking of its own; the callers in this file acquire
// g_mprotect_queue_lock before invoking it.
void mm_flush_mprotect_queue_internal()
{
	for (const auto& pending : g_deferred_mprotect_queue)
	{
		// The queue stores the host address as an integer; turn it back into a pointer.
		void* const host_ptr = reinterpret_cast<void*>(pending.start);
		utils::memory_protect(host_ptr, pending.length, pending.prot);
	}

	g_deferred_mprotect_queue.clear();
}
// Appends one protection request to the deferred queue.
//   start  - host address of the first byte, as an integer
//   length - size of the range in bytes
//   prot   - protection to apply when the queue is flushed
// This function does no locking of its own; mm_protect() calls it with g_mprotect_queue_lock held.
void mm_defer_mprotect_internal(u64 start, u64 length, utils::protection prot)
{
	// Requests are deliberately not stacked or merged: the setup overhead of a separate
	// memory_protect call is only a few nanoseconds, which is not worth the risk of
	// hanging because of conflicts between merged requests.
	const MM_block request{ start, length, prot };
	g_deferred_mprotect_queue.push_back(request);
}
// Changes the protection of the host memory range [ptr, ptr + length).
//   ptr    - host address of the first byte
//   length - size of the range in bytes
//   prot   - protection to apply
//
// Behaviour:
//  * Requests that restrict access (no, ro, ...) are queued while the asynchronous host
//    memory manager is enabled; they take effect on the next flush.
//  * Unlock requests (rw / wx), and every request while the asynchronous manager is
//    disabled, are applied before this function returns. If the range overlaps anything
//    still queued, the whole queue is flushed first so that an older queued request cannot
//    later overwrite the protection applied here.
void mm_protect(void* ptr, u64 length, utils::protection prot)
{
	const auto start = reinterpret_cast<u64>(ptr);
	const auto end = start + length;

	const bool is_unlock = (prot == utils::protection::rw || prot == utils::protection::wx);

	std::lock_guard lock(g_mprotect_queue_lock);

	// NOTE(review): the option looks like it can be toggled at runtime (see its declaration
	// in the config) - confirm. Because of that, the synchronous path must also consult the
	// queue: entries deferred before the toggle are still pending and would otherwise be
	// re-applied on top of this request by a later mm_flush().
	const bool defer = !is_unlock && g_cfg.video.async_host_memory_manager;

	if (defer)
	{
		// No, Ro, etc.
		mm_defer_mprotect_internal(start, length, prot);
		return;
	}

	// Naive conflict handling: flush everything as soon as a single overlap is detected.
	// Eventually it makes more sense to do proper conflict resolution, but it's not as important.
	for (const auto& block : g_deferred_mprotect_queue)
	{
		if (block.overlaps(start, end))
		{
			mm_flush_mprotect_queue_internal();
			break;
		}
	}

	utils::memory_protect(ptr, length, prot);
}
// Unconditionally applies and discards all deferred protection requests.
// Acquires g_mprotect_queue_lock for the duration of the flush.
void mm_flush()
{
	std::lock_guard guard(g_mprotect_queue_lock);

	mm_flush_mprotect_queue_internal();
}
// Flushes the deferred queue only if the given guest address falls inside a queued block.
//   vm_address - guest address; it is translated with vm::base() before being compared
//                against the queued host ranges.
// When a match is found the entire queue is flushed, not just the matching entry.
void mm_flush(u32 vm_address)
{
	std::lock_guard guard(g_mprotect_queue_lock);

	if (!g_deferred_mprotect_queue.empty())
	{
		const auto host_addr = reinterpret_cast<u64>(vm::base(vm_address));

		// Scan first, flush afterwards, so the queue is not modified while it is iterated.
		bool hit = false;
		for (const auto& pending : g_deferred_mprotect_queue)
		{
			if (pending.overlaps(host_addr))
			{
				hit = true;
				break;
			}
		}

		if (hit)
		{
			mm_flush_mprotect_queue_internal();
		}
	}
}
// Requests a flush of the deferred queue without necessarily performing it on this thread.
// With multithreaded RSX enabled, an mm_flush control request is handed to the
// rsx::dma_manager through backend_ctrl(); otherwise the queue is flushed right here.
void mm_flush_lazy()
{
	if (g_cfg.video.multithreaded_rsx)
	{
		// Hand the request over to the DMA manager.
		// NOTE(review): presumably the backend services it by calling mm_flush() - confirm.
		g_fxo->get<rsx::dma_manager>().backend_ctrl(static_cast<u32>(mm_backend_ctrl::mm_flush), nullptr);
		return;
	}

	mm_flush();
}
}

40
rpcs3/Emu/RSX/Host/MM.h Normal file
View File

@ -0,0 +1,40 @@
#pragma once
#include <util/types.hpp>
#include <util/vm.hpp>
namespace rsx
{
// One pending memory-protection request: a byte range plus the protection to apply to it.
struct MM_block
{
	u64 start;               // Address of the first byte of the range, stored as an integer
	u64 length;              // Size of the range in bytes
	utils::protection prot;  // Protection to apply to the range

	// Returns true when this block intersects the range that begins at `start` and stops
	// just before `end`. Both ranges are treated as half-open: the end value itself is not
	// part of the range.
	// FIXME: Use address_range64
	bool overlaps(u64 start, u64 end) const
	{
		const u64 this_end = this->start + this->length;

		if (end <= this->start)
		{
			// The other range finishes before this block begins.
			return false;
		}

		return start < this_end;
	}

	// Returns true when `addr` lies inside this block. The address one past the last byte
	// (start + length) is not considered part of the block.
	// FIXME: Use address_range64
	bool overlaps(u64 addr) const
	{
		if (addr < this->start)
		{
			return false;
		}

		const u64 this_end = this->start + this->length;
		return addr < this_end;
	}
};
// Control codes the host memory manager passes to rsx::dma_manager::backend_ctrl().
enum class mm_backend_ctrl : u32
{
// Sent by mm_flush_lazy() to request a flush of the deferred protection queue.
// NOTE(review): the numeric value matches vk::rctrl_mem_protect; keep the two in sync.
mm_flush = 0x80000002
};
// Changes the protection of `length` bytes of host memory beginning at `start`.
// Depending on configuration and on `prot`, the change is applied immediately or queued
// until the next flush.
void mm_protect(void* start, u64 length, utils::protection prot);
// Requests a flush of the queued protection changes; with multithreaded RSX enabled the
// request is forwarded to the DMA manager instead of being executed by the caller.
void mm_flush_lazy();
// Flushes the queued protection changes if the host address backing guest address
// `vm_address` lies inside any queued block; otherwise does nothing.
void mm_flush(u32 vm_address);
// Applies all queued protection changes immediately.
void mm_flush();
}

View File

@ -7,6 +7,9 @@
namespace rsx
{
void mm_flush_lazy();
void mm_flush();
namespace util
{
template <bool FlushDMA, bool FlushPipe>
@ -27,6 +30,7 @@ namespace rsx
if constexpr (FlushDMA)
{
// If the backend handled the request, this call will basically be a NOP
rsx::mm_flush_lazy();
g_fxo->get<rsx::dma_manager>().sync();
}
@ -34,6 +38,7 @@ namespace rsx
{
// Manually flush the pipeline.
// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
rsx::mm_flush();
RSX(ctx)->sync();
}

View File

@ -9,7 +9,8 @@ namespace vk
enum // callback commands
{
rctrl_queue_submit = 0x80000000,
rctrl_run_gc = 0x80000001
rctrl_run_gc = 0x80000001,
rctrl_mem_protect = 0x80000002,
};
struct submit_packet

View File

@ -15,6 +15,7 @@
#include "vkutils/scratch.h"
#include "Emu/RSX/rsx_methods.h"
#include "Emu/RSX/Host/MM.h"
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
#include "Emu/Memory/vm_locking.h"
@ -1010,6 +1011,8 @@ VKGSRender::~VKGSRender()
bool VKGSRender::on_access_violation(u32 address, bool is_writing)
{
rsx::mm_flush(address);
vk::texture_cache::thrashed_set result;
{
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
@ -2460,6 +2463,9 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
{
ensure(!m_queue_status.test_and_set(flush_queue_state::flushing));
// Host MM sync before executing anything on the GPU
rsx::mm_flush();
// Workaround for deadlock occurring during RSX offloader fault
// TODO: Restructure command submission infrastructure to avoid this condition
const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
@ -2823,6 +2829,11 @@ void VKGSRender::renderctl(u32 request_code, void* args)
vk::on_event_completed(eid, true);
break;
}
case vk::rctrl_mem_protect:
{
rsx::mm_flush();
break;
}
default:
fmt::throw_exception("Unhandled request code 0x%x", request_code);
}

View File

@ -178,6 +178,7 @@ struct cfg_root : cfg::node
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
cfg::_bool host_label_synchronization{ this, "Allow Host GPU Labels", false };
cfg::_bool disable_msl_fast_math{ this, "Disable MSL Fast Math", false };
cfg::_bool async_host_memory_manager{ this, "Asynchronous Host Memory Manager", true, true };
cfg::_enum<output_scaling_mode> output_scaling{ this, "Output Scaling Mode", output_scaling_mode::bilinear, true };
struct node_vk : cfg::node

View File

@ -104,6 +104,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
@ -621,6 +622,7 @@
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
<ClInclude Include="Emu\RSX\Host\MM.h" />
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.hpp" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.inc.h" />

View File

@ -1312,6 +1312,9 @@
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Host\MM.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2644,6 +2647,9 @@
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Host\MM.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">