mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-15 22:21:25 +00:00
rsx/vk: Implement asynchronous host memory management.
This commit is contained in:
parent
64ec19f018
commit
83764fbbb4
@ -2490,7 +2490,7 @@ namespace rsx
|
||||
|
||||
// Invalidate
|
||||
const address_range tex_range = address_range::start_length(attributes.address, tex_size);
|
||||
invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, {}, std::forward<Args>(extras)...);
|
||||
invalidate_range_impl_base(cmd, tex_range, invalidation_cause::cause_is_read | invalidation_cause::cause_uses_strict_data_bounds, {}, std::forward<Args>(extras)...);
|
||||
|
||||
// Upload from CPU. Note that sRGB conversion is handled in the FS
|
||||
auto uploaded = upload_image_from_cpu(cmd, tex_range, attributes.width, attributes.height, attributes.depth, tex.get_exact_mipmap_count(), attributes.pitch, attributes.gcm_format,
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "TextureUtils.h"
|
||||
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "Emu/RSX/Host/MM.h"
|
||||
#include "util/vm.hpp"
|
||||
|
||||
#include <list>
|
||||
@ -29,8 +30,7 @@ namespace rsx
|
||||
{
|
||||
ensure(range.is_page_range());
|
||||
|
||||
//rsx_log.error("memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
|
||||
utils::memory_protect(vm::base(range.start), range.length(), prot);
|
||||
rsx::mm_protect(vm::base(range.start), range.length(), prot);
|
||||
|
||||
#ifdef TEXTURE_CACHE_DEBUG
|
||||
tex_cache_checker.set_protection(range, prot);
|
||||
|
104
rpcs3/Emu/RSX/Host/MM.cpp
Normal file
104
rpcs3/Emu/RSX/Host/MM.cpp
Normal file
@ -0,0 +1,104 @@
|
||||
#include "stdafx.h"
|
||||
#include "MM.h"
|
||||
#include <Emu/RSX/Common/simple_array.hpp>
|
||||
#include <Emu/RSX/RSXOffload.h>
|
||||
|
||||
#include <Emu/Memory/vm.h>
|
||||
#include <Emu/IdManager.h>
|
||||
#include <Emu/system_config.h>
|
||||
#include <Utilities/address_range.h>
|
||||
#include <Utilities/mutex.h>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
// Protection requests recorded by mm_defer_mprotect_internal() and replayed by
// mm_flush_mprotect_queue_internal(). Entries hold host addresses, not guest (vm) addresses.
rsx::simple_array<MM_block> g_deferred_mprotect_queue;
|
||||
// Taken by mm_protect() and both mm_flush() overloads before they read or modify the queue.
shared_mutex g_mprotect_queue_lock;
|
||||
|
||||
void mm_flush_mprotect_queue_internal()
|
||||
{
|
||||
for (const auto& block : g_deferred_mprotect_queue)
|
||||
{
|
||||
utils::memory_protect(reinterpret_cast<void*>(block.start), block.length, block.prot);
|
||||
}
|
||||
|
||||
g_deferred_mprotect_queue.clear();
|
||||
}
|
||||
|
||||
// Records one protection request at the back of the deferred queue.
//   start  - host address of the first byte of the range
//   length - size of the range in bytes
//   prot   - protection to apply when the queue is flushed
// No locking is done here; mm_protect() calls this with g_mprotect_queue_lock held.
void mm_defer_mprotect_internal(u64 start, u64 length, utils::protection prot)
{
	// Requests are deliberately neither stacked nor merged: a separate memory_protect call
	// costs only a few nanoseconds of setup, which is cheaper than risking a hang on conflicts.
	g_deferred_mprotect_queue.push_back(MM_block{ start, length, prot });
}
|
||||
|
||||
void mm_protect(void* ptr, u64 length, utils::protection prot)
|
||||
{
|
||||
if (!g_cfg.video.async_host_memory_manager)
|
||||
{
|
||||
utils::memory_protect(ptr, length, prot);
|
||||
return;
|
||||
}
|
||||
|
||||
// Naive merge. Eventually it makes more sense to do conflict resolution, but it's not as important.
|
||||
const auto start = reinterpret_cast<u64>(ptr);
|
||||
const auto end = start + length;
|
||||
|
||||
std::lock_guard lock(g_mprotect_queue_lock);
|
||||
|
||||
if (prot == utils::protection::rw || prot == utils::protection::wx)
|
||||
{
|
||||
// Basically an unlock op. Flush if any overlap is detected
|
||||
for (const auto& block : g_deferred_mprotect_queue)
|
||||
{
|
||||
if (block.overlaps(start, end))
|
||||
{
|
||||
mm_flush_mprotect_queue_internal();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
utils::memory_protect(ptr, length, prot);
|
||||
return;
|
||||
}
|
||||
|
||||
// No, Ro, etc.
|
||||
mm_defer_mprotect_internal(start, length, prot);
|
||||
}
|
||||
|
||||
void mm_flush()
|
||||
{
|
||||
std::lock_guard lock(g_mprotect_queue_lock);
|
||||
mm_flush_mprotect_queue_internal();
|
||||
}
|
||||
|
||||
// Flushes the deferred queue only if one of its entries covers the given guest address.
//   vm_address - guest address; it is translated with vm::base() before being compared
//                against the host addresses stored in the queue.
// When any entry matches, the entire queue is flushed, not just the matching entry.
void mm_flush(u32 vm_address)
{
	std::lock_guard guard(g_mprotect_queue_lock);

	if (!g_deferred_mprotect_queue.empty())
	{
		const u64 host_addr = reinterpret_cast<u64>(vm::base(vm_address));

		for (const auto& pending : g_deferred_mprotect_queue)
		{
			if (pending.overlaps(host_addr))
			{
				// The flush clears the queue, so iteration must stop right here.
				mm_flush_mprotect_queue_internal();
				break;
			}
		}
	}
}
|
||||
|
||||
void mm_flush_lazy()
|
||||
{
|
||||
if (!g_cfg.video.multithreaded_rsx)
|
||||
{
|
||||
mm_flush();
|
||||
return;
|
||||
}
|
||||
|
||||
auto& rsxdma = g_fxo->get<rsx::dma_manager>();
|
||||
rsxdma.backend_ctrl(static_cast<u32>(mm_backend_ctrl::mm_flush), nullptr);
|
||||
}
|
||||
}
|
40
rpcs3/Emu/RSX/Host/MM.h
Normal file
40
rpcs3/Emu/RSX/Host/MM.h
Normal file
@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include <util/vm.hpp>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
struct MM_block
|
||||
{
|
||||
u64 start;
|
||||
u64 length;
|
||||
utils::protection prot;
|
||||
|
||||
inline bool overlaps(u64 start, u64 end) const
|
||||
{
|
||||
// [Start, End] is not a proper closed range, there is an off-by-one by design.
|
||||
// FIXME: Use address_range64
|
||||
const u64 this_end = this->start + this->length;
|
||||
return (this->start < end && start < this_end);
|
||||
}
|
||||
|
||||
inline bool overlaps(u64 addr) const
|
||||
{
|
||||
// [Start, End] is not a proper closed range, there is an off-by-one by design.
|
||||
// FIXME: Use address_range64
|
||||
const u64 this_end = this->start + this->length;
|
||||
return (addr >= start && addr < this_end);
|
||||
}
|
||||
};
|
||||
|
||||
enum class mm_backend_ctrl : u32
|
||||
{
|
||||
mm_flush = 0x80000002
|
||||
};
|
||||
|
||||
void mm_protect(void* start, u64 length, utils::protection prot);
|
||||
void mm_flush_lazy();
|
||||
void mm_flush(u32 vm_address);
|
||||
void mm_flush();
|
||||
}
|
@ -7,6 +7,9 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
void mm_flush_lazy();
|
||||
void mm_flush();
|
||||
|
||||
namespace util
|
||||
{
|
||||
template <bool FlushDMA, bool FlushPipe>
|
||||
@ -27,6 +30,7 @@ namespace rsx
|
||||
if constexpr (FlushDMA)
|
||||
{
|
||||
// If the backend handled the request, this call will basically be a NOP
|
||||
rsx::mm_flush_lazy();
|
||||
g_fxo->get<rsx::dma_manager>().sync();
|
||||
}
|
||||
|
||||
@ -34,6 +38,7 @@ namespace rsx
|
||||
{
|
||||
// Manually flush the pipeline.
|
||||
// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
|
||||
rsx::mm_flush();
|
||||
RSX(ctx)->sync();
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,8 @@ namespace vk
|
||||
enum // callback commands
|
||||
{
|
||||
rctrl_queue_submit = 0x80000000,
|
||||
rctrl_run_gc = 0x80000001
|
||||
rctrl_run_gc = 0x80000001,
|
||||
rctrl_mem_protect = 0x80000002,
|
||||
};
|
||||
|
||||
struct submit_packet
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "vkutils/scratch.h"
|
||||
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/Host/MM.h"
|
||||
#include "Emu/RSX/Host/RSXDMAWriter.h"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
@ -1010,6 +1011,8 @@ VKGSRender::~VKGSRender()
|
||||
|
||||
bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
{
|
||||
rsx::mm_flush(address);
|
||||
|
||||
vk::texture_cache::thrashed_set result;
|
||||
{
|
||||
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
|
||||
@ -2460,6 +2463,9 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
||||
{
|
||||
ensure(!m_queue_status.test_and_set(flush_queue_state::flushing));
|
||||
|
||||
// Host MM sync before executing anything on the GPU
|
||||
rsx::mm_flush();
|
||||
|
||||
// Workaround for deadlock occurring during RSX offloader fault
|
||||
// TODO: Restructure command submission infrastructure to avoid this condition
|
||||
const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
|
||||
@ -2823,6 +2829,11 @@ void VKGSRender::renderctl(u32 request_code, void* args)
|
||||
vk::on_event_completed(eid, true);
|
||||
break;
|
||||
}
|
||||
case vk::rctrl_mem_protect:
|
||||
{
|
||||
rsx::mm_flush();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fmt::throw_exception("Unhandled request code 0x%x", request_code);
|
||||
}
|
||||
|
@ -178,6 +178,7 @@ struct cfg_root : cfg::node
|
||||
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
|
||||
cfg::_bool host_label_synchronization{ this, "Allow Host GPU Labels", false };
|
||||
cfg::_bool disable_msl_fast_math{ this, "Disable MSL Fast Math", false };
|
||||
cfg::_bool async_host_memory_manager{ this, "Asynchronous Host Memory Manager", true, true };
|
||||
cfg::_enum<output_scaling_mode> output_scaling{ this, "Output Scaling Mode", output_scaling_mode::bilinear, true };
|
||||
|
||||
struct node_vk : cfg::node
|
||||
|
@ -104,6 +104,7 @@
|
||||
<ClCompile Include="Emu\perf_monitor.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
|
||||
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
|
||||
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
|
||||
@ -621,6 +622,7 @@
|
||||
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
|
||||
<ClInclude Include="Emu\RSX\Host\MM.h" />
|
||||
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h" />
|
||||
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.hpp" />
|
||||
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.inc.h" />
|
||||
|
@ -1312,6 +1312,9 @@
|
||||
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp">
|
||||
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Emu\RSX\Host\MM.cpp">
|
||||
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Crypto\aes.h">
|
||||
@ -2644,6 +2647,9 @@
|
||||
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h">
|
||||
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Host\MM.h">
|
||||
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||
|
Loading…
x
Reference in New Issue
Block a user