From 0fa3bcc336b43613ab8d9636e955043b77a0bd6e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 17 Jun 2019 22:59:03 +0300 Subject: [PATCH] rsx: Asynchronous data transfer --- rpcs3/Emu/RSX/RSXThread.cpp | 126 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/RSXThread.h | 56 ++++++++++++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 1 + rpcs3/Emu/RSX/VK/VKHelpers.h | 2 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 4 +- rpcs3/Emu/System.h | 1 + rpcs3/Json/tooltips.json | 3 +- rpcs3/rpcs3qt/emu_settings.h | 2 + rpcs3/rpcs3qt/settings_dialog.cpp | 3 + rpcs3/rpcs3qt/settings_dialog.ui | 9 +- 10 files changed, 200 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 7796c6baec..caaed33974 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -43,6 +43,7 @@ namespace rsx { std::function g_access_violation_handler; thread* g_current_renderer = nullptr; + dma_manager g_dma_manager; u32 get_address(u32 offset, u32 location) { @@ -250,6 +251,126 @@ namespace rsx } } + // initialization + void dma_manager::init() + { + m_worker_state = thread_state::created; + m_worker_thread = std::thread([this]() + { + if (!g_cfg.video.multithreaded_rsx) + { + // Abort + return; + } + + if (g_cfg.core.thread_scheduler_enabled) + { + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); + } + + bool idle = false; + while (m_worker_state != thread_state::finished) + { + if (!m_work_queue.empty()) + { + m_queue_mutex.lock(); + auto task = std::move(m_work_queue.front()); + m_work_queue.pop_front(); + m_queue_mutex.unlock(); + + if (idle) + { + thread_ctrl::set_native_priority(0); + idle = false; + } + + switch (task.type) + { + case raw_copy: + memcpy(task.dst, task.src, task.length); + break; + case vector_copy: + memcpy(task.dst, task.opt_storage.data(), task.length); + break; + case index_emulate: + write_index_array_for_non_indexed_non_native_primitive_to_buffer( + reinterpret_cast(task.dst), + static_cast(task.aux_param0), + task.length); + break; + default: + ASSUME(0); + fmt::throw_exception("Unreachable" HERE); + } + } + else + { + idle = true; + thread_ctrl::set_native_priority(-1); + std::this_thread::yield(); + } + } + }); + } + + // General tranport + void dma_manager::copy(void *dst, std::vector& src, u32 length) + { + if (!g_cfg.video.multithreaded_rsx) + { + std::memcpy(dst, src.data(), length); + } + else + { + std::lock_guard lock(m_queue_mutex); + m_work_queue.emplace_back(dst, src, length); + } + } + + void dma_manager::copy(void *dst, void *src, u32 length) + { + if (!g_cfg.video.multithreaded_rsx) + { + std::memcpy(dst, src, length); + } + else + { + std::lock_guard lock(m_queue_mutex); + m_work_queue.emplace_back(dst, src, length); + } + } + + // Vertex utilities + void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count) + { + if (!g_cfg.video.multithreaded_rsx) + { + write_index_array_for_non_indexed_non_native_primitive_to_buffer( + reinterpret_cast(dst), primitive, count); + } + else + { + std::lock_guard lock(m_queue_mutex); + m_work_queue.emplace_back(dst, primitive, count); + } + } + + // Synchronization + void dma_manager::sync() + { + if (g_cfg.video.multithreaded_rsx) + { + while (!m_work_queue.empty()) + _mm_lfence(); + } + } + + void dma_manager::join() + { + m_worker_state = thread_state::finished; + m_worker_thread.join(); + } + thread::thread() { g_current_renderer = this; @@ -436,6 +557,8 @@ namespace rsx method_registers.init(); + g_dma_manager.init(); + if (!zcull_ctrl) { //Backend did not provide an implementation, provide NULL object @@ -572,6 +695,7 @@ namespace rsx void thread::on_exit() { m_rsx_thread_exiting = true; + g_dma_manager.join(); } void thread::fill_scale_offset_data(void *buffer, bool flip_y) const @@ -2094,7 +2218,7 @@ namespace rsx const u32 data_size = range.second * block.attribute_stride; const u32 vertex_base = range.first * block.attribute_stride; - memcpy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size); + g_dma_manager.copy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size); persistent += data_size; } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 644b2db32a..260f66ea56 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -297,6 +297,62 @@ namespace rsx } }; + class dma_manager + { + enum op + { + raw_copy = 0, + vector_copy = 1, + index_emulate = 2 + }; + + struct transport_packet + { + op type; + std::vector opt_storage; + void *src; + void *dst; + u32 length; + u32 aux_param0; + u32 aux_param1; + + transport_packet(void *_dst, void *_src, u32 len) + : src(_src), dst(_dst), length(len), type(op::raw_copy) + {} + + transport_packet(void *_dst, std::vector& _src, u32 len) + : dst(_dst), opt_storage(std::move(_src)), length(len), type(op::vector_copy) + {} + + transport_packet(void *_dst, rsx::primitive_type prim, u32 len) + : dst(_dst), aux_param0(static_cast(prim)), length(len), type(op::index_emulate) + {} + }; + + std::deque m_work_queue; + std::thread m_worker_thread; + std::mutex m_queue_mutex; + thread_state m_worker_state; + + public: + dma_manager() = default; + + // initialization + void init(); + + // General tranport + void copy(void *dst, std::vector& src, u32 length); + void copy(void *dst, void *src, u32 length); + + // Vertex utilities + void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count); + + // Synchronization + void sync(); + void join(); + }; + extern dma_manager g_dma_manager; + struct framebuffer_layout { u16 width; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 980a8f03c5..cf85cbf507 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2137,6 +2137,7 @@ void VKGSRender::clear_surface(u32 mask) void VKGSRender::flush_command_queue(bool hard_sync) { + rsx::g_dma_manager.sync(); close_and_submit_command_buffer(m_current_command_buffer->submit_fence); if (hard_sync) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 88c11d6ac7..fa97b4f228 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -3276,7 +3276,7 @@ public: void unmap(bool force = false) { - if (force || g_cfg.video.disable_vulkan_mem_allocator) + if (force) { if (shadow) shadow->unmap(); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 6dfd49014d..8da5cfdd31 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -60,7 +60,6 @@ namespace vk namespace { - std::tuple> generate_emulating_index_buffer( const rsx::draw_clause& clause, u32 vertex_count, vk::data_heap& m_index_buffer_ring_info) @@ -71,8 +70,7 @@ namespace VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size); - write_index_array_for_non_indexed_non_native_primitive_to_buffer( - reinterpret_cast(buf), clause.primitive, vertex_count); + rsx::g_dma_manager.emulate_as_indexed(buf, clause.primitive, vertex_count); m_index_buffer_ring_info.unmap(); return std::make_tuple( diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index cdf38b1027..081cc34655 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -461,6 +461,7 @@ struct cfg_root : cfg::node cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false}; cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false}; cfg::_bool disable_native_float16{this, "Disable native float16 support", false}; + cfg::_bool multithreaded_rsx{this, "Multithreaded RSX", false}; cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1}; cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1}; cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100}; diff --git a/rpcs3/Json/tooltips.json b/rpcs3/Json/tooltips.json index 3c94bd901a..c58177906c 100644 --- a/rpcs3/Json/tooltips.json +++ b/rpcs3/Json/tooltips.json @@ -120,7 +120,8 @@ "scrictModeRendering": "Enforces strict compliance to the API specification.\nMight result in degraded performance in some games.\nCan resolve rare cases of missing graphics and flickering.\nIf unsure, don't use this option.", "disableVertexCache": "Disables the vertex cache.\nMight resolve missing or flickering graphics output.\nMay degrade performance.", "disableAsyncShaders": "Disables asynchronous shader compilation.\nFixes missing graphics while shaders are compiling but introduces stuttering.\nDisable if you do not want to deal with graphics pop-in, or for testing before filing any bug reports.", - "stretchToDisplayArea": "Overrides the aspect ratio and stretches the image to the full display area." + "stretchToDisplayArea": "Overrides the aspect ratio and stretches the image to the full display area.", + "multithreadedRSX": "Offloads some RSX operations to a secondary thread.\nMay improve performance for some high-core processors.\nMay cause slowdown in some situations due to the extra worker thread load." } }, "gui": { diff --git a/rpcs3/rpcs3qt/emu_settings.h b/rpcs3/rpcs3qt/emu_settings.h index dc869134ba..8c21ed6501 100644 --- a/rpcs3/rpcs3qt/emu_settings.h +++ b/rpcs3/rpcs3qt/emu_settings.h @@ -80,6 +80,7 @@ public: DisableOnDiskShaderCache, DisableVulkanMemAllocator, DisableAsyncShaderCompiler, + MultithreadedRSX, // Performance Overlay PerfOverlayEnabled, @@ -285,6 +286,7 @@ private: { DisableOnDiskShaderCache, { "Video", "Disable On-Disk Shader Cache"}}, { DisableVulkanMemAllocator, { "Video", "Disable Vulkan Memory Allocator"}}, { DisableAsyncShaderCompiler, { "Video", "Disable Asynchronous Shader Compiler"}}, + { MultithreadedRSX, { "Video", "Multithreaded RSX"}}, { AnisotropicFilterOverride, { "Video", "Anisotropic Filter Override"}}, { ResolutionScale, { "Video", "Resolution Scale"}}, { MinimumScalableDimension, { "Video", "Minimum Scalable Dimension"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 0505716812..6126a4dfce 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -563,6 +563,9 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: xemu_settings->EnhanceCheckBox(ui->disableVertexCache, emu_settings::DisableVertexCache); SubscribeTooltip(ui->disableVertexCache, json_gpu_main["disableVertexCache"].toString()); + xemu_settings->EnhanceCheckBox(ui->multithreadedRSX, emu_settings::MultithreadedRSX); + SubscribeTooltip(ui->multithreadedRSX, json_gpu_main["multithreadedRSX"].toString()); + xemu_settings->EnhanceCheckBox(ui->disableAsyncShaders, emu_settings::DisableAsyncShaderCompiler); SubscribeTooltip(ui->disableAsyncShaders, json_gpu_main["disableAsyncShaders"].toString()); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index b7e5415ea5..e750a1037a 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -36,7 +36,7 @@ - 0 + 1 @@ -695,6 +695,13 @@ + + + + Multithreaded RSX + + +