rsx: Asynchronous data transfer

This commit is contained in:
kd-11 2019-06-17 22:59:03 +03:00 committed by kd-11
parent 358169507c
commit 0fa3bcc336
10 changed files with 200 additions and 7 deletions

View File

@ -43,6 +43,7 @@ namespace rsx
{
std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
thread* g_current_renderer = nullptr;
dma_manager g_dma_manager;
u32 get_address(u32 offset, u32 location)
{
@ -250,6 +251,126 @@ namespace rsx
}
}
// initialization
void dma_manager::init()
{
m_worker_state = thread_state::created;
m_worker_thread = std::thread([this]()
{
if (!g_cfg.video.multithreaded_rsx)
{
// Abort
return;
}
if (g_cfg.core.thread_scheduler_enabled)
{
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
}
bool idle = false;
while (m_worker_state != thread_state::finished)
{
if (!m_work_queue.empty())
{
m_queue_mutex.lock();
auto task = std::move(m_work_queue.front());
m_work_queue.pop_front();
m_queue_mutex.unlock();
if (idle)
{
thread_ctrl::set_native_priority(0);
idle = false;
}
switch (task.type)
{
case raw_copy:
memcpy(task.dst, task.src, task.length);
break;
case vector_copy:
memcpy(task.dst, task.opt_storage.data(), task.length);
break;
case index_emulate:
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
reinterpret_cast<char*>(task.dst),
static_cast<rsx::primitive_type>(task.aux_param0),
task.length);
break;
default:
ASSUME(0);
fmt::throw_exception("Unreachable" HERE);
}
}
else
{
idle = true;
thread_ctrl::set_native_priority(-1);
std::this_thread::yield();
}
}
});
}
// General transport
// Copies `length` bytes from the vector `src` to `dst`.
//
// With "Multithreaded RSX" enabled the request is appended to the work queue
// (the packet constructor takes over the vector's storage by moving from
// `src`); otherwise the bytes are copied immediately on the calling thread and
// `src` is left untouched.
void dma_manager::copy(void *dst, std::vector<u8>& src, u32 length)
{
	if (g_cfg.video.multithreaded_rsx)
	{
		// Deferred path: hand the request to the worker thread.
		std::lock_guard lock(m_queue_mutex);
		m_work_queue.emplace_back(dst, src, length);
		return;
	}

	// Synchronous path.
	std::memcpy(dst, src.data(), length);
}
// Copies `length` bytes from `src` to `dst`.
//
// With "Multithreaded RSX" enabled only the pointers and the length are
// queued, and the actual memcpy is performed later by the worker thread, so
// both buffers have to stay valid until the request has been processed.
// Otherwise the copy is done immediately on the calling thread.
void dma_manager::copy(void *dst, void *src, u32 length)
{
	if (g_cfg.video.multithreaded_rsx)
	{
		// Deferred path: hand the request to the worker thread.
		std::lock_guard lock(m_queue_mutex);
		m_work_queue.emplace_back(dst, src, length);
		return;
	}

	// Synchronous path.
	std::memcpy(dst, src, length);
}
// Vertex utilities
// Fills `dst` with an index array for a non-indexed draw of `primitive`
// covering `count` vertices.
//
// With "Multithreaded RSX" enabled the request is queued for the worker
// thread; otherwise the index array is written immediately on the calling
// thread.
void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count)
{
	if (g_cfg.video.multithreaded_rsx)
	{
		// Deferred path: hand the request to the worker thread.
		std::lock_guard lock(m_queue_mutex);
		m_work_queue.emplace_back(dst, primitive, count);
		return;
	}

	// Synchronous path.
	write_index_array_for_non_indexed_non_native_primitive_to_buffer(
		reinterpret_cast<char*>(dst), primitive, count);
}
// Synchronization
void dma_manager::sync()
{
if (g_cfg.video.multithreaded_rsx)
{
while (!m_work_queue.empty())
_mm_lfence();
}
}
// Requests the worker thread to stop and waits for it to exit.
//
// Safe to call when no worker is running (before init() or after a previous
// join()): std::thread::join() throws std::system_error on a non-joinable
// thread, so the call is guarded with joinable().
void dma_manager::join()
{
	// The worker loop exits once it observes this state.
	m_worker_state = thread_state::finished;

	if (m_worker_thread.joinable())
	{
		m_worker_thread.join();
	}
}
thread::thread()
{
g_current_renderer = this;
@ -436,6 +557,8 @@ namespace rsx
method_registers.init();
g_dma_manager.init();
if (!zcull_ctrl)
{
//Backend did not provide an implementation, provide NULL object
@ -572,6 +695,7 @@ namespace rsx
// Exit hook of the RSX thread: raises the exiting flag, then stops and joins
// the DMA worker owned by g_dma_manager.
void thread::on_exit()
{
// Flag the thread as exiting before tearing down the DMA worker.
m_rsx_thread_exiting = true;
// Blocks until the DMA worker thread has terminated.
g_dma_manager.join();
}
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
@ -2094,7 +2218,7 @@ namespace rsx
const u32 data_size = range.second * block.attribute_stride;
const u32 vertex_base = range.first * block.attribute_stride;
memcpy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size);
g_dma_manager.copy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}

View File

@ -297,6 +297,62 @@ namespace rsx
}
};
// Queues memory transfers and index-buffer generation so that they can be
// executed by a dedicated worker thread when "Multithreaded RSX" is enabled.
// With the option disabled every request is executed immediately by the
// caller.
class dma_manager
{
	// Kind of operation stored in a transport_packet.
	enum op
	{
		raw_copy = 0,      // memcpy from src to dst
		vector_copy = 1,   // memcpy from opt_storage to dst
		index_emulate = 2  // generate an index array into dst
	};

	// One queued request. Fields that a given operation does not use keep
	// their default value instead of being left indeterminate.
	struct transport_packet
	{
		op type = op::raw_copy;
		std::vector<u8> opt_storage;  // owned source data (vector_copy only)
		void *src = nullptr;          // source pointer (raw_copy only)
		void *dst = nullptr;          // destination pointer
		u32 length = 0;               // byte count, or vertex count for index_emulate
		u32 aux_param0 = 0;           // primitive type (index_emulate only)
		u32 aux_param1 = 0;           // currently unused

		// Raw pointer-to-pointer copy of `len` bytes.
		transport_packet(void *_dst, void *_src, u32 len)
			: type(op::raw_copy), src(_src), dst(_dst), length(len)
		{}

		// Copy of `len` bytes out of a vector; the vector's storage is moved
		// into the packet, leaving `_src` in a moved-from state.
		transport_packet(void *_dst, std::vector<u8>& _src, u32 len)
			: type(op::vector_copy), opt_storage(std::move(_src)), dst(_dst), length(len)
		{}

		// Index array generation for `len` vertices of primitive `prim`.
		transport_packet(void *_dst, rsx::primitive_type prim, u32 len)
			: type(op::index_emulate), dst(_dst), length(len), aux_param0(static_cast<u8>(prim))
		{}
	};

	std::deque<transport_packet> m_work_queue;  // pending requests, guarded by m_queue_mutex
	std::thread m_worker_thread;
	std::mutex m_queue_mutex;

	// NOTE(review): written by join() and read by the worker thread without
	// synchronization; consider making this an atomic.
	thread_state m_worker_state = thread_state::created;

public:
	dma_manager() = default;

	// initialization
	void init();

	// General transport
	void copy(void *dst, std::vector<u8>& src, u32 length);
	void copy(void *dst, void *src, u32 length);

	// Vertex utilities
	void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count);

	// Synchronization
	void sync();
	void join();
};
extern dma_manager g_dma_manager;
struct framebuffer_layout
{
u16 width;

View File

@ -2137,6 +2137,7 @@ void VKGSRender::clear_surface(u32 mask)
void VKGSRender::flush_command_queue(bool hard_sync)
{
rsx::g_dma_manager.sync();
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
if (hard_sync)

View File

@ -3276,7 +3276,7 @@ public:
void unmap(bool force = false)
{
if (force || g_cfg.video.disable_vulkan_mem_allocator)
if (force)
{
if (shadow)
shadow->unmap();

View File

@ -60,7 +60,6 @@ namespace vk
namespace
{
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
const rsx::draw_clause& clause, u32 vertex_count,
vk::data_heap& m_index_buffer_ring_info)
@ -71,8 +70,7 @@ namespace
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
reinterpret_cast<char*>(buf), clause.primitive, vertex_count);
rsx::g_dma_manager.emulate_as_indexed(buf, clause.primitive, vertex_count);
m_index_buffer_ring_info.unmap();
return std::make_tuple(

View File

@ -461,6 +461,7 @@ struct cfg_root : cfg::node
cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false};
cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false};
cfg::_bool disable_native_float16{this, "Disable native float16 support", false};
cfg::_bool multithreaded_rsx{this, "Multithreaded RSX", false};
cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1};
cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1};
cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100};

View File

@ -120,7 +120,8 @@
"scrictModeRendering": "Enforces strict compliance to the API specification.\nMight result in degraded performance in some games.\nCan resolve rare cases of missing graphics and flickering.\nIf unsure, don't use this option.",
"disableVertexCache": "Disables the vertex cache.\nMight resolve missing or flickering graphics output.\nMay degrade performance.",
"disableAsyncShaders": "Disables asynchronous shader compilation.\nFixes missing graphics while shaders are compiling but introduces stuttering.\nDisable if you do not want to deal with graphics pop-in, or for testing before filing any bug reports.",
"stretchToDisplayArea": "Overrides the aspect ratio and stretches the image to the full display area."
"stretchToDisplayArea": "Overrides the aspect ratio and stretches the image to the full display area.",
"multithreadedRSX": "Offloads some RSX operations to a secondary thread.\nMay improve performance for some high-core processors.\nMay cause slowdown in some situations due to the extra worker thread load."
}
},
"gui": {

View File

@ -80,6 +80,7 @@ public:
DisableOnDiskShaderCache,
DisableVulkanMemAllocator,
DisableAsyncShaderCompiler,
MultithreadedRSX,
// Performance Overlay
PerfOverlayEnabled,
@ -285,6 +286,7 @@ private:
{ DisableOnDiskShaderCache, { "Video", "Disable On-Disk Shader Cache"}},
{ DisableVulkanMemAllocator, { "Video", "Disable Vulkan Memory Allocator"}},
{ DisableAsyncShaderCompiler, { "Video", "Disable Asynchronous Shader Compiler"}},
{ MultithreadedRSX, { "Video", "Multithreaded RSX"}},
{ AnisotropicFilterOverride, { "Video", "Anisotropic Filter Override"}},
{ ResolutionScale, { "Video", "Resolution Scale"}},
{ MinimumScalableDimension, { "Video", "Minimum Scalable Dimension"}},

View File

@ -563,6 +563,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> guiSettings, std:
xemu_settings->EnhanceCheckBox(ui->disableVertexCache, emu_settings::DisableVertexCache);
SubscribeTooltip(ui->disableVertexCache, json_gpu_main["disableVertexCache"].toString());
xemu_settings->EnhanceCheckBox(ui->multithreadedRSX, emu_settings::MultithreadedRSX);
SubscribeTooltip(ui->multithreadedRSX, json_gpu_main["multithreadedRSX"].toString());
xemu_settings->EnhanceCheckBox(ui->disableAsyncShaders, emu_settings::DisableAsyncShaderCompiler);
SubscribeTooltip(ui->disableAsyncShaders, json_gpu_main["disableAsyncShaders"].toString());

View File

@ -36,7 +36,7 @@
</sizepolicy>
</property>
<property name="currentIndex">
<number>0</number>
<number>1</number>
</property>
<widget class="QWidget" name="coreTab">
<attribute name="title">
@ -695,6 +695,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="multithreadedRSX">
<property name="text">
<string>Multithreaded RSX</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="disableAsyncShaders">
<property name="text">