rsx: Add support for batched multidraw

gl: Fix multidraw [WIP]
rsx: Ignore vertex base when data source is generated using arithmetic
vk: Check pending flag before doing fence poke
vk/gl: Fix for inlined array and immediate draws
rsx: Collapse joined draws when batching
This commit is contained in:
kd-11 2017-09-07 22:32:52 +03:00
parent abb56a354d
commit 061824a7ec
7 changed files with 288 additions and 19 deletions

View File

@ -30,6 +30,8 @@ GLGSRender::GLGSRender() : GSRender()
m_vertex_cache.reset(new gl::null_vertex_cache());
else
m_vertex_cache.reset(new gl::weak_vertex_cache());
supports_multidraw = true;
}
extern CellGcmContextData current_context;
@ -510,21 +512,85 @@ void GLGSRender::end()
m_program->validate();
}
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (indexed_draw_info)
{
const GLenum index_type = std::get<0>(indexed_draw_info.value());
const u32 index_offset = std::get<1>(indexed_draw_info.value());
if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
if (gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{
__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
}
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
if (single_draw)
{
glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
}
else
{
std::vector<GLsizei> counts;
std::vector<const GLvoid*> offsets;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uintptr_t index_ptr = index_offset;
counts.reserve(draw_count);
offsets.reserve(draw_count);
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
counts.push_back(index_size);
offsets.push_back((const GLvoid*)index_ptr);
index_ptr += (index_size << type_scale);
}
for (int i = 0; i < draw_count; ++i)
{
if (counts[i] > 0)
glDrawElements(draw_mode, counts[i], index_type, offsets[i]);
}
//glMultiDrawElements(draw_mode, counts.data(), index_type, offsets.data(), (GLsizei)draw_count);
}
}
else
{
glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count);
if (single_draw)
{
glDrawArrays(draw_mode, 0, vertex_draw_count);
}
else
{
std::vector<GLint> firsts;
std::vector<GLsizei> counts;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
firsts.reserve(draw_count);
counts.reserve(draw_count);
u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
firsts.push_back(range.first - base_index);
counts.push_back(range.second);
}
///*
// TEST FOR DRIVER BUGS - AMD: SHAME, SHAME, SHAME
for (int i = 0; i < draw_count; i++)
{
if (counts[i] > 0)
glDrawArrays(draw_mode, firsts[i], counts[i]);
}//*/
//glMultiDrawArrays(draw_mode, firsts.data(), counts.data(), (GLsizei)draw_count);
}
}
m_attrib_ring_buffer->notify();
@ -570,7 +636,7 @@ void GLGSRender::set_viewport()
//NOTE: window origin does not affect scissor region (probably only affects viewport matrix; already applied)
//See LIMBO [NPUB-30373] which uses shader window origin = top
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
glEnable(GL_SCISSOR_TEST);
}

View File

@ -168,6 +168,7 @@ OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
OPENGL_PROC(PFNGLMULTIDRAWELEMENTSPROC, MultiDrawElements);
OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);

View File

@ -277,6 +277,21 @@ namespace rsx
{
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
in_begin_end = true;
switch (rsx::method_registers.current_draw_clause.primitive)
{
case rsx::primitive_type::line_loop:
case rsx::primitive_type::line_strip:
case rsx::primitive_type::polygon:
case rsx::primitive_type::quad_strip:
case rsx::primitive_type::triangle_fan:
case rsx::primitive_type::triangle_strip:
// Adjacency matters for these types
rsx::method_registers.current_draw_clause.is_disjoint_primitive = false;
break;
default:
rsx::method_registers.current_draw_clause.is_disjoint_primitive = true;
}
}
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
@ -376,6 +391,65 @@ namespace rsx
// Raise priority above other threads
thread_ctrl::set_native_priority(1);
// Deferred calls are used to batch draws together
u32 deferred_primitive_type = 0;
u32 deferred_call_size = 0;
bool has_deferred_call = false;
auto flush_command_queue = [&]()
{
//TODO: Split first-count pairs if not consecutive
bool split_command = false;
std::vector <std::pair<u32, u32>> split_ranges;
auto first_count_cmds = method_registers.current_draw_clause.first_count_commands;
if (method_registers.current_draw_clause.first_count_commands.size() > 1)
{
u32 next = method_registers.current_draw_clause.first_count_commands.front().first;
u32 last_head = 0;
for (int n = 0; n < first_count_cmds.size(); ++n)
{
const auto &v = first_count_cmds[n];
if (v.first != next)
{
split_command = true;
split_ranges.push_back(std::make_pair(last_head, n));
last_head = n + 1;
}
next = v.first + v.second;
}
if (split_command)
split_ranges.push_back(std::make_pair(last_head, first_count_cmds.size() - 1));
}
if (!split_command)
{
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
}
else
{
std::vector<std::pair<u32, u32>> tmp;
auto list_head = first_count_cmds.begin();
for (auto &range : split_ranges)
{
tmp.resize(range.second - range.first + 1);
std::copy(list_head + range.first, list_head + range.second, tmp.begin());
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);
method_registers.current_draw_clause.first_count_commands = tmp;
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
}
}
deferred_primitive_type = 0;
deferred_call_size = 0;
has_deferred_call = false;
};
// TODO: exit condition
while (!Emu.IsStopped())
{
@ -387,6 +461,9 @@ namespace rsx
if (put == get || !Emu.IsRunning())
{
if (has_deferred_call)
flush_command_queue();
do_internal_task();
continue;
}
@ -472,7 +549,92 @@ namespace rsx
u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? first_cmd : first_cmd + i;
u32 value = args[i];
//LOG_NOTICE(RSX, "%s(0x%x) = 0x%x", get_method_name(reg).c_str(), reg, value);
bool execute_method_call = true;
if (supports_multidraw)
{
//TODO: Make this cleaner
bool flush_commands_flag = has_deferred_call;
switch (reg)
{
case NV4097_SET_BEGIN_END:
{
// Hook; Allows begin to go through, but ignores end
if (value && value != deferred_primitive_type)
deferred_primitive_type = value;
else
{
deferred_call_size++;
// Combine all calls since the last one
auto &first_count = method_registers.current_draw_clause.first_count_commands;
if (first_count.size() > deferred_call_size)
{
const auto &batch_first_count = first_count[deferred_call_size - 1];
u32 count = batch_first_count.second;
u32 next = batch_first_count.first + count;
for (int n = deferred_call_size; n < first_count.size(); n++)
{
if (first_count[n].first != next)
{
LOG_ERROR(RSX, "Non-continous first-count range passed as one draw; will be split.");
first_count[deferred_call_size - 1].second = count;
deferred_call_size++;
count = first_count[deferred_call_size - 1].second;
next = first_count[deferred_call_size - 1].first + count;
continue;
}
count += first_count[n].second;
next += first_count[n].second;
}
first_count[deferred_call_size - 1].second = count;
first_count.resize(deferred_call_size);
}
has_deferred_call = true;
flush_commands_flag = false;
execute_method_call = false;
}
break;
}
// These commands do not alter the pipeline state and deferred calls can still be active
// TODO: Add more commands here
case NV4097_INVALIDATE_VERTEX_FILE:
flush_commands_flag = false;
break;
case NV4097_DRAW_ARRAYS:
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
break;
}
case NV4097_DRAW_INDEX_ARRAY:
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
break;
}
}
if (flush_commands_flag)
{
flush_command_queue();
}
}
method_registers.decode(reg, value);
if (capture_current_frame)
@ -480,9 +642,12 @@ namespace rsx
frame_debug.command_queue.push_back(std::make_pair(reg, value));
}
if (auto method = methods[reg])
if (execute_method_call)
{
method(this, reg, value);
if (auto method = methods[reg])
{
method(this, reg, value);
}
}
if (invalid_command_interrupt_raised)
@ -1534,7 +1699,7 @@ namespace rsx
for (const auto &block : layout.interleaved_blocks)
{
u32 unique_verts;
u32 vertex_base = first_vertex * block.attribute_stride;
u32 vertex_base = 0;
if (block.single_vertex)
{
@ -1553,6 +1718,7 @@ namespace rsx
else
{
unique_verts = vertex_count;
vertex_base = first_vertex * block.attribute_stride;
}
const u32 data_size = block.attribute_stride * unique_verts;

View File

@ -144,6 +144,11 @@ namespace rsx
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
std::vector<u32> element_push_buffer;
s32 m_skip_frame_ctr = 0;
bool skip_frame = false;
bool supports_multidraw = false;
public:
RsxDmaControl* ctrl = nullptr;
@ -183,9 +188,6 @@ namespace rsx
bool m_transform_constants_dirty;
bool m_textures_dirty[16];
protected:
s32 m_skip_frame_ctr = 0;
bool skip_frame = false;
protected:
std::array<u32, 4> get_color_surface_addresses() const;
u32 get_zeta_surface_address() const;

View File

@ -637,6 +637,8 @@ VKGSRender::VKGSRender() : GSRender()
}
m_current_frame = &frame_context_storage[0];
supports_multidraw = true;
}
VKGSRender::~VKGSRender()
@ -1166,10 +1168,23 @@ void VKGSRender::end()
}
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info);
bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (!index_info)
{
const auto vertex_count = std::get<1>(upload_info);
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
if (single_draw)
{
const auto vertex_count = std::get<1>(upload_info);
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
}
else
{
const auto base_vertex = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
vkCmdDraw(*m_current_command_buffer, range.second, 1, range.first - base_vertex, 0);
}
}
}
else
{
@ -1178,9 +1193,22 @@ void VKGSRender::end()
VkDeviceSize offset;
std::tie(offset, index_type) = index_info.value();
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
if (single_draw)
{
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
}
else
{
u32 first_vertex = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, 0, first_vertex, 0);
first_vertex += verts;
}
}
}
vk::leave_uninterruptible();
@ -1441,7 +1469,10 @@ void VKGSRender::flush_command_queue(bool hard_sync)
//Clear all command buffer statuses
for (auto &cb : m_primary_cb_list)
cb.poke();
{
if (cb.pending)
cb.poke();
}
m_last_flushable_cb = -1;
m_flush_commands = false;
@ -1623,7 +1654,9 @@ void VKGSRender::do_local_task()
if (m_last_flushable_cb > -1)
{
auto cb = &m_primary_cb_list[m_last_flushable_cb];
cb->poke();
if (cb->pending)
cb->poke();
if (!cb->pending)
m_last_flushable_cb = -1;

View File

@ -300,7 +300,7 @@ namespace rsx
{
if (arg)
{
rsx::method_registers.current_draw_clause.first_count_commands.clear();
rsx::method_registers.current_draw_clause.first_count_commands.resize(0);
rsx::method_registers.current_draw_clause.command = draw_command::none;
rsx::method_registers.current_draw_clause.primitive = rsx::method_registers.primitive_mode();
rsxthr->begin();

View File

@ -29,6 +29,7 @@ namespace rsx
draw_command command;
bool is_immediate_draw;
bool is_disjoint_primitive;
std::vector<u32> inline_vertex_array;