mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 12:32:43 +00:00
gl: Improve AMD multidraw workaround
- Reimplements the AMD workaround using an identity index buffer to avoid the performance hit of issuing multiple glDrawArrays calls for every single compiled set
- Reimplements first/count allocation using a scratch buffer to reduce allocation overhead when a large number of draw calls is used
This commit is contained in:
parent
eccb57d4b8
commit
1aa44ede31
@ -489,32 +489,33 @@ void GLGSRender::end()
|
||||
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
|
||||
}
|
||||
|
||||
m_index_ring_buffer->bind();
|
||||
|
||||
if (single_draw)
|
||||
{
|
||||
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<GLsizei> counts;
|
||||
std::vector<const GLvoid*> offsets;
|
||||
|
||||
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
|
||||
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
|
||||
uintptr_t index_ptr = index_offset;
|
||||
m_scratch_buffer.resize(draw_count * 16);
|
||||
|
||||
counts.reserve(draw_count);
|
||||
offsets.reserve(draw_count);
|
||||
GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
|
||||
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
||||
int dst_index = 0;
|
||||
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
{
|
||||
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
|
||||
counts.push_back(index_size);
|
||||
offsets.push_back((const GLvoid*)index_ptr);
|
||||
counts[dst_index] = index_size;
|
||||
offsets[dst_index++] = (const GLvoid*)index_ptr;
|
||||
|
||||
index_ptr += (index_size << type_scale);
|
||||
}
|
||||
|
||||
glMultiDrawElements(draw_mode, counts.data(), index_type, offsets.data(), (GLsizei)draw_count);
|
||||
glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -525,25 +526,36 @@ void GLGSRender::end()
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
|
||||
if (gl::get_driver_caps().vendor_AMD == false)
|
||||
const u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
|
||||
bool use_draw_arrays_fallback = false;
|
||||
|
||||
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
|
||||
const auto driver_caps = gl::get_driver_caps();
|
||||
|
||||
m_scratch_buffer.resize(draw_count * 24);
|
||||
GLint* firsts = (GLint*)m_scratch_buffer.data();
|
||||
GLsizei* counts = (GLsizei*)(firsts + draw_count);
|
||||
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
||||
int dst_index = 0;
|
||||
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
{
|
||||
std::vector<GLint> firsts;
|
||||
std::vector<GLsizei> counts;
|
||||
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
|
||||
const GLint first = range.first - base_index;
|
||||
const GLsizei count = range.second;
|
||||
|
||||
firsts.reserve(draw_count);
|
||||
counts.reserve(draw_count);
|
||||
firsts[dst_index] = first;
|
||||
counts[dst_index] = count;
|
||||
offsets[dst_index++] = (const GLvoid*)(first << 2);
|
||||
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
if (driver_caps.vendor_AMD && (first + count) > (0x100000 >> 2))
|
||||
{
|
||||
firsts.push_back(range.first - base_index);
|
||||
counts.push_back(range.second);
|
||||
//Unlikely, but added here in case the identity buffer is not large enough somehow
|
||||
use_draw_arrays_fallback = true;
|
||||
break;
|
||||
}
|
||||
|
||||
glMultiDrawArrays(draw_mode, firsts.data(), counts.data(), (GLsizei)draw_count);
|
||||
}
|
||||
else
|
||||
|
||||
if (use_draw_arrays_fallback)
|
||||
{
|
||||
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
|
||||
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
|
||||
@ -551,6 +563,17 @@ void GLGSRender::end()
|
||||
glDrawArrays(draw_mode, range.first - base_index, range.second);
|
||||
}
|
||||
}
|
||||
else if (driver_caps.vendor_AMD)
|
||||
{
|
||||
//Use identity index buffer to fix broken vertexID on AMD
|
||||
m_identity_index_buffer->bind();
|
||||
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Normal render
|
||||
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -733,6 +756,21 @@ void GLGSRender::on_init_thread()
|
||||
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
|
||||
if (gl_caps.vendor_AMD)
|
||||
{
|
||||
m_identity_index_buffer.reset(new gl::buffer);
|
||||
m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000);
|
||||
|
||||
// Initialize with 256k identity entries
|
||||
auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);
|
||||
for (u32 n = 0; n < (0x100000 >> 2); ++n)
|
||||
{
|
||||
dst[n] = n;
|
||||
}
|
||||
|
||||
m_identity_index_buffer->unmap();
|
||||
}
|
||||
|
||||
m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
|
||||
m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
|
||||
m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view);
|
||||
@ -902,6 +940,11 @@ void GLGSRender::on_exit()
|
||||
m_index_ring_buffer->remove();
|
||||
}
|
||||
|
||||
if (m_identity_index_buffer)
|
||||
{
|
||||
m_identity_index_buffer->remove();
|
||||
}
|
||||
|
||||
m_null_textures.clear();
|
||||
m_text_printer.close();
|
||||
m_gl_texture_cache.destroy();
|
||||
|
@ -290,6 +290,9 @@ private:
|
||||
std::unique_ptr<gl::ring_buffer> m_vertex_state_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
|
||||
|
||||
// Identity buffer used to fix broken gl_VertexID on ATI stack
|
||||
std::unique_ptr<gl::buffer> m_identity_index_buffer;
|
||||
|
||||
u32 m_draw_calls = 0;
|
||||
s64 m_begin_time = 0;
|
||||
s64 m_draw_time = 0;
|
||||
@ -334,6 +337,7 @@ private:
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
|
||||
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
|
||||
std::unordered_map<GLenum, std::unique_ptr<gl::texture>> m_null_textures;
|
||||
std::vector<u8> m_scratch_buffer;
|
||||
|
||||
public:
|
||||
GLGSRender();
|
||||
|
Loading…
x
Reference in New Issue
Block a user