mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 00:40:11 +00:00
vulkan: Optimize vertex data upload
- Reuse buffer views as much as possible; vkCreateBufferView is slow on NV. Implemented as a large sliding window, reusable until it is filled.
This commit is contained in:
parent
01349b8cee
commit
8ccaabb502
@ -131,4 +131,9 @@ public:
|
||||
else
|
||||
fmt::throw_exception("m_put_pos == m_get_pos!" HERE);
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
};
|
||||
|
@ -1034,7 +1034,7 @@ bool GLGSRender::check_program_state()
|
||||
return (rsx::method_registers.shader_program_address() != 0);
|
||||
}
|
||||
|
||||
void GLGSRender::load_program(const vertex_upload_info& upload_info)
|
||||
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
|
@ -21,6 +21,16 @@ namespace gl
|
||||
using null_vertex_cache = vertex_cache;
|
||||
|
||||
using shader_cache = rsx::shaders_cache<void*, GLProgramBuffer>;
|
||||
|
||||
struct vertex_upload_info
|
||||
{
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_index_base;
|
||||
u32 persistent_mapping_offset;
|
||||
u32 volatile_mapping_offset;
|
||||
std::optional<std::tuple<GLenum, u32> > index_info;
|
||||
};
|
||||
}
|
||||
|
||||
struct work_item
|
||||
@ -255,16 +265,6 @@ struct driver_state
|
||||
}
|
||||
};
|
||||
|
||||
struct vertex_upload_info
|
||||
{
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_index_base;
|
||||
u32 persistent_mapping_offset;
|
||||
u32 volatile_mapping_offset;
|
||||
std::optional<std::tuple<GLenum, u32> > index_info;
|
||||
};
|
||||
|
||||
class GLGSRender : public GSRender
|
||||
{
|
||||
private:
|
||||
@ -340,14 +340,14 @@ private:
|
||||
driver_state gl_state;
|
||||
|
||||
// Return element to draw and in case of indexed draw index type and offset in index buffer
|
||||
vertex_upload_info set_vertex_buffer();
|
||||
gl::vertex_upload_info set_vertex_buffer();
|
||||
rsx::vertex_input_layout m_vertex_layout = {};
|
||||
|
||||
void clear_surface(u32 arg);
|
||||
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
||||
|
||||
bool check_program_state();
|
||||
void load_program(const vertex_upload_info& upload_info);
|
||||
void load_program(const gl::vertex_upload_info& upload_info);
|
||||
|
||||
void update_draw_state();
|
||||
|
||||
|
@ -180,7 +180,7 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||
|
||||
@ -196,7 +196,7 @@ vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
|
||||
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
|
||||
vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
|
||||
gl::vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
|
||||
|
||||
if (required.first > 0)
|
||||
{
|
||||
|
@ -666,6 +666,9 @@ VKGSRender::~VKGSRender()
|
||||
vk::finalize_compiler_context();
|
||||
m_prog_buffer->clear();
|
||||
|
||||
m_persistent_attribute_storage.reset();
|
||||
m_volatile_attribute_storage.reset();
|
||||
|
||||
//Global resources
|
||||
vk::destroy_global_resources();
|
||||
|
||||
@ -1209,10 +1212,12 @@ void VKGSRender::end()
|
||||
|
||||
//Load program
|
||||
std::chrono::time_point<steady_clock> program_start = textures_end;
|
||||
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
|
||||
load_program(upload_info);
|
||||
|
||||
m_program->bind_uniform(m_persistent_attribute_storage, "persistent_input_stream", m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform(m_volatile_attribute_storage, "volatile_input_stream", m_current_frame->descriptor_set);
|
||||
VkBufferView persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
||||
VkBufferView volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
||||
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
|
||||
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
|
||||
|
||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||
@ -1445,8 +1450,6 @@ void VKGSRender::end()
|
||||
vkCmdClearAttachments(*m_current_command_buffer, static_cast<u32>(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect);
|
||||
}
|
||||
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info);
|
||||
|
||||
bool primitive_emulated = false;
|
||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||
|
||||
@ -1461,12 +1464,11 @@ void VKGSRender::end()
|
||||
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
|
||||
}
|
||||
|
||||
if (!index_info)
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (single_draw)
|
||||
{
|
||||
const auto vertex_count = std::get<1>(upload_info);
|
||||
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
|
||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1480,10 +1482,10 @@ void VKGSRender::end()
|
||||
else
|
||||
{
|
||||
VkIndexType index_type;
|
||||
u32 index_count = std::get<1>(upload_info);
|
||||
const u32 index_count = upload_info.vertex_draw_count;
|
||||
VkDeviceSize offset;
|
||||
|
||||
std::tie(offset, index_type) = index_info.value();
|
||||
std::tie(offset, index_type) = upload_info.index_info.value();
|
||||
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
|
||||
|
||||
if (single_draw)
|
||||
@ -2160,7 +2162,7 @@ bool VKGSRender::check_program_status()
|
||||
return (rsx::method_registers.shader_program_address() != 0);
|
||||
}
|
||||
|
||||
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
|
||||
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
@ -2343,11 +2345,13 @@ void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
|
||||
fill_scale_offset_data(buf, false);
|
||||
fill_user_clip_data(buf + 64);
|
||||
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
|
||||
*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
|
||||
*(reinterpret_cast<u32*>(buf + 132)) = vertex_info.vertex_index_base;
|
||||
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
|
||||
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
|
||||
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 160));
|
||||
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160),
|
||||
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
|
||||
|
||||
//Vertex constants
|
||||
buf = buf + 512;
|
||||
|
@ -23,6 +23,17 @@ namespace vk
|
||||
using null_vertex_cache = vertex_cache;
|
||||
|
||||
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>;
|
||||
|
||||
struct vertex_upload_info
|
||||
{
|
||||
VkPrimitiveTopology primitive;
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_index_base;
|
||||
u32 persistent_window_offset;
|
||||
u32 volatile_window_offset;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
};
|
||||
}
|
||||
|
||||
//Heap allocation sizes in MB
|
||||
@ -262,8 +273,8 @@ private:
|
||||
std::array<std::unique_ptr<vk::sampler>, rsx::limits::fragment_textures_count> fs_sampler_handles;
|
||||
std::array<std::unique_ptr<vk::sampler>, rsx::limits::vertex_textures_count> vs_sampler_handles;
|
||||
|
||||
VkBufferView m_persistent_attribute_storage;
|
||||
VkBufferView m_volatile_attribute_storage;
|
||||
std::unique_ptr<vk::buffer_view> m_persistent_attribute_storage;
|
||||
std::unique_ptr<vk::buffer_view> m_volatile_attribute_storage;
|
||||
|
||||
public:
|
||||
//vk::fbo draw_fbo;
|
||||
@ -379,11 +390,11 @@ private:
|
||||
|
||||
void check_heap_status();
|
||||
|
||||
/// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type)
|
||||
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
|
||||
vk::vertex_upload_info upload_vertex_data();
|
||||
|
||||
public:
|
||||
bool check_program_status();
|
||||
void load_program(u32 vertex_count, u32 vertex_base);
|
||||
void load_program(const vk::vertex_upload_info& vertex_info);
|
||||
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
||||
void read_buffers();
|
||||
void write_buffers();
|
||||
|
@ -616,6 +616,25 @@ namespace vk
|
||||
buffer_view(const buffer_view&) = delete;
|
||||
buffer_view(buffer_view&&) = delete;
|
||||
|
||||
// Checks whether the block [address, address + size) lies inside the window covered by this view,
// i.e. [info.offset, info.offset + info.range).
// On success, 'offset' receives the position of 'address' relative to the start of the view.
// On failure, 'offset' is left untouched. 'address' is taken by value, so callers may safely pass
// the same variable for both 'address' and 'offset'.
bool in_range(u32 address, u32 size, u32& offset) const
{
	// Anything that starts before the view cannot be inside it
	if (address < info.offset)
		return false;

	const u32 relative = address - static_cast<u32>(info.offset);

	// Starts past the end of the view
	if (relative > info.range)
		return false;

	// The block must fit in what is left of the view after 'relative'
	const auto available = info.range - relative;
	if (size > available)
		return false;

	offset = relative;
	return true;
}
|
||||
|
||||
private:
|
||||
VkDevice m_device;
|
||||
};
|
||||
|
@ -253,8 +253,7 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > >
|
||||
VKGSRender::upload_vertex_data()
|
||||
vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||
{
|
||||
m_vertex_layout = analyse_inputs_interleaved();
|
||||
|
||||
@ -266,11 +265,9 @@ VKGSRender::upload_vertex_data()
|
||||
|
||||
//Do actual vertex upload
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
u32 persistent_range_base = UINT32_MAX, volatile_range_base = UINT32_MAX;
|
||||
size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;
|
||||
|
||||
m_persistent_attribute_storage = VK_NULL_HANDLE;
|
||||
m_volatile_attribute_storage = VK_NULL_HANDLE;
|
||||
|
||||
if (required.first > 0)
|
||||
{
|
||||
//Check if cacheable
|
||||
@ -287,8 +284,7 @@ VKGSRender::upload_vertex_data()
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
||||
{
|
||||
in_cache = true;
|
||||
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
|
||||
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first));
|
||||
persistent_range_base = cached->offset_in_heap;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -299,8 +295,7 @@ VKGSRender::upload_vertex_data()
|
||||
if (!in_cache)
|
||||
{
|
||||
persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first);
|
||||
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
|
||||
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first));
|
||||
persistent_range_base = (u32)persistent_offset;
|
||||
|
||||
if (to_store)
|
||||
{
|
||||
@ -308,25 +303,12 @@ VKGSRender::upload_vertex_data()
|
||||
m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, (u32)persistent_offset);
|
||||
}
|
||||
}
|
||||
|
||||
m_persistent_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_persistent_attribute_storage = null_buffer_view->value;
|
||||
}
|
||||
|
||||
if (required.second > 0)
|
||||
{
|
||||
volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second);
|
||||
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device,
|
||||
m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
|
||||
|
||||
m_volatile_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_volatile_attribute_storage = null_buffer_view->value;
|
||||
volatile_range_base = (u32)volatile_offset;
|
||||
}
|
||||
|
||||
//Write all the data once if possible
|
||||
@ -358,5 +340,32 @@ VKGSRender::upload_vertex_data()
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_tuple(result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info);
|
||||
if (persistent_range_base != UINT32_MAX)
|
||||
{
|
||||
if (!m_persistent_attribute_storage || !m_persistent_attribute_storage->in_range(persistent_range_base, required.first, persistent_range_base))
|
||||
{
|
||||
if (m_persistent_attribute_storage)
|
||||
m_current_frame->buffer_views_to_clean.push_back(std::move(m_persistent_attribute_storage));
|
||||
|
||||
//View 64M blocks at a time (different drivers will only allow a fixed viewable heap size, 64M should be safe)
|
||||
const size_t view_size = (persistent_range_base + 0x4000000) > m_attrib_ring_info.size() ? m_attrib_ring_info.size() - persistent_range_base : 0x4000000;
|
||||
m_persistent_attribute_storage = std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_range_base, view_size);
|
||||
persistent_range_base = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (volatile_range_base != UINT32_MAX)
|
||||
{
|
||||
if (!m_volatile_attribute_storage || !m_volatile_attribute_storage->in_range(volatile_range_base, required.second, volatile_range_base))
|
||||
{
|
||||
if (m_volatile_attribute_storage)
|
||||
m_current_frame->buffer_views_to_clean.push_back(std::move(m_volatile_attribute_storage));
|
||||
|
||||
const size_t view_size = (volatile_range_base + 0x4000000) > m_attrib_ring_info.size() ? m_attrib_ring_info.size() - volatile_range_base : 0x4000000;
|
||||
m_volatile_attribute_storage = std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_range_base, view_size);
|
||||
volatile_range_base = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return{ result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, persistent_range_base, volatile_range_base, result.index_info };
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user