rsx: Get rid of an allocation in analyse_vertex_data that adds about 5% overhead.

This method is called many thousands of times per frame, and that single allocation introduces a small perf hit.
Just get rid of it; having it there doesn't improve anything.
This commit is contained in:
kd-11 2022-09-08 00:02:52 +03:00 committed by kd-11
parent cd53bb7eff
commit f43824762a
6 changed files with 57 additions and 40 deletions

View File

@ -467,7 +467,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
for (auto& info : m_vertex_layout.interleaved_blocks)
{
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
}
}

View File

@ -127,7 +127,7 @@ namespace
vertex_input_state operator()(const rsx::draw_inlined_array& /*command*/)
{
const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
@ -192,8 +192,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
{

View File

@ -1933,9 +1933,8 @@ namespace rsx
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{
interleaved_range_info info = {};
interleaved_range_info& info = *result.alloc_interleaved_block();
info.interleaved = true;
info.locations.reserve(8);
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
@ -1963,7 +1962,7 @@ namespace rsx
if (info.attribute_stride)
{
// At least one array feed must be enabled for vertex input
result.interleaved_blocks.emplace_back(std::move(info));
result.interleaved_blocks.push_back(&info);
}
return;
@ -2030,21 +2029,21 @@ namespace rsx
for (auto &block : result.interleaved_blocks)
{
if (block.single_vertex)
if (block->single_vertex)
{
//Single vertex definition, continue
continue;
}
if (block.attribute_stride != info.stride())
if (block->attribute_stride != info.stride())
{
//Stride does not match, continue
continue;
}
if (base_address > block.base_offset)
if (base_address > block->base_offset)
{
const u32 diff = base_address - block.base_offset;
const u32 diff = base_address - block->base_offset;
if (diff > info.stride())
{
//Not interleaved, continue
@ -2053,7 +2052,7 @@ namespace rsx
}
else
{
const u32 diff = block.base_offset - base_address;
const u32 diff = block->base_offset - base_address;
if (diff > info.stride())
{
//Not interleaved, continue
@ -2061,18 +2060,18 @@ namespace rsx
}
//Matches, and this address is lower than existing
block.base_offset = base_address;
block->base_offset = base_address;
}
alloc_new_block = false;
block.locations.push_back({ index, modulo, info.frequency() });
block.interleaved = true;
block->locations.push_back({ index, modulo, info.frequency() });
block->interleaved = true;
break;
}
if (alloc_new_block)
{
interleaved_range_info block = {};
interleaved_range_info& block = *result.alloc_interleaved_block();
block.base_offset = base_address;
block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31;
@ -2085,7 +2084,7 @@ namespace rsx
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
result.interleaved_blocks.emplace_back(std::move(block));
result.interleaved_blocks.push_back(&block);
}
}
}
@ -2093,7 +2092,7 @@ namespace rsx
for (auto &info : result.interleaved_blocks)
{
//Calculate real data address to be used during upload
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location);
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
}
}
@ -2353,7 +2352,7 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
volatile_memory_size += block.attribute_stride * vertex_count;
volatile_memory_size += block->attribute_stride * vertex_count;
}
}
else
@ -2400,7 +2399,7 @@ namespace rsx
{
const auto &block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset;
for (const auto& attrib : block.locations)
for (const auto& attrib : block->locations)
{
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
@ -2412,14 +2411,14 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
for (const auto& attrib : block.locations)
for (const auto& attrib : block->locations)
{
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset);
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
}
const auto range = block.calculate_required_range(first_vertex, vertex_count);
persistent_offset += block.attribute_stride * range.second;
const auto range = block->calculate_required_range(first_vertex, vertex_count);
persistent_offset += block->attribute_stride * range.second;
}
}
@ -2484,7 +2483,7 @@ namespace rsx
type = info.type();
size = info.size();
attrib0 = layout.interleaved_blocks[0].attribute_stride | default_frequency_mask;
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
}
}
else
@ -2624,12 +2623,12 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
auto range = block.calculate_required_range(first_vertex, vertex_count);
auto range = block->calculate_required_range(first_vertex, vertex_count);
const u32 data_size = range.second * block.attribute_stride;
const u32 vertex_base = range.first * block.attribute_stride;
const u32 data_size = range.second * block->attribute_stride;
const u32 vertex_base = range.first * block->attribute_stride;
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block.real_offset_address) + vertex_base, data_size);
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}

View File

@ -287,18 +287,36 @@ namespace rsx
transient = 2
};
struct vertex_input_layout
class vertex_input_layout
{
std::vector<interleaved_range_info> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
int m_num_used_blocks = 0;
std::array<interleaved_range_info, 16> m_blocks_data{};
public:
rsx::simple_array<interleaved_range_info*> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);
vertex_input_layout() = default;
// Hands out the next unused entry of the fixed-size m_blocks_data pool,
// reset to a known state, and advances the usage cursor.
// The returned pointer refers to storage owned by this object.
interleaved_range_info* alloc_interleaved_block()
{
	// NOTE(review): the index is not range-checked against the pool size (16);
	// this relies on callers never requesting more blocks than that between
	// clear() calls - confirm.
	interleaved_range_info& slot = m_blocks_data[m_num_used_blocks];
	++m_num_used_blocks;

	// clear() only rewinds the cursor, so a slot may still hold values from a
	// previous use. Reset every field explicitly before handing it out.
	slot.locations.clear();
	slot.single_vertex = false;
	slot.real_offset_address = 0;
	slot.memory_location = 0;
	slot.base_offset = 0;
	slot.attribute_stride = 0;

	// NOTE(review): callers also assign interleaved = true themselves;
	// confirm that defaulting it to true here is intended.
	slot.interleaved = true;

	return &slot;
}
void clear()
{
m_num_used_blocks = 0;
interleaved_blocks.clear();
volatile_blocks.clear();
referenced_registers.clear();
@ -309,7 +327,7 @@ namespace rsx
// Criteria: At least one array stream has to be defined to feed vertex positions
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
if (!interleaved_blocks.empty() && interleaved_blocks.front().attribute_stride != 0)
if (!interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0)
return true;
if (!volatile_blocks.empty())
@ -351,8 +369,8 @@ namespace rsx
u32 mem = 0;
for (auto &block : interleaved_blocks)
{
const auto range = block.calculate_required_range(first_vertex, vertex_count);
mem += range.second * block.attribute_stride;
const auto range = block->calculate_required_range(first_vertex, vertex_count);
mem += range.second * block->attribute_stride;
}
return mem;

View File

@ -677,7 +677,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
for (auto& info : m_vertex_layout.interleaved_blocks)
{
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
}
}

View File

@ -206,7 +206,7 @@ namespace
VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated);
const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;
if (!primitives_emulated)
{
@ -257,8 +257,8 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
{