mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-29 00:33:01 +00:00
rsx: Get rid of an allocation in analyse_vertex_data that adds about 5% overhead.
This method is called many thousands of times per frame, and that single allocation introduces a small performance hit. Just get rid of it; having it there doesn't improve anything.
This commit is contained in:
parent
cd53bb7eff
commit
f43824762a
@ -467,7 +467,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
|
||||
for (auto& info : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
|
||||
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
|
||||
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,7 +127,7 @@ namespace
|
||||
vertex_input_state operator()(const rsx::draw_inlined_array& /*command*/)
|
||||
{
|
||||
const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
|
||||
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
|
||||
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;
|
||||
|
||||
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
|
||||
{
|
||||
@ -192,8 +192,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
|
||||
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
||||
{
|
||||
|
@ -1933,9 +1933,8 @@ namespace rsx
|
||||
|
||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
interleaved_range_info info = {};
|
||||
interleaved_range_info& info = *result.alloc_interleaved_block();
|
||||
info.interleaved = true;
|
||||
info.locations.reserve(8);
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
@ -1963,7 +1962,7 @@ namespace rsx
|
||||
if (info.attribute_stride)
|
||||
{
|
||||
// At least one array feed must be enabled for vertex input
|
||||
result.interleaved_blocks.emplace_back(std::move(info));
|
||||
result.interleaved_blocks.push_back(&info);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -2030,21 +2029,21 @@ namespace rsx
|
||||
|
||||
for (auto &block : result.interleaved_blocks)
|
||||
{
|
||||
if (block.single_vertex)
|
||||
if (block->single_vertex)
|
||||
{
|
||||
//Single vertex definition, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (block.attribute_stride != info.stride())
|
||||
if (block->attribute_stride != info.stride())
|
||||
{
|
||||
//Stride does not match, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (base_address > block.base_offset)
|
||||
if (base_address > block->base_offset)
|
||||
{
|
||||
const u32 diff = base_address - block.base_offset;
|
||||
const u32 diff = base_address - block->base_offset;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
//Not interleaved, continue
|
||||
@ -2053,7 +2052,7 @@ namespace rsx
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 diff = block.base_offset - base_address;
|
||||
const u32 diff = block->base_offset - base_address;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
//Not interleaved, continue
|
||||
@ -2061,18 +2060,18 @@ namespace rsx
|
||||
}
|
||||
|
||||
//Matches, and this address is lower than existing
|
||||
block.base_offset = base_address;
|
||||
block->base_offset = base_address;
|
||||
}
|
||||
|
||||
alloc_new_block = false;
|
||||
block.locations.push_back({ index, modulo, info.frequency() });
|
||||
block.interleaved = true;
|
||||
block->locations.push_back({ index, modulo, info.frequency() });
|
||||
block->interleaved = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (alloc_new_block)
|
||||
{
|
||||
interleaved_range_info block = {};
|
||||
interleaved_range_info& block = *result.alloc_interleaved_block();
|
||||
block.base_offset = base_address;
|
||||
block.attribute_stride = info.stride();
|
||||
block.memory_location = info.offset() >> 31;
|
||||
@ -2085,7 +2084,7 @@ namespace rsx
|
||||
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
|
||||
result.interleaved_blocks.emplace_back(std::move(block));
|
||||
result.interleaved_blocks.push_back(&block);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2093,7 +2092,7 @@ namespace rsx
|
||||
for (auto &info : result.interleaved_blocks)
|
||||
{
|
||||
//Calculate real data address to be used during upload
|
||||
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location);
|
||||
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2353,7 +2352,7 @@ namespace rsx
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
volatile_memory_size += block.attribute_stride * vertex_count;
|
||||
volatile_memory_size += block->attribute_stride * vertex_count;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -2400,7 +2399,7 @@ namespace rsx
|
||||
{
|
||||
const auto &block = layout.interleaved_blocks[0];
|
||||
u32 inline_data_offset = volatile_offset;
|
||||
for (const auto& attrib : block.locations)
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
|
||||
|
||||
@ -2412,14 +2411,14 @@ namespace rsx
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
for (const auto& attrib : block.locations)
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
|
||||
offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset);
|
||||
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
|
||||
}
|
||||
|
||||
const auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
persistent_offset += block.attribute_stride * range.second;
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
persistent_offset += block->attribute_stride * range.second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2484,7 +2483,7 @@ namespace rsx
|
||||
type = info.type();
|
||||
size = info.size();
|
||||
|
||||
attrib0 = layout.interleaved_blocks[0].attribute_stride | default_frequency_mask;
|
||||
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -2624,12 +2623,12 @@ namespace rsx
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
|
||||
const u32 data_size = range.second * block.attribute_stride;
|
||||
const u32 vertex_base = range.first * block.attribute_stride;
|
||||
const u32 data_size = range.second * block->attribute_stride;
|
||||
const u32 vertex_base = range.first * block->attribute_stride;
|
||||
|
||||
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block.real_offset_address) + vertex_base, data_size);
|
||||
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
|
||||
persistent += data_size;
|
||||
}
|
||||
}
|
||||
|
@ -287,18 +287,36 @@ namespace rsx
|
||||
transient = 2
|
||||
};
|
||||
|
||||
struct vertex_input_layout
|
||||
class vertex_input_layout
|
||||
{
|
||||
std::vector<interleaved_range_info> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
|
||||
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
|
||||
int m_num_used_blocks = 0;
|
||||
std::array<interleaved_range_info, 16> m_blocks_data{};
|
||||
|
||||
public:
|
||||
rsx::simple_array<interleaved_range_info*> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
|
||||
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
|
||||
|
||||
std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);
|
||||
|
||||
vertex_input_layout() = default;
|
||||
|
||||
// Hands out the next unused slot of the fixed m_blocks_data array and resets
// it to a known state before use.
// Returns a pointer into m_blocks_data; the storage belongs to this layout
// object, so the caller must not free it.
// NOTE(review): no bounds check is visible here. m_blocks_data is declared
// with 16 entries, so more than 16 allocations without m_num_used_blocks
// being reset would index past the array - confirm callers cannot exceed it.
interleaved_range_info* alloc_interleaved_block()
|
||||
{
|
||||
// Take the slot at the current counter value, then advance the counter.
auto result = &m_blocks_data[m_num_used_blocks++];
|
||||
// Slots are reused once the counter is reset, so every field is
// overwritten explicitly instead of relying on the initial value.
result->attribute_stride = 0;
|
||||
result->base_offset = 0;
|
||||
result->memory_location = 0;
|
||||
result->real_offset_address = 0;
|
||||
result->single_vertex = false;
|
||||
// Drop any attribute locations left over from the slot's previous use.
result->locations.clear();
|
||||
// Every freshly allocated block starts out flagged as interleaved.
result->interleaved = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_num_used_blocks = 0;
|
||||
interleaved_blocks.clear();
|
||||
volatile_blocks.clear();
|
||||
referenced_registers.clear();
|
||||
@ -309,7 +327,7 @@ namespace rsx
|
||||
// Criteria: At least one array stream has to be defined to feed vertex positions
|
||||
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
|
||||
|
||||
if (!interleaved_blocks.empty() && interleaved_blocks.front().attribute_stride != 0)
|
||||
if (!interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0)
|
||||
return true;
|
||||
|
||||
if (!volatile_blocks.empty())
|
||||
@ -351,8 +369,8 @@ namespace rsx
|
||||
u32 mem = 0;
|
||||
for (auto &block : interleaved_blocks)
|
||||
{
|
||||
const auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
mem += range.second * block.attribute_stride;
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
mem += range.second * block->attribute_stride;
|
||||
}
|
||||
|
||||
return mem;
|
||||
|
@ -677,7 +677,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
for (auto& info : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
|
||||
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
|
||||
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -206,7 +206,7 @@ namespace
|
||||
VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated);
|
||||
|
||||
const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
|
||||
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
|
||||
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;
|
||||
|
||||
if (!primitives_emulated)
|
||||
{
|
||||
@ -257,8 +257,8 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
|
||||
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
|
||||
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
|
||||
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
|
||||
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user