mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-22 03:40:30 +00:00
rsx: Reimplement attrib divisor
This commit is contained in:
parent
736415fcd9
commit
fb778e4821
@ -361,7 +361,7 @@ namespace glsl
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) / int(desc.frequency); \n"
|
||||
" vertex_id = vertex_id / int(desc.frequency); \n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
|
@ -478,7 +478,7 @@ void GLGSRender::end()
|
||||
{
|
||||
if (!subdraw)
|
||||
{
|
||||
m_vertex_layout = analyse_inputs_interleaved();
|
||||
analyse_inputs_interleaved(m_vertex_layout);
|
||||
if (!m_vertex_layout.validate())
|
||||
{
|
||||
// Execute remaining pipeline barriers with NOP draw
|
||||
@ -1427,7 +1427,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
||||
buf[1] = upload_info.vertex_index_offset;
|
||||
buf += 4;
|
||||
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
|
||||
m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);
|
||||
|
||||
|
@ -24,6 +24,7 @@ namespace gl
|
||||
{
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 first_vertex;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
u32 persistent_mapping_offset;
|
||||
|
@ -48,10 +48,10 @@ namespace
|
||||
|
||||
struct vertex_input_state
|
||||
{
|
||||
bool index_rebase;
|
||||
u32 min_index;
|
||||
u32 max_index;
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_data_base;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
std::optional<std::tuple<GLenum, u32>> index_info;
|
||||
};
|
||||
@ -70,6 +70,7 @@ namespace
|
||||
{
|
||||
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
|
||||
const u32 max_index = (min_index + vertex_count) - 1;
|
||||
|
||||
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
|
||||
{
|
||||
@ -79,10 +80,10 @@ namespace
|
||||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer,
|
||||
rsx::method_registers.current_draw_clause.get_elements_count());
|
||||
|
||||
return{ index_count, vertex_count, min_index, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
return{ false, min_index, max_index, index_count, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
return{ vertex_count, vertex_count, min_index, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
return{ false, min_index, max_index, vertex_count, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
|
||||
@ -117,26 +118,13 @@ namespace
|
||||
if (min_index >= max_index)
|
||||
{
|
||||
//empty set, do not draw
|
||||
return{ 0, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
return{ false, 0, 0, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
// Prefer only reading the vertices that are referenced in the index buffer itself
|
||||
// Offset data source by min_index verts, but also notify the shader to offset the vertexID (important for modulo op)
|
||||
const auto index_offset = rsx::method_registers.vertex_data_base_index();
|
||||
|
||||
//check for vertex arrays with frequency modifiers
|
||||
for (auto &block : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
if (block.min_divisor > 1)
|
||||
{
|
||||
//Ignore base offsets and return real results
|
||||
//The upload function will optimize the uploaded range anyway
|
||||
return{ index_count, max_index, 0, 0, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
}
|
||||
|
||||
//Prefer only reading the vertices that are referenced in the index buffer itself
|
||||
//Offset data source by min_index verts, but also notify the shader to offset the vertexID
|
||||
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
|
||||
return{ index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
return{ true, min_index, max_index, index_count, index_offset, std::make_tuple(get_index_type(type), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_inlined_array& command)
|
||||
@ -151,10 +139,10 @@ namespace
|
||||
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
||||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count);
|
||||
|
||||
return{ index_count, vertex_count, 0, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
return{ false, index_count, vertex_count, 0, 0, std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer) };
|
||||
}
|
||||
|
||||
return{ vertex_count, vertex_count, 0, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
return{ false, vertex_count, vertex_count, 0, 0, std::optional<std::tuple<GLenum, u32>>() };
|
||||
}
|
||||
|
||||
private:
|
||||
@ -170,18 +158,26 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
//Write index buffers and count verts
|
||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
||||
|
||||
auto &vertex_count = result.allocated_vertex_count;
|
||||
auto &vertex_base = result.vertex_data_base;
|
||||
const u32 vertex_count = (result.max_index - result.min_index) + 1;
|
||||
u32 vertex_base = result.min_index;
|
||||
u32 index_base = 0;
|
||||
|
||||
if (result.index_rebase)
|
||||
{
|
||||
vertex_base = rsx::get_index_from_base(vertex_base, rsx::method_registers.vertex_data_base_index());
|
||||
index_base = result.min_index;
|
||||
}
|
||||
|
||||
//Do actual vertex upload
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_base, vertex_count);
|
||||
|
||||
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
|
||||
gl::vertex_upload_info upload_info =
|
||||
{
|
||||
result.vertex_draw_count, // Vertex count
|
||||
result.allocated_vertex_count, // Allocated vertex count
|
||||
result.vertex_index_base, // Index of attribute at data location 0
|
||||
vertex_count, // Allocated vertex count
|
||||
vertex_base, // First vertex in block
|
||||
index_base, // Index of attribute at data location 0
|
||||
result.vertex_index_offset, // Hw index offset
|
||||
0u, 0u, // Mapping
|
||||
result.index_info // Index buffer info
|
||||
|
@ -1342,15 +1342,15 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
vertex_input_layout thread::analyse_inputs_interleaved() const
|
||||
void thread::analyse_inputs_interleaved(vertex_input_layout& result) const
|
||||
{
|
||||
const rsx_state& state = rsx::method_registers;
|
||||
const u32 input_mask = state.vertex_attrib_input_mask();
|
||||
|
||||
result.clear();
|
||||
|
||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
vertex_input_layout result = {};
|
||||
|
||||
interleaved_range_info info = {};
|
||||
info.interleaved = true;
|
||||
info.locations.reserve(8);
|
||||
@ -1363,7 +1363,7 @@ namespace rsx
|
||||
{
|
||||
// Stride must be updated even if the stream is disabled
|
||||
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
|
||||
info.locations.push_back(index);
|
||||
info.locations.push_back({ index, false, 1 });
|
||||
|
||||
if (input_mask & (1u << index))
|
||||
{
|
||||
@ -1378,12 +1378,11 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
result.interleaved_blocks.push_back(info);
|
||||
return result;
|
||||
result.interleaved_blocks.emplace_back(std::move(info));
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
vertex_input_layout result = {};
|
||||
result.interleaved_blocks.reserve(16);
|
||||
result.referenced_registers.reserve(16);
|
||||
|
||||
@ -1433,6 +1432,7 @@ namespace rsx
|
||||
result.attribute_placement[index] = attribute_buffer_placement::persistent;
|
||||
const u32 base_address = info.offset() & 0x7fffffff;
|
||||
bool alloc_new_block = true;
|
||||
bool modulo = !!(frequency_divider_mask & (1 << index));
|
||||
|
||||
for (auto &block : result.interleaved_blocks)
|
||||
{
|
||||
@ -1471,13 +1471,8 @@ namespace rsx
|
||||
}
|
||||
|
||||
alloc_new_block = false;
|
||||
block.locations.push_back(index);
|
||||
block.locations.push_back({ index, modulo, info.frequency() });
|
||||
block.interleaved = true;
|
||||
block.min_divisor = std::min(block.min_divisor, info.frequency());
|
||||
|
||||
if (block.all_modulus)
|
||||
block.all_modulus = !!(frequency_divider_mask & (1 << index));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1488,9 +1483,7 @@ namespace rsx
|
||||
block.attribute_stride = info.stride();
|
||||
block.memory_location = info.offset() >> 31;
|
||||
block.locations.reserve(16);
|
||||
block.locations.push_back(index);
|
||||
block.min_divisor = info.frequency();
|
||||
block.all_modulus = !!(frequency_divider_mask & (1 << index));
|
||||
block.locations.push_back({ index, modulo, info.frequency() });
|
||||
|
||||
if (block.attribute_stride == 0)
|
||||
{
|
||||
@ -1498,7 +1491,7 @@ namespace rsx
|
||||
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
|
||||
result.interleaved_blocks.push_back(block);
|
||||
result.interleaved_blocks.emplace_back(std::move(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1508,8 +1501,6 @@ namespace rsx
|
||||
//Calculate real data address to be used during upload
|
||||
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
|
||||
@ -1835,7 +1826,7 @@ namespace rsx
|
||||
fmt::throw_exception("%s(addr=0x%x): RSXIO memory not mapped" HERE, __FUNCTION__, addr);
|
||||
}
|
||||
|
||||
std::pair<u32, u32> thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count)
|
||||
std::pair<u32, u32> thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count)
|
||||
{
|
||||
u32 persistent_memory_size = 0;
|
||||
u32 volatile_memory_size = 0;
|
||||
@ -1861,37 +1852,13 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
u32 unique_verts;
|
||||
|
||||
if (block.single_vertex)
|
||||
{
|
||||
unique_verts = 1;
|
||||
}
|
||||
else if (block.min_divisor > 1)
|
||||
{
|
||||
if (block.all_modulus)
|
||||
unique_verts = block.min_divisor;
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count / block.min_divisor;
|
||||
if (vertex_count % block.min_divisor) unique_verts++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count;
|
||||
}
|
||||
|
||||
persistent_memory_size += block.attribute_stride * unique_verts;
|
||||
}
|
||||
persistent_memory_size = layout.calculate_interleaved_memory_requirements(first_vertex, vertex_count);
|
||||
}
|
||||
|
||||
return std::make_pair(persistent_memory_size, volatile_memory_size);
|
||||
}
|
||||
|
||||
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
|
||||
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
|
||||
{
|
||||
std::array<s32, 16> offset_in_block = {};
|
||||
u32 volatile_offset = volatile_offset_base;
|
||||
@ -1917,11 +1884,11 @@ namespace rsx
|
||||
{
|
||||
const auto &block = layout.interleaved_blocks[0];
|
||||
u32 inline_data_offset = volatile_offset;
|
||||
for (const u8 index : block.locations)
|
||||
for (const auto& attrib : block.locations)
|
||||
{
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[index];
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
|
||||
|
||||
offset_in_block[index] = inline_data_offset;
|
||||
offset_in_block[attrib.index] = inline_data_offset;
|
||||
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
}
|
||||
@ -1929,34 +1896,14 @@ namespace rsx
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
for (u8 index : block.locations)
|
||||
for (const auto& attrib : block.locations)
|
||||
{
|
||||
const u32 local_address = (rsx::method_registers.vertex_arrays_info[index].offset() & 0x7fffffff);
|
||||
offset_in_block[index] = persistent_offset + (local_address - block.base_offset);
|
||||
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
|
||||
offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset);
|
||||
}
|
||||
|
||||
u32 unique_verts;
|
||||
|
||||
if (block.single_vertex)
|
||||
{
|
||||
unique_verts = 1;
|
||||
}
|
||||
else if (block.min_divisor > 1)
|
||||
{
|
||||
if (block.all_modulus)
|
||||
unique_verts = block.min_divisor;
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count / block.min_divisor;
|
||||
if (vertex_count % block.min_divisor) unique_verts++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count;
|
||||
}
|
||||
|
||||
persistent_offset += block.attribute_stride * unique_verts;
|
||||
const auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
persistent_offset += block.attribute_stride * range.second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1978,6 +1925,7 @@ namespace rsx
|
||||
const s32 modulo_op_frequency_mask = (1 << 31);
|
||||
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
const auto max_index = (first_vertex + vertex_count) - 1;
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
@ -2067,9 +2015,25 @@ namespace rsx
|
||||
default:
|
||||
{
|
||||
if (modulo_mask & (1 << index))
|
||||
attrib1 |= modulo_op_frequency_mask;
|
||||
|
||||
attrib0 |= (frequency << 8);
|
||||
{
|
||||
if (max_index >= frequency)
|
||||
{
|
||||
// Only set modulo mask if a modulo op is actually necessary!
|
||||
// This requires that the uploaded range for this attr = [0, freq-1]
|
||||
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
|
||||
attrib0 |= (frequency << 8);
|
||||
attrib1 |= modulo_op_frequency_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
attrib0 |= default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Division
|
||||
attrib0 |= (frequency << 8);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -2148,30 +2112,11 @@ namespace rsx
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
u32 unique_verts;
|
||||
u32 vertex_base = 0;
|
||||
auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
|
||||
if (block.single_vertex)
|
||||
{
|
||||
unique_verts = 1;
|
||||
}
|
||||
else if (block.min_divisor > 1)
|
||||
{
|
||||
if (block.all_modulus)
|
||||
unique_verts = block.min_divisor;
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count / block.min_divisor;
|
||||
if (vertex_count % block.min_divisor) unique_verts++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unique_verts = vertex_count;
|
||||
vertex_base = first_vertex * block.attribute_stride;
|
||||
}
|
||||
const u32 data_size = range.second * block.attribute_stride;
|
||||
const u32 vertex_base = range.first * block.attribute_stride;
|
||||
|
||||
const u32 data_size = block.attribute_stride * unique_verts;
|
||||
memcpy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size);
|
||||
persistent += data_size;
|
||||
}
|
||||
|
@ -177,18 +177,69 @@ namespace rsx
|
||||
u32 __dummy2;
|
||||
};
|
||||
|
||||
// Describes one vertex attribute that lives inside an interleaved block,
// together with the frequency (divisor) settings that apply to it.
struct interleaved_attribute_t
|
||||
{
|
||||
// Attribute slot; used to index per-attribute tables such as vertex_arrays_info.
u8 index;
|
||||
// True when this attribute's bit is set in the frequency divider operation mask.
// Range calculation treats such attributes as wrapping (index % frequency);
// attributes with frequency > 1 and modulo == false are treated as division.
bool modulo;
|
||||
// Frequency value of the attribute; values <= 1 are handled as "no divisor".
u16 frequency;
|
||||
};
|
||||
|
||||
struct interleaved_range_info
|
||||
{
|
||||
bool interleaved = false;
|
||||
bool all_modulus = false;
|
||||
bool single_vertex = false;
|
||||
u32 base_offset = 0;
|
||||
u32 real_offset_address = 0;
|
||||
u8 memory_location = 0;
|
||||
u8 attribute_stride = 0;
|
||||
u16 min_divisor = 0;
|
||||
|
||||
std::vector<u8> locations;
|
||||
rsx::simple_array<interleaved_attribute_t> locations;
|
||||
|
||||
// Computes which elements of this block must be uploaded to cover a draw that
// references vertex indices [first, first + count - 1].
// Returns { first_element, element_count }. The result is widened to [0, ...] when
// an attribute really wraps around (modulo), i.e. a full unoptimized range [0-max_index].
// NOTE(review): (first + count) - 1 wraps around when first == 0 and count == 0
// (assuming u32 is an unsigned 32-bit type) - confirm callers never pass an empty range.
|
||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const
|
||||
{
|
||||
// A single-vertex block always needs exactly one element at position 0
if (single_vertex)
|
||||
{
|
||||
return { 0, 1 };
|
||||
}
|
||||
|
||||
// Last vertex index referenced by the draw
const u32 max_index = (first + count) - 1;
|
||||
// Running bounds of the required range; both start at 'first' and are adjusted per attribute
u32 _max_index = first;
|
||||
u32 _min_index = first;
|
||||
|
||||
for (const auto &attrib : locations)
|
||||
{
|
||||
// No divisor on this attribute: it is read up to the last referenced index
if (LIKELY(attrib.frequency <= 1))
|
||||
{
|
||||
_max_index = max_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (attrib.modulo)
|
||||
{
|
||||
if (max_index >= attrib.frequency)
|
||||
{
|
||||
// Actually uses the modulo operator, cannot safely optimize
// The range must start at 0 and reach at least element (frequency - 1)
|
||||
_min_index = 0;
|
||||
_max_index = std::max<u32>(_max_index, attrib.frequency - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Same as having no modulo
|
||||
_max_index = max_index;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Division operator
// Referenced elements are index / frequency, so both bounds are scaled down
|
||||
_min_index = std::min(_min_index, first / attrib.frequency);
|
||||
_max_index = std::max<u32>(_max_index, max_index / attrib.frequency);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert the inclusive [min, max] bounds into { start, count }
return { _min_index, (_max_index - _min_index) + 1 };
|
||||
}
|
||||
};
|
||||
|
||||
enum attribute_buffer_placement : u8
|
||||
@ -201,8 +252,8 @@ namespace rsx
|
||||
struct vertex_input_layout
|
||||
{
|
||||
std::vector<interleaved_range_info> interleaved_blocks; // Interleaved blocks to be uploaded as-is
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks; // Volatile data blocks (immediate draw vertex data for example)
|
||||
std::vector<u8> referenced_registers; // Volatile register data
|
||||
std::vector<std::pair<u8, u32>> volatile_blocks; // Volatile data blocks (immediate draw vertex data for example)
|
||||
rsx::simple_array<u8> referenced_registers; // Volatile register data
|
||||
|
||||
std::array<attribute_buffer_placement, 16> attribute_placement;
|
||||
|
||||
@ -211,6 +262,13 @@ namespace rsx
|
||||
attribute_placement.fill(attribute_buffer_placement::none);
|
||||
}
|
||||
|
||||
// Empties the three block/register lists so the layout object can be filled again.
// attribute_placement is not modified here.
// NOTE(review): resize(0) rather than reassignment presumably keeps the allocated
// storage for reuse - confirm this holds for rsx::simple_array as well.
void clear()
|
||||
{
|
||||
interleaved_blocks.resize(0);
|
||||
volatile_blocks.resize(0);
|
||||
referenced_registers.resize(0);
|
||||
}
|
||||
|
||||
bool validate() const
|
||||
{
|
||||
// Criteria: At least one array stream has to be defined to feed vertex positions
|
||||
@ -252,6 +310,18 @@ namespace rsx
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sums the storage needed by all interleaved blocks for a draw that references
// vertex indices starting at first_vertex and spanning vertex_count vertices.
// Each block contributes (required element count) * (its attribute stride).
u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
|
||||
{
|
||||
u32 mem = 0;
|
||||
for (auto &block : interleaved_blocks)
|
||||
{
|
||||
// range.second is the number of elements this block needs; range.first is unused here
const auto range = block.calculate_required_range(first_vertex, vertex_count);
|
||||
mem += range.second * block.attribute_stride;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
};
|
||||
|
||||
struct framebuffer_layout
|
||||
@ -478,7 +548,7 @@ namespace rsx
|
||||
/**
|
||||
* Analyze vertex inputs and group all interleaved blocks
|
||||
*/
|
||||
vertex_input_layout analyse_inputs_interleaved() const;
|
||||
void analyse_inputs_interleaved(vertex_input_layout&) const;
|
||||
|
||||
RSXVertexProgram current_vertex_program = {};
|
||||
RSXFragmentProgram current_fragment_program = {};
|
||||
@ -592,12 +662,12 @@ namespace rsx
|
||||
* result.first contains persistent memory requirements
|
||||
* result.second contains volatile memory requirements
|
||||
*/
|
||||
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count);
|
||||
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);
|
||||
|
||||
/**
|
||||
* Generates vertex input descriptors as an array of 16x4 s32s
|
||||
*/
|
||||
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
|
||||
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
|
||||
|
||||
/**
|
||||
* Uploads vertex data described in the layout descriptor
|
||||
|
@ -1175,7 +1175,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
if (sub_index == 0)
|
||||
{
|
||||
m_vertex_layout = analyse_inputs_interleaved();
|
||||
analyse_inputs_interleaved(m_vertex_layout);
|
||||
}
|
||||
|
||||
if (!m_vertex_layout.validate())
|
||||
@ -1640,7 +1640,7 @@ void VKGSRender::end()
|
||||
occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
LOG_ERROR(RSX, "Occlusion pool overflow");
|
||||
//LOG_ERROR(RSX, "Occlusion pool overflow");
|
||||
if (m_current_task) m_current_task->result = 1;
|
||||
}
|
||||
}
|
||||
@ -2718,7 +2718,7 @@ void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
|
||||
buf[1] = vertex_info.vertex_index_offset;
|
||||
buf += 4;
|
||||
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf,
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, (s32*)buf,
|
||||
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
|
||||
|
||||
m_vertex_layout_ring_info.unmap();
|
||||
|
@ -25,6 +25,7 @@ namespace vk
|
||||
VkPrimitiveTopology primitive;
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 first_vertex;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
u32 persistent_window_offset;
|
||||
|
@ -82,10 +82,10 @@ namespace
|
||||
struct vertex_input_state
|
||||
{
|
||||
VkPrimitiveTopology native_primitive_type;
|
||||
bool index_rebase;
|
||||
u32 min_index;
|
||||
u32 max_index;
|
||||
u32 vertex_draw_count;
|
||||
u32 allocated_vertex_count;
|
||||
u32 vertex_data_base;
|
||||
u32 vertex_index_base;
|
||||
u32 vertex_index_offset;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
};
|
||||
@ -106,6 +106,7 @@ namespace
|
||||
|
||||
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
|
||||
const u32 max_index = (min_index + vertex_count) - 1;
|
||||
|
||||
if (primitives_emulated)
|
||||
{
|
||||
@ -116,10 +117,10 @@ namespace
|
||||
generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
|
||||
vertex_count, m_index_buffer_ring_info);
|
||||
|
||||
return{ prims, index_count, vertex_count, min_index, 0, 0, index_info };
|
||||
return{ prims, false, min_index, max_index, index_count, 0, index_info };
|
||||
}
|
||||
|
||||
return{ prims, vertex_count, vertex_count, min_index, 0, {} };
|
||||
return{ prims, false, min_index, max_index, vertex_count, 0, {} };
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
|
||||
@ -173,7 +174,7 @@ namespace
|
||||
{
|
||||
//empty set, do not draw
|
||||
m_index_buffer_ring_info.unmap();
|
||||
return{ prims, 0, 0, 0, 0, 0, {} };
|
||||
return{ prims, false, 0, 0, 0, 0, {} };
|
||||
}
|
||||
|
||||
if (emulate_restart)
|
||||
@ -194,20 +195,7 @@ namespace
|
||||
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
|
||||
|
||||
const auto index_offset = rsx::method_registers.vertex_data_base_index();
|
||||
|
||||
//check for vertex arrays with frequency modifiers
|
||||
for (auto &block : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
if (block.min_divisor > 1)
|
||||
{
|
||||
//Ignore base offsets and return real results
|
||||
//The upload function will optimize the uploaded range anyway
|
||||
return{ prims, index_count, max_index, 0, 0, index_offset, index_info };
|
||||
}
|
||||
}
|
||||
|
||||
const auto data_offset = rsx::get_index_from_base(min_index, index_offset);
|
||||
return {prims, index_count, (max_index - min_index + 1), data_offset, min_index, index_offset, index_info};
|
||||
return {prims, true, min_index, max_index, index_count, index_offset, index_info};
|
||||
}
|
||||
|
||||
vertex_input_state operator()(const rsx::draw_inlined_array& command)
|
||||
@ -221,13 +209,13 @@ namespace
|
||||
|
||||
if (!primitives_emulated)
|
||||
{
|
||||
return{ prims, vertex_count, vertex_count, 0, 0, {} };
|
||||
return{ prims, false, 0, vertex_count - 1, vertex_count, 0, {} };
|
||||
}
|
||||
|
||||
u32 index_count;
|
||||
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
||||
std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info);
|
||||
return{ prims, index_count, vertex_count, 0, 0, 0, index_info };
|
||||
return{ prims, false, 0, vertex_count - 1, index_count, 0, index_info };
|
||||
}
|
||||
|
||||
private:
|
||||
@ -241,11 +229,18 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
|
||||
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
|
||||
|
||||
auto &vertex_count = result.allocated_vertex_count;
|
||||
auto &vertex_base = result.vertex_data_base;
|
||||
const u32 vertex_count = (result.max_index - result.min_index) + 1;
|
||||
u32 vertex_base = result.min_index;
|
||||
u32 index_base = 0;
|
||||
|
||||
if (result.index_rebase)
|
||||
{
|
||||
vertex_base = rsx::get_index_from_base(vertex_base, rsx::method_registers.vertex_data_base_index());
|
||||
index_base = result.min_index;
|
||||
}
|
||||
|
||||
//Do actual vertex upload
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_base, vertex_count);
|
||||
u32 persistent_range_base = UINT32_MAX, volatile_range_base = UINT32_MAX;
|
||||
size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;
|
||||
|
||||
@ -358,8 +353,9 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||
|
||||
return{ result.native_primitive_type, // Primitive
|
||||
result.vertex_draw_count, // Vertex count
|
||||
result.allocated_vertex_count, // Allocated vertex count
|
||||
result.vertex_index_base, // Index of vertex at data location 0
|
||||
vertex_count, // Allocated vertex count
|
||||
vertex_base, // First vertex in stream
|
||||
index_base, // Index of vertex at data location 0
|
||||
result.vertex_index_offset, // Index offset
|
||||
persistent_range_base, volatile_range_base, // Binding range
|
||||
result.index_info }; // Index buffer info
|
||||
|
Loading…
x
Reference in New Issue
Block a user