rsx: Move draw call related functions to their own class

kd-11 2024-12-26 21:08:54 +03:00 committed by kd-11
parent 9e9ae54455
commit 05bab8ec4c
16 changed files with 754 additions and 633 deletions
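
For orientation before the per-file hunks: the sketch below (not part of the commit) condenses the ownership pattern being introduced into a standalone, compilable C++ form. The class and member names (draw_command_processor, m_draw_processor, init(), GRAPH_frontend(), clear_push_buffers()) are taken from the diff; the sketch namespace, the simplified bodies and main() are illustrative only.

// Illustrative sketch only; not part of the commit.
#include <cstdio>

namespace sketch
{
	class thread; // forward-declared, as in RSXDrawCommands.h

	class draw_command_processor
	{
		thread* m_thread = nullptr;
	public:
		void init(thread* rsxthr) { m_thread = rsxthr; }         // same signature as in the new header
		void clear_push_buffers() { std::puts("push buffers cleared"); }
	};

	class thread
	{
	protected:
		draw_command_processor m_draw_processor;                 // new member owning draw-call state
	public:
		thread() { m_draw_processor.init(this); }                // mirrors m_draw_processor.init(this) in RSXThread.cpp
		draw_command_processor& GRAPH_frontend() { return m_draw_processor; }
		void end() { m_draw_processor.clear_push_buffers(); }    // thread::end() now delegates
	};
}

int main()
{
	sketch::thread t;
	t.GRAPH_frontend().clear_push_buffers(); // rsx_methods.cpp reaches the processor this way
	t.end();
}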

View File

@ -476,6 +476,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Common/TextureUtils.cpp
RSX/Common/texture_cache.cpp
RSX/Core/RSXContext.cpp
RSX/Core/RSXDrawCommands.cpp
RSX/gcm_enums.cpp
RSX/gcm_printing.cpp
RSX/GL/GLCommonDecompiler.cpp

View File

@ -0,0 +1,558 @@
#include "stdafx.h"
#include "RSXDrawCommands.h"
#include "Emu/RSX/Common/BufferUtils.h"
#include "Emu/RSX/rsx_methods.h"
#include "Emu/RSX/RSXThread.h"
#include "Emu/Memory/vm.h"
namespace rsx
{
void draw_command_processor::analyse_inputs_interleaved(vertex_input_layout& result, const vertex_program_metadata_t& vp_metadata)
{
const rsx_state& state = rsx::method_registers;
const u32 input_mask = state.vertex_attrib_input_mask() & vp_metadata.referenced_inputs_mask;
result.clear();
result.attribute_mask = static_cast<u16>(input_mask);
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{
interleaved_range_info& info = *result.alloc_interleaved_block();
info.interleaved = true;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
auto& vinfo = state.vertex_arrays_info[index];
result.attribute_placement[index] = attribute_buffer_placement::none;
if (vinfo.size() > 0)
{
// Stride must be updated even if the stream is disabled
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
info.locations.push_back({ index, false, 1 });
if (input_mask & (1u << index))
{
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
if (info.attribute_stride)
{
// At least one array feed must be enabled for vertex input
result.interleaved_blocks.push_back(&info);
}
return;
}
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
result.interleaved_blocks.reserve(16);
result.referenced_registers.reserve(16);
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
{
ensure(index < rsx::limits::vertex_count);
if (!(ref_mask & 1u))
{
// Nothing to do, uninitialized
continue;
}
// Always reset attribute placement by default
result.attribute_placement[index] = attribute_buffer_placement::none;
// Check for interleaving
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed)
{
// NOTE: In immediate rendering mode, all vertex setup is ignored
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
if (m_vertex_push_buffers[index].vertex_count > 1)
{
// Ensure consistent number of vertices per attribute.
m_vertex_push_buffers[index].pad_to(m_vertex_push_buffers[0].vertex_count, false);
// Read temp buffer (register array)
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(m_vertex_push_buffers[index].data.size() * sizeof(u32)));
result.volatile_blocks.push_back(volatile_range_info);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
else if (state.register_vertex_info[index].size > 0)
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
// Fall back to the default register value if no source is specified via register
continue;
}
const auto& info = state.vertex_arrays_info[index];
if (!info.size())
{
if (state.register_vertex_info[index].size > 0)
{
//Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
continue;
}
}
else
{
result.attribute_placement[index] = attribute_buffer_placement::persistent;
const u32 base_address = info.offset() & 0x7fffffff;
bool alloc_new_block = true;
bool modulo = !!(frequency_divider_mask & (1 << index));
for (auto& block : result.interleaved_blocks)
{
if (block->single_vertex)
{
//Single vertex definition, continue
continue;
}
if (block->attribute_stride != info.stride())
{
//Stride does not match, continue
continue;
}
if (base_address > block->base_offset)
{
const u32 diff = base_address - block->base_offset;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
}
else
{
const u32 diff = block->base_offset - base_address;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
//Matches, and this address is lower than existing
block->base_offset = base_address;
}
alloc_new_block = false;
block->locations.push_back({ index, modulo, info.frequency() });
block->interleaved = true;
break;
}
if (alloc_new_block)
{
interleaved_range_info& block = *result.alloc_interleaved_block();
block.base_offset = base_address;
block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31;
block.locations.reserve(16);
block.locations.push_back({ index, modulo, info.frequency() });
if (block.attribute_stride == 0)
{
block.single_vertex = true;
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
result.interleaved_blocks.push_back(&block);
}
}
}
for (auto& info : result.interleaved_blocks)
{
//Calculate real data address to be used during upload
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
}
}
std::span<const std::byte> draw_command_processor::get_raw_index_array(const draw_clause& draw_indexed_clause) const
{
if (!m_element_push_buffer.empty()) [[ unlikely ]]
{
// Indices provided via immediate mode
return { reinterpret_cast<const std::byte*>(m_element_push_buffer.data()), ::narrow<u32>(m_element_push_buffer.size() * sizeof(u32)) };
}
const rsx::index_array_type type = rsx::method_registers.index_type();
const u32 type_size = get_index_type_size(type);
// Force aligned indices, as real hardware does
const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
const u32 first = draw_indexed_clause.min_index();
const u32 count = draw_indexed_clause.get_elements_count();
const auto ptr = vm::_ptr<const std::byte>(address);
return { ptr + first * type_size, count * type_size };
}
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
draw_command_processor::get_draw_command(const rsx::rsx_state& state) const
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
{
return draw_indexed_array_command
{
get_raw_index_array(state.current_draw_clause)
};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
return draw_array_command{};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
return draw_inlined_array{};
}
fmt::throw_exception("ill-formed draw command");
}
void draw_command_processor::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
{
if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
{
return;
}
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id();
m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
m_thread->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
}
u32 draw_command_processor::get_push_buffer_vertex_count() const
{
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
return m_vertex_push_buffers[0].vertex_count;
}
void draw_command_processor::append_array_element(u32 index)
{
// Endianness is swapped because common upload code expects input in BE
// TODO: Implement fast upload path for LE inputs and do away with this
m_element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
}
u32 draw_command_processor::get_push_buffer_index_count() const
{
return ::size32(m_element_push_buffer);
}
void draw_command_processor::clear_push_buffers()
{
auto& graphics_state = m_thread->m_graphics_state;
if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
{
for (auto& push_buf : m_vertex_push_buffers)
{
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
push_buf.clear();
}
graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
}
m_element_push_buffer.clear();
}
void draw_command_processor::fill_vertex_layout_state(
const vertex_input_layout& layout,
const vertex_program_metadata_t& vp_metadata,
u32 first_vertex,
u32 vertex_count,
s32* buffer,
u32 persistent_offset_base,
u32 volatile_offset_base) const
{
std::array<s32, 16> offset_in_block = {};
u32 volatile_offset = volatile_offset_base;
u32 persistent_offset = persistent_offset_base;
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
{
for (const auto& info : layout.volatile_blocks)
{
offset_in_block[info.first] = volatile_offset;
volatile_offset += info.second;
}
}
for (u8 index : layout.referenced_registers)
{
offset_in_block[index] = volatile_offset;
volatile_offset += 16;
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto& block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset;
for (const auto& attrib : block->locations)
{
auto& info = rsx::method_registers.vertex_arrays_info[attrib.index];
offset_in_block[attrib.index] = inline_data_offset;
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
}
else
{
for (const auto& block : layout.interleaved_blocks)
{
for (const auto& attrib : block->locations)
{
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
}
const auto range = block->calculate_required_range(first_vertex, vertex_count);
persistent_offset += block->attribute_stride * range.second;
}
}
// Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride
// [8-24] attribute divisor
// [24-27] attribute type
// [27-30] attribute size
// [30-31] reserved
// [31-60] starting offset
// [60-61] swap bytes flag
// [61-62] volatile flag
// [62-63] modulo enable flag
const s32 default_frequency_mask = (1 << 8);
const s32 swap_storage_mask = (1 << 29);
const s32 volatile_storage_mask = (1 << 30);
const s32 modulo_op_frequency_mask = smin;
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
const auto max_index = (first_vertex + vertex_count) - 1;
for (u16 ref_mask = vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
{
if (!(ref_mask & 1u))
{
// Unused input, ignore this
continue;
}
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
{
static constexpr u64 zero = 0;
std::memcpy(buffer + index * 2, &zero, sizeof(zero));
continue;
}
rsx::vertex_base_type type = {};
s32 size = 0;
s32 attrib0 = 0;
s32 attrib1 = 0;
if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto& info = rsx::method_registers.vertex_arrays_info[index];
if (!info.size())
{
// Register
const auto& reginfo = rsx::method_registers.register_vertex_info[index];
type = reginfo.type;
size = reginfo.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
else
{
// Array
type = info.type();
size = info.size();
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
}
}
else
{
// Data is either from an immediate render or register input
// Immediate data overrides register input
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
m_vertex_push_buffers[index].vertex_count > 1)
{
// Push buffer
const auto& info = m_vertex_push_buffers[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
}
else
{
// Register
const auto& info = rsx::method_registers.register_vertex_info[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
}
attrib1 |= volatile_storage_mask;
}
else
{
auto& info = rsx::method_registers.vertex_arrays_info[index];
type = info.type();
size = info.size();
auto stride = info.stride();
attrib0 = stride;
if (stride > 0) //when stride is 0, input is not an array but a single element
{
const u32 frequency = info.frequency();
switch (frequency)
{
case 0:
case 1:
{
attrib0 |= default_frequency_mask;
break;
}
default:
{
if (modulo_mask & (1 << index))
{
if (max_index >= frequency)
{
// Only set modulo mask if a modulo op is actually necessary!
// This requires that the uploaded range for this attr = [0, freq-1]
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
attrib0 |= (frequency << 8);
attrib1 |= modulo_op_frequency_mask;
}
else
{
attrib0 |= default_frequency_mask;
}
}
else
{
// Division
attrib0 |= (frequency << 8);
}
break;
}
}
}
} //end attribute placement check
// Special compressed 4 components into one 4-byte value. Decoded as one value.
if (type == rsx::vertex_base_type::cmp)
{
size = 1;
}
// All data is passed in PS3-native order (BE), so the swap flag should be set
attrib1 |= swap_storage_mask;
attrib0 |= (static_cast<s32>(type) << 24);
attrib0 |= (size << 27);
attrib1 |= offset_in_block[index];
buffer[index * 2 + 0] = attrib0;
buffer[index * 2 + 1] = attrib1;
}
}
void draw_command_processor::write_vertex_data_to_memory(
const vertex_input_layout& layout,
u32 first_vertex,
u32 vertex_count,
void* persistent_data,
void* volatile_data) const
{
auto transient = static_cast<char*>(volatile_data);
auto persistent = static_cast<char*>(persistent_data);
auto& draw_call = rsx::method_registers.current_draw_clause;
if (transient != nullptr)
{
if (draw_call.command == rsx::draw_command::inlined_array)
{
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
//Is it possible to reference data outside of the inlined array?
return;
}
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (draw_call.is_immediate_draw)
{
//NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
for (const auto& info : layout.volatile_blocks)
{
memcpy(transient, m_vertex_push_buffers[info.first].data.data(), info.second);
transient += info.second;
}
}
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
}
if (persistent != nullptr)
{
for (interleaved_range_info* block : layout.interleaved_blocks)
{
auto range = block->calculate_required_range(first_vertex, vertex_count);
const u32 data_size = range.second * block->attribute_stride;
const u32 vertex_base = range.first * block->attribute_stride;
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}
}
}
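
A hypothetical decoder (not in the commit) for the two s32 words that fill_vertex_layout_state() writes per attribute. The shifts and masks come straight from that function above (type << 24, size << 27, swap_storage_mask = 1 << 29, volatile_storage_mask = 1 << 30, modulo flag at bit 31); the struct and function names are invented for illustration, and the field widths follow the descriptor comment in the same function.

#include <cstdint>

// Hypothetical decoder; field layout inferred from fill_vertex_layout_state() above.
struct decoded_attribute
{
	std::uint32_t stride_or_size; // attrib0 bits [0,8): array stride, or element size for transient data
	std::uint32_t frequency;      // attrib0 bits [8,24): 1 (default) or the divider frequency
	std::uint32_t type;           // attrib0 bits [24,27): rsx::vertex_base_type
	std::uint32_t size;           // attrib0 bits [27,30): component count (forced to 1 for cmp)
	std::uint32_t offset;         // attrib1 low bits: offset_in_block
	bool swap_bytes;              // attrib1 bit 29 (swap_storage_mask)
	bool is_volatile;             // attrib1 bit 30 (volatile_storage_mask)
	bool modulo;                  // attrib1 bit 31 (modulo_op_frequency_mask == smin)
};

inline decoded_attribute decode_attribute(std::int32_t attrib0, std::int32_t attrib1)
{
	const auto a0 = static_cast<std::uint32_t>(attrib0);
	const auto a1 = static_cast<std::uint32_t>(attrib1);

	decoded_attribute d{};
	d.stride_or_size = a0 & 0xff;
	d.frequency      = (a0 >> 8) & 0xffff;
	d.type           = (a0 >> 24) & 0x7;
	d.size           = (a0 >> 27) & 0x7;
	d.offset         = a1 & ((1u << 29) - 1);
	d.swap_bytes     = (a1 >> 29) & 1u;
	d.is_volatile    = (a1 >> 30) & 1u;
	d.modulo         = (a1 >> 31) & 1u;
	return d;
}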

View File

@ -0,0 +1,81 @@
#pragma once
#include <util/types.hpp>
#include "Emu/RSX/Core/RSXVertexTypes.h"
#include "Emu/RSX/NV47/FW/draw_call.hpp"
#include "Emu/RSX/Program/ProgramStateCache.h"
#include "Emu/RSX/rsx_vertex_data.h"
#include <span>
#include <variant>
namespace rsx
{
struct rsx_state;
class thread;
class draw_command_processor
{
using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata;
thread* m_thread = nullptr;
protected:
friend class thread;
std::array<push_buffer_vertex_info, 16> m_vertex_push_buffers;
rsx::simple_array<u32> m_element_push_buffer;
public:
draw_command_processor() = default;
void init(thread* rsxthr)
{
m_thread = rsxthr;
}
// Analyze vertex inputs and group all interleaved blocks
void analyse_inputs_interleaved(vertex_input_layout& layout, const vertex_program_metadata_t& vp_metadata);
// Retrieve raw bytes for the index array (untyped)
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
// Get compiled draw command for backend rendering
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
// Push-buffers for immediate rendering (begin-end scopes)
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
u32 get_push_buffer_vertex_count() const;
void append_array_element(u32 index);
u32 get_push_buffer_index_count() const;
void clear_push_buffers();
const std::span<const u32> element_push_buffer() const
{
return m_element_push_buffer;
}
// Host driver helpers
void fill_vertex_layout_state(
const vertex_input_layout& layout,
const vertex_program_metadata_t& vp_metadata,
u32 first_vertex,
u32 vertex_count,
s32* buffer,
u32 persistent_offset_base,
u32 volatile_offset_base) const;
void write_vertex_data_to_memory(
const vertex_input_layout& layout,
u32 first_vertex,
u32 vertex_count,
void* persistent_data,
void* volatile_data) const;
};
}
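
The push-buffer entry points declared above are driven from the NV47 method handlers (see the rsx_methods.cpp hunks further down). The following mock (hypothetical, not from the commit) reuses the same method names purely to show the call order for an immediate-mode begin/end scope; it records calls rather than reproducing the real bookkeeping, and the attribute/type arguments are omitted.

#include <cstdint>
#include <cstdio>
#include <vector>

// Mock with the same entry-point names as draw_command_processor, used only to
// illustrate the call sequence of an immediate-mode draw. Behaviour is simplified.
struct mock_draw_frontend
{
	std::uint32_t vertex_writes = 0;
	std::vector<std::uint32_t> indices;

	void append_to_push_buffer() { ++vertex_writes; }            // real: (attribute, size, subreg_index, type, value)
	void append_array_element(std::uint32_t index) { indices.push_back(index); }
	std::uint32_t get_push_buffer_vertex_count() const { return vertex_writes; }
	std::uint32_t get_push_buffer_index_count() const { return static_cast<std::uint32_t>(indices.size()); }
	void clear_push_buffers() { vertex_writes = 0; indices.clear(); }
};

int main()
{
	mock_draw_frontend frontend;                  // in RPCS3: RSX(ctx)->GRAPH_frontend()
	frontend.append_to_push_buffer();             // vertex attribute write inside begin()/end()
	frontend.append_array_element(0);             // index write (set_array_element16/32 handlers)
	std::printf("%u vertex writes, %u indices\n",
		frontend.get_push_buffer_vertex_count(),  // queried when the deferred draw is emitted
		frontend.get_push_buffer_index_count());
	frontend.clear_push_buffers();                // thread::end() calls clear_push_buffers()
}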

View File

@ -0,0 +1,52 @@
#pragma once
#include <util/types.hpp>
namespace rsx
{
enum pipeline_state : u32
{
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
fragment_program_state_dirty = (1 << 2), // Fragment program state changed
vertex_program_state_dirty = (1 << 3), // Vertex program state changed
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc)
vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = (1 << 6), // Transform constants changed
fragment_constants_dirty = (1 << 7), // Fragment constants changed
framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed
fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed
vertex_texture_state_dirty = (1 << 10), // Vertex texture parameters changed
scissor_config_state_dirty = (1 << 11), // Scissor region changed
zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed
scissor_setup_invalid = (1 << 13), // Scissor configuration is broken
scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint
polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed
push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers)
polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed
depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed
pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint.
rtt_config_dirty = (1 << 21), // Render target configuration changed
rtt_config_contested = (1 << 22), // Render target configuration is indeterminate
rtt_config_valid = (1 << 23), // Render target configuration is valid
rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
// Vulkan-specific signals
invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,
all_dirty = ~0u
};
}
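
These flags are consumed as a dirty-bit mask on rsx::thread::m_graphics_state (an atomic flags wrapper in the emulator). A minimal sketch of that usage follows, assuming the rsx::pipeline_state enum above is in scope; the plain u32 member and the helper struct are stand-ins invented for illustration.

#include <cstdint>

// Sketch only: a plain u32 stands in for the engine's atomic flags wrapper.
// Assumes the rsx::pipeline_state enum from RSXDriverState.h above is visible.
struct graphics_state_sketch
{
	std::uint32_t bits = rsx::pipeline_state::all_dirty; // thread init marks everything dirty

	void on_push_buffer_write()
	{
		bits |= rsx::pipeline_state::push_buffer_arrays_dirty; // as append_to_push_buffer() does
	}

	void on_draw_end()
	{
		if (bits & rsx::pipeline_state::push_buffer_arrays_dirty)
		{
			// reset the push buffers here (see draw_command_processor::clear_push_buffers),
			// then drop the flag, the equivalent of m_graphics_state.clear(...)
			bits &= ~static_cast<std::uint32_t>(rsx::pipeline_state::push_buffer_arrays_dirty);
		}
	}
};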

View File

@ -513,7 +513,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
if (vertex_state & rsx::vertex_arrays_changed)
{
analyse_inputs_interleaved(m_vertex_layout);
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
}
else if (vertex_state & rsx::vertex_base_changed)
{

View File

@ -1007,7 +1007,14 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
buf[1] = upload_info.vertex_index_offset;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_draw_processor.fill_vertex_layout_state(
m_vertex_layout,
current_vp_metadata,
upload_info.first_vertex,
upload_info.allocated_vertex_count,
reinterpret_cast<s32*>(buf),
upload_info.persistent_mapping_offset,
upload_info.volatile_mapping_offset);
m_vertex_layout_buffer->bind_range(GL_VERTEX_LAYOUT_BIND_SLOT, mapping.second, 128 + 16);

View File

@ -153,7 +153,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
m_profiler.start();
//Write index buffers and count verts
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), m_draw_processor.get_draw_command(rsx::method_registers));
const u32 vertex_count = (result.max_index - result.min_index) + 1;
u32 vertex_base = result.min_index;
@ -250,7 +250,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
}
//Write all the data
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
m_frame_stats.vertex_upload_time += m_profiler.duration();
return upload_info;

View File

@ -18,7 +18,7 @@ namespace rsx
// NOTE: Push buffers still behave like register writes.
// You do not need to specify each attribute for each vertex, the register is referenced instead.
// This is classic OpenGL 1.x behavior as I remember.
RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
RSX(ctx)->GRAPH_frontend().append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
}
auto& info = REGS(ctx)->register_vertex_info[attrib_index];

View File

@ -256,15 +256,15 @@ namespace rsx
{
if (RSX(ctx)->in_begin_end)
{
RSX(ctx)->append_array_element(arg & 0xFFFF);
RSX(ctx)->append_array_element(arg >> 16);
RSX(ctx)->GRAPH_frontend().append_array_element(arg & 0xFFFF);
RSX(ctx)->GRAPH_frontend().append_array_element(arg >> 16);
}
}
void set_array_element32(context* ctx, u32, u32 arg)
{
if (RSX(ctx)->in_begin_end)
RSX(ctx)->append_array_element(arg);
RSX(ctx)->GRAPH_frontend().append_array_element(arg);
}
void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg)
@ -353,8 +353,8 @@ namespace rsx
// Check if we have immediate mode vertex data in a driver-local buffer
if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none)
{
const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count();
const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count();
const u32 push_buffer_vertices_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_vertex_count();
const u32 push_buffer_index_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_index_count();
// Need to set this flag since it overrides some register contents
REGS(ctx)->current_draw_clause.is_immediate_draw = true;

View File

@ -409,12 +409,13 @@ namespace rsx
}
};
const auto element_push_buffer = render->draw_processor()->element_push_buffer();
if (index_size == 4)
{
if (!render->element_push_buffer.empty()) [[unlikely]]
if (!element_push_buffer.empty()) [[unlikely]]
{
// Indices provided via immediate mode
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u32{});
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u32{});
}
else
{
@ -424,10 +425,10 @@ namespace rsx
}
else
{
if (!render->element_push_buffer.empty()) [[unlikely]]
if (!element_push_buffer.empty()) [[unlikely]]
{
// Indices provided via immediate mode
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u16{});
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u16{});
}
else
{
@ -619,12 +620,12 @@ namespace rsx
ar(rsx::method_registers);
for (auto& v : vertex_push_buffers)
for (auto& v : m_draw_processor.m_vertex_push_buffers)
{
ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data);
}
ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
ar(m_draw_processor.m_element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer);
ar(enable_second_vhandler, requested_vsync);
ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info);
@ -689,6 +690,7 @@ namespace rsx
m_vertex_textures_dirty.fill(true);
m_graphics_state |= pipeline_state::all_dirty;
m_draw_processor.init(this);
g_user_asked_for_frame_capture = false;
@ -801,39 +803,6 @@ namespace rsx
in_begin_end = true;
}
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
{
if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
{
return;
}
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
}
u32 thread::get_push_buffer_vertex_count() const
{
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
return vertex_push_buffers[0].vertex_count;
}
void thread::append_array_element(u32 index)
{
// Endianness is swapped because common upload code expects input in BE
// TODO: Implement fast upload path for LE inputs and do away with this
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
}
u32 thread::get_push_buffer_index_count() const
{
return ::size32(element_push_buffer);
}
void thread::end()
{
if (capture_current_frame)
@ -850,20 +819,7 @@ namespace rsx
m_eng_interrupt_mask |= rsx::backend_interrupt;
ROP_sync_timestamp = rsx::get_shared_tag();
if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
{
for (auto& push_buf : vertex_push_buffers)
{
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
push_buf.clear();
}
m_graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
}
element_push_buffer.clear();
m_draw_processor.clear_push_buffers();
zcull_ctrl->on_draw();
@ -1409,51 +1365,6 @@ namespace rsx
return t + timestamp_subvalue;
}
std::span<const std::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
{
if (!element_push_buffer.empty()) [[ unlikely ]]
{
// Indices provided via immediate mode
return {reinterpret_cast<const std::byte*>(element_push_buffer.data()), ::narrow<u32>(element_push_buffer.size() * sizeof(u32))};
}
const rsx::index_array_type type = rsx::method_registers.index_type();
const u32 type_size = get_index_type_size(type);
// Force aligned indices, as real hardware does
const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
const u32 first = draw_indexed_clause.min_index();
const u32 count = draw_indexed_clause.get_elements_count();
const auto ptr = vm::_ptr<const std::byte>(address);
return { ptr + first * type_size, count * type_size };
}
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
thread::get_draw_command(const rsx::rsx_state& state) const
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
{
return draw_indexed_array_command
{
get_raw_index_array(state.current_draw_clause)
};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
return draw_array_command{};
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
return draw_inlined_array{};
}
fmt::throw_exception("ill-formed draw command");
}
void thread::do_local_task(FIFO::state state)
{
m_eng_interrupt_mask.clear(rsx::backend_interrupt);
@ -2285,183 +2196,6 @@ namespace rsx
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
}
void thread::analyse_inputs_interleaved(vertex_input_layout& result)
{
const rsx_state& state = rsx::method_registers;
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
result.clear();
result.attribute_mask = static_cast<u16>(input_mask);
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{
interleaved_range_info& info = *result.alloc_interleaved_block();
info.interleaved = true;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
auto &vinfo = state.vertex_arrays_info[index];
result.attribute_placement[index] = attribute_buffer_placement::none;
if (vinfo.size() > 0)
{
// Stride must be updated even if the stream is disabled
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
info.locations.push_back({ index, false, 1 });
if (input_mask & (1u << index))
{
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
}
if (info.attribute_stride)
{
// At least one array feed must be enabled for vertex input
result.interleaved_blocks.push_back(&info);
}
return;
}
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
result.interleaved_blocks.reserve(16);
result.referenced_registers.reserve(16);
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
{
ensure(index < rsx::limits::vertex_count);
if (!(ref_mask & 1u))
{
// Nothing to do, uninitialized
continue;
}
// Always reset attribute placement by default
result.attribute_placement[index] = attribute_buffer_placement::none;
// Check for interleaving
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed)
{
// NOTE: In immediate rendering mode, all vertex setup is ignored
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
if (vertex_push_buffers[index].vertex_count > 1)
{
// Ensure consistent number of vertices per attribute.
vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);
// Read temp buffer (register array)
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
result.volatile_blocks.push_back(volatile_range_info);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
else if (state.register_vertex_info[index].size > 0)
{
// Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
}
// Fall back to the default register value if no source is specified via register
continue;
}
const auto& info = state.vertex_arrays_info[index];
if (!info.size())
{
if (state.register_vertex_info[index].size > 0)
{
//Reads from register
result.referenced_registers.push_back(index);
result.attribute_placement[index] = attribute_buffer_placement::transient;
continue;
}
}
else
{
result.attribute_placement[index] = attribute_buffer_placement::persistent;
const u32 base_address = info.offset() & 0x7fffffff;
bool alloc_new_block = true;
bool modulo = !!(frequency_divider_mask & (1 << index));
for (auto &block : result.interleaved_blocks)
{
if (block->single_vertex)
{
//Single vertex definition, continue
continue;
}
if (block->attribute_stride != info.stride())
{
//Stride does not match, continue
continue;
}
if (base_address > block->base_offset)
{
const u32 diff = base_address - block->base_offset;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
}
else
{
const u32 diff = block->base_offset - base_address;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}
//Matches, and this address is lower than existing
block->base_offset = base_address;
}
alloc_new_block = false;
block->locations.push_back({ index, modulo, info.frequency() });
block->interleaved = true;
break;
}
if (alloc_new_block)
{
interleaved_range_info& block = *result.alloc_interleaved_block();
block.base_offset = base_address;
block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31;
block.locations.reserve(16);
block.locations.push_back({ index, modulo, info.frequency() });
if (block.attribute_stride == 0)
{
block.single_vertex = true;
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
result.interleaved_blocks.push_back(&block);
}
}
}
for (auto &info : result.interleaved_blocks)
{
//Calculate real data address to be used during upload
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
}
}
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{
if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_dirty))
@ -2782,267 +2516,6 @@ namespace rsx
return std::make_pair(persistent_memory_size, volatile_memory_size);
}
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
{
std::array<s32, 16> offset_in_block = {};
u32 volatile_offset = volatile_offset_base;
u32 persistent_offset = persistent_offset_base;
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
{
for (const auto &info : layout.volatile_blocks)
{
offset_in_block[info.first] = volatile_offset;
volatile_offset += info.second;
}
}
for (u8 index : layout.referenced_registers)
{
offset_in_block[index] = volatile_offset;
volatile_offset += 16;
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto &block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset;
for (const auto& attrib : block->locations)
{
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
offset_in_block[attrib.index] = inline_data_offset;
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
}
else
{
for (const auto &block : layout.interleaved_blocks)
{
for (const auto& attrib : block->locations)
{
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
}
const auto range = block->calculate_required_range(first_vertex, vertex_count);
persistent_offset += block->attribute_stride * range.second;
}
}
// Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride
// [8-24] attribute divisor
// [24-27] attribute type
// [27-30] attribute size
// [30-31] reserved
// [31-60] starting offset
// [60-61] swap bytes flag
// [61-62] volatile flag
// [62-63] modulo enable flag
const s32 default_frequency_mask = (1 << 8);
const s32 swap_storage_mask = (1 << 29);
const s32 volatile_storage_mask = (1 << 30);
const s32 modulo_op_frequency_mask = smin;
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
const auto max_index = (first_vertex + vertex_count) - 1;
for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
{
if (!(ref_mask & 1u))
{
// Unused input, ignore this
continue;
}
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
{
static constexpr u64 zero = 0;
std::memcpy(buffer + index * 2, &zero, sizeof(zero));
continue;
}
rsx::vertex_base_type type = {};
s32 size = 0;
s32 attrib0 = 0;
s32 attrib1 = 0;
if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto &info = rsx::method_registers.vertex_arrays_info[index];
if (!info.size())
{
// Register
const auto& reginfo = rsx::method_registers.register_vertex_info[index];
type = reginfo.type;
size = reginfo.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
else
{
// Array
type = info.type();
size = info.size();
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
}
}
else
{
// Data is either from an immediate render or register input
// Immediate data overrides register input
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
vertex_push_buffers[index].vertex_count > 1)
{
// Push buffer
const auto &info = vertex_push_buffers[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
}
else
{
// Register
const auto& info = rsx::method_registers.register_vertex_info[index];
type = info.type;
size = info.size;
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
}
}
attrib1 |= volatile_storage_mask;
}
else
{
auto &info = rsx::method_registers.vertex_arrays_info[index];
type = info.type();
size = info.size();
auto stride = info.stride();
attrib0 = stride;
if (stride > 0) //when stride is 0, input is not an array but a single element
{
const u32 frequency = info.frequency();
switch (frequency)
{
case 0:
case 1:
{
attrib0 |= default_frequency_mask;
break;
}
default:
{
if (modulo_mask & (1 << index))
{
if (max_index >= frequency)
{
// Only set modulo mask if a modulo op is actually necessary!
// This requires that the uploaded range for this attr = [0, freq-1]
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
attrib0 |= (frequency << 8);
attrib1 |= modulo_op_frequency_mask;
}
else
{
attrib0 |= default_frequency_mask;
}
}
else
{
// Division
attrib0 |= (frequency << 8);
}
break;
}
}
}
} //end attribute placement check
// Special compressed 4 components into one 4-byte value. Decoded as one value.
if (type == rsx::vertex_base_type::cmp)
{
size = 1;
}
// All data is passed in PS3-native order (BE), so the swap flag should be set
attrib1 |= swap_storage_mask;
attrib0 |= (static_cast<s32>(type) << 24);
attrib0 |= (size << 27);
attrib1 |= offset_in_block[index];
buffer[index * 2 + 0] = attrib0;
buffer[index * 2 + 1] = attrib1;
}
}
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data)
{
auto transient = static_cast<char*>(volatile_data);
auto persistent = static_cast<char*>(persistent_data);
auto &draw_call = rsx::method_registers.current_draw_clause;
if (transient != nullptr)
{
if (draw_call.command == rsx::draw_command::inlined_array)
{
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
//Is it possible to reference data outside of the inlined array?
return;
}
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (draw_call.is_immediate_draw)
{
//NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
for (const auto &info : layout.volatile_blocks)
{
memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second);
transient += info.second;
}
}
for (const u8 index : layout.referenced_registers)
{
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
transient += 16;
}
}
if (persistent != nullptr)
{
for (interleaved_range_info* block : layout.interleaved_blocks)
{
auto range = block->calculate_required_range(first_vertex, vertex_count);
const u32 data_size = range.second * block->attribute_stride;
const u32 vertex_base = range.first * block->attribute_stride;
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}
}
void thread::flip(const display_flip_info_t& info)
{
m_eng_interrupt_mask.clear(rsx::display_interrupt);

View File

@ -28,6 +28,8 @@
#include "Emu/IdManager.h"
#include "Core/RSXDisplay.h"
#include "Core/RSXDrawCommands.h"
#include "Core/RSXDriverState.h"
#include "Core/RSXFrameBuffer.h"
#include "Core/RSXContext.h"
#include "Core/RSXIOMap.hpp"
@ -59,52 +61,6 @@ namespace rsx
context_clear_all = context_clear_color | context_clear_depth
};
enum pipeline_state : u32
{
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
fragment_program_state_dirty = (1 << 2), // Fragment program state changed
vertex_program_state_dirty = (1 << 3), // Vertex program state changed
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc)
vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = (1 << 6), // Transform constants changed
fragment_constants_dirty = (1 << 7), // Fragment constants changed
framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed
fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed
vertex_texture_state_dirty = (1 << 10), // Vertex texture parameters changed
scissor_config_state_dirty = (1 << 11), // Scissor region changed
zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed
scissor_setup_invalid = (1 << 13), // Scissor configuration is broken
scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint
polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed
push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers)
polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed
depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed
pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint.
rtt_config_dirty = (1 << 21), // Render target configuration changed
rtt_config_contested = (1 << 22), // Render target configuration is indeterminate
rtt_config_valid = (1 << 23), // Render target configuration is valid
rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
// Vulkan-specific signals
invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,
all_dirty = ~0u
};
enum eng_interrupt_reason : u32
{
backend_interrupt = 0x0001, // Backend-related interrupt
@ -161,8 +117,6 @@ namespace rsx
void cpu_task() override;
protected:
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
s32 m_skip_frame_ctr = 0;
bool skip_current_frame = false;
@ -217,6 +171,9 @@ namespace rsx
// Host DMA
std::unique_ptr<RSXDMAWriter> m_host_dma_ctrl;
// Draw call management
draw_command_processor m_draw_processor;
public:
atomic_t<u64> new_get_put = u64{umax};
u32 restore_point = 0;
@ -225,7 +182,7 @@ namespace rsx
atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false };
atomic_t<u32> is_initialized{0};
rsx::simple_array<u32> element_push_buffer;
bool is_fifo_idle() const;
void flush_fifo();
@ -268,6 +225,8 @@ namespace rsx
void capture_frame(const std::string& name);
const backend_configuration& get_backend_config() const { return backend_config; }
const draw_command_processor* draw_processor() const { return &m_draw_processor; }
public:
shared_ptr<named_thread<ppu_thread>> intr_thread;
@ -301,11 +260,6 @@ namespace rsx
void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout);
bool get_scissor(areau& region, bool clip_viewport);
/**
* Analyze vertex inputs and group all interleaved blocks
*/
void analyse_inputs_interleaved(vertex_input_layout&);
RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {};
@ -424,21 +378,6 @@ namespace rsx
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
/**
* Immediate mode rendering requires a temp push buffer to hold attrib values
* Appends a value to the push buffer (currently only supports 32-wide types)
*/
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
u32 get_push_buffer_vertex_count() const;
void append_array_element(u32 index);
u32 get_push_buffer_index_count() const;
protected:
/**
@ -448,17 +387,6 @@ namespace rsx
*/
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);
/**
* Generates vertex input descriptors as an array of 16x4 s32s
*/
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
/**
* Uploads vertex data described in the layout descriptor
* Copies from local memory to the write-only output buffers provided in a sequential manner
*/
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
void evaluate_cpu_usage_reduction_limits();
private:
@ -468,6 +396,9 @@ namespace rsx
void handle_invalidated_memory_range();
public:
draw_command_processor& GRAPH_frontend() { return m_draw_processor; }
/**
* Fill buffer with 4x4 scale offset matrix.
* Vertex shader's position is to be multiplied by this matrix.

View File

@ -730,7 +730,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
if (state_flags & rsx::vertex_arrays_changed)
{
analyse_inputs_interleaved(m_vertex_layout);
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
}
else if (state_flags & rsx::vertex_base_changed)
{

View File

@ -2360,8 +2360,14 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast<s32*>(dst),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
m_draw_processor.fill_vertex_layout_state(
m_vertex_layout,
current_vp_metadata,
vertex_info.first_vertex,
vertex_info.allocated_vertex_count,
static_cast<s32*>(dst),
vertex_info.persistent_window_offset,
vertex_info.volatile_window_offset);
m_vertex_layout_ring_info.unmap();
}

View File

@ -217,7 +217,7 @@ namespace
vk::vertex_upload_info VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
auto result = std::visit(visitor, m_draw_processor.get_draw_command(rsx::method_registers));
const u32 vertex_count = (result.max_index - result.min_index) + 1;
u32 vertex_base = result.min_index;
@ -294,7 +294,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
const usz volatile_offset_in_block = volatile_offset - persistent_offset;
void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
m_attrib_ring_info.unmap();
}
else
@ -302,14 +302,14 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (required.first > 0 && persistent_offset != umax)
{
void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_attrib_ring_info.unmap();
}
if (required.second > 0)
{
void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_attrib_ring_info.unmap();
}
}

View File

@ -104,6 +104,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp" />
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
@ -619,6 +620,8 @@
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXContext.h" />
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h" />
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />

View File

@ -1315,6 +1315,9 @@
<ClCompile Include="Emu\RSX\Host\MM.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2659,6 +2662,12 @@
<ClInclude Include="Emu\NP\np_gui_cache.h">
<Filter>Emu\NP</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">