From 05bab8ec4c4c2031f04f8a9411932139ef0a9b13 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 26 Dec 2024 21:08:54 +0300 Subject: [PATCH] rsx: Move draw call related functions to their own class --- rpcs3/Emu/CMakeLists.txt | 1 + rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp | 558 +++++++++++++++++++++++++ rpcs3/Emu/RSX/Core/RSXDrawCommands.h | 81 ++++ rpcs3/Emu/RSX/Core/RSXDriverState.h | 52 +++ rpcs3/Emu/RSX/GL/GLDraw.cpp | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 9 +- rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 4 +- rpcs3/Emu/RSX/NV47/HW/common.cpp | 2 +- rpcs3/Emu/RSX/NV47/HW/nv4097.cpp | 10 +- rpcs3/Emu/RSX/RSXThread.cpp | 545 +----------------------- rpcs3/Emu/RSX/RSXThread.h | 91 +--- rpcs3/Emu/RSX/VK/VKDraw.cpp | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 8 +- rpcs3/emucore.vcxproj | 3 + rpcs3/emucore.vcxproj.filters | 9 + 16 files changed, 754 insertions(+), 633 deletions(-) create mode 100644 rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp create mode 100644 rpcs3/Emu/RSX/Core/RSXDrawCommands.h create mode 100644 rpcs3/Emu/RSX/Core/RSXDriverState.h diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 2d63e67ab8..7846a5d5d3 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -476,6 +476,7 @@ target_sources(rpcs3_emu PRIVATE RSX/Common/TextureUtils.cpp RSX/Common/texture_cache.cpp RSX/Core/RSXContext.cpp + RSX/Core/RSXDrawCommands.cpp RSX/gcm_enums.cpp RSX/gcm_printing.cpp RSX/GL/GLCommonDecompiler.cpp diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp new file mode 100644 index 0000000000..7beab46474 --- /dev/null +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -0,0 +1,558 @@ +#include "stdafx.h" +#include "RSXDrawCommands.h" + +#include "Emu/RSX/Common/BufferUtils.h" +#include "Emu/RSX/rsx_methods.h" +#include "Emu/RSX/RSXThread.h" + +#include "Emu/Memory/vm.h" + +namespace rsx +{ + void draw_command_processor::analyse_inputs_interleaved(vertex_input_layout& 
result, const vertex_program_metadata_t& vp_metadata) + { + const rsx_state& state = rsx::method_registers; + const u32 input_mask = state.vertex_attrib_input_mask() & vp_metadata.referenced_inputs_mask; + + result.clear(); + result.attribute_mask = static_cast(input_mask); + + if (state.current_draw_clause.command == rsx::draw_command::inlined_array) + { + interleaved_range_info& info = *result.alloc_interleaved_block(); + info.interleaved = true; + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + auto& vinfo = state.vertex_arrays_info[index]; + result.attribute_placement[index] = attribute_buffer_placement::none; + + if (vinfo.size() > 0) + { + // Stride must be updated even if the stream is disabled + info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); + info.locations.push_back({ index, false, 1 }); + + if (input_mask & (1u << index)) + { + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + } + else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index)) + { + // Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + } + + if (info.attribute_stride) + { + // At least one array feed must be enabled for vertex input + result.interleaved_blocks.push_back(&info); + } + + return; + } + + const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); + result.interleaved_blocks.reserve(16); + result.referenced_registers.reserve(16); + + for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1) + { + ensure(index < rsx::limits::vertex_count); + + if (!(ref_mask & 1u)) + { + // Nothing to do, uninitialized + continue; + } + + // Always reset attribute placement by default + result.attribute_placement[index] = attribute_buffer_placement::none; + + // Check for interleaving + if 
(rsx::method_registers.current_draw_clause.is_immediate_draw && + rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed) + { + // NOTE: In immediate rendering mode, all vertex setup is ignored + // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults + if (m_vertex_push_buffers[index].vertex_count > 1) + { + // Ensure consistent number of vertices per attribute. + m_vertex_push_buffers[index].pad_to(m_vertex_push_buffers[0].vertex_count, false); + + // Read temp buffer (register array) + std::pair volatile_range_info = std::make_pair(index, static_cast(m_vertex_push_buffers[index].data.size() * sizeof(u32))); + result.volatile_blocks.push_back(volatile_range_info); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + else if (state.register_vertex_info[index].size > 0) + { + // Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + + // Fall back to the default register value if no source is specified via register + continue; + } + + const auto& info = state.vertex_arrays_info[index]; + if (!info.size()) + { + if (state.register_vertex_info[index].size > 0) + { + //Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + continue; + } + } + else + { + result.attribute_placement[index] = attribute_buffer_placement::persistent; + const u32 base_address = info.offset() & 0x7fffffff; + bool alloc_new_block = true; + bool modulo = !!(frequency_divider_mask & (1 << index)); + + for (auto& block : result.interleaved_blocks) + { + if (block->single_vertex) + { + //Single vertex definition, continue + continue; + } + + if (block->attribute_stride != info.stride()) + { + //Stride does not match, continue + continue; + } + + if (base_address > block->base_offset) + { 
+ const u32 diff = base_address - block->base_offset; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + } + else + { + const u32 diff = block->base_offset - base_address; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + + //Matches, and this address is lower than existing + block->base_offset = base_address; + } + + alloc_new_block = false; + block->locations.push_back({ index, modulo, info.frequency() }); + block->interleaved = true; + break; + } + + if (alloc_new_block) + { + interleaved_range_info& block = *result.alloc_interleaved_block(); + block.base_offset = base_address; + block.attribute_stride = info.stride(); + block.memory_location = info.offset() >> 31; + block.locations.reserve(16); + block.locations.push_back({ index, modulo, info.frequency() }); + + if (block.attribute_stride == 0) + { + block.single_vertex = true; + block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + + result.interleaved_blocks.push_back(&block); + } + } + } + + for (auto& info : result.interleaved_blocks) + { + //Calculate real data address to be used during upload + info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location); + } + } + + std::span draw_command_processor::get_raw_index_array(const draw_clause& draw_indexed_clause) const + { + if (!m_element_push_buffer.empty()) [[ unlikely ]] + { + // Indices provided via immediate mode + return { reinterpret_cast(m_element_push_buffer.data()), ::narrow(m_element_push_buffer.size() * sizeof(u32)) }; + } + + const rsx::index_array_type type = rsx::method_registers.index_type(); + const u32 type_size = get_index_type_size(type); + + // Force aligned indices as realhw + const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location()); + + const u32 first = 
draw_indexed_clause.min_index(); + const u32 count = draw_indexed_clause.get_elements_count(); + + const auto ptr = vm::_ptr(address); + return { ptr + first * type_size, count * type_size }; + } + + std::variant + draw_command_processor::get_draw_command(const rsx::rsx_state& state) const + { + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]] + { + return draw_indexed_array_command + { + get_raw_index_array(state.current_draw_clause) + }; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) + { + return draw_array_command{}; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + return draw_inlined_array{}; + } + + fmt::throw_exception("ill-formed draw command"); + } + + void draw_command_processor::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) + { + if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute))) + { + return; + } + + // Enforce ATTR0 as vertex attribute for push buffers. + // This whole thing becomes a mess if we don't have a provoking attribute. + const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id(); + m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); + m_thread->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty; + } + + u32 draw_command_processor::get_push_buffer_vertex_count() const + { + // Enforce ATTR0 as vertex attribute for push buffers. + // This whole thing becomes a mess if we don't have a provoking attribute. 
+ return m_vertex_push_buffers[0].vertex_count; + } + + void draw_command_processor::append_array_element(u32 index) + { + // Endianness is swapped because common upload code expects input in BE + // TODO: Implement fast upload path for LE inputs and do away with this + m_element_push_buffer.push_back(std::bit_cast>(index)); + } + + u32 draw_command_processor::get_push_buffer_index_count() const + { + return ::size32(m_element_push_buffer); + } + + void draw_command_processor::clear_push_buffers() + { + auto& graphics_state = m_thread->m_graphics_state; + if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty) + { + for (auto& push_buf : m_vertex_push_buffers) + { + //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 + //rsx::method_registers.register_vertex_info[index].size = 0; + + push_buf.clear(); + } + + graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty); + } + + m_element_push_buffer.clear(); + } + + void draw_command_processor::fill_vertex_layout_state( + const vertex_input_layout& layout, + const vertex_program_metadata_t& vp_metadata, + u32 first_vertex, + u32 vertex_count, + s32* buffer, + u32 persistent_offset_base, + u32 volatile_offset_base) const + { + std::array offset_in_block = {}; + u32 volatile_offset = volatile_offset_base; + u32 persistent_offset = persistent_offset_base; + + //NOTE: Order is important! 
Transient layout is always push_buffers followed by register data + if (rsx::method_registers.current_draw_clause.is_immediate_draw) + { + for (const auto& info : layout.volatile_blocks) + { + offset_in_block[info.first] = volatile_offset; + volatile_offset += info.second; + } + } + + for (u8 index : layout.referenced_registers) + { + offset_in_block[index] = volatile_offset; + volatile_offset += 16; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + const auto& block = layout.interleaved_blocks[0]; + u32 inline_data_offset = volatile_offset; + for (const auto& attrib : block->locations) + { + auto& info = rsx::method_registers.vertex_arrays_info[attrib.index]; + + offset_in_block[attrib.index] = inline_data_offset; + inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + } + else + { + for (const auto& block : layout.interleaved_blocks) + { + for (const auto& attrib : block->locations) + { + const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff); + offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset); + } + + const auto range = block->calculate_required_range(first_vertex, vertex_count); + persistent_offset += block->attribute_stride * range.second; + } + } + + // Fill the data + // Each descriptor field is 64 bits wide + // [0-8] attribute stride + // [8-24] attribute divisor + // [24-27] attribute type + // [27-30] attribute size + // [30-31] reserved + // [31-60] starting offset + // [60-61] swap bytes flag + // [61-62] volatile flag + // [62-63] modulo enable flag + + const s32 default_frequency_mask = (1 << 8); + const s32 swap_storage_mask = (1 << 29); + const s32 volatile_storage_mask = (1 << 30); + const s32 modulo_op_frequency_mask = smin; + + const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + const auto max_index = (first_vertex + vertex_count) - 1; + 
+ for (u16 ref_mask = vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1) + { + if (!(ref_mask & 1u)) + { + // Unused input, ignore this + continue; + } + + if (layout.attribute_placement[index] == attribute_buffer_placement::none) + { + static constexpr u64 zero = 0; + std::memcpy(buffer + index * 2, &zero, sizeof(zero)); + continue; + } + + rsx::vertex_base_type type = {}; + s32 size = 0; + s32 attrib0 = 0; + s32 attrib1 = 0; + + if (layout.attribute_placement[index] == attribute_buffer_placement::transient) + { + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + const auto& info = rsx::method_registers.vertex_arrays_info[index]; + + if (!info.size()) + { + // Register + const auto& reginfo = rsx::method_registers.register_vertex_info[index]; + type = reginfo.type; + size = reginfo.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size); + } + else + { + // Array + type = info.type(); + size = info.size(); + + attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask; + } + } + else + { + // Data is either from an immediate render or register input + // Immediate data overrides register input + + if (rsx::method_registers.current_draw_clause.is_immediate_draw && + m_vertex_push_buffers[index].vertex_count > 1) + { + // Push buffer + const auto& info = m_vertex_push_buffers[index]; + type = info.type; + size = info.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask; + } + else + { + // Register + const auto& info = rsx::method_registers.register_vertex_info[index]; + type = info.type; + size = info.size; + + attrib0 = rsx::get_vertex_type_size_on_host(type, size); + } + } + + attrib1 |= volatile_storage_mask; + } + else + { + auto& info = rsx::method_registers.vertex_arrays_info[index]; + type = info.type(); + size = info.size(); + + auto stride = info.stride(); + attrib0 = stride; + + if (stride > 0) //when stride 
is 0, input is not an array but a single element + { + const u32 frequency = info.frequency(); + switch (frequency) + { + case 0: + case 1: + { + attrib0 |= default_frequency_mask; + break; + } + default: + { + if (modulo_mask & (1 << index)) + { + if (max_index >= frequency) + { + // Only set modulo mask if a modulo op is actually necessary! + // This requires that the uploaded range for this attr = [0, freq-1] + // Ignoring modulo op if the rendered range does not wrap allows for range optimization + attrib0 |= (frequency << 8); + attrib1 |= modulo_op_frequency_mask; + } + else + { + attrib0 |= default_frequency_mask; + } + } + else + { + // Division + attrib0 |= (frequency << 8); + } + break; + } + } + } + } //end attribute placement check + + // Special compressed 4 components into one 4-byte value. Decoded as one value. + if (type == rsx::vertex_base_type::cmp) + { + size = 1; + } + + // All data is passed in in PS3-native order (BE) so swap flag should be set + attrib1 |= swap_storage_mask; + attrib0 |= (static_cast(type) << 24); + attrib0 |= (size << 27); + attrib1 |= offset_in_block[index]; + + buffer[index * 2 + 0] = attrib0; + buffer[index * 2 + 1] = attrib1; + } + } + + void draw_command_processor::write_vertex_data_to_memory( + const vertex_input_layout& layout, + u32 first_vertex, + u32 vertex_count, + void* persistent_data, + void* volatile_data) const + { + auto transient = static_cast(volatile_data); + auto persistent = static_cast(persistent_data); + + auto& draw_call = rsx::method_registers.current_draw_clause; + + if (transient != nullptr) + { + if (draw_call.command == rsx::draw_command::inlined_array) + { + for (const u8 index : layout.referenced_registers) + { + memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); + transient += 16; + } + + memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32)); + //Is it possible to reference data outside of the inlined 
array? + return; + } + + //NOTE: Order is important! Transient layout is always push_buffers followed by register data + if (draw_call.is_immediate_draw) + { + //NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory + for (const auto& info : layout.volatile_blocks) + { + memcpy(transient, m_vertex_push_buffers[info.first].data.data(), info.second); + transient += info.second; + } + } + + for (const u8 index : layout.referenced_registers) + { + memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); + transient += 16; + } + } + + if (persistent != nullptr) + { + for (interleaved_range_info* block : layout.interleaved_blocks) + { + auto range = block->calculate_required_range(first_vertex, vertex_count); + + const u32 data_size = range.second * block->attribute_stride; + const u32 vertex_base = range.first * block->attribute_stride; + + g_fxo->get().copy(persistent, vm::_ptr(block->real_offset_address) + vertex_base, data_size); + persistent += data_size; + } + } + } +} diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.h b/rpcs3/Emu/RSX/Core/RSXDrawCommands.h new file mode 100644 index 0000000000..2bdf8b05b8 --- /dev/null +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.h @@ -0,0 +1,81 @@ +#pragma once + +#include + +#include "Emu/RSX/Core/RSXVertexTypes.h" +#include "Emu/RSX/NV47/FW/draw_call.hpp" +#include "Emu/RSX/Program/ProgramStateCache.h" +#include "Emu/RSX/rsx_vertex_data.h" + +#include +#include + +namespace rsx +{ + struct rsx_state; + class thread; + + class draw_command_processor + { + using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata; + + thread* m_thread = nullptr; + + protected: + friend class thread; + + std::array m_vertex_push_buffers; + rsx::simple_array m_element_push_buffer; + + public: + draw_command_processor() = default; + + void init(thread* rsxthr) + { + m_thread = rsxthr; + } + + // Analyze vertex inputs and group 
all interleaved blocks + void analyse_inputs_interleaved(vertex_input_layout& layout, const vertex_program_metadata_t& vp_metadata); + + // Retrieve raw bytes for the index array (untyped) + std::span get_raw_index_array(const draw_clause& draw_indexed_clause) const; + + // Get compiled draw command for backend rendering + std::variant + get_draw_command(const rsx::rsx_state& state) const; + + // Push-buffers for immediate rendering (begin-end scopes) + void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value); + + u32 get_push_buffer_vertex_count() const; + + void append_array_element(u32 index); + + u32 get_push_buffer_index_count() const; + + void clear_push_buffers(); + + const std::span element_push_buffer() const + { + return m_element_push_buffer; + } + + // Host driver helpers + void fill_vertex_layout_state( + const vertex_input_layout& layout, + const vertex_program_metadata_t& vp_metadata, + u32 first_vertex, + u32 vertex_count, + s32* buffer, + u32 persistent_offset_base, + u32 volatile_offset_base) const; + + void write_vertex_data_to_memory( + const vertex_input_layout& layout, + u32 first_vertex, + u32 vertex_count, + void* persistent_data, + void* volatile_data) const; + }; +} diff --git a/rpcs3/Emu/RSX/Core/RSXDriverState.h b/rpcs3/Emu/RSX/Core/RSXDriverState.h new file mode 100644 index 0000000000..f49ce437f0 --- /dev/null +++ b/rpcs3/Emu/RSX/Core/RSXDriverState.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +namespace rsx +{ + enum pipeline_state : u32 + { + fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed + vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed + fragment_program_state_dirty = (1 << 2), // Fragment program state changed + vertex_program_state_dirty = (1 << 3), // Vertex program state changed + fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc) + vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, 
clip planes, etc) + transform_constants_dirty = (1 << 6), // Transform constants changed + fragment_constants_dirty = (1 << 7), // Fragment constants changed + framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed + fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed + vertex_texture_state_dirty = (1 << 10), // Vertex texture parameters changed + scissor_config_state_dirty = (1 << 11), // Scissor region changed + zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed + + scissor_setup_invalid = (1 << 13), // Scissor configuration is broken + scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint + + polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed + line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed + + push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers) + + polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed + depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed + + pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint. 
+ + rtt_config_dirty = (1 << 21), // Render target configuration changed + rtt_config_contested = (1 << 22), // Render target configuration is indeterminate + rtt_config_valid = (1 << 23), // Render target configuration is valid + rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate + + fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, + vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, + invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, + invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty, + memory_barrier_bits = framebuffer_reads_dirty, + + // Vulkan-specific signals + invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty, + + all_dirty = ~0u + }; +} diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index b25b0afde5..a3cdd06986 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -513,7 +513,7 @@ void GLGSRender::emit_geometry(u32 sub_index) if (vertex_state & rsx::vertex_arrays_changed) { - analyse_inputs_interleaved(m_vertex_layout); + m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata); } else if (vertex_state & rsx::vertex_base_changed) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c785ddc879..846567e595 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1007,7 +1007,14 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) buf[1] = upload_info.vertex_index_offset; buf += 4; - fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, reinterpret_cast(buf), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); + m_draw_processor.fill_vertex_layout_state( + m_vertex_layout, + current_vp_metadata, + 
upload_info.first_vertex, + upload_info.allocated_vertex_count, + reinterpret_cast(buf), + upload_info.persistent_mapping_offset, + upload_info.volatile_mapping_offset); m_vertex_layout_buffer->bind_range(GL_VERTEX_LAYOUT_BIND_SLOT, mapping.second, 128 + 16); diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index f4e4405407..6d39b7c6b8 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -153,7 +153,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() m_profiler.start(); //Write index buffers and count verts - auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); + auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), m_draw_processor.get_draw_command(rsx::method_registers)); const u32 vertex_count = (result.max_index - result.min_index) + 1; u32 vertex_base = result.min_index; @@ -250,7 +250,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() } //Write all the data - write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); + m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); m_frame_stats.vertex_upload_time += m_profiler.duration(); return upload_info; diff --git a/rpcs3/Emu/RSX/NV47/HW/common.cpp b/rpcs3/Emu/RSX/NV47/HW/common.cpp index ee901bcd72..f60ac94f5f 100644 --- a/rpcs3/Emu/RSX/NV47/HW/common.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/common.cpp @@ -18,7 +18,7 @@ namespace rsx // NOTE: Push buffers still behave like register writes. // You do not need to specify each attribute for each vertex, the register is referenced instead. // This is classic OpenGL 1.x behavior as I remember. 
- RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value); + RSX(ctx)->GRAPH_frontend().append_to_push_buffer(attrib_index, count, channel_select, vtype, value); } auto& info = REGS(ctx)->register_vertex_info[attrib_index]; diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 448ab31afe..7efb92ba2b 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -256,15 +256,15 @@ namespace rsx { if (RSX(ctx)->in_begin_end) { - RSX(ctx)->append_array_element(arg & 0xFFFF); - RSX(ctx)->append_array_element(arg >> 16); + RSX(ctx)->GRAPH_frontend().append_array_element(arg & 0xFFFF); + RSX(ctx)->GRAPH_frontend().append_array_element(arg >> 16); } } void set_array_element32(context* ctx, u32, u32 arg) { if (RSX(ctx)->in_begin_end) - RSX(ctx)->append_array_element(arg); + RSX(ctx)->GRAPH_frontend().append_array_element(arg); } void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg) @@ -353,8 +353,8 @@ namespace rsx // Check if we have immediate mode vertex data in a driver-local buffer if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none) { - const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count(); - const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count(); + const u32 push_buffer_vertices_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_vertex_count(); + const u32 push_buffer_index_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_index_count(); // Need to set this flag since it overrides some register contents REGS(ctx)->current_draw_clause.is_immediate_draw = true; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 5d44a95a87..ea67bd6933 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -409,12 +409,13 @@ namespace rsx } }; + const auto element_push_buffer = render->draw_processor()->element_push_buffer(); if (index_size == 4) { - if 
(!render->element_push_buffer.empty()) [[unlikely]] + if (!element_push_buffer.empty()) [[unlikely]] { // Indices provided via immediate mode - re_evaluate(reinterpret_cast(render->element_push_buffer.data()), u32{}); + re_evaluate(reinterpret_cast(element_push_buffer.data()), u32{}); } else { @@ -424,10 +425,10 @@ namespace rsx } else { - if (!render->element_push_buffer.empty()) [[unlikely]] + if (!element_push_buffer.empty()) [[unlikely]] { // Indices provided via immediate mode - re_evaluate(reinterpret_cast(render->element_push_buffer.data()), u16{}); + re_evaluate(reinterpret_cast(element_push_buffer.data()), u16{}); } else { @@ -619,12 +620,12 @@ namespace rsx ar(rsx::method_registers); - for (auto& v : vertex_push_buffers) + for (auto& v : m_draw_processor.m_vertex_push_buffers) { ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data); } - ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout); + ar(m_draw_processor.m_element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout); ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer); ar(enable_second_vhandler, requested_vsync); ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info); @@ -689,6 +690,7 @@ namespace rsx m_vertex_textures_dirty.fill(true); m_graphics_state |= pipeline_state::all_dirty; + m_draw_processor.init(this); g_user_asked_for_frame_capture = false; @@ -801,39 +803,6 @@ namespace rsx in_begin_end = true; } - void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) - { - if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute))) - { - return; - } - - // Enforce ATTR0 as vertex attribute for push buffers. 
- // This whole thing becomes a mess if we don't have a provoking attribute. - const auto vertex_id = vertex_push_buffers[0].get_vertex_id(); - vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); - m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty; - } - - u32 thread::get_push_buffer_vertex_count() const - { - // Enforce ATTR0 as vertex attribute for push buffers. - // This whole thing becomes a mess if we don't have a provoking attribute. - return vertex_push_buffers[0].vertex_count; - } - - void thread::append_array_element(u32 index) - { - // Endianness is swapped because common upload code expects input in BE - // TODO: Implement fast upload path for LE inputs and do away with this - element_push_buffer.push_back(std::bit_cast>(index)); - } - - u32 thread::get_push_buffer_index_count() const - { - return ::size32(element_push_buffer); - } - void thread::end() { if (capture_current_frame) @@ -850,20 +819,7 @@ namespace rsx m_eng_interrupt_mask |= rsx::backend_interrupt; ROP_sync_timestamp = rsx::get_shared_tag(); - if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty) - { - for (auto& push_buf : vertex_push_buffers) - { - //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 - //rsx::method_registers.register_vertex_info[index].size = 0; - - push_buf.clear(); - } - - m_graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty); - } - - element_push_buffer.clear(); + m_draw_processor.clear_push_buffers(); zcull_ctrl->on_draw(); @@ -1409,51 +1365,6 @@ namespace rsx return t + timestamp_subvalue; } - std::span thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const - { - if (!element_push_buffer.empty()) [[ unlikely ]] - { - // Indices provided via immediate mode - return {reinterpret_cast(element_push_buffer.data()), ::narrow(element_push_buffer.size() * sizeof(u32))}; - } - - const rsx::index_array_type type = rsx::method_registers.index_type(); - 
const u32 type_size = get_index_type_size(type); - - // Force aligned indices as realhw - const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location()); - - const u32 first = draw_indexed_clause.min_index(); - const u32 count = draw_indexed_clause.get_elements_count(); - - const auto ptr = vm::_ptr(address); - return { ptr + first * type_size, count * type_size }; - } - - std::variant - thread::get_draw_command(const rsx::rsx_state& state) const - { - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]] - { - return draw_indexed_array_command - { - get_raw_index_array(state.current_draw_clause) - }; - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) - { - return draw_array_command{}; - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - return draw_inlined_array{}; - } - - fmt::throw_exception("ill-formed draw command"); - } - void thread::do_local_task(FIFO::state state) { m_eng_interrupt_mask.clear(rsx::backend_interrupt); @@ -2285,183 +2196,6 @@ namespace rsx current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask); } - void thread::analyse_inputs_interleaved(vertex_input_layout& result) - { - const rsx_state& state = rsx::method_registers; - const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask; - - result.clear(); - result.attribute_mask = static_cast(input_mask); - - if (state.current_draw_clause.command == rsx::draw_command::inlined_array) - { - interleaved_range_info& info = *result.alloc_interleaved_block(); - info.interleaved = true; - - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) - { - auto &vinfo = state.vertex_arrays_info[index]; - result.attribute_placement[index] = attribute_buffer_placement::none; - - if 
(vinfo.size() > 0) - { - // Stride must be updated even if the stream is disabled - info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); - info.locations.push_back({ index, false, 1 }); - - if (input_mask & (1u << index)) - { - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - } - else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index)) - { - // Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - } - - if (info.attribute_stride) - { - // At least one array feed must be enabled for vertex input - result.interleaved_blocks.push_back(&info); - } - - return; - } - - const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); - result.interleaved_blocks.reserve(16); - result.referenced_registers.reserve(16); - - for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1) - { - ensure(index < rsx::limits::vertex_count); - - if (!(ref_mask & 1u)) - { - // Nothing to do, uninitialized - continue; - } - - // Always reset attribute placement by default - result.attribute_placement[index] = attribute_buffer_placement::none; - - // Check for interleaving - if (rsx::method_registers.current_draw_clause.is_immediate_draw && - rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed) - { - // NOTE: In immediate rendering mode, all vertex setup is ignored - // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults - if (vertex_push_buffers[index].vertex_count > 1) - { - // Ensure consistent number of vertices per attribute. 
- vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false); - - // Read temp buffer (register array) - std::pair volatile_range_info = std::make_pair(index, static_cast(vertex_push_buffers[index].data.size() * sizeof(u32))); - result.volatile_blocks.push_back(volatile_range_info); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - else if (state.register_vertex_info[index].size > 0) - { - // Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - } - - // Fall back to the default register value if no source is specified via register - continue; - } - - const auto& info = state.vertex_arrays_info[index]; - if (!info.size()) - { - if (state.register_vertex_info[index].size > 0) - { - //Reads from register - result.referenced_registers.push_back(index); - result.attribute_placement[index] = attribute_buffer_placement::transient; - continue; - } - } - else - { - result.attribute_placement[index] = attribute_buffer_placement::persistent; - const u32 base_address = info.offset() & 0x7fffffff; - bool alloc_new_block = true; - bool modulo = !!(frequency_divider_mask & (1 << index)); - - for (auto &block : result.interleaved_blocks) - { - if (block->single_vertex) - { - //Single vertex definition, continue - continue; - } - - if (block->attribute_stride != info.stride()) - { - //Stride does not match, continue - continue; - } - - if (base_address > block->base_offset) - { - const u32 diff = base_address - block->base_offset; - if (diff > info.stride()) - { - //Not interleaved, continue - continue; - } - } - else - { - const u32 diff = block->base_offset - base_address; - if (diff > info.stride()) - { - //Not interleaved, continue - continue; - } - - //Matches, and this address is lower than existing - block->base_offset = base_address; - } - - alloc_new_block = false; - block->locations.push_back({ index, modulo, info.frequency() 
}); - block->interleaved = true; - break; - } - - if (alloc_new_block) - { - interleaved_range_info& block = *result.alloc_interleaved_block(); - block.base_offset = base_address; - block.attribute_stride = info.stride(); - block.memory_location = info.offset() >> 31; - block.locations.reserve(16); - block.locations.push_back({ index, modulo, info.frequency() }); - - if (block.attribute_stride == 0) - { - block.single_vertex = true; - block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - result.interleaved_blocks.push_back(&block); - } - } - } - - for (auto &info : result.interleaved_blocks) - { - //Calculate real data address to be used during upload - info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location); - } - } - void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) { if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_dirty)) @@ -2782,267 +2516,6 @@ namespace rsx return std::make_pair(persistent_memory_size, volatile_memory_size); } - void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base) - { - std::array offset_in_block = {}; - u32 volatile_offset = volatile_offset_base; - u32 persistent_offset = persistent_offset_base; - - //NOTE: Order is important! 
Transient ayout is always push_buffers followed by register data - if (rsx::method_registers.current_draw_clause.is_immediate_draw) - { - for (const auto &info : layout.volatile_blocks) - { - offset_in_block[info.first] = volatile_offset; - volatile_offset += info.second; - } - } - - for (u8 index : layout.referenced_registers) - { - offset_in_block[index] = volatile_offset; - volatile_offset += 16; - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - const auto &block = layout.interleaved_blocks[0]; - u32 inline_data_offset = volatile_offset; - for (const auto& attrib : block->locations) - { - auto &info = rsx::method_registers.vertex_arrays_info[attrib.index]; - - offset_in_block[attrib.index] = inline_data_offset; - inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - } - else - { - for (const auto &block : layout.interleaved_blocks) - { - for (const auto& attrib : block->locations) - { - const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff); - offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset); - } - - const auto range = block->calculate_required_range(first_vertex, vertex_count); - persistent_offset += block->attribute_stride * range.second; - } - } - - // Fill the data - // Each descriptor field is 64 bits wide - // [0-8] attribute stride - // [8-24] attribute divisor - // [24-27] attribute type - // [27-30] attribute size - // [30-31] reserved - // [31-60] starting offset - // [60-21] swap bytes flag - // [61-22] volatile flag - // [62-63] modulo enable flag - - const s32 default_frequency_mask = (1 << 8); - const s32 swap_storage_mask = (1 << 29); - const s32 volatile_storage_mask = (1 << 30); - const s32 modulo_op_frequency_mask = smin; - - const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); - const auto max_index = (first_vertex + vertex_count) - 1; - 
- for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1) - { - if (!(ref_mask & 1u)) - { - // Unused input, ignore this - continue; - } - - if (layout.attribute_placement[index] == attribute_buffer_placement::none) - { - static constexpr u64 zero = 0; - std::memcpy(buffer + index * 2, &zero, sizeof(zero)); - continue; - } - - rsx::vertex_base_type type = {}; - s32 size = 0; - s32 attrib0 = 0; - s32 attrib1 = 0; - - if (layout.attribute_placement[index] == attribute_buffer_placement::transient) - { - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - const auto &info = rsx::method_registers.vertex_arrays_info[index]; - - if (!info.size()) - { - // Register - const auto& reginfo = rsx::method_registers.register_vertex_info[index]; - type = reginfo.type; - size = reginfo.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size); - } - else - { - // Array - type = info.type(); - size = info.size(); - - attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask; - } - } - else - { - // Data is either from an immediate render or register input - // Immediate data overrides register input - - if (rsx::method_registers.current_draw_clause.is_immediate_draw && - vertex_push_buffers[index].vertex_count > 1) - { - // Push buffer - const auto &info = vertex_push_buffers[index]; - type = info.type; - size = info.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask; - } - else - { - // Register - const auto& info = rsx::method_registers.register_vertex_info[index]; - type = info.type; - size = info.size; - - attrib0 = rsx::get_vertex_type_size_on_host(type, size); - } - } - - attrib1 |= volatile_storage_mask; - } - else - { - auto &info = rsx::method_registers.vertex_arrays_info[index]; - type = info.type(); - size = info.size(); - - auto stride = info.stride(); - attrib0 = stride; - - if (stride > 0) //when 
stride is 0, input is not an array but a single element - { - const u32 frequency = info.frequency(); - switch (frequency) - { - case 0: - case 1: - { - attrib0 |= default_frequency_mask; - break; - } - default: - { - if (modulo_mask & (1 << index)) - { - if (max_index >= frequency) - { - // Only set modulo mask if a modulo op is actually necessary! - // This requires that the uploaded range for this attr = [0, freq-1] - // Ignoring modulo op if the rendered range does not wrap allows for range optimization - attrib0 |= (frequency << 8); - attrib1 |= modulo_op_frequency_mask; - } - else - { - attrib0 |= default_frequency_mask; - } - } - else - { - // Division - attrib0 |= (frequency << 8); - } - break; - } - } - } - } //end attribute placement check - - // Special compressed 4 components into one 4-byte value. Decoded as one value. - if (type == rsx::vertex_base_type::cmp) - { - size = 1; - } - - // All data is passed in in PS3-native order (BE) so swap flag should be set - attrib1 |= swap_storage_mask; - attrib0 |= (static_cast(type) << 24); - attrib0 |= (size << 27); - attrib1 |= offset_in_block[index]; - - buffer[index * 2 + 0] = attrib0; - buffer[index * 2 + 1] = attrib1; - } - } - - void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data) - { - auto transient = static_cast(volatile_data); - auto persistent = static_cast(persistent_data); - - auto &draw_call = rsx::method_registers.current_draw_clause; - - if (transient != nullptr) - { - if (draw_call.command == rsx::draw_command::inlined_array) - { - for (const u8 index : layout.referenced_registers) - { - memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); - transient += 16; - } - - memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32)); - //Is it possible to reference data outside of the inlined array? 
- return; - } - - //NOTE: Order is important! Transient layout is always push_buffers followed by register data - if (draw_call.is_immediate_draw) - { - //NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory - for (const auto &info : layout.volatile_blocks) - { - memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second); - transient += info.second; - } - } - - for (const u8 index : layout.referenced_registers) - { - memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); - transient += 16; - } - } - - if (persistent != nullptr) - { - for (interleaved_range_info* block : layout.interleaved_blocks) - { - auto range = block->calculate_required_range(first_vertex, vertex_count); - - const u32 data_size = range.second * block->attribute_stride; - const u32 vertex_base = range.first * block->attribute_stride; - - g_fxo->get().copy(persistent, vm::_ptr(block->real_offset_address) + vertex_base, data_size); - persistent += data_size; - } - } - } - void thread::flip(const display_flip_info_t& info) { m_eng_interrupt_mask.clear(rsx::display_interrupt); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index e861a96e25..e120e3d57b 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -28,6 +28,8 @@ #include "Emu/IdManager.h" #include "Core/RSXDisplay.h" +#include "Core/RSXDrawCommands.h" +#include "Core/RSXDriverState.h" #include "Core/RSXFrameBuffer.h" #include "Core/RSXContext.h" #include "Core/RSXIOMap.hpp" @@ -59,52 +61,6 @@ namespace rsx context_clear_all = context_clear_color | context_clear_depth }; - enum pipeline_state : u32 - { - fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed - vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed - fragment_program_state_dirty = (1 << 2), // Fragment program state changed - vertex_program_state_dirty = (1 << 3), // Vertex program state changed - 
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc) - vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc) - transform_constants_dirty = (1 << 6), // Transform constants changed - fragment_constants_dirty = (1 << 7), // Fragment constants changed - framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed - fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed - vertex_texture_state_dirty = (1 << 10), // Fragment texture parameters changed - scissor_config_state_dirty = (1 << 11), // Scissor region changed - zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed - - scissor_setup_invalid = (1 << 13), // Scissor configuration is broken - scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint - - polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed - line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed - - push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers) - - polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed - depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed - - pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint. 
- - rtt_config_dirty = (1 << 21), // Render target configuration changed - rtt_config_contested = (1 << 22), // Render target configuration is indeterminate - rtt_config_valid = (1 << 23), // Render target configuration is valid - rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate - - fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, - vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, - invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, - invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty, - memory_barrier_bits = framebuffer_reads_dirty, - - // Vulkan-specific signals - invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty, - - all_dirty = ~0u - }; - enum eng_interrupt_reason : u32 { backend_interrupt = 0x0001, // Backend-related interrupt @@ -161,8 +117,6 @@ namespace rsx void cpu_task() override; protected: - std::array vertex_push_buffers; - s32 m_skip_frame_ctr = 0; bool skip_current_frame = false; @@ -217,6 +171,9 @@ namespace rsx // Host DMA std::unique_ptr m_host_dma_ctrl; + // Draw call management + draw_command_processor m_draw_processor; + public: atomic_t new_get_put = u64{umax}; u32 restore_point = 0; @@ -225,7 +182,7 @@ namespace rsx atomic_t external_interrupt_lock{ 0 }; atomic_t external_interrupt_ack{ false }; atomic_t is_initialized{0}; - rsx::simple_array element_push_buffer; + bool is_fifo_idle() const; void flush_fifo(); @@ -268,6 +225,8 @@ namespace rsx void capture_frame(const std::string& name); const backend_configuration& get_backend_config() const { return backend_config; } + const draw_command_processor* draw_processor() const { return &m_draw_processor; } + public: shared_ptr> intr_thread; @@ -301,11 +260,6 @@ namespace rsx void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout); 
bool get_scissor(areau& region, bool clip_viewport); - /** - * Analyze vertex inputs and group all interleaved blocks - */ - void analyse_inputs_interleaved(vertex_input_layout&); - RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; @@ -424,21 +378,6 @@ namespace rsx virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } - std::span get_raw_index_array(const draw_clause& draw_indexed_clause) const; - - std::variant - get_draw_command(const rsx::rsx_state& state) const; - - /** - * Immediate mode rendering requires a temp push buffer to hold attrib values - * Appends a value to the push buffer (currently only supports 32-wide types) - */ - void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value); - u32 get_push_buffer_vertex_count() const; - - void append_array_element(u32 index); - u32 get_push_buffer_index_count() const; - protected: /** @@ -448,17 +387,6 @@ namespace rsx */ std::pair calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count); - /** - * Generates vertex input descriptors as an array of 16x4 s32s - */ - void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0); - - /** - * Uploads vertex data described in the layout descriptor - * Copies from local memory to the write-only output buffers provided in a sequential manner - */ - void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data); - void evaluate_cpu_usage_reduction_limits(); private: @@ -468,6 +396,9 @@ namespace rsx void handle_invalidated_memory_range(); public: + + draw_command_processor& GRAPH_frontend() { return m_draw_processor; } + /** * Fill buffer 
with 4x4 scale offset matrix. * Vertex shader's position is to be multiplied by this matrix. diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index ef268cfda3..7db23ee80d 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -730,7 +730,7 @@ void VKGSRender::emit_geometry(u32 sub_index) if (state_flags & rsx::vertex_arrays_changed) { - analyse_inputs_interleaved(m_vertex_layout); + m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata); } else if (state_flags & rsx::vertex_base_changed) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 5de8f62d92..4fcef46b86 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2360,8 +2360,14 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); - fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast(dst), - vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); + m_draw_processor.fill_vertex_layout_state( + m_vertex_layout, + current_vp_metadata, + vertex_info.first_vertex, + vertex_info.allocated_vertex_count, + static_cast(dst), + vertex_info.persistent_window_offset, + vertex_info.volatile_window_offset); m_vertex_layout_ring_info.unmap(); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index e690139653..526f592eb6 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -217,7 +217,7 @@ namespace vk::vertex_upload_info VKGSRender::upload_vertex_data() { draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout); - auto result = std::visit(visitor, get_draw_command(rsx::method_registers)); + auto result = std::visit(visitor, 
m_draw_processor.get_draw_command(rsx::method_registers)); const u32 vertex_count = (result.max_index - result.min_index) + 1; u32 vertex_base = result.min_index; @@ -294,7 +294,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() const usz volatile_offset_in_block = volatile_offset - persistent_offset; void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size); - write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast(block_mapping) + volatile_offset_in_block); + m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast(block_mapping) + volatile_offset_in_block); m_attrib_ring_info.unmap(); } else @@ -302,14 +302,14 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data() if (required.first > 0 && persistent_offset != umax) { void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first); - write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr); + m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr); m_attrib_ring_info.unmap(); } if (required.second > 0) { void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second); - write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping); + m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping); m_attrib_ring_info.unmap(); } } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 062d481b8a..728fe97bed 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -104,6 +104,7 @@ + @@ -619,6 +620,8 @@ + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 3471fdfcbb..87d65fbb51 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1315,6 +1315,9 @@ Emu\GPU\RSX\Host Mini-Driver 
+ + Emu\GPU\RSX\Core + @@ -2659,6 +2662,12 @@ Emu\NP + + Emu\GPU\RSX\Core + + + Emu\GPU\RSX\Core +