From 406a519400fbcc3b43471e0694f35b10f77acae0 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 19 Apr 2024 04:13:56 +0300 Subject: [PATCH] rsx: Implement deferred transform constants load --- rpcs3/Emu/CMakeLists.txt | 1 + rpcs3/Emu/RSX/NV47/FW/draw_call.cpp | 134 ++++++++++++++++++++++++++ rpcs3/Emu/RSX/NV47/FW/draw_call.hpp | 12 ++- rpcs3/Emu/RSX/NV47/FW/draw_call.inc.h | 5 +- rpcs3/Emu/RSX/NV47/HW/nv4097.cpp | 28 +++++- rpcs3/Emu/RSX/NV47/HW/nv4097.h | 4 + rpcs3/Emu/RSX/rsx_methods.cpp | 113 +--------------------- rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 + 9 files changed, 185 insertions(+), 116 deletions(-) create mode 100644 rpcs3/Emu/RSX/NV47/FW/draw_call.cpp diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index d595a49e48..1185cb3708 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -486,6 +486,7 @@ target_sources(rpcs3_emu PRIVATE RSX/GL/upscalers/fsr1/fsr_pass.cpp RSX/GSRender.cpp RSX/Null/NullGSRender.cpp + RSX/NV47/FW/draw_call.cpp RSX/NV47/FW/reg_context.cpp RSX/NV47/HW/common.cpp RSX/NV47/HW/nv0039.cpp diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp new file mode 100644 index 0000000000..ebedce9e3d --- /dev/null +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp @@ -0,0 +1,134 @@ +#include "stdafx.h" +#include "draw_call.hpp" + +#include "Emu/RSX/rsx_methods.h" // FIXME +#include "Emu/RSX/rsx_utils.h" +#include "Emu/RSX/Common/BufferUtils.h" +#include "Emu/RSX/NV47/HW/context.h" +#include "Emu/RSX/NV47/HW/nv4097.h" + +// Always import this after other HW definitions +#include "Emu/RSX/NV47/HW/context_accessors.define.h" + +#include + +namespace rsx +{ + void draw_clause::operator()(utils::serial& ar) + { + ar(draw_command_ranges, draw_command_barriers, current_range_index, primitive, command, is_immediate_draw, is_disjoint_primitive, primitive_barrier_enable, inline_vertex_array); + } + + void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index) + { + ensure(!draw_command_ranges.empty()); + + auto _do_barrier_insert = [this](barrier_t&& val) + { + if (draw_command_barriers.empty() || draw_command_barriers.back() < val) + { + draw_command_barriers.push_back(val); + return; + } + + for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++) + { + if (*it < val) + { + continue; + } + + draw_command_barriers.insert(it, val); + break; + } + }; + + if (type == primitive_restart_barrier) + { + // Rasterization flow barrier + const auto& last = draw_command_ranges[current_range_index]; + const auto address = last.first + last.count; + + _do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type }); + } + else + { + // Execution dependency barrier. Requires breaking the current draw call sequence and start another. + if (draw_command_ranges.back().count > 0) + { + append_draw_command({}); + } + else + { + // In case of back-to-back modifiers, do not add duplicates + current_range_index = draw_command_ranges.size() - 1; + } + + _do_barrier_insert({ current_range_index, rsx::get_shared_tag(), ~0u, index, arg, 0, type }); + last_execution_barrier_index = current_range_index; + } + } + + void draw_clause::reset(primitive_type type) + { + current_range_index = ~0u; + last_execution_barrier_index = 0; + + command = draw_command::none; + primitive = type; + primitive_barrier_enable = false; + + draw_command_ranges.clear(); + draw_command_barriers.clear(); + inline_vertex_array.clear(); + + is_disjoint_primitive = is_primitive_disjointed(primitive); + } + + u32 draw_clause::execute_pipeline_dependencies(context* ctx) const + { + u32 result = 0u; + for (; + current_barrier_it != draw_command_barriers.end() && current_barrier_it->draw_id == current_range_index; + current_barrier_it++) + { + const auto& barrier = *current_barrier_it; + switch (barrier.type) + { + case primitive_restart_barrier: + break; + case index_base_modifier_barrier: + // Change index base offset + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg); + result |= index_base_changed; + break; + case vertex_base_modifier_barrier: + // Change vertex base offset + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); + result |= vertex_base_changed; + break; + case vertex_array_offset_modifier_barrier: + // Change vertex array offset + REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg); + result |= vertex_arrays_changed; + break; + case transform_constant_load_modifier_barrier: + // Change the transform load target. Does not change result mask. + REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg); + break; + case transform_constant_update_barrier: + // Update transform constants + // REGS(ctx)->decode(NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); // This statement technically does the right thing but has no consequence other than wasting perf. + // FIXME: Batching + nv4097::set_transform_constant::decode_one(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); + result |= transform_constants_changed; + break; + default: + fmt::throw_exception("Unreachable"); + } + } + + return result; + } +} + diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp index 94c6291ad6..8ce2e5973c 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp @@ -21,6 +21,9 @@ namespace rsx // Location of last execution barrier u32 last_execution_barrier_index{}; + // Draw-time iterator to the draw_command_barriers struct + mutable simple_array::iterator current_barrier_it; + // Helper functions // Add a new draw command void append_draw_command(const draw_range_t& range) @@ -55,6 +58,7 @@ namespace rsx bool is_immediate_draw{}; // Set if part of the draw is submitted via push registers bool is_disjoint_primitive{}; // Set if primitive type does not rely on adjacency information bool primitive_barrier_enable{}; // Set once to signal that a primitive restart barrier can be inserted + bool is_rendering{}; // Set while we're actually pushing the draw calls to host GPU simple_array inline_vertex_array{}; @@ -111,7 +115,8 @@ namespace rsx if (draw_command_ranges[index].first == first && draw_command_ranges[index].count == count) { - // Duplicate entry? WTF! + // Duplicate entry. Usually this indicates a botched instancing setup. + rsx_log.error("Duplicate draw request. Start=%u, Count=%u", first, count); return; } @@ -206,12 +211,13 @@ namespace rsx : primitive_class::non_polygon; } - void reset(rsx::primitive_type type); void begin() { current_range_index = 0; + current_barrier_it = draw_command_barriers.begin(); + is_rendering = true; } void end() @@ -225,6 +231,7 @@ namespace rsx if (current_range_index >= draw_command_ranges.size()) { current_range_index = 0; + is_rendering = false; return false; } @@ -233,6 +240,7 @@ namespace rsx // Dangling execution barrier ensure(current_range_index > 0 && (current_range_index + 1) == draw_command_ranges.size()); current_range_index = 0; + is_rendering = false; return false; } diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.inc.h b/rpcs3/Emu/RSX/NV47/FW/draw_call.inc.h index a29915ead2..2d29553ece 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.inc.h +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.inc.h @@ -17,7 +17,9 @@ namespace rsx primitive_restart_barrier, vertex_base_modifier_barrier, index_base_modifier_barrier, - vertex_array_offset_modifier_barrier + vertex_array_offset_modifier_barrier, + transform_constant_load_modifier_barrier, + transform_constant_update_barrier }; enum command_execution_flags : u32 @@ -25,6 +27,7 @@ namespace rsx vertex_base_changed = (1 << 0), index_base_changed = (1 << 1), vertex_arrays_changed = (1 << 2), + transform_constants_changed = (1 << 3) }; enum class primitive_class diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 96a92c8f98..6290f0dd5b 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -20,8 +20,29 @@ namespace rsx RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty; } - void set_transform_constant::impl(context* ctx, u32 reg, u32 /*arg*/) + void set_transform_constant::decode_one(context* ctx, u32 reg, u32 arg) { + const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; + const u32 constant_id = index / 4; + const u8 subreg = index % 4; + const u32 load = REGS(ctx)->transform_constant_load(); + + REGS(ctx)->transform_constants[load + constant_id][subreg] = arg; + } + + void set_transform_constant::impl(context* ctx, u32 reg, u32 arg) + { + if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty()) + { + // Updating constants mid-draw is messy. Push attr barrier. + REGS(ctx)->current_draw_clause.insert_command_barrier( + rsx::transform_constant_update_barrier, + arg, + reg - NV4097_SET_TRANSFORM_CONSTANT + ); + return; + } + const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; const u32 constant_id = index / 4; const u8 subreg = index % 4; @@ -439,6 +460,11 @@ namespace rsx REGS(ctx)->decode(reg, REGS(ctx)->latch); } + void set_transform_constant_load(context* ctx, u32 reg, u32 arg) + { + util::push_draw_parameter_change(ctx, rsx::transform_constant_load_modifier_barrier, reg, arg); + } + ///// Reports void get_report(context* ctx, u32 /*reg*/, u32 arg) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index 4d72acb494..06df8a53f6 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -83,6 +83,8 @@ namespace rsx void set_blend_factor(context* ctx, u32 reg, u32 arg); + void set_transform_constant_load(context* ctx, u32 reg, u32 arg); + #define RSX(ctx) ctx->rsxthr #define REGS(ctx) (&rsx::method_registers) @@ -197,6 +199,8 @@ namespace rsx struct set_transform_constant { static void impl(context* ctx, u32 reg, u32 arg); + + static void decode_one(context* ctx, u32 reg, u32 arg); }; struct set_transform_program diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 4566a0418f..de7eabdbfa 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -39,11 +39,6 @@ namespace rsx rsx_log.trace("RSX method 0x%x (arg=0x%x)", reg << 2, arg); } - namespace nv0039 - { - - } - void flip_command(context* ctx, u32, u32 arg) { ensure(RSX(ctx)->isHLE); @@ -1217,113 +1212,6 @@ namespace rsx return registers[reg] == value; } - void draw_clause::operator()(utils::serial& ar) - { - ar(draw_command_ranges, draw_command_barriers, current_range_index, primitive, command, is_immediate_draw, is_disjoint_primitive, primitive_barrier_enable, inline_vertex_array); - } - - void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index) - { - ensure(!draw_command_ranges.empty()); - - auto _do_barrier_insert = [this](barrier_t&& val) - { - if (draw_command_barriers.empty() || draw_command_barriers.back() < val) - { - draw_command_barriers.push_back(val); - return; - } - - for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++) - { - if (*it < val) - { - continue; - } - - draw_command_barriers.insert(it, val); - break; - } - }; - - if (type == primitive_restart_barrier) - { - // Rasterization flow barrier - const auto& last = draw_command_ranges[current_range_index]; - const auto address = last.first + last.count; - - _do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type }); - } - else - { - // Execution dependency barrier. Requires breaking the current draw call sequence and start another. - if (draw_command_ranges.back().count > 0) - { - append_draw_command({}); - } - else - { - // In case of back-to-back modifiers, do not add duplicates - current_range_index = draw_command_ranges.size() - 1; - } - - _do_barrier_insert({ current_range_index, rsx::get_shared_tag(), ~0u, index, arg, 0, type }); - last_execution_barrier_index = current_range_index; - } - } - - void draw_clause::reset(primitive_type type) - { - current_range_index = ~0u; - last_execution_barrier_index = 0; - - command = draw_command::none; - primitive = type; - primitive_barrier_enable = false; - - draw_command_ranges.clear(); - draw_command_barriers.clear(); - inline_vertex_array.clear(); - - is_disjoint_primitive = is_primitive_disjointed(primitive); - } - - u32 draw_clause::execute_pipeline_dependencies(context* ctx) const - { - u32 result = 0; - - for (const auto &barrier : draw_command_barriers) - { - if (barrier.draw_id != current_range_index) - continue; - - switch (barrier.type) - { - case primitive_restart_barrier: - break; - case index_base_modifier_barrier: - // Change index base offset - REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg); - result |= index_base_changed; - break; - case vertex_base_modifier_barrier: - // Change vertex base offset - REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); - result |= vertex_base_changed; - break; - case vertex_array_offset_modifier_barrier: - // Change vertex array offset - REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg); - result |= vertex_arrays_changed; - break; - default: - fmt::throw_exception("Unreachable"); - } - } - - return result; - } - namespace method_detail { template class T, u32 Index = 0> @@ -1751,6 +1639,7 @@ namespace rsx bind_range(); bind_range(); bind_range(); + bind(NV4097_SET_TRANSFORM_CONSTANT_LOAD, nv4097::set_transform_constant_load); bind_array(NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant::impl); bind_array(NV4097_SET_TRANSFORM_PROGRAM, 1, 32, nv4097::set_transform_program::impl); bind(NV4097_GET_REPORT, nv4097::get_report); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 374a3c5d95..c9fa56775b 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -97,6 +97,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 8fe6fa5901..4850053e9f 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1246,6 +1246,9 @@ Emu\GPU\RSX\NV47\FW + + Emu\GPU\RSX\NV47\FW +