rsx: Implement deferred transform constants load

This commit is contained in:
kd-11 2024-04-19 04:13:56 +03:00 committed by kd-11
parent 70e40ce857
commit 406a519400
9 changed files with 185 additions and 116 deletions

View File

@ -486,6 +486,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/upscalers/fsr1/fsr_pass.cpp
RSX/GSRender.cpp
RSX/Null/NullGSRender.cpp
RSX/NV47/FW/draw_call.cpp
RSX/NV47/FW/reg_context.cpp
RSX/NV47/HW/common.cpp
RSX/NV47/HW/nv0039.cpp

View File

@ -0,0 +1,134 @@
#include "stdafx.h"
#include "draw_call.hpp"
#include "Emu/RSX/rsx_methods.h" // FIXME
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/Common/BufferUtils.h"
#include "Emu/RSX/NV47/HW/context.h"
#include "Emu/RSX/NV47/HW/nv4097.h"
// Always import this after other HW definitions
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
#include <util/serialization.hpp>
namespace rsx
{
void draw_clause::operator()(utils::serial& ar)
{
ar(draw_command_ranges, draw_command_barriers, current_range_index, primitive, command, is_immediate_draw, is_disjoint_primitive, primitive_barrier_enable, inline_vertex_array);
}
void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index)
{
ensure(!draw_command_ranges.empty());
auto _do_barrier_insert = [this](barrier_t&& val)
{
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
{
draw_command_barriers.push_back(val);
return;
}
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
{
if (*it < val)
{
continue;
}
draw_command_barriers.insert(it, val);
break;
}
};
if (type == primitive_restart_barrier)
{
// Rasterization flow barrier
const auto& last = draw_command_ranges[current_range_index];
const auto address = last.first + last.count;
_do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type });
}
else
{
// Execution dependency barrier. Requires breaking the current draw call sequence and start another.
if (draw_command_ranges.back().count > 0)
{
append_draw_command({});
}
else
{
// In case of back-to-back modifiers, do not add duplicates
current_range_index = draw_command_ranges.size() - 1;
}
_do_barrier_insert({ current_range_index, rsx::get_shared_tag(), ~0u, index, arg, 0, type });
last_execution_barrier_index = current_range_index;
}
}
void draw_clause::reset(primitive_type type)
{
current_range_index = ~0u;
last_execution_barrier_index = 0;
command = draw_command::none;
primitive = type;
primitive_barrier_enable = false;
draw_command_ranges.clear();
draw_command_barriers.clear();
inline_vertex_array.clear();
is_disjoint_primitive = is_primitive_disjointed(primitive);
}
u32 draw_clause::execute_pipeline_dependencies(context* ctx) const
{
u32 result = 0u;
for (;
current_barrier_it != draw_command_barriers.end() && current_barrier_it->draw_id == current_range_index;
current_barrier_it++)
{
const auto& barrier = *current_barrier_it;
switch (barrier.type)
{
case primitive_restart_barrier:
break;
case index_base_modifier_barrier:
// Change index base offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg);
result |= index_base_changed;
break;
case vertex_base_modifier_barrier:
// Change vertex base offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg);
result |= vertex_base_changed;
break;
case vertex_array_offset_modifier_barrier:
// Change vertex array offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg);
result |= vertex_arrays_changed;
break;
case transform_constant_load_modifier_barrier:
// Change the transform load target. Does not change result mask.
REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg);
break;
case transform_constant_update_barrier:
// Update transform constants
// REGS(ctx)->decode(NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); // This statement technically does the right thing but has no consequence other than wasting perf.
// FIXME: Batching
nv4097::set_transform_constant::decode_one(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg);
result |= transform_constants_changed;
break;
default:
fmt::throw_exception("Unreachable");
}
}
return result;
}
}

View File

@ -21,6 +21,9 @@ namespace rsx
// Location of last execution barrier
u32 last_execution_barrier_index{};
// Draw-time iterator to the draw_command_barriers struct
mutable simple_array<barrier_t>::iterator current_barrier_it;
// Helper functions
// Add a new draw command
void append_draw_command(const draw_range_t& range)
@ -55,6 +58,7 @@ namespace rsx
bool is_immediate_draw{}; // Set if part of the draw is submitted via push registers
bool is_disjoint_primitive{}; // Set if primitive type does not rely on adjacency information
bool primitive_barrier_enable{}; // Set once to signal that a primitive restart barrier can be inserted
bool is_rendering{}; // Set while we're actually pushing the draw calls to host GPU
simple_array<u32> inline_vertex_array{};
@ -111,7 +115,8 @@ namespace rsx
if (draw_command_ranges[index].first == first &&
draw_command_ranges[index].count == count)
{
// Duplicate entry? WTF!
// Duplicate entry. Usually this indicates a botched instancing setup.
rsx_log.error("Duplicate draw request. Start=%u, Count=%u", first, count);
return;
}
@ -206,12 +211,13 @@ namespace rsx
: primitive_class::non_polygon;
}
void reset(rsx::primitive_type type);
void begin()
{
current_range_index = 0;
current_barrier_it = draw_command_barriers.begin();
is_rendering = true;
}
void end()
@ -225,6 +231,7 @@ namespace rsx
if (current_range_index >= draw_command_ranges.size())
{
current_range_index = 0;
is_rendering = false;
return false;
}
@ -233,6 +240,7 @@ namespace rsx
// Dangling execution barrier
ensure(current_range_index > 0 && (current_range_index + 1) == draw_command_ranges.size());
current_range_index = 0;
is_rendering = false;
return false;
}

View File

@ -17,7 +17,9 @@ namespace rsx
primitive_restart_barrier,
vertex_base_modifier_barrier,
index_base_modifier_barrier,
vertex_array_offset_modifier_barrier
vertex_array_offset_modifier_barrier,
transform_constant_load_modifier_barrier,
transform_constant_update_barrier
};
enum command_execution_flags : u32
@ -25,6 +27,7 @@ namespace rsx
vertex_base_changed = (1 << 0),
index_base_changed = (1 << 1),
vertex_arrays_changed = (1 << 2),
transform_constants_changed = (1 << 3)
};
enum class primitive_class

View File

@ -20,8 +20,29 @@ namespace rsx
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty;
}
void set_transform_constant::impl(context* ctx, u32 reg, u32 /*arg*/)
void set_transform_constant::decode_one(context* ctx, u32 reg, u32 arg)
{
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
const u32 constant_id = index / 4;
const u8 subreg = index % 4;
const u32 load = REGS(ctx)->transform_constant_load();
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
}
void set_transform_constant::impl(context* ctx, u32 reg, u32 arg)
{
if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty())
{
// Updating constants mid-draw is messy. Push attr barrier.
REGS(ctx)->current_draw_clause.insert_command_barrier(
rsx::transform_constant_update_barrier,
arg,
reg - NV4097_SET_TRANSFORM_CONSTANT
);
return;
}
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
const u32 constant_id = index / 4;
const u8 subreg = index % 4;
@ -439,6 +460,11 @@ namespace rsx
REGS(ctx)->decode(reg, REGS(ctx)->latch);
}
void set_transform_constant_load(context* ctx, u32 reg, u32 arg)
{
util::push_draw_parameter_change(ctx, rsx::transform_constant_load_modifier_barrier, reg, arg);
}
///// Reports
void get_report(context* ctx, u32 /*reg*/, u32 arg)

View File

@ -83,6 +83,8 @@ namespace rsx
void set_blend_factor(context* ctx, u32 reg, u32 arg);
void set_transform_constant_load(context* ctx, u32 reg, u32 arg);
#define RSX(ctx) ctx->rsxthr
#define REGS(ctx) (&rsx::method_registers)
@ -197,6 +199,8 @@ namespace rsx
struct set_transform_constant
{
static void impl(context* ctx, u32 reg, u32 arg);
static void decode_one(context* ctx, u32 reg, u32 arg);
};
struct set_transform_program

View File

@ -39,11 +39,6 @@ namespace rsx
rsx_log.trace("RSX method 0x%x (arg=0x%x)", reg << 2, arg);
}
namespace nv0039
{
}
void flip_command(context* ctx, u32, u32 arg)
{
ensure(RSX(ctx)->isHLE);
@ -1217,113 +1212,6 @@ namespace rsx
return registers[reg] == value;
}
void draw_clause::operator()(utils::serial& ar)
{
ar(draw_command_ranges, draw_command_barriers, current_range_index, primitive, command, is_immediate_draw, is_disjoint_primitive, primitive_barrier_enable, inline_vertex_array);
}
void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index)
{
ensure(!draw_command_ranges.empty());
auto _do_barrier_insert = [this](barrier_t&& val)
{
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
{
draw_command_barriers.push_back(val);
return;
}
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
{
if (*it < val)
{
continue;
}
draw_command_barriers.insert(it, val);
break;
}
};
if (type == primitive_restart_barrier)
{
// Rasterization flow barrier
const auto& last = draw_command_ranges[current_range_index];
const auto address = last.first + last.count;
_do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type });
}
else
{
// Execution dependency barrier. Requires breaking the current draw call sequence and start another.
if (draw_command_ranges.back().count > 0)
{
append_draw_command({});
}
else
{
// In case of back-to-back modifiers, do not add duplicates
current_range_index = draw_command_ranges.size() - 1;
}
_do_barrier_insert({ current_range_index, rsx::get_shared_tag(), ~0u, index, arg, 0, type });
last_execution_barrier_index = current_range_index;
}
}
void draw_clause::reset(primitive_type type)
{
current_range_index = ~0u;
last_execution_barrier_index = 0;
command = draw_command::none;
primitive = type;
primitive_barrier_enable = false;
draw_command_ranges.clear();
draw_command_barriers.clear();
inline_vertex_array.clear();
is_disjoint_primitive = is_primitive_disjointed(primitive);
}
u32 draw_clause::execute_pipeline_dependencies(context* ctx) const
{
u32 result = 0;
for (const auto &barrier : draw_command_barriers)
{
if (barrier.draw_id != current_range_index)
continue;
switch (barrier.type)
{
case primitive_restart_barrier:
break;
case index_base_modifier_barrier:
// Change index base offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg);
result |= index_base_changed;
break;
case vertex_base_modifier_barrier:
// Change vertex base offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg);
result |= vertex_base_changed;
break;
case vertex_array_offset_modifier_barrier:
// Change vertex array offset
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg);
result |= vertex_arrays_changed;
break;
default:
fmt::throw_exception("Unreachable");
}
}
return result;
}
namespace method_detail
{
template <u32 Id, u32 Step, u32 Count, template<u32> class T, u32 Index = 0>
@ -1751,6 +1639,7 @@ namespace rsx
bind_range<NV4097_SET_VERTEX_DATA4F_M, 1, 64, nv4097::set_vertex_data4f_m>();
bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
bind_range<NV4097_SET_VERTEX_DATA4S_M, 1, 32, nv4097::set_vertex_data4s_m>();
bind(NV4097_SET_TRANSFORM_CONSTANT_LOAD, nv4097::set_transform_constant_load);
bind_array(NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant::impl);
bind_array(NV4097_SET_TRANSFORM_PROGRAM, 1, 32, nv4097::set_transform_program::impl);
bind(NV4097_GET_REPORT, nv4097::get_report);

View File

@ -97,6 +97,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
<ClCompile Include="Emu\RSX\NV47\HW\common.cpp" />
<ClCompile Include="Emu\RSX\NV47\HW\nv0039.cpp" />

View File

@ -1246,6 +1246,9 @@
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp">
<Filter>Emu\GPU\RSX\NV47\FW</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp">
<Filter>Emu\GPU\RSX\NV47\FW</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">