mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-19 12:40:29 +00:00
rsx/vk: Implement batched transform constant updates
This commit is contained in:
parent
ac6f77a744
commit
a09111052a
@ -39,7 +39,7 @@ std::tuple<u32, u32, u32> write_index_array_data_to_buffer(std::span<std::byte>
|
||||
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count);
|
||||
|
||||
// Copy and swap data in 32-bit units
|
||||
extern void(*const copy_data_swap_u32)(u32*, const u32*, u32);
|
||||
extern void(*const copy_data_swap_u32)(u32* dst, const u32* src, u32 count);
|
||||
|
||||
// Copy and swap data in 32-bit units, return true if changed
|
||||
extern bool(*const copy_data_swap_u32_cmp)(u32*, const u32*, u32);
|
||||
extern bool(*const copy_data_swap_u32_cmp)(u32* dst, const u32* src, u32 count);
|
||||
|
@ -15,7 +15,7 @@ namespace rsx
|
||||
// virtual void begin() = 0;
|
||||
// virtual void end() = 0;
|
||||
|
||||
// Patch transform constants
|
||||
virtual void patch_transform_constants(context* ctx, u32 first_index, const std::span<u32>& data) {};
|
||||
// Patch transform constants. Units are in 32x4 units
|
||||
virtual void patch_transform_constants(context* /*ctx*/, u32 /*index*/, u32 /*count*/) {};
|
||||
};
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include "Emu/RSX/rsx_methods.h" // FIXME
|
||||
#include "Emu/RSX/rsx_utils.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
#include "Emu/RSX/NV47/HW/context.h"
|
||||
#include "Emu/RSX/NV47/HW/nv4097.h"
|
||||
@ -115,33 +116,45 @@ namespace rsx
|
||||
switch (barrier.type)
|
||||
{
|
||||
case primitive_restart_barrier:
|
||||
{
|
||||
break;
|
||||
}
|
||||
case index_base_modifier_barrier:
|
||||
{
|
||||
// Change index base offset
|
||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg0);
|
||||
result |= index_base_changed;
|
||||
break;
|
||||
}
|
||||
case vertex_base_modifier_barrier:
|
||||
{
|
||||
// Change vertex base offset
|
||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg0);
|
||||
result |= vertex_base_changed;
|
||||
break;
|
||||
}
|
||||
case vertex_array_offset_modifier_barrier:
|
||||
{
|
||||
// Change vertex array offset
|
||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg0);
|
||||
result |= vertex_arrays_changed;
|
||||
break;
|
||||
}
|
||||
case transform_constant_load_modifier_barrier:
|
||||
{
|
||||
// Change the transform load target. Does not change result mask.
|
||||
REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg0);
|
||||
break;
|
||||
}
|
||||
case transform_constant_update_barrier:
|
||||
{
|
||||
// Update transform constants
|
||||
// REGS(ctx)->decode(NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); // This statement technically does the right thing but has no consequence other than wasting perf.
|
||||
// FIXME: Batching
|
||||
nv4097::set_transform_constant::decode_one(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg0);
|
||||
auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0);
|
||||
auto buffer = std::span<const u32>(static_cast<const u32*>(vm::base(ptr)), barrier.arg1);
|
||||
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer);
|
||||
result |= transform_constants_changed;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fmt::throw_exception("Unreachable");
|
||||
}
|
||||
|
@ -30,20 +30,22 @@ namespace rsx
|
||||
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
|
||||
}
|
||||
|
||||
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args)
|
||||
{
|
||||
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||
const u32 constant_id = index / 4;
|
||||
const u8 subreg = index % 4;
|
||||
const u32 load = REGS(ctx)->transform_constant_load();
|
||||
|
||||
auto dst = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
||||
copy_data_swap_u32(dst, args.data(), ::size32(args));
|
||||
|
||||
const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div
|
||||
RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id);
|
||||
}
|
||||
|
||||
void set_transform_constant::impl(context* ctx, u32 reg, u32 arg)
|
||||
{
|
||||
if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty())
|
||||
{
|
||||
// Updating constants mid-draw is messy. Push attr barrier.
|
||||
REGS(ctx)->current_draw_clause.insert_command_barrier(
|
||||
rsx::transform_constant_update_barrier,
|
||||
arg,
|
||||
0,
|
||||
reg - NV4097_SET_TRANSFORM_CONSTANT
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||
const u32 constant_id = index / 4;
|
||||
const u8 subreg = index % 4;
|
||||
@ -73,6 +75,20 @@ namespace rsx
|
||||
rcount = 0;
|
||||
}
|
||||
|
||||
if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty())
|
||||
{
|
||||
// Updating constants mid-draw is messy. Defer the writes
|
||||
REGS(ctx)->current_draw_clause.insert_command_barrier(
|
||||
rsx::transform_constant_update_barrier,
|
||||
RSX(ctx)->fifo_ctrl->get_pos(),
|
||||
rcount,
|
||||
reg - NV4097_SET_TRANSFORM_CONSTANT
|
||||
);
|
||||
|
||||
RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto values = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
||||
|
||||
const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();
|
||||
|
@ -6,6 +6,8 @@
|
||||
#include "Emu/RSX/gcm_enums.h"
|
||||
#include "Emu/RSX/NV47/FW/draw_call.inc.h"
|
||||
|
||||
#include <span>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
enum command_barrier_type : u32;
|
||||
@ -201,6 +203,8 @@ namespace rsx
|
||||
static void impl(context* ctx, u32 reg, u32 arg);
|
||||
|
||||
static void decode_one(context* ctx, u32 reg, u32 arg);
|
||||
|
||||
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args);
|
||||
};
|
||||
|
||||
struct set_transform_program
|
||||
|
@ -29,6 +29,11 @@ namespace rsx
|
||||
m_iotable = &pctrl->iomap_table;
|
||||
}
|
||||
|
||||
u32 FIFO_control::translate_address(u32 address) const
|
||||
{
|
||||
return m_iotable->get_addr(address);
|
||||
}
|
||||
|
||||
void FIFO_control::sync_get() const
|
||||
{
|
||||
m_ctrl->get.release(m_internal_get);
|
||||
|
@ -151,6 +151,8 @@ namespace rsx
|
||||
FIFO_control(rsx::thread* pctrl);
|
||||
~FIFO_control() = default;
|
||||
|
||||
u32 translate_address(u32 addr) const;
|
||||
|
||||
std::pair<bool, u32> fetch_u32(u32 addr);
|
||||
void invalidate_cache() { m_cache_size = 0; }
|
||||
|
||||
|
@ -732,43 +732,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
}
|
||||
}
|
||||
|
||||
if (state_flags & rsx::transform_constants_changed)
|
||||
{
|
||||
auto allocate_mem = [&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
vertex_scratchpad.resize(size);
|
||||
return { vertex_scratchpad.data(), size };
|
||||
};
|
||||
|
||||
rsx::io_buffer iobuf(allocate_mem);
|
||||
upload_transform_constants(iobuf);
|
||||
|
||||
ensure(iobuf.size() >= m_vertex_constants_buffer_info.range);
|
||||
|
||||
vk::insert_buffer_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
m_vertex_constants_buffer_info.offset,
|
||||
m_vertex_constants_buffer_info.range,
|
||||
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
vkCmdUpdateBuffer(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
m_vertex_constants_buffer_info.offset,
|
||||
m_vertex_constants_buffer_info.range,
|
||||
iobuf.data());
|
||||
|
||||
vk::insert_buffer_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
m_vertex_constants_buffer_info.offset,
|
||||
m_vertex_constants_buffer_info.range,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT);
|
||||
}
|
||||
|
||||
if ((state_flags & vertex_state_mask) && !m_vertex_layout.validate())
|
||||
{
|
||||
// No vertex inputs enabled
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "vkutils/scratch.h"
|
||||
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
|
||||
#include "../Program/program_state_cache2.hpp"
|
||||
@ -2354,9 +2355,60 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
|
||||
m_vertex_layout_ring_info.unmap();
|
||||
}
|
||||
|
||||
void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span<u32>& data)
|
||||
void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count)
|
||||
{
|
||||
// Hot-patching transform constants mid-draw (instanced draw)
|
||||
utils::address_range data_range;
|
||||
void* data_source = nullptr;
|
||||
|
||||
if (!m_vertex_prog || m_vertex_prog->has_indexed_constants)
|
||||
{
|
||||
// We're working with a full range. We can do a direct patch in this case since no index translation is required.
|
||||
const auto byte_count = count * 16;
|
||||
const auto byte_offset = index * 16;
|
||||
|
||||
data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset + byte_offset, byte_count);
|
||||
data_source = &REGS(ctx)->transform_constants[index];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Indexed. This is a bit trickier. Use scratchpad to avoid UAF
|
||||
auto allocate_mem = [&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
scratchpad.resize(size);
|
||||
return { scratchpad.data(), size };
|
||||
};
|
||||
|
||||
rsx::io_buffer iobuf(allocate_mem);
|
||||
upload_transform_constants(iobuf);
|
||||
|
||||
ensure(iobuf.size() >= m_vertex_constants_buffer_info.range);
|
||||
data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset, m_vertex_constants_buffer_info.range);
|
||||
data_source = iobuf.data();
|
||||
}
|
||||
|
||||
vk::insert_buffer_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
data_range.start,
|
||||
data_range.length(),
|
||||
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
vkCmdUpdateBuffer(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
data_range.start,
|
||||
data_range.length(),
|
||||
data_source);
|
||||
|
||||
vk::insert_buffer_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
m_vertex_constants_buffer_info.buffer,
|
||||
data_range.start,
|
||||
data_range.length(),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT);
|
||||
}
|
||||
|
||||
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool)
|
||||
|
@ -234,7 +234,7 @@ private:
|
||||
VkDescriptorSet allocate_descriptor_set();
|
||||
|
||||
vk::vertex_upload_info upload_vertex_data();
|
||||
rsx::simple_array<u8> vertex_scratchpad;
|
||||
rsx::simple_array<u8> scratchpad;
|
||||
|
||||
bool load_program();
|
||||
void load_program_env();
|
||||
@ -277,7 +277,7 @@ public:
|
||||
inline std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; }
|
||||
|
||||
// GRAPH backend
|
||||
void patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span<u32>& data) override;
|
||||
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;
|
||||
|
||||
protected:
|
||||
void clear_surface(u32 mask) override;
|
||||
|
Loading…
x
Reference in New Issue
Block a user