mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-21 09:40:01 +00:00
rsx/vk: Implement batched transform constant updates
This commit is contained in:
parent
ac6f77a744
commit
a09111052a
@ -39,7 +39,7 @@ std::tuple<u32, u32, u32> write_index_array_data_to_buffer(std::span<std::byte>
|
|||||||
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count);
|
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count);
|
||||||
|
|
||||||
// Copy and swap data in 32-bit units
|
// Copy and swap data in 32-bit units
|
||||||
extern void(*const copy_data_swap_u32)(u32*, const u32*, u32);
|
extern void(*const copy_data_swap_u32)(u32* dst, const u32* src, u32 count);
|
||||||
|
|
||||||
// Copy and swap data in 32-bit units, return true if changed
|
// Copy and swap data in 32-bit units, return true if changed
|
||||||
extern bool(*const copy_data_swap_u32_cmp)(u32*, const u32*, u32);
|
extern bool(*const copy_data_swap_u32_cmp)(u32* dst, const u32* src, u32 count);
|
||||||
|
@ -15,7 +15,7 @@ namespace rsx
|
|||||||
// virtual void begin() = 0;
|
// virtual void begin() = 0;
|
||||||
// virtual void end() = 0;
|
// virtual void end() = 0;
|
||||||
|
|
||||||
// Patch transform constants
|
// Patch transform constants. Units are in 32x4 units
|
||||||
virtual void patch_transform_constants(context* ctx, u32 first_index, const std::span<u32>& data) {};
|
virtual void patch_transform_constants(context* /*ctx*/, u32 /*index*/, u32 /*count*/) {};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "Emu/RSX/rsx_methods.h" // FIXME
|
#include "Emu/RSX/rsx_methods.h" // FIXME
|
||||||
#include "Emu/RSX/rsx_utils.h"
|
#include "Emu/RSX/rsx_utils.h"
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
#include "Emu/RSX/Common/BufferUtils.h"
|
#include "Emu/RSX/Common/BufferUtils.h"
|
||||||
#include "Emu/RSX/NV47/HW/context.h"
|
#include "Emu/RSX/NV47/HW/context.h"
|
||||||
#include "Emu/RSX/NV47/HW/nv4097.h"
|
#include "Emu/RSX/NV47/HW/nv4097.h"
|
||||||
@ -115,33 +116,45 @@ namespace rsx
|
|||||||
switch (barrier.type)
|
switch (barrier.type)
|
||||||
{
|
{
|
||||||
case primitive_restart_barrier:
|
case primitive_restart_barrier:
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case index_base_modifier_barrier:
|
case index_base_modifier_barrier:
|
||||||
|
{
|
||||||
// Change index base offset
|
// Change index base offset
|
||||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg0);
|
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg0);
|
||||||
result |= index_base_changed;
|
result |= index_base_changed;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case vertex_base_modifier_barrier:
|
case vertex_base_modifier_barrier:
|
||||||
|
{
|
||||||
// Change vertex base offset
|
// Change vertex base offset
|
||||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg0);
|
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg0);
|
||||||
result |= vertex_base_changed;
|
result |= vertex_base_changed;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case vertex_array_offset_modifier_barrier:
|
case vertex_array_offset_modifier_barrier:
|
||||||
|
{
|
||||||
// Change vertex array offset
|
// Change vertex array offset
|
||||||
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg0);
|
REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg0);
|
||||||
result |= vertex_arrays_changed;
|
result |= vertex_arrays_changed;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case transform_constant_load_modifier_barrier:
|
case transform_constant_load_modifier_barrier:
|
||||||
|
{
|
||||||
// Change the transform load target. Does not change result mask.
|
// Change the transform load target. Does not change result mask.
|
||||||
REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg0);
|
REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg0);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case transform_constant_update_barrier:
|
case transform_constant_update_barrier:
|
||||||
|
{
|
||||||
// Update transform constants
|
// Update transform constants
|
||||||
// REGS(ctx)->decode(NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); // This statement technically does the right thing but has no consequence other than wasting perf.
|
auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0);
|
||||||
// FIXME: Batching
|
auto buffer = std::span<const u32>(static_cast<const u32*>(vm::base(ptr)), barrier.arg1);
|
||||||
nv4097::set_transform_constant::decode_one(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg0);
|
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer);
|
||||||
result |= transform_constants_changed;
|
result |= transform_constants_changed;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
fmt::throw_exception("Unreachable");
|
fmt::throw_exception("Unreachable");
|
||||||
}
|
}
|
||||||
|
@ -30,20 +30,22 @@ namespace rsx
|
|||||||
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
|
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args)
|
||||||
|
{
|
||||||
|
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||||
|
const u32 constant_id = index / 4;
|
||||||
|
const u8 subreg = index % 4;
|
||||||
|
const u32 load = REGS(ctx)->transform_constant_load();
|
||||||
|
|
||||||
|
auto dst = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
||||||
|
copy_data_swap_u32(dst, args.data(), ::size32(args));
|
||||||
|
|
||||||
|
const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div
|
||||||
|
RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id);
|
||||||
|
}
|
||||||
|
|
||||||
void set_transform_constant::impl(context* ctx, u32 reg, u32 arg)
|
void set_transform_constant::impl(context* ctx, u32 reg, u32 arg)
|
||||||
{
|
{
|
||||||
if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty())
|
|
||||||
{
|
|
||||||
// Updating constants mid-draw is messy. Push attr barrier.
|
|
||||||
REGS(ctx)->current_draw_clause.insert_command_barrier(
|
|
||||||
rsx::transform_constant_update_barrier,
|
|
||||||
arg,
|
|
||||||
0,
|
|
||||||
reg - NV4097_SET_TRANSFORM_CONSTANT
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||||
const u32 constant_id = index / 4;
|
const u32 constant_id = index / 4;
|
||||||
const u8 subreg = index % 4;
|
const u8 subreg = index % 4;
|
||||||
@ -73,6 +75,20 @@ namespace rsx
|
|||||||
rcount = 0;
|
rcount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty())
|
||||||
|
{
|
||||||
|
// Updating constants mid-draw is messy. Defer the writes
|
||||||
|
REGS(ctx)->current_draw_clause.insert_command_barrier(
|
||||||
|
rsx::transform_constant_update_barrier,
|
||||||
|
RSX(ctx)->fifo_ctrl->get_pos(),
|
||||||
|
rcount,
|
||||||
|
reg - NV4097_SET_TRANSFORM_CONSTANT
|
||||||
|
);
|
||||||
|
|
||||||
|
RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const auto values = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
const auto values = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
||||||
|
|
||||||
const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();
|
const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include "Emu/RSX/gcm_enums.h"
|
#include "Emu/RSX/gcm_enums.h"
|
||||||
#include "Emu/RSX/NV47/FW/draw_call.inc.h"
|
#include "Emu/RSX/NV47/FW/draw_call.inc.h"
|
||||||
|
|
||||||
|
#include <span>
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
enum command_barrier_type : u32;
|
enum command_barrier_type : u32;
|
||||||
@ -201,6 +203,8 @@ namespace rsx
|
|||||||
static void impl(context* ctx, u32 reg, u32 arg);
|
static void impl(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
static void decode_one(context* ctx, u32 reg, u32 arg);
|
static void decode_one(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct set_transform_program
|
struct set_transform_program
|
||||||
|
@ -29,6 +29,11 @@ namespace rsx
|
|||||||
m_iotable = &pctrl->iomap_table;
|
m_iotable = &pctrl->iomap_table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 FIFO_control::translate_address(u32 address) const
|
||||||
|
{
|
||||||
|
return m_iotable->get_addr(address);
|
||||||
|
}
|
||||||
|
|
||||||
void FIFO_control::sync_get() const
|
void FIFO_control::sync_get() const
|
||||||
{
|
{
|
||||||
m_ctrl->get.release(m_internal_get);
|
m_ctrl->get.release(m_internal_get);
|
||||||
|
@ -151,6 +151,8 @@ namespace rsx
|
|||||||
FIFO_control(rsx::thread* pctrl);
|
FIFO_control(rsx::thread* pctrl);
|
||||||
~FIFO_control() = default;
|
~FIFO_control() = default;
|
||||||
|
|
||||||
|
u32 translate_address(u32 addr) const;
|
||||||
|
|
||||||
std::pair<bool, u32> fetch_u32(u32 addr);
|
std::pair<bool, u32> fetch_u32(u32 addr);
|
||||||
void invalidate_cache() { m_cache_size = 0; }
|
void invalidate_cache() { m_cache_size = 0; }
|
||||||
|
|
||||||
|
@ -732,43 +732,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state_flags & rsx::transform_constants_changed)
|
|
||||||
{
|
|
||||||
auto allocate_mem = [&](usz size) -> std::pair<void*, usz>
|
|
||||||
{
|
|
||||||
vertex_scratchpad.resize(size);
|
|
||||||
return { vertex_scratchpad.data(), size };
|
|
||||||
};
|
|
||||||
|
|
||||||
rsx::io_buffer iobuf(allocate_mem);
|
|
||||||
upload_transform_constants(iobuf);
|
|
||||||
|
|
||||||
ensure(iobuf.size() >= m_vertex_constants_buffer_info.range);
|
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(
|
|
||||||
*m_current_command_buffer,
|
|
||||||
m_vertex_constants_buffer_info.buffer,
|
|
||||||
m_vertex_constants_buffer_info.offset,
|
|
||||||
m_vertex_constants_buffer_info.range,
|
|
||||||
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
||||||
VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
|
||||||
|
|
||||||
vkCmdUpdateBuffer(
|
|
||||||
*m_current_command_buffer,
|
|
||||||
m_vertex_constants_buffer_info.buffer,
|
|
||||||
m_vertex_constants_buffer_info.offset,
|
|
||||||
m_vertex_constants_buffer_info.range,
|
|
||||||
iobuf.data());
|
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(
|
|
||||||
*m_current_command_buffer,
|
|
||||||
m_vertex_constants_buffer_info.buffer,
|
|
||||||
m_vertex_constants_buffer_info.offset,
|
|
||||||
m_vertex_constants_buffer_info.range,
|
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
|
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((state_flags & vertex_state_mask) && !m_vertex_layout.validate())
|
if ((state_flags & vertex_state_mask) && !m_vertex_layout.validate())
|
||||||
{
|
{
|
||||||
// No vertex inputs enabled
|
// No vertex inputs enabled
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "vkutils/scratch.h"
|
#include "vkutils/scratch.h"
|
||||||
|
|
||||||
#include "Emu/RSX/rsx_methods.h"
|
#include "Emu/RSX/rsx_methods.h"
|
||||||
|
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||||
#include "Emu/Memory/vm_locking.h"
|
#include "Emu/Memory/vm_locking.h"
|
||||||
|
|
||||||
#include "../Program/program_state_cache2.hpp"
|
#include "../Program/program_state_cache2.hpp"
|
||||||
@ -2354,9 +2355,60 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
|
|||||||
m_vertex_layout_ring_info.unmap();
|
m_vertex_layout_ring_info.unmap();
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span<u32>& data)
|
void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count)
|
||||||
{
|
{
|
||||||
|
// Hot-patching transform constants mid-draw (instanced draw)
|
||||||
|
utils::address_range data_range;
|
||||||
|
void* data_source = nullptr;
|
||||||
|
|
||||||
|
if (!m_vertex_prog || m_vertex_prog->has_indexed_constants)
|
||||||
|
{
|
||||||
|
// We're working with a full range. We can do a direct patch in this case since no index translation is required.
|
||||||
|
const auto byte_count = count * 16;
|
||||||
|
const auto byte_offset = index * 16;
|
||||||
|
|
||||||
|
data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset + byte_offset, byte_count);
|
||||||
|
data_source = &REGS(ctx)->transform_constants[index];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Indexed. This is a bit trickier. Use scratchpad to avoid UAF
|
||||||
|
auto allocate_mem = [&](usz size) -> std::pair<void*, usz>
|
||||||
|
{
|
||||||
|
scratchpad.resize(size);
|
||||||
|
return { scratchpad.data(), size };
|
||||||
|
};
|
||||||
|
|
||||||
|
rsx::io_buffer iobuf(allocate_mem);
|
||||||
|
upload_transform_constants(iobuf);
|
||||||
|
|
||||||
|
ensure(iobuf.size() >= m_vertex_constants_buffer_info.range);
|
||||||
|
data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset, m_vertex_constants_buffer_info.range);
|
||||||
|
data_source = iobuf.data();
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(
|
||||||
|
*m_current_command_buffer,
|
||||||
|
m_vertex_constants_buffer_info.buffer,
|
||||||
|
data_range.start,
|
||||||
|
data_range.length(),
|
||||||
|
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||||
|
|
||||||
|
vkCmdUpdateBuffer(
|
||||||
|
*m_current_command_buffer,
|
||||||
|
m_vertex_constants_buffer_info.buffer,
|
||||||
|
data_range.start,
|
||||||
|
data_range.length(),
|
||||||
|
data_source);
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(
|
||||||
|
*m_current_command_buffer,
|
||||||
|
m_vertex_constants_buffer_info.buffer,
|
||||||
|
data_range.start,
|
||||||
|
data_range.length(),
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
|
||||||
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool)
|
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool)
|
||||||
|
@ -234,7 +234,7 @@ private:
|
|||||||
VkDescriptorSet allocate_descriptor_set();
|
VkDescriptorSet allocate_descriptor_set();
|
||||||
|
|
||||||
vk::vertex_upload_info upload_vertex_data();
|
vk::vertex_upload_info upload_vertex_data();
|
||||||
rsx::simple_array<u8> vertex_scratchpad;
|
rsx::simple_array<u8> scratchpad;
|
||||||
|
|
||||||
bool load_program();
|
bool load_program();
|
||||||
void load_program_env();
|
void load_program_env();
|
||||||
@ -277,7 +277,7 @@ public:
|
|||||||
inline std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; }
|
inline std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; }
|
||||||
|
|
||||||
// GRAPH backend
|
// GRAPH backend
|
||||||
void patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span<u32>& data) override;
|
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void clear_surface(u32 mask) override;
|
void clear_surface(u32 mask) override;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user