mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-14 10:21:21 +00:00
vk: Add support for hardware instanced draws
This commit is contained in:
parent
d4573e233a
commit
15961b353a
@ -3,6 +3,8 @@
|
||||
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
#include "Emu/RSX/Common/buffer_stream.hpp"
|
||||
#include "Emu/RSX/Common/io_buffer.h"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/RSX/Program/GLSLCommon.h"
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
@ -244,7 +246,7 @@ namespace rsx
|
||||
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||
const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id();
|
||||
m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
|
||||
m_thread->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
|
||||
RSX(m_ctx)->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
|
||||
}
|
||||
|
||||
u32 draw_command_processor::get_push_buffer_vertex_count() const
|
||||
@ -268,7 +270,7 @@ namespace rsx
|
||||
|
||||
void draw_command_processor::clear_push_buffers()
|
||||
{
|
||||
auto& graphics_state = m_thread->m_graphics_state;
|
||||
auto& graphics_state = RSX(m_ctx)->m_graphics_state;
|
||||
if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
|
||||
{
|
||||
for (auto& push_buf : m_vertex_push_buffers)
|
||||
@ -631,7 +633,7 @@ namespace rsx
|
||||
* Fill buffer with vertex program constants.
|
||||
* Buffer must be at least 512 float4 wide.
|
||||
*/
|
||||
void draw_command_processor::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table)
|
||||
void draw_command_processor::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const
|
||||
{
|
||||
if (!reloc_table.empty()) [[ likely ]]
|
||||
{
|
||||
@ -648,7 +650,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
|
||||
void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) const
|
||||
{
|
||||
ROP_control_t rop_control{};
|
||||
|
||||
@ -664,7 +666,7 @@ namespace rsx
|
||||
rop_control.enable_polygon_stipple();
|
||||
}
|
||||
|
||||
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !m_thread->get_backend_config().supports_hw_a2c)
|
||||
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c)
|
||||
{
|
||||
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
|
||||
// Alpha values generate a coverage mask for order independent blending
|
||||
@ -731,4 +733,111 @@ namespace rsx
|
||||
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
|
||||
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
|
||||
}
|
||||
|
||||
#pragma optimize("", off)
|
||||
|
||||
void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const
|
||||
{
|
||||
auto& draw_call = rsx::method_registers.current_draw_clause;
|
||||
|
||||
// Only call this for instanced draws!
|
||||
ensure(draw_call.is_trivial_instanced_draw);
|
||||
|
||||
// Temp indirection table. Used to track "running" updates.
|
||||
std::vector<u32> instancing_indirection_table;
|
||||
// indirection table size
|
||||
const auto redirection_table_size = prog.has_indexed_constants ? 468u : ::size32(prog.constant_ids);
|
||||
|
||||
// Temp constants data
|
||||
std::vector<u128> constants_data;
|
||||
constants_data.reserve(redirection_table_size * draw_call.pass_count());
|
||||
|
||||
// Allocate indirection buffer on GPU stream
|
||||
indirection_table_buf.reserve(redirection_table_size * draw_call.pass_count() * sizeof(u32));
|
||||
auto indirection_out = indirection_table_buf.data<u32>();
|
||||
|
||||
rsx::instanced_draw_config_t instance_config;
|
||||
u32 indirection_table_offset = 0;
|
||||
|
||||
// We now replay the draw call here to pack the data.
|
||||
draw_call.begin();
|
||||
|
||||
// Write initial draw data.
|
||||
instancing_indirection_table.resize(redirection_table_size);
|
||||
std::iota(instancing_indirection_table.begin(), instancing_indirection_table.end(), 0);
|
||||
|
||||
constants_data.resize(redirection_table_size);
|
||||
fill_vertex_program_constants_data(constants_data.data(), prog.constant_ids);
|
||||
|
||||
// Next draw. We're guaranteed more than one draw call by the caller.
|
||||
draw_call.next();
|
||||
|
||||
do
|
||||
{
|
||||
// Write previous state
|
||||
std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size() * sizeof(u32));
|
||||
indirection_table_offset += redirection_table_size;
|
||||
|
||||
// Decode next draw state
|
||||
instance_config = {};
|
||||
draw_call.execute_pipeline_dependencies(m_ctx, &instance_config);
|
||||
|
||||
if (!instance_config.transform_constants_data_changed)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool do_full_reload = prog.has_indexed_constants;
|
||||
if (do_full_reload)
|
||||
{
|
||||
const u32 redirection_loc = ::size32(constants_data);
|
||||
constants_data.resize(redirection_loc + redirection_table_size);
|
||||
fill_vertex_program_constants_data(constants_data.data() + redirection_loc, prog.constant_ids);
|
||||
|
||||
std::iota(instancing_indirection_table.begin(), instancing_indirection_table.end(), redirection_loc);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto xform_id = prog.TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count); xform_id >= 0)
|
||||
{
|
||||
// Trivially patchable in bulk
|
||||
const u32 redirection_loc = ::size32(constants_data);
|
||||
constants_data.resize(::size32(constants_data) + instance_config.patch_load_count);
|
||||
std::memcpy(constants_data.data() + redirection_loc, ®S(m_ctx)->transform_constants[instance_config.patch_load_offset], instance_config.patch_load_count * sizeof(u128));
|
||||
|
||||
// Update indirection table
|
||||
for (auto i = xform_id, count = 0;
|
||||
static_cast<u32>(count) < instance_config.patch_load_count;
|
||||
++i, ++count)
|
||||
{
|
||||
instancing_indirection_table[i] = redirection_loc + count;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sparse. Update records individually instead of bulk
|
||||
const auto load_end = instance_config.patch_load_offset + instance_config.patch_load_count;
|
||||
for (u32 i = 0; i < redirection_table_size; ++i)
|
||||
{
|
||||
const auto read_index = prog.constant_ids[i];
|
||||
if (read_index < instance_config.patch_load_offset || read_index >= load_end)
|
||||
{
|
||||
// Reading outside "hot" range.
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 redirection_loc = ::size32(constants_data);
|
||||
constants_data.resize(::size32(constants_data) + 1);
|
||||
std::memcpy(constants_data.data() + redirection_loc, ®S(m_ctx)->transform_constants[read_index], sizeof(u128));
|
||||
|
||||
instancing_indirection_table[i] = redirection_loc;
|
||||
}
|
||||
|
||||
} while (draw_call.next());
|
||||
|
||||
// Now write the constants to the GPU buffer
|
||||
constants_data_array_buffer.reserve(constants_data.size());
|
||||
std::memcpy(constants_data_array_buffer.data(), constants_data.data(), constants_data.size() * sizeof(u128));
|
||||
}
|
||||
}
|
||||
|
@ -13,13 +13,14 @@
|
||||
namespace rsx
|
||||
{
|
||||
struct rsx_state;
|
||||
class thread;
|
||||
struct context;
|
||||
class io_buffer;
|
||||
|
||||
class draw_command_processor
|
||||
{
|
||||
using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata;
|
||||
|
||||
thread* m_thread = nullptr;
|
||||
context* m_ctx = nullptr;
|
||||
|
||||
protected:
|
||||
friend class thread;
|
||||
@ -30,9 +31,9 @@ namespace rsx
|
||||
public:
|
||||
draw_command_processor() = default;
|
||||
|
||||
void init(thread* rsxthr)
|
||||
void init(context* ctx)
|
||||
{
|
||||
m_thread = rsxthr;
|
||||
m_ctx = ctx;
|
||||
}
|
||||
|
||||
// Analyze vertex inputs and group all interleaved blocks
|
||||
@ -94,12 +95,16 @@ namespace rsx
|
||||
* Fill buffer with vertex program constants.
|
||||
* Relocation table allows to do a partial fill with only selected registers.
|
||||
*/
|
||||
void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table);
|
||||
void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with fragment rasterization state.
|
||||
* Fills current fog values, alpha test parameters and texture scaling parameters
|
||||
*/
|
||||
void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program);
|
||||
void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program) const;
|
||||
|
||||
// Fill instancing buffers. A single iobuf is used for both. 256byte alignment enforced to allow global bind
|
||||
// Returns offsets to the index redirection lookup table and constants field array
|
||||
void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const;
|
||||
};
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
enum pipeline_state : u32
|
||||
enum pipeline_state : u32
|
||||
{
|
||||
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
|
||||
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
|
||||
|
@ -91,7 +91,7 @@ namespace rsx
|
||||
|
||||
bool draw_clause::check_trivially_instanced() const
|
||||
{
|
||||
if (draw_command_ranges.size() <= 1)
|
||||
if (pass_count() <= 1)
|
||||
{
|
||||
// Cannot instance one draw call or less
|
||||
return false;
|
||||
@ -145,7 +145,7 @@ namespace rsx
|
||||
is_disjoint_primitive = is_primitive_disjointed(primitive);
|
||||
}
|
||||
|
||||
u32 draw_clause::execute_pipeline_dependencies(context* ctx) const
|
||||
u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const
|
||||
{
|
||||
u32 result = 0u;
|
||||
for (;
|
||||
@ -191,7 +191,20 @@ namespace rsx
|
||||
// Update transform constants
|
||||
auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0);
|
||||
auto buffer = std::span<const u32>(static_cast<const u32*>(vm::base(ptr)), barrier.arg1);
|
||||
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer);
|
||||
auto notify = [&](rsx::context*, u32 load, u32 count)
|
||||
{
|
||||
if (!instance_config)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
instance_config->transform_constants_data_changed = true;
|
||||
instance_config->patch_load_offset = load;
|
||||
instance_config->patch_load_count = count;
|
||||
return true;
|
||||
};
|
||||
|
||||
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer, notify);
|
||||
result |= transform_constants_changed;
|
||||
break;
|
||||
}
|
||||
|
@ -7,6 +7,14 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
struct instanced_draw_config_t
|
||||
{
|
||||
bool transform_constants_data_changed;
|
||||
|
||||
u32 patch_load_offset;
|
||||
u32 patch_load_count;
|
||||
};
|
||||
|
||||
class draw_clause
|
||||
{
|
||||
// Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
|
||||
@ -272,7 +280,7 @@ namespace rsx
|
||||
/**
|
||||
* Executes commands required to make the current draw state valid
|
||||
*/
|
||||
u32 execute_pipeline_dependencies(struct context* ctx) const;
|
||||
u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const;
|
||||
|
||||
const draw_range_t& get_range() const
|
||||
{
|
||||
|
@ -30,7 +30,7 @@ namespace rsx
|
||||
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
|
||||
}
|
||||
|
||||
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args)
|
||||
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify)
|
||||
{
|
||||
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||
const u32 constant_id = index / 4;
|
||||
@ -40,8 +40,15 @@ namespace rsx
|
||||
auto dst = ®S(ctx)->transform_constants[load + constant_id][subreg];
|
||||
copy_data_swap_u32(dst, args.data(), ::size32(args));
|
||||
|
||||
// Notify
|
||||
const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div
|
||||
RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id);
|
||||
const u32 load_index = load + constant_id;
|
||||
const u32 load_count = last_constant_id - constant_id;
|
||||
|
||||
if (!notify || !notify(ctx, load_index, load_count))
|
||||
{
|
||||
RSX(ctx)->patch_transform_constants(ctx, load_index, load_count);
|
||||
}
|
||||
}
|
||||
|
||||
void set_transform_constant::impl(context* ctx, u32 reg, [[maybe_unused]] u32 arg)
|
||||
|
@ -204,7 +204,7 @@ namespace rsx
|
||||
|
||||
static void decode_one(context* ctx, u32 reg, u32 arg);
|
||||
|
||||
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args);
|
||||
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify = {});
|
||||
};
|
||||
|
||||
struct set_transform_program
|
||||
|
@ -56,10 +56,17 @@ vec4 apply_zclip_xform(
|
||||
#endif
|
||||
|
||||
#if defined(_ENABLE_INSTANCED_CONSTANTS)
|
||||
// Workaround for GL vs VK builtin variable naming
|
||||
#ifdef VULKAN
|
||||
#define _gl_InstanceID gl_InstanceIndex
|
||||
#else
|
||||
#define _gl_InstanceID gl_InstanceID
|
||||
#endif
|
||||
|
||||
vec4 _fetch_constant(const in int base_offset)
|
||||
{
|
||||
// Get virtual draw/instance id. Normally will be 1:1 based on instance index
|
||||
const int indirection_offset = (gl_InstanceID * CONSTANTS_ARRAY_LENGTH) + base_offset;
|
||||
const int indirection_offset = (_gl_InstanceID * CONSTANTS_ARRAY_LENGTH) + base_offset;
|
||||
const int corrected_offset = constants_addressing_lookup[indirection_offset];
|
||||
return instanced_constants_array[corrected_offset];
|
||||
}
|
||||
|
@ -690,7 +690,6 @@ namespace rsx
|
||||
m_vertex_textures_dirty.fill(true);
|
||||
|
||||
m_graphics_state |= pipeline_state::all_dirty;
|
||||
m_draw_processor.init(this);
|
||||
|
||||
g_user_asked_for_frame_capture = false;
|
||||
|
||||
@ -698,6 +697,8 @@ namespace rsx
|
||||
s_ctx.rsxthr = this;
|
||||
m_ctx = &s_ctx;
|
||||
|
||||
m_draw_processor.init(m_ctx);
|
||||
|
||||
if (g_cfg.misc.use_native_interface && (g_cfg.video.renderer == video_renderer::opengl || g_cfg.video.renderer == video_renderer::vulkan))
|
||||
{
|
||||
m_overlay_manager = g_fxo->init<rsx::overlays::display_manager>(0);
|
||||
|
@ -929,7 +929,11 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (draw_call.is_single_draw())
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0);
|
||||
}
|
||||
else if (draw_call.is_single_draw())
|
||||
{
|
||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
|
||||
}
|
||||
@ -951,10 +955,13 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
const u32 index_count = upload_info.vertex_draw_count;
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0, 0);
|
||||
}
|
||||
else if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
{
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1052,7 +1059,10 @@ void VKGSRender::end()
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
// Apply write memory barriers
|
||||
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) ds->write_barrier(*m_current_command_buffer);
|
||||
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil))
|
||||
{
|
||||
ds->write_barrier(*m_current_command_buffer);
|
||||
}
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
@ -1111,12 +1121,19 @@ void VKGSRender::end()
|
||||
m_current_command_buffer->flags |= vk::command_buffer::cb_reload_dynamic_state;
|
||||
}
|
||||
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
auto& draw_call = rsx::method_registers.current_draw_clause;
|
||||
draw_call.begin();
|
||||
do
|
||||
{
|
||||
emit_geometry(sub_index++);
|
||||
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
// We already completed. End the draw.
|
||||
draw_call.end();
|
||||
}
|
||||
}
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
while (draw_call.next());
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
|
||||
{
|
||||
|
@ -477,6 +477,22 @@ namespace
|
||||
|
||||
idx++;
|
||||
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[idx].descriptorCount = 1;
|
||||
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot;
|
||||
bindings[idx].pImmutableSamplers = nullptr;
|
||||
|
||||
idx++;
|
||||
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[idx].descriptorCount = 1;
|
||||
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[idx].binding = binding_table.instancing_constants_buffer_slot;
|
||||
bindings[idx].pImmutableSamplers = nullptr;
|
||||
|
||||
idx++;
|
||||
|
||||
for (auto binding = binding_table.textures_first_bind_slot;
|
||||
binding < binding_table.vertex_textures_first_bind_slot;
|
||||
binding++)
|
||||
@ -643,7 +659,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
|
||||
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) },
|
||||
|
||||
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
|
||||
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }
|
||||
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 }
|
||||
};
|
||||
m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls);
|
||||
|
||||
@ -661,6 +677,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
|
||||
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
|
||||
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
|
||||
m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer");
|
||||
m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "instancing data buffer");
|
||||
|
||||
const auto shadermode = g_cfg.video.shadermode.get();
|
||||
|
||||
@ -949,6 +966,7 @@ VKGSRender::~VKGSRender()
|
||||
m_vertex_instructions_buffer.destroy();
|
||||
m_fragment_instructions_buffer.destroy();
|
||||
m_raster_env_ring_info.destroy();
|
||||
m_instancing_buffer_ring_info.destroy();
|
||||
|
||||
// Fallback bindables
|
||||
null_buffer.reset();
|
||||
@ -1286,7 +1304,8 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
m_fragment_constants_ring_info.is_critical() ||
|
||||
m_transform_constants_ring_info.is_critical() ||
|
||||
m_index_buffer_ring_info.is_critical() ||
|
||||
m_raster_env_ring_info.is_critical();
|
||||
m_raster_env_ring_info.is_critical() ||
|
||||
m_instancing_buffer_ring_info.is_critical();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1318,7 +1337,9 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
heap_critical = m_vertex_layout_ring_info.is_critical();
|
||||
break;
|
||||
case VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE:
|
||||
heap_critical = m_transform_constants_ring_info.is_critical();
|
||||
heap_critical = (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
|
||||
? m_instancing_buffer_ring_info.is_critical()
|
||||
: m_transform_constants_ring_info.is_critical();
|
||||
break;
|
||||
case VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE:
|
||||
heap_critical = m_fragment_constants_ring_info.is_critical();
|
||||
@ -1361,6 +1382,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
m_attrib_ring_info.reset_allocation_stats();
|
||||
m_texture_upload_buffer_ring_info.reset_allocation_stats();
|
||||
m_raster_env_ring_info.reset_allocation_stats();
|
||||
m_instancing_buffer_ring_info.reset_allocation_stats();
|
||||
m_current_frame->reset_heap_ptrs();
|
||||
m_last_heap_sync_time = rsx::get_shared_tag();
|
||||
}
|
||||
@ -2130,6 +2152,7 @@ void VKGSRender::load_program_env()
|
||||
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
|
||||
const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty));
|
||||
const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw;
|
||||
|
||||
if (update_vertex_env)
|
||||
{
|
||||
@ -2150,7 +2173,32 @@ void VKGSRender::load_program_env()
|
||||
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 };
|
||||
}
|
||||
|
||||
if (update_transform_constants)
|
||||
if (update_instancing_data)
|
||||
{
|
||||
// Combines transform load + instancing lookup table
|
||||
const auto alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment;
|
||||
usz indirection_table_offset = 0;
|
||||
usz constants_data_table_offset = 0;
|
||||
|
||||
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size);
|
||||
});
|
||||
|
||||
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size);
|
||||
});
|
||||
|
||||
m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, *m_vertex_prog);
|
||||
m_instancing_buffer_ring_info.unmap();
|
||||
|
||||
m_instancing_indirection_buffer_info = { m_instancing_buffer_ring_info.heap->value, indirection_table_offset, indirection_table_buf.size() };
|
||||
m_instancing_constants_array_buffer_info = { m_instancing_buffer_ring_info.heap->value, constants_data_table_offset, constants_array_buf.size() };
|
||||
}
|
||||
else if (update_transform_constants)
|
||||
{
|
||||
// Transform constants
|
||||
usz mem_offset = 0;
|
||||
@ -2295,13 +2343,24 @@ void VKGSRender::load_program_env()
|
||||
m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
|
||||
{
|
||||
m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
// Clear flags
|
||||
m_graphics_state.clear(
|
||||
rsx::pipeline_state::fragment_state_dirty |
|
||||
u32 handled_flags = rsx::pipeline_state::fragment_state_dirty |
|
||||
rsx::pipeline_state::vertex_state_dirty |
|
||||
rsx::pipeline_state::transform_constants_dirty |
|
||||
rsx::pipeline_state::fragment_constants_dirty |
|
||||
rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
rsx::pipeline_state::fragment_texture_state_dirty;
|
||||
|
||||
if (!update_instancing_data)
|
||||
{
|
||||
handled_flags |= rsx::pipeline_state::transform_constants_dirty;
|
||||
}
|
||||
|
||||
m_graphics_state.clear(handled_flags);
|
||||
}
|
||||
|
||||
void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
|
||||
@ -2488,7 +2547,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
||||
m_index_buffer_ring_info.is_dirty() ||
|
||||
m_transform_constants_ring_info.is_dirty() ||
|
||||
m_texture_upload_buffer_ring_info.is_dirty() ||
|
||||
m_raster_env_ring_info.is_dirty())
|
||||
m_raster_env_ring_info.is_dirty() ||
|
||||
m_instancing_buffer_ring_info.is_dirty())
|
||||
{
|
||||
auto secondary_command_buffer = m_secondary_cb_list.next();
|
||||
secondary_command_buffer->begin();
|
||||
@ -2503,6 +2563,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
||||
m_transform_constants_ring_info.sync(*secondary_command_buffer);
|
||||
m_texture_upload_buffer_ring_info.sync(*secondary_command_buffer);
|
||||
m_raster_env_ring_info.sync(*secondary_command_buffer);
|
||||
m_instancing_buffer_ring_info.sync(*secondary_command_buffer);
|
||||
|
||||
secondary_command_buffer->end();
|
||||
|
||||
|
@ -149,6 +149,7 @@ private:
|
||||
vk::data_heap m_index_buffer_ring_info; // Index data
|
||||
vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap
|
||||
vk::data_heap m_raster_env_ring_info; // Raster control such as polygon and line stipple
|
||||
vk::data_heap m_instancing_buffer_ring_info; // Instanced rendering data (constants indirection table + instanced constants)
|
||||
|
||||
vk::data_heap m_fragment_instructions_buffer;
|
||||
vk::data_heap m_vertex_instructions_buffer;
|
||||
@ -160,6 +161,8 @@ private:
|
||||
VkDescriptorBufferInfo m_fragment_constants_buffer_info {};
|
||||
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info {};
|
||||
VkDescriptorBufferInfo m_raster_env_buffer_info {};
|
||||
VkDescriptorBufferInfo m_instancing_indirection_buffer_info {};
|
||||
VkDescriptorBufferInfo m_instancing_constants_array_buffer_info{};
|
||||
|
||||
VkDescriptorBufferInfo m_vertex_instructions_buffer_info {};
|
||||
VkDescriptorBufferInfo m_fragment_instructions_buffer_info {};
|
||||
|
@ -197,6 +197,7 @@ namespace vk
|
||||
s64 index_heap_ptr = 0;
|
||||
s64 texture_upload_heap_ptr = 0;
|
||||
s64 rasterizer_env_heap_ptr = 0;
|
||||
s64 instancing_heap_ptr = 0;
|
||||
|
||||
u64 last_frame_sync_time = 0;
|
||||
|
||||
@ -218,6 +219,7 @@ namespace vk
|
||||
index_heap_ptr = other.index_heap_ptr;
|
||||
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
|
||||
rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr;
|
||||
instancing_heap_ptr = other.instancing_heap_ptr;
|
||||
}
|
||||
|
||||
// Exchange storage (non-copyable)
|
||||
@ -229,7 +231,7 @@ namespace vk
|
||||
void tag_frame_end(
|
||||
s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc,
|
||||
s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc,
|
||||
s64 texture_loc, s64 rasterizer_loc)
|
||||
s64 texture_loc, s64 rasterizer_loc, s64 instancing_loc)
|
||||
{
|
||||
attrib_heap_ptr = attrib_loc;
|
||||
vtx_env_heap_ptr = vtxenv_loc;
|
||||
@ -241,6 +243,7 @@ namespace vk
|
||||
index_heap_ptr = index_loc;
|
||||
texture_upload_heap_ptr = texture_loc;
|
||||
rasterizer_env_heap_ptr = rasterizer_loc;
|
||||
instancing_heap_ptr = instancing_loc;
|
||||
|
||||
last_frame_sync_time = rsx::get_shared_tag();
|
||||
}
|
||||
|
@ -163,7 +163,8 @@ void VKGSRender::advance_queued_frames()
|
||||
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
|
||||
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
|
||||
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(),
|
||||
m_raster_env_ring_info.get_current_put_pos_minus_one());
|
||||
m_raster_env_ring_info.get_current_put_pos_minus_one(),
|
||||
m_instancing_buffer_ring_info.get_current_put_pos_minus_one());
|
||||
|
||||
m_queued_frames.push_back(m_current_frame);
|
||||
ensure(m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES);
|
||||
@ -266,6 +267,8 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
|
||||
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
|
||||
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
|
||||
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
|
||||
m_raster_env_ring_info.m_get_pos = ctx->rasterizer_env_heap_ptr;
|
||||
m_instancing_buffer_ring_info.m_get_pos = ctx->instancing_heap_ptr;
|
||||
|
||||
m_attrib_ring_info.notify();
|
||||
m_vertex_env_ring_info.notify();
|
||||
@ -276,6 +279,8 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
|
||||
m_fragment_texture_params_ring_info.notify();
|
||||
m_index_buffer_ring_info.notify();
|
||||
m_texture_upload_buffer_ring_info.notify();
|
||||
m_raster_env_ring_info.notify();
|
||||
m_instancing_buffer_ring_info.notify();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -32,31 +32,34 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
||||
OS << "#version 450\n\n";
|
||||
OS << "#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||
|
||||
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " mat4 scale_offset_mat;\n";
|
||||
OS << " ivec4 user_clip_enabled[2];\n";
|
||||
OS << " vec4 user_clip_factor[2];\n";
|
||||
OS << " uint transform_branch_bits;\n";
|
||||
OS << " float point_size;\n";
|
||||
OS << " float z_near;\n";
|
||||
OS << " float z_far;\n";
|
||||
OS << "};\n\n";
|
||||
OS <<
|
||||
"layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"
|
||||
"{\n"
|
||||
" mat4 scale_offset_mat;\n"
|
||||
" ivec4 user_clip_enabled[2];\n"
|
||||
" vec4 user_clip_factor[2];\n"
|
||||
" uint transform_branch_bits;\n"
|
||||
" float point_size;\n"
|
||||
" float z_near;\n"
|
||||
" float z_far;\n"
|
||||
"};\n\n";
|
||||
|
||||
if (m_device_props.emulate_conditional_rendering)
|
||||
{
|
||||
OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n";
|
||||
OS << "{\n";
|
||||
OS << " uint conditional_rendering_predicate;\n";
|
||||
OS << "};\n\n";
|
||||
OS <<
|
||||
"layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n"
|
||||
"{\n"
|
||||
" uint conditional_rendering_predicate;\n"
|
||||
"};\n\n";
|
||||
}
|
||||
|
||||
OS << "layout(push_constant) uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uint draw_id;\n";
|
||||
OS << " uint layout_ptr_offset;\n";
|
||||
OS <<
|
||||
"layout(push_constant) uniform VertexLayoutBuffer\n"
|
||||
"{\n"
|
||||
" uint vertex_base_index;\n"
|
||||
" uint vertex_index_offset;\n"
|
||||
" uint draw_id;\n"
|
||||
" uint layout_ptr_offset;\n";
|
||||
|
||||
if (m_device_props.emulate_conditional_rendering)
|
||||
{
|
||||
@ -110,18 +113,50 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
|
||||
{
|
||||
if (PI.name.starts_with("vc["))
|
||||
{
|
||||
OS << "layout(std140, set=0, binding = " << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " vec4 " << PI.name << ";\n";
|
||||
OS << "};\n\n";
|
||||
if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS))
|
||||
{
|
||||
OS << "layout(std140, set=0, binding=" << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " vec4 " << PI.name << ";\n";
|
||||
OS << "};\n\n";
|
||||
|
||||
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
|
||||
in.domain = glsl::glsl_vertex_program;
|
||||
in.name = "VertexConstantsBuffer";
|
||||
in.type = vk::glsl::input_type_uniform_buffer;
|
||||
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
|
||||
in.domain = glsl::glsl_vertex_program;
|
||||
in.name = "VertexConstantsBuffer";
|
||||
in.type = vk::glsl::input_type_uniform_buffer;
|
||||
|
||||
inputs.push_back(in);
|
||||
continue;
|
||||
inputs.push_back(in);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 1. Bind indirection lookup buffer
|
||||
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n";
|
||||
OS << "{\n";
|
||||
OS << " int constants_addressing_lookup[];\n";
|
||||
OS << "};\n\n";
|
||||
|
||||
in.location = m_binding_table.instancing_lookup_table_bind_slot;
|
||||
in.domain = glsl::glsl_vertex_program;
|
||||
in.name = "InstancingData";
|
||||
in.type = vk::glsl::input_type_storage_buffer;
|
||||
inputs.push_back(in);
|
||||
|
||||
// 2. Bind actual constants buffer
|
||||
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " vec4 instanced_constants_array[];\n";
|
||||
OS << "};\n\n";
|
||||
|
||||
OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 468 : ::size32(m_constant_ids)) << "\n\n";
|
||||
|
||||
in.location = m_binding_table.instancing_constants_buffer_slot;
|
||||
in.domain = glsl::glsl_vertex_program;
|
||||
in.name = "VertexConstantsBuffer";
|
||||
in.type = vk::glsl::input_type_storage_buffer;
|
||||
inputs.push_back(in);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (PT.type == "sampler2D" ||
|
||||
@ -209,6 +244,7 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64;
|
||||
properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor());
|
||||
properties2.require_explicit_invariance = (vk::is_NVIDIA(vk::get_driver_vendor()) && g_cfg.video.shader_precision != gpu_preset_level::low);
|
||||
properties2.require_instanced_render = !!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS);
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_vulkan);
|
||||
|
@ -14,8 +14,10 @@ namespace vk
|
||||
u8 vertex_buffers_first_bind_slot = 5;
|
||||
u8 conditional_render_predicate_slot = 8;
|
||||
u8 rasterizer_env_bind_slot = 9;
|
||||
u8 textures_first_bind_slot = 10;
|
||||
u8 vertex_textures_first_bind_slot = 10; // Invalid, has to be initialized properly
|
||||
u8 instancing_lookup_table_bind_slot = 10;
|
||||
u8 instancing_constants_buffer_slot = 11;
|
||||
u8 textures_first_bind_slot = 12;
|
||||
u8 vertex_textures_first_bind_slot = 12; // Invalid, has to be initialized properly
|
||||
u8 total_descriptor_bindings = vertex_textures_first_bind_slot; // Invalid, has to be initialized properly
|
||||
};
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user