Mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-03-14 10:21:21 +00:00)
Revert "rsx/vk: Implement hardware instancing (#16466)"
This reverts commit 62701154f1a6ebd71499cabba2341cc240ff50c4.
This commit is contained in:
parent 62701154f1
commit 43e04f3fc7
@@ -476,7 +476,6 @@ target_sources(rpcs3_emu PRIVATE
|
||||
RSX/Common/TextureUtils.cpp
|
||||
RSX/Common/texture_cache.cpp
|
||||
RSX/Core/RSXContext.cpp
|
||||
RSX/Core/RSXDrawCommands.cpp
|
||||
RSX/gcm_enums.cpp
|
||||
RSX/gcm_printing.cpp
|
||||
RSX/GL/GLCommonDecompiler.cpp
|
||||
|
@@ -1,843 +0,0 @@
|
||||
#include "stdafx.h"
|
||||
#include "RSXDrawCommands.h"
|
||||
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
#include "Emu/RSX/Common/buffer_stream.hpp"
|
||||
#include "Emu/RSX/Common/io_buffer.h"
|
||||
#include "Emu/RSX/Common/simple_array.hpp"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/RSX/Program/GLSLCommon.h"
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
|
||||
#include "Emu/Memory/vm.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
void draw_command_processor::analyse_inputs_interleaved(vertex_input_layout& result, const vertex_program_metadata_t& vp_metadata)
|
||||
{
|
||||
const rsx_state& state = *REGS(m_ctx);
|
||||
const u32 input_mask = state.vertex_attrib_input_mask() & vp_metadata.referenced_inputs_mask;
|
||||
|
||||
result.clear();
|
||||
result.attribute_mask = static_cast<u16>(input_mask);
|
||||
|
||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
interleaved_range_info& info = *result.alloc_interleaved_block();
|
||||
info.interleaved = true;
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
auto& vinfo = state.vertex_arrays_info[index];
|
||||
result.attribute_placement[index] = attribute_buffer_placement::none;
|
||||
|
||||
if (vinfo.size() > 0)
|
||||
{
|
||||
// Stride must be updated even if the stream is disabled
|
||||
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
|
||||
info.locations.push_back({ index, false, 1 });
|
||||
|
||||
if (input_mask & (1u << index))
|
||||
{
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
|
||||
if (info.attribute_stride)
|
||||
{
|
||||
// At least one array feed must be enabled for vertex input
|
||||
result.interleaved_blocks.push_back(&info);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 frequency_divider_mask = REGS(m_ctx)->frequency_divider_operation_mask();
|
||||
result.interleaved_blocks.reserve(16);
|
||||
result.referenced_registers.reserve(16);
|
||||
|
||||
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
ensure(index < rsx::limits::vertex_count);
|
||||
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Nothing to do, uninitialized
|
||||
continue;
|
||||
}
|
||||
|
||||
// Always reset attribute placement by default
|
||||
result.attribute_placement[index] = attribute_buffer_placement::none;
|
||||
|
||||
// Check for interleaving
|
||||
if (REGS(m_ctx)->current_draw_clause.is_immediate_draw &&
|
||||
REGS(m_ctx)->current_draw_clause.command != rsx::draw_command::indexed)
|
||||
{
|
||||
// NOTE: In immediate rendering mode, all vertex setup is ignored
|
||||
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
|
||||
if (m_vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
// Ensure consistent number of vertices per attribute.
|
||||
m_vertex_push_buffers[index].pad_to(m_vertex_push_buffers[0].vertex_count, false);
|
||||
|
||||
// Read temp buffer (register array)
|
||||
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(m_vertex_push_buffers[index].data.size() * sizeof(u32)));
|
||||
result.volatile_blocks.push_back(volatile_range_info);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
else if (state.register_vertex_info[index].size > 0)
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
|
||||
// Fall back to the default register value if no source is specified via register
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& info = state.vertex_arrays_info[index];
|
||||
if (!info.size())
|
||||
{
|
||||
if (state.register_vertex_info[index].size > 0)
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result.attribute_placement[index] = attribute_buffer_placement::persistent;
|
||||
const u32 base_address = info.offset() & 0x7fffffff;
|
||||
bool alloc_new_block = true;
|
||||
bool modulo = !!(frequency_divider_mask & (1 << index));
|
||||
|
||||
for (auto& block : result.interleaved_blocks)
|
||||
{
|
||||
if (block->single_vertex)
|
||||
{
|
||||
// Single vertex definition, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (block->attribute_stride != info.stride())
|
||||
{
|
||||
// Stride does not match, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (base_address > block->base_offset)
|
||||
{
|
||||
const u32 diff = base_address - block->base_offset;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
// Not interleaved, continue
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 diff = block->base_offset - base_address;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
// Not interleaved, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
// Matches, and this address is lower than existing
|
||||
block->base_offset = base_address;
|
||||
}
|
||||
|
||||
alloc_new_block = false;
|
||||
block->locations.push_back({ index, modulo, info.frequency() });
|
||||
block->interleaved = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (alloc_new_block)
|
||||
{
|
||||
interleaved_range_info& block = *result.alloc_interleaved_block();
|
||||
block.base_offset = base_address;
|
||||
block.attribute_stride = info.stride();
|
||||
block.memory_location = info.offset() >> 31;
|
||||
block.locations.reserve(16);
|
||||
block.locations.push_back({ index, modulo, info.frequency() });
|
||||
|
||||
if (block.attribute_stride == 0)
|
||||
{
|
||||
block.single_vertex = true;
|
||||
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
|
||||
result.interleaved_blocks.push_back(&block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& info : result.interleaved_blocks)
|
||||
{
|
||||
// Calculate real data address to be used during upload
|
||||
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
|
||||
}
|
||||
}
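
// Hedged sketch (not RPCS3 code): the per-attribute interleaving test applied above.
// Two vertex streams are folded into one block when their strides match and their
// base offsets are within one stride of each other.
#include <cstdint>

static bool same_interleaved_block(uint32_t base_a, uint32_t base_b, uint32_t stride_a, uint32_t stride_b)
{
	if (stride_a != stride_b)
	{
		// Stride does not match, cannot interleave
		return false;
	}

	const uint32_t diff = (base_a > base_b) ? (base_a - base_b) : (base_b - base_a);
	return diff <= stride_a;
}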
|
||||
|
||||
std::span<const std::byte> draw_command_processor::get_raw_index_array(const draw_clause& draw_indexed_clause) const
|
||||
{
|
||||
if (!m_element_push_buffer.empty()) [[ unlikely ]]
|
||||
{
|
||||
// Indices provided via immediate mode
|
||||
return { reinterpret_cast<const std::byte*>(m_element_push_buffer.data()), ::narrow<u32>(m_element_push_buffer.size() * sizeof(u32)) };
|
||||
}
|
||||
|
||||
const rsx::index_array_type type = REGS(m_ctx)->index_type();
|
||||
const u32 type_size = get_index_type_size(type);
|
||||
|
||||
// Force aligned indices, as on real hardware
|
||||
const u32 address = (0 - type_size) & get_address(REGS(m_ctx)->index_array_address(), REGS(m_ctx)->index_array_location());
|
||||
|
||||
const u32 first = draw_indexed_clause.min_index();
|
||||
const u32 count = draw_indexed_clause.get_elements_count();
|
||||
|
||||
const auto ptr = vm::_ptr<const std::byte>(address);
|
||||
return { ptr + first * type_size, count * type_size };
|
||||
}
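
// Worked sketch of the "(0 - type_size) & address" expression above: with an unsigned
// type_size of 2 or 4 it rounds the address down to the index element size, as the
// alignment comment notes. Values below are illustrative.
#include <cassert>
#include <cstdint>

int main()
{
	const uint32_t type_size = 4;          // u32 indices
	const uint32_t address   = 0x10000007; // deliberately misaligned
	const uint32_t aligned   = (0 - type_size) & address; // same as address & ~(type_size - 1)
	assert(aligned == 0x10000004);
	return 0;
}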
|
||||
|
||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||
draw_command_processor::get_draw_command(const rsx::rsx_state& state) const
|
||||
{
|
||||
if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
|
||||
{
|
||||
return draw_indexed_array_command
|
||||
{
|
||||
get_raw_index_array(state.current_draw_clause)
|
||||
};
|
||||
}
|
||||
|
||||
if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::array)
|
||||
{
|
||||
return draw_array_command{};
|
||||
}
|
||||
|
||||
if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
return draw_inlined_array{};
|
||||
}
|
||||
|
||||
fmt::throw_exception("ill-formed draw command");
|
||||
}
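
// Hypothetical usage sketch for the variant returned above. The simplified command
// structs here are stand-ins, not the real RPCS3 definitions; the actual backends
// dispatch with a draw_command_visitor, as seen later in this diff.
#include <cstdio>
#include <type_traits>
#include <variant>

struct draw_array_command {};
struct draw_indexed_array_command {};
struct draw_inlined_array {};

using any_draw_command = std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>;

static void submit(const any_draw_command& cmd)
{
	std::visit([](const auto& c)
	{
		using T = std::decay_t<decltype(c)>;
		if constexpr (std::is_same_v<T, draw_indexed_array_command>)
			std::puts("indexed draw");
		else if constexpr (std::is_same_v<T, draw_array_command>)
			std::puts("array draw");
		else
			std::puts("inlined array draw");
	}, cmd);
}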
|
||||
|
||||
void draw_command_processor::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
||||
{
|
||||
if (!(REGS(m_ctx)->vertex_attrib_input_mask() & (1 << attribute)))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||
const auto vertex_id = m_vertex_push_buffers[0].get_vertex_id();
|
||||
m_vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
|
||||
RSX(m_ctx)->m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
|
||||
}
|
||||
|
||||
u32 draw_command_processor::get_push_buffer_vertex_count() const
|
||||
{
|
||||
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||
return m_vertex_push_buffers[0].vertex_count;
|
||||
}
|
||||
|
||||
void draw_command_processor::append_array_element(u32 index)
|
||||
{
|
||||
// Endianness is swapped because common upload code expects input in BE
|
||||
// TODO: Implement fast upload path for LE inputs and do away with this
|
||||
m_element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
|
||||
}
|
||||
|
||||
u32 draw_command_processor::get_push_buffer_index_count() const
|
||||
{
|
||||
return ::size32(m_element_push_buffer);
|
||||
}
|
||||
|
||||
void draw_command_processor::clear_push_buffers()
|
||||
{
|
||||
auto& graphics_state = RSX(m_ctx)->m_graphics_state;
|
||||
if (graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
|
||||
{
|
||||
for (auto& push_buf : m_vertex_push_buffers)
|
||||
{
|
||||
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
|
||||
//REGS(m_ctx)->register_vertex_info[index].size = 0;
|
||||
|
||||
push_buf.clear();
|
||||
}
|
||||
|
||||
graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
|
||||
}
|
||||
|
||||
m_element_push_buffer.clear();
|
||||
}
|
||||
|
||||
void draw_command_processor::fill_vertex_layout_state(
|
||||
const vertex_input_layout& layout,
|
||||
const vertex_program_metadata_t& vp_metadata,
|
||||
u32 first_vertex,
|
||||
u32 vertex_count,
|
||||
s32* buffer,
|
||||
u32 persistent_offset_base,
|
||||
u32 volatile_offset_base) const
|
||||
{
|
||||
std::array<s32, 16> offset_in_block = {};
|
||||
u32 volatile_offset = volatile_offset_base;
|
||||
u32 persistent_offset = persistent_offset_base;
|
||||
|
||||
// NOTE: Order is important! Transient layout is always push_buffers followed by register data
|
||||
if (REGS(m_ctx)->current_draw_clause.is_immediate_draw)
|
||||
{
|
||||
for (const auto& info : layout.volatile_blocks)
|
||||
{
|
||||
offset_in_block[info.first] = volatile_offset;
|
||||
volatile_offset += info.second;
|
||||
}
|
||||
}
|
||||
|
||||
for (u8 index : layout.referenced_registers)
|
||||
{
|
||||
offset_in_block[index] = volatile_offset;
|
||||
volatile_offset += 16;
|
||||
}
|
||||
|
||||
if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto& block = layout.interleaved_blocks[0];
|
||||
u32 inline_data_offset = volatile_offset;
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
auto& info = REGS(m_ctx)->vertex_arrays_info[attrib.index];
|
||||
|
||||
offset_in_block[attrib.index] = inline_data_offset;
|
||||
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto& block : layout.interleaved_blocks)
|
||||
{
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
const u32 local_address = (REGS(m_ctx)->vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
|
||||
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
|
||||
}
|
||||
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
persistent_offset += block->attribute_stride * range.second;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the data
|
||||
// Each descriptor field is 64 bits wide
|
||||
// [0-8] attribute stride
|
||||
// [8-24] attribute divisor
|
||||
// [24-27] attribute type
|
||||
// [27-30] attribute size
|
||||
// [30-31] reserved
|
||||
// [31-60] starting offset
|
||||
// [60-61] swap bytes flag
|
||||
// [61-62] volatile flag
|
||||
// [62-63] modulo enable flag
|
||||
|
||||
const s32 default_frequency_mask = (1 << 8);
|
||||
const s32 swap_storage_mask = (1 << 29);
|
||||
const s32 volatile_storage_mask = (1 << 30);
|
||||
const s32 modulo_op_frequency_mask = smin;
|
||||
|
||||
const u32 modulo_mask = REGS(m_ctx)->frequency_divider_operation_mask();
|
||||
const auto max_index = (first_vertex + vertex_count) - 1;
|
||||
|
||||
for (u16 ref_mask = vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Unused input, ignore this
|
||||
continue;
|
||||
}
|
||||
|
||||
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
|
||||
{
|
||||
static constexpr u64 zero = 0;
|
||||
std::memcpy(buffer + index * 2, &zero, sizeof(zero));
|
||||
continue;
|
||||
}
|
||||
|
||||
rsx::vertex_base_type type = {};
|
||||
s32 size = 0;
|
||||
s32 attrib0 = 0;
|
||||
s32 attrib1 = 0;
|
||||
|
||||
if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
|
||||
{
|
||||
if (REGS(m_ctx)->current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto& info = REGS(m_ctx)->vertex_arrays_info[index];
|
||||
|
||||
if (!info.size())
|
||||
{
|
||||
// Register
|
||||
const auto& reginfo = REGS(m_ctx)->register_vertex_info[index];
|
||||
type = reginfo.type;
|
||||
size = reginfo.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Array
|
||||
type = info.type();
|
||||
size = info.size();
|
||||
|
||||
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Data is either from an immediate render or register input
|
||||
// Immediate data overrides register input
|
||||
|
||||
if (REGS(m_ctx)->current_draw_clause.is_immediate_draw &&
|
||||
m_vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
// Push buffer
|
||||
const auto& info = m_vertex_push_buffers[index];
|
||||
type = info.type;
|
||||
size = info.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Register
|
||||
const auto& info = REGS(m_ctx)->register_vertex_info[index];
|
||||
type = info.type;
|
||||
size = info.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
|
||||
}
|
||||
}
|
||||
|
||||
attrib1 |= volatile_storage_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& info = REGS(m_ctx)->vertex_arrays_info[index];
|
||||
type = info.type();
|
||||
size = info.size();
|
||||
|
||||
auto stride = info.stride();
|
||||
attrib0 = stride;
|
||||
|
||||
if (stride > 0) // when stride is 0, input is not an array but a single element
|
||||
{
|
||||
const u32 frequency = info.frequency();
|
||||
switch (frequency)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
{
|
||||
attrib0 |= default_frequency_mask;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
if (modulo_mask & (1 << index))
|
||||
{
|
||||
if (max_index >= frequency)
|
||||
{
|
||||
// Only set modulo mask if a modulo op is actually necessary!
|
||||
// This requires that the uploaded range for this attr = [0, freq-1]
|
||||
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
|
||||
attrib0 |= (frequency << 8);
|
||||
attrib1 |= modulo_op_frequency_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
attrib0 |= default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Division
|
||||
attrib0 |= (frequency << 8);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end attribute placement check
|
||||
|
||||
// Special case: 4 components compressed into one 4-byte value. Decoded as one value.
|
||||
if (type == rsx::vertex_base_type::cmp)
|
||||
{
|
||||
size = 1;
|
||||
}
|
||||
|
||||
// All data is passed in PS3-native order (BE), so the swap flag should be set
|
||||
attrib1 |= swap_storage_mask;
|
||||
attrib0 |= (static_cast<s32>(type) << 24);
|
||||
attrib0 |= (size << 27);
|
||||
attrib1 |= offset_in_block[index];
|
||||
|
||||
buffer[index * 2 + 0] = attrib0;
|
||||
buffer[index * 2 + 1] = attrib1;
|
||||
}
|
||||
}
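
// Hedged sketch (not RPCS3 code) of the two descriptor words assembled above, using
// the same bit positions: attrib0 packs stride, divisor, type and size; attrib1 packs
// the block offset plus the swap/volatile/modulo flags.
#include <cstdint>

struct packed_attribute
{
	int32_t attrib0;
	int32_t attrib1;
};

static packed_attribute pack_attribute(uint32_t stride, uint32_t divisor, uint32_t type, uint32_t size,
	uint32_t offset_in_block, bool swap_bytes, bool is_volatile, bool modulo)
{
	packed_attribute out{};
	out.attrib0 = static_cast<int32_t>((stride & 0xff) | (divisor << 8) | (type << 24) | (size << 27));
	out.attrib1 = static_cast<int32_t>(offset_in_block);

	if (swap_bytes)  out.attrib1 |= (1 << 29); // source data is big-endian
	if (is_volatile) out.attrib1 |= (1 << 30); // data lives in the transient heap
	if (modulo)      out.attrib1 |= INT32_MIN; // frequency uses modulo addressing
	return out;
}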
|
||||
|
||||
void draw_command_processor::write_vertex_data_to_memory(
|
||||
const vertex_input_layout& layout,
|
||||
u32 first_vertex,
|
||||
u32 vertex_count,
|
||||
void* persistent_data,
|
||||
void* volatile_data) const
|
||||
{
|
||||
auto transient = static_cast<char*>(volatile_data);
|
||||
auto persistent = static_cast<char*>(persistent_data);
|
||||
|
||||
auto& draw_call = REGS(m_ctx)->current_draw_clause;
|
||||
|
||||
if (transient != nullptr)
|
||||
{
|
||||
if (draw_call.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
for (const u8 index : layout.referenced_registers)
|
||||
{
|
||||
memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16);
|
||||
transient += 16;
|
||||
}
|
||||
|
||||
memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
|
||||
// Is it possible to reference data outside of the inlined array?
|
||||
return;
|
||||
}
|
||||
|
||||
// NOTE: Order is important! Transient layout is always push_buffers followed by register data
|
||||
if (draw_call.is_immediate_draw)
|
||||
{
|
||||
// NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
|
||||
for (const auto& info : layout.volatile_blocks)
|
||||
{
|
||||
memcpy(transient, m_vertex_push_buffers[info.first].data.data(), info.second);
|
||||
transient += info.second;
|
||||
}
|
||||
}
|
||||
|
||||
for (const u8 index : layout.referenced_registers)
|
||||
{
|
||||
memcpy(transient, REGS(m_ctx)->register_vertex_info[index].data.data(), 16);
|
||||
transient += 16;
|
||||
}
|
||||
}
|
||||
|
||||
if (persistent != nullptr)
|
||||
{
|
||||
for (interleaved_range_info* block : layout.interleaved_blocks)
|
||||
{
|
||||
auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
|
||||
const u32 data_size = range.second * block->attribute_stride;
|
||||
const u32 vertex_base = range.first * block->attribute_stride;
|
||||
|
||||
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
|
||||
persistent += data_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void draw_command_processor::fill_scale_offset_data(void* buffer, bool flip_y) const
|
||||
{
|
||||
const int clip_w = REGS(m_ctx)->surface_clip_width();
|
||||
const int clip_h = REGS(m_ctx)->surface_clip_height();
|
||||
|
||||
const float scale_x = REGS(m_ctx)->viewport_scale_x() / (clip_w / 2.f);
|
||||
float offset_x = REGS(m_ctx)->viewport_offset_x() - (clip_w / 2.f);
|
||||
offset_x /= clip_w / 2.f;
|
||||
|
||||
float scale_y = REGS(m_ctx)->viewport_scale_y() / (clip_h / 2.f);
|
||||
float offset_y = (REGS(m_ctx)->viewport_offset_y() - (clip_h / 2.f));
|
||||
offset_y /= clip_h / 2.f;
|
||||
if (flip_y) scale_y *= -1;
|
||||
if (flip_y) offset_y *= -1;
|
||||
|
||||
const float scale_z = REGS(m_ctx)->viewport_scale_z();
|
||||
const float offset_z = REGS(m_ctx)->viewport_offset_z();
|
||||
const float one = 1.f;
|
||||
|
||||
utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
|
||||
utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
|
||||
utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
|
||||
utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
|
||||
}
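
// Hedged sketch of the row-major 4x4 matrix streamed out above, one utils::stream_vector
// call per row (standalone helper for illustration, not RPCS3 code).
#include <array>

static std::array<float, 16> make_scale_offset(float sx, float sy, float sz, float ox, float oy, float oz)
{
	return {
		sx,  0.f, 0.f, ox,
		0.f, sy,  0.f, oy,
		0.f, 0.f, sz,  oz,
		0.f, 0.f, 0.f, 1.f,
	};
}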
|
||||
|
||||
void draw_command_processor::fill_user_clip_data(void* buffer) const
|
||||
{
|
||||
const rsx::user_clip_plane_op clip_plane_control[6] =
|
||||
{
|
||||
REGS(m_ctx)->clip_plane_0_enabled(),
|
||||
REGS(m_ctx)->clip_plane_1_enabled(),
|
||||
REGS(m_ctx)->clip_plane_2_enabled(),
|
||||
REGS(m_ctx)->clip_plane_3_enabled(),
|
||||
REGS(m_ctx)->clip_plane_4_enabled(),
|
||||
REGS(m_ctx)->clip_plane_5_enabled(),
|
||||
};
|
||||
|
||||
u8 data_block[64];
|
||||
s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
|
||||
f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);
|
||||
|
||||
for (int index = 0; index < 6; ++index)
|
||||
{
|
||||
switch (clip_plane_control[index])
|
||||
{
|
||||
default:
|
||||
rsx_log.error("bad clip plane control (0x%x)", static_cast<u8>(clip_plane_control[index]));
|
||||
[[fallthrough]];
|
||||
|
||||
case rsx::user_clip_plane_op::disable:
|
||||
clip_enabled_flags[index] = 0;
|
||||
clip_distance_factors[index] = 0.f;
|
||||
break;
|
||||
|
||||
case rsx::user_clip_plane_op::greater_or_equal:
|
||||
clip_enabled_flags[index] = 1;
|
||||
clip_distance_factors[index] = 1.f;
|
||||
break;
|
||||
|
||||
case rsx::user_clip_plane_op::less_than:
|
||||
clip_enabled_flags[index] = 1;
|
||||
clip_distance_factors[index] = -1.f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
|
||||
}
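
// Hedged sketch of the 64-byte block written above: six enable flags padded to a
// 32-byte boundary, then six clip-distance sign factors. Layout is inferred from the
// offsets used in the function; the struct name is illustrative.
#include <cstdint>

struct user_clip_block
{
	int32_t enabled[8]; // [0..5] used, remainder pads to offset 32
	float   factors[8]; // [0..5] used: 0.f, +1.f (greater_or_equal) or -1.f (less_than)
};
static_assert(sizeof(user_clip_block) == 64, "must match the 2 * 8 * sizeof(u32) copy above");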
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* Buffer must be at least 512 float4 wide.
|
||||
*/
|
||||
void draw_command_processor::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const
|
||||
{
|
||||
if (!reloc_table.empty()) [[ likely ]]
|
||||
{
|
||||
char* dst = reinterpret_cast<char*>(buffer);
|
||||
for (const auto& index : reloc_table)
|
||||
{
|
||||
utils::stream_vector_from_memory(dst, &REGS(m_ctx)->transform_constants[index]);
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(buffer, REGS(m_ctx)->transform_constants.data(), 468 * 4 * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) const
|
||||
{
|
||||
ROP_control_t rop_control{};
|
||||
|
||||
if (REGS(m_ctx)->alpha_test_enabled())
|
||||
{
|
||||
const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func());
|
||||
rop_control.set_alpha_test_func(alpha_func);
|
||||
rop_control.enable_alpha_test();
|
||||
}
|
||||
|
||||
if (REGS(m_ctx)->polygon_stipple_enabled())
|
||||
{
|
||||
rop_control.enable_polygon_stipple();
|
||||
}
|
||||
|
||||
if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !RSX(m_ctx)->get_backend_config().supports_hw_a2c)
|
||||
{
|
||||
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
|
||||
// Alpha values generate a coverage mask for order independent blending
|
||||
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
|
||||
// Simulated using combined alpha blend and alpha test
|
||||
rop_control.enable_alpha_to_coverage();
|
||||
if (REGS(m_ctx)->msaa_sample_mask())
|
||||
{
|
||||
rop_control.enable_MSAA_writes();
|
||||
}
|
||||
|
||||
// Sample configuration bits
|
||||
switch (REGS(m_ctx)->surface_antialias())
|
||||
{
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
rop_control.set_msaa_control(1u);
|
||||
break;
|
||||
default:
|
||||
rop_control.set_msaa_control(3u);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const f32 fog0 = REGS(m_ctx)->fog_params_0();
|
||||
const f32 fog1 = REGS(m_ctx)->fog_params_1();
|
||||
const u32 fog_mode = static_cast<u32>(REGS(m_ctx)->fog_equation());
|
||||
|
||||
// Check if framebuffer is actually an XRGB format and not a WZYX format
|
||||
switch (REGS(m_ctx)->surface_color())
|
||||
{
|
||||
case rsx::surface_color_format::w16z16y16x16:
|
||||
case rsx::surface_color_format::w32z32y32x32:
|
||||
case rsx::surface_color_format::x32:
|
||||
// These behave very differently from "normal" formats.
|
||||
break;
|
||||
default:
|
||||
// Integer framebuffer formats.
|
||||
rop_control.enable_framebuffer_INT();
|
||||
|
||||
// Check if we want sRGB conversion.
|
||||
if (REGS(m_ctx)->framebuffer_srgb_enabled())
|
||||
{
|
||||
rop_control.enable_framebuffer_sRGB();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Generate wpos coefficients
|
||||
// wpos equation is now as follows:
|
||||
// wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0)
|
||||
// wpos.x = (frag_coord / resolution_scale)
|
||||
// wpos.zw = frag_coord.zw
|
||||
|
||||
const auto window_origin = REGS(m_ctx)->shader_window_origin();
|
||||
const u32 window_height = REGS(m_ctx)->shader_window_height();
|
||||
const f32 resolution_scale = (window_height <= static_cast<u32>(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale();
|
||||
const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale);
|
||||
const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height;
|
||||
const f32 alpha_ref = REGS(m_ctx)->alpha_ref();
|
||||
|
||||
u32* dst = static_cast<u32*>(buffer);
|
||||
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
|
||||
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
|
||||
}
|
||||
|
||||
void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const
|
||||
{
|
||||
auto& draw_call = REGS(m_ctx)->current_draw_clause;
|
||||
|
||||
// Only call this for instanced draws!
|
||||
ensure(draw_call.is_trivial_instanced_draw);
|
||||
|
||||
// Temp indirection table. Used to track "running" updates.
|
||||
rsx::simple_array<u32> instancing_indirection_table;
|
||||
// indirection table size
|
||||
const auto reloc_table = prog.has_indexed_constants ? decltype(prog.constant_ids){} : prog.constant_ids;
|
||||
const auto redirection_table_size = prog.has_indexed_constants ? 468u : ::size32(prog.constant_ids);
|
||||
instancing_indirection_table.resize(redirection_table_size);
|
||||
|
||||
// Temp constants data
|
||||
rsx::simple_array<u128> constants_data;
|
||||
constants_data.reserve(redirection_table_size * draw_call.pass_count());
|
||||
|
||||
// Allocate indirection buffer on GPU stream
|
||||
indirection_table_buf.reserve(instancing_indirection_table.size_bytes() * draw_call.pass_count());
|
||||
auto indirection_out = indirection_table_buf.data<u32>();
|
||||
|
||||
rsx::instanced_draw_config_t instance_config;
|
||||
u32 indirection_table_offset = 0;
|
||||
|
||||
// We now replay the draw call here to pack the data.
|
||||
draw_call.begin();
|
||||
|
||||
// Write initial draw data.
|
||||
std::iota(instancing_indirection_table.begin(), instancing_indirection_table.end(), 0);
|
||||
|
||||
constants_data.resize(redirection_table_size);
|
||||
fill_vertex_program_constants_data(constants_data.data(), reloc_table);
|
||||
|
||||
// Next draw. We're guaranteed more than one draw call by the caller.
|
||||
draw_call.next();
|
||||
|
||||
do
|
||||
{
|
||||
// Write previous state
|
||||
std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes());
|
||||
indirection_table_offset += redirection_table_size;
|
||||
|
||||
// Decode next draw state
|
||||
instance_config = {};
|
||||
draw_call.execute_pipeline_dependencies(m_ctx, &instance_config);
|
||||
|
||||
if (!instance_config.transform_constants_data_changed)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const int translated_offset = prog.has_indexed_constants
|
||||
? instance_config.patch_load_offset
|
||||
: prog.TranslateConstantsRange(instance_config.patch_load_offset, instance_config.patch_load_count);
|
||||
|
||||
if (translated_offset >= 0)
|
||||
{
|
||||
// Trivially patchable in bulk
|
||||
const u32 redirection_loc = ::size32(constants_data);
|
||||
constants_data.resize(::size32(constants_data) + instance_config.patch_load_count);
|
||||
std::memcpy(constants_data.data() + redirection_loc, &REGS(m_ctx)->transform_constants[instance_config.patch_load_offset], instance_config.patch_load_count * sizeof(u128));
|
||||
|
||||
// Update indirection table
|
||||
for (auto i = translated_offset, count = 0;
|
||||
static_cast<u32>(count) < instance_config.patch_load_count;
|
||||
++i, ++count)
|
||||
{
|
||||
instancing_indirection_table[i] = redirection_loc + count;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
ensure(!prog.has_indexed_constants);
|
||||
|
||||
// Sparse update. Update records individually instead of bulk
|
||||
// FIXME: Range batching optimization
|
||||
const auto load_end = instance_config.patch_load_offset + instance_config.patch_load_count;
|
||||
for (u32 i = 0; i < redirection_table_size; ++i)
|
||||
{
|
||||
const auto read_index = prog.constant_ids[i];
|
||||
if (read_index < instance_config.patch_load_offset || read_index >= load_end)
|
||||
{
|
||||
// Reading outside "hot" range.
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 redirection_loc = ::size32(constants_data);
|
||||
constants_data.resize(::size32(constants_data) + 1);
|
||||
std::memcpy(constants_data.data() + redirection_loc, &REGS(m_ctx)->transform_constants[read_index], sizeof(u128));
|
||||
|
||||
instancing_indirection_table[i] = redirection_loc;
|
||||
}
|
||||
|
||||
} while (draw_call.next());
|
||||
|
||||
// Tail
|
||||
ensure(indirection_table_offset < (instancing_indirection_table.size() * draw_call.pass_count()));
|
||||
std::memcpy(indirection_out + indirection_table_offset, instancing_indirection_table.data(), instancing_indirection_table.size_bytes());
|
||||
|
||||
// Now write the constants to the GPU buffer
|
||||
constants_data_array_buffer.reserve(constants_data.size_bytes());
|
||||
std::memcpy(constants_data_array_buffer.data(), constants_data.data(), constants_data.size_bytes());
|
||||
}
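
// Hedged sketch (not RPCS3 code) of how a shader-side constant fetch resolves through
// the tables built above: each instance owns one table_size-entry slice of the
// indirection table, which remaps a register index to a slot in the packed constants
// array. Mirrors the GLSL _fetch_constant helper later in this diff.
#include <cstdint>
#include <vector>

struct vec4_stub { float x, y, z, w; };

static vec4_stub fetch_constant(const std::vector<uint32_t>& indirection_table,
	const std::vector<vec4_stub>& constants_data,
	uint32_t table_size, uint32_t instance_id, uint32_t reg)
{
	const uint32_t slot = indirection_table[instance_id * table_size + reg];
	return constants_data[slot];
}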
|
||||
}
|
@@ -1,110 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
|
||||
#include "Emu/RSX/Core/RSXVertexTypes.h"
|
||||
#include "Emu/RSX/NV47/FW/draw_call.hpp"
|
||||
#include "Emu/RSX/Program/ProgramStateCache.h"
|
||||
#include "Emu/RSX/rsx_vertex_data.h"
|
||||
|
||||
#include <span>
|
||||
#include <variant>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
struct rsx_state;
|
||||
struct context;
|
||||
class io_buffer;
|
||||
|
||||
class draw_command_processor
|
||||
{
|
||||
using vertex_program_metadata_t = program_hash_util::vertex_program_utils::vertex_program_metadata;
|
||||
|
||||
context* m_ctx = nullptr;
|
||||
|
||||
protected:
|
||||
friend class thread;
|
||||
|
||||
std::array<push_buffer_vertex_info, 16> m_vertex_push_buffers;
|
||||
rsx::simple_array<u32> m_element_push_buffer;
|
||||
|
||||
public:
|
||||
draw_command_processor() = default;
|
||||
|
||||
void init(context* ctx)
|
||||
{
|
||||
m_ctx = ctx;
|
||||
}
|
||||
|
||||
// Analyze vertex inputs and group all interleaved blocks
|
||||
void analyse_inputs_interleaved(vertex_input_layout& layout, const vertex_program_metadata_t& vp_metadata);
|
||||
|
||||
// Retrieve raw bytes for the index array (untyped)
|
||||
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
|
||||
|
||||
// Get compiled draw command for backend rendering
|
||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||
get_draw_command(const rsx::rsx_state& state) const;
|
||||
|
||||
// Push-buffers for immediate rendering (begin-end scopes)
|
||||
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
|
||||
|
||||
u32 get_push_buffer_vertex_count() const;
|
||||
|
||||
void append_array_element(u32 index);
|
||||
|
||||
u32 get_push_buffer_index_count() const;
|
||||
|
||||
void clear_push_buffers();
|
||||
|
||||
const std::span<const u32> element_push_buffer() const
|
||||
{
|
||||
return m_element_push_buffer;
|
||||
}
|
||||
|
||||
// Host driver helpers
|
||||
void fill_vertex_layout_state(
|
||||
const vertex_input_layout& layout,
|
||||
const vertex_program_metadata_t& vp_metadata,
|
||||
u32 first_vertex,
|
||||
u32 vertex_count,
|
||||
s32* buffer,
|
||||
u32 persistent_offset_base,
|
||||
u32 volatile_offset_base) const;
|
||||
|
||||
void write_vertex_data_to_memory(
|
||||
const vertex_input_layout& layout,
|
||||
u32 first_vertex,
|
||||
u32 vertex_count,
|
||||
void* persistent_data,
|
||||
void* volatile_data) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with 4x4 scale offset matrix.
|
||||
* Vertex shader's position is to be multiplied by this matrix.
|
||||
* If flip_y is set, the matrix is modified to use the D3D convention.
|
||||
*/
|
||||
void fill_scale_offset_data(void* buffer, bool flip_y) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with user clip information
|
||||
*/
|
||||
void fill_user_clip_data(void* buffer) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* The relocation table allows a partial fill with only selected registers.
|
||||
*/
|
||||
void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with fragment rasterization state.
|
||||
* Fills current fog values, alpha test parameters and texture scaling parameters
|
||||
*/
|
||||
void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program) const;
|
||||
|
||||
// Fill instancing buffers. A single iobuf is used for both. 256-byte alignment is enforced to allow a global bind
|
||||
// Returns offsets to the index redirection lookup table and constants field array
|
||||
void fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase& prog) const;
|
||||
};
|
||||
}
|
@@ -1,54 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
enum pipeline_state : u32
|
||||
{
|
||||
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
|
||||
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
|
||||
fragment_program_state_dirty = (1 << 2), // Fragment program state changed
|
||||
vertex_program_state_dirty = (1 << 3), // Vertex program state changed
|
||||
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc)
|
||||
vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc)
|
||||
transform_constants_dirty = (1 << 6), // Transform constants changed
|
||||
fragment_constants_dirty = (1 << 7), // Fragment constants changed
|
||||
framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed
|
||||
fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed
|
||||
vertex_texture_state_dirty = (1 << 10), // Vertex texture parameters changed
|
||||
scissor_config_state_dirty = (1 << 11), // Scissor region changed
|
||||
zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed
|
||||
|
||||
scissor_setup_invalid = (1 << 13), // Scissor configuration is broken
|
||||
scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint
|
||||
|
||||
polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
|
||||
line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed
|
||||
|
||||
push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers)
|
||||
|
||||
polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed
|
||||
depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed
|
||||
|
||||
pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint.
|
||||
|
||||
rtt_config_dirty = (1 << 21), // Render target configuration changed
|
||||
rtt_config_contested = (1 << 22), // Render target configuration is indeterminate
|
||||
rtt_config_valid = (1 << 23), // Render target configuration is valid
|
||||
rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate
|
||||
|
||||
xform_instancing_state_dirty = (1 << 25), // Transform instancing state has changed
|
||||
|
||||
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
|
||||
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
|
||||
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty | xform_instancing_state_dirty,
|
||||
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
|
||||
memory_barrier_bits = framebuffer_reads_dirty,
|
||||
|
||||
// Vulkan-specific signals
|
||||
invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,
|
||||
|
||||
all_dirty = ~0u
|
||||
};
|
||||
}
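
// Minimal illustrative sketch of how a dirty-bit mask like the enum above is used:
// flag values mirror the declarations, but this is a standalone example, not RPCS3 code.
#include <cstdint>

int main()
{
	constexpr uint32_t scissor_config_state_dirty  = 1u << 11;
	constexpr uint32_t zclip_config_state_dirty    = 1u << 12;
	constexpr uint32_t polygon_offset_state_dirty  = 1u << 18;
	constexpr uint32_t depth_bounds_state_dirty    = 1u << 19;
	constexpr uint32_t invalidate_vk_dynamic_state =
		zclip_config_state_dirty | scissor_config_state_dirty |
		polygon_offset_state_dirty | depth_bounds_state_dirty;

	uint32_t graphics_state = scissor_config_state_dirty;
	const bool rebind_dynamic_state = (graphics_state & invalidate_vk_dynamic_state) != 0;
	graphics_state &= ~invalidate_vk_dynamic_state; // clear after handling
	return rebind_dynamic_state ? 0 : 1;
}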
|
@@ -513,7 +513,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
if (vertex_state & rsx::vertex_arrays_changed)
|
||||
{
|
||||
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
|
||||
analyse_inputs_interleaved(m_vertex_layout);
|
||||
}
|
||||
else if (vertex_state & rsx::vertex_base_changed)
|
||||
{
|
||||
|
@@ -840,8 +840,8 @@ void GLGSRender::load_program_env()
|
||||
// Vertex state
|
||||
auto mapping = m_vertex_env_buffer->alloc_from_heap(144, m_uniform_buffer_offset_align);
|
||||
auto buf = static_cast<u8*>(mapping.first);
|
||||
m_draw_processor.fill_scale_offset_data(buf, false);
|
||||
m_draw_processor.fill_user_clip_data(buf + 64);
|
||||
fill_scale_offset_data(buf, false);
|
||||
fill_user_clip_data(buf + 64);
|
||||
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
|
||||
*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale();
|
||||
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
|
||||
@@ -887,7 +887,7 @@ void GLGSRender::load_program_env()
|
||||
// Fragment state
|
||||
auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align);
|
||||
auto buf = static_cast<u8*>(mapping.first);
|
||||
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
|
||||
fill_fragment_state_buffer(buf, current_fragment_program);
|
||||
|
||||
m_fragment_env_buffer->bind_range(GL_FRAGMENT_STATE_BIND_SLOT, mapping.second, 32);
|
||||
}
|
||||
@@ -988,7 +988,7 @@ void GLGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
|
||||
: std::span<const u16>(m_vertex_prog->constant_ids);
|
||||
|
||||
buffer.reserve(transform_constants_size);
|
||||
m_draw_processor.fill_vertex_program_constants_data(buffer.data(), constant_ids);
|
||||
fill_vertex_program_constants_data(buffer.data(), constant_ids);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1007,14 +1007,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
||||
buf[1] = upload_info.vertex_index_offset;
|
||||
buf += 4;
|
||||
|
||||
m_draw_processor.fill_vertex_layout_state(
|
||||
m_vertex_layout,
|
||||
current_vp_metadata,
|
||||
upload_info.first_vertex,
|
||||
upload_info.allocated_vertex_count,
|
||||
reinterpret_cast<s32*>(buf),
|
||||
upload_info.persistent_mapping_offset,
|
||||
upload_info.volatile_mapping_offset);
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
|
||||
m_vertex_layout_buffer->bind_range(GL_VERTEX_LAYOUT_BIND_SLOT, mapping.second, 128 + 16);
|
||||
|
||||
|
@@ -153,7 +153,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
m_profiler.start();
|
||||
|
||||
//Write index buffers and count verts
|
||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), m_draw_processor.get_draw_command(rsx::method_registers));
|
||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
||||
|
||||
const u32 vertex_count = (result.max_index - result.min_index) + 1;
|
||||
u32 vertex_base = result.min_index;
|
||||
@@ -250,7 +250,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
}
|
||||
|
||||
//Write all the data
|
||||
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
|
||||
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
|
||||
|
||||
m_frame_stats.vertex_upload_time += m_profiler.duration();
|
||||
return upload_info;
|
||||
|
@@ -89,52 +89,6 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
bool draw_clause::check_trivially_instanced() const
|
||||
{
|
||||
if (pass_count() <= 1)
|
||||
{
|
||||
// Cannot instance one draw call or less
|
||||
return false;
|
||||
}
|
||||
|
||||
// For instancing all draw calls must be identical
|
||||
const auto& ref = draw_command_ranges.front();
|
||||
for (const auto& range : draw_command_ranges)
|
||||
{
|
||||
if (range.first != ref.first || range.count != ref.count)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (draw_command_barriers.empty())
|
||||
{
|
||||
// Raise alarm here for investigation, we may be missing a corner case.
|
||||
rsx_log.error("Instanced draw detected, but no command barriers found!");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Barriers must exist, but can only involve updating transform constants (for now)
|
||||
for (const auto& barrier : draw_command_barriers)
|
||||
{
|
||||
if (barrier.type != rsx::transform_constant_load_modifier_barrier &&
|
||||
barrier.type != rsx::transform_constant_update_barrier)
|
||||
{
|
||||
ensure(barrier.draw_id < ::size32(draw_command_ranges));
|
||||
if (draw_command_ranges[barrier.draw_id].count == 0)
|
||||
{
|
||||
// Dangling command barriers are ignored. We're also at the end of the command, so abort.
|
||||
break;
|
||||
}
|
||||
|
||||
// Fail. Only transform constant instancing is supported at the moment.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
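
// Hedged sketch of the "all draw ranges identical" requirement above, expressed with
// std::all_of. The range struct is a stand-in for the real draw_range type.
#include <algorithm>
#include <cstdint>
#include <vector>

struct draw_range_stub { uint32_t first; uint32_t count; };

static bool ranges_identical(const std::vector<draw_range_stub>& ranges)
{
	if (ranges.size() <= 1)
	{
		// Cannot instance one draw call or less
		return false;
	}

	const auto& ref = ranges.front();
	return std::all_of(ranges.begin(), ranges.end(), [&](const draw_range_stub& r)
	{
		return r.first == ref.first && r.count == ref.count;
	});
}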
|
||||
|
||||
void draw_clause::reset(primitive_type type)
|
||||
{
|
||||
current_range_index = ~0u;
|
||||
@@ -143,7 +97,6 @@ namespace rsx
|
||||
command = draw_command::none;
|
||||
primitive = type;
|
||||
primitive_barrier_enable = false;
|
||||
is_trivial_instanced_draw = false;
|
||||
|
||||
draw_command_ranges.clear();
|
||||
draw_command_barriers.clear();
|
||||
@@ -152,7 +105,7 @@ namespace rsx
|
||||
is_disjoint_primitive = is_primitive_disjointed(primitive);
|
||||
}
|
||||
|
||||
u32 draw_clause::execute_pipeline_dependencies(context* ctx, instanced_draw_config_t* instance_config) const
|
||||
u32 draw_clause::execute_pipeline_dependencies(context* ctx) const
|
||||
{
|
||||
u32 result = 0u;
|
||||
for (;
|
||||
@@ -198,20 +151,7 @@ namespace rsx
|
||||
// Update transform constants
|
||||
auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0);
|
||||
auto buffer = std::span<const u32>(static_cast<const u32*>(vm::base(ptr)), barrier.arg1);
|
||||
auto notify = [&](rsx::context*, u32 load, u32 count)
|
||||
{
|
||||
if (!instance_config)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
instance_config->transform_constants_data_changed = true;
|
||||
instance_config->patch_load_offset = load;
|
||||
instance_config->patch_load_count = count;
|
||||
return true;
|
||||
};
|
||||
|
||||
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer, notify);
|
||||
nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer);
|
||||
result |= transform_constants_changed;
|
||||
break;
|
||||
}
|
||||
|
@@ -7,14 +7,6 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
struct instanced_draw_config_t
|
||||
{
|
||||
bool transform_constants_data_changed;
|
||||
|
||||
u32 patch_load_offset;
|
||||
u32 patch_load_count;
|
||||
};
|
||||
|
||||
class draw_clause
|
||||
{
|
||||
// Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
|
||||
@@ -59,8 +51,6 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
bool check_trivially_instanced() const;
|
||||
|
||||
public:
|
||||
primitive_type primitive{};
|
||||
draw_command command{};
|
||||
@@ -69,7 +59,6 @@ namespace rsx
|
||||
bool is_disjoint_primitive{}; // Set if primitive type does not rely on adjacency information
|
||||
bool primitive_barrier_enable{}; // Set once to signal that a primitive restart barrier can be inserted
|
||||
bool is_rendering{}; // Set while we're actually pushing the draw calls to host GPU
|
||||
bool is_trivial_instanced_draw{}; // Set if the draw call can be executed on the host GPU as a single instanced draw.
|
||||
|
||||
simple_array<u32> inline_vertex_array{};
|
||||
|
||||
@@ -84,8 +73,8 @@ namespace rsx
|
||||
{
|
||||
// End draw call append mode
|
||||
current_range_index = ~0u;
|
||||
// Check if we can instance on host
|
||||
is_trivial_instanced_draw = check_trivially_instanced();
|
||||
|
||||
// TODO
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -280,7 +269,7 @@ namespace rsx
|
||||
/**
|
||||
* Executes commands required to make the current draw state valid
|
||||
*/
|
||||
u32 execute_pipeline_dependencies(struct context* ctx, instanced_draw_config_t* instance_config = nullptr) const;
|
||||
u32 execute_pipeline_dependencies(struct context* ctx) const;
|
||||
|
||||
const draw_range_t& get_range() const
|
||||
{
|
||||
|
@@ -18,7 +18,7 @@ namespace rsx
|
||||
// NOTE: Push buffers still behave like register writes.
|
||||
// You do not need to specify each attribute for each vertex, the register is referenced instead.
|
||||
// This is classic OpenGL 1.x behavior as I remember.
|
||||
RSX(ctx)->GRAPH_frontend().append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
|
||||
RSX(ctx)->append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
|
||||
}
|
||||
|
||||
auto& info = REGS(ctx)->register_vertex_info[attrib_index];
|
||||
|
@@ -30,7 +30,7 @@ namespace rsx
|
||||
REGS(ctx)->transform_constants[load + constant_id][subreg] = arg;
|
||||
}
|
||||
|
||||
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify)
|
||||
void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span<const u32>& args)
|
||||
{
|
||||
const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
|
||||
const u32 constant_id = index / 4;
|
||||
@@ -40,15 +40,8 @@ namespace rsx
|
||||
auto dst = &REGS(ctx)->transform_constants[load + constant_id][subreg];
|
||||
copy_data_swap_u32(dst, args.data(), ::size32(args));
|
||||
|
||||
// Notify
|
||||
const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div
|
||||
const u32 load_index = load + constant_id;
|
||||
const u32 load_count = last_constant_id - constant_id;
|
||||
|
||||
if (!notify || !notify(ctx, load_index, load_count))
|
||||
{
|
||||
RSX(ctx)->patch_transform_constants(ctx, load_index, load_count);
|
||||
}
|
||||
RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id);
|
||||
}
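
// Worked sketch of the "aligned div" above: a batch of nargs u32 writes starting at
// register reg spans constants [constant_id, last_constant_id). The base value below
// is an illustrative stand-in for NV4097_SET_TRANSFORM_CONSTANT.
#include <cstdint>

int main()
{
	const uint32_t base  = 0;        // stand-in for NV4097_SET_TRANSFORM_CONSTANT
	const uint32_t reg   = base + 6; // starts at constant 1, component 2
	const uint32_t nargs = 5;        // touches constants 1 and 2
	const uint32_t constant_id      = (reg - base) / 4;
	const uint32_t last_constant_id = ((reg + nargs + 3) - base) / 4;
	return (last_constant_id - constant_id) == 2 ? 0 : 1;
}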
|
||||
|
||||
void set_transform_constant::impl(context* ctx, u32 reg, [[maybe_unused]] u32 arg)
|
||||
@@ -263,15 +256,15 @@ namespace rsx
|
||||
{
|
||||
if (RSX(ctx)->in_begin_end)
|
||||
{
|
||||
RSX(ctx)->GRAPH_frontend().append_array_element(arg & 0xFFFF);
|
||||
RSX(ctx)->GRAPH_frontend().append_array_element(arg >> 16);
|
||||
RSX(ctx)->append_array_element(arg & 0xFFFF);
|
||||
RSX(ctx)->append_array_element(arg >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
void set_array_element32(context* ctx, u32, u32 arg)
|
||||
{
|
||||
if (RSX(ctx)->in_begin_end)
|
||||
RSX(ctx)->GRAPH_frontend().append_array_element(arg);
|
||||
RSX(ctx)->append_array_element(arg);
|
||||
}
|
||||
|
||||
void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg)
|
||||
@@ -360,8 +353,8 @@ namespace rsx
|
||||
// Check if we have immediate mode vertex data in a driver-local buffer
|
||||
if (REGS(ctx)->current_draw_clause.command == rsx::draw_command::none)
|
||||
{
|
||||
const u32 push_buffer_vertices_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_vertex_count();
|
||||
const u32 push_buffer_index_count = RSX(ctx)->GRAPH_frontend().get_push_buffer_index_count();
|
||||
const u32 push_buffer_vertices_count = RSX(ctx)->get_push_buffer_vertex_count();
|
||||
const u32 push_buffer_index_count = RSX(ctx)->get_push_buffer_index_count();
|
||||
|
||||
// Need to set this flag since it overrides some register contents
|
||||
REGS(ctx)->current_draw_clause.is_immediate_draw = true;
|
||||
@@ -393,12 +386,6 @@ namespace rsx
|
||||
return;
|
||||
}
|
||||
|
||||
// Notify the backend if the drawing style changes (instanced vs non-instanced)
|
||||
if (REGS(ctx)->current_draw_clause.is_trivial_instanced_draw != RSX(ctx)->is_current_vertex_program_instanced())
|
||||
{
|
||||
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::xform_instancing_state_dirty;
|
||||
}
|
||||
|
||||
RSX(ctx)->end();
|
||||
}
|
||||
else
|
||||
|
@@ -204,7 +204,7 @@ namespace rsx
|
||||
|
||||
static void decode_one(context* ctx, u32 reg, u32 arg);
|
||||
|
||||
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args, const std::function<bool(context*, u32, u32)>& notify = {});
|
||||
static void batch_decode(context* ctx, u32 reg, const std::span<const u32>& args);
|
||||
};
|
||||
|
||||
struct set_transform_program
|
||||
|
@@ -269,7 +269,6 @@ public:
|
||||
|
||||
struct
|
||||
{
|
||||
// Configuration properties (in)
|
||||
u16 in_register_mask = 0;
|
||||
|
||||
u16 common_access_sampler_mask = 0;
|
||||
@@ -277,7 +276,6 @@ public:
|
||||
u16 redirected_sampler_mask = 0;
|
||||
u16 multisampled_sampler_mask = 0;
|
||||
|
||||
// Decoded properties (out)
|
||||
bool has_lit_op = false;
|
||||
bool has_gather_op = false;
|
||||
bool has_no_output = false;
|
||||
|
@@ -262,11 +262,6 @@ namespace glsl
|
||||
}
|
||||
}
|
||||
|
||||
if (props.require_instanced_render)
|
||||
{
|
||||
enabled_options.push_back("_ENABLE_INSTANCED_CONSTANTS");
|
||||
}
|
||||
|
||||
// Import vertex header
|
||||
program_common::define_glsl_switches(OS, enabled_options);
|
||||
|
||||
|
@@ -55,23 +55,4 @@ vec4 apply_zclip_xform(
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_ENABLE_INSTANCED_CONSTANTS)
|
||||
// Workaround for GL vs VK builtin variable naming
|
||||
#ifdef VULKAN
|
||||
#define _gl_InstanceID gl_InstanceIndex
|
||||
#else
|
||||
#define _gl_InstanceID gl_InstanceID
|
||||
#endif
|
||||
|
||||
vec4 _fetch_constant(const in int base_offset)
|
||||
{
|
||||
// Get virtual draw/instance id. Normally will be 1:1 based on instance index
|
||||
const int indirection_offset = (_gl_InstanceID * CONSTANTS_ARRAY_LENGTH) + base_offset;
|
||||
const int corrected_offset = constants_addressing_lookup[indirection_offset];
|
||||
return instanced_constants_array[corrected_offset];
|
||||
}
|
||||
#else
|
||||
#define _fetch_constant(x) vc[x]
|
||||
#endif
|
||||
|
||||
)"
|
||||
|
@@ -22,7 +22,6 @@ namespace glsl
|
||||
// Applicable in vertex stage
|
||||
bool require_lit_emulation : 1;
|
||||
bool require_explicit_invariance : 1;
|
||||
bool require_instanced_render : 1;
|
||||
bool emulate_zclip_transform : 1;
|
||||
bool emulate_depth_clip_only : 1;
|
||||
|
||||
|
@@ -341,7 +341,6 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
usz vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const
|
||||
{
|
||||
usz hash = vertex_program_utils::get_vertex_program_ucode_hash(program);
|
||||
hash ^= program.ctrl;
|
||||
hash ^= program.output_mask;
|
||||
hash ^= program.texture_state.texture_dimensions;
|
||||
hash ^= program.texture_state.multisampled_textures;
|
||||
@@ -352,8 +351,6 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
||||
{
|
||||
if (binary1.output_mask != binary2.output_mask)
|
||||
return false;
|
||||
if (binary1.ctrl != binary2.ctrl)
|
||||
return false;
|
||||
if (binary1.texture_state != binary2.texture_state)
|
||||
return false;
|
||||
if (binary1.data.size() != binary2.data.size())
|
||||
|
@@ -131,7 +131,7 @@ std::string VertexProgramDecompiler::GetSRC(const u32 n)
|
||||
m_parr.AddParam(PF_PARAM_UNIFORM, float4, std::string("vc[468]"));
|
||||
properties.has_indexed_constants |= !!d3.index_const;
|
||||
m_constant_ids.insert(static_cast<u16>(d1.const_src));
|
||||
fmt::append(ret, "_fetch_constant(%u%s)", d1.const_src, (d3.index_const ? " + " + AddAddrReg() : ""));
|
||||
ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]";
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -362,13 +362,14 @@ std::string VertexProgramDecompiler::NotZeroPositive(const std::string& code)
|
||||
std::string VertexProgramDecompiler::BuildCode()
|
||||
{
|
||||
std::string main_body;
|
||||
for (int i = 0, lvl = 1; i < static_cast<int>(m_instr_count); i++)
|
||||
for (uint i = 0, lvl = 1; i < m_instr_count; i++)
|
||||
{
|
||||
lvl = std::max<int>(lvl - m_instructions[i].close_scopes, 0);
|
||||
|
||||
lvl -= m_instructions[i].close_scopes;
|
||||
if (lvl < 1) lvl = 1;
|
||||
for (int j = 0; j < m_instructions[i].put_close_scopes; ++j)
|
||||
{
|
||||
if (lvl > 1) --lvl;
|
||||
--lvl;
|
||||
if (lvl < 1) lvl = 1;
|
||||
main_body.append(lvl, '\t') += "}\n";
|
||||
}
|
||||
|
||||
@@ -379,8 +380,6 @@ std::string VertexProgramDecompiler::BuildCode()
|
||||
lvl++;
|
||||
}
|
||||
|
||||
ensure(lvl >= 0); // Underflow of indent level will cause crashes!!
|
||||
|
||||
for (const auto& instruction_body : m_instructions[i].body)
|
||||
{
|
||||
main_body.append(lvl, '\t') += instruction_body + "\n";
|
||||
@@ -410,7 +409,7 @@ std::string VertexProgramDecompiler::BuildCode()
|
||||
{
|
||||
const auto i = offset++;
|
||||
if (i == index) continue; // Replace with self
|
||||
reloc_table.emplace_back(fmt::format("_fetch_constant(%d)", index), fmt::format("_fetch_constant(%d)", i));
|
||||
reloc_table.emplace_back(fmt::format("vc[%d]", index), fmt::format("vc[%d]", i));
|
||||
}
|
||||
|
||||
// One-time patch
|
||||
|
@@ -132,10 +132,6 @@ protected:
|
||||
public:
|
||||
struct
|
||||
{
|
||||
// Configuration properties (in)
|
||||
// None
|
||||
|
||||
// Decoded properties (out)
|
||||
bool has_lit_op = false;
|
||||
bool has_indexed_constants = false;
|
||||
}
|
||||
|
@@ -409,13 +409,12 @@ namespace rsx
|
||||
}
|
||||
};
|
||||
|
||||
const auto element_push_buffer = render->draw_processor()->element_push_buffer();
|
||||
if (index_size == 4)
|
||||
{
|
||||
if (!element_push_buffer.empty()) [[unlikely]]
|
||||
if (!render->element_push_buffer.empty()) [[unlikely]]
|
||||
{
|
||||
// Indices provided via immediate mode
|
||||
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u32{});
|
||||
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u32{});
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -425,10 +424,10 @@ namespace rsx
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!element_push_buffer.empty()) [[unlikely]]
|
||||
if (!render->element_push_buffer.empty()) [[unlikely]]
|
||||
{
|
||||
// Indices provided via immediate mode
|
||||
re_evaluate(reinterpret_cast<const std::byte*>(element_push_buffer.data()), u16{});
|
||||
re_evaluate(reinterpret_cast<const std::byte*>(render->element_push_buffer.data()), u16{});
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -620,12 +619,12 @@ namespace rsx
|
||||
|
||||
ar(rsx::method_registers);
|
||||
|
||||
for (auto& v : m_draw_processor.m_vertex_push_buffers)
|
||||
for (auto& v : vertex_push_buffers)
|
||||
{
|
||||
ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data);
|
||||
}
|
||||
|
||||
ar(m_draw_processor.m_element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
|
||||
ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
|
||||
ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer);
|
||||
ar(enable_second_vhandler, requested_vsync);
|
||||
ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info);
|
||||
@ -697,8 +696,6 @@ namespace rsx
|
||||
s_ctx.rsxthr = this;
|
||||
m_ctx = &s_ctx;
|
||||
|
||||
m_draw_processor.init(m_ctx);
|
||||
|
||||
if (g_cfg.misc.use_native_interface && (g_cfg.video.renderer == video_renderer::opengl || g_cfg.video.renderer == video_renderer::vulkan))
|
||||
{
|
||||
m_overlay_manager = g_fxo->init<rsx::overlays::display_manager>(0);
|
||||
@ -804,6 +801,39 @@ namespace rsx
|
||||
in_begin_end = true;
|
||||
}
|
||||
|
||||
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
||||
{
|
||||
if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
|
||||
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
|
||||
m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
|
||||
}
|
||||
|
||||
u32 thread::get_push_buffer_vertex_count() const
|
||||
{
|
||||
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||
return vertex_push_buffers[0].vertex_count;
|
||||
}
|
||||
|
||||
void thread::append_array_element(u32 index)
{
// Endianness is swapped because common upload code expects input in BE
// TODO: Implement fast upload path for LE inputs and do away with this
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
}

u32 thread::get_push_buffer_index_count() const
{
return ::size32(element_push_buffer);
}
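
// A minimal illustrative sketch, not part of the RPCS3 sources: models the endianness
// handling in append_array_element() above, assuming a little-endian host and C++23
// std::byteswap. The helper name append_index_be is hypothetical.
#include <bit>
#include <cstdint>
#include <vector>

inline void append_index_be(std::vector<std::uint32_t>& push_buffer, std::uint32_t index)
{
    // Store the index byte-swapped so that upload code expecting big-endian (PS3-native)
    // input decodes the original value.
    push_buffer.push_back(std::byteswap(index));
}
// Usage: append_index_be(buf, 0x00000102u) stores 0x02010000u in host order; reading those
// bytes back as big-endian yields 0x00000102 again.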
|
||||
|
||||
void thread::end()
|
||||
{
|
||||
if (capture_current_frame)
|
||||
@ -820,7 +850,20 @@ namespace rsx
|
||||
m_eng_interrupt_mask |= rsx::backend_interrupt;
|
||||
ROP_sync_timestamp = rsx::get_shared_tag();
|
||||
|
||||
m_draw_processor.clear_push_buffers();
|
||||
if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
|
||||
{
|
||||
for (auto& push_buf : vertex_push_buffers)
|
||||
{
|
||||
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
|
||||
//rsx::method_registers.register_vertex_info[index].size = 0;
|
||||
|
||||
push_buf.clear();
|
||||
}
|
||||
|
||||
m_graphics_state.clear(rsx::pipeline_state::push_buffer_arrays_dirty);
|
||||
}
|
||||
|
||||
element_push_buffer.clear();
|
||||
|
||||
zcull_ctrl->on_draw();
|
||||
|
||||
@ -1154,6 +1197,180 @@ namespace rsx
|
||||
state += cpu_flag::exit;
|
||||
}
|
||||
|
||||
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();

float scale_x = rsx::method_registers.viewport_scale_x() / (clip_w / 2.f);
float offset_x = rsx::method_registers.viewport_offset_x() - (clip_w / 2.f);
offset_x /= clip_w / 2.f;

float scale_y = rsx::method_registers.viewport_scale_y() / (clip_h / 2.f);
float offset_y = (rsx::method_registers.viewport_offset_y() - (clip_h / 2.f));
offset_y /= clip_h / 2.f;
if (flip_y) scale_y *= -1;
if (flip_y) offset_y *= -1;

float scale_z = rsx::method_registers.viewport_scale_z();
float offset_z = rsx::method_registers.viewport_offset_z();
float one = 1.f;

utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
}
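
// A minimal illustrative sketch, not part of the RPCS3 sources: the four 16-byte writes
// above define a per-axis scale/offset transform, one group per output component. Applying
// it to a position reduces to a scale and bias on each axis; apply_scale_offset below is a
// hypothetical CPU-side model of what a consumer does with this data.
#include <array>

using vec4f = std::array<float, 4>;

inline vec4f apply_scale_offset(const float m[16], const vec4f& pos)
{
    vec4f out{};
    for (int i = 0; i < 4; ++i)
    {
        // Each streamed 16-byte group is dotted with the position; with the layout above
        // this yields out.x = scale_x * x + offset_x * w, and similarly for y and z.
        out[i] = m[i * 4 + 0] * pos[0] + m[i * 4 + 1] * pos[1]
               + m[i * 4 + 2] * pos[2] + m[i * 4 + 3] * pos[3];
    }
    return out;
}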
|
||||
|
||||
void thread::fill_user_clip_data(void *buffer) const
|
||||
{
|
||||
const rsx::user_clip_plane_op clip_plane_control[6] =
|
||||
{
|
||||
rsx::method_registers.clip_plane_0_enabled(),
|
||||
rsx::method_registers.clip_plane_1_enabled(),
|
||||
rsx::method_registers.clip_plane_2_enabled(),
|
||||
rsx::method_registers.clip_plane_3_enabled(),
|
||||
rsx::method_registers.clip_plane_4_enabled(),
|
||||
rsx::method_registers.clip_plane_5_enabled(),
|
||||
};
|
||||
|
||||
u8 data_block[64];
|
||||
s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
|
||||
f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);
|
||||
|
||||
for (int index = 0; index < 6; ++index)
|
||||
{
|
||||
switch (clip_plane_control[index])
|
||||
{
|
||||
default:
|
||||
rsx_log.error("bad clip plane control (0x%x)", static_cast<u8>(clip_plane_control[index]));
|
||||
[[fallthrough]];
|
||||
|
||||
case rsx::user_clip_plane_op::disable:
|
||||
clip_enabled_flags[index] = 0;
|
||||
clip_distance_factors[index] = 0.f;
|
||||
break;
|
||||
|
||||
case rsx::user_clip_plane_op::greater_or_equal:
|
||||
clip_enabled_flags[index] = 1;
|
||||
clip_distance_factors[index] = 1.f;
|
||||
break;
|
||||
|
||||
case rsx::user_clip_plane_op::less_than:
|
||||
clip_enabled_flags[index] = 1;
|
||||
clip_distance_factors[index] = -1.f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
|
||||
}
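
// A minimal illustrative sketch, not part of the RPCS3 sources: the buffer filled above
// stores, per clip plane, an enable flag (0/1) and a signed factor (+1 for greater_or_equal,
// -1 for less_than). One expression then covers both comparison modes; user_clip_passes is a
// hypothetical model of the test a consumer performs.
inline bool user_clip_passes(int enabled, float factor, float plane_value)
{
    // Disabled planes always pass; enabled planes pass when factor * value >= 0.
    return !enabled || (factor * plane_value >= 0.f);
}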
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* Buffer must be at least 512 float4 wide.
|
||||
*/
|
||||
void thread::fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table)
|
||||
{
|
||||
if (!reloc_table.empty()) [[ likely ]]
|
||||
{
|
||||
char* dst = reinterpret_cast<char*>(buffer);
|
||||
for (const auto& index : reloc_table)
|
||||
{
|
||||
utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]);
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float));
|
||||
}
|
||||
}
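
// A minimal illustrative sketch, not part of the RPCS3 sources: the relocation-table path
// above packs only the referenced constant registers back to back. pack_constants is a
// hypothetical standalone equivalent; a program referencing constants {4, 10, 11} uploads
// three float4s instead of all 468.
#include <cstdint>
#include <cstring>
#include <span>

inline void pack_constants(float* dst, const float (*constants)[4], std::span<const std::uint16_t> reloc_table)
{
    for (const auto index : reloc_table)
    {
        std::memcpy(dst, constants[index], 16); // copy one float4 register
        dst += 4;
    }
}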
|
||||
|
||||
void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
|
||||
{
|
||||
ROP_control_t rop_control{};
|
||||
|
||||
if (rsx::method_registers.alpha_test_enabled())
|
||||
{
|
||||
const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func());
|
||||
rop_control.set_alpha_test_func(alpha_func);
|
||||
rop_control.enable_alpha_test();
|
||||
}
|
||||
|
||||
if (rsx::method_registers.polygon_stipple_enabled())
|
||||
{
|
||||
rop_control.enable_polygon_stipple();
|
||||
}
|
||||
|
||||
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
|
||||
{
|
||||
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
|
||||
// Alpha values generate a coverage mask for order independent blending
|
||||
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
|
||||
// Simulated using combined alpha blend and alpha test
|
||||
rop_control.enable_alpha_to_coverage();
|
||||
if (rsx::method_registers.msaa_sample_mask())
|
||||
{
|
||||
rop_control.enable_MSAA_writes();
|
||||
}
|
||||
|
||||
// Sample configuration bits
|
||||
switch (rsx::method_registers.surface_antialias())
|
||||
{
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
rop_control.set_msaa_control(1u);
|
||||
break;
|
||||
default:
|
||||
rop_control.set_msaa_control(3u);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const f32 fog0 = rsx::method_registers.fog_params_0();
|
||||
const f32 fog1 = rsx::method_registers.fog_params_1();
|
||||
const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation());
|
||||
|
||||
// Check if framebuffer is actually an XRGB format and not a WZYX format
|
||||
switch (rsx::method_registers.surface_color())
|
||||
{
|
||||
case rsx::surface_color_format::w16z16y16x16:
|
||||
case rsx::surface_color_format::w32z32y32x32:
|
||||
case rsx::surface_color_format::x32:
|
||||
// These behave very differently from "normal" formats.
|
||||
break;
|
||||
default:
|
||||
// Integer framebuffer formats.
|
||||
rop_control.enable_framebuffer_INT();
|
||||
|
||||
// Check if we want sRGB conversion.
|
||||
if (rsx::method_registers.framebuffer_srgb_enabled())
|
||||
{
|
||||
rop_control.enable_framebuffer_sRGB();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Generate wpos coefficients
|
||||
// wpos equation is now as follows:
|
||||
// wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0)
|
||||
// wpos.x = (frag_coord / resolution_scale)
|
||||
// wpos.zw = frag_coord.zw
|
||||
|
||||
const auto window_origin = rsx::method_registers.shader_window_origin();
|
||||
const u32 window_height = rsx::method_registers.shader_window_height();
|
||||
const f32 resolution_scale = (window_height <= static_cast<u32>(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale();
|
||||
const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale);
|
||||
const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height;
|
||||
const f32 alpha_ref = rsx::method_registers.alpha_ref();
|
||||
|
||||
u32 *dst = static_cast<u32*>(buffer);
|
||||
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
|
||||
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
|
||||
}
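
// A minimal illustrative sketch, not part of the RPCS3 sources: the wpos_scale/wpos_bias
// values computed above implement the window-position equation from the comment block as a
// plain scale-and-bias on the incoming fragment coordinate. emulated_wpos_y is hypothetical.
inline float emulated_wpos_y(float frag_coord_y, float resolution_scale, bool origin_top, float window_height)
{
    const float wpos_scale = origin_top ? (1.f / resolution_scale) : (-1.f / resolution_scale);
    const float wpos_bias = origin_top ? 0.f : window_height;
    return frag_coord_y * wpos_scale + wpos_bias;
}
// Example: with a bottom-origin window of height 720 and no resolution scaling, y = 0 maps
// to 720 and y = 719 maps to 1, i.e. the Y axis is flipped.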
|
||||
|
||||
u64 thread::timestamp()
|
||||
{
|
||||
const u64 freq = sys_time_get_timebase_frequency();
|
||||
@ -1192,6 +1409,51 @@ namespace rsx
|
||||
return t + timestamp_subvalue;
|
||||
}
|
||||
|
||||
std::span<const std::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
|
||||
{
|
||||
if (!element_push_buffer.empty()) [[ unlikely ]]
|
||||
{
|
||||
// Indices provided via immediate mode
|
||||
return {reinterpret_cast<const std::byte*>(element_push_buffer.data()), ::narrow<u32>(element_push_buffer.size() * sizeof(u32))};
|
||||
}
|
||||
|
||||
const rsx::index_array_type type = rsx::method_registers.index_type();
|
||||
const u32 type_size = get_index_type_size(type);
|
||||
|
||||
// Force aligned indices as realhw
|
||||
const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
|
||||
|
||||
const u32 first = draw_indexed_clause.min_index();
|
||||
const u32 count = draw_indexed_clause.get_elements_count();
|
||||
|
||||
const auto ptr = vm::_ptr<const std::byte>(address);
|
||||
return { ptr + first * type_size, count * type_size };
|
||||
}
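
// A minimal illustrative sketch, not part of the RPCS3 sources: the "force aligned indices"
// step above rounds the index buffer address down to the index size. For a power-of-two
// type_size, (0 - type_size) is a mask that clears the low bits:
#include <cstdint>

constexpr std::uint32_t align_down_to_index_size(std::uint32_t address, std::uint32_t type_size)
{
    return (0u - type_size) & address;
}
static_assert(align_down_to_index_size(0x1003, 4) == 0x1000); // u32 indices: low 2 bits dropped
static_assert(align_down_to_index_size(0x1003, 2) == 0x1002); // u16 indices: low bit dropped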
|
||||
|
||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||
thread::get_draw_command(const rsx::rsx_state& state) const
|
||||
{
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) [[ likely ]]
|
||||
{
|
||||
return draw_indexed_array_command
|
||||
{
|
||||
get_raw_index_array(state.current_draw_clause)
|
||||
};
|
||||
}
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
|
||||
{
|
||||
return draw_array_command{};
|
||||
}
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
return draw_inlined_array{};
|
||||
}
|
||||
|
||||
fmt::throw_exception("ill-formed draw command");
|
||||
}
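
// A minimal illustrative sketch, not part of the RPCS3 sources: get_draw_command() returns a
// variant, so a backend dispatches on the draw type with std::visit. The overloaded helper is
// the common "overload set of lambdas" idiom, not an RPCS3 type; the handler bodies are
// placeholders.
#include <variant>

template <typename... Ts> struct overloaded : Ts... { using Ts::operator()...; };
template <typename... Ts> overloaded(Ts...) -> overloaded<Ts...>;

// std::visit(overloaded{
//     [&](const rsx::draw_array_command&)         { /* upload straight from vertex arrays */ },
//     [&](const rsx::draw_indexed_array_command&) { /* also upload and translate the index buffer */ },
//     [&](const rsx::draw_inlined_array&)         { /* unpack the inline vertex stream */ }
// }, render->get_draw_command(rsx::method_registers));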
|
||||
|
||||
void thread::do_local_task(FIFO::state state)
|
||||
{
|
||||
m_eng_interrupt_mask.clear(rsx::backend_interrupt);
|
||||
@ -1987,17 +2249,6 @@ namespace rsx
|
||||
|
||||
void thread::get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors)
|
||||
{
|
||||
if (m_graphics_state.test(rsx::pipeline_state::xform_instancing_state_dirty))
|
||||
{
|
||||
current_vertex_program.ctrl = 0;
|
||||
if (rsx::method_registers.current_draw_clause.is_trivial_instanced_draw)
|
||||
{
|
||||
current_vertex_program.ctrl |= RSX_SHADER_CONTROL_INSTANCED_CONSTANTS;
|
||||
}
|
||||
|
||||
m_graphics_state.clear(rsx::pipeline_state::xform_instancing_state_dirty);
|
||||
}
|
||||
|
||||
if (!m_graphics_state.test(rsx::pipeline_state::vertex_program_dirty))
|
||||
{
|
||||
return;
|
||||
@ -2005,6 +2256,7 @@ namespace rsx
|
||||
|
||||
ensure(!m_graphics_state.test(rsx::pipeline_state::vertex_program_ucode_dirty));
|
||||
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
||||
current_vertex_program.ctrl = 0; // Reserved
|
||||
|
||||
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
|
||||
{
|
||||
@ -2027,6 +2279,183 @@ namespace rsx
|
||||
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
|
||||
}
|
||||
|
||||
void thread::analyse_inputs_interleaved(vertex_input_layout& result)
|
||||
{
|
||||
const rsx_state& state = rsx::method_registers;
|
||||
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
|
||||
|
||||
result.clear();
|
||||
result.attribute_mask = static_cast<u16>(input_mask);
|
||||
|
||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
interleaved_range_info& info = *result.alloc_interleaved_block();
|
||||
info.interleaved = true;
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
auto &vinfo = state.vertex_arrays_info[index];
|
||||
result.attribute_placement[index] = attribute_buffer_placement::none;
|
||||
|
||||
if (vinfo.size() > 0)
|
||||
{
|
||||
// Stride must be updated even if the stream is disabled
|
||||
info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
|
||||
info.locations.push_back({ index, false, 1 });
|
||||
|
||||
if (input_mask & (1u << index))
|
||||
{
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
}
|
||||
|
||||
if (info.attribute_stride)
|
||||
{
|
||||
// At least one array feed must be enabled for vertex input
|
||||
result.interleaved_blocks.push_back(&info);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
result.interleaved_blocks.reserve(16);
|
||||
result.referenced_registers.reserve(16);
|
||||
|
||||
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
ensure(index < rsx::limits::vertex_count);
|
||||
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Nothing to do, uninitialized
|
||||
continue;
|
||||
}
|
||||
|
||||
// Always reset attribute placement by default
|
||||
result.attribute_placement[index] = attribute_buffer_placement::none;
|
||||
|
||||
// Check for interleaving
|
||||
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
|
||||
rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed)
|
||||
{
|
||||
// NOTE: In immediate rendering mode, all vertex setup is ignored
|
||||
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
|
||||
if (vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
// Ensure consistent number of vertices per attribute.
|
||||
vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);
|
||||
|
||||
// Read temp buffer (register array)
|
||||
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
|
||||
result.volatile_blocks.push_back(volatile_range_info);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
else if (state.register_vertex_info[index].size > 0)
|
||||
{
|
||||
// Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
}
|
||||
|
||||
// Fall back to the default register value if no source is specified via register
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& info = state.vertex_arrays_info[index];
|
||||
if (!info.size())
|
||||
{
|
||||
if (state.register_vertex_info[index].size > 0)
|
||||
{
|
||||
//Reads from register
|
||||
result.referenced_registers.push_back(index);
|
||||
result.attribute_placement[index] = attribute_buffer_placement::transient;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result.attribute_placement[index] = attribute_buffer_placement::persistent;
|
||||
const u32 base_address = info.offset() & 0x7fffffff;
|
||||
bool alloc_new_block = true;
|
||||
bool modulo = !!(frequency_divider_mask & (1 << index));
|
||||
|
||||
for (auto &block : result.interleaved_blocks)
|
||||
{
|
||||
if (block->single_vertex)
|
||||
{
|
||||
//Single vertex definition, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (block->attribute_stride != info.stride())
|
||||
{
|
||||
//Stride does not match, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
if (base_address > block->base_offset)
|
||||
{
|
||||
const u32 diff = base_address - block->base_offset;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
//Not interleaved, continue
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 diff = block->base_offset - base_address;
|
||||
if (diff > info.stride())
|
||||
{
|
||||
//Not interleaved, continue
|
||||
continue;
|
||||
}
|
||||
|
||||
//Matches, and this address is lower than existing
|
||||
block->base_offset = base_address;
|
||||
}
|
||||
|
||||
alloc_new_block = false;
|
||||
block->locations.push_back({ index, modulo, info.frequency() });
|
||||
block->interleaved = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (alloc_new_block)
|
||||
{
|
||||
interleaved_range_info& block = *result.alloc_interleaved_block();
|
||||
block.base_offset = base_address;
|
||||
block.attribute_stride = info.stride();
|
||||
block.memory_location = info.offset() >> 31;
|
||||
block.locations.reserve(16);
|
||||
block.locations.push_back({ index, modulo, info.frequency() });
|
||||
|
||||
if (block.attribute_stride == 0)
|
||||
{
|
||||
block.single_vertex = true;
|
||||
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
|
||||
result.interleaved_blocks.push_back(&block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &info : result.interleaved_blocks)
|
||||
{
|
||||
//Calculate real data address to be used during upload
|
||||
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
|
||||
}
|
||||
}
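
// A minimal illustrative sketch, not part of the RPCS3 sources: the grouping loop above folds
// an attribute into an existing interleaved block when the strides match and the base offsets
// lie within one stride of each other. belongs_to_block is a hypothetical standalone version
// of that test.
#include <cstdint>

struct block_desc { std::uint32_t base_offset; std::uint32_t stride; };

inline bool belongs_to_block(const block_desc& block, std::uint32_t attr_offset, std::uint32_t attr_stride)
{
    if (block.stride != attr_stride)
        return false; // different stride can never interleave

    const std::uint32_t diff = (attr_offset > block.base_offset)
        ? attr_offset - block.base_offset
        : block.base_offset - attr_offset;
    return diff <= block.stride; // within one vertex record of the block start
}
// When the attribute sits below the block's current base, the real code also rebases the
// block so base_offset always tracks the lowest participating address.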
|
||||
|
||||
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
|
||||
{
|
||||
if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_dirty))
|
||||
@ -2347,6 +2776,267 @@ namespace rsx
|
||||
return std::make_pair(persistent_memory_size, volatile_memory_size);
|
||||
}
|
||||
|
||||
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
|
||||
{
|
||||
std::array<s32, 16> offset_in_block = {};
|
||||
u32 volatile_offset = volatile_offset_base;
|
||||
u32 persistent_offset = persistent_offset_base;
|
||||
|
||||
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
|
||||
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
|
||||
{
|
||||
for (const auto &info : layout.volatile_blocks)
|
||||
{
|
||||
offset_in_block[info.first] = volatile_offset;
|
||||
volatile_offset += info.second;
|
||||
}
|
||||
}
|
||||
|
||||
for (u8 index : layout.referenced_registers)
|
||||
{
|
||||
offset_in_block[index] = volatile_offset;
|
||||
volatile_offset += 16;
|
||||
}
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto &block = layout.interleaved_blocks[0];
|
||||
u32 inline_data_offset = volatile_offset;
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
|
||||
|
||||
offset_in_block[attrib.index] = inline_data_offset;
|
||||
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto &block : layout.interleaved_blocks)
|
||||
{
|
||||
for (const auto& attrib : block->locations)
|
||||
{
|
||||
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
|
||||
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
|
||||
}
|
||||
|
||||
const auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
persistent_offset += block->attribute_stride * range.second;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride
// [8-24] attribute divisor
// [24-27] attribute type
// [27-30] attribute size
// [30-31] reserved
// [31-60] starting offset
// [60-61] swap bytes flag
// [61-62] volatile flag
// [62-63] modulo enable flag
|
||||
|
||||
const s32 default_frequency_mask = (1 << 8);
|
||||
const s32 swap_storage_mask = (1 << 29);
|
||||
const s32 volatile_storage_mask = (1 << 30);
|
||||
const s32 modulo_op_frequency_mask = smin;
|
||||
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
const auto max_index = (first_vertex + vertex_count) - 1;
|
||||
|
||||
for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Unused input, ignore this
|
||||
continue;
|
||||
}
|
||||
|
||||
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
|
||||
{
|
||||
static constexpr u64 zero = 0;
|
||||
std::memcpy(buffer + index * 2, &zero, sizeof(zero));
|
||||
continue;
|
||||
}
|
||||
|
||||
rsx::vertex_base_type type = {};
|
||||
s32 size = 0;
|
||||
s32 attrib0 = 0;
|
||||
s32 attrib1 = 0;
|
||||
|
||||
if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
|
||||
{
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto &info = rsx::method_registers.vertex_arrays_info[index];
|
||||
|
||||
if (!info.size())
|
||||
{
|
||||
// Register
|
||||
const auto& reginfo = rsx::method_registers.register_vertex_info[index];
|
||||
type = reginfo.type;
|
||||
size = reginfo.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Array
|
||||
type = info.type();
|
||||
size = info.size();
|
||||
|
||||
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Data is either from an immediate render or register input
|
||||
// Immediate data overrides register input
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
|
||||
vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
// Push buffer
|
||||
const auto &info = vertex_push_buffers[index];
|
||||
type = info.type;
|
||||
size = info.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Register
|
||||
const auto& info = rsx::method_registers.register_vertex_info[index];
|
||||
type = info.type;
|
||||
size = info.size;
|
||||
|
||||
attrib0 = rsx::get_vertex_type_size_on_host(type, size);
|
||||
}
|
||||
}
|
||||
|
||||
attrib1 |= volatile_storage_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[index];
|
||||
type = info.type();
|
||||
size = info.size();
|
||||
|
||||
auto stride = info.stride();
|
||||
attrib0 = stride;
|
||||
|
||||
if (stride > 0) //when stride is 0, input is not an array but a single element
|
||||
{
|
||||
const u32 frequency = info.frequency();
|
||||
switch (frequency)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
{
|
||||
attrib0 |= default_frequency_mask;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
if (modulo_mask & (1 << index))
|
||||
{
|
||||
if (max_index >= frequency)
|
||||
{
|
||||
// Only set modulo mask if a modulo op is actually necessary!
|
||||
// This requires that the uploaded range for this attr = [0, freq-1]
|
||||
// Ignoring modulo op if the rendered range does not wrap allows for range optimization
|
||||
attrib0 |= (frequency << 8);
|
||||
attrib1 |= modulo_op_frequency_mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
attrib0 |= default_frequency_mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Division
|
||||
attrib0 |= (frequency << 8);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} //end attribute placement check
|
||||
|
||||
// Special compressed 4 components into one 4-byte value. Decoded as one value.
|
||||
if (type == rsx::vertex_base_type::cmp)
|
||||
{
|
||||
size = 1;
|
||||
}
|
||||
|
||||
// All data is passed in PS3-native order (BE) so swap flag should be set
|
||||
attrib1 |= swap_storage_mask;
|
||||
attrib0 |= (static_cast<s32>(type) << 24);
|
||||
attrib0 |= (size << 27);
|
||||
attrib1 |= offset_in_block[index];
|
||||
|
||||
buffer[index * 2 + 0] = attrib0;
|
||||
buffer[index * 2 + 1] = attrib1;
|
||||
}
|
||||
}
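
// A minimal illustrative sketch, not part of the RPCS3 sources: how one attribute descriptor
// is packed into the two 32-bit words written above (stride/divisor/type/size in word 0;
// starting offset plus the swap/volatile/modulo flags in word 1, matching the masks used in
// fill_vertex_layout_state). pack_attribute_descriptor and the example encoding are hypothetical.
#include <cstdint>
#include <utility>

inline std::pair<std::uint32_t, std::uint32_t> pack_attribute_descriptor(
    std::uint32_t stride, std::uint32_t divisor, std::uint32_t type, std::uint32_t size,
    std::uint32_t start_offset, bool swap_bytes, bool is_volatile, bool modulo)
{
    std::uint32_t attrib0 = (stride & 0xff) | (divisor << 8) | (type << 24) | (size << 27);

    std::uint32_t attrib1 = start_offset;
    if (swap_bytes)  attrib1 |= (1u << 29);
    if (is_volatile) attrib1 |= (1u << 30);
    if (modulo)      attrib1 |= (1u << 31);

    return { attrib0, attrib1 }; // the real code stores these as s32
}
// Example: stride 16, divisor 1, type 2, size 3, offset 0, BE data packs to
// attrib0 = 0x1A000110 and attrib1 = 0x20000000. The loop above only sets the modulo flag
// when the rendered range actually wraps (max_index >= frequency); otherwise it falls back
// to the default frequency so the uploaded range can stay small.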
|
||||
|
||||
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data)
|
||||
{
|
||||
auto transient = static_cast<char*>(volatile_data);
|
||||
auto persistent = static_cast<char*>(persistent_data);
|
||||
|
||||
auto &draw_call = rsx::method_registers.current_draw_clause;
|
||||
|
||||
if (transient != nullptr)
|
||||
{
|
||||
if (draw_call.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
for (const u8 index : layout.referenced_registers)
|
||||
{
|
||||
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
|
||||
transient += 16;
|
||||
}
|
||||
|
||||
memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
|
||||
//Is it possible to reference data outside of the inlined array?
|
||||
return;
|
||||
}
|
||||
|
||||
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
|
||||
if (draw_call.is_immediate_draw)
|
||||
{
|
||||
//NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory
|
||||
for (const auto &info : layout.volatile_blocks)
|
||||
{
|
||||
memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second);
|
||||
transient += info.second;
|
||||
}
|
||||
}
|
||||
|
||||
for (const u8 index : layout.referenced_registers)
|
||||
{
|
||||
memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
|
||||
transient += 16;
|
||||
}
|
||||
}
|
||||
|
||||
if (persistent != nullptr)
|
||||
{
|
||||
for (interleaved_range_info* block : layout.interleaved_blocks)
|
||||
{
|
||||
auto range = block->calculate_required_range(first_vertex, vertex_count);
|
||||
|
||||
const u32 data_size = range.second * block->attribute_stride;
|
||||
const u32 vertex_base = range.first * block->attribute_stride;
|
||||
|
||||
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
|
||||
persistent += data_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void thread::flip(const display_flip_info_t& info)
|
||||
{
|
||||
m_eng_interrupt_mask.clear(rsx::display_interrupt);
|
||||
@ -3006,7 +3696,7 @@ namespace rsx
|
||||
|
||||
u32 thread::get_load()
|
||||
{
|
||||
// Average load over around 30 frames
|
||||
//Average load over around 30 frames
|
||||
if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
|
||||
{
|
||||
const auto timestamp = get_system_time();
|
||||
|
@ -28,8 +28,6 @@
|
||||
#include "Emu/IdManager.h"
|
||||
|
||||
#include "Core/RSXDisplay.h"
|
||||
#include "Core/RSXDrawCommands.h"
|
||||
#include "Core/RSXDriverState.h"
|
||||
#include "Core/RSXFrameBuffer.h"
|
||||
#include "Core/RSXContext.h"
|
||||
#include "Core/RSXIOMap.hpp"
|
||||
@ -61,6 +59,52 @@ namespace rsx
|
||||
context_clear_all = context_clear_color | context_clear_depth
|
||||
};
|
||||
|
||||
enum pipeline_state : u32
|
||||
{
|
||||
fragment_program_ucode_dirty = (1 << 0), // Fragment program ucode changed
|
||||
vertex_program_ucode_dirty = (1 << 1), // Vertex program ucode changed
|
||||
fragment_program_state_dirty = (1 << 2), // Fragment program state changed
|
||||
vertex_program_state_dirty = (1 << 3), // Vertex program state changed
|
||||
fragment_state_dirty = (1 << 4), // Fragment state changed (alpha test, etc)
|
||||
vertex_state_dirty = (1 << 5), // Vertex state changed (scale_offset, clip planes, etc)
|
||||
transform_constants_dirty = (1 << 6), // Transform constants changed
|
||||
fragment_constants_dirty = (1 << 7), // Fragment constants changed
|
||||
framebuffer_reads_dirty = (1 << 8), // Framebuffer contents changed
|
||||
fragment_texture_state_dirty = (1 << 9), // Fragment texture parameters changed
|
||||
vertex_texture_state_dirty = (1 << 10), // Fragment texture parameters changed
|
||||
scissor_config_state_dirty = (1 << 11), // Scissor region changed
|
||||
zclip_config_state_dirty = (1 << 12), // Viewport Z clip changed
|
||||
|
||||
scissor_setup_invalid = (1 << 13), // Scissor configuration is broken
|
||||
scissor_setup_clipped = (1 << 14), // Scissor region is cropped by viewport constraint
|
||||
|
||||
polygon_stipple_pattern_dirty = (1 << 15), // Rasterizer stippling pattern changed
|
||||
line_stipple_pattern_dirty = (1 << 16), // Line stippling pattern changed
|
||||
|
||||
push_buffer_arrays_dirty = (1 << 17), // Push buffers have data written to them (immediate mode vertex buffers)
|
||||
|
||||
polygon_offset_state_dirty = (1 << 18), // Polygon offset config was changed
|
||||
depth_bounds_state_dirty = (1 << 19), // Depth bounds configuration changed
|
||||
|
||||
pipeline_config_dirty = (1 << 20), // Generic pipeline configuration changes. Shader peek hint.
|
||||
|
||||
rtt_config_dirty = (1 << 21), // Render target configuration changed
|
||||
rtt_config_contested = (1 << 22), // Render target configuration is indeterminate
|
||||
rtt_config_valid = (1 << 23), // Render target configuration is valid
|
||||
rtt_cache_state_dirty = (1 << 24), // Texture cache state is indeterminate
|
||||
|
||||
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
|
||||
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
|
||||
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
|
||||
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
|
||||
memory_barrier_bits = framebuffer_reads_dirty,
|
||||
|
||||
// Vulkan-specific signals
|
||||
invalidate_vk_dynamic_state = zclip_config_state_dirty | scissor_config_state_dirty | polygon_offset_state_dirty | depth_bounds_state_dirty,
|
||||
|
||||
all_dirty = ~0u
|
||||
};
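
// A minimal illustrative sketch, not part of the RPCS3 sources: the pipeline_state values
// above are dirty bits. Producers OR bits in when a register changes and the renderer tests
// and clears them when building a draw; dirty_state below models that with a plain integer
// instead of the engine's flag wrapper.
#include <cstdint>

struct dirty_state
{
    std::uint32_t bits = 0;

    void mark(std::uint32_t flags)       { bits |= flags; }   // e.g. on a register write
    bool test(std::uint32_t flags) const { return (bits & flags) != 0; }
    void clear(std::uint32_t flags)      { bits &= ~flags; }  // after the work is done
};
// Usage: mark(vertex_program_ucode_dirty) when ucode changes; before a draw,
// if (test(vertex_program_dirty)) { /* re-link and rebind */ clear(vertex_program_dirty); }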
|
||||
|
||||
enum eng_interrupt_reason : u32
|
||||
{
|
||||
backend_interrupt = 0x0001, // Backend-related interrupt
|
||||
@ -117,6 +161,8 @@ namespace rsx
|
||||
void cpu_task() override;
|
||||
protected:
|
||||
|
||||
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
|
||||
|
||||
s32 m_skip_frame_ctr = 0;
|
||||
bool skip_current_frame = false;
|
||||
|
||||
@ -171,9 +217,6 @@ namespace rsx
|
||||
// Host DMA
|
||||
std::unique_ptr<RSXDMAWriter> m_host_dma_ctrl;
|
||||
|
||||
// Draw call management
|
||||
draw_command_processor m_draw_processor;
|
||||
|
||||
public:
|
||||
atomic_t<u64> new_get_put = u64{umax};
|
||||
u32 restore_point = 0;
|
||||
@ -182,7 +225,7 @@ namespace rsx
|
||||
atomic_t<u32> external_interrupt_lock{ 0 };
|
||||
atomic_t<bool> external_interrupt_ack{ false };
|
||||
atomic_t<u32> is_initialized{0};
|
||||
|
||||
rsx::simple_array<u32> element_push_buffer;
|
||||
bool is_fifo_idle() const;
|
||||
void flush_fifo();
|
||||
|
||||
@ -225,8 +268,6 @@ namespace rsx
|
||||
void capture_frame(const std::string& name);
|
||||
const backend_configuration& get_backend_config() const { return backend_config; }
|
||||
|
||||
const draw_command_processor* draw_processor() const { return &m_draw_processor; }
|
||||
|
||||
public:
|
||||
shared_ptr<named_thread<ppu_thread>> intr_thread;
|
||||
|
||||
@ -260,6 +301,11 @@ namespace rsx
|
||||
void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout);
|
||||
bool get_scissor(areau& region, bool clip_viewport);
|
||||
|
||||
/**
|
||||
* Analyze vertex inputs and group all interleaved blocks
|
||||
*/
|
||||
void analyse_inputs_interleaved(vertex_input_layout&);
|
||||
|
||||
RSXVertexProgram current_vertex_program = {};
|
||||
RSXFragmentProgram current_fragment_program = {};
|
||||
|
||||
@ -378,6 +424,21 @@ namespace rsx
|
||||
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
|
||||
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
|
||||
|
||||
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
|
||||
|
||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||
get_draw_command(const rsx::rsx_state& state) const;
|
||||
|
||||
/**
|
||||
* Immediate mode rendering requires a temp push buffer to hold attrib values
|
||||
* Appends a value to the push buffer (currently only supports 32-wide types)
|
||||
*/
|
||||
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
|
||||
u32 get_push_buffer_vertex_count() const;
|
||||
|
||||
void append_array_element(u32 index);
|
||||
u32 get_push_buffer_index_count() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
@ -387,6 +448,17 @@ namespace rsx
|
||||
*/
|
||||
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);
|
||||
|
||||
/**
|
||||
* Generates vertex input descriptors as an array of 16x4 s32s
|
||||
*/
|
||||
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
|
||||
|
||||
/**
|
||||
* Uploads vertex data described in the layout descriptor
|
||||
* Copies from local memory to the write-only output buffers provided in a sequential manner
|
||||
*/
|
||||
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
|
||||
|
||||
void evaluate_cpu_usage_reduction_limits();
|
||||
|
||||
private:
|
||||
@ -396,8 +468,29 @@ namespace rsx
|
||||
void handle_invalidated_memory_range();
|
||||
|
||||
public:
|
||||
/**
|
||||
* Fill buffer with 4x4 scale offset matrix.
|
||||
* Vertex shader's position is to be multiplied by this matrix.
|
||||
* if flip_y is set, the matrix is modified to use d3d convention.
|
||||
*/
|
||||
void fill_scale_offset_data(void *buffer, bool flip_y) const;
|
||||
|
||||
draw_command_processor& GRAPH_frontend() { return m_draw_processor; }
|
||||
/**
|
||||
* Fill buffer with user clip information
|
||||
*/
|
||||
void fill_user_clip_data(void *buffer) const;
|
||||
|
||||
/**
|
||||
* Fill buffer with vertex program constants.
|
||||
* Relocation table allows to do a partial fill with only selected registers.
|
||||
*/
|
||||
void fill_vertex_program_constants_data(void* buffer, const std::span<const u16>& reloc_table);
|
||||
|
||||
/**
|
||||
* Fill buffer with fragment rasterization state.
|
||||
* Fills current fog values, alpha test parameters and texture scaling parameters
|
||||
*/
|
||||
void fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& fragment_program);
|
||||
|
||||
/**
|
||||
* Notify that a section of memory has been mapped
|
||||
@ -424,17 +517,9 @@ namespace rsx
|
||||
*/
|
||||
virtual void on_semaphore_acquire_wait() {}
|
||||
|
||||
/**
|
||||
* Load an image from memory with optional scaling and rotation.
|
||||
* Returns false to tell the HW decoder to perform the operation on the CPU as a fallback when the operation cannot be safely accelerated.
|
||||
*/
|
||||
virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }
|
||||
|
||||
|
||||
// Program public "get" handlers
|
||||
virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); }
|
||||
|
||||
bool is_current_vertex_program_instanced() const { return !!(current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS); }
|
||||
virtual bool scaled_image_from_memory(const blit_src_info& /*src_info*/, const blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }
|
||||
|
||||
public:
|
||||
void reset();
|
||||
|
@ -730,7 +730,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
if (state_flags & rsx::vertex_arrays_changed)
|
||||
{
|
||||
m_draw_processor.analyse_inputs_interleaved(m_vertex_layout, current_vp_metadata);
|
||||
analyse_inputs_interleaved(m_vertex_layout);
|
||||
}
|
||||
else if (state_flags & rsx::vertex_base_changed)
|
||||
{
|
||||
@ -929,11 +929,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0);
|
||||
}
|
||||
else if (draw_call.is_single_draw())
|
||||
if (draw_call.is_single_draw())
|
||||
{
|
||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
|
||||
}
|
||||
@ -955,13 +951,10 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
|
||||
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
{
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, draw_call.pass_count(), 0, 0, 0);
|
||||
}
|
||||
else if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
{
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0, 0);
|
||||
const u32 index_count = upload_info.vertex_draw_count;
|
||||
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1059,10 +1052,7 @@ void VKGSRender::end()
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
// Apply write memory barriers
|
||||
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil))
|
||||
{
|
||||
ds->write_barrier(*m_current_command_buffer);
|
||||
}
|
||||
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) ds->write_barrier(*m_current_command_buffer);
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
@ -1121,19 +1111,12 @@ void VKGSRender::end()
|
||||
m_current_command_buffer->flags |= vk::command_buffer::cb_reload_dynamic_state;
|
||||
}
|
||||
|
||||
auto& draw_call = rsx::method_registers.current_draw_clause;
|
||||
draw_call.begin();
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
do
|
||||
{
|
||||
emit_geometry(sub_index++);
|
||||
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
// We already completed. End the draw.
|
||||
draw_call.end();
|
||||
}
|
||||
}
|
||||
while (draw_call.next());
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
|
||||
{
|
||||
|
@ -477,22 +477,6 @@ namespace
|
||||
|
||||
idx++;
|
||||
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[idx].descriptorCount = 1;
|
||||
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot;
|
||||
bindings[idx].pImmutableSamplers = nullptr;
|
||||
|
||||
idx++;
|
||||
|
||||
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[idx].descriptorCount = 1;
|
||||
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[idx].binding = binding_table.instancing_constants_buffer_slot;
|
||||
bindings[idx].pImmutableSamplers = nullptr;
|
||||
|
||||
idx++;
|
||||
|
||||
for (auto binding = binding_table.textures_first_bind_slot;
|
||||
binding < binding_table.vertex_textures_first_bind_slot;
|
||||
binding++)
|
||||
@ -659,7 +643,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
|
||||
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) },
|
||||
|
||||
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
|
||||
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 }
|
||||
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }
|
||||
};
|
||||
m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls);
|
||||
|
||||
@ -677,7 +661,6 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
|
||||
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
|
||||
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
|
||||
m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer");
|
||||
m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "instancing data buffer");
|
||||
|
||||
const auto shadermode = g_cfg.video.shadermode.get();
|
||||
|
||||
@ -966,7 +949,6 @@ VKGSRender::~VKGSRender()
|
||||
m_vertex_instructions_buffer.destroy();
|
||||
m_fragment_instructions_buffer.destroy();
|
||||
m_raster_env_ring_info.destroy();
|
||||
m_instancing_buffer_ring_info.destroy();
|
||||
|
||||
// Fallback bindables
|
||||
null_buffer.reset();
|
||||
@ -1304,8 +1286,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
m_fragment_constants_ring_info.is_critical() ||
|
||||
m_transform_constants_ring_info.is_critical() ||
|
||||
m_index_buffer_ring_info.is_critical() ||
|
||||
m_raster_env_ring_info.is_critical() ||
|
||||
m_instancing_buffer_ring_info.is_critical();
|
||||
m_raster_env_ring_info.is_critical();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1337,9 +1318,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
heap_critical = m_vertex_layout_ring_info.is_critical();
|
||||
break;
|
||||
case VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE:
|
||||
heap_critical = (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
|
||||
? m_instancing_buffer_ring_info.is_critical()
|
||||
: m_transform_constants_ring_info.is_critical();
|
||||
heap_critical = m_transform_constants_ring_info.is_critical();
|
||||
break;
|
||||
case VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE:
|
||||
heap_critical = m_fragment_constants_ring_info.is_critical();
|
||||
@ -1382,7 +1361,6 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||
m_attrib_ring_info.reset_allocation_stats();
|
||||
m_texture_upload_buffer_ring_info.reset_allocation_stats();
|
||||
m_raster_env_ring_info.reset_allocation_stats();
|
||||
m_instancing_buffer_ring_info.reset_allocation_stats();
|
||||
m_current_frame->reset_heap_ptrs();
|
||||
m_last_heap_sync_time = rsx::get_shared_tag();
|
||||
}
|
||||
@ -2152,7 +2130,6 @@ void VKGSRender::load_program_env()
|
||||
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
|
||||
const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty));
|
||||
const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw;
|
||||
|
||||
if (update_vertex_env)
|
||||
{
|
||||
@ -2162,8 +2139,8 @@ void VKGSRender::load_program_env()
|
||||
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
|
||||
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
|
||||
|
||||
m_draw_processor.fill_scale_offset_data(buf, false);
|
||||
m_draw_processor.fill_user_clip_data(buf + 64);
|
||||
fill_scale_offset_data(buf, false);
|
||||
fill_user_clip_data(buf + 64);
|
||||
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
|
||||
*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale();
|
||||
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
|
||||
@ -2173,32 +2150,7 @@ void VKGSRender::load_program_env()
|
||||
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 };
|
||||
}
|
||||
|
||||
if (update_instancing_data)
|
||||
{
|
||||
// Combines transform load + instancing lookup table
|
||||
const auto alignment = m_device->gpu().get_limits().minStorageBufferOffsetAlignment;
|
||||
usz indirection_table_offset = 0;
|
||||
usz constants_data_table_offset = 0;
|
||||
|
||||
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size);
|
||||
});
|
||||
|
||||
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size);
|
||||
});
|
||||
|
||||
m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, *m_vertex_prog);
|
||||
m_instancing_buffer_ring_info.unmap();
|
||||
|
||||
m_instancing_indirection_buffer_info = { m_instancing_buffer_ring_info.heap->value, indirection_table_offset, indirection_table_buf.size() };
|
||||
m_instancing_constants_array_buffer_info = { m_instancing_buffer_ring_info.heap->value, constants_data_table_offset, constants_array_buf.size() };
|
||||
}
|
||||
else if (update_transform_constants)
|
||||
if (update_transform_constants)
|
||||
{
|
||||
// Transform constants
|
||||
usz mem_offset = 0;
|
||||
@ -2248,7 +2200,7 @@ void VKGSRender::load_program_env()
|
||||
auto mem = m_fragment_env_ring_info.alloc<256>(256);
|
||||
auto buf = m_fragment_env_ring_info.map(mem, 32);
|
||||
|
||||
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
|
||||
fill_fragment_state_buffer(buf, current_fragment_program);
|
||||
m_fragment_env_ring_info.unmap();
|
||||
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 };
|
||||
}
|
||||
@ -2343,24 +2295,13 @@ void VKGSRender::load_program_env()
|
||||
m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
|
||||
{
|
||||
m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
||||
}
|
||||
|
||||
// Clear flags
|
||||
u32 handled_flags = rsx::pipeline_state::fragment_state_dirty |
|
||||
m_graphics_state.clear(
|
||||
rsx::pipeline_state::fragment_state_dirty |
|
||||
rsx::pipeline_state::vertex_state_dirty |
|
||||
rsx::pipeline_state::transform_constants_dirty |
|
||||
rsx::pipeline_state::fragment_constants_dirty |
|
||||
rsx::pipeline_state::fragment_texture_state_dirty;
|
||||
|
||||
if (!update_instancing_data)
|
||||
{
|
||||
handled_flags |= rsx::pipeline_state::transform_constants_dirty;
|
||||
}
|
||||
|
||||
m_graphics_state.clear(handled_flags);
|
||||
rsx::pipeline_state::fragment_texture_state_dirty);
|
||||
}
|
||||
|
||||
void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
|
||||
@ -2376,7 +2317,7 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
|
||||
const auto constant_ids = (transform_constants_size == 8192)
|
||||
? std::span<const u16>{}
|
||||
: std::span<const u16>(m_vertex_prog->constant_ids);
|
||||
m_draw_processor.fill_vertex_program_constants_data(buf, constant_ids);
|
||||
fill_vertex_program_constants_data(buf, constant_ids);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2419,14 +2360,8 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
|
||||
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
|
||||
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
|
||||
|
||||
m_draw_processor.fill_vertex_layout_state(
|
||||
m_vertex_layout,
|
||||
current_vp_metadata,
|
||||
vertex_info.first_vertex,
|
||||
vertex_info.allocated_vertex_count,
|
||||
static_cast<s32*>(dst),
|
||||
vertex_info.persistent_window_offset,
|
||||
vertex_info.volatile_window_offset);
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast<s32*>(dst),
|
||||
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
|
||||
|
||||
m_vertex_layout_ring_info.unmap();
|
||||
}
|
||||
@ -2547,8 +2482,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
||||
m_index_buffer_ring_info.is_dirty() ||
|
||||
m_transform_constants_ring_info.is_dirty() ||
|
||||
m_texture_upload_buffer_ring_info.is_dirty() ||
|
||||
m_raster_env_ring_info.is_dirty() ||
|
||||
m_instancing_buffer_ring_info.is_dirty())
|
||||
m_raster_env_ring_info.is_dirty())
|
||||
{
|
||||
auto secondary_command_buffer = m_secondary_cb_list.next();
|
||||
secondary_command_buffer->begin();
|
||||
@ -2563,7 +2497,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
||||
m_transform_constants_ring_info.sync(*secondary_command_buffer);
|
||||
m_texture_upload_buffer_ring_info.sync(*secondary_command_buffer);
|
||||
m_raster_env_ring_info.sync(*secondary_command_buffer);
|
||||
m_instancing_buffer_ring_info.sync(*secondary_command_buffer);
|
||||
|
||||
secondary_command_buffer->end();
|
||||
|
||||
|
@ -149,7 +149,6 @@ private:
|
||||
vk::data_heap m_index_buffer_ring_info; // Index data
|
||||
vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap
|
||||
vk::data_heap m_raster_env_ring_info; // Raster control such as polygon and line stipple
|
||||
vk::data_heap m_instancing_buffer_ring_info; // Instanced rendering data (constants indirection table + instanced constants)
|
||||
|
||||
vk::data_heap m_fragment_instructions_buffer;
|
||||
vk::data_heap m_vertex_instructions_buffer;
|
||||
@ -161,8 +160,6 @@ private:
|
||||
VkDescriptorBufferInfo m_fragment_constants_buffer_info {};
|
||||
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info {};
|
||||
VkDescriptorBufferInfo m_raster_env_buffer_info {};
|
||||
VkDescriptorBufferInfo m_instancing_indirection_buffer_info {};
|
||||
VkDescriptorBufferInfo m_instancing_constants_array_buffer_info{};
|
||||
|
||||
VkDescriptorBufferInfo m_vertex_instructions_buffer_info {};
|
||||
VkDescriptorBufferInfo m_fragment_instructions_buffer_info {};
|
||||
|
@ -197,7 +197,6 @@ namespace vk
|
||||
s64 index_heap_ptr = 0;
|
||||
s64 texture_upload_heap_ptr = 0;
|
||||
s64 rasterizer_env_heap_ptr = 0;
|
||||
s64 instancing_heap_ptr = 0;
|
||||
|
||||
u64 last_frame_sync_time = 0;
|
||||
|
||||
@ -219,7 +218,6 @@ namespace vk
|
||||
index_heap_ptr = other.index_heap_ptr;
|
||||
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
|
||||
rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr;
|
||||
instancing_heap_ptr = other.instancing_heap_ptr;
|
||||
}
|
||||
|
||||
// Exchange storage (non-copyable)
|
||||
@ -231,7 +229,7 @@ namespace vk
|
||||
void tag_frame_end(
|
||||
s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc,
|
||||
s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc,
|
||||
s64 texture_loc, s64 rasterizer_loc, s64 instancing_loc)
|
||||
s64 texture_loc, s64 rasterizer_loc)
|
||||
{
|
||||
attrib_heap_ptr = attrib_loc;
|
||||
vtx_env_heap_ptr = vtxenv_loc;
|
||||
@ -243,7 +241,6 @@ namespace vk
|
||||
index_heap_ptr = index_loc;
|
||||
texture_upload_heap_ptr = texture_loc;
|
||||
rasterizer_env_heap_ptr = rasterizer_loc;
|
||||
instancing_heap_ptr = instancing_loc;
|
||||
|
||||
last_frame_sync_time = rsx::get_shared_tag();
|
||||
}
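
// A minimal illustrative sketch, not part of the RPCS3 sources: tag_frame_end() above
// snapshots each ring heap's put pointer when a frame is submitted, and frame_context_cleanup()
// later writes those snapshots back into the heaps' get pointers once the GPU has consumed the
// frame, releasing the space. The types below are hypothetical and omit the minus-one detail.
struct ring_heap_model
{
    long long put_pos = 0; // producer cursor (CPU writes up to here)
    long long get_pos = 0; // consumer cursor (space behind it may be reused)
};

struct frame_snapshot
{
    long long heap_put_pos = 0;
    void capture(const ring_heap_model& heap) { heap_put_pos = heap.put_pos; }  // at submit
    void release(ring_heap_model& heap) const { heap.get_pos = heap_put_pos; }  // when GPU is done
};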
|
||||
|
@ -163,8 +163,7 @@ void VKGSRender::advance_queued_frames()
|
||||
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
|
||||
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
|
||||
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(),
|
||||
m_raster_env_ring_info.get_current_put_pos_minus_one(),
|
||||
m_instancing_buffer_ring_info.get_current_put_pos_minus_one());
|
||||
m_raster_env_ring_info.get_current_put_pos_minus_one());
|
||||
|
||||
m_queued_frames.push_back(m_current_frame);
|
||||
ensure(m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES);
|
||||
@ -267,8 +266,6 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_raster_env_ring_info.m_get_pos = ctx->rasterizer_env_heap_ptr;
m_instancing_buffer_ring_info.m_get_pos = ctx->instancing_heap_ptr;

m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
@ -279,8 +276,6 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx)
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
m_raster_env_ring_info.notify();
m_instancing_buffer_ring_info.notify();
}
}
@ -217,7 +217,7 @@ namespace
vk::vertex_upload_info VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
auto result = std::visit(visitor, m_draw_processor.get_draw_command(rsx::method_registers));
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));

const u32 vertex_count = (result.max_index - result.min_index) + 1;
u32 vertex_base = result.min_index;
@ -294,7 +294,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
const usz volatile_offset_in_block = volatile_offset - persistent_offset;

void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, static_cast<char*>(block_mapping) + volatile_offset_in_block);
m_attrib_ring_info.unmap();
}
else
@ -302,14 +302,14 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (required.first > 0 && persistent_offset != umax)
{
void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_attrib_ring_info.unmap();
}

if (required.second > 0)
{
void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second);
m_draw_processor.write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_attrib_ring_info.unmap();
}
}
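For readers unfamiliar with the upload_vertex_data paths touched above: vertex data is split into a persistent range (arrays fetched from guest memory) and a volatile range (register/immediate attributes). When both ranges come from one heap block, a single mapping is reused and the volatile destination is derived via volatile_offset_in_block; otherwise each range is mapped separately. A rough sketch of the contiguous case, using a plain byte buffer and placeholder sizes instead of the real vk::data_heap:

#include <cstddef>
#include <cstring>
#include <vector>

int main()
{
    // Stand-in for the Vulkan attribute ring heap.
    std::vector<char> heap(1 << 16);

    const std::size_t persistent_size = 4096; // interleaved vertex arrays (placeholder size)
    const std::size_t volatile_size = 256;    // register/immediate attributes (placeholder size)
    const std::size_t persistent_offset = 0;
    const std::size_t volatile_offset = persistent_offset + persistent_size; // contiguous block case

    // One mapping covers both ranges, mirroring the block_mapping path above.
    char* block_mapping = heap.data() + persistent_offset;
    const std::size_t volatile_offset_in_block = volatile_offset - persistent_offset;

    char* persistent_dst = block_mapping;
    char* volatile_dst = block_mapping + volatile_offset_in_block;

    // write_vertex_data_to_memory would stream attribute data into these two destinations;
    // memset here only marks the regions for illustration.
    std::memset(persistent_dst, 0x00, persistent_size);
    std::memset(volatile_dst, 0xCC, volatile_size);
}

The revert only changes who performs the write (the renderer itself rather than m_draw_processor); the persistent/volatile layout is unchanged.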
@ -32,34 +32,31 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "#version 450\n\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n\n";

OS <<
"layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"
"{\n"
" mat4 scale_offset_mat;\n"
" ivec4 user_clip_enabled[2];\n"
" vec4 user_clip_factor[2];\n"
" uint transform_branch_bits;\n"
" float point_size;\n"
" float z_near;\n"
" float z_far;\n"
"};\n\n";
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
OS << "{\n";
OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n";
OS << " float point_size;\n";
OS << " float z_near;\n";
OS << " float z_far;\n";
OS << "};\n\n";

if (m_device_props.emulate_conditional_rendering)
{
OS <<
"layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n"
"{\n"
" uint conditional_rendering_predicate;\n"
"};\n\n";
OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n";
OS << "{\n";
OS << " uint conditional_rendering_predicate;\n";
OS << "};\n\n";
}

OS <<
"layout(push_constant) uniform VertexLayoutBuffer\n"
"{\n"
" uint vertex_base_index;\n"
" uint vertex_index_offset;\n"
" uint draw_id;\n"
" uint layout_ptr_offset;\n";
OS << "layout(push_constant) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
OS << " uint draw_id;\n";
OS << " uint layout_ptr_offset;\n";

if (m_device_props.emulate_conditional_rendering)
{
@ -113,50 +110,18 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
{
if (PI.name.starts_with("vc["))
{
if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS))
{
OS << "layout(std140, set=0, binding=" << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 " << PI.name << ";\n";
OS << "};\n\n";
OS << "layout(std140, set=0, binding = " << static_cast<int>(m_binding_table.vertex_constant_buffers_bind_slot) << ") uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 " << PI.name << ";\n";
OS << "};\n\n";

in.location = m_binding_table.vertex_constant_buffers_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;

inputs.push_back(in);
continue;
}
else
{
// 1. Bind indirection lookup buffer
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n";
OS << "{\n";
OS << " int constants_addressing_lookup[];\n";
OS << "};\n\n";

in.location = m_binding_table.instancing_lookup_table_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "InstancingData";
in.type = vk::glsl::input_type_storage_buffer;
inputs.push_back(in);

// 2. Bind actual constants buffer
OS << "layout(std430, set=0, binding=" << static_cast<int>(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 instanced_constants_array[];\n";
OS << "};\n\n";

OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 468 : ::size32(m_constant_ids)) << "\n\n";

in.location = m_binding_table.instancing_constants_buffer_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_storage_buffer;
inputs.push_back(in);
continue;
}
inputs.push_back(in);
continue;
}

if (PT.type == "sampler2D" ||
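The reverted insertConstants branch above declares two SSBOs for instanced constants: an indirection table (constants_addressing_lookup) and a flat per-instance constants pool (instanced_constants_array), bound at the instancing slots removed from the binding table further down. A sketch of how a decompiler could emit a per-instance fetch helper on top of those declarations follows, written in the same stringstream style; the _fetch_constant name and the exact addressing expression are illustrative assumptions, not the code removed by this revert.

#include <iostream>
#include <sstream>

int main()
{
    std::stringstream OS;

    // Declarations as in the reverted branch (binding numbers match the reverted binding table: 10 and 11).
    OS << "layout(std430, set=0, binding=10) readonly buffer InstancingData\n";
    OS << "{\n";
    OS << " int constants_addressing_lookup[];\n";
    OS << "};\n\n";
    OS << "layout(std430, set=0, binding=11) readonly buffer VertexConstantsBuffer\n";
    OS << "{\n";
    OS << " vec4 instanced_constants_array[];\n";
    OS << "};\n\n";
    OS << "#define CONSTANTS_ARRAY_LENGTH 468\n\n";

    // Hypothetical helper: each instance owns one CONSTANTS_ARRAY_LENGTH-sized slice of the pool,
    // and the lookup table remaps a register index to its slot within that slice.
    OS << "vec4 _fetch_constant(const in int base_offset)\n";
    OS << "{\n";
    OS << " int slot = constants_addressing_lookup[base_offset];\n";
    OS << " return instanced_constants_array[gl_InstanceIndex * CONSTANTS_ARRAY_LENGTH + slot];\n";
    OS << "}\n";

    std::cout << OS.str();
}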
@ -244,7 +209,6 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64;
properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor());
properties2.require_explicit_invariance = (vk::is_NVIDIA(vk::get_driver_vendor()) && g_cfg.video.shader_precision != gpu_preset_level::low);
properties2.require_instanced_render = !!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS);

glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_vulkan);
@ -14,10 +14,8 @@ namespace vk
u8 vertex_buffers_first_bind_slot = 5;
u8 conditional_render_predicate_slot = 8;
u8 rasterizer_env_bind_slot = 9;
u8 instancing_lookup_table_bind_slot = 10;
u8 instancing_constants_buffer_slot = 11;
u8 textures_first_bind_slot = 12;
u8 vertex_textures_first_bind_slot = 12; // Invalid, has to be initialized properly
u8 textures_first_bind_slot = 10;
u8 vertex_textures_first_bind_slot = 10; // Invalid, has to be initialized properly
u8 total_descriptor_bindings = vertex_textures_first_bind_slot; // Invalid, has to be initialized properly
};
}
@ -455,8 +455,7 @@ namespace gcm
RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used??

// Custom
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000 // Rasterizing triangles and not lines or points
};

// GCM Reports
@ -104,7 +104,6 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp" />
<ClCompile Include="Emu\RSX\Host\MM.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
@ -620,8 +619,6 @@
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXContext.h" />
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h" />
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />
@ -1315,9 +1315,6 @@
<ClCompile Include="Emu\RSX\Host\MM.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Core\RSXDrawCommands.cpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2653,12 +2650,6 @@
<ClInclude Include="Emu\RSX\Host\MM.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDrawCommands.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDriverState.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\NP\fb_helpers.h">
<Filter>Emu\NP</Filter>
</ClInclude>