mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-21 00:39:53 +00:00
Vulkan hotfix (#3046)
* Rework vertex attribute binding for Vulkan. A buffer view is now always provided to the pipeline for every attribute the vertex shader consumes, even if the game has that attribute disabled.
This commit is contained in:
parent
9747ab61f9
commit
df8fa74e2a
@ -468,7 +468,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma
|
||||
return std::apply_visitor(
|
||||
draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(),
|
||||
[this](
|
||||
const auto& state, const auto& list) { return get_vertex_buffers(state, list); }),
|
||||
const auto& state, const auto& list) { return get_vertex_buffers(state, list, 0); }),
|
||||
get_draw_command(rsx::method_registers));
|
||||
}
|
||||
|
||||
|
@ -454,7 +454,7 @@ std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_b
|
||||
auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
|
||||
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
|
||||
[this](const auto& state, const auto& list) {
|
||||
return this->get_vertex_buffers(state, list);
|
||||
return this->get_vertex_buffers(state, list, 0);
|
||||
}),
|
||||
get_draw_command(rsx::method_registers));
|
||||
|
||||
|
@ -748,7 +748,7 @@ namespace rsx
|
||||
}
|
||||
|
||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
||||
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const
|
||||
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges, const u64 consumed_attrib_mask) const
|
||||
{
|
||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
|
||||
result.reserve(rsx::limits::vertex_count);
|
||||
@ -756,8 +756,10 @@ namespace rsx
|
||||
u32 input_mask = state.vertex_attrib_input_mask();
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
bool enabled = !!(input_mask & (1 << index));
|
||||
if (!enabled)
|
||||
const bool enabled = !!(input_mask & (1 << index));
|
||||
const bool consumed = !!(consumed_attrib_mask & (1ull << index));
|
||||
|
||||
if (!enabled && !consumed)
|
||||
continue;
|
||||
|
||||
if (state.vertex_arrays_info[index].size() > 0)
|
||||
|
@ -213,7 +213,7 @@ namespace rsx
|
||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||
|
||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
||||
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges, const u64 consumed_attrib_mask) const;
|
||||
|
||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||
get_draw_command(const rsx::rsx_state& state) const;
|
||||
|
@ -102,8 +102,7 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
|
||||
|
||||
void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||
{
|
||||
int location = 0;
|
||||
|
||||
int location = TEXTURES_FIRST_BIND_SLOT;
|
||||
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
|
||||
{
|
||||
if (PT.type != "sampler1D" &&
|
||||
@ -142,7 +141,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||
|
||||
inputs.push_back(in);
|
||||
|
||||
OS << "layout(set=0, binding=" << 19 + location++ << ") uniform " << samplerType << " " << PI.name << ";\n";
|
||||
OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n";
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,7 +168,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||
OS << "};\n";
|
||||
|
||||
vk::glsl::program_input in;
|
||||
in.location = 1;
|
||||
in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
|
||||
in.domain = vk::glsl::glsl_fragment_program;
|
||||
in.name = "FragmentConstantsBuffer";
|
||||
in.type = vk::glsl::input_type_uniform_buffer;
|
||||
|
@ -1420,6 +1420,7 @@ namespace vk
|
||||
public:
|
||||
VkPipeline pipeline;
|
||||
u64 attribute_location_mask;
|
||||
u64 vertex_attributes_mask;
|
||||
|
||||
program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs);
|
||||
program(const program&) = delete;
|
||||
@ -1432,6 +1433,8 @@ namespace vk
|
||||
void bind_uniform(VkDescriptorImageInfo image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set);
|
||||
void bind_uniform(VkDescriptorBufferInfo buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set);
|
||||
void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set);
|
||||
|
||||
u64 get_vertex_input_attributes_mask();
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@ namespace vk
|
||||
load_uniforms(glsl::program_domain::glsl_vertex_program, vertex_input);
|
||||
load_uniforms(glsl::program_domain::glsl_vertex_program, fragment_inputs);
|
||||
attribute_location_mask = 0;
|
||||
vertex_attributes_mask = 0;
|
||||
}
|
||||
|
||||
program::~program()
|
||||
@ -58,10 +59,10 @@ namespace vk
|
||||
descriptor_writer.pImageInfo = &image_descriptor;
|
||||
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
descriptor_writer.dstArrayElement = 0;
|
||||
descriptor_writer.dstBinding = uniform.location + TEXTURES_FIRST_BIND_SLOT;
|
||||
descriptor_writer.dstBinding = uniform.location;
|
||||
|
||||
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
|
||||
attribute_location_mask |= (1ull << (uniform.location + TEXTURES_FIRST_BIND_SLOT));
|
||||
attribute_location_mask |= (1ull << uniform.location);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -97,15 +98,32 @@ namespace vk
|
||||
descriptor_writer.pTexelBufferView = &buffer_view;
|
||||
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
||||
descriptor_writer.dstArrayElement = 0;
|
||||
descriptor_writer.dstBinding = uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT;
|
||||
descriptor_writer.dstBinding = uniform.location;
|
||||
|
||||
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
|
||||
attribute_location_mask |= (1ull << (uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT));
|
||||
attribute_location_mask |= (1ull << uniform.location);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_NOTICE(RSX, "vertex buffer not found in program: %s", binding_name.c_str());
|
||||
}
|
||||
|
||||
// Returns a bitmask describing which vertex attribute buffers this program declares.
//
// A bit is set for every entry in `uniforms` that belongs to the vertex program
// domain and has the texel-buffer input type. The bit index is the uniform's
// binding location minus VERTEX_BUFFERS_FIRST_BIND_SLOT.
//
// The result is cached in `vertex_attributes_mask`: a non-zero cached value is
// returned immediately. Zero doubles as the "not computed yet" state, so a program
// with no matching uniforms re-scans `uniforms` on every call.
u64 program::get_vertex_input_attributes_mask()
|
||||
{
|
||||
// Fast path: reuse the mask computed by an earlier call.
if (vertex_attributes_mask)
|
||||
return vertex_attributes_mask;
|
||||
|
||||
// Slow path: accumulate one bit per vertex-domain texel buffer uniform.
for (auto &uniform : uniforms)
|
||||
{
|
||||
if (uniform.domain == program_domain::glsl_vertex_program &&
|
||||
uniform.type == program_input_type::input_type_texel_buffer)
|
||||
{
|
||||
// Convert the descriptor binding slot back to a zero-based bit index.
vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
return vertex_attributes_mask;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -259,6 +259,9 @@ namespace
|
||||
|
||||
void operator()(const rsx::vertex_array_buffer& vertex_array)
|
||||
{
|
||||
if (!m_program->has_uniform(s_reg_table[vertex_array.index]))
|
||||
return;
|
||||
|
||||
// Fill vertex_array
|
||||
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
|
||||
u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
|
||||
@ -284,6 +287,9 @@ namespace
|
||||
|
||||
void operator()(const rsx::vertex_array_register& vertex_register)
|
||||
{
|
||||
if (!m_program->has_uniform(s_reg_table[vertex_register.index]))
|
||||
return;
|
||||
|
||||
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
|
||||
const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);
|
||||
|
||||
@ -316,11 +322,10 @@ namespace
|
||||
|
||||
// Visitor overload for rsx::empty_vertex_array: binds a buffer view to the program
// uniform named s_reg_table[vbo.index].
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the old and
// the new body appear back to back. The hunk header (11 old lines, 10 new lines)
// suggests the split annotated below - confirm against the repository.
void operator()(const rsx::empty_vertex_array& vbo)
|
||||
{
|
||||
// NOTE(review): presumably the removed pre-commit body - allocates 32 bytes from the
// attribute ring buffer, zero-fills them and wraps them in an R32_SFLOAT view.
size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(32);
|
||||
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, 32);
|
||||
memset(dst, 0, 32);
|
||||
m_attrib_ring_info.unmap();
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R32_SFLOAT, offset_in_attrib_buffer, 32));
|
||||
// NOTE(review): presumably the added post-commit body - does nothing when the program
// has no uniform for this attribute.
if (!m_program->has_uniform(s_reg_table[vbo.index]))
|
||||
return;
|
||||
|
||||
// Creates an R8G8B8A8_UNORM view over the ring buffer heap with offset 0 and size 0
// (no ring buffer allocation) and keeps it in m_buffer_view_to_clean.
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
|
||||
// Binds the most recently created view to the attribute's uniform.
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets);
|
||||
}
|
||||
|
||||
@ -476,28 +481,35 @@ namespace
|
||||
for (int i = 0; i < vertex_buffers.size(); ++i)
|
||||
{
|
||||
const auto &vbo = vertex_buffers[i];
|
||||
bool can_multithread = false;
|
||||
|
||||
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
|
||||
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
|
||||
{
|
||||
//vertex array buffer. We can thread this thing heavily
|
||||
const auto& v = vbo.get<rsx::vertex_array_buffer>();
|
||||
|
||||
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
|
||||
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
|
||||
if (v.attribute_size > 1)
|
||||
{
|
||||
can_multithread = true;
|
||||
|
||||
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
|
||||
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
|
||||
|
||||
u32 upload_size = real_element_size * vertex_count;
|
||||
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
|
||||
u32 upload_size = real_element_size * vertex_count;
|
||||
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
|
||||
|
||||
memory_allocations.push_back(offset);
|
||||
allocated_sizes.push_back(upload_size);
|
||||
upload_jobs.push_back(i);
|
||||
memory_allocations.push_back(offset);
|
||||
allocated_sizes.push_back(upload_size);
|
||||
upload_jobs.push_back(i);
|
||||
|
||||
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
|
||||
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
|
||||
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
|
||||
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
|
||||
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
if (!can_multithread)
|
||||
std::apply_visitor(visitor, vbo);
|
||||
}
|
||||
|
||||
@ -595,13 +607,16 @@ namespace
|
||||
sizeof(u32)) /
|
||||
stride;
|
||||
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
|
||||
|
||||
if (!m_program->has_uniform(s_reg_table[index])) continue;
|
||||
|
||||
if (!vertex_info.size()) // disabled
|
||||
{
|
||||
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
|
||||
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -624,8 +639,7 @@ namespace
|
||||
|
||||
// TODO: properly handle cmp type
|
||||
if (vertex_info.type() == rsx::vertex_base_type::cmp)
|
||||
LOG_ERROR(
|
||||
RSX, "Compressed vertex attributes not supported for inlined arrays yet");
|
||||
LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");
|
||||
|
||||
switch (vertex_info.type())
|
||||
{
|
||||
@ -668,6 +682,6 @@ VKGSRender::upload_vertex_data()
|
||||
{
|
||||
draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
|
||||
descriptor_sets, m_buffer_view_to_clean,
|
||||
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range);}, this);
|
||||
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, m_program->get_vertex_input_attributes_mask());}, this);
|
||||
return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
||||
OS << "};\n";
|
||||
|
||||
vk::glsl::program_input in;
|
||||
in.location = 0;
|
||||
in.location = SCALE_OFFSET_BIND_SLOT;
|
||||
in.domain = vk::glsl::glsl_vertex_program;
|
||||
in.name = "ScaleOffsetBuffer";
|
||||
in.type = vk::glsl::input_type_uniform_buffer;
|
||||
@ -64,7 +64,6 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
|
||||
|
||||
std::sort(input_data.begin(), input_data.end());
|
||||
|
||||
int location = 2;
|
||||
for (const std::tuple<size_t, std::string> item : input_data)
|
||||
{
|
||||
for (const ParamType &PT : inputs)
|
||||
@ -74,7 +73,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
|
||||
if (PI.name == std::get<1>(item))
|
||||
{
|
||||
vk::glsl::program_input in;
|
||||
in.location = location;
|
||||
in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT;
|
||||
in.domain = vk::glsl::glsl_vertex_program;
|
||||
in.name = PI.name + "_buffer";
|
||||
in.type = vk::glsl::input_type_texel_buffer;
|
||||
@ -92,7 +91,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
|
||||
}
|
||||
|
||||
std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer";
|
||||
OS << "layout(set = 0, binding=" << 3 + location++ << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
|
||||
OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -108,17 +107,15 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
|
||||
OS << "};\n\n";
|
||||
|
||||
vk::glsl::program_input in;
|
||||
in.location = 1;
|
||||
in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT;
|
||||
in.domain = vk::glsl::glsl_vertex_program;
|
||||
in.name = "VertexConstantsBuffer";
|
||||
in.type = vk::glsl::input_type_uniform_buffer;
|
||||
|
||||
inputs.push_back(in);
|
||||
|
||||
//We offset this value by the index of the first fragment texture (19) below
|
||||
//and allow 16 fragment textures to precede this slot
|
||||
int location = 16;
|
||||
|
||||
int location = VERTEX_TEXTURES_FIRST_BIND_SLOT;
|
||||
for (const ParamType &PT : constants)
|
||||
{
|
||||
for (const ParamItem &PI : PT.items)
|
||||
@ -137,7 +134,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
|
||||
|
||||
inputs.push_back(in);
|
||||
|
||||
OS << "layout(set = 0, binding=" << 19 + location++ << ") uniform " << PT.type << " " << PI.name << ";\n";
|
||||
OS << "layout(set = 0, binding=" << location++ << ") uniform " << PT.type << " " << PI.name << ";\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user