diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 8868b9a219..72e01918fd 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -468,7 +468,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma return std::apply_visitor( draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(), [this]( - const auto& state, const auto& list) { return get_vertex_buffers(state, list); }), + const auto& state, const auto& list) { return get_vertex_buffers(state, list, 0); }), get_draw_command(rsx::method_registers)); } diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 6c5c143519..e24b9b6559 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -454,7 +454,7 @@ std::tuple>> GLGSRender::set_vertex_b auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer, m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment, [this](const auto& state, const auto& list) { - return this->get_vertex_buffers(state, list); + return this->get_vertex_buffers(state, list, 0); }), get_draw_command(rsx::method_registers)); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 512c290940..09e8208af2 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -748,7 +748,7 @@ namespace rsx } std::vector> - thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const + thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges, const u64 consumed_attrib_mask) const { std::vector> result; result.reserve(rsx::limits::vertex_count); @@ -756,8 +756,10 @@ namespace rsx u32 input_mask = state.vertex_attrib_input_mask(); for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { - bool enabled = !!(input_mask & (1 << index)); - if (!enabled) + const bool enabled = !!(input_mask & (1 << index)); + const bool consumed = !!(consumed_attrib_mask & (1ull << index)); + + if (!enabled && !consumed) continue; if (state.vertex_arrays_info[index].size() > 0) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 9f3a12cdbf..86257d72af 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -213,7 +213,7 @@ namespace rsx gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; std::vector> - get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const; + get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges, const u64 consumed_attrib_mask) const; std::variant get_draw_command(const rsx::rsx_state& state) const; diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 861b5e250e..7ddef3c22a 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -102,8 +102,7 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { - int location = 0; - + int location = TEXTURES_FIRST_BIND_SLOT; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { if (PT.type != "sampler1D" && @@ -142,7 +141,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) inputs.push_back(in); - OS << "layout(set=0, binding=" << 19 + location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; } } @@ -169,7 +168,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << "};\n"; vk::glsl::program_input in; - in.location = 1; + in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; in.domain = vk::glsl::glsl_fragment_program; in.name = "FragmentConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 479172856c..93d6d2cb85 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1420,6 +1420,7 @@ namespace vk public: VkPipeline pipeline; u64 attribute_location_mask; + u64 vertex_attributes_mask; program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs); program(const program&) = delete; @@ -1432,6 +1433,8 @@ namespace vk void bind_uniform(VkDescriptorImageInfo image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set); void bind_uniform(VkDescriptorBufferInfo buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set); void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set); + + u64 get_vertex_input_attributes_mask(); }; } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 50118456f6..2b836f38f8 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -11,6 +11,7 @@ namespace vk load_uniforms(glsl::program_domain::glsl_vertex_program, vertex_input); load_uniforms(glsl::program_domain::glsl_vertex_program, fragment_inputs); attribute_location_mask = 0; + vertex_attributes_mask = 0; } program::~program() @@ -58,10 +59,10 @@ namespace vk descriptor_writer.pImageInfo = &image_descriptor; descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; descriptor_writer.dstArrayElement = 0; - descriptor_writer.dstBinding = uniform.location + TEXTURES_FIRST_BIND_SLOT; + descriptor_writer.dstBinding = uniform.location; vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); - attribute_location_mask |= (1ull << (uniform.location + TEXTURES_FIRST_BIND_SLOT)); + attribute_location_mask |= (1ull << uniform.location); return; } } @@ -97,15 +98,32 @@ namespace vk descriptor_writer.pTexelBufferView = &buffer_view; descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; descriptor_writer.dstArrayElement = 0; - descriptor_writer.dstBinding = uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT; + descriptor_writer.dstBinding = uniform.location; vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); - attribute_location_mask |= (1ull << (uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT)); + attribute_location_mask |= (1ull << uniform.location); return; } } LOG_NOTICE(RSX, "vertex buffer not found in program: %s", binding_name.c_str()); } + + u64 program::get_vertex_input_attributes_mask() + { + if (vertex_attributes_mask) + return vertex_attributes_mask; + + for (auto &uniform : uniforms) + { + if (uniform.domain == program_domain::glsl_vertex_program && + uniform.type == program_input_type::input_type_texel_buffer) + { + vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT)); + } + } + + return vertex_attributes_mask; + } } } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 2f50fe2cdd..8c89beb2de 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -259,6 +259,9 @@ namespace void operator()(const rsx::vertex_array_buffer& vertex_array) { + if (!m_program->has_uniform(s_reg_table[vertex_array.index])) + return; + // Fill vertex_array u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); @@ -284,6 +287,9 @@ namespace void operator()(const rsx::vertex_array_register& vertex_register) { + if (!m_program->has_uniform(s_reg_table[vertex_register.index])) + return; + size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size); const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size); @@ -316,11 +322,10 @@ namespace void operator()(const rsx::empty_vertex_array& vbo) { - size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(32); - void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, 32); - memset(dst, 0, 32); - m_attrib_ring_info.unmap(); - m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, VK_FORMAT_R32_SFLOAT, offset_in_attrib_buffer, 32)); + if (!m_program->has_uniform(s_reg_table[vbo.index])) + return; + + m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0)); m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets); } @@ -476,28 +481,35 @@ namespace for (int i = 0; i < vertex_buffers.size(); ++i) { const auto &vbo = vertex_buffers[i]; + bool can_multithread = false; - if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready()) + if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready()) { //vertex array buffer. We can thread this thing heavily const auto& v = vbo.get(); - u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size); - u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size); + if (v.attribute_size > 1) + { + can_multithread = true; + + u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size); + u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size); - u32 upload_size = real_element_size * vertex_count; - size_t offset = m_attrib_ring_info.alloc<256>(upload_size); + u32 upload_size = real_element_size * vertex_count; + size_t offset = m_attrib_ring_info.alloc<256>(upload_size); - memory_allocations.push_back(offset); - allocated_sizes.push_back(upload_size); - upload_jobs.push_back(i); + memory_allocations.push_back(offset); + allocated_sizes.push_back(upload_size); + upload_jobs.push_back(i); - const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size); + const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size); - m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets); + m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size)); + m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets); + } } - else + + if (!can_multithread) std::apply_visitor(visitor, vbo); } @@ -595,13 +607,16 @@ namespace sizeof(u32)) / stride; - for (int index = 0; index < rsx::limits::vertex_count; ++index) { + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { auto& vertex_info = rsx::method_registers.vertex_arrays_info[index]; if (!m_program->has_uniform(s_reg_table[index])) continue; if (!vertex_info.size()) // disabled { + m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0)); + m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets); continue; } @@ -624,8 +639,7 @@ namespace // TODO: properly handle cmp type if (vertex_info.type() == rsx::vertex_base_type::cmp) - LOG_ERROR( - RSX, "Compressed vertex attributes not supported for inlined arrays yet"); + LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet"); switch (vertex_info.type()) { @@ -668,6 +682,6 @@ VKGSRender::upload_vertex_data() { draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program, descriptor_sets, m_buffer_view_to_clean, - [this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range);}, this); + [this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, m_program->get_vertex_input_attributes_mask());}, this); return std::apply_visitor(visitor, get_draw_command(rsx::method_registers)); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index c7b01aab79..7978594e2a 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -38,7 +38,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "};\n"; vk::glsl::program_input in; - in.location = 0; + in.location = SCALE_OFFSET_BIND_SLOT; in.domain = vk::glsl::glsl_vertex_program; in.name = "ScaleOffsetBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -64,7 +64,6 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v std::sort(input_data.begin(), input_data.end()); - int location = 2; for (const std::tuple item : input_data) { for (const ParamType &PT : inputs) @@ -74,7 +73,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v if (PI.name == std::get<1>(item)) { vk::glsl::program_input in; - in.location = location; + in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT; in.domain = vk::glsl::glsl_vertex_program; in.name = PI.name + "_buffer"; in.type = vk::glsl::input_type_texel_buffer; @@ -92,7 +91,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v } std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer"; - OS << "layout(set = 0, binding=" << 3 + location++ << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n"; + OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n"; } } } @@ -108,17 +107,15 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "};\n\n"; vk::glsl::program_input in; - in.location = 1; + in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; in.domain = vk::glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); - //We offset this value by the index of the first fragment texture (19) below - //and allow 16 fragment textures to precede this slot - int location = 16; + int location = VERTEX_TEXTURES_FIRST_BIND_SLOT; for (const ParamType &PT : constants) { for (const ParamItem &PI : PT.items) @@ -137,7 +134,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std inputs.push_back(in); - OS << "layout(set = 0, binding=" << 19 + location++ << ") uniform " << PT.type << " " << PI.name << ";\n"; + OS << "layout(set = 0, binding=" << location++ << ") uniform " << PT.type << " " << PI.name << ";\n"; } } }