From 00b0311c867e4efc97e3947904b1f99296bfbc64 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 5 Aug 2017 00:11:14 +0300 Subject: [PATCH] rsx/gl/vulkan: Refactoring and partial vulkan rewrite - Updates vulkan to use GPU vertex processing - Rewrites vulkan to buffer entire frames and present when first available to avoid stalls - Move more state into dynamic descriptors to reduce program cache misses; Fix render pass conflicts before texture access - Discards incomplete cb at destruction to avoid refs to destroyed objects - Move set_viewport to the uninterruptible block before drawing in case cb is switched before we're ready - Manage frame contexts separately for easier async frame management - Avoid wasteful create-destroy cycles when sampling rtts --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 16 +- rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 10 +- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 7 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 515 +++++++++++++---------- rpcs3/Emu/RSX/VK/VKGSRender.h | 63 +-- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 26 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 10 + rpcs3/Emu/RSX/VK/VKTextOut.h | 3 + rpcs3/Emu/RSX/VK/VKTextureCache.h | 27 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 590 +++++++-------------------- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 127 ++---- 11 files changed, 592 insertions(+), 802 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 0737decd6c..6b6942494d 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -11,6 +11,12 @@ namespace glsl glsl_fragment_program = 1 }; + enum glsl_rules + { + glsl_rules_opengl4, + glsl_rules_rpirv + }; + static std::string getFloatTypeNameImpl(size_t elementCount) { switch (elementCount) @@ -48,8 +54,10 @@ namespace glsl fmt::throw_exception("Unknown compare function" HERE); } - static void insert_vertex_input_fetch(std::stringstream& OS) + static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules) { + std::string 
vertex_id_name = (rules == glsl_rules_opengl4) ? "gl_VertexID" : "gl_VertexIndex"; + //Actually decode a vertex attribute from a raw byte stream OS << "struct attribute_desc\n"; OS << "{\n"; @@ -194,16 +202,16 @@ namespace glsl OS << "{\n"; OS << " attribute_desc desc = fetch_desc(location);\n"; OS << "\n"; - OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n"; + OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"; OS << " if (desc.frequency == 0)\n"; OS << " vertex_id = 0;\n"; OS << " else if (desc.frequency > 1)\n"; OS << " {\n"; OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"; OS << " if (desc.modulo != 0)\n"; - OS << " vertex_id = gl_VertexID % desc.divisor;\n"; + OS << " vertex_id = " << vertex_id_name << " % desc.divisor;\n"; OS << " else\n"; - OS << " vertex_id = gl_VertexID / desc.divisor;\n"; + OS << " vertex_id = " << vertex_id_name << " / desc.divisor;\n"; OS << " }\n"; OS << "\n"; OS << " if (desc.is_volatile != 0)\n"; diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 353b9941af..164ae7934d 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -92,9 +92,8 @@ namespace vertex_input_state operator()(const rsx::draw_array_command& command) { - u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); - u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; - u32 max_index = vertex_count - 1 + min_index; + const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { @@ -153,7 +152,7 @@ namespace vertex_input_state operator()(const rsx::draw_inlined_array& command) { - u32 vertex_count = (u32)command.inline_vertex_array.size() 
* sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride; + const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride; if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { @@ -188,8 +187,7 @@ std::tuple>> GLGSRender::se auto &vertex_base = result.vertex_data_base; //Do actual vertex upload - auto &required = calculate_memory_requirements(m_vertex_layout, vertex_count); - + auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); std::pair persistent_mapping = {}, volatile_mapping = {}; diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 44e64ddd91..3035234287 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -149,15 +149,10 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: OS << "out vec4 front_spec_color;\n"; } -namespace -{ - -} - void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); - glsl::insert_vertex_input_fetch(OS); + glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4); std::string parameters = ""; for (int i = 0; i < 16; ++i) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 6e69a30e24..abb10b96b8 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -564,27 +564,11 @@ VKGSRender::VKGSRender() : GSRender() m_current_command_buffer = &m_primary_cb_list[0]; - //Create secondar command_buffer for parallel operations + //Create secondary command_buffer for parallel operations m_secondary_command_buffer_pool.create((*m_device)); m_secondary_command_buffer.create(m_secondary_command_buffer_pool); - open_command_buffer(); - - for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) - { - vk::change_image_layout(*m_current_command_buffer, 
m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - - VkClearColorValue clear_color{}; - auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); - vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - - } - + //VRAM allocation m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000); m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000); @@ -598,25 +582,25 @@ VKGSRender::VKGSRender() : GSRender() std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); + //Generate frame contexts VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS }; std::vector sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool }; - descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size())); - - - null_buffer = std::make_unique(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); - 
null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32); - - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore); + for (auto &ctx : frame_context) + { + ctx = {}; + vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore); + ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size())); + } + + null_buffer = std::make_unique(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); + null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32); vk::initialize_compiler_context(); @@ -631,6 +615,25 @@ VKGSRender::VKGSRender() : GSRender() m_vertex_cache.reset(new vk::null_vertex_cache()); else m_vertex_cache.reset(new vk::weak_vertex_cache()); + + open_command_buffer(); + + for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) + { + vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + + VkClearColorValue clear_color{}; + auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); + vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + + } + + m_current_frame = &frame_context[0]; } 
VKGSRender::~VKGSRender() @@ -641,23 +644,9 @@ VKGSRender::~VKGSRender() return; } - //Close recording and wait for all to finish - close_render_pass(); - CHECK_RESULT(vkEndCommandBuffer(*m_current_command_buffer)); - - for (auto &cb : m_primary_cb_list) - if (cb.pending) cb.wait(); - //Wait for device to finish up with resources vkDeviceWaitIdle(*m_device); - //Sync objects - if (m_present_semaphore) - { - vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); - m_present_semaphore = nullptr; - } - //Texture cache m_texture_cache.destroy(); @@ -678,10 +667,17 @@ VKGSRender::~VKGSRender() null_buffer.reset(); null_buffer_view.reset(); - //Temporary objects - m_buffer_view_to_clean.clear(); - m_sampler_to_clean.clear(); - m_framebuffer_to_clean.clear(); + //Frame context + for (auto &ctx : frame_context) + { + vkDestroySemaphore((*m_device), ctx.present_semaphore, nullptr); + ctx.descriptor_pool.destroy(); + + ctx.buffer_views_to_clean.clear(); + ctx.samplers_to_clean.clear(); + ctx.framebuffers_to_clean.clear(); + } + m_draw_fbo.reset(); //Render passes @@ -699,8 +695,6 @@ VKGSRender::~VKGSRender() vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); - descriptor_pool.destroy(); - //Command buffer for (auto &cb : m_primary_cb_list) cb.destroy(); @@ -736,8 +730,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (!flushable) return false; - close_render_pass(); - if (synchronized) { if (m_last_flushable_cb >= 0) @@ -807,8 +799,21 @@ void VKGSRender::begin() return; //Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources - if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS || - m_attrib_ring_info.is_critical() || + if (m_current_frame->used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS) + { + //No need to stall if we have more than one frame queue anyway + flush_command_queue(); + + 
CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0)); + m_current_frame->used_descriptors = 0; + + m_uniform_buffer_ring_info.reset_allocation_stats(); + m_index_buffer_ring_info.reset_allocation_stats(); + m_attrib_ring_info.reset_allocation_stats(); + m_texture_upload_buffer_ring_info.reset_allocation_stats(); + } + + if (m_attrib_ring_info.is_critical() || m_texture_upload_buffer_ring_info.is_critical() || m_uniform_buffer_ring_info.is_critical() || m_index_buffer_ring_info.is_critical()) @@ -818,20 +823,17 @@ void VKGSRender::begin() flush_command_queue(true); m_vertex_cache->purge(); - CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0)); - m_used_descriptors = 0; - - m_uniform_buffer_ring_info.reset_allocation_stats(); - m_index_buffer_ring_info.reset_allocation_stats(); - m_attrib_ring_info.reset_allocation_stats(); - m_texture_upload_buffer_ring_info.reset_allocation_stats(); - std::chrono::time_point submit_end = steady_clock::now(); m_flip_time += std::chrono::duration_cast(submit_end - submit_start).count(); } + init_buffers(); + + if (!framebuffer_status_valid) + return; + VkDescriptorSetAllocateInfo alloc_info = {}; - alloc_info.descriptorPool = descriptor_pool; + alloc_info.descriptorPool = m_current_frame->descriptor_pool; alloc_info.descriptorSetCount = 1; alloc_info.pSetLayouts = &descriptor_layouts; alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; @@ -839,16 +841,11 @@ void VKGSRender::begin() VkDescriptorSet new_descriptor_set; CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set)); - descriptor_sets = new_descriptor_set; - m_used_descriptors++; + m_current_frame->descriptor_set = new_descriptor_set; + m_current_frame->used_descriptors++; std::chrono::time_point start = steady_clock::now(); - init_buffers(); - - if (!framebuffer_status_valid) - return; - float actual_line_width = rsx::method_registers.line_width(); 
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width); @@ -901,20 +898,28 @@ void VKGSRender::end() return; } - std::chrono::time_point program_start = steady_clock::now(); - //Load program here since it is dependent on vertex state - if (!load_program()) + if (!check_program_status()) { LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw"); rsx::thread::end(); return; } - std::chrono::time_point program_stop = steady_clock::now(); - //m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); + //Programs data is dependent on vertex state + std::chrono::time_point vertex_start = steady_clock::now(); + auto upload_info = upload_vertex_data(); + std::chrono::time_point vertex_end = steady_clock::now(); + m_vertex_upload_time += std::chrono::duration_cast(vertex_end - vertex_start).count(); - close_render_pass(); //Texture upload stuff conflicts active RPs + //Load program + std::chrono::time_point program_start = steady_clock::now(); + load_program(std::get<2>(upload_info), std::get<3>(upload_info)); + std::chrono::time_point program_stop = steady_clock::now(); + m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); + + //Close current pass to avoid conflict with texture functions + close_render_pass(); if (g_cfg.video.strict_rendering_mode) { @@ -967,11 +972,6 @@ void VKGSRender::end() } } - std::chrono::time_point vertex_start0 = steady_clock::now(); - auto upload_info = upload_vertex_data(); - std::chrono::time_point vertex_end0 = steady_clock::now(); - m_vertex_upload_time += std::chrono::duration_cast(vertex_end0 - vertex_start0).count(); - std::chrono::time_point textures_start = steady_clock::now(); for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) @@ -980,7 +980,7 @@ void VKGSRender::end() { if (!rsx::method_registers.fragment_textures[i].enabled()) { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + 
std::to_string(i), descriptor_sets); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); continue; } @@ -989,7 +989,7 @@ void VKGSRender::end() if (!texture0) { LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); continue; } @@ -1016,7 +1016,7 @@ void VKGSRender::end() mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; } - m_sampler_to_clean.push_back(std::make_unique( + m_current_frame->samplers_to_clean.push_back(std::make_unique( *m_device, vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()), !!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN), @@ -1024,7 +1024,7 @@ void VKGSRender::end() min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()), is_depth_texture, depth_compare)); - m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets); + m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); } } @@ -1034,7 +1034,7 @@ void VKGSRender::end() { if (!rsx::method_registers.vertex_textures[i].enabled()) { - m_program->bind_uniform({ 
vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); continue; } @@ -1043,11 +1043,11 @@ void VKGSRender::end() if (!texture0) { LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); continue; } - m_sampler_to_clean.push_back(std::make_unique( + m_current_frame->samplers_to_clean.push_back(std::make_unique( *m_device, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN), @@ -1055,7 +1055,7 @@ void VKGSRender::end() VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()) )); - m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets); + m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); } } @@ -1066,9 +1066,12 @@ void VKGSRender::end() //Only textures are synchronized tightly with the GPU and they have been read back above vk::enter_uninterruptible(); + set_viewport(); + begin_render_pass(); + vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); 
- vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); + vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); //Clear any 'dirty' surfaces - possible is a recycled cache surface is used std::vector buffers_to_clear; @@ -1108,11 +1111,7 @@ void VKGSRender::end() vkCmdClearAttachments(*m_current_command_buffer, static_cast(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect); } - std::optional > index_info = std::get<2>(upload_info); - - std::chrono::time_point vertex_end = steady_clock::now(); - m_vertex_upload_time += std::chrono::duration_cast(vertex_end - textures_end).count(); - + std::optional > index_info = std::get<4>(upload_info); if (!index_info) { const auto vertex_count = std::get<1>(upload_info); @@ -1133,7 +1132,7 @@ void VKGSRender::end() vk::leave_uninterruptible(); std::chrono::time_point draw_end = steady_clock::now(); - m_draw_time += std::chrono::duration_cast(draw_end - vertex_end).count(); + m_draw_time += std::chrono::duration_cast(draw_end - textures_end).count(); copy_render_targets_to_dma_location(); m_draw_calls++; @@ -1219,7 +1218,7 @@ void VKGSRender::clear_surface(u32 mask) if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return; if (!(mask & 0xF3)) return; - if (m_current_present_image == 0xFFFF) return; + if (m_current_frame->present_image == UINT32_MAX) return; init_buffers(); @@ -1339,6 +1338,8 @@ void VKGSRender::copy_render_targets_to_dma_location() if (g_cfg.video.write_color_buffers) { + close_render_pass(); + for (u8 index = 0; index < rsx::limits::color_buffers_count; index++) { if (!m_surface_info[index].pitch) @@ -1351,6 +1352,8 @@ void VKGSRender::copy_render_targets_to_dma_location() if (g_cfg.video.write_depth_buffer) { + close_render_pass(); + if (m_depth_surface_info.pitch) { 
m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, @@ -1374,7 +1377,7 @@ void VKGSRender::flush_command_queue(bool hard_sync) if (hard_sync) { //swap handler checks the pending flag, so call it here - process_swap_request(); + process_swap_request(m_current_frame); //wait for the latest intruction to execute m_current_command_buffer->pending = true; @@ -1395,83 +1398,128 @@ void VKGSRender::flush_command_queue(bool hard_sync) //Grab next cb in line and make it usable m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; + + //Soft sync if a present has not yet occured before consuming the wait event + for (auto &ctx : frame_context) + { + if (ctx.swap_command_buffer == m_current_command_buffer) + process_swap_request(&ctx, true); + } + m_current_command_buffer->reset(); } open_command_buffer(); } +void VKGSRender::advance_queued_frames() +{ + //Check all other frames for completion and clear resources + for (auto &ctx : frame_context) + { + if (&ctx == m_current_frame) + continue; + + if (ctx.swap_command_buffer) + { + ctx.swap_command_buffer->poke(); + if (ctx.swap_command_buffer->pending) + continue; + + //Present the bound image + process_swap_request(&ctx, true); + } + } + + //Only marks surfaces as dirty without actually deleting them so its safe to use + if (g_cfg.video.invalidate_surface_cache_every_frame) + m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer); + + //m_rtts storage is double buffered and should be safe to tag on frame boundary + m_rtts.free_invalidated(); + + //texture cache is also double buffered to prevent use-after-free + m_texture_cache.flush(); + + m_vertex_cache->purge(); + + m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES; + m_current_frame = &frame_context[m_current_queue_index]; +} + +void VKGSRender::present(frame_context_t *ctx) +{ + 
VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); + + VkPresentInfoKHR present = {}; + present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present.pNext = nullptr; + present.swapchainCount = 1; + present.pSwapchains = &swap_chain; + present.pImageIndices = &ctx->present_image; + CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); +} + void VKGSRender::queue_swap_request() { //buffer the swap request and return - if (m_swap_command_buffer && m_swap_command_buffer->pending) + if (m_current_frame->swap_command_buffer && + m_current_frame->swap_command_buffer->pending) { //Its probable that no actual drawing took place - process_swap_request(); + process_swap_request(m_current_frame); } - m_swap_command_buffer = m_current_command_buffer; - close_and_submit_command_buffer({ m_present_semaphore }, m_current_command_buffer->submit_fence); + m_current_frame->swap_command_buffer = m_current_command_buffer; + close_and_submit_command_buffer({ m_current_frame->present_semaphore }, m_current_command_buffer->submit_fence); + m_current_frame->swap_command_buffer->pending = true; //Grab next cb in line and make it usable m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; m_current_command_buffer->reset(); - m_swap_command_buffer->pending = true; + //Set up new pointers for the next frame + advance_queued_frames(); open_command_buffer(); } -void VKGSRender::process_swap_request() +void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) { - if (!m_swap_command_buffer) + if (!ctx->swap_command_buffer) return; - if (m_swap_command_buffer->pending) + if (ctx->swap_command_buffer->pending) { //Perform hard swap here - m_swap_command_buffer->wait(); - - VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); - - VkPresentInfoKHR present = {}; - present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present.pNext = nullptr; 
- present.swapchainCount = 1; - present.pSwapchains = &swap_chain; - present.pImageIndices = &m_current_present_image; - CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); + ctx->swap_command_buffer->wait(); + free_resources = true; } - //Clean up all the resources from the last frame + //Always present + present(ctx); - //Feed back damaged resources to the main texture cache for management... - //m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); - - m_rtts.free_invalidated(); - m_texture_cache.flush(); - - if (g_cfg.video.invalidate_surface_cache_every_frame) - m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer); - - m_buffer_view_to_clean.clear(); - m_sampler_to_clean.clear(); - - m_framebuffer_to_clean.remove_if([](std::unique_ptr& fbo) + if (free_resources) { - if (fbo->deref_count >= 2) return true; - fbo->deref_count++; - return false; - }); + //Cleanup of reference sensitive resources + //TODO: These should be double buffered as well to prevent destruction of anything in use + if (g_cfg.video.overlay) + { + m_text_writer->reset_descriptors(); + } - if (g_cfg.video.overlay) - { - m_text_writer->reset_descriptors(); + ctx->buffer_views_to_clean.clear(); + ctx->samplers_to_clean.clear(); + + ctx->framebuffers_to_clean.remove_if([](std::unique_ptr& fbo) + { + if (fbo->deref_count >= 2) return true; + fbo->deref_count++; + return false; + }); } - m_vertex_cache->purge(); - - m_swap_command_buffer = nullptr; + ctx->swap_command_buffer = nullptr; } void VKGSRender::do_local_task() @@ -1482,6 +1530,7 @@ void VKGSRender::do_local_task() //TODO: Determine if a hard sync is necessary //Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline + close_render_pass(); flush_command_queue(); m_flush_commands = false; @@ -1509,11 +1558,8 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) } } -bool VKGSRender::load_program(bool) +bool VKGSRender::check_program_status() { - 
auto &vertex_program = current_vertex_program; - auto &fragment_program = current_fragment_program; - auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { vk::render_target *surface = nullptr; @@ -1528,24 +1574,29 @@ bool VKGSRender::load_program(bool) }; get_current_fragment_program(rtt_lookup_func); - if (!fragment_program.valid) return false; + if (!current_fragment_program.valid) return false; get_current_vertex_program(); + auto &vertex_program = current_vertex_program; + auto &fragment_program = current_fragment_program; + vk::pipeline_props properties = {}; - properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; bool unused; + bool update_blend_constants = false; + bool update_stencil_info_back = false; + bool update_stencil_info_front = false; + bool update_depth_bounds = false; + + properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused); if (rsx::method_registers.restart_index_enabled()) - { properties.ia.primitiveRestartEnable = VK_TRUE; - } else properties.ia.primitiveRestartEnable = VK_FALSE; - for (int i = 0; i < 4; ++i) { properties.att_state[i].colorWriteMask = 0xf; @@ -1588,11 +1639,8 @@ bool VKGSRender::load_program(bool) properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; } - auto blend_colors = rsx::get_constant_blend_colors(); - properties.cs.blendConstants[0] = blend_colors[0]; - properties.cs.blendConstants[1] = blend_colors[1]; - properties.cs.blendConstants[2] = blend_colors[2]; - properties.cs.blendConstants[3] = blend_colors[3]; + //Blend constants are dynamic + update_blend_constants = true; } else { @@ -1618,8 +1666,7 @@ bool VKGSRender::load_program(bool) if (rsx::method_registers.depth_bounds_test_enabled()) { properties.ds.depthBoundsTestEnable = VK_TRUE; - properties.ds.minDepthBounds = 
rsx::method_registers.depth_bounds_min(); - properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max(); + update_depth_bounds = true; } else properties.ds.depthBoundsTestEnable = VK_FALSE; @@ -1627,9 +1674,6 @@ bool VKGSRender::load_program(bool) if (rsx::method_registers.stencil_test_enabled()) { properties.ds.stencilTestEnable = VK_TRUE; - properties.ds.front.writeMask = rsx::method_registers.stencil_mask(); - properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask(); - properties.ds.front.reference = rsx::method_registers.stencil_func_ref(); properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail()); properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()); properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()); @@ -1637,16 +1681,16 @@ bool VKGSRender::load_program(bool) if (rsx::method_registers.two_sided_stencil_test_enabled()) { - properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask(); - properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask(); - properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref(); properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()); properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()); properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()); properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func()); + update_stencil_info_back = true; } else properties.ds.back = properties.ds.front; + + update_stencil_info_front = true; } else properties.ds.stencilTestEnable = VK_FALSE; @@ -1684,55 +1728,84 @@ bool VKGSRender::load_program(bool) vk::enter_uninterruptible(); //Load current program from buffer + vertex_program.skip_vertex_input_check = true; m_program = 
m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); - //TODO: Update constant buffers.. - //1. Update scale-offset matrix - //2. Update vertex constants - //3. Update fragment constants - const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256); + vk::leave_uninterruptible(); - u8 *buf = (u8*)m_uniform_buffer_ring_info.map(scale_offset_offset, 256); - - /** - * NOTE: While VK's coord system resembles GLs, the clip volume is no longer symetrical in z - * Its like D3D without the flip in y (depending on how you build the spir-v) - */ - fill_scale_offset_data(buf, false); - fill_user_clip_data(buf + 64); - - m_uniform_buffer_ring_info.unmap(); - - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets); - - if (true)//m_transform_constants_dirty) + //Update dynamic state + if (update_blend_constants) { - const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); - buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float)); - fill_vertex_program_constants_data(buf); - *(reinterpret_cast(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits(); - m_uniform_buffer_ring_info.unmap(); - - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - m_transform_constants_dirty = false; + //Update blend constants + auto blend_colors = rsx::get_constant_blend_colors(); + vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data()); } + if (update_stencil_info_front) + { + VkStencilFaceFlags face_flag = (update_stencil_info_back)? 
VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK; + + vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask()); + vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask()); + vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref()); + + if (update_stencil_info_back) + { + vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask()); + vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask()); + vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref()); + } + } + + if (update_depth_bounds) + { + //Update depth bounds min/max + vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); + } + + return true; +} + +void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) +{ + auto &vertex_program = current_vertex_program; + auto &fragment_program = current_fragment_program; + const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float)); - const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz); + const size_t required_mem = 512 + 8192 + fragment_buffer_sz; - buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz); + const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem); + const size_t vertex_constants_offset = vertex_state_offset + 512; + const size_t fragment_constants_offset = vertex_constants_offset + 8192; + + //We do this in one go + u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem); + + 
//Vertex state + fill_scale_offset_data(buf, false); + fill_user_clip_data(buf + 64); + *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); + *(reinterpret_cast(buf + 132)) = vertex_base; + fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 144)); + + //Vertex constants + buf = buf + 512; + fill_vertex_program_constants_data(buf); + m_transform_constants_dirty = false; + + //Fragment constants + buf = buf + 8192; if (fragment_constants_sz) m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); + m_uniform_buffer_ring_info.unmap(); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - - vk::leave_uninterruptible(); - - return true; + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 8192 }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); } static const u32 mr_color_offset[rsx::limits::color_buffers_count] = @@ -1762,7 +1835,20 @@ static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = void VKGSRender::init_buffers(bool skip_reading) { //Clear any pending swap requests - process_swap_request(); + for (auto &ctx : frame_context) + { + if (ctx.swap_command_buffer) + { + if (ctx.swap_command_buffer->pending) + ctx.swap_command_buffer->poke(); + + if (!ctx.swap_command_buffer->pending) + { + //process swap without advancing the frame base + 
process_swap_request(&ctx, true); + } + } + } prepare_rtts(); @@ -1770,8 +1856,6 @@ void VKGSRender::init_buffers(bool skip_reading) { read_buffers(); } - - set_viewport(); } void VKGSRender::read_buffers() @@ -1965,14 +2049,13 @@ void VKGSRender::prepare_rtts() } } - for (auto &fbo : m_framebuffer_to_clean) + for (auto &fbo : m_current_frame->framebuffers_to_clean) { if (fbo->matches(bound_images, clip_width, clip_height)) { m_draw_fbo.swap(fbo); m_draw_fbo->reset_refs(); framebuffer_found = true; - //LOG_ERROR(RSX, "Matching framebuffer exists, using that instead"); break; } } @@ -2014,7 +2097,7 @@ void VKGSRender::prepare_rtts() VkRenderPass current_render_pass = m_render_passes[idx]; if (m_draw_fbo) - m_framebuffer_to_clean.push_back(std::move(m_draw_fbo)); + m_current_frame->framebuffers_to_clean.push_back(std::move(m_draw_fbo)); m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); } @@ -2060,7 +2143,7 @@ void VKGSRender::flip(int buffer) std::chrono::time_point flip_start = steady_clock::now(); close_render_pass(); - process_swap_request(); + process_swap_request(m_current_frame, true); if (!resize_screen) { @@ -2095,8 +2178,8 @@ void VKGSRender::flip(int buffer) aspect_ratio.size = new_size; - //Prepare surface for new frame - CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image)); + //Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be + CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), UINT64_MAX, m_current_frame->present_semaphore, VK_NULL_HANDLE, &m_current_frame->present_image)); //Blit contents to screen.. 
vk::image* image_to_flip = nullptr; @@ -2106,7 +2189,7 @@ void VKGSRender::flip(int buffer) else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr) image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1]); - VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); + VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image); if (image_to_flip) { vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, @@ -2117,9 +2200,9 @@ void VKGSRender::flip(int buffer) //No draw call was issued! VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); VkClearColorValue clear_black = { 0 }; - vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range); - vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range); - vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); + vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range); + vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range); + vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); } std::unique_ptr direct_fbo; @@ -2144,14 +2227,14 @@ void VKGSRender::flip(int buffer) size_t idx = 
vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1); VkRenderPass single_target_pass = m_render_passes[idx]; - for (auto It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++) + for (auto It = m_current_frame->framebuffers_to_clean.begin(); It != m_current_frame->framebuffers_to_clean.end(); It++) { auto &fbo = *It; if (fbo->attachments[0]->info.image == target_image) { direct_fbo.swap(fbo); direct_fbo->reset_refs(); - m_framebuffer_to_clean.erase(It); + m_current_frame->framebuffers_to_clean.erase(It); break; } } @@ -2189,7 +2272,7 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message); vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres); - m_framebuffer_to_clean.push_back(std::move(direct_fbo)); + m_current_frame->framebuffers_to_clean.push_back(std::move(direct_fbo)); } queue_swap_request(); @@ -2262,9 +2345,13 @@ void VKGSRender::flip(int buffer) m_current_command_buffer->reset(); open_command_buffer(); - //Do cleanup - m_swap_command_buffer = m_current_command_buffer; - process_swap_request(); + //Do cleanup; also present the previous frame for this frame if available + //Don't bother scheduling a swap event if the frame context is still uninitialized (no previous frame) + if (m_current_frame->present_image != UINT32_MAX) + { + m_current_frame->swap_command_buffer = m_current_command_buffer; + process_swap_request(m_current_frame); + } } std::chrono::time_point flip_end = steady_clock::now(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 0c1024c467..53865c7880 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -25,11 +25,12 @@ namespace vk //Heap allocation sizes in MB #define VK_ATTRIB_RING_BUFFER_SIZE_M 256 -#define 
VK_UBO_RING_BUFFER_SIZE_M 32 +#define VK_UBO_RING_BUFFER_SIZE_M 64 #define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128 #define VK_MAX_ASYNC_CB_COUNT 64 +#define VK_MAX_ASYNC_FRAMES 2 struct command_buffer_chunk: public vk::command_buffer { @@ -135,32 +136,43 @@ private: vk::vk_data_heap m_texture_upload_buffer_ring_info; //Vulkan internals - u32 m_current_present_image = 0xFFFF; - VkSemaphore m_present_semaphore = nullptr; - vk::command_pool m_command_buffer_pool; - std::array m_primary_cb_list; - - command_buffer_chunk* m_current_command_buffer = nullptr; - command_buffer_chunk* m_swap_command_buffer = nullptr; - - u32 m_current_cb_index = 0; std::mutex m_secondary_cb_guard; vk::command_pool m_secondary_command_buffer_pool; vk::command_buffer m_secondary_command_buffer; - std::array m_render_passes; - VkDescriptorSetLayout descriptor_layouts; - VkDescriptorSet descriptor_sets; - VkPipelineLayout pipeline_layout; - vk::descriptor_pool descriptor_pool; + u32 m_current_cb_index = 0; + std::array m_primary_cb_list; + command_buffer_chunk* m_current_command_buffer = nullptr; + + std::array m_render_passes; + + VkDescriptorSetLayout descriptor_layouts; + VkPipelineLayout pipeline_layout; - std::vector > m_buffer_view_to_clean; - std::vector > m_sampler_to_clean; - std::list > m_framebuffer_to_clean; std::unique_ptr m_draw_fbo; + struct frame_context_t + { + VkSemaphore present_semaphore = VK_NULL_HANDLE; + VkDescriptorSet descriptor_set = VK_NULL_HANDLE; + vk::descriptor_pool descriptor_pool; + u32 used_descriptors = 0; + + std::vector> buffer_views_to_clean; + std::vector> samplers_to_clean; + std::list> framebuffers_to_clean; + + u32 present_image = UINT32_MAX; + command_buffer_chunk* swap_command_buffer = nullptr; + }; + + std::array frame_context; + + u32 m_current_queue_index = 0; + frame_context_t* m_current_frame = nullptr; + u32 m_client_width = 0; u32 m_client_height = 0; @@ -183,7 +195,6 @@ private: s64 m_draw_time = 0; s64 
m_flip_time = 0; - u32 m_used_descriptors = 0; u8 m_draw_buffers_count = 0; bool framebuffer_status_valid = false; @@ -201,6 +212,9 @@ private: std::thread::id rsx_thread; bool render_pass_open = false; + + //Vertex layout + rsx::vertex_input_layout m_vertex_layout; #ifdef __linux__ Display *m_display_handle = nullptr; @@ -220,15 +234,18 @@ private: void flush_command_queue(bool hard_sync = false); void queue_swap_request(); - void process_swap_request(); + void process_swap_request(frame_context_t *ctx, bool free_resources = false); + void advance_queued_frames(); + void present(frame_context_t *ctx); void begin_render_pass(); void close_render_pass(); - /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type - std::tuple > > upload_vertex_data(); + /// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type) + std::tuple > > upload_vertex_data(); public: - bool load_program(bool fast_update = false); + bool check_program_status(); + void load_program(u32 vertex_count, u32 vertex_base); void init_buffers(bool skip_reading = false); void read_buffers(); void write_buffers(); diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index b09e636200..56ad392a56 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -19,17 +19,22 @@ namespace vk bool operator==(const pipeline_props& other) const { - if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) - return false; - if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) - return false; if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) return false; - if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo))) + + if (render_pass != other.render_pass) return false; + if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) return false; - if 
(render_pass != other.render_pass) + + if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo))) + return false; + + if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) + return false; + + if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) return false; return num_targets == other.num_targets; @@ -90,9 +95,6 @@ struct VKTraits static pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout) { -// pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors; -// pstate.cb.pAttachments = pstate.att_state; -// pstate.cb.attachmentCount = pstate.num_targets; VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -111,6 +113,11 @@ struct VKTraits dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE; dynamic_state_info.pDynamicStates = dynamic_state_descriptors; VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; @@ -144,7 +151,6 @@ struct VKTraits 
info.renderPass = pipelineProperties.render_pass; CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline)); - pipeline_storage_type result = std::make_unique(dev, pipeline, vertexProgramData.uniforms, fragmentProgramData.uniforms); return result; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 7642197b6a..3a99503872 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -21,6 +21,7 @@ namespace vk bool dirty = false; u16 native_pitch = 0; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; + std::unique_ptr view; render_target *old_contents = nullptr; //Data occupying the memory location that this surface is replacing @@ -40,6 +41,15 @@ namespace vk :image(dev, memory_type_index, access_flags, image_type, format, width, height, depth, mipmaps, layers, samples, initial_layout, tiling, usage, image_flags) {} + + vk::image_view* get_view() + { + if (!view) + view = std::make_unique(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format, + native_component_map, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT))); + + return view.get(); + } }; struct framebuffer_holder: public vk::framebuffer, public ref_counted diff --git a/rpcs3/Emu/RSX/VK/VKTextOut.h b/rpcs3/Emu/RSX/VK/VKTextOut.h index 014c4d1974..aba9dea317 100644 --- a/rpcs3/Emu/RSX/VK/VKTextOut.h +++ b/rpcs3/Emu/RSX/VK/VKTextOut.h @@ -362,6 +362,9 @@ namespace vk void reset_descriptors() { + if (m_used_descriptors == 0) + return; + vkResetDescriptorPool(device, m_descriptor_pool, 0); m_used_descriptors = 0; } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 9836036e89..a1387930fa 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -322,9 +322,14 @@ namespace vk std::pair read_only_range = std::make_pair(0xFFFFFFFF, 0); std::pair no_access_range = 
std::make_pair(0xFFFFFFFF, 0); + //Stuff that has been dereferenced goes into these std::vector > m_temporary_image_view; std::vector> m_dirty_textures; + //Stuff that has been dereferenced twice goes here. Contents are evicted before new ones are added + std::vector> m_image_views_to_purge; + std::vector> m_images_to_purge; + // Keep track of cache misses to pre-emptively flush some addresses struct framebuffer_memory_characteristics { @@ -431,6 +436,9 @@ namespace vk m_temporary_image_view.clear(); m_dirty_textures.clear(); + + m_image_views_to_purge.clear(); + m_images_to_purge.clear(); } //Helpers @@ -544,7 +552,7 @@ namespace vk } //First check if it exists as an rtt... - vk::image *rtt_texture = nullptr; + vk::render_target *rtt_texture = nullptr; if (rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) { if (g_cfg.video.strict_rendering_mode) @@ -559,10 +567,7 @@ namespace vk } } - m_temporary_image_view.push_back(std::make_unique(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format, - rtt_texture->native_component_map, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT))); - return m_temporary_image_view.back().get(); + return rtt_texture->get_view(); } if (rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) @@ -576,10 +581,7 @@ namespace vk } } - m_temporary_image_view.push_back(std::make_unique(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format, - rtt_texture->native_component_map, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT))); - return m_temporary_image_view.back().get(); + return rtt_texture->get_view(); } u32 raw_format = tex.format(); @@ -912,8 +914,11 @@ namespace vk void flush() { - m_dirty_textures.clear(); - m_temporary_image_view.clear(); + m_image_views_to_purge.clear(); + m_images_to_purge.clear(); + + m_image_views_to_purge = std::move(m_temporary_image_view); 
+ m_images_to_purge = std::move(m_dirty_textures); } void record_cache_miss(cached_texture_section &tex) diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index dbad6d15db..6895c2db8b 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -7,53 +7,6 @@ namespace vk { - bool requires_component_expansion(rsx::vertex_base_type type, u32 size) - { - if (size == 3) - { - switch (type) - { - case rsx::vertex_base_type::f: - return true; - } - } - - return false; - } - - u32 get_suitable_vk_size(rsx::vertex_base_type type, u32 size) - { - if (size == 3) - { - switch (type) - { - case rsx::vertex_base_type::f: - return 16; - } - } - - return rsx::get_vertex_type_size_on_host(type, size); - } - - VkFormat get_suitable_vk_format(rsx::vertex_base_type type, u8 size) - { - /** - * Set up buffer fetches to only work on 4-component access. This is hardware dependant so we use 4-component access to avoid branching based on IHV implementation - * AMD GCN 1.0 for example does not support RGB32 formats for texel buffers - */ - const VkFormat vec1_types[] = { VK_FORMAT_R16_SNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8_UINT }; - const VkFormat vec2_types[] = { VK_FORMAT_R16G16_SNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8_UINT }; - const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED! 
- const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; - - const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; - - if (type > rsx::vertex_base_type::ub256) - fmt::throw_exception("VKGS error: unknown vertex base type 0x%x" HERE, (u32)type); - - return vec_selectors[size][(int)type]; - } - VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification) { requires_modification = false; @@ -92,114 +45,6 @@ namespace vk return !result; } - template - void copy_inlined_data_to_buffer(void *src_data, void *dst_data, u32 vertex_count, rsx::vertex_base_type type, u8 src_channels, u8 dst_channels, u16 element_size, u16 stride) - { - u8 *src = static_cast(src_data); - u8 *dst = static_cast(dst_data); - - for (u32 i = 0; i < vertex_count; ++i) - { - T* src_ptr = reinterpret_cast(src); - T* dst_ptr = reinterpret_cast(dst); - - switch (type) - { - case rsx::vertex_base_type::ub: - { - if (src_channels == 4) - { - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - - break; - } - } - default: - { - for (u8 ch = 0; ch < dst_channels; ++ch) - { - if (ch < src_channels) - { - *dst_ptr = *src_ptr; - src_ptr++; - } - else - *dst_ptr = (T)(padding); - - dst_ptr++; - } - } - } - - src += stride; - dst += element_size; - } - } - - void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count) - { - switch (type) - { - case rsx::vertex_base_type::f: - { - if (vertex_size == 3) - { - float *dst = reinterpret_cast(data); - for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4) - dst[idx] = 1.f; - } - - break; - } - case rsx::vertex_base_type::sf: - { - if (vertex_size == 3) - { - /** - * Pad the 4th component for half-float arrays to 1, since texelfetch does not mask 
components - */ - u16 *dst = reinterpret_cast(data); - for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4) - dst[idx] = 0x3c00; - } - - break; - } - } - } - - /** - * Template: Expand any N-compoent vector to a larger X-component vector and pad unused slots with 1 - */ - template - void expand_array_components(const T* src_data, void *dst_ptr, u32 vertex_count) - { - T* src = const_cast(src_data); - T* dst = static_cast(dst_ptr); - - for (u32 index = 0; index < vertex_count; ++index) - { - for (u8 channel = 0; channel < dst_components; channel++) - { - if (channel < src_components) - { - *dst = *src; - - dst++; - src++; - } - else - { - *dst = (T)(padding); - dst++; - } - } - } - } - VkIndexType get_index_type(rsx::index_array_type type) { switch (type) @@ -215,20 +60,7 @@ namespace vk namespace { - static constexpr std::array s_reg_table = - { - "in_pos_buffer", "in_weight_buffer", "in_normal_buffer", - "in_diff_color_buffer", "in_spec_color_buffer", - "in_fog_buffer", - "in_point_size_buffer", "in_7_buffer", - "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", - "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" - }; - /** - * Creates and fills an index buffer emulating unsupported primitive type. 
- * Returns index_count and (offset_in_index_buffer, index_type) - */ std::tuple> generate_emulating_index_buffer( const rsx::draw_clause& clause, u32 vertex_count, vk::vk_data_heap& m_index_buffer_ring_info) @@ -247,161 +79,49 @@ namespace index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16)); } - struct vertex_buffer_visitor + struct vertex_input_state { - vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap, - vk::glsl::program* prog, VkDescriptorSet desc_set, - std::vector>& buffer_view_to_clean, - vk::vertex_cache* vertex_cache) - : vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog), - descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean), - vertex_cache(vertex_cache) - { - } - - void operator()(const rsx::vertex_array_buffer& vertex_array) - { - if (!m_program->has_uniform(s_reg_table[vertex_array.index])) - return; - - // Fill vertex_array - const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); - const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); - const u32 upload_size = real_element_size * vertex_count; - const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size); - const uintptr_t local_addr = (uintptr_t)vertex_array.data.data(); - - if (auto found = vertex_cache->find_vertex_range(local_addr, format, upload_size)) - { - m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, format, found->offset_in_heap, upload_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets); - return; - } - - VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size); - void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size); - - gsl::span dest_span(static_cast(dst), upload_size); - 
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size); - - //Padding the vertex buffer should be done after the writes have been done - //write_vertex_data function may 'dirty' unused sections of the buffer as optimization - vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count); - - m_attrib_ring_info.unmap(); - - vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer); - - m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets); - } - - void operator()(const rsx::vertex_array_register& vertex_register) - { - if (!m_program->has_uniform(s_reg_table[vertex_register.index])) - return; - - size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size); - const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size); - - size_t offset_in_attrib_buffer = 0; - - if (vk::requires_component_expansion(vertex_register.type, vertex_register.attribute_size)) - { - const u32 num_stored_verts = static_cast( - data_size / (sizeof(float) * vertex_register.attribute_size)); - const u32 real_element_size = vk::get_suitable_vk_size(vertex_register.type, vertex_register.attribute_size); - - data_size = real_element_size * num_stored_verts; - offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size); - void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size); - - vk::expand_array_components(reinterpret_cast(vertex_register.data.data()), dst, num_stored_verts); - m_attrib_ring_info.unmap(); - } - else - { - offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size); - void *dst = 
m_attrib_ring_info.map(offset_in_attrib_buffer, data_size); - memcpy(dst, vertex_register.data.data(), data_size); - m_attrib_ring_info.unmap(); - } - - m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_register.index], descriptor_sets); - } - - void operator()(const rsx::empty_vertex_array& vbo) - { - if (!m_program->has_uniform(s_reg_table[vbo.index])) - return; - - m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets); - } - - protected: - VkDevice device; - u32 vertex_count; - vk::vk_data_heap& m_attrib_ring_info; - vk::glsl::program* m_program; - VkDescriptorSet descriptor_sets; - std::vector>& m_buffer_view_to_clean; - vk::vertex_cache* vertex_cache; + VkPrimitiveTopology native_primitive_type; + u32 vertex_draw_count; + u32 allocated_vertex_count; + u32 vertex_data_base; + u32 vertex_index_base; + std::optional> index_info; }; - using attribute_storage = std::vector>; - struct draw_command_visitor { - using result_type = std::tuple>>; - - draw_command_visitor(VkDevice device, vk::vk_data_heap& index_buffer_ring_info, - vk::vk_data_heap& attrib_ring_info, vk::glsl::program* program, - VkDescriptorSet descriptor_sets, - std::vector>& buffer_view_to_clean, - std::function>&)> - get_vertex_buffers_f, - VKGSRender *thread) - : m_device(device), m_index_buffer_ring_info(index_buffer_ring_info), - m_attrib_ring_info(attrib_ring_info), m_program(program), - m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean), - get_vertex_buffers(get_vertex_buffers_f), - rsxthr(thread) + draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout) + : 
m_index_buffer_ring_info(index_buffer_ring_info) + , m_vertex_layout(layout) { } - result_type operator()(const rsx::draw_array_command& command) + vertex_input_state operator()(const rsx::draw_array_command& command) { bool primitives_emulated = false; VkPrimitiveTopology prims = vk::get_appropriate_topology( rsx::method_registers.current_draw_clause.primitive, primitives_emulated); - u32 index_count = 0; - std::optional> index_info; - u32 min_index = - rsx::method_registers.current_draw_clause.first_count_commands.front().first; - u32 max_index = - rsx::method_registers.current_draw_clause.get_elements_count() + min_index - 1; + const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + + if (primitives_emulated) + { + u32 index_count; + std::optional> index_info; - if (primitives_emulated) { std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, - max_index - min_index + 1, m_index_buffer_ring_info); - } - else - { - index_count = rsx::method_registers.current_draw_clause.get_elements_count(); + vertex_count, m_index_buffer_ring_info); + + return{ prims, index_count, vertex_count, min_index, 0, index_info }; } - upload_vertex_buffers(min_index, max_index); - return std::make_tuple(prims, index_count, index_info); + return{ prims, vertex_count, vertex_count, min_index, 0, {} }; } - result_type operator()(const rsx::draw_indexed_array_command& command) + vertex_input_state operator()(const rsx::draw_indexed_array_command& command) { bool primitives_emulated = false; VkPrimitiveTopology prims = vk::get_appropriate_topology( @@ -438,146 +158,150 @@ namespace std::optional> index_info = std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)); - upload_vertex_buffers(0, max_index); - return std::make_tuple(prims, index_count, index_info); - } - - result_type 
operator()(const rsx::draw_inlined_array& command) - { - bool primitives_emulated = false; - VkPrimitiveTopology prims = vk::get_appropriate_topology( - rsx::method_registers.current_draw_clause.primitive, primitives_emulated); - u32 index_count = upload_inlined_array(); - - if (!primitives_emulated) { - return std::make_tuple(prims, index_count, std::nullopt); + //check for vertex arrays with frquency modifiers + for (auto &block : m_vertex_layout.interleaved_blocks) + { + if (block.min_divisor > 1) + { + //Ignore base offsets and return real results + //The upload function will optimize the uploaded range anyway + return{ prims, index_count, max_index, 0, 0, index_info }; + } } + return {prims, index_count, (max_index - min_index + 1), min_index, min_index, index_info}; + } + + vertex_input_state operator()(const rsx::draw_inlined_array& command) + { + bool primitives_emulated = false; + auto &draw_clause = rsx::method_registers.current_draw_clause; + VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated); + + const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride; + + if (!primitives_emulated) + { + return{ prims, vertex_count, vertex_count, 0, 0, {} }; + } + + u32 index_count; std::optional> index_info; - std::tie(index_count, index_info) = generate_emulating_index_buffer( - rsx::method_registers.current_draw_clause, index_count, m_index_buffer_ring_info); - return std::make_tuple(prims, index_count, index_info); + std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info); + return{ prims, index_count, vertex_count, 0, 0, index_info }; } private: vk::vk_data_heap& m_index_buffer_ring_info; - VkDevice m_device; - vk::vk_data_heap& m_attrib_ring_info; - vk::glsl::program* m_program; - VkDescriptorSet m_descriptor_sets; - std::vector>& m_buffer_view_to_clean; - 
std::function>&)> - get_vertex_buffers; - VKGSRender* rsxthr; - - void upload_vertex_buffers(u32 min_index, u32 vertex_max_index) - { - const u32 vertex_count = vertex_max_index - min_index + 1; - - vertex_buffer_visitor visitor(vertex_count, m_device, - m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get()); - - const auto& vertex_buffers = get_vertex_buffers( - rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}}); - - for (auto &vbo: vertex_buffers) - std::apply_visitor(visitor, vbo); - } - - u32 upload_inlined_array() - { - u32 stride = 0; - u32 offsets[rsx::limits::vertex_count] = {0}; - - for (u32 i = 0; i < rsx::limits::vertex_count; ++i) { - const auto& info = rsx::method_registers.vertex_arrays_info[i]; - if (!info.size()) continue; - - offsets[i] = stride; - stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - u32 vertex_draw_count = - (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * - sizeof(u32)) / - stride; - - for (int index = 0; index < rsx::limits::vertex_count; ++index) - { - auto& vertex_info = rsx::method_registers.vertex_arrays_info[index]; - - if (!m_program->has_uniform(s_reg_table[index])) continue; - - if (!vertex_info.size()) // disabled - { - m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets); - continue; - } - - const u32 element_size = - vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size()); - const u32 data_size = element_size * vertex_draw_count; - const VkFormat format = - vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size()); - - size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size); - u8* src = reinterpret_cast( - rsx::method_registers.current_draw_clause.inline_vertex_array.data()); - u8* dst = 
- static_cast(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size)); - - src += offsets[index]; - u8 opt_size = vertex_info.size(); - - if (vertex_info.size() == 3) opt_size = 4; - - // TODO: properly handle cmp type - if (vertex_info.type() == rsx::vertex_base_type::cmp) - LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet"); - - switch (vertex_info.type()) - { - case rsx::vertex_base_type::f: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, - vertex_info.type(), vertex_info.size(), opt_size, element_size, stride); - break; - case rsx::vertex_base_type::sf: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, - vertex_info.type(), vertex_info.size(), opt_size, element_size, stride); - break; - case rsx::vertex_base_type::s1: - case rsx::vertex_base_type::ub: - case rsx::vertex_base_type::ub256: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, - vertex_info.type(), vertex_info.size(), opt_size, element_size, stride); - break; - case rsx::vertex_base_type::s32k: - case rsx::vertex_base_type::cmp: - vk::copy_inlined_data_to_buffer(src, dst, vertex_draw_count, - vertex_info.type(), vertex_info.size(), opt_size, element_size, stride); - break; - default: fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type()); - } - - m_attrib_ring_info.unmap(); - m_buffer_view_to_clean.push_back(std::make_unique(m_device, - m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size)); - m_program->bind_uniform( - m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets); - } - - return vertex_draw_count; - } + rsx::vertex_input_layout& m_vertex_layout; }; } -std::tuple>> +std::tuple > > VKGSRender::upload_vertex_data() { - draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program, - descriptor_sets, m_buffer_view_to_clean, - [this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, 
m_program->get_vertex_input_attributes_mask());}, this); - return std::apply_visitor(visitor, get_draw_command(rsx::method_registers)); + m_vertex_layout = analyse_inputs_interleaved(); + + draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout); + auto result = std::apply_visitor(visitor, get_draw_command(rsx::method_registers)); + + auto &vertex_count = result.allocated_vertex_count; + auto &vertex_base = result.vertex_data_base; + + //Do actual vertex upload + auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); + size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX; + + VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE; + + if (required.first > 0) + { + //Check if cacheable + //Only data in the 'persistent' block may be cached + //TODO: make vertex cache keep local data beyond frame boundaries and hook notify command + bool in_cache = false; + bool to_store = false; + u32 storage_address = UINT32_MAX; + + if (m_vertex_layout.interleaved_blocks.size() == 1 && + rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array) + { + storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base; + if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first)) + { + in_cache = true; + m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first)); + } + else + { + to_store = true; + } + } + + if (!in_cache) + { + persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first); + m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first)); + + if (to_store) + { + //store ref in vertex cache + m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, 
(u32)persistent_offset); + } + } + + persistent_view = m_current_frame->buffer_views_to_clean.back()->value; + } + else + { + m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0)); + persistent_view = m_current_frame->buffer_views_to_clean.back()->value; + } + + if (required.second > 0) + { + volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second); + m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second)); + + volatile_view = m_current_frame->buffer_views_to_clean.back()->value; + } + else + { + m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0)); + volatile_view = m_current_frame->buffer_views_to_clean.back()->value; + } + + m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set); + + //Write all the data once if possible + if (required.first && required.second && volatile_offset > persistent_offset) + { + //Do this once for both to save time on map/unmap cycles + const size_t block_end = (volatile_offset + required.second); + const size_t block_size = block_end - persistent_offset; + const size_t volatile_offset_in_block = volatile_offset - persistent_offset; + + void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, (char*)block_mapping + volatile_offset_in_block); + m_attrib_ring_info.unmap(); + } + else + { + if (required.first > 0 && persistent_offset != UINT64_MAX) + { + void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, 
persistent_mapping, nullptr); + m_attrib_ring_info.unmap(); + } + + if (required.second > 0) + { + void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second); + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping); + m_attrib_ring_info.unmap(); + } + } + + return std::make_tuple(result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 3e5351e472..633b4cf2b0 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -30,17 +30,20 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { OS << "#version 450\n\n"; OS << "#extension GL_ARB_separate_shader_objects : enable\n"; - OS << "layout(std140, set = 0, binding = 0) uniform ScaleOffsetBuffer\n"; + OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"; OS << "{\n"; - OS << " mat4 scaleOffsetMat;\n"; - OS << " ivec4 userClipEnabled[2];\n"; - OS << " vec4 userClipFactor[2];\n"; + OS << " mat4 scale_offset_mat;\n"; + OS << " ivec4 user_clip_enabled[2];\n"; + OS << " vec4 user_clip_factor[2];\n"; + OS << " uint transform_branch_bits;\n"; + OS << " uint vertex_base_index;\n"; + OS << " ivec4 input_attributes[16];\n"; OS << "};\n"; vk::glsl::program_input in; in.location = SCALE_OFFSET_BIND_SLOT; in.domain = glsl::glsl_vertex_program; - in.name = "ScaleOffsetBuffer"; + in.name = "VertexContextBuffer"; in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); @@ -48,54 +51,21 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) { - std::vector> input_data; - for (const ParamType &PT : inputs) - { - for (const ParamItem &PI : PT.items) - { - input_data.push_back(std::make_tuple(PI.location, 
PI.name)); - } - } + OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable) + OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data) - /** - * Its is important that the locations are in the order that vertex attributes are expected. - * If order is not adhered to, channels may be swapped leading to corruption - */ + vk::glsl::program_input in; + in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT; + in.domain = glsl::glsl_vertex_program; + in.name = "persistent_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); - std::sort(input_data.begin(), input_data.end()); - - for (const std::tuple item : input_data) - { - for (const ParamType &PT : inputs) - { - for (const ParamItem &PI : PT.items) - { - if (PI.name == std::get<1>(item)) - { - vk::glsl::program_input in; - in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT; - in.domain = glsl::glsl_vertex_program; - in.name = PI.name + "_buffer"; - in.type = vk::glsl::input_type_texel_buffer; - - this->inputs.push_back(in); - - bool is_int = false; - for (auto &attrib : rsx_vertex_program.rsx_vertex_inputs) - { - if (attrib.location == std::get<0>(item)) - { - if (attrib.int_type) is_int = true; - break; - } - } - - std::string samplerType = is_int ? 
"isamplerBuffer" : "samplerBuffer"; - OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n"; - } - } - } - } + in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 1; + in.domain = glsl::glsl_vertex_program; + in.name = "volatile_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); } void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) @@ -103,7 +73,6 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[468];\n"; - OS << " uint transform_branch_bits;\n"; OS << "};\n\n"; vk::glsl::program_input in; @@ -150,13 +119,13 @@ static const vertex_reg_info reg_table[] = { "front_spec_color", true, "dst_reg4", "", false }, { "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG }, //Warning: With spir-v if you declare clip distance var, you must assign a value even when its disabled! 
Runtime does not assign a default value - { "gl_ClipDistance[0]", false, "dst_reg5", ".y * userClipFactor[0].x", false, "userClipEnabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 }, - { "gl_ClipDistance[1]", false, "dst_reg5", ".z * userClipFactor[0].y", false, "userClipEnabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 }, - { "gl_ClipDistance[2]", false, "dst_reg5", ".w * userClipFactor[0].z", false, "userClipEnabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, + { "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor[0].x", false, "user_clip_enabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 }, + { "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor[0].y", false, "user_clip_enabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 }, + { "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor[0].z", false, "user_clip_enabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, { "gl_PointSize", false, "dst_reg6", ".x", false }, - { "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, - { "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, - { "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, + { "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor[0].w", false, "user_clip_enabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, + { "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor[1].x", false, "user_clip_enabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, + { "gl_ClipDistance[5]", false, "dst_reg6", ".w * user_clip_factor[1].y", false, "user_clip_enabled[1].y > 0", "0.5", "", 
true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, { "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 }, { "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 }, { "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 }, @@ -212,44 +181,10 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: OS << "layout(location=" << vk::get_varying_register("front_spec_color").reg_location << ") out vec4 front_spec_color;\n"; } -namespace vk -{ - void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) - { - for (const auto &real_input : inputs) - { - if (real_input.location != PI.location) - continue; - - if (!real_input.is_array) - { - OS << " vec4 " << PI.name << " = vec4(texelFetch(" << PI.name << "_buffer, 0));\n"; - return; - } - - if (real_input.frequency > 1) - { - if (real_input.is_modulo) - { - OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex %" << real_input.frequency << "));\n"; - return; - } - - OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex /" << real_input.frequency << "));\n"; - return; - } - - OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n"; - return; - } - - OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n"; - } -} - void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) { glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); + glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv); std::string parameters = ""; for (int i = 0; i < 16; ++i) @@ -286,7 +221,9 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) - vk::add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + { + OS 
<< " vec4 " << PI.name << "= read_location(" << std::to_string(PI.location) << ");\n"; + } } } @@ -373,7 +310,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "dst_reg2")) OS << " front_spec_color = dst_reg2;\n"; - OS << " gl_Position = gl_Position * scaleOffsetMat;\n"; + OS << " gl_Position = gl_Position * scale_offset_mat;\n"; OS << "}\n"; }