rsx/gl/vulkan: Refactoring and partial vulkan rewrite
- Updates vulkan to use GPU vertex processing
- Rewrites vulkan to buffer entire frames and present when first available to avoid stalls
- Moves more state into dynamic descriptors to reduce program cache misses; fixes render pass conflicts before texture access
- Discards the incomplete cb at destruction to avoid refs to destroyed objects
- Moves set_viewport to the uninterruptible block before drawing in case the cb is switched before we're ready
- Manages frame contexts separately for easier async frame management
- Avoids wasteful create-destroy cycles when sampling rtts
This commit is contained in:
parent 6a707f515e
commit 00b0311c86
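The central idea of this commit is the frame context: all per-frame Vulkan state (present semaphore, descriptor pool and set, deferred-deletion lists, swapchain image index, swap command buffer) is grouped into a frame_context_t and rotated across VK_MAX_ASYNC_FRAMES slots instead of living in globals. A minimal C++ sketch of that rotation, using names from the diff below but with the members reduced to stubs (an illustration, not the actual RPCS3 code):

// Sketch only: rotation of buffered frame contexts. Names follow the diff;
// members are stubs, the real struct is in the VKGSRender.h hunks below.
#include <array>

constexpr unsigned VK_MAX_ASYNC_FRAMES = 2;

struct frame_context_t
{
	unsigned used_descriptors = 0; // reset when this frame's descriptor pool is recycled
	bool swap_pending = false;     // stand-in for swap_command_buffer->pending
};

struct frame_ring
{
	std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context{};
	unsigned m_current_queue_index = 0;
	frame_context_t* m_current_frame = &frame_context[0];

	// Rotate to the next context; a frame still in flight keeps its own
	// semaphore, descriptor pool and deletion lists until its swap completes,
	// so no CPU stall is needed here.
	void advance_queued_frames()
	{
		m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
		m_current_frame = &frame_context[m_current_queue_index];
	}
};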
@@ -11,6 +11,12 @@ namespace glsl
glsl_fragment_program = 1
};

enum glsl_rules
{
glsl_rules_opengl4,
glsl_rules_rpirv
};

static std::string getFloatTypeNameImpl(size_t elementCount)
{
switch (elementCount)
@@ -48,8 +54,10 @@ namespace glsl
fmt::throw_exception("Unknown compare function" HERE);
}

static void insert_vertex_input_fetch(std::stringstream& OS)
static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules)
{
std::string vertex_id_name = (rules == glsl_rules_opengl4) ? "gl_VertexID" : "gl_VertexIndex";

//Actually decode a vertex attribute from a raw byte stream
OS << "struct attribute_desc\n";
OS << "{\n";
@@ -194,16 +202,16 @@ namespace glsl
OS << "{\n";
OS << " attribute_desc desc = fetch_desc(location);\n";
OS << "\n";
OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n";
OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n";
OS << " if (desc.frequency == 0)\n";
OS << " vertex_id = 0;\n";
OS << " else if (desc.frequency > 1)\n";
OS << " {\n";
OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n";
OS << " if (desc.modulo != 0)\n";
OS << " vertex_id = gl_VertexID % desc.divisor;\n";
OS << " vertex_id = " << vertex_id_name << " % desc.divisor;\n";
OS << " else\n";
OS << " vertex_id = gl_VertexID / desc.divisor;\n";
OS << " vertex_id = " << vertex_id_name << " / desc.divisor;\n";
OS << " }\n";
OS << "\n";
OS << " if (desc.is_volatile != 0)\n";
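The fetch helper emitted above derives a per-attribute vertex index from the raw gl_VertexID / gl_VertexIndex using the RSX frequency-divider state. A C++ restatement of the emitted arithmetic, with attribute_desc reduced to plain ints for illustration (the field meanings are inferred from the generated comments):

// Illustrative C++ restatement of the vertex_id computation the generator
// emits above; attribute_desc fields are assumed to be plain integers.
struct attribute_desc
{
	int frequency; // 0 = constant, 1 = per vertex, >1 = frequency divided
	int modulo;    // non-zero selects modulo addressing
	int divisor;
};

int decode_vertex_id(int raw_id, int vertex_base_index, const attribute_desc& desc)
{
	int vertex_id = raw_id - vertex_base_index;
	if (desc.frequency == 0)
		vertex_id = 0; // every vertex reads the same element
	else if (desc.frequency > 1)
	{
		// Mirrors the emitted GLSL: the raw index is used here, since an
		// active vertex modifier implies vertex_base is 0 and can be ignored.
		if (desc.modulo != 0)
			vertex_id = raw_id % desc.divisor; // wrap within a small window
		else
			vertex_id = raw_id / desc.divisor; // advance once per 'divisor' vertices
	}
	return vertex_id;
}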
@@ -92,9 +92,8 @@ namespace

vertex_input_state operator()(const rsx::draw_array_command& command)
{
u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
u32 max_index = vertex_count - 1 + min_index;
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;

if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
@@ -153,7 +152,7 @@ namespace

vertex_input_state operator()(const rsx::draw_inlined_array& command)
{
u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;

if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
@@ -188,8 +187,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
auto &vertex_base = result.vertex_data_base;

//Do actual vertex upload
auto &required = calculate_memory_requirements(m_vertex_layout, vertex_count);

auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);

std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};

@@ -149,15 +149,10 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
OS << "out vec4 front_spec_color;\n";
}

namespace
{

}

void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
glsl::insert_vertex_input_fetch(OS);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4);

std::string parameters = "";
for (int i = 0; i < 16; ++i)
@@ -564,27 +564,11 @@ VKGSRender::VKGSRender() : GSRender()

m_current_command_buffer = &m_primary_cb_list[0];

//Create secondar command_buffer for parallel operations
//Create secondary command_buffer for parallel operations
m_secondary_command_buffer_pool.create((*m_device));
m_secondary_command_buffer.create(m_secondary_command_buffer_pool);

open_command_buffer();

for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
{
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));

VkClearColorValue clear_color{};
auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));

}

//VRAM allocation
m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000);
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
@@ -598,25 +582,25 @@ VKGSRender::VKGSRender() : GSRender()

std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);

//Generate frame contexts
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS };

std::vector<VkDescriptorPoolSize> sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool };

descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()));


null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);

VkFenceCreateInfo fence_info = {};
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;

vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
for (auto &ctx : frame_context)
{
ctx = {};
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()));
}

null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);

vk::initialize_compiler_context();

@@ -631,6 +615,25 @@ VKGSRender::VKGSRender() : GSRender()
m_vertex_cache.reset(new vk::null_vertex_cache());
else
m_vertex_cache.reset(new vk::weak_vertex_cache());

open_command_buffer();

for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
{
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));

VkClearColorValue clear_color{};
auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));

}

m_current_frame = &frame_context[0];
}

VKGSRender::~VKGSRender()
@@ -641,23 +644,9 @@ VKGSRender::~VKGSRender()
return;
}

//Close recording and wait for all to finish
close_render_pass();
CHECK_RESULT(vkEndCommandBuffer(*m_current_command_buffer));

for (auto &cb : m_primary_cb_list)
if (cb.pending) cb.wait();

//Wait for device to finish up with resources
vkDeviceWaitIdle(*m_device);

//Sync objects
if (m_present_semaphore)
{
vkDestroySemaphore((*m_device), m_present_semaphore, nullptr);
m_present_semaphore = nullptr;
}

//Texture cache
m_texture_cache.destroy();

@@ -678,10 +667,17 @@ VKGSRender::~VKGSRender()
null_buffer.reset();
null_buffer_view.reset();

//Temporary objects
m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
//Frame context
for (auto &ctx : frame_context)
{
vkDestroySemaphore((*m_device), ctx.present_semaphore, nullptr);
ctx.descriptor_pool.destroy();

ctx.buffer_views_to_clean.clear();
ctx.samplers_to_clean.clear();
ctx.framebuffers_to_clean.clear();
}

m_draw_fbo.reset();

//Render passes
@@ -699,8 +695,6 @@ VKGSRender::~VKGSRender()
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);

descriptor_pool.destroy();

//Command buffer
for (auto &cb : m_primary_cb_list)
cb.destroy();
@@ -736,8 +730,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (!flushable)
return false;

close_render_pass();

if (synchronized)
{
if (m_last_flushable_cb >= 0)
@@ -807,8 +799,21 @@ void VKGSRender::begin()
return;

//Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS ||
m_attrib_ring_info.is_critical() ||
if (m_current_frame->used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS)
{
//No need to stall if we have more than one frame queue anyway
flush_command_queue();

CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0));
m_current_frame->used_descriptors = 0;

m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
}

if (m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical())
@@ -818,20 +823,17 @@ void VKGSRender::begin()
flush_command_queue(true);
m_vertex_cache->purge();

CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_used_descriptors = 0;

m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();

std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
}

init_buffers();

if (!framebuffer_status_valid)
return;

VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorPool = m_current_frame->descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &descriptor_layouts;
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@@ -839,16 +841,11 @@ void VKGSRender::begin()
VkDescriptorSet new_descriptor_set;
CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set));

descriptor_sets = new_descriptor_set;
m_used_descriptors++;
m_current_frame->descriptor_set = new_descriptor_set;
m_current_frame->used_descriptors++;

std::chrono::time_point<steady_clock> start = steady_clock::now();

init_buffers();

if (!framebuffer_status_valid)
return;

float actual_line_width = rsx::method_registers.line_width();

vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
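begin() now recycles descriptors per frame context: when the current frame exhausts its quota it flushes its own submissions and resets only its own pool, leaving the other queued frame untouched. A self-contained sketch of that recycling; DESCRIPTOR_MAX_DRAW_CALLS exists in the diff, but the value used here is an assumption:

#include <vulkan/vulkan.h>

// Sketch of the per-frame descriptor recycling in begin(); the constant's
// value is assumed, in RPCS3 it is defined elsewhere.
constexpr uint32_t DESCRIPTOR_MAX_DRAW_CALLS = 4096; // assumption

struct frame_slice
{
	VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
	uint32_t used_descriptors = 0;
};

void recycle_descriptors_if_needed(VkDevice dev, frame_slice& frame)
{
	if (frame.used_descriptors < DESCRIPTOR_MAX_DRAW_CALLS)
		return;

	// flush_command_queue() runs first in the real code, so no submitted work
	// still references these sets; other frames own separate pools.
	vkResetDescriptorPool(dev, frame.descriptor_pool, 0);
	frame.used_descriptors = 0;
}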
@@ -901,20 +898,28 @@ void VKGSRender::end()
return;
}

std::chrono::time_point<steady_clock> program_start = steady_clock::now();

//Load program here since it is dependent on vertex state
if (!load_program())
if (!check_program_status())
{
LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw");
rsx::thread::end();
return;
}

std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();

close_render_pass(); //Texture upload stuff conflicts active RPs
//Load program
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();

//Close current pass to avoid conflict with texture functions
close_render_pass();

if (g_cfg.video.strict_rendering_mode)
{
@@ -967,11 +972,6 @@ void VKGSRender::end()
}
}

std::chrono::time_point<steady_clock> vertex_start0 = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end0 = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end0 - vertex_start0).count();

std::chrono::time_point<steady_clock> textures_start = steady_clock::now();

for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
@@ -980,7 +980,7 @@ void VKGSRender::end()
{
if (!rsx::method_registers.fragment_textures[i].enabled())
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}

@@ -989,7 +989,7 @@ void VKGSRender::end()
if (!texture0)
{
LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}

@@ -1016,7 +1016,7 @@ void VKGSRender::end()
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}

m_sampler_to_clean.push_back(std::make_unique<vk::sampler>(
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()),
!!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN),
@@ -1024,7 +1024,7 @@ void VKGSRender::end()
min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()),
is_depth_texture, depth_compare));

m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
}
}

@@ -1034,7 +1034,7 @@ void VKGSRender::end()
{
if (!rsx::method_registers.vertex_textures[i].enabled())
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}

@@ -1043,11 +1043,11 @@ void VKGSRender::end()
if (!texture0)
{
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}

m_sampler_to_clean.push_back(std::make_unique<vk::sampler>(
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
!!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN),
@@ -1055,7 +1055,7 @@ void VKGSRender::end()
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color())
));

m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
}
}

@@ -1066,9 +1066,12 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();

set_viewport();

begin_render_pass();

vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);

//Clear any 'dirty' surfaces - possible is a recycled cache surface is used
std::vector<VkClearAttachment> buffers_to_clear;
@@ -1108,11 +1111,7 @@ void VKGSRender::end()
vkCmdClearAttachments(*m_current_command_buffer, static_cast<u32>(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect);
}

std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);

std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();

std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info);
if (!index_info)
{
const auto vertex_count = std::get<1>(upload_info);
@@ -1133,7 +1132,7 @@ void VKGSRender::end()
vk::leave_uninterruptible();

std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();

copy_render_targets_to_dma_location();
m_draw_calls++;
@@ -1219,7 +1218,7 @@ void VKGSRender::clear_surface(u32 mask)
if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return;

if (!(mask & 0xF3)) return;
if (m_current_present_image == 0xFFFF) return;
if (m_current_frame->present_image == UINT32_MAX) return;

init_buffers();

@@ -1339,6 +1338,8 @@ void VKGSRender::copy_render_targets_to_dma_location()

if (g_cfg.video.write_color_buffers)
{
close_render_pass();

for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
{
if (!m_surface_info[index].pitch)
@@ -1351,6 +1352,8 @@ void VKGSRender::copy_render_targets_to_dma_location()

if (g_cfg.video.write_depth_buffer)
{
close_render_pass();

if (m_depth_surface_info.pitch)
{
m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
@@ -1374,7 +1377,7 @@ void VKGSRender::flush_command_queue(bool hard_sync)
if (hard_sync)
{
//swap handler checks the pending flag, so call it here
process_swap_request();
process_swap_request(m_current_frame);

//wait for the latest intruction to execute
m_current_command_buffer->pending = true;
@@ -1395,83 +1398,128 @@ void VKGSRender::flush_command_queue(bool hard_sync)
//Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];

//Soft sync if a present has not yet occured before consuming the wait event
for (auto &ctx : frame_context)
{
if (ctx.swap_command_buffer == m_current_command_buffer)
process_swap_request(&ctx, true);
}

m_current_command_buffer->reset();
}

open_command_buffer();
}

void VKGSRender::advance_queued_frames()
{
//Check all other frames for completion and clear resources
for (auto &ctx : frame_context)
{
if (&ctx == m_current_frame)
continue;

if (ctx.swap_command_buffer)
{
ctx.swap_command_buffer->poke();
if (ctx.swap_command_buffer->pending)
continue;

//Present the bound image
process_swap_request(&ctx, true);
}
}

//Only marks surfaces as dirty without actually deleting them so its safe to use
if (g_cfg.video.invalidate_surface_cache_every_frame)
m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer);

//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();

//texture cache is also double buffered to prevent use-after-free
m_texture_cache.flush();

m_vertex_cache->purge();

m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context[m_current_queue_index];
}

void VKGSRender::present(frame_context_t *ctx)
{
VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain);

VkPresentInfoKHR present = {};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present.pNext = nullptr;
present.swapchainCount = 1;
present.pSwapchains = &swap_chain;
present.pImageIndices = &ctx->present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
}

void VKGSRender::queue_swap_request()
{
//buffer the swap request and return
if (m_swap_command_buffer && m_swap_command_buffer->pending)
if (m_current_frame->swap_command_buffer &&
m_current_frame->swap_command_buffer->pending)
{
//Its probable that no actual drawing took place
process_swap_request();
process_swap_request(m_current_frame);
}

m_swap_command_buffer = m_current_command_buffer;
close_and_submit_command_buffer({ m_present_semaphore }, m_current_command_buffer->submit_fence);
m_current_frame->swap_command_buffer = m_current_command_buffer;
close_and_submit_command_buffer({ m_current_frame->present_semaphore }, m_current_command_buffer->submit_fence);
m_current_frame->swap_command_buffer->pending = true;

//Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();

m_swap_command_buffer->pending = true;
//Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}

void VKGSRender::process_swap_request()
void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
{
if (!m_swap_command_buffer)
if (!ctx->swap_command_buffer)
return;

if (m_swap_command_buffer->pending)
if (ctx->swap_command_buffer->pending)
{
//Perform hard swap here
m_swap_command_buffer->wait();

VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain);

VkPresentInfoKHR present = {};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present.pNext = nullptr;
present.swapchainCount = 1;
present.pSwapchains = &swap_chain;
present.pImageIndices = &m_current_present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
ctx->swap_command_buffer->wait();
free_resources = true;
}

//Clean up all the resources from the last frame
//Always present
present(ctx);

//Feed back damaged resources to the main texture cache for management...
//m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources);

m_rtts.free_invalidated();
m_texture_cache.flush();

if (g_cfg.video.invalidate_surface_cache_every_frame)
m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer);

m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();

m_framebuffer_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
if (free_resources)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
//Cleanup of reference sensitive resources
//TODO: These should be double buffered as well to prevent destruction of anything in use
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}

if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
ctx->buffer_views_to_clean.clear();
ctx->samplers_to_clean.clear();

ctx->framebuffers_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
}

m_vertex_cache->purge();

m_swap_command_buffer = nullptr;
ctx->swap_command_buffer = nullptr;
}

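The new swap path decouples submission from presentation: queue_swap_request() submits the frame's command buffer, signals the frame's own semaphore and returns, while process_swap_request() later polls (poke) or waits for completion, presents, and optionally frees that frame's deferred resources. A condensed sketch of the handshake, with the command buffer reduced to a stub and presentation left as a comment:

// Condensed sketch of the buffered-present handshake; command_buffer_chunk's
// poke()/wait()/pending are mimicked by a stub.
struct cb_stub
{
	bool pending = false;
	void poke() { /* query the submit fence; clear 'pending' once signalled */ }
	void wait() { /* block on the submit fence */ pending = false; }
};

struct frame_ctx
{
	cb_stub* swap_command_buffer = nullptr;
};

void process_swap_request_sketch(frame_ctx* ctx, bool free_resources = false)
{
	if (!ctx->swap_command_buffer)
		return; // nothing was queued for this frame

	if (ctx->swap_command_buffer->pending)
	{
		ctx->swap_command_buffer->wait(); // hard sync only if still executing
		free_resources = true;
	}

	// present(ctx) happens here unconditionally in the real code.

	if (free_resources)
	{
		// drop this frame's samplers, buffer views and aged framebuffers
	}

	ctx->swap_command_buffer = nullptr;
}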
void VKGSRender::do_local_task()
@@ -1482,6 +1530,7 @@ void VKGSRender::do_local_task()

//TODO: Determine if a hard sync is necessary
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
close_render_pass();
flush_command_queue();

m_flush_commands = false;
@@ -1509,11 +1558,8 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
}
}

bool VKGSRender::load_program(bool)
bool VKGSRender::check_program_status()
{
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;

auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
@@ -1528,24 +1574,29 @@ bool VKGSRender::load_program(bool)
};

get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
if (!current_fragment_program.valid) return false;

get_current_vertex_program();

auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;

vk::pipeline_props properties = {};

properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
bool unused;
bool update_blend_constants = false;
bool update_stencil_info_back = false;
bool update_stencil_info_front = false;
bool update_depth_bounds = false;

properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused);

if (rsx::method_registers.restart_index_enabled())
{
properties.ia.primitiveRestartEnable = VK_TRUE;
}
else
properties.ia.primitiveRestartEnable = VK_FALSE;


for (int i = 0; i < 4; ++i)
{
properties.att_state[i].colorWriteMask = 0xf;
@@ -1588,11 +1639,8 @@ bool VKGSRender::load_program(bool)
properties.att_state[render_targets[idx]].alphaBlendOp = equation_a;
}

auto blend_colors = rsx::get_constant_blend_colors();
properties.cs.blendConstants[0] = blend_colors[0];
properties.cs.blendConstants[1] = blend_colors[1];
properties.cs.blendConstants[2] = blend_colors[2];
properties.cs.blendConstants[3] = blend_colors[3];
//Blend constants are dynamic
update_blend_constants = true;
}
else
{
@@ -1618,8 +1666,7 @@ bool VKGSRender::load_program(bool)
if (rsx::method_registers.depth_bounds_test_enabled())
{
properties.ds.depthBoundsTestEnable = VK_TRUE;
properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min();
properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max();
update_depth_bounds = true;
}
else
properties.ds.depthBoundsTestEnable = VK_FALSE;
@@ -1627,9 +1674,6 @@ bool VKGSRender::load_program(bool)
if (rsx::method_registers.stencil_test_enabled())
{
properties.ds.stencilTestEnable = VK_TRUE;
properties.ds.front.writeMask = rsx::method_registers.stencil_mask();
properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask();
properties.ds.front.reference = rsx::method_registers.stencil_func_ref();
properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail());
properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass());
properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail());
@@ -1637,16 +1681,16 @@ bool VKGSRender::load_program(bool)

if (rsx::method_registers.two_sided_stencil_test_enabled())
{
properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask();
properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask();
properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref();
properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail());
properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass());
properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail());
properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func());
update_stencil_info_back = true;
}
else
properties.ds.back = properties.ds.front;

update_stencil_info_front = true;
}
else
properties.ds.stencilTestEnable = VK_FALSE;
@@ -1684,55 +1728,84 @@ bool VKGSRender::load_program(bool)
vk::enter_uninterruptible();

//Load current program from buffer
vertex_program.skip_vertex_input_check = true;
m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();

//TODO: Update constant buffers..
//1. Update scale-offset matrix
//2. Update vertex constants
//3. Update fragment constants
const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256);
vk::leave_uninterruptible();

u8 *buf = (u8*)m_uniform_buffer_ring_info.map(scale_offset_offset, 256);

/**
* NOTE: While VK's coord system resembles GLs, the clip volume is no longer symetrical in z
* Its like D3D without the flip in y (depending on how you build the spir-v)
*/
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);

m_uniform_buffer_ring_info.unmap();

m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets);

if (true)//m_transform_constants_dirty)
//Update dynamic state
if (update_blend_constants)
{
const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float));
buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float));
fill_vertex_program_constants_data(buf);
*(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits();
m_uniform_buffer_ring_info.unmap();

m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
m_transform_constants_dirty = false;
//Update blend constants
auto blend_colors = rsx::get_constant_blend_colors();
vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data());
}

if (update_stencil_info_front)
{
VkStencilFaceFlags face_flag = (update_stencil_info_back)? VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK;

vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref());

if (update_stencil_info_back)
{
vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask());
vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask());
vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref());
}
}

if (update_depth_bounds)
{
//Update depth bounds min/max
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}

return true;
}

void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
{
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;

const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float));
const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz);
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;

buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz);
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t vertex_constants_offset = vertex_state_offset + 512;
const size_t fragment_constants_offset = vertex_constants_offset + 8192;

//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);

//Vertex state
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 144));

//Vertex constants
buf = buf + 512;
fill_vertex_program_constants_data(buf);
m_transform_constants_dirty = false;

//Fragment constants
buf = buf + 8192;
if (fragment_constants_sz)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);

fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);

m_uniform_buffer_ring_info.unmap();

m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);

vk::leave_uninterruptible();

return true;
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 8192 }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
}

static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
@@ -1762,7 +1835,20 @@ static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
void VKGSRender::init_buffers(bool skip_reading)
{
//Clear any pending swap requests
process_swap_request();
for (auto &ctx : frame_context)
{
if (ctx.swap_command_buffer)
{
if (ctx.swap_command_buffer->pending)
ctx.swap_command_buffer->poke();

if (!ctx.swap_command_buffer->pending)
{
//process swap without advancing the frame base
process_swap_request(&ctx, true);
}
}
}

prepare_rtts();

@@ -1770,8 +1856,6 @@ void VKGSRender::init_buffers(bool skip_reading)
{
read_buffers();
}

set_viewport();
}

void VKGSRender::read_buffers()
@@ -1965,14 +2049,13 @@ void VKGSRender::prepare_rtts()
}
}

for (auto &fbo : m_framebuffer_to_clean)
for (auto &fbo : m_current_frame->framebuffers_to_clean)
{
if (fbo->matches(bound_images, clip_width, clip_height))
{
m_draw_fbo.swap(fbo);
m_draw_fbo->reset_refs();
framebuffer_found = true;
//LOG_ERROR(RSX, "Matching framebuffer exists, using that instead");
break;
}
}
@@ -2014,7 +2097,7 @@ void VKGSRender::prepare_rtts()
VkRenderPass current_render_pass = m_render_passes[idx];

if (m_draw_fbo)
m_framebuffer_to_clean.push_back(std::move(m_draw_fbo));
m_current_frame->framebuffers_to_clean.push_back(std::move(m_draw_fbo));

m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
}
@@ -2060,7 +2143,7 @@ void VKGSRender::flip(int buffer)
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();

close_render_pass();
process_swap_request();
process_swap_request(m_current_frame, true);

if (!resize_screen)
{
@@ -2095,8 +2178,8 @@ void VKGSRender::flip(int buffer)

aspect_ratio.size = new_size;

//Prepare surface for new frame
CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image));
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), UINT64_MAX, m_current_frame->present_semaphore, VK_NULL_HANDLE, &m_current_frame->present_image));

//Blit contents to screen..
vk::image* image_to_flip = nullptr;
@@ -2106,7 +2189,7 @@ void VKGSRender::flip(int buffer)
else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr)
image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1]);

VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image);
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image);
if (image_to_flip)
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
@@ -2117,9 +2200,9 @@ void VKGSRender::flip(int buffer)
//No draw call was issued!
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
VkClearColorValue clear_black = { 0 };
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
}

std::unique_ptr<vk::framebuffer_holder> direct_fbo;
@@ -2144,14 +2227,14 @@ void VKGSRender::flip(int buffer)
size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
VkRenderPass single_target_pass = m_render_passes[idx];

for (auto It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++)
for (auto It = m_current_frame->framebuffers_to_clean.begin(); It != m_current_frame->framebuffers_to_clean.end(); It++)
{
auto &fbo = *It;
if (fbo->attachments[0]->info.image == target_image)
{
direct_fbo.swap(fbo);
direct_fbo->reset_refs();
m_framebuffer_to_clean.erase(It);
m_current_frame->framebuffers_to_clean.erase(It);
break;
}
}
@@ -2189,7 +2272,7 @@ void VKGSRender::flip(int buffer)
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message);

vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
m_current_frame->framebuffers_to_clean.push_back(std::move(direct_fbo));
}

queue_swap_request();
@@ -2262,9 +2345,13 @@ void VKGSRender::flip(int buffer)
m_current_command_buffer->reset();
open_command_buffer();

//Do cleanup
m_swap_command_buffer = m_current_command_buffer;
process_swap_request();
//Do cleanup; also present the previous frame for this frame if available
//Don't bother scheduling a swap event if the frame context is still uninitialized (no previous frame)
if (m_current_frame->present_image != UINT32_MAX)
{
m_current_frame->swap_command_buffer = m_current_command_buffer;
process_swap_request(m_current_frame);
}
}

std::chrono::time_point<steady_clock> flip_end = steady_clock::now();

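The new load_program() above writes vertex state, vertex constants and fragment constants through one 256-byte-aligned ring-buffer allocation and binds three sub-ranges of it, instead of making three separate allocations. A small sketch of that slicing; the offsets are taken from the diff, while the struct and function names are illustrative:

// Sketch of the single-allocation uniform layout used by the new
// load_program(); offsets follow the diff, names here are illustrative.
#include <cstddef>

struct uniform_slices
{
	size_t vertex_state_offset;      // 512 bytes: scale-offset @0, user clip @64,
	                                 // branch bits @128, vertex_base @132, layout @144
	size_t vertex_constants_offset;  // 8192 bytes = 512 vec4 constants
	size_t fragment_constants_offset;
	size_t total_size;
};

uniform_slices plan_uniform_block(size_t base_offset, size_t fragment_buffer_sz)
{
	uniform_slices s;
	s.vertex_state_offset = base_offset;
	s.vertex_constants_offset = base_offset + 512;
	s.fragment_constants_offset = s.vertex_constants_offset + 8192;
	s.total_size = 512 + 8192 + fragment_buffer_sz;
	return s; // one map/unmap covers all three bind_uniform ranges
}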
@@ -25,11 +25,12 @@ namespace vk

//Heap allocation sizes in MB
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 32
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128

#define VK_MAX_ASYNC_CB_COUNT 64
#define VK_MAX_ASYNC_FRAMES 2

struct command_buffer_chunk: public vk::command_buffer
{
@@ -135,32 +136,43 @@ private:
vk::vk_data_heap m_texture_upload_buffer_ring_info;

//Vulkan internals
u32 m_current_present_image = 0xFFFF;
VkSemaphore m_present_semaphore = nullptr;

vk::command_pool m_command_buffer_pool;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;

command_buffer_chunk* m_current_command_buffer = nullptr;
command_buffer_chunk* m_swap_command_buffer = nullptr;

u32 m_current_cb_index = 0;

std::mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer;

std::array<VkRenderPass, 120> m_render_passes;
VkDescriptorSetLayout descriptor_layouts;
VkDescriptorSet descriptor_sets;
VkPipelineLayout pipeline_layout;
vk::descriptor_pool descriptor_pool;
u32 m_current_cb_index = 0;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
command_buffer_chunk* m_current_command_buffer = nullptr;

std::array<VkRenderPass, 120> m_render_passes;

VkDescriptorSetLayout descriptor_layouts;
VkPipelineLayout pipeline_layout;

std::vector<std::unique_ptr<vk::buffer_view> > m_buffer_view_to_clean;
std::vector<std::unique_ptr<vk::sampler> > m_sampler_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder> > m_framebuffer_to_clean;
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;

struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;

std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder>> framebuffers_to_clean;

u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
};

std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context;

u32 m_current_queue_index = 0;
frame_context_t* m_current_frame = nullptr;

u32 m_client_width = 0;
u32 m_client_height = 0;

@@ -183,7 +195,6 @@ private:
s64 m_draw_time = 0;
s64 m_flip_time = 0;

u32 m_used_descriptors = 0;
u8 m_draw_buffers_count = 0;

bool framebuffer_status_valid = false;
@@ -201,6 +212,9 @@ private:
std::thread::id rsx_thread;

bool render_pass_open = false;

//Vertex layout
rsx::vertex_input_layout m_vertex_layout;

#ifdef __linux__
Display *m_display_handle = nullptr;
@@ -220,15 +234,18 @@ private:

void flush_command_queue(bool hard_sync = false);
void queue_swap_request();
void process_swap_request();
void process_swap_request(frame_context_t *ctx, bool free_resources = false);
void advance_queued_frames();
void present(frame_context_t *ctx);

void begin_render_pass();
void close_render_pass();

/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
/// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type)
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
public:
bool load_program(bool fast_update = false);
bool check_program_status();
void load_program(u32 vertex_count, u32 vertex_base);
void init_buffers(bool skip_reading = false);
void read_buffers();
void write_buffers();

@@ -19,17 +19,22 @@ namespace vk

bool operator==(const pipeline_props& other) const
{
if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo)))

if (render_pass != other.render_pass)
return false;

if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
if (render_pass != other.render_pass)

if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo)))
return false;

if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;

if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;

return num_targets == other.num_targets;
@@ -90,9 +95,6 @@ struct VKTraits
static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout)
{
// pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors;
// pstate.cb.pAttachments = pstate.att_state;
// pstate.cb.attachmentCount = pstate.num_targets;

VkPipelineShaderStageCreateInfo shader_stages[2] = {};
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@@ -111,6 +113,11 @@ struct VKTraits
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
dynamic_state_info.pDynamicStates = dynamic_state_descriptors;

VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
@@ -144,7 +151,6 @@ struct VKTraits
info.renderPass = pipelineProperties.render_pass;

CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline));

pipeline_storage_type result = std::make_unique<vk::glsl::program>(dev, pipeline, vertexProgramData.uniforms, fragmentProgramData.uniforms);

return result;

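With depth bounds, blend constants and the stencil masks/reference added to the dynamic state list above, those values drop out of pipeline_props and are set via vkCmdSet* at draw time (see the check_program_status() hunks), so frequently-toggled registers no longer force new pipeline objects. A sketch of declaring that dynamic set with the Vulkan API:

#include <vulkan/vulkan.h>

// Sketch: the dynamic states this commit adds (beyond viewport/scissor/line
// width) keep volatile values out of the pipeline key, cutting cache misses.
VkPipelineDynamicStateCreateInfo make_dynamic_state_info(VkDynamicState (&storage)[8])
{
	uint32_t count = 0;
	storage[count++] = VK_DYNAMIC_STATE_VIEWPORT;
	storage[count++] = VK_DYNAMIC_STATE_SCISSOR;
	storage[count++] = VK_DYNAMIC_STATE_LINE_WIDTH;
	storage[count++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
	storage[count++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
	storage[count++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
	storage[count++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
	storage[count++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;

	VkPipelineDynamicStateCreateInfo info = {};
	info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
	info.dynamicStateCount = count;
	info.pDynamicStates = storage;
	return info; // caller must keep 'storage' alive until pipeline creation
}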
@ -21,6 +21,7 @@ namespace vk
|
||||
bool dirty = false;
|
||||
u16 native_pitch = 0;
|
||||
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
std::unique_ptr<vk::image_view> view;
|
||||
|
||||
render_target *old_contents = nullptr; //Data occupying the memory location that this surface is replacing
|
||||
|
||||
@ -40,6 +41,15 @@ namespace vk
			: image(dev, memory_type_index, access_flags, image_type, format, width, height, depth,
				mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
		{}

		vk::image_view* get_view()
		{
			if (!view)
				view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format,
					native_component_map, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT)));

			return view.get();
		}
	};
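Caching the view on the surface itself is what removes the per-sample create/destroy cycle mentioned in the commit message. The pattern in isolation (generic sketch, not from the diff):

#include <memory>

// Same lazy-init shape as render_target::get_view() above: build the object on
// first request, then hand out the cached pointer for the surface's lifetime.
template <typename T, typename Factory>
T* get_or_create(std::unique_ptr<T>& slot, Factory&& make)
{
	if (!slot)
		slot = make();
	return slot.get();
}

The stencil bit is masked out of the subresource range because a view used to sample a depth-stencil image must select a single aspect.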
	struct framebuffer_holder : public vk::framebuffer, public ref_counted

@ -362,6 +362,9 @@ namespace vk

		void reset_descriptors()
		{
			if (m_used_descriptors == 0)
				return;

			vkResetDescriptorPool(device, m_descriptor_pool, 0);
			m_used_descriptors = 0;
		}
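The early-out matters because vkResetDescriptorPool is the whole cleanup: one call returns every set allocated from the pool. A per-frame recycle step might look like this (sketch; the function and parameter names are assumptions, not from the diff):

void begin_frame_context(VkDevice dev, VkDescriptorPool pool, u32& used_descriptors)
{
	if (used_descriptors == 0)
		return; // nothing was allocated, skip the driver call

	vkResetDescriptorPool(dev, pool, 0); // invalidates all sets from this pool at once
	used_descriptors = 0;
}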
@ -322,9 +322,14 @@ namespace vk
		std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
		std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);

		//Stuff that has been dereferenced goes into these
		std::vector<std::unique_ptr<vk::image_view>> m_temporary_image_view;
		std::vector<std::unique_ptr<vk::image>> m_dirty_textures;

		//Stuff that has been dereferenced twice goes here. Contents are evicted before new ones are added
		std::vector<std::unique_ptr<vk::image_view>> m_image_views_to_purge;
		std::vector<std::unique_ptr<vk::image>> m_images_to_purge;

		// Keep track of cache misses to pre-emptively flush some addresses
		struct framebuffer_memory_characteristics
		{

@ -431,6 +436,9 @@ namespace vk

			m_temporary_image_view.clear();
			m_dirty_textures.clear();

			m_image_views_to_purge.clear();
			m_images_to_purge.clear();
		}

		//Helpers
@ -544,7 +552,7 @@ namespace vk
		}

		//First check if it exists as an rtt...
		vk::image *rtt_texture = nullptr;
		vk::render_target *rtt_texture = nullptr;
		if (rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
		{
			if (g_cfg.video.strict_rendering_mode)

@ -559,10 +567,7 @@ namespace vk
				}
			}

			m_temporary_image_view.push_back(std::make_unique<vk::image_view>(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format,
				rtt_texture->native_component_map,
				vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)));
			return m_temporary_image_view.back().get();
			return rtt_texture->get_view();
		}

		if (rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))

@ -576,10 +581,7 @@ namespace vk
				}
			}

			m_temporary_image_view.push_back(std::make_unique<vk::image_view>(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format,
				rtt_texture->native_component_map,
				vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT)));
			return m_temporary_image_view.back().get();
			return rtt_texture->get_view();
		}

		u32 raw_format = tex.format();
@ -912,8 +914,11 @@ namespace vk

		void flush()
		{
			m_dirty_textures.clear();
			m_temporary_image_view.clear();
			m_image_views_to_purge.clear();
			m_images_to_purge.clear();

			m_image_views_to_purge = std::move(m_temporary_image_view);
			m_images_to_purge = std::move(m_dirty_textures);
		}
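flush() gives every temporary a one-frame grace period: objects used this frame move into the purge lists and are only destroyed on the next flush, so command buffers still in flight never reference a dead image. Reduced to its core (sketch):

#include <utility>
#include <vector>

template <typename T>
void age_out(std::vector<T>& fresh, std::vector<T>& aged)
{
	aged.clear();            // destroys objects that went unused for a full frame
	aged = std::move(fresh); // last frame's temporaries get one more frame to live
}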

		void record_cache_miss(cached_texture_section &tex)
@ -7,53 +7,6 @@

namespace vk
{
	bool requires_component_expansion(rsx::vertex_base_type type, u32 size)
	{
		if (size == 3)
		{
			switch (type)
			{
			case rsx::vertex_base_type::f:
				return true;
			}
		}

		return false;
	}

	u32 get_suitable_vk_size(rsx::vertex_base_type type, u32 size)
	{
		if (size == 3)
		{
			switch (type)
			{
			case rsx::vertex_base_type::f:
				return 16;
			}
		}

		return rsx::get_vertex_type_size_on_host(type, size);
	}

	VkFormat get_suitable_vk_format(rsx::vertex_base_type type, u8 size)
	{
		/**
		 * Set up buffer fetches to only work on 4-component access. This is hardware dependent, so we use 4-component access to avoid branching based on IHV implementation
		 * AMD GCN 1.0 for example does not support RGB32 formats for texel buffers
		 */
		const VkFormat vec1_types[] = { VK_FORMAT_R16_SNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8_UINT };
		const VkFormat vec2_types[] = { VK_FORMAT_R16G16_SNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8_UINT };
		const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED!
		const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT };

		const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types };

		if (type > rsx::vertex_base_type::ub256)
			fmt::throw_exception("VKGS error: unknown vertex base type 0x%x" HERE, (u32)type);

		return vec_selectors[size][(int)type];
	}
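Putting the two helpers together: a 3-component float attribute is widened to a 4-component fetch, both in the format table and in the per-element size. A worked example (the values follow directly from the functions above):

// rsx type f, size 3 maps to the vec3 row, which deliberately returns a vec4 format:
const VkFormat fmt = vk::get_suitable_vk_format(rsx::vertex_base_type::f, 3);
// fmt == VK_FORMAT_R32G32B32A32_SFLOAT

const u32 element_size = vk::get_suitable_vk_size(rsx::vertex_base_type::f, 3);
// element_size == 16 bytes (vec4), not the 12 bytes the source data occupies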

	VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification)
	{
		requires_modification = false;

@ -92,114 +45,6 @@ namespace vk
		return !result;
	}
	template <typename T, u32 padding>
	void copy_inlined_data_to_buffer(void *src_data, void *dst_data, u32 vertex_count, rsx::vertex_base_type type, u8 src_channels, u8 dst_channels, u16 element_size, u16 stride)
	{
		u8 *src = static_cast<u8*>(src_data);
		u8 *dst = static_cast<u8*>(dst_data);

		for (u32 i = 0; i < vertex_count; ++i)
		{
			T* src_ptr = reinterpret_cast<T*>(src);
			T* dst_ptr = reinterpret_cast<T*>(dst);

			switch (type)
			{
			case rsx::vertex_base_type::ub:
			{
				if (src_channels == 4)
				{
					dst[0] = src[3];
					dst[1] = src[2];
					dst[2] = src[1];
					dst[3] = src[0];

					break;
				}
			}
			default:
			{
				for (u8 ch = 0; ch < dst_channels; ++ch)
				{
					if (ch < src_channels)
					{
						*dst_ptr = *src_ptr;
						src_ptr++;
					}
					else
						*dst_ptr = (T)(padding);

					dst_ptr++;
				}
			}
			}

			src += stride;
			dst += element_size;
		}
	}
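The ub fast path above reverses the four bytes of each element, presumably because the source words come from big-endian RSX memory. A worked example (input values assumed):

u8 src[4] = { 0xAA, 0xBB, 0xCC, 0xDD }; // one RGBA8 element as stored in guest memory
u8 dst[4];
dst[0] = src[3]; dst[1] = src[2]; dst[2] = src[1]; dst[3] = src[0];
// dst == { 0xDD, 0xCC, 0xBB, 0xAA }: the byte-swapped order the shader expects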

	void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count)
	{
		switch (type)
		{
		case rsx::vertex_base_type::f:
		{
			if (vertex_size == 3)
			{
				float *dst = reinterpret_cast<float*>(data);
				for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4)
					dst[idx] = 1.f;
			}

			break;
		}
		case rsx::vertex_base_type::sf:
		{
			if (vertex_size == 3)
			{
				/**
				 * Pad the 4th component for half-float arrays to 1, since texelFetch does not mask components
				 */
				u16 *dst = reinterpret_cast<u16*>(data);
				for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4)
					dst[idx] = 0x3c00;
			}

			break;
		}
		}
	}
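0x3c00 is the IEEE-754 binary16 encoding of 1.0, which is why it is the padding value for the half-float case. A quick compile-time check:

// binary16 layout: 1 sign bit | 5 exponent bits | 10 mantissa bits
// 1.0 -> sign 0, biased exponent 15, mantissa 0: 0b0'01111'0000000000
static_assert((15u << 10) == 0x3c00, "half-float 1.0 bit pattern");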

	/**
	 * Template: Expand any N-component vector to a larger X-component vector and pad unused slots with the given padding value
	 */
	template<typename T, u8 src_components, u8 dst_components, u32 padding>
	void expand_array_components(const T* src_data, void *dst_ptr, u32 vertex_count)
	{
		T* src = const_cast<T*>(src_data);
		T* dst = static_cast<T*>(dst_ptr);

		for (u32 index = 0; index < vertex_count; ++index)
		{
			for (u8 channel = 0; channel < dst_components; channel++)
			{
				if (channel < src_components)
				{
					*dst = *src;

					dst++;
					src++;
				}
				else
				{
					*dst = (T)(padding);
					dst++;
				}
			}
		}
	}
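The register-upload path below instantiates this as <float, 3, 4, 1>; a worked example with two vertices (values assumed):

float src[6] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }; // two vec3 inputs
float dst[8] = {};
vk::expand_array_components<float, 3, 4, 1>(src, dst, 2);
// dst == { 1, 2, 3, 1,  4, 5, 6, 1 }: w is padded with (float)1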

	VkIndexType get_index_type(rsx::index_array_type type)
	{
		switch (type)

@ -215,20 +60,7 @@ namespace vk

namespace
{
	static constexpr std::array<const char*, 16> s_reg_table =
	{
		"in_pos_buffer", "in_weight_buffer", "in_normal_buffer",
		"in_diff_color_buffer", "in_spec_color_buffer",
		"in_fog_buffer",
		"in_point_size_buffer", "in_7_buffer",
		"in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer",
		"in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer"
	};

	/**
	 * Creates and fills an index buffer emulating an unsupported primitive type.
	 * Returns index_count and (offset_in_index_buffer, index_type)
	 */
	std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
		const rsx::draw_clause& clause, u32 vertex_count,
		vk::vk_data_heap& m_index_buffer_ring_info)

@ -247,161 +79,49 @@ namespace
			index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16));
	}
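For a sense of what the emulation buffer contains: a primitive the hardware lacks, such as a triangle fan, becomes an indexed triangle list over the same vertices. A minimal sketch of the fan case (an assumed illustration of the general approach; u16 is the codebase's 16-bit alias, matching VK_INDEX_TYPE_UINT16 above):

#include <vector>

// TRIANGLE_FAN with N vertices -> (N - 2) triangles, 3 * (N - 2) u16 indices
std::vector<u16> emulate_triangle_fan(u16 vertex_count)
{
	std::vector<u16> indices;
	for (u16 i = 1; i + 1 < vertex_count; ++i)
	{
		indices.push_back(0); // every triangle shares the first vertex
		indices.push_back(i);
		indices.push_back(static_cast<u16>(i + 1));
	}
	return indices;
}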

	struct vertex_buffer_visitor
	struct vertex_input_state
	{
		vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
			vk::glsl::program* prog, VkDescriptorSet desc_set,
			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
			vk::vertex_cache* vertex_cache)
			: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
			  descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
			  vertex_cache(vertex_cache)
		{
		}

		void operator()(const rsx::vertex_array_buffer& vertex_array)
		{
			if (!m_program->has_uniform(s_reg_table[vertex_array.index]))
				return;

			// Fill vertex_array
			const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
			const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
			const u32 upload_size = real_element_size * vertex_count;
			const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size);
			const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();

			if (auto found = vertex_cache->find_vertex_range(local_addr, format, upload_size))
			{
				m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, found->offset_in_heap, upload_size));
				m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
				return;
			}

			VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
			void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);

			gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
			write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size);

			//Padding the vertex buffer should be done after the writes have been done
			//write_vertex_data function may 'dirty' unused sections of the buffer as optimization
			vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);

			m_attrib_ring_info.unmap();

			vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer);

			m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
			m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
		}

		void operator()(const rsx::vertex_array_register& vertex_register)
		{
			if (!m_program->has_uniform(s_reg_table[vertex_register.index]))
				return;

			size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
			const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);

			size_t offset_in_attrib_buffer = 0;

			if (vk::requires_component_expansion(vertex_register.type, vertex_register.attribute_size))
			{
				const u32 num_stored_verts = static_cast<u32>(
					data_size / (sizeof(float) * vertex_register.attribute_size));
				const u32 real_element_size = vk::get_suitable_vk_size(vertex_register.type, vertex_register.attribute_size);

				data_size = real_element_size * num_stored_verts;
				offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
				void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);

				vk::expand_array_components<float, 3, 4, 1>(reinterpret_cast<const float*>(vertex_register.data.data()), dst, num_stored_verts);
				m_attrib_ring_info.unmap();
			}
			else
			{
				offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
				void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
				memcpy(dst, vertex_register.data.data(), data_size);
				m_attrib_ring_info.unmap();
			}

			m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
			m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_register.index], descriptor_sets);
		}

		void operator()(const rsx::empty_vertex_array& vbo)
		{
			if (!m_program->has_uniform(s_reg_table[vbo.index]))
				return;

			m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
			m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets);
		}

	protected:
		VkDevice device;
		u32 vertex_count;
		vk::vk_data_heap& m_attrib_ring_info;
		vk::glsl::program* m_program;
		VkDescriptorSet descriptor_sets;
		std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
		vk::vertex_cache* vertex_cache;
		VkPrimitiveTopology native_primitive_type;
		u32 vertex_draw_count;
		u32 allocated_vertex_count;
		u32 vertex_data_base;
		u32 vertex_index_base;
		std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
	};

	using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
		rsx::vertex_array_register, rsx::empty_vertex_array>>;
	struct draw_command_visitor
	{
		using result_type = std::tuple<VkPrimitiveTopology, u32,
			std::optional<std::tuple<VkDeviceSize, VkIndexType>>>;

		draw_command_visitor(VkDevice device, vk::vk_data_heap& index_buffer_ring_info,
			vk::vk_data_heap& attrib_ring_info, vk::glsl::program* program,
			VkDescriptorSet descriptor_sets,
			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
			std::function<attribute_storage(
				const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
				get_vertex_buffers_f,
			VKGSRender *thread)
			: m_device(device), m_index_buffer_ring_info(index_buffer_ring_info),
			  m_attrib_ring_info(attrib_ring_info), m_program(program),
			  m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean),
			  get_vertex_buffers(get_vertex_buffers_f),
			  rsxthr(thread)
		draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout)
			: m_index_buffer_ring_info(index_buffer_ring_info)
			, m_vertex_layout(layout)
		{
		}

		result_type operator()(const rsx::draw_array_command& command)
		vertex_input_state operator()(const rsx::draw_array_command& command)
		{
			bool primitives_emulated = false;
			VkPrimitiveTopology prims = vk::get_appropriate_topology(
				rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
			u32 index_count = 0;
			std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;

			u32 min_index =
				rsx::method_registers.current_draw_clause.first_count_commands.front().first;
			u32 max_index =
				rsx::method_registers.current_draw_clause.get_elements_count() + min_index - 1;
			const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
			const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;

			if (primitives_emulated)
			{
				u32 index_count;
				std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;

				if (primitives_emulated) {
					std::tie(index_count, index_info) =
						generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
							max_index - min_index + 1, m_index_buffer_ring_info);
				}
				else
				{
					index_count = rsx::method_registers.current_draw_clause.get_elements_count();
							vertex_count, m_index_buffer_ring_info);

				return{ prims, index_count, vertex_count, min_index, 0, index_info };
			}

			upload_vertex_buffers(min_index, max_index);
			return std::make_tuple(prims, index_count, index_info);
			return{ prims, vertex_count, vertex_count, min_index, 0, {} };
		}

		result_type operator()(const rsx::draw_indexed_array_command& command)
		vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
		{
			bool primitives_emulated = false;
			VkPrimitiveTopology prims = vk::get_appropriate_topology(
@ -438,146 +158,150 @@ namespace
			std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
				std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));

			upload_vertex_buffers(0, max_index);
			return std::make_tuple(prims, index_count, index_info);
		}

		result_type operator()(const rsx::draw_inlined_array& command)
		{
			bool primitives_emulated = false;
			VkPrimitiveTopology prims = vk::get_appropriate_topology(
				rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
			u32 index_count = upload_inlined_array();

			if (!primitives_emulated) {
				return std::make_tuple(prims, index_count, std::nullopt);
			//check for vertex arrays with frequency modifiers
			for (auto &block : m_vertex_layout.interleaved_blocks)
			{
				if (block.min_divisor > 1)
				{
					//Ignore base offsets and return real results
					//The upload function will optimize the uploaded range anyway
					return{ prims, index_count, max_index, 0, 0, index_info };
				}
			}

			return {prims, index_count, (max_index - min_index + 1), min_index, min_index, index_info};
		}
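The divisor check above exists because a frequency-modified attribute is fetched at vertex_id / divisor (or vertex_id % divisor in modulo mode), so an upload range rebased to min_index would index the wrong elements; reporting the full range sidesteps that. The index math in isolation (sketch):

u32 effective_index(u32 vertex_id, u32 divisor, bool modulo)
{
	// divisor == 4: the attribute advances once every four vertices
	return modulo ? (vertex_id % divisor) : (vertex_id / divisor);
}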

		vertex_input_state operator()(const rsx::draw_inlined_array& command)
		{
			bool primitives_emulated = false;
			auto &draw_clause = rsx::method_registers.current_draw_clause;
			VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated);

			const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;

			if (!primitives_emulated)
			{
				return{ prims, vertex_count, vertex_count, 0, 0, {} };
			}

			u32 index_count;
			std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
			std::tie(index_count, index_info) = generate_emulating_index_buffer(
				rsx::method_registers.current_draw_clause, index_count, m_index_buffer_ring_info);
			return std::make_tuple(prims, index_count, index_info);
			std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info);
			return{ prims, index_count, vertex_count, 0, 0, index_info };
		}
	private:
		vk::vk_data_heap& m_index_buffer_ring_info;
		VkDevice m_device;
		vk::vk_data_heap& m_attrib_ring_info;
		vk::glsl::program* m_program;
		VkDescriptorSet m_descriptor_sets;
		std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
		std::function<attribute_storage(
			const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
			get_vertex_buffers;
		VKGSRender* rsxthr;

		void upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
		{
			const u32 vertex_count = vertex_max_index - min_index + 1;

			vertex_buffer_visitor visitor(vertex_count, m_device,
				m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get());

			const auto& vertex_buffers = get_vertex_buffers(
				rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});

			for (auto &vbo : vertex_buffers)
				std::apply_visitor(visitor, vbo);
		}

		u32 upload_inlined_array()
		{
			u32 stride = 0;
			u32 offsets[rsx::limits::vertex_count] = {0};

			for (u32 i = 0; i < rsx::limits::vertex_count; ++i) {
				const auto& info = rsx::method_registers.vertex_arrays_info[i];
				if (!info.size()) continue;

				offsets[i] = stride;
				stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
			}

			u32 vertex_draw_count =
				(u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() *
					sizeof(u32)) /
				stride;

			for (int index = 0; index < rsx::limits::vertex_count; ++index)
			{
				auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];

				if (!m_program->has_uniform(s_reg_table[index])) continue;

				if (!vertex_info.size()) // disabled
				{
					m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
					m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
					continue;
				}

				const u32 element_size =
					vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
				const u32 data_size = element_size * vertex_draw_count;
				const VkFormat format =
					vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());

				size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
				u8* src = reinterpret_cast<u8*>(
					rsx::method_registers.current_draw_clause.inline_vertex_array.data());
				u8* dst =
					static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));

				src += offsets[index];
				u8 opt_size = vertex_info.size();

				if (vertex_info.size() == 3) opt_size = 4;

				// TODO: properly handle cmp type
				if (vertex_info.type() == rsx::vertex_base_type::cmp)
					LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");

				switch (vertex_info.type())
				{
				case rsx::vertex_base_type::f:
					vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count,
						vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
					break;
				case rsx::vertex_base_type::sf:
					vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count,
						vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
					break;
				case rsx::vertex_base_type::s1:
				case rsx::vertex_base_type::ub:
				case rsx::vertex_base_type::ub256:
					vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count,
						vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
					break;
				case rsx::vertex_base_type::s32k:
				case rsx::vertex_base_type::cmp:
					vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count,
						vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
					break;
				default: fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
				}

				m_attrib_ring_info.unmap();
				m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device,
					m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
				m_program->bind_uniform(
					m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
			}

			return vertex_draw_count;
		}
		rsx::vertex_input_layout& m_vertex_layout;
	};
}
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>
VKGSRender::upload_vertex_data()
{
	draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
		descriptor_sets, m_buffer_view_to_clean,
		[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, m_program->get_vertex_input_attributes_mask()); }, this);
	return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
	m_vertex_layout = analyse_inputs_interleaved();

	draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
	auto result = std::apply_visitor(visitor, get_draw_command(rsx::method_registers));

	auto &vertex_count = result.allocated_vertex_count;
	auto &vertex_base = result.vertex_data_base;

	//Do actual vertex upload
	auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
	size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;

	VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE;

	if (required.first > 0)
	{
		//Check if cacheable
		//Only data in the 'persistent' block may be cached
		//TODO: make vertex cache keep local data beyond frame boundaries and hook notify command
		bool in_cache = false;
		bool to_store = false;
		u32 storage_address = UINT32_MAX;

		if (m_vertex_layout.interleaved_blocks.size() == 1 &&
			rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
		{
			storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
			if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
			{
				in_cache = true;
				m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first));
			}
			else
			{
				to_store = true;
			}
		}

		if (!in_cache)
		{
			persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first);
			m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first));

			if (to_store)
			{
				//store ref in vertex cache
				m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, (u32)persistent_offset);
			}
		}

		persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
	}
	else
	{
		m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
		persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
	}

	if (required.second > 0)
	{
		volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second);
		m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));

		volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
	}
	else
	{
		m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
		volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
	}

	m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
	m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set);

	//Write all the data once if possible
	if (required.first && required.second && volatile_offset > persistent_offset)
	{
		//Do this once for both to save time on map/unmap cycles
		const size_t block_end = (volatile_offset + required.second);
		const size_t block_size = block_end - persistent_offset;
		const size_t volatile_offset_in_block = volatile_offset - persistent_offset;

		void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size);
		write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, (char*)block_mapping + volatile_offset_in_block);
		m_attrib_ring_info.unmap();
	}
	else
	{
		if (required.first > 0 && persistent_offset != UINT64_MAX)
		{
			void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first);
			write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
			m_attrib_ring_info.unmap();
		}

		if (required.second > 0)
		{
			void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second);
			write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
			m_attrib_ring_info.unmap();
		}
	}

	return std::make_tuple(result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info);
}
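The single-mapping branch above is simple offset arithmetic: when the volatile block lands after the persistent one in the same heap, one map() spans both and the volatile write goes to a relative offset. A worked example (offsets assumed):

// persistent_offset = 0x1000, required.first  = 0x200
// volatile_offset   = 0x1400, required.second = 0x100
size_t combined_block_size(size_t persistent_offset, size_t volatile_offset, size_t volatile_size)
{
	return (volatile_offset + volatile_size) - persistent_offset; // 0x500 here
}
// volatile data is then written at block_mapping + (0x1400 - 0x1000) = +0x400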
@ -30,17 +30,20 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
	OS << "#version 450\n\n";
	OS << "#extension GL_ARB_separate_shader_objects : enable\n";
	OS << "layout(std140, set = 0, binding = 0) uniform ScaleOffsetBuffer\n";
	OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
	OS << "{\n";
	OS << " mat4 scaleOffsetMat;\n";
	OS << " ivec4 userClipEnabled[2];\n";
	OS << " vec4 userClipFactor[2];\n";
	OS << " mat4 scale_offset_mat;\n";
	OS << " ivec4 user_clip_enabled[2];\n";
	OS << " vec4 user_clip_factor[2];\n";
	OS << " uint transform_branch_bits;\n";
	OS << " uint vertex_base_index;\n";
	OS << " ivec4 input_attributes[16];\n";
	OS << "};\n";

	vk::glsl::program_input in;
	in.location = SCALE_OFFSET_BIND_SLOT;
	in.domain = glsl::glsl_vertex_program;
	in.name = "ScaleOffsetBuffer";
	in.name = "VertexContextBuffer";
	in.type = vk::glsl::input_type_uniform_buffer;

	inputs.push_back(in);
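Any host-side mirror of VertexContextBuffer has to respect std140 layout; a sketch of a compatible struct (hypothetical, assuming standard std140 rules; note the 8-byte pad before the ivec4 array):

struct alignas(16) vertex_context_buffer
{
	float scale_offset_mat[16];    // mat4, offset 0
	s32   user_clip_enabled[2][4]; // ivec4[2], offset 64
	float user_clip_factor[2][4];  // vec4[2], offset 96
	u32   transform_branch_bits;   // offset 128
	u32   vertex_base_index;       // offset 132
	u32   _pad[2];                 // std140 aligns the next array to 16 bytes
	s32   input_attributes[16][4]; // ivec4[16], offset 144
};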
@ -48,54 +51,21 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)

void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{
	std::vector<std::tuple<size_t, std::string>> input_data;
	for (const ParamType &PT : inputs)
	{
		for (const ParamItem &PI : PT.items)
		{
			input_data.push_back(std::make_tuple(PI.location, PI.name));
		}
	}
	OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable)
	OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data)

	/**
	 * It is important that the locations are in the order that vertex attributes are expected.
	 * If the order is not adhered to, channels may be swapped, leading to corruption
	 */
	vk::glsl::program_input in;
	in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT;
	in.domain = glsl::glsl_vertex_program;
	in.name = "persistent_input_stream";
	in.type = vk::glsl::input_type_texel_buffer;
	this->inputs.push_back(in);

	std::sort(input_data.begin(), input_data.end());

	for (const std::tuple<size_t, std::string> item : input_data)
	{
		for (const ParamType &PT : inputs)
		{
			for (const ParamItem &PI : PT.items)
			{
				if (PI.name == std::get<1>(item))
				{
					vk::glsl::program_input in;
					in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT;
					in.domain = glsl::glsl_vertex_program;
					in.name = PI.name + "_buffer";
					in.type = vk::glsl::input_type_texel_buffer;

					this->inputs.push_back(in);

					bool is_int = false;
					for (auto &attrib : rsx_vertex_program.rsx_vertex_inputs)
					{
						if (attrib.location == std::get<0>(item))
						{
							if (attrib.int_type) is_int = true;
							break;
						}
					}

					std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer";
					OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
				}
			}
		}
	}
	in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 1;
	in.domain = glsl::glsl_vertex_program;
	in.name = "volatile_input_stream";
	in.type = vk::glsl::input_type_texel_buffer;
	this->inputs.push_back(in);
}
void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)

@ -103,7 +73,6 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
	OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n";
	OS << "{\n";
	OS << " vec4 vc[468];\n";
	OS << " uint transform_branch_bits;\n";
	OS << "};\n\n";

	vk::glsl::program_input in;
@ -150,13 +119,13 @@ static const vertex_reg_info reg_table[] =
	{ "front_spec_color", true, "dst_reg4", "", false },
	{ "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG },
	//Warning: With spir-v, if you declare a clip distance var you must assign a value even when it is disabled! The runtime does not assign a default value
	{ "gl_ClipDistance[0]", false, "dst_reg5", ".y * userClipFactor[0].x", false, "userClipEnabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 },
	{ "gl_ClipDistance[1]", false, "dst_reg5", ".z * userClipFactor[0].y", false, "userClipEnabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 },
	{ "gl_ClipDistance[2]", false, "dst_reg5", ".w * userClipFactor[0].z", false, "userClipEnabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 },
	{ "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor[0].x", false, "user_clip_enabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 },
	{ "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor[0].y", false, "user_clip_enabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 },
	{ "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor[0].z", false, "user_clip_enabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 },
	{ "gl_PointSize", false, "dst_reg6", ".x", false },
	{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
	{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
	{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
	{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor[0].w", false, "user_clip_enabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
	{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor[1].x", false, "user_clip_enabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
	{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * user_clip_factor[1].y", false, "user_clip_enabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
	{ "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 },
	{ "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 },
	{ "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 },
@ -212,44 +181,10 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
		OS << "layout(location=" << vk::get_varying_register("front_spec_color").reg_location << ") out vec4 front_spec_color;\n";
}

namespace vk
{
	void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rsx_vertex_input> &inputs)
	{
		for (const auto &real_input : inputs)
		{
			if (real_input.location != PI.location)
				continue;

			if (!real_input.is_array)
			{
				OS << " vec4 " << PI.name << " = vec4(texelFetch(" << PI.name << "_buffer, 0));\n";
				return;
			}

			if (real_input.frequency > 1)
			{
				if (real_input.is_modulo)
				{
					OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex %" << real_input.frequency << "));\n";
					return;
				}

				OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex /" << real_input.frequency << "));\n";
				return;
			}

			OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n";
			return;
		}

		OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n";
	}
}
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
	glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);

	std::string parameters = "";
	for (int i = 0; i < 16; ++i)

@ -286,7 +221,9 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
	for (const ParamType &PT : m_parr.params[PF_PARAM_IN])
	{
		for (const ParamItem &PI : PT.items)
			vk::add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs);
		{
			OS << " vec4 " << PI.name << "= read_location(" << std::to_string(PI.location) << ");\n";
		}
	}
}
@ -373,7 +310,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
	if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "dst_reg2"))
		OS << " front_spec_color = dst_reg2;\n";

	OS << " gl_Position = gl_Position * scaleOffsetMat;\n";
	OS << " gl_Position = gl_Position * scale_offset_mat;\n";
	OS << "}\n";
}