rsx/gl/vulkan: Refactoring and partial vulkan rewrite

- Updates vulkan to use GPU vertex processing
- Rewrites vulkan to buffer entire frames and present when first available to avoid stalls
- Move more state into dynamic descriptors to reduce program cache misses; fix render pass conflicts before texture access
- Discards incomplete cb at destruction to avoid refs to destroyed objects
- Move set_viewport to the uninterruptible block before drawing in case cb is switched before we're ready
- Manage frame contexts separately for easier async frame management (see the sketch below)
- Avoid wasteful create-destroy cycles when sampling rtts
Author: kd-11
Date:   2017-08-05 00:11:14 +03:00
Parent: 6a707f515e
Commit: 00b0311c86
11 changed files with 592 additions and 802 deletions
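
To make the frame-buffering scheme concrete before diving into the diff, here is a minimal sketch of the per-frame context ring (hypothetical stand-in types, no real Vulkan calls; frame_ring, cb_chunk and deferred_resources are illustrative names, not identifiers from this commit). Each in-flight frame owns its own present semaphore, descriptor pool and deferred-deletion lists, and a slot is only recycled once its swap command buffer is no longer pending, so the CPU never stalls waiting on a present.

#include <array>
#include <cstdio>
#include <memory>
#include <vector>

constexpr unsigned MAX_ASYNC_FRAMES = 2;     //mirrors VK_MAX_ASYNC_FRAMES below

struct cb_chunk { bool pending = false; };   //stands in for command_buffer_chunk

struct frame_context_t
{
	cb_chunk* swap_command_buffer = nullptr; //non-null while a swap is queued
	std::vector<std::unique_ptr<int>> deferred_resources; //samplers, views, fbos
};

struct frame_ring
{
	std::array<frame_context_t, MAX_ASYNC_FRAMES> frames;
	unsigned current = 0;

	//Called when a swap is queued: present/clean finished frames, never stall
	frame_context_t& advance()
	{
		for (auto &ctx : frames)
		{
			if (&ctx == &frames[current] || !ctx.swap_command_buffer)
				continue;

			if (!ctx.swap_command_buffer->pending)  //GPU is done with this frame
			{
				ctx.deferred_resources.clear();     //now safe to destroy
				ctx.swap_command_buffer = nullptr;  //the real code presents here
			}
		}

		current = (current + 1) % MAX_ASYNC_FRAMES;
		return frames[current];
	}
};

int main()
{
	frame_ring ring;
	static cb_chunk finished_cb;                //pending == false: GPU finished
	ring.frames[1].swap_command_buffer = &finished_cb;
	ring.advance();                             //frees slot 1 and moves onto it
	std::printf("recording into frame slot %u\n", ring.current);
}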

View File

@ -11,6 +11,12 @@ namespace glsl
glsl_fragment_program = 1
};
enum glsl_rules
{
glsl_rules_opengl4,
glsl_rules_rpirv
};
static std::string getFloatTypeNameImpl(size_t elementCount)
{
switch (elementCount)
@ -48,8 +54,10 @@ namespace glsl
fmt::throw_exception("Unknown compare function" HERE);
}
static void insert_vertex_input_fetch(std::stringstream& OS)
static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules)
{
std::string vertex_id_name = (rules == glsl_rules_opengl4) ? "gl_VertexID" : "gl_VertexIndex";
//Actually decode a vertex attribute from a raw byte stream
OS << "struct attribute_desc\n";
OS << "{\n";
@ -194,16 +202,16 @@ namespace glsl
OS << "{\n";
OS << " attribute_desc desc = fetch_desc(location);\n";
OS << "\n";
OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n";
OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n";
OS << " if (desc.frequency == 0)\n";
OS << " vertex_id = 0;\n";
OS << " else if (desc.frequency > 1)\n";
OS << " {\n";
OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n";
OS << " if (desc.modulo != 0)\n";
OS << " vertex_id = gl_VertexID % desc.divisor;\n";
OS << " vertex_id = " << vertex_id_name << " % desc.divisor;\n";
OS << " else\n";
OS << " vertex_id = gl_VertexID / desc.divisor;\n";
OS << " vertex_id = " << vertex_id_name << " / desc.divisor;\n";
OS << " }\n";
OS << "\n";
OS << " if (desc.is_volatile != 0)\n";
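
As a reading aid, the per-attribute index computation that the generated shader performs can be written out in plain C++ (a hedged model of the GLSL emitted above; compute_vertex_id is an illustrative name). The only API-specific part is which builtin feeds builtin_vertex_index: gl_VertexID under glsl_rules_opengl4, gl_VertexIndex when compiling to SPIR-V for vulkan.

#include <cstdint>

struct attribute_desc { uint32_t frequency, modulo, divisor; };

//Model of the vertex_id selection in the generated fetch code
int compute_vertex_id(int builtin_vertex_index, int vertex_base_index, const attribute_desc& desc)
{
	int vertex_id = builtin_vertex_index - vertex_base_index;

	if (desc.frequency == 0)
		vertex_id = 0;                       //constant attribute, one element
	else if (desc.frequency > 1)
	{
		//if a vertex modifier is active, vertex_base must be 0 and is ignored
		if (desc.modulo != 0)
			vertex_id = builtin_vertex_index % desc.divisor;
		else
			vertex_id = builtin_vertex_index / desc.divisor;
	}

	return vertex_id;
}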

View File

@ -92,9 +92,8 @@ namespace
vertex_input_state operator()(const rsx::draw_array_command& command)
{
u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
u32 max_index = vertex_count - 1 + min_index;
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
@ -153,7 +152,7 @@ namespace
vertex_input_state operator()(const rsx::draw_inlined_array& command)
{
u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
@ -188,8 +187,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
auto &vertex_base = result.vertex_data_base;
//Do actual vertex upload
auto &required = calculate_memory_requirements(m_vertex_layout, vertex_count);
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};

View File

@ -149,15 +149,10 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
OS << "out vec4 front_spec_color;\n";
}
namespace
{
}
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
glsl::insert_vertex_input_fetch(OS);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4);
std::string parameters = "";
for (int i = 0; i < 16; ++i)

View File

@ -564,27 +564,11 @@ VKGSRender::VKGSRender() : GSRender()
m_current_command_buffer = &m_primary_cb_list[0];
//Create secondar command_buffer for parallel operations
//Create secondary command_buffer for parallel operations
m_secondary_command_buffer_pool.create((*m_device));
m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
open_command_buffer();
for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
{
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
VkClearColorValue clear_color{};
auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
}
//VRAM allocation
m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000);
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
@ -598,25 +582,25 @@ VKGSRender::VKGSRender() : GSRender()
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
//Generate frame contexts
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS };
std::vector<VkDescriptorPoolSize> sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool };
descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()));
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);
VkFenceCreateInfo fence_info = {};
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
for (auto &ctx : frame_context)
{
ctx = {};
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()));
}
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);
vk::initialize_compiler_context();
@ -631,6 +615,25 @@ VKGSRender::VKGSRender() : GSRender()
m_vertex_cache.reset(new vk::null_vertex_cache());
else
m_vertex_cache.reset(new vk::weak_vertex_cache());
open_command_buffer();
for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
{
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
VkClearColorValue clear_color{};
auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(i),
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
}
m_current_frame = &frame_context[0];
}
VKGSRender::~VKGSRender()
@ -641,23 +644,9 @@ VKGSRender::~VKGSRender()
return;
}
//Close recording and wait for all to finish
close_render_pass();
CHECK_RESULT(vkEndCommandBuffer(*m_current_command_buffer));
for (auto &cb : m_primary_cb_list)
if (cb.pending) cb.wait();
//Wait for device to finish up with resources
vkDeviceWaitIdle(*m_device);
//Sync objects
if (m_present_semaphore)
{
vkDestroySemaphore((*m_device), m_present_semaphore, nullptr);
m_present_semaphore = nullptr;
}
//Texture cache
m_texture_cache.destroy();
@ -678,10 +667,17 @@ VKGSRender::~VKGSRender()
null_buffer.reset();
null_buffer_view.reset();
//Temporary objects
m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
//Frame context
for (auto &ctx : frame_context)
{
vkDestroySemaphore((*m_device), ctx.present_semaphore, nullptr);
ctx.descriptor_pool.destroy();
ctx.buffer_views_to_clean.clear();
ctx.samplers_to_clean.clear();
ctx.framebuffers_to_clean.clear();
}
m_draw_fbo.reset();
//Render passes
@ -699,8 +695,6 @@ VKGSRender::~VKGSRender()
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
descriptor_pool.destroy();
//Command buffer
for (auto &cb : m_primary_cb_list)
cb.destroy();
@ -736,8 +730,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (!flushable)
return false;
close_render_pass();
if (synchronized)
{
if (m_last_flushable_cb >= 0)
@ -807,8 +799,21 @@ void VKGSRender::begin()
return;
//Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS ||
m_attrib_ring_info.is_critical() ||
if (m_current_frame->used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS)
{
//No need to stall if we have more than one frame queue anyway
flush_command_queue();
CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0));
m_current_frame->used_descriptors = 0;
m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
}
if (m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical())
@ -818,20 +823,17 @@ void VKGSRender::begin()
flush_command_queue(true);
m_vertex_cache->purge();
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_used_descriptors = 0;
m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
}
init_buffers();
if (!framebuffer_status_valid)
return;
VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorPool = m_current_frame->descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &descriptor_layouts;
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@ -839,16 +841,11 @@ void VKGSRender::begin()
VkDescriptorSet new_descriptor_set;
CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set));
descriptor_sets = new_descriptor_set;
m_used_descriptors++;
m_current_frame->descriptor_set = new_descriptor_set;
m_current_frame->used_descriptors++;
std::chrono::time_point<steady_clock> start = steady_clock::now();
init_buffers();
if (!framebuffer_status_valid)
return;
float actual_line_width = rsx::method_registers.line_width();
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
@ -901,20 +898,28 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (!load_program())
if (!check_program_status())
{
LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw");
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//Program data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
close_render_pass(); //Texture upload stuff conflicts active RPs
//Load program
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//Close current pass to avoid conflict with texture functions
close_render_pass();
if (g_cfg.video.strict_rendering_mode)
{
@ -967,11 +972,6 @@ void VKGSRender::end()
}
}
std::chrono::time_point<steady_clock> vertex_start0 = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end0 = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end0 - vertex_start0).count();
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
@ -980,7 +980,7 @@ void VKGSRender::end()
{
if (!rsx::method_registers.fragment_textures[i].enabled())
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
@ -989,7 +989,7 @@ void VKGSRender::end()
if (!texture0)
{
LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
@ -1016,7 +1016,7 @@ void VKGSRender::end()
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
m_sampler_to_clean.push_back(std::make_unique<vk::sampler>(
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()),
!!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN),
@ -1024,7 +1024,7 @@ void VKGSRender::end()
min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()),
is_depth_texture, depth_compare));
m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
@ -1034,7 +1034,7 @@ void VKGSRender::end()
{
if (!rsx::method_registers.vertex_textures[i].enabled())
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
@ -1043,11 +1043,11 @@ void VKGSRender::end()
if (!texture0)
{
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
m_sampler_to_clean.push_back(std::make_unique<vk::sampler>(
m_current_frame->samplers_to_clean.push_back(std::make_unique<vk::sampler>(
*m_device,
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
!!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN),
@ -1055,7 +1055,7 @@ void VKGSRender::end()
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color())
));
m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), descriptor_sets);
m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
@ -1066,9 +1066,12 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
set_viewport();
begin_render_pass();
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
//Clear any 'dirty' surfaces - possible if a recycled cache surface is used
std::vector<VkClearAttachment> buffers_to_clear;
@ -1108,11 +1111,7 @@ void VKGSRender::end()
vkCmdClearAttachments(*m_current_command_buffer, static_cast<u32>(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect);
}
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info);
if (!index_info)
{
const auto vertex_count = std::get<1>(upload_info);
@ -1133,7 +1132,7 @@ void VKGSRender::end()
vk::leave_uninterruptible();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();
copy_render_targets_to_dma_location();
m_draw_calls++;
@ -1219,7 +1218,7 @@ void VKGSRender::clear_surface(u32 mask)
if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return;
if (!(mask & 0xF3)) return;
if (m_current_present_image == 0xFFFF) return;
if (m_current_frame->present_image == UINT32_MAX) return;
init_buffers();
@ -1339,6 +1338,8 @@ void VKGSRender::copy_render_targets_to_dma_location()
if (g_cfg.video.write_color_buffers)
{
close_render_pass();
for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
{
if (!m_surface_info[index].pitch)
@ -1351,6 +1352,8 @@ void VKGSRender::copy_render_targets_to_dma_location()
if (g_cfg.video.write_depth_buffer)
{
close_render_pass();
if (m_depth_surface_info.pitch)
{
m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
@ -1374,7 +1377,7 @@ void VKGSRender::flush_command_queue(bool hard_sync)
if (hard_sync)
{
//swap handler checks the pending flag, so call it here
process_swap_request();
process_swap_request(m_current_frame);
//wait for the latest instruction to execute
m_current_command_buffer->pending = true;
@ -1395,83 +1398,128 @@ void VKGSRender::flush_command_queue(bool hard_sync)
//Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
//Soft sync if a present has not yet occurred before consuming the wait event
for (auto &ctx : frame_context)
{
if (ctx.swap_command_buffer == m_current_command_buffer)
process_swap_request(&ctx, true);
}
m_current_command_buffer->reset();
}
open_command_buffer();
}
void VKGSRender::advance_queued_frames()
{
//Check all other frames for completion and clear resources
for (auto &ctx : frame_context)
{
if (&ctx == m_current_frame)
continue;
if (ctx.swap_command_buffer)
{
ctx.swap_command_buffer->poke();
if (ctx.swap_command_buffer->pending)
continue;
//Present the bound image
process_swap_request(&ctx, true);
}
}
//Only marks surfaces as dirty without actually deleting them so it's safe to use
if (g_cfg.video.invalidate_surface_cache_every_frame)
m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer);
//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.flush();
m_vertex_cache->purge();
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context[m_current_queue_index];
}
void VKGSRender::present(frame_context_t *ctx)
{
VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain);
VkPresentInfoKHR present = {};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present.pNext = nullptr;
present.swapchainCount = 1;
present.pSwapchains = &swap_chain;
present.pImageIndices = &ctx->present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
}
void VKGSRender::queue_swap_request()
{
//buffer the swap request and return
if (m_swap_command_buffer && m_swap_command_buffer->pending)
if (m_current_frame->swap_command_buffer &&
m_current_frame->swap_command_buffer->pending)
{
//It's probable that no actual drawing took place
process_swap_request();
process_swap_request(m_current_frame);
}
m_swap_command_buffer = m_current_command_buffer;
close_and_submit_command_buffer({ m_present_semaphore }, m_current_command_buffer->submit_fence);
m_current_frame->swap_command_buffer = m_current_command_buffer;
close_and_submit_command_buffer({ m_current_frame->present_semaphore }, m_current_command_buffer->submit_fence);
m_current_frame->swap_command_buffer->pending = true;
//Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
m_swap_command_buffer->pending = true;
//Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::process_swap_request()
void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
{
if (!m_swap_command_buffer)
if (!ctx->swap_command_buffer)
return;
if (m_swap_command_buffer->pending)
if (ctx->swap_command_buffer->pending)
{
//Perform hard swap here
m_swap_command_buffer->wait();
VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain);
VkPresentInfoKHR present = {};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present.pNext = nullptr;
present.swapchainCount = 1;
present.pSwapchains = &swap_chain;
present.pImageIndices = &m_current_present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
ctx->swap_command_buffer->wait();
free_resources = true;
}
//Clean up all the resources from the last frame
//Always present
present(ctx);
//Feed back damaged resources to the main texture cache for management...
//m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources);
m_rtts.free_invalidated();
m_texture_cache.flush();
if (g_cfg.video.invalidate_surface_cache_every_frame)
m_rtts.invalidate_surface_cache_data(&*m_current_command_buffer);
m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear();
m_framebuffer_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
if (free_resources)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
//Cleanup of reference sensitive resources
//TODO: These should be double buffered as well to prevent destruction of anything in use
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
ctx->buffer_views_to_clean.clear();
ctx->samplers_to_clean.clear();
ctx->framebuffers_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
}
m_vertex_cache->purge();
m_swap_command_buffer = nullptr;
ctx->swap_command_buffer = nullptr;
}
void VKGSRender::do_local_task()
@ -1482,6 +1530,7 @@ void VKGSRender::do_local_task()
//TODO: Determine if a hard sync is necessary
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
close_render_pass();
flush_command_queue();
m_flush_commands = false;
@ -1509,11 +1558,8 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
}
}
bool VKGSRender::load_program(bool)
bool VKGSRender::check_program_status()
{
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
@ -1528,24 +1574,29 @@ bool VKGSRender::load_program(bool)
};
get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
if (!current_fragment_program.valid) return false;
get_current_vertex_program();
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
vk::pipeline_props properties = {};
properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
bool unused;
bool update_blend_constants = false;
bool update_stencil_info_back = false;
bool update_stencil_info_front = false;
bool update_depth_bounds = false;
properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused);
if (rsx::method_registers.restart_index_enabled())
{
properties.ia.primitiveRestartEnable = VK_TRUE;
}
else
properties.ia.primitiveRestartEnable = VK_FALSE;
for (int i = 0; i < 4; ++i)
{
properties.att_state[i].colorWriteMask = 0xf;
@ -1588,11 +1639,8 @@ bool VKGSRender::load_program(bool)
properties.att_state[render_targets[idx]].alphaBlendOp = equation_a;
}
auto blend_colors = rsx::get_constant_blend_colors();
properties.cs.blendConstants[0] = blend_colors[0];
properties.cs.blendConstants[1] = blend_colors[1];
properties.cs.blendConstants[2] = blend_colors[2];
properties.cs.blendConstants[3] = blend_colors[3];
//Blend constants are dynamic
update_blend_constants = true;
}
else
{
@ -1618,8 +1666,7 @@ bool VKGSRender::load_program(bool)
if (rsx::method_registers.depth_bounds_test_enabled())
{
properties.ds.depthBoundsTestEnable = VK_TRUE;
properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min();
properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max();
update_depth_bounds = true;
}
else
properties.ds.depthBoundsTestEnable = VK_FALSE;
@ -1627,9 +1674,6 @@ bool VKGSRender::load_program(bool)
if (rsx::method_registers.stencil_test_enabled())
{
properties.ds.stencilTestEnable = VK_TRUE;
properties.ds.front.writeMask = rsx::method_registers.stencil_mask();
properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask();
properties.ds.front.reference = rsx::method_registers.stencil_func_ref();
properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail());
properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass());
properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail());
@ -1637,16 +1681,16 @@ bool VKGSRender::load_program(bool)
if (rsx::method_registers.two_sided_stencil_test_enabled())
{
properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask();
properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask();
properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref();
properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail());
properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass());
properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail());
properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func());
update_stencil_info_back = true;
}
else
properties.ds.back = properties.ds.front;
update_stencil_info_front = true;
}
else
properties.ds.stencilTestEnable = VK_FALSE;
@ -1684,55 +1728,84 @@ bool VKGSRender::load_program(bool)
vk::enter_uninterruptible();
//Load current program from buffer
vertex_program.skip_vertex_input_check = true;
m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
//TODO: Update constant buffers..
//1. Update scale-offset matrix
//2. Update vertex constants
//3. Update fragment constants
const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256);
vk::leave_uninterruptible();
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(scale_offset_offset, 256);
/**
* NOTE: While VK's coord system resembles GL's, the clip volume is no longer symmetrical in z
* It's like D3D without the flip in y (depending on how you build the spir-v)
*/
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets);
if (true)//m_transform_constants_dirty)
//Update dynamic state
if (update_blend_constants)
{
const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float));
buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float));
fill_vertex_program_constants_data(buf);
*(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits();
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
m_transform_constants_dirty = false;
//Update blend constants
auto blend_colors = rsx::get_constant_blend_colors();
vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data());
}
if (update_stencil_info_front)
{
VkStencilFaceFlags face_flag = (update_stencil_info_back)? VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK;
vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref());
if (update_stencil_info_back)
{
vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask());
vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask());
vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref());
}
}
if (update_depth_bounds)
{
//Update depth bounds min/max
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
return true;
}
void VKGSRender::load_program(u32 vertex_count, u32 vertex_base)
{
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float));
const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz);
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;
buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz);
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t vertex_constants_offset = vertex_state_offset + 512;
const size_t fragment_constants_offset = vertex_constants_offset + 8192;
//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);
//Vertex state
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 144));
//Vertex constants
buf = buf + 512;
fill_vertex_program_constants_data(buf);
m_transform_constants_dirty = false;
//Fragment constants
buf = buf + 8192;
if (fragment_constants_sz)
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, fragment_program);
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);
m_uniform_buffer_ring_info.unmap();
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets);
vk::leave_uninterruptible();
return true;
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 8192 }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
}
static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
@ -1762,7 +1835,20 @@ static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
void VKGSRender::init_buffers(bool skip_reading)
{
//Clear any pending swap requests
process_swap_request();
for (auto &ctx : frame_context)
{
if (ctx.swap_command_buffer)
{
if (ctx.swap_command_buffer->pending)
ctx.swap_command_buffer->poke();
if (!ctx.swap_command_buffer->pending)
{
//process swap without advancing the frame base
process_swap_request(&ctx, true);
}
}
}
prepare_rtts();
@ -1770,8 +1856,6 @@ void VKGSRender::init_buffers(bool skip_reading)
{
read_buffers();
}
set_viewport();
}
void VKGSRender::read_buffers()
@ -1965,14 +2049,13 @@ void VKGSRender::prepare_rtts()
}
}
for (auto &fbo : m_framebuffer_to_clean)
for (auto &fbo : m_current_frame->framebuffers_to_clean)
{
if (fbo->matches(bound_images, clip_width, clip_height))
{
m_draw_fbo.swap(fbo);
m_draw_fbo->reset_refs();
framebuffer_found = true;
//LOG_ERROR(RSX, "Matching framebuffer exists, using that instead");
break;
}
}
@ -2014,7 +2097,7 @@ void VKGSRender::prepare_rtts()
VkRenderPass current_render_pass = m_render_passes[idx];
if (m_draw_fbo)
m_framebuffer_to_clean.push_back(std::move(m_draw_fbo));
m_current_frame->framebuffers_to_clean.push_back(std::move(m_draw_fbo));
m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
}
@ -2060,7 +2143,7 @@ void VKGSRender::flip(int buffer)
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();
close_render_pass();
process_swap_request();
process_swap_request(m_current_frame, true);
if (!resize_screen)
{
@ -2095,8 +2178,8 @@ void VKGSRender::flip(int buffer)
aspect_ratio.size = new_size;
//Prepare surface for new frame
CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image));
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), UINT64_MAX, m_current_frame->present_semaphore, VK_NULL_HANDLE, &m_current_frame->present_image));
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
@ -2106,7 +2189,7 @@ void VKGSRender::flip(int buffer)
else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr)
image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1]);
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image);
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image);
if (image_to_flip)
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
@ -2117,9 +2200,9 @@ void VKGSRender::flip(int buffer)
//No draw call was issued!
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
VkClearColorValue clear_black = { 0 };
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearColorImage(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range);
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_frame->present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
}
std::unique_ptr<vk::framebuffer_holder> direct_fbo;
@ -2144,14 +2227,14 @@ void VKGSRender::flip(int buffer)
size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
VkRenderPass single_target_pass = m_render_passes[idx];
for (auto It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++)
for (auto It = m_current_frame->framebuffers_to_clean.begin(); It != m_current_frame->framebuffers_to_clean.end(); It++)
{
auto &fbo = *It;
if (fbo->attachments[0]->info.image == target_image)
{
direct_fbo.swap(fbo);
direct_fbo->reset_refs();
m_framebuffer_to_clean.erase(It);
m_current_frame->framebuffers_to_clean.erase(It);
break;
}
}
@ -2189,7 +2272,7 @@ void VKGSRender::flip(int buffer)
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
m_current_frame->framebuffers_to_clean.push_back(std::move(direct_fbo));
}
queue_swap_request();
@ -2262,9 +2345,13 @@ void VKGSRender::flip(int buffer)
m_current_command_buffer->reset();
open_command_buffer();
//Do cleanup
m_swap_command_buffer = m_current_command_buffer;
process_swap_request();
//Do cleanup; also present the previous frame for this frame if available
//Don't bother scheduling a swap event if the frame context is still uninitialized (no previous frame)
if (m_current_frame->present_image != UINT32_MAX)
{
m_current_frame->swap_command_buffer = m_current_command_buffer;
process_swap_request(m_current_frame);
}
}
std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
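
One detail of the load_program rewrite above worth spelling out: the three separate uniform-ring allocations (scale-offset block, vertex constants, fragment constants) collapse into a single 256-byte-aligned allocation that is mapped and unmapped once, with fixed interior offsets. A small sketch of the layout arithmetic, using the sizes from the diff (fragment_constants_sz here is an arbitrary example value):

#include <cstddef>
#include <cstdio>

int main()
{
	const std::size_t fragment_constants_sz = 48;  //example; queried per program
	const std::size_t fragment_buffer_sz    = fragment_constants_sz + (17 * 4 * sizeof(float));
	const std::size_t required_mem          = 512 + 8192 + fragment_buffer_sz;

	//One alloc<256>(required_mem), one map, one unmap
	const std::size_t vertex_state_offset     = 0;                         //scale-offset, user clip, branch bits, base index, layout
	const std::size_t vertex_constants_offset = vertex_state_offset + 512; //512 vec4 transform constants = 8192 bytes
	const std::size_t fragment_constants_offset = vertex_constants_offset + 8192;

	std::printf("one %zu-byte upload; fragment block at +%zu\n", required_mem, fragment_constants_offset);
}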

View File

@ -25,11 +25,12 @@ namespace vk
//Heap allocation sizes in MB
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 32
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128
#define VK_MAX_ASYNC_CB_COUNT 64
#define VK_MAX_ASYNC_FRAMES 2
struct command_buffer_chunk: public vk::command_buffer
{
@ -135,32 +136,43 @@ private:
vk::vk_data_heap m_texture_upload_buffer_ring_info;
//Vulkan internals
u32 m_current_present_image = 0xFFFF;
VkSemaphore m_present_semaphore = nullptr;
vk::command_pool m_command_buffer_pool;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
command_buffer_chunk* m_current_command_buffer = nullptr;
command_buffer_chunk* m_swap_command_buffer = nullptr;
u32 m_current_cb_index = 0;
std::mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer;
std::array<VkRenderPass, 120> m_render_passes;
VkDescriptorSetLayout descriptor_layouts;
VkDescriptorSet descriptor_sets;
VkPipelineLayout pipeline_layout;
vk::descriptor_pool descriptor_pool;
u32 m_current_cb_index = 0;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
command_buffer_chunk* m_current_command_buffer = nullptr;
std::array<VkRenderPass, 120> m_render_passes;
VkDescriptorSetLayout descriptor_layouts;
VkPipelineLayout pipeline_layout;
std::vector<std::unique_ptr<vk::buffer_view> > m_buffer_view_to_clean;
std::vector<std::unique_ptr<vk::sampler> > m_sampler_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder> > m_framebuffer_to_clean;
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder>> framebuffers_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
};
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context;
u32 m_current_queue_index = 0;
frame_context_t* m_current_frame = nullptr;
u32 m_client_width = 0;
u32 m_client_height = 0;
@ -183,7 +195,6 @@ private:
s64 m_draw_time = 0;
s64 m_flip_time = 0;
u32 m_used_descriptors = 0;
u8 m_draw_buffers_count = 0;
bool framebuffer_status_valid = false;
@ -201,6 +212,9 @@ private:
std::thread::id rsx_thread;
bool render_pass_open = false;
//Vertex layout
rsx::vertex_input_layout m_vertex_layout;
#ifdef __linux__
Display *m_display_handle = nullptr;
@ -220,15 +234,18 @@ private:
void flush_command_queue(bool hard_sync = false);
void queue_swap_request();
void process_swap_request();
void process_swap_request(frame_context_t *ctx, bool free_resources = false);
void advance_queued_frames();
void present(frame_context_t *ctx);
void begin_render_pass();
void close_render_pass();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
/// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type)
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
public:
bool load_program(bool fast_update = false);
bool check_program_status();
void load_program(u32 vertex_count, u32 vertex_base);
void init_buffers(bool skip_reading = false);
void read_buffers();
void write_buffers();

View File

@ -19,17 +19,22 @@ namespace vk
bool operator==(const pipeline_props& other) const
{
if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo)))
if (render_pass != other.render_pass)
return false;
if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
if (render_pass != other.render_pass)
if (memcmp(&cs, &other.cs, sizeof(VkPipelineColorBlendStateCreateInfo)))
return false;
if (memcmp(&ia, &other.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&ds, &other.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
return num_targets == other.num_targets;
@ -90,9 +95,6 @@ struct VKTraits
static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const vk::pipeline_props &pipelineProperties, VkDevice dev, VkPipelineLayout common_pipeline_layout)
{
// pstate.dynamic_state.pDynamicStates = pstate.dynamic_state_descriptors;
// pstate.cb.pAttachments = pstate.att_state;
// pstate.cb.attachmentCount = pstate.num_targets;
VkPipelineShaderStageCreateInfo shader_stages[2] = {};
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@ -111,6 +113,11 @@ struct VKTraits
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_LINE_WIDTH;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
dynamic_state_info.pDynamicStates = dynamic_state_descriptors;
VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
@ -144,7 +151,6 @@ struct VKTraits
info.renderPass = pipelineProperties.render_pass;
CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &info, NULL, &pipeline));
pipeline_storage_type result = std::make_unique<vk::glsl::program>(dev, pipeline, vertexProgramData.uniforms, fragmentProgramData.uniforms);
return result;
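
The point of the extra dynamic states registered above: anything set through vkCmdSet* no longer lives in VkGraphicsPipelineCreateInfo, so it drops out of the pipeline_props comparison, and draws that only change stencil references, blend constants or depth bounds hit the same cached pipeline instead of compiling a new one. A hypothetical illustration of that effect (pipeline_cache and get_pipeline are illustrative names):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <map>

struct pipeline_key               //static state only, cf. vk::pipeline_props
{
	uint32_t topology;
	uint32_t render_pass_id;
	//note: no stencil masks/refs, no blend constants, no depth bounds

	bool operator<(const pipeline_key& other) const
	{
		return std::memcmp(this, &other, sizeof(pipeline_key)) < 0;
	}
};

struct dynamic_state              //supplied per draw via vkCmdSet*
{
	uint32_t stencil_ref;
	float blend_constants[4];
	float depth_bounds[2];
};

std::map<pipeline_key, int> pipeline_cache;  //value stands in for a VkPipeline

int get_pipeline(const pipeline_key& key)
{
	auto result = pipeline_cache.try_emplace(key, /*compile once*/ 42);
	return result.first->second;
}

int main()
{
	pipeline_key key{ 4, 7 };
	dynamic_state ds{ 0xff, { 1.f, 1.f, 1.f, 1.f }, { 0.f, 1.f } };

	get_pipeline(key);            //compiled on first use
	ds.stencil_ref = 0x80;        //changing dynamic state between draws...
	get_pipeline(key);            //...does not create a second pipeline

	std::printf("pipelines compiled: %zu\n", pipeline_cache.size());  //1
}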

View File

@ -21,6 +21,7 @@ namespace vk
bool dirty = false;
u16 native_pitch = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
std::unique_ptr<vk::image_view> view;
render_target *old_contents = nullptr; //Data occupying the memory location that this surface is replacing
@ -40,6 +41,15 @@ namespace vk
:image(dev, memory_type_index, access_flags, image_type, format, width, height, depth,
mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
{}
vk::image_view* get_view()
{
if (!view)
view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format,
native_component_map, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT)));
return view.get();
}
};
struct framebuffer_holder: public vk::framebuffer, public ref_counted
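
This cached, lazily-created view is what the commit message means by avoiding wasteful create-destroy cycles when sampling rtts: previously the texture cache built (and later destroyed) a fresh vk::image_view every time a render target was sampled; now the view is created once on the surface, with the stencil aspect masked out, and reused. A minimal sketch of the memoization (stand-in types):

#include <memory>

struct image_view {};             //wraps a VkImageView in the real code

struct render_target
{
	std::unique_ptr<image_view> view;

	image_view* get_view()
	{
		if (!view)                //created once, on first sampling
			view = std::make_unique<image_view>();

		return view.get();        //reused by every later cache lookup
	}
};

int main()
{
	render_target rt;
	return rt.get_view() == rt.get_view() ? 0 : 1;  //same view both times
}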

View File

@ -362,6 +362,9 @@ namespace vk
void reset_descriptors()
{
if (m_used_descriptors == 0)
return;
vkResetDescriptorPool(device, m_descriptor_pool, 0);
m_used_descriptors = 0;
}

View File

@ -322,9 +322,14 @@ namespace vk
std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);
//Stuff that has been dereferenced goes into these
std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
//Stuff that has been dereferenced twice goes here. Contents are evicted before new ones are added
std::vector<std::unique_ptr<vk::image_view>> m_image_views_to_purge;
std::vector<std::unique_ptr<vk::image>> m_images_to_purge;
// Keep track of cache misses to pre-emptively flush some addresses
struct framebuffer_memory_characteristics
{
@ -431,6 +436,9 @@ namespace vk
m_temporary_image_view.clear();
m_dirty_textures.clear();
m_image_views_to_purge.clear();
m_images_to_purge.clear();
}
//Helpers
@ -544,7 +552,7 @@ namespace vk
}
//First check if it exists as an rtt...
vk::image *rtt_texture = nullptr;
vk::render_target *rtt_texture = nullptr;
if (rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
if (g_cfg.video.strict_rendering_mode)
@ -559,10 +567,7 @@ namespace vk
}
}
m_temporary_image_view.push_back(std::make_unique<vk::image_view>(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format,
rtt_texture->native_component_map,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)));
return m_temporary_image_view.back().get();
return rtt_texture->get_view();
}
if (rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
@ -576,10 +581,7 @@ namespace vk
}
}
m_temporary_image_view.push_back(std::make_unique<vk::image_view>(*vk::get_current_renderer(), rtt_texture->value, VK_IMAGE_VIEW_TYPE_2D, rtt_texture->info.format,
rtt_texture->native_component_map,
vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT)));
return m_temporary_image_view.back().get();
return rtt_texture->get_view();
}
u32 raw_format = tex.format();
@ -912,8 +914,11 @@ namespace vk
void flush()
{
m_dirty_textures.clear();
m_temporary_image_view.clear();
m_image_views_to_purge.clear();
m_images_to_purge.clear();
m_image_views_to_purge = std::move(m_temporary_image_view);
m_images_to_purge = std::move(m_dirty_textures);
}
void record_cache_miss(cached_texture_section &tex)
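
The rewritten flush() above is a two-stage, generation-based purge: objects dereferenced during the current frame are not destroyed immediately but moved into the *_to_purge lists, and only destroyed on the following flush, by which point the GPU can no longer be reading them. A reduced sketch of the aging logic (stand-in types):

#include <memory>
#include <vector>

struct image {};
struct image_view {};

struct texture_cache_gc
{
	std::vector<std::unique_ptr<image_view>> m_temporary_image_view;  //age 0
	std::vector<std::unique_ptr<image>> m_dirty_textures;             //age 0
	std::vector<std::unique_ptr<image_view>> m_image_views_to_purge;  //age 1
	std::vector<std::unique_ptr<image>> m_images_to_purge;            //age 1

	void flush()  //called once per frame boundary
	{
		//age-1 objects: the GPU finished with these a frame ago, destroy them
		m_image_views_to_purge.clear();
		m_images_to_purge.clear();

		//age this frame's dereferenced objects into the purge lists
		m_image_views_to_purge = std::move(m_temporary_image_view);
		m_images_to_purge = std::move(m_dirty_textures);
	}
};

int main()
{
	texture_cache_gc gc;
	gc.m_temporary_image_view.push_back(std::make_unique<image_view>());
	gc.flush();  //view survives one extra frame in m_image_views_to_purge
	gc.flush();  //now it is actually destroyed
}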

View File

@ -7,53 +7,6 @@
namespace vk
{
bool requires_component_expansion(rsx::vertex_base_type type, u32 size)
{
if (size == 3)
{
switch (type)
{
case rsx::vertex_base_type::f:
return true;
}
}
return false;
}
u32 get_suitable_vk_size(rsx::vertex_base_type type, u32 size)
{
if (size == 3)
{
switch (type)
{
case rsx::vertex_base_type::f:
return 16;
}
}
return rsx::get_vertex_type_size_on_host(type, size);
}
VkFormat get_suitable_vk_format(rsx::vertex_base_type type, u8 size)
{
/**
* Set up buffer fetches to only work on 4-component access. This is hardware dependent so we use 4-component access to avoid branching based on IHV implementation
* AMD GCN 1.0 for example does not support RGB32 formats for texel buffers
*/
const VkFormat vec1_types[] = { VK_FORMAT_R16_SNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8_UINT };
const VkFormat vec2_types[] = { VK_FORMAT_R16G16_SNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8_UINT };
const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED!
const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT };
const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types };
if (type > rsx::vertex_base_type::ub256)
fmt::throw_exception("VKGS error: unknown vertex base type 0x%x" HERE, (u32)type);
return vec_selectors[size][(int)type];
}
VkPrimitiveTopology get_appropriate_topology(rsx::primitive_type& mode, bool &requires_modification)
{
requires_modification = false;
@ -92,114 +45,6 @@ namespace vk
return !result;
}
template <typename T, u32 padding>
void copy_inlined_data_to_buffer(void *src_data, void *dst_data, u32 vertex_count, rsx::vertex_base_type type, u8 src_channels, u8 dst_channels, u16 element_size, u16 stride)
{
u8 *src = static_cast<u8*>(src_data);
u8 *dst = static_cast<u8*>(dst_data);
for (u32 i = 0; i < vertex_count; ++i)
{
T* src_ptr = reinterpret_cast<T*>(src);
T* dst_ptr = reinterpret_cast<T*>(dst);
switch (type)
{
case rsx::vertex_base_type::ub:
{
if (src_channels == 4)
{
dst[0] = src[3];
dst[1] = src[2];
dst[2] = src[1];
dst[3] = src[0];
break;
}
}
default:
{
for (u8 ch = 0; ch < dst_channels; ++ch)
{
if (ch < src_channels)
{
*dst_ptr = *src_ptr;
src_ptr++;
}
else
*dst_ptr = (T)(padding);
dst_ptr++;
}
}
}
src += stride;
dst += element_size;
}
}
void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count)
{
switch (type)
{
case rsx::vertex_base_type::f:
{
if (vertex_size == 3)
{
float *dst = reinterpret_cast<float*>(data);
for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4)
dst[idx] = 1.f;
}
break;
}
case rsx::vertex_base_type::sf:
{
if (vertex_size == 3)
{
/**
* Pad the 4th component for half-float arrays to 1, since texelfetch does not mask components
*/
u16 *dst = reinterpret_cast<u16*>(data);
for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4)
dst[idx] = 0x3c00;
}
break;
}
}
}
/**
* Template: Expand any N-component vector to a larger X-component vector and pad unused slots with 1
*/
template<typename T, u8 src_components, u8 dst_components, u32 padding>
void expand_array_components(const T* src_data, void *dst_ptr, u32 vertex_count)
{
T* src = const_cast<T*>(src_data);
T* dst = static_cast<T*>(dst_ptr);
for (u32 index = 0; index < vertex_count; ++index)
{
for (u8 channel = 0; channel < dst_components; channel++)
{
if (channel < src_components)
{
*dst = *src;
dst++;
src++;
}
else
{
*dst = (T)(padding);
dst++;
}
}
}
}
VkIndexType get_index_type(rsx::index_array_type type)
{
switch (type)
@ -215,20 +60,7 @@ namespace vk
namespace
{
static constexpr std::array<const char*, 16> s_reg_table =
{
"in_pos_buffer", "in_weight_buffer", "in_normal_buffer",
"in_diff_color_buffer", "in_spec_color_buffer",
"in_fog_buffer",
"in_point_size_buffer", "in_7_buffer",
"in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer",
"in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer"
};
/**
* Creates and fills an index buffer emulating unsupported primitive type.
* Returns index_count and (offset_in_index_buffer, index_type)
*/
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
const rsx::draw_clause& clause, u32 vertex_count,
vk::vk_data_heap& m_index_buffer_ring_info)
@ -247,161 +79,49 @@ namespace
index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16));
}
struct vertex_buffer_visitor
struct vertex_input_state
{
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
vk::glsl::program* prog, VkDescriptorSet desc_set,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
vk::vertex_cache* vertex_cache)
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
vertex_cache(vertex_cache)
{
}
void operator()(const rsx::vertex_array_buffer& vertex_array)
{
if (!m_program->has_uniform(s_reg_table[vertex_array.index]))
return;
// Fill vertex_array
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
const u32 upload_size = real_element_size * vertex_count;
const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size);
const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
if (auto found = vertex_cache->find_vertex_range(local_addr, format, upload_size))
{
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, found->offset_in_heap, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
return;
}
VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);
gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size);
//Padding the vertex buffer must be done after the writes are complete;
//the write_vertex_data function may 'dirty' unused sections of the buffer as an optimization
vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count);
m_attrib_ring_info.unmap();
vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
}
void operator()(const rsx::vertex_array_register& vertex_register)
{
if (!m_program->has_uniform(s_reg_table[vertex_register.index]))
return;
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);
size_t offset_in_attrib_buffer = 0;
if (vk::requires_component_expansion(vertex_register.type, vertex_register.attribute_size))
{
const u32 num_stored_verts = static_cast<u32>(
data_size / (sizeof(float) * vertex_register.attribute_size));
const u32 real_element_size = vk::get_suitable_vk_size(vertex_register.type, vertex_register.attribute_size);
data_size = real_element_size * num_stored_verts;
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
vk::expand_array_components<float, 3, 4, 1>(reinterpret_cast<const float*>(vertex_register.data.data()), dst, num_stored_verts);
m_attrib_ring_info.unmap();
}
else
{
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
memcpy(dst, vertex_register.data.data(), data_size);
m_attrib_ring_info.unmap();
}
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_register.index], descriptor_sets);
}
void operator()(const rsx::empty_vertex_array& vbo)
{
if (!m_program->has_uniform(s_reg_table[vbo.index]))
return;
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets);
}
protected:
VkDevice device;
u32 vertex_count;
vk::vk_data_heap& m_attrib_ring_info;
vk::glsl::program* m_program;
VkDescriptorSet descriptor_sets;
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
vk::vertex_cache* vertex_cache;
VkPrimitiveTopology native_primitive_type;
u32 vertex_draw_count;
u32 allocated_vertex_count;
u32 vertex_data_base;
u32 vertex_index_base;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
};
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
rsx::vertex_array_register, rsx::empty_vertex_array>>;
struct draw_command_visitor
{
using result_type = std::tuple<VkPrimitiveTopology, u32,
std::optional<std::tuple<VkDeviceSize, VkIndexType>>>;
draw_command_visitor(VkDevice device, vk::vk_data_heap& index_buffer_ring_info,
vk::vk_data_heap& attrib_ring_info, vk::glsl::program* program,
VkDescriptorSet descriptor_sets,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
std::function<attribute_storage(
const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
get_vertex_buffers_f,
VKGSRender *thread)
: m_device(device), m_index_buffer_ring_info(index_buffer_ring_info),
m_attrib_ring_info(attrib_ring_info), m_program(program),
m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean),
get_vertex_buffers(get_vertex_buffers_f),
rsxthr(thread)
draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout)
: m_index_buffer_ring_info(index_buffer_ring_info)
, m_vertex_layout(layout)
{
}
result_type operator()(const rsx::draw_array_command& command)
vertex_input_state operator()(const rsx::draw_array_command& command)
{
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(
rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
u32 index_count = 0;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
u32 min_index =
rsx::method_registers.current_draw_clause.first_count_commands.front().first;
u32 max_index =
rsx::method_registers.current_draw_clause.get_elements_count() + min_index - 1;
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
if (primitives_emulated)
{
u32 index_count;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
if (primitives_emulated) {
std::tie(index_count, index_info) =
generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
max_index - min_index + 1, m_index_buffer_ring_info);
}
else
{
index_count = rsx::method_registers.current_draw_clause.get_elements_count();
vertex_count, m_index_buffer_ring_info);
return{ prims, index_count, vertex_count, min_index, 0, index_info };
}
upload_vertex_buffers(min_index, max_index);
return std::make_tuple(prims, index_count, index_info);
return{ prims, vertex_count, vertex_count, min_index, 0, {} };
}
result_type operator()(const rsx::draw_indexed_array_command& command)
vertex_input_state operator()(const rsx::draw_indexed_array_command& command)
{
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(
@ -438,146 +158,150 @@ namespace
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
upload_vertex_buffers(0, max_index);
return std::make_tuple(prims, index_count, index_info);
}
result_type operator()(const rsx::draw_inlined_array& command)
{
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(
rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
u32 index_count = upload_inlined_array();
if (!primitives_emulated) {
return std::make_tuple(prims, index_count, std::nullopt);
//check for vertex arrays with frequency modifiers
for (auto &block : m_vertex_layout.interleaved_blocks)
{
if (block.min_divisor > 1)
{
//Ignore base offsets and return the full range relative to 0
//The upload function will optimize the uploaded range anyway
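//Example: with a frequency divisor of 4 the shader indexes the stream with
//vertex_id / 4 (or vertex_id % 4 for modulo), so the resident data must be
//addressed from element 0 regardless of the draw's 'first' index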
return{ prims, index_count, max_index, 0, 0, index_info };
}
}
return {prims, index_count, (max_index - min_index + 1), min_index, min_index, index_info};
}
vertex_input_state operator()(const rsx::draw_inlined_array& command)
{
bool primitives_emulated = false;
auto &draw_clause = rsx::method_registers.current_draw_clause;
VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated);
const u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
if (!primitives_emulated)
{
return{ prims, vertex_count, vertex_count, 0, 0, {} };
}
u32 index_count;
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
std::tie(index_count, index_info) = generate_emulating_index_buffer(
rsx::method_registers.current_draw_clause, index_count, m_index_buffer_ring_info);
return std::make_tuple(prims, index_count, index_info);
std::tie(index_count, index_info) = generate_emulating_index_buffer(draw_clause, vertex_count, m_index_buffer_ring_info);
return{ prims, index_count, vertex_count, 0, 0, index_info };
}
private:
vk::vk_data_heap& m_index_buffer_ring_info;
VkDevice m_device;
vk::vk_data_heap& m_attrib_ring_info;
vk::glsl::program* m_program;
VkDescriptorSet m_descriptor_sets;
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
std::function<attribute_storage(
const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
get_vertex_buffers;
VKGSRender* rsxthr;
void upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
{
const u32 vertex_count = vertex_max_index - min_index + 1;
vertex_buffer_visitor visitor(vertex_count, m_device,
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get());
const auto& vertex_buffers = get_vertex_buffers(
rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
for (auto &vbo: vertex_buffers)
std::apply_visitor(visitor, vbo);
}
u32 upload_inlined_array()
{
u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = {0};
for (u32 i = 0; i < rsx::limits::vertex_count; ++i) {
const auto& info = rsx::method_registers.vertex_arrays_info[i];
if (!info.size()) continue;
offsets[i] = stride;
stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
u32 vertex_draw_count =
(u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() *
sizeof(u32)) /
stride;
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
if (!m_program->has_uniform(s_reg_table[index])) continue;
if (!vertex_info.size()) // disabled
{
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
continue;
}
const u32 element_size =
vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
const u32 data_size = element_size * vertex_draw_count;
const VkFormat format =
vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());
size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
u8* src = reinterpret_cast<u8*>(
rsx::method_registers.current_draw_clause.inline_vertex_array.data());
u8* dst =
static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));
src += offsets[index];
u8 opt_size = vertex_info.size();
if (vertex_info.size() == 3) opt_size = 4;
// TODO: properly handle cmp type
if (vertex_info.type() == rsx::vertex_base_type::cmp)
LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");
switch (vertex_info.type())
{
case rsx::vertex_base_type::f:
vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::sf:
vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::s1:
case rsx::vertex_base_type::ub:
case rsx::vertex_base_type::ub256:
vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::s32k:
case rsx::vertex_base_type::cmp:
vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
default: fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
}
m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device,
m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(
m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
}
return vertex_draw_count;
}
rsx::vertex_input_layout& m_vertex_layout;
};
}
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>
std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > >
VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
descriptor_sets, m_buffer_view_to_clean,
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, m_program->get_vertex_input_attributes_mask());}, this);
return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
m_vertex_layout = analyse_inputs_interleaved();
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
auto result = std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
auto &vertex_count = result.allocated_vertex_count;
auto &vertex_base = result.vertex_data_base;
//Do actual vertex upload
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
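//required.first = size of the cacheable 'persistent' block, required.second =
//size of the per-draw 'volatile' block (naming inferred from the usage below)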
size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX;
VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE;
if (required.first > 0)
{
//Check if cacheable
//Only data in the 'persistent' block may be cached
//TODO: make vertex cache keep local data beyond frame boundaries and hook notify command
bool in_cache = false;
bool to_store = false;
u32 storage_address = UINT32_MAX;
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
{
in_cache = true;
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, cached->offset_in_heap, required.first));
}
else
{
to_store = true;
}
}
if (!in_cache)
{
persistent_offset = (u32)m_attrib_ring_info.alloc<256>(required.first);
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, persistent_offset, required.first));
if (to_store)
{
//store ref in vertex cache
m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, (u32)persistent_offset);
}
}
persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
}
else
{
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
}
if (required.second > 0)
{
volatile_offset = (u32)m_attrib_ring_info.alloc<256>(required.second);
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second));
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
}
else
{
m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
}
m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set);
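//Illustrative note: the views use VK_FORMAT_R8_UINT because attribute decoding
//now happens in the shader - each texel of the stream is one raw byte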
//Write all the data once if possible
if (required.first && required.second && volatile_offset > persistent_offset)
{
//Do this once for both to save time on map/unmap cycles
const size_t block_end = (volatile_offset + required.second);
const size_t block_size = block_end - persistent_offset;
const size_t volatile_offset_in_block = volatile_offset - persistent_offset;
void *block_mapping = m_attrib_ring_info.map(persistent_offset, block_size);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, block_mapping, (char*)block_mapping + volatile_offset_in_block);
m_attrib_ring_info.unmap();
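//Worked example (illustrative): persistent_offset = 0x1000 with 64 bytes and
//volatile_offset = 0x1100 with 32 bytes -> block_end = 0x1120, block_size = 0x120,
//and the volatile data lands at mapping + 0x100; one map/unmap cycle instead of two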
}
else
{
if (required.first > 0 && persistent_offset != UINT64_MAX)
{
void *persistent_mapping = m_attrib_ring_info.map(persistent_offset, required.first);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping, nullptr);
m_attrib_ring_info.unmap();
}
if (required.second > 0)
{
void *volatile_mapping = m_attrib_ring_info.map(volatile_offset, required.second);
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, nullptr, volatile_mapping);
m_attrib_ring_info.unmap();
}
}
return std::make_tuple(result.native_primitive_type, result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info);
}


@ -30,17 +30,20 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
OS << "#version 450\n\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n";
OS << "layout(std140, set = 0, binding = 0) uniform ScaleOffsetBuffer\n";
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
OS << "{\n";
OS << " mat4 scaleOffsetMat;\n";
OS << " ivec4 userClipEnabled[2];\n";
OS << " vec4 userClipFactor[2];\n";
OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n";
OS << " uint vertex_base_index;\n";
OS << " ivec4 input_attributes[16];\n";
OS << "};\n";
vk::glsl::program_input in;
in.location = SCALE_OFFSET_BIND_SLOT;
in.domain = glsl::glsl_vertex_program;
in.name = "ScaleOffsetBuffer";
in.name = "VertexContextBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
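//Host-side mirror of the block above (illustrative sketch assuming std140 rules;
//this struct is not part of the commit). mat4 occupies 64 bytes, each (i)vec4
//array element 16; input_attributes is 16-byte aligned, so 8 bytes of padding
//follow vertex_base_index.
struct vertex_context_buffer_layout
{
	float scale_offset_mat[16]; //offset 0, mat4
	s32 user_clip_enabled[8];   //offset 64, ivec4[2]
	float user_clip_factor[8];  //offset 96, vec4[2]
	u32 transform_branch_bits;  //offset 128
	u32 vertex_base_index;      //offset 132
	u32 _pad[2];                //offset 136, align the next member to 16
	s32 input_attributes[64];   //offset 144, ivec4[16]; total size 400
};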
@ -48,54 +51,21 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{
std::vector<std::tuple<size_t, std::string>> input_data;
for (const ParamType &PT : inputs)
{
for (const ParamItem &PI : PT.items)
{
input_data.push_back(std::make_tuple(PI.location, PI.name));
}
}
OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable)
OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data)
/**
* It is important that the locations are in the order in which vertex attributes are expected.
* If this order is not adhered to, channels may be swapped, leading to corruption
*/
vk::glsl::program_input in;
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT;
in.domain = glsl::glsl_vertex_program;
in.name = "persistent_input_stream";
in.type = vk::glsl::input_type_texel_buffer;
this->inputs.push_back(in);
std::sort(input_data.begin(), input_data.end());
for (const std::tuple<size_t, std::string> item : input_data)
{
for (const ParamType &PT : inputs)
{
for (const ParamItem &PI : PT.items)
{
if (PI.name == std::get<1>(item))
{
vk::glsl::program_input in;
in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT;
in.domain = glsl::glsl_vertex_program;
in.name = PI.name + "_buffer";
in.type = vk::glsl::input_type_texel_buffer;
this->inputs.push_back(in);
bool is_int = false;
for (auto &attrib : rsx_vertex_program.rsx_vertex_inputs)
{
if (attrib.location == std::get<0>(item))
{
if (attrib.int_type) is_int = true;
break;
}
}
std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer";
OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
}
}
}
}
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 1;
in.domain = glsl::glsl_vertex_program;
in.name = "volatile_input_stream";
in.type = vk::glsl::input_type_texel_buffer;
this->inputs.push_back(in);
}
void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
@ -103,7 +73,6 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 vc[468];\n";
OS << " uint transform_branch_bits;\n";
OS << "};\n\n";
vk::glsl::program_input in;
@ -150,13 +119,13 @@ static const vertex_reg_info reg_table[] =
{ "front_spec_color", true, "dst_reg4", "", false },
{ "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG },
//Warning: With spir-v if you declare clip distance var, you must assign a value even when its disabled! Runtime does not assign a default value
{ "gl_ClipDistance[0]", false, "dst_reg5", ".y * userClipFactor[0].x", false, "userClipEnabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 },
{ "gl_ClipDistance[1]", false, "dst_reg5", ".z * userClipFactor[0].y", false, "userClipEnabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 },
{ "gl_ClipDistance[2]", false, "dst_reg5", ".w * userClipFactor[0].z", false, "userClipEnabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 },
{ "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor[0].x", false, "user_clip_enabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 },
{ "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor[0].y", false, "user_clip_enabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 },
{ "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor[0].z", false, "user_clip_enabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 },
{ "gl_PointSize", false, "dst_reg6", ".x", false },
{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor[0].w", false, "user_clip_enabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor[1].x", false, "user_clip_enabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * user_clip_factor[1].y", false, "user_clip_enabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
{ "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 },
{ "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 },
{ "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 },
@ -212,44 +181,10 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
OS << "layout(location=" << vk::get_varying_register("front_spec_color").reg_location << ") out vec4 front_spec_color;\n";
}
namespace vk
{
void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rsx_vertex_input> &inputs)
{
for (const auto &real_input : inputs)
{
if (real_input.location != PI.location)
continue;
if (!real_input.is_array)
{
OS << " vec4 " << PI.name << " = vec4(texelFetch(" << PI.name << "_buffer, 0));\n";
return;
}
if (real_input.frequency > 1)
{
if (real_input.is_modulo)
{
OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex %" << real_input.frequency << "));\n";
return;
}
OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex /" << real_input.frequency << "));\n";
return;
}
OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n";
return;
}
OS << " vec4 " << PI.name << "= vec4(texelFetch(" << PI.name << "_buffer, gl_VertexIndex).rgba);\n";
}
}
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
std::string parameters = "";
for (int i = 0; i < 16; ++i)
@ -286,7 +221,9 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
for (const ParamType &PT : m_parr.params[PF_PARAM_IN])
{
for (const ParamItem &PI : PT.items)
vk::add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs);
{
OS << " vec4 " << PI.name << "= read_location(" << std::to_string(PI.location) << ");\n";
}
}
}
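//For a program with inputs in_pos (location 0) and in_tc0 (location 8), the loop
//above emits, e.g.:
//	vec4 in_pos= read_location(0);
//	vec4 in_tc0= read_location(8);
//read_location (see insert_vertex_input_fetch) decodes the attribute from the
//persistent or volatile stream using the descriptor in input_attributes[location]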
@ -373,7 +310,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "dst_reg2"))
OS << " front_spec_color = dst_reg2;\n";
OS << " gl_Position = gl_Position * scaleOffsetMat;\n";
OS << " gl_Position = gl_Position * scale_offset_mat;\n";
OS << "}\n";
}