diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 998597d4e7..af71f53678 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -199,10 +199,12 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); execute_command_buffer(false); - m_scale_offset_buffer.create((*m_device), 128); - m_vertex_constants_buffer.create((*m_device), 512 * 16); - m_fragment_constants_buffer.create((*m_device), 512 * 16); - m_index_buffer.create((*m_device), 65536, VK_FORMAT_R16_UINT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); + +#define RING_BUFFER_SIZE (16 * 1024 * 1024) + m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); + m_uniform_buffer.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_index_buffer_ring_info.init(RING_BUFFER_SIZE); + m_index_buffer.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); } VKGSRender::~VKGSRender() @@ -225,11 +227,6 @@ VKGSRender::~VKGSRender() //TODO: Properly destroy shader modules instead of calling clear... 
m_prog_buffer.clear(); - m_scale_offset_buffer.destroy(); - m_vertex_constants_buffer.destroy(); - m_fragment_constants_buffer.destroy(); - m_index_buffer.destroy(); - if (m_render_pass) destroy_render_pass(); @@ -407,10 +404,12 @@ void VKGSRender::end() vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0); else { - VkIndexType &index_type = std::get<3>(upload_info); - u32 &index_count = std::get<2>(upload_info); + VkIndexType index_type; + u32 index_count; + VkDeviceSize offset; + std::tie(std::ignore, std::ignore, index_count, offset, index_type) = upload_info; - vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer, 0, index_type); + vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer->value, offset, index_type); vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); } @@ -675,7 +674,9 @@ bool VKGSRender::load_program() //1. Update scale-offset matrix //2. Update vertex constants //3. Update fragment constants - u8 *buf = (u8*)m_scale_offset_buffer.map(0, VK_WHOLE_SIZE); + const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256); + + u8 *buf = (u8*)m_uniform_buffer->map(scale_offset_offset, 256); //TODO: Add case for this in RSXThread /** @@ -708,21 +709,23 @@ bool VKGSRender::load_program() memset((char*)buf+64, 0, 8); memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); - m_scale_offset_buffer.unmap(); + m_uniform_buffer->unmap(); - buf = (u8*)m_vertex_constants_buffer.map(0, VK_WHOLE_SIZE); + const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); + buf = (u8*)m_uniform_buffer->map(vertex_constants_offset, 512 * 4 * sizeof(float)); fill_vertex_program_constants_data(buf); - m_vertex_constants_buffer.unmap(); + m_uniform_buffer->unmap(); - size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - buf = 
(u8*)m_fragment_constants_buffer.map(0, fragment_constants_sz); + const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_constants_sz); + buf = (u8*)m_uniform_buffer->map(fragment_constants_offset, fragment_constants_sz); m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program); - m_fragment_constants_buffer.unmap(); + m_uniform_buffer->unmap(); - m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_scale_offset_buffer); - m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_vertex_constants_buffer); - m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_scale_offset_buffer); - m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_fragment_constants_buffer); + m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_uniform_buffer->value, scale_offset_offset, 256); + m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_uniform_buffer->value, vertex_constants_offset, 512 * 4 * sizeof(float)); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_uniform_buffer->value, scale_offset_offset, 256); + m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_uniform_buffer->value, fragment_constants_offset, fragment_constants_sz); return true; } @@ -1009,7 +1012,9 @@ void VKGSRender::flip(int buffer) CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue())); - + + m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); + m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); 
if (m_present_semaphore) { vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 5ebd6f5f93..e8636074ff 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -12,6 +12,87 @@ #pragma comment(lib, "VKstatic.1.lib") +namespace vk +{ +// TODO: factorize between backends +class data_heap +{ + /** + * Returns true when an aligned allocation of size bytes fits without crossing the get position. + */ + template<size_t Alignement> + bool can_alloc(size_t size) const + { + size_t alloc_size = align(size, Alignement); + size_t aligned_put_pos = align(m_put_pos, Alignement); + if (aligned_put_pos + alloc_size < m_size) + { + // range before get + if (aligned_put_pos + alloc_size < m_get_pos) + return true; + // range after get + if (aligned_put_pos > m_get_pos) + return true; + return false; + } + else + { + // ..]....[..get.. + if (aligned_put_pos < m_get_pos) + return false; + // ..get..]...[... + // Actually all resources extending beyond heap space starts at 0 + if (alloc_size > m_get_pos) + return false; + return true; + } + } + + size_t m_size; + size_t m_put_pos; // Start of free space +public: + data_heap() = default; + ~data_heap() = default; + data_heap(const data_heap&) = delete; + data_heap(data_heap&&) = delete; + + size_t m_get_pos; // End of free space + + void init(size_t heap_size) + { + m_size = heap_size; + m_put_pos = 0; + m_get_pos = heap_size - 1; + } + + template<size_t Alignement> + size_t alloc(size_t size) + { + if (!can_alloc<Alignement>(size)) throw EXCEPTION("Working buffer not big enough"); + size_t alloc_size = align(size, Alignement); + size_t aligned_put_pos = align(m_put_pos, Alignement); + if (aligned_put_pos + alloc_size < m_size) + { + m_put_pos = aligned_put_pos + alloc_size; + return aligned_put_pos; + } + else + { + m_put_pos = alloc_size; + return 0; + } + } + + /** + * Returns put position - 1, or the last heap index (m_size - 1) when put position is 0; tested as m_put_pos > 0 because size_t arithmetic wraps instead of going negative. + */ + size_t get_current_put_pos_minus_one() const + { + return (m_put_pos > 0) ? 
m_put_pos - 1 : m_size - 1; + } +}; +} + class VKGSRender : public GSRender { private: @@ -23,7 +104,7 @@ private: rsx::surface_info m_surface; - vk::buffer m_attrib_buffers[rsx::limits::vertex_count]; + vk::buffer_deprecated m_attrib_buffers[rsx::limits::vertex_count]; vk::texture_cache m_texture_cache; rsx::vk_render_targets m_rtts; @@ -41,11 +122,10 @@ private: vk::swap_chain* m_swap_chain; //buffer - vk::buffer m_scale_offset_buffer; - vk::buffer m_vertex_constants_buffer; - vk::buffer m_fragment_constants_buffer; - - vk::buffer m_index_buffer; + vk::data_heap m_uniform_buffer_ring_info; + std::unique_ptr m_uniform_buffer; + vk::data_heap m_index_buffer_ring_info; + std::unique_ptr m_index_buffer; //Vulkan internals u32 m_current_present_image = 0xFFFF; @@ -80,9 +160,8 @@ private: void end_command_buffer_recording(); void prepare_rtts(); - - std::tuple - upload_vertex_data(); + /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type + std::tuple upload_vertex_data(); public: bool load_program(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 8090a3f5a0..2c941fb29d 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -6,7 +6,7 @@ namespace vk context *g_current_vulkan_ctx = nullptr; render_device g_current_renderer; - buffer g_null_buffer; + buffer_deprecated g_null_buffer; texture g_null_texture; VkSampler g_null_sampler = nullptr; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 627aef0c39..b0afb0051e 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -236,7 +236,33 @@ namespace vk } }; - class memory_block + struct memory_block + { + VkMemoryAllocateInfo info = {}; + VkDeviceMemory memory; + + memory_block(VkDevice dev, u64 block_sz, uint32_t memory_type_index) : m_device(dev) + { + info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + info.allocationSize = block_sz; + info.memoryTypeIndex = 
memory_type_index; + + CHECK_RESULT(vkAllocateMemory(m_device, &info, nullptr, &memory)); + } + + ~memory_block() + { + vkFreeMemory(m_device, memory, nullptr); + } + + memory_block(const memory_block&) = delete; + memory_block(memory_block&&) = delete; + + private: + VkDevice m_device; + }; + + class memory_block_deprecated { VkDeviceMemory vram = nullptr; vk::render_device *owner = nullptr; @@ -244,8 +270,8 @@ namespace vk bool mappable = false; public: - memory_block() {} - ~memory_block() {} + memory_block_deprecated() {} + ~memory_block_deprecated() {} void allocate_from_pool(vk::render_device &device, u64 block_sz, bool host_visible, u32 typeBits) { @@ -321,7 +347,7 @@ namespace vk VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR; - vk::memory_block vram_allocation; + vk::memory_block_deprecated vram_allocation; vk::render_device *owner = nullptr; u32 m_width; @@ -367,7 +393,83 @@ namespace vk operator VkImage(); }; - class buffer + struct buffer + { + VkBuffer value; + VkBufferCreateInfo info = {}; + std::unique_ptr<memory_block> memory; + + buffer(VkDevice dev, u64 size, uint32_t memory_type_index, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags) + : m_device(dev) + { + info.size = size; + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.flags = flags; + info.usage = usage; + + CHECK_RESULT(vkCreateBuffer(m_device, &info, nullptr, &value)); + + VkMemoryRequirements memory_reqs; + //Allocate vram for this buffer + vkGetBufferMemoryRequirements(m_device, value, &memory_reqs); + memory.reset(new memory_block(m_device, memory_reqs.size, memory_type_index)); + CHECK_RESULT(vkBindBufferMemory(dev, value, memory->memory, 0)); //Binding returns a VkResult; check it like the create/allocate/map calls + } + + ~buffer() + { + vkDestroyBuffer(m_device, value, nullptr); + } + + void *map(u32 offset, u64 size) + { + void *data = nullptr; + CHECK_RESULT(vkMapMemory(m_device, memory->memory, offset, size, 0, &data)); + return data; + } + + void unmap() + { 
+ vkUnmapMemory(m_device, memory->memory); + } + + buffer(const buffer&) = delete; + buffer(buffer&&) = delete; + + private: + VkDevice m_device; + }; + + struct buffer_view + { + VkBufferView value; + VkBufferViewCreateInfo info = {}; + + buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size) + : m_device(dev) + { + info.buffer = buffer; + info.format = format; + info.offset = offset; + info.range = size; + info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + CHECK_RESULT(vkCreateBufferView(m_device, &info, nullptr, &value)); + } + + ~buffer_view() + { + vkDestroyBufferView(m_device, value, nullptr); + } + + buffer_view(const buffer_view&) = delete; + buffer_view(buffer_view&&) = delete; + + private: + VkDevice m_device; + }; + + class buffer_deprecated { VkBufferView m_view = nullptr; VkBuffer m_buffer = nullptr; @@ -377,16 +479,16 @@ namespace vk VkBufferCreateFlags m_flags = 0; vk::render_device *owner; - vk::memory_block vram; + vk::memory_block_deprecated vram; u64 m_size = 0; bool viewable = false; public: - buffer() {} - ~buffer() {} + buffer_deprecated() {} + ~buffer_deprecated() {} - void create(vk::render_device &dev, u64 size, VkFormat format, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags) + void create(vk::render_device &dev, u64 size, VkFormat format = VK_FORMAT_UNDEFINED, VkBufferUsageFlagBits usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VkBufferCreateFlags flags = 0) { if (m_buffer) throw EXCEPTION("Buffer create called on an existing buffer!"); @@ -417,21 +519,6 @@ namespace vk set_format(format); } - void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage) - { - create(dev, size, format, usage, 0); - } - - void create(vk::render_device &dev, u32 size, VkFormat format) - { - create(dev, size, format, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); - } - - void create(vk::render_device &dev, u32 size) - { - create(dev, size, VK_FORMAT_UNDEFINED); - } - void *map(u32 
offset, u64 size) { if (!vram.is_mappable()) return nullptr; @@ -1323,8 +1410,9 @@ namespace vk bool has_uniform(program_domain domain, std::string uniform_name); bool bind_uniform(program_domain domain, std::string uniform_name); bool bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture); - bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer); - bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store); + bool bind_uniform(program_domain domain, std::string uniform_name, VkBuffer _buffer, VkDeviceSize offset, VkDeviceSize size); + bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer); + bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer, bool is_texel_store); program& operator = (const program&) = delete; program& operator = (program&& other); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 2a60abd910..0127866d1d 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -520,6 +520,7 @@ namespace vk { buffer.buffer = input.as_buffer.buffer; buffer.range = input.as_buffer.size; + buffer.offset = input.as_buffer.offset; } else LOG_ERROR(RSX, "UBO was not bound: %s", input.name); @@ -717,7 +718,35 @@ namespace vk return false; } - bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer) + //Records _buffer with the given offset and size as the uniform buffer for the uniform named uniform_name in domain. + //Sets uniforms_changed only when buffer, size or offset differ; returns true if the uniform was found, false otherwise. + bool program::bind_uniform(program_domain domain, std::string uniform_name, VkBuffer _buffer, VkDeviceSize offset, VkDeviceSize size) + { + for (auto &uniform : uniforms) + { + if (uniform.name == uniform_name && + uniform.domain == domain) + { + if (uniform.as_buffer.buffer != _buffer || + uniform.as_buffer.size != size || + uniform.as_buffer.offset != offset) + { + uniform.as_buffer.size = size; + uniform.as_buffer.buffer = _buffer; + 
uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed! + uniform.as_buffer.offset = offset; + + uniforms_changed = true; + } + + uniform.type = input_type_uniform_buffer; + return true; + } + } + return false; //No matching uniform: every path of a bool function must return a value + } + + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer) { for (auto &uniform : uniforms) { @@ -728,11 +757,13 @@ namespace vk u64 size = _buffer.size(); if (uniform.as_buffer.buffer != buf || - uniform.as_buffer.size != size) + uniform.as_buffer.size != size || + uniform.as_buffer.offset != 0) { uniform.as_buffer.size = size; uniform.as_buffer.buffer = buf; uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed! + uniform.as_buffer.offset = 0; uniforms_changed = true; } @@ -746,7 +777,7 @@ namespace vk return false; } - bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store) + bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer, bool is_texel_store) { if (!is_texel_store) { @@ -764,11 +795,13 @@ namespace vk if (uniform.as_buffer.buffer != buf || uniform.as_buffer.buffer_view != view || - uniform.as_buffer.size != size) + uniform.as_buffer.size != size || + uniform.as_buffer.offset != 0) { uniform.as_buffer.size = size; uniform.as_buffer.buffer = buf; uniform.as_buffer.buffer_view = view; + uniform.as_buffer.offset = 0; if (!view) throw EXCEPTION("Invalid buffer passed as texel storage"); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index d7cb8194ce..f70ead5347 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -216,7 +216,7 @@ namespace vk } } -std::tuple +std::tuple VKGSRender::upload_vertex_data() { //initialize vertex attributes @@ -481,6 +481,8 @@ VKGSRender::upload_vertex_data() VkIndexType index_format = VK_INDEX_TYPE_UINT16; VkPrimitiveTopology prims = 
vk::get_appropriate_topology(draw_mode, primitives_emulated); + size_t offset_in_index_buffer = -1; + if (primitives_emulated) { //Line loops are line-strips with loop-back; using line-strips-with-adj doesnt work for vulkan @@ -491,7 +493,11 @@ VKGSRender::upload_vertex_data() if (!is_indexed_draw) { index_count = vk::expand_line_loop_array_to_strip(vertex_draw_count, indices); - m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + size_t upload_size = index_count * sizeof(u16); + offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size); + memcpy(buf, indices.data(), upload_size); + m_index_buffer->unmap(); } else { @@ -502,12 +508,20 @@ VKGSRender::upload_vertex_data() std::vector indices32; index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u32*)vertex_index_array.data(), indices32); - m_index_buffer.sub_data(0, index_count*sizeof(u32), indices32.data()); + size_t upload_size = index_count * sizeof(u32); + offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size); + memcpy(buf, indices32.data(), upload_size); + m_index_buffer->unmap(); } else { index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u16*)vertex_index_array.data(), indices); - m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data()); + size_t upload_size = index_count * sizeof(u16); + offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size); + memcpy(buf, indices.data(), upload_size); + m_index_buffer->unmap(); } } } @@ -532,7 +546,11 @@ VKGSRender::upload_vertex_data() write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast(indices.data()), draw_mode, 0, vertex_draw_count); } - m_index_buffer.sub_data(0, index_count * sizeof(u16), 
indices.data()); + size_t upload_size = index_count * sizeof(u16); + offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size); + memcpy(buf, indices.data(), upload_size); + m_index_buffer->unmap(); } is_indexed_draw = true; @@ -557,9 +575,12 @@ VKGSRender::upload_vertex_data() if (index_sz != vertex_draw_count) LOG_ERROR(RSX, "Vertex draw count mismatch!"); - m_index_buffer.sub_data(0, vertex_index_array.size(), vertex_index_array.data()); - m_index_buffer.set_format(fmt); //Unnecessary unless viewing contents in sampler... + size_t upload_size = vertex_index_array.size(); + offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size); + memcpy(buf, vertex_index_array.data(), upload_size); + m_index_buffer->unmap(); } - return std::make_tuple(prims, is_indexed_draw, index_count, index_format); + return std::make_tuple(prims, is_indexed_draw, index_count, offset_in_index_buffer, index_format); } \ No newline at end of file