vulkan: Use ring allocation for uniform/index buffers by using simpler memory_block and buffer classes.

This commit is contained in:
Vincent Lejeune 2016-03-14 22:29:18 +01:00
parent da2caa0881
commit 63ad2cce72
6 changed files with 295 additions and 72 deletions

View File

@ -199,10 +199,12 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
execute_command_buffer(false);
m_scale_offset_buffer.create((*m_device), 128);
m_vertex_constants_buffer.create((*m_device), 512 * 16);
m_fragment_constants_buffer.create((*m_device), 512 * 16);
m_index_buffer.create((*m_device), 65536, VK_FORMAT_R16_UINT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
#define RING_BUFFER_SIZE 16 * 1024 * 1024
m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE);
m_uniform_buffer.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_index_buffer_ring_info.init(RING_BUFFER_SIZE);
m_index_buffer.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
}
VKGSRender::~VKGSRender()
@ -225,11 +227,6 @@ VKGSRender::~VKGSRender()
//TODO: Properly destroy shader modules instead of calling clear...
m_prog_buffer.clear();
m_scale_offset_buffer.destroy();
m_vertex_constants_buffer.destroy();
m_fragment_constants_buffer.destroy();
m_index_buffer.destroy();
if (m_render_pass)
destroy_render_pass();
@ -407,10 +404,12 @@ void VKGSRender::end()
vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0);
else
{
VkIndexType &index_type = std::get<3>(upload_info);
u32 &index_count = std::get<2>(upload_info);
VkIndexType index_type;
u32 index_count;
VkDeviceSize offset;
std::tie(std::ignore, std::ignore, index_count, offset, index_type) = upload_info;
vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer, 0, index_type);
vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer->value, offset, index_type);
vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0);
}
@ -675,7 +674,9 @@ bool VKGSRender::load_program()
//1. Update scale-offset matrix
//2. Update vertex constants
//3. Update fragment constants
u8 *buf = (u8*)m_scale_offset_buffer.map(0, VK_WHOLE_SIZE);
const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256);
u8 *buf = (u8*)m_uniform_buffer->map(scale_offset_offset, 256);
//TODO: Add case for this in RSXThread
/**
@ -708,21 +709,23 @@ bool VKGSRender::load_program()
memset((char*)buf+64, 0, 8);
memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float));
memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float));
m_scale_offset_buffer.unmap();
m_uniform_buffer->unmap();
buf = (u8*)m_vertex_constants_buffer.map(0, VK_WHOLE_SIZE);
const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float));
buf = (u8*)m_uniform_buffer->map(vertex_constants_offset, 512 * 4 * sizeof(float));
fill_vertex_program_constants_data(buf);
m_vertex_constants_buffer.unmap();
m_uniform_buffer->unmap();
size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
buf = (u8*)m_fragment_constants_buffer.map(0, fragment_constants_sz);
const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_constants_sz);
buf = (u8*)m_uniform_buffer->map(fragment_constants_offset, fragment_constants_sz);
m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_sz) }, fragment_program);
m_fragment_constants_buffer.unmap();
m_uniform_buffer->unmap();
m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_scale_offset_buffer);
m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_vertex_constants_buffer);
m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_scale_offset_buffer);
m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_fragment_constants_buffer);
m_program->bind_uniform(vk::glsl::glsl_vertex_program, "ScaleOffsetBuffer", m_uniform_buffer->value, scale_offset_offset, 256);
m_program->bind_uniform(vk::glsl::glsl_vertex_program, "VertexConstantsBuffer", m_uniform_buffer->value, vertex_constants_offset, 512 * 4 * sizeof(float));
m_program->bind_uniform(vk::glsl::glsl_fragment_program, "ScaleOffsetBuffer", m_uniform_buffer->value, scale_offset_offset, 256);
m_program->bind_uniform(vk::glsl::glsl_fragment_program, "FragmentConstantsBuffer", m_uniform_buffer->value, fragment_constants_offset, fragment_constants_sz);
return true;
}
@ -1009,7 +1012,9 @@ void VKGSRender::flip(int buffer)
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue()));
m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one();
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one();
if (m_present_semaphore)
{
vkDestroySemaphore((*m_device), m_present_semaphore, nullptr);

View File

@ -12,6 +12,87 @@
#pragma comment(lib, "VKstatic.1.lib")
namespace vk
{
// TODO: factorize between backends
/**
 * Bookkeeping for a ring (circular) sub-allocator over a heap of m_size bytes.
 * It only hands out offsets; the backing storage is owned by the caller.
 *
 * m_put_pos marks where the next allocation may start and m_get_pos marks the
 * end of the free space. The caller moves m_get_pos forward once earlier
 * allocations may be reused (e.g. by assigning get_current_put_pos_minus_one()).
 *
 * NOTE(review): align() and EXCEPTION() are declared elsewhere in the project;
 * align(v, a) is presumed to round v up to a multiple of a - confirm.
 */
class data_heap
{
	/**
	 * Does alloc cross get position ?
	 * Returns true when an aligned block of `size` bytes can be placed
	 * without running over m_get_pos.
	 */
	template<int Alignement>
	bool can_alloc(size_t size) const
	{
		size_t alloc_size = align(size, Alignement);
		size_t aligned_put_pos = align(m_put_pos, Alignement);
		if (aligned_put_pos + alloc_size < m_size)
		{
			// The block fits before the end of the heap.
			// range before get
			if (aligned_put_pos + alloc_size < m_get_pos)
				return true;
			// range after get
			if (aligned_put_pos > m_get_pos)
				return true;
			return false;
		}
		else
		{
			// The block would run past the end of the heap, so it is placed at offset 0.
			// ..]....[..get..
			if (aligned_put_pos < m_get_pos)
				return false;
			// ..get..]...[...
			// Actually all resources extending beyond heap space starts at 0
			if (alloc_size > m_get_pos)
				return false;
			return true;
		}
	}

	size_t m_size = 0;    // Heap size, set by init()
	size_t m_put_pos = 0; // Start of free space

public:
	data_heap() = default;
	~data_heap() = default;
	data_heap(const data_heap&) = delete;
	data_heap(data_heap&&) = delete;

	size_t m_get_pos = 0; // End of free space

	/**
	 * Resets the heap to an empty state covering heap_size bytes.
	 */
	void init(size_t heap_size)
	{
		m_size = heap_size;
		m_put_pos = 0;
		m_get_pos = heap_size - 1;
	}

	/**
	 * Reserves `size` bytes (rounded with align()) and returns the offset of
	 * the reserved range. Throws when the request would cross m_get_pos.
	 */
	template<int Alignement>
	size_t alloc(size_t size)
	{
		if (!can_alloc<Alignement>(size)) throw EXCEPTION("Working buffer not big enough");
		size_t alloc_size = align(size, Alignement);
		size_t aligned_put_pos = align(m_put_pos, Alignement);
		if (aligned_put_pos + alloc_size < m_size)
		{
			m_put_pos = aligned_put_pos + alloc_size;
			return aligned_put_pos;
		}
		else
		{
			// Wrap around: the block starts at the beginning of the heap.
			m_put_pos = alloc_size;
			return 0;
		}
	}

	/**
	 * return current putpos - 1, wrapping to the last byte of the heap when
	 * putpos is 0.
	 */
	size_t get_current_put_pos_minus_one() const
	{
		// Compare before subtracting: m_put_pos is unsigned, so "m_put_pos - 1 > 0"
		// is also true for m_put_pos == 0 (the subtraction wraps to SIZE_MAX).
		return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
	}
};
}
class VKGSRender : public GSRender
{
private:
@ -23,7 +104,7 @@ private:
rsx::surface_info m_surface;
vk::buffer m_attrib_buffers[rsx::limits::vertex_count];
vk::buffer_deprecated m_attrib_buffers[rsx::limits::vertex_count];
vk::texture_cache m_texture_cache;
rsx::vk_render_targets m_rtts;
@ -41,11 +122,10 @@ private:
vk::swap_chain* m_swap_chain;
//buffer
vk::buffer m_scale_offset_buffer;
vk::buffer m_vertex_constants_buffer;
vk::buffer m_fragment_constants_buffer;
vk::buffer m_index_buffer;
vk::data_heap m_uniform_buffer_ring_info;
std::unique_ptr<vk::buffer> m_uniform_buffer;
vk::data_heap m_index_buffer_ring_info;
std::unique_ptr<vk::buffer> m_index_buffer;
//Vulkan internals
u32 m_current_present_image = 0xFFFF;
@ -80,9 +160,8 @@ private:
void end_command_buffer_recording();
void prepare_rtts();
std::tuple<VkPrimitiveTopology, bool, u32, VkIndexType>
upload_vertex_data();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType> upload_vertex_data();
public:
bool load_program();

View File

@ -6,7 +6,7 @@ namespace vk
context *g_current_vulkan_ctx = nullptr;
render_device g_current_renderer;
buffer g_null_buffer;
buffer_deprecated g_null_buffer;
texture g_null_texture;
VkSampler g_null_sampler = nullptr;

View File

@ -236,7 +236,33 @@ namespace vk
}
};
class memory_block
/**
 * Owns one VkDeviceMemory allocation: allocated in the constructor and
 * freed in the destructor. Instances can be neither copied nor moved.
 */
struct memory_block
{
	VkMemoryAllocateInfo info = {}; // Allocation parameters passed to vkAllocateMemory
	VkDeviceMemory memory;          // Handle written by vkAllocateMemory

	memory_block(const memory_block&) = delete;
	memory_block(memory_block&&) = delete;

	/**
	 * Allocates block_sz bytes from the memory type memory_type_index on dev.
	 * The result of vkAllocateMemory is passed to CHECK_RESULT.
	 */
	memory_block(VkDevice dev, u64 block_sz, uint32_t memory_type_index)
		: m_device(dev)
	{
		info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
		info.memoryTypeIndex = memory_type_index;
		info.allocationSize = block_sz;

		CHECK_RESULT(vkAllocateMemory(m_device, &info, nullptr, &memory));
	}

	/** Frees the allocation on the device it was created from. */
	~memory_block()
	{
		vkFreeMemory(m_device, memory, nullptr);
	}

private:
	VkDevice m_device; // Device used for both allocation and release
};
class memory_block_deprecated
{
VkDeviceMemory vram = nullptr;
vk::render_device *owner = nullptr;
@ -244,8 +270,8 @@ namespace vk
bool mappable = false;
public:
memory_block() {}
~memory_block() {}
memory_block_deprecated() {}
~memory_block_deprecated() {}
void allocate_from_pool(vk::render_device &device, u64 block_sz, bool host_visible, u32 typeBits)
{
@ -321,7 +347,7 @@ namespace vk
VkImageUsageFlags m_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
VkImageTiling m_tiling = VK_IMAGE_TILING_LINEAR;
vk::memory_block vram_allocation;
vk::memory_block_deprecated vram_allocation;
vk::render_device *owner = nullptr;
u32 m_width;
@ -367,7 +393,83 @@ namespace vk
operator VkImage();
};
class buffer
/**
 * Owns a VkBuffer together with a dedicated memory_block bound to it at
 * offset 0. The buffer is destroyed in the destructor; the memory_block is
 * released afterwards when the `memory` member is destroyed.
 * Instances can be neither copied nor moved.
 */
struct buffer
{
	VkBuffer value;                           // Buffer handle written by vkCreateBuffer
	VkBufferCreateInfo info = {};             // Creation parameters passed to vkCreateBuffer
	std::unique_ptr<vk::memory_block> memory; // Backing allocation, sized from the buffer's memory requirements

	/**
	 * Creates an exclusive-sharing buffer of `size` bytes with the given
	 * usage and flags, allocates memory of type memory_type_index for it
	 * and binds the two together.
	 */
	buffer(VkDevice dev, u64 size, uint32_t memory_type_index, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags)
		: m_device(dev)
	{
		info.size = size;
		info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
		info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
		info.flags = flags;
		info.usage = usage;

		CHECK_RESULT(vkCreateBuffer(m_device, &info, nullptr, &value));

		//Allocate vram for this buffer
		VkMemoryRequirements memory_reqs;
		vkGetBufferMemoryRequirements(m_device, value, &memory_reqs);
		memory.reset(new memory_block(m_device, memory_reqs.size, memory_type_index));

		// vkBindBufferMemory returns a VkResult; check it like the other calls
		// instead of silently continuing with an unbound buffer.
		CHECK_RESULT(vkBindBufferMemory(m_device, value, memory->memory, 0));
	}

	~buffer()
	{
		vkDestroyBuffer(m_device, value, nullptr);
	}

	/**
	 * Maps `size` bytes of the backing memory starting at `offset` and
	 * returns the host pointer produced by vkMapMemory.
	 * The offset is 64-bit so it is forwarded to vkMapMemory without truncation.
	 */
	void *map(u64 offset, u64 size)
	{
		void *data = nullptr;
		CHECK_RESULT(vkMapMemory(m_device, memory->memory, offset, size, 0, &data));
		return data;
	}

	/** Unmaps the backing memory. */
	void unmap()
	{
		vkUnmapMemory(m_device, memory->memory);
	}

	buffer(const buffer&) = delete;
	buffer(buffer&&) = delete;

private:
	VkDevice m_device; // Device the buffer was created on
};
/**
 * Owns a VkBufferView: created in the constructor and destroyed in the
 * destructor. Instances can be neither copied nor moved.
 */
struct buffer_view
{
	VkBufferView value;               // View handle written by vkCreateBufferView
	VkBufferViewCreateInfo info = {}; // Creation parameters passed to vkCreateBufferView

	buffer_view(const buffer_view&) = delete;
	buffer_view(buffer_view&&) = delete;

	/**
	 * Creates a view of `size` bytes of `buffer`, starting at `offset`,
	 * interpreted with `format`. The result of vkCreateBufferView is passed
	 * to CHECK_RESULT.
	 */
	buffer_view(VkDevice dev, VkBuffer buffer, VkFormat format, VkDeviceSize offset, VkDeviceSize size)
		: m_device(dev)
	{
		info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
		info.buffer = buffer;
		info.format = format;
		info.offset = offset;
		info.range = size;

		CHECK_RESULT(vkCreateBufferView(m_device, &info, nullptr, &value));
	}

	/** Destroys the view on the device it was created from. */
	~buffer_view()
	{
		vkDestroyBufferView(m_device, value, nullptr);
	}

private:
	VkDevice m_device; // Device used for both creation and destruction
};
class buffer_deprecated
{
VkBufferView m_view = nullptr;
VkBuffer m_buffer = nullptr;
@ -377,16 +479,16 @@ namespace vk
VkBufferCreateFlags m_flags = 0;
vk::render_device *owner;
vk::memory_block vram;
vk::memory_block_deprecated vram;
u64 m_size = 0;
bool viewable = false;
public:
buffer() {}
~buffer() {}
buffer_deprecated() {}
~buffer_deprecated() {}
void create(vk::render_device &dev, u64 size, VkFormat format, VkBufferUsageFlagBits usage, VkBufferCreateFlags flags)
void create(vk::render_device &dev, u64 size, VkFormat format = VK_FORMAT_UNDEFINED, VkBufferUsageFlagBits usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VkBufferCreateFlags flags = 0)
{
if (m_buffer) throw EXCEPTION("Buffer create called on an existing buffer!");
@ -417,21 +519,6 @@ namespace vk
set_format(format);
}
void create(vk::render_device &dev, u32 size, VkFormat format, VkBufferUsageFlagBits usage)
{
create(dev, size, format, usage, 0);
}
void create(vk::render_device &dev, u32 size, VkFormat format)
{
create(dev, size, format, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
}
void create(vk::render_device &dev, u32 size)
{
create(dev, size, VK_FORMAT_UNDEFINED);
}
void *map(u32 offset, u64 size)
{
if (!vram.is_mappable()) return nullptr;
@ -1323,8 +1410,9 @@ namespace vk
bool has_uniform(program_domain domain, std::string uniform_name);
bool bind_uniform(program_domain domain, std::string uniform_name);
bool bind_uniform(program_domain domain, std::string uniform_name, vk::texture &_texture);
bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer);
bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store);
bool bind_uniform(program_domain domain, std::string uniform_name, VkBuffer _buffer, VkDeviceSize offset, VkDeviceSize size);
bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer);
bool bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer, bool is_texel_store);
program& operator = (const program&) = delete;
program& operator = (program&& other);

View File

@ -520,6 +520,7 @@ namespace vk
{
buffer.buffer = input.as_buffer.buffer;
buffer.range = input.as_buffer.size;
buffer.offset = input.as_buffer.offset;
}
else
LOG_ERROR(RSX, "UBO was not bound: %s", input.name);
@ -717,7 +718,32 @@ namespace vk
return false;
}
bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer)
/**
 * Binds the range [offset, offset + size) of _buffer as the uniform buffer
 * named uniform_name in the given program domain.
 *
 * Returns true when a uniform with a matching name and domain was found,
 * false otherwise. uniforms_changed is set only when the stored buffer,
 * offset or size actually differ from the requested ones.
 */
bool program::bind_uniform(program_domain domain, std::string uniform_name, VkBuffer _buffer, VkDeviceSize offset, VkDeviceSize size)
{
	for (auto &uniform : uniforms)
	{
		if (uniform.name == uniform_name &&
			uniform.domain == domain)
		{
			if (uniform.as_buffer.buffer != _buffer ||
				uniform.as_buffer.size != size ||
				uniform.as_buffer.offset != offset)
			{
				uniform.as_buffer.size = size;
				uniform.as_buffer.buffer = _buffer;
				uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed!
				uniform.as_buffer.offset = offset;
				uniforms_changed = true;
			}

			uniform.type = input_type_uniform_buffer;
			return true;
		}
	}

	// No uniform matched. Without this return the function would flow off the
	// end of a non-void function, which is undefined behaviour.
	return false;
}
bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer)
{
for (auto &uniform : uniforms)
{
@ -728,11 +754,13 @@ namespace vk
u64 size = _buffer.size();
if (uniform.as_buffer.buffer != buf ||
uniform.as_buffer.size != size)
uniform.as_buffer.size != size ||
uniform.as_buffer.offset != 0)
{
uniform.as_buffer.size = size;
uniform.as_buffer.buffer = buf;
uniform.as_buffer.buffer_view = nullptr; //UBOs cannot be viewed!
uniform.as_buffer.offset = 0;
uniforms_changed = true;
}
@ -746,7 +774,7 @@ namespace vk
return false;
}
bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer &_buffer, bool is_texel_store)
bool program::bind_uniform(program_domain domain, std::string uniform_name, vk::buffer_deprecated &_buffer, bool is_texel_store)
{
if (!is_texel_store)
{
@ -764,11 +792,13 @@ namespace vk
if (uniform.as_buffer.buffer != buf ||
uniform.as_buffer.buffer_view != view ||
uniform.as_buffer.size != size)
uniform.as_buffer.size != size ||
uniform.as_buffer.offset != 0)
{
uniform.as_buffer.size = size;
uniform.as_buffer.buffer = buf;
uniform.as_buffer.buffer_view = view;
uniform.as_buffer.offset = 0;
if (!view)
throw EXCEPTION("Invalid buffer passed as texel storage");

View File

@ -216,7 +216,7 @@ namespace vk
}
}
std::tuple<VkPrimitiveTopology, bool, u32, VkIndexType>
std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType>
VKGSRender::upload_vertex_data()
{
//initialize vertex attributes
@ -481,6 +481,8 @@ VKGSRender::upload_vertex_data()
VkIndexType index_format = VK_INDEX_TYPE_UINT16;
VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_mode, primitives_emulated);
size_t offset_in_index_buffer = -1;
if (primitives_emulated)
{
//Line loops are line-strips with loop-back; using line-strips-with-adj doesnt work for vulkan
@ -491,7 +493,11 @@ VKGSRender::upload_vertex_data()
if (!is_indexed_draw)
{
index_count = vk::expand_line_loop_array_to_strip(vertex_draw_count, indices);
m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data());
size_t upload_size = index_count * sizeof(u16);
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size);
memcpy(buf, indices.data(), upload_size);
m_index_buffer->unmap();
}
else
{
@ -502,12 +508,20 @@ VKGSRender::upload_vertex_data()
std::vector<u32> indices32;
index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u32*)vertex_index_array.data(), indices32);
m_index_buffer.sub_data(0, index_count*sizeof(u32), indices32.data());
size_t upload_size = index_count * sizeof(u32);
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size);
memcpy(buf, indices32.data(), upload_size);
m_index_buffer->unmap();
}
else
{
index_count = vk::expand_indexed_line_loop_to_strip(vertex_draw_count, (u16*)vertex_index_array.data(), indices);
m_index_buffer.sub_data(0, index_count*sizeof(u16), indices.data());
size_t upload_size = index_count * sizeof(u16);
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size);
memcpy(buf, indices.data(), upload_size);
m_index_buffer->unmap();
}
}
}
@ -532,7 +546,11 @@ VKGSRender::upload_vertex_data()
write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast<char*>(indices.data()), draw_mode, 0, vertex_draw_count);
}
m_index_buffer.sub_data(0, index_count * sizeof(u16), indices.data());
size_t upload_size = index_count * sizeof(u16);
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size);
memcpy(buf, indices.data(), upload_size);
m_index_buffer->unmap();
}
is_indexed_draw = true;
@ -557,9 +575,12 @@ VKGSRender::upload_vertex_data()
if (index_sz != vertex_draw_count)
LOG_ERROR(RSX, "Vertex draw count mismatch!");
m_index_buffer.sub_data(0, vertex_index_array.size(), vertex_index_array.data());
m_index_buffer.set_format(fmt); //Unnecessary unless viewing contents in sampler...
size_t upload_size = vertex_index_array.size();
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer->map(offset_in_index_buffer, upload_size);
memcpy(buf, vertex_index_array.data(), upload_size);
m_index_buffer->unmap();
}
return std::make_tuple(prims, is_indexed_draw, index_count, index_format);
return std::make_tuple(prims, is_indexed_draw, index_count, offset_in_index_buffer, index_format);
}