vk: Implement heap aggregation

- Aggregate heaps of the same type under one object and allow the allocator to try each matching type in turn.
kd-11, 2021-07-20 18:05:12 +03:00 (committed by kd-11)
parent be9be6e5c9
commit 59e7379010
5 changed files with 143 additions and 52 deletions
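
In short: instead of carrying a single u32 memory type index, allocation paths now carry a small fixed-capacity list of compatible type indices and try them in order. A standalone sketch of that shape (simplified; try_alloc is a hypothetical stand-in for the real Vulkan allocation call, and memory_types is a simplified analogue of the vk::memory_type_info class introduced below):

	#include <array>
	#include <cstdint>
	#include <optional>

	bool try_alloc(std::uint32_t type_index); // hypothetical allocation attempt

	// Simplified analogue of vk::memory_type_info from this commit.
	struct memory_types
	{
		std::array<std::uint32_t, 4> pools{};
		std::uint32_t count = 0;

		void push(std::uint32_t index) { pools[count++] = index; }
		const std::uint32_t* begin() const { return pools.data(); }
		const std::uint32_t* end() const { return pools.data() + count; }
	};

	// First type that satisfies the allocation wins.
	std::optional<std::uint32_t> alloc_from_any(const memory_types& types)
	{
		for (const auto index : types)
		{
			if (try_alloc(index))
				return index;
		}
		return std::nullopt; // caller may relieve memory pressure and retry
	}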


@@ -109,9 +109,21 @@ namespace vk
 		g_last_completed_event = 0;
 	}
 
-	u64 vmm_get_application_memory_usage(u32 memory_type)
+	u64 vmm_get_application_memory_usage(const memory_type_info& memory_type)
 	{
-		return g_vmm_stats.memory_usage[memory_type];
+		u64 result = 0;
+		for (const auto& memory_type_index : memory_type)
+		{
+			auto it = g_vmm_stats.memory_usage.find(memory_type_index);
+			if (it == g_vmm_stats.memory_usage.end())
+			{
+				continue;
+			}
+
+			result += it->second.observe();
+		}
+
+		return result;
 	}
 
 	u64 vmm_get_application_pool_usage(vmm_allocation_pool pool)
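
With aggregation in place, one logical query now walks every matching type index. A hypothetical call site (how the mapping object is obtained is assumed; only memory_type_mapping and vmm_get_application_memory_usage come from this commit):

	// Hypothetical usage: total application-side VRAM usage across all
	// aggregated device-local memory types.
	extern vk::memory_type_mapping mapping; // assumed to come from the device query below
	const u64 vram_used = vk::vmm_get_application_memory_usage(mapping.device_local);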


@@ -645,8 +645,6 @@ namespace vk
 		vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
 
 		memory_type_mapping result;
-		result.device_local = VK_MAX_MEMORY_TYPES;
-		result.host_visible_coherent = VK_MAX_MEMORY_TYPES;
 		result.device_local_total_bytes = 0;
 		result.host_visible_total_bytes = 0;
 		bool host_visible_cached = false;
@@ -658,11 +656,9 @@ namespace vk
 			bool is_device_local = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
 			if (is_device_local)
 			{
-				if (result.device_local_total_bytes < heap.size)
-				{
-					result.device_local = i;
-					result.device_local_total_bytes = heap.size;
-				}
+				// Allow multiple device_local heaps
+				result.device_local.push(i);
+				result.device_local_total_bytes += heap.size;
 			}
 
 			bool is_host_visible = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
@@ -673,6 +669,7 @@ namespace vk
 			{
 				if ((is_cached && !host_visible_cached) || (result.host_visible_total_bytes < heap.size))
 				{
+					// Allow only a single host_visible heap. It makes no sense to have multiple of these otherwise
 					result.host_visible_coherent = i;
 					result.host_visible_total_bytes = heap.size;
 					host_visible_cached = is_cached;
@@ -680,9 +677,9 @@ namespace vk
 			}
 		}
 
-		if (result.device_local == VK_MAX_MEMORY_TYPES)
+		if (!result.device_local)
 			fmt::throw_exception("GPU doesn't support device local memory");
-		if (result.host_visible_coherent == VK_MAX_MEMORY_TYPES)
+		if (!result.host_visible_coherent)
 			fmt::throw_exception("GPU doesn't support host coherent device local memory");
 
 		return result;
 	}
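
For reference, the mapping built above comes from the standard Vulkan memory-properties query. A minimal standalone sketch of collecting every device-local type index (core Vulkan only; instance and device creation omitted, pdev assumed to be a valid VkPhysicalDevice):

	#include <vulkan/vulkan.h>
	#include <cstdint>
	#include <vector>

	std::vector<uint32_t> collect_device_local_types(VkPhysicalDevice pdev)
	{
		VkPhysicalDeviceMemoryProperties props{};
		vkGetPhysicalDeviceMemoryProperties(pdev, &props);

		std::vector<uint32_t> types;
		for (uint32_t i = 0; i < props.memoryTypeCount; ++i)
		{
			if (props.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
			{
				// Heap backing this type; its size is what the loop above accumulates.
				const VkMemoryHeap& heap = props.memoryHeaps[props.memoryTypes[i].heapIndex];
				(void)heap.size;
				types.push_back(i);
			}
		}
		return types;
	}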


@@ -28,8 +28,8 @@ namespace vk
 	struct memory_type_mapping
 	{
-		u32 host_visible_coherent;
-		u32 device_local;
+		memory_type_info host_visible_coherent;
+		memory_type_info device_local;
 
 		u64 device_local_total_bytes;
 		u64 host_visible_total_bytes;


@@ -13,6 +13,32 @@ namespace
 namespace vk
 {
+	memory_type_info::memory_type_info(u32 index)
+		: num_entries(0)
+	{
+		push(index);
+	}
+
+	void memory_type_info::push(u32 index)
+	{
+		ensure(num_entries < pools.size());
+		pools[num_entries++] = index;
+	}
+
+	memory_type_info::const_iterator memory_type_info::begin() const
+	{
+		return pools.data();
+	}
+
+	memory_type_info::const_iterator memory_type_info::end() const
+	{
+		return pools.data() + num_entries;
+	}
+
+	memory_type_info::operator bool() const
+	{
+		return (num_entries > 0);
+	}
+
 	mem_allocator_vma::mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev)
 	{
 		// Initialize stats pool
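
Taken together, memory_type_info behaves like a tiny ordered set of type indices. A usage sketch with hypothetical index values:

	vk::memory_type_info types(3); // seed with memory type index 3
	types.push(7);                 // aggregate a second compatible type

	if (types) // operator bool: true once at least one index was pushed
	{
		for (const auto index : types) // visits 3, then 7, in insertion order
		{
			// ... attempt allocation with 'index' ...
		}
	}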
@@ -33,40 +59,58 @@ namespace vk
 		vmaDestroyAllocator(m_allocator);
 	}
 
-	mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool)
+	mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool)
 	{
 		VmaAllocation vma_alloc;
 		VkMemoryRequirements mem_req = {};
 		VmaAllocationCreateInfo create_info = {};
+		VkResult error_code;
 
-		mem_req.memoryTypeBits = 1u << memory_type_index;
-		mem_req.size = ::align2(block_sz, alignment);
-		mem_req.alignment = alignment;
-		create_info.memoryTypeBits = 1u << memory_type_index;
-		create_info.flags = m_allocation_flags;
-
-		if (VkResult result = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
-			result != VK_SUCCESS)
+		auto do_vma_alloc = [&]() -> std::tuple<VkResult, u32>
 		{
-			if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
-				vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+			for (const auto& memory_type_index : memory_type)
 			{
-				// If we just ran out of VRAM, attempt to release resources and try again
-				result = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
+				mem_req.memoryTypeBits = 1u << memory_type_index;
+				mem_req.size = ::align2(block_sz, alignment);
+				mem_req.alignment = alignment;
+				create_info.memoryTypeBits = 1u << memory_type_index;
+				create_info.flags = m_allocation_flags;
+
+				error_code = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
+				if (error_code == VK_SUCCESS)
+				{
+					return { VK_SUCCESS, memory_type_index };
+				}
 			}
 
-			if (result != VK_SUCCESS)
+			return { error_code, ~0u };
+		};
+
+		// On successful allocation, simply tag the transaction and carry on.
+		{
+			const auto [status, type] = do_vma_alloc();
+			if (status == VK_SUCCESS)
 			{
-				die_with_error(result);
-			}
-			else
-			{
-				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool);
+				return vma_alloc;
 			}
 		}
 
-		vmm_notify_memory_allocated(vma_alloc, memory_type_index, block_sz, pool);
-		return vma_alloc;
+		if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
+			vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+		{
+			// Out of memory. Try again.
+			const auto [status, type] = do_vma_alloc();
+			if (status == VK_SUCCESS)
+			{
+				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool);
+				return vma_alloc;
+			}
+		}
+
+		die_with_error(error_code);
+		fmt::throw_exception("Unreachable! Error_code=0x%x", static_cast<u32>(error_code));
 	}
 
 	void mem_allocator_vma::free(mem_handle_t mem_handle)
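
Both allocators now share the same two-phase shape: try every aggregated type once, then relieve memory pressure and retry once before dying. A compilable sketch of just that control flow (all helper names hypothetical; try_alloc and relieve_memory_pressure stand in for vmaAllocateMemory/vkAllocateMemory and vmm_handle_memory_pressure):

	#include <cstdint>
	#include <optional>
	#include <vector>

	bool try_alloc(std::uint32_t type_index);  // hypothetical allocation attempt
	bool relieve_memory_pressure();            // hypothetical resource eviction

	std::optional<std::uint32_t> allocate(const std::vector<std::uint32_t>& types)
	{
		auto pass = [&]() -> std::optional<std::uint32_t>
		{
			for (const auto index : types)
				if (try_alloc(index))
					return index;          // first matching type wins
			return std::nullopt;
		};

		if (auto r = pass())
			return r;                      // fast path: no pressure handling
		if (relieve_memory_pressure())     // e.g. evict caches, then retry once
			return pass();
		return std::nullopt;               // caller reports the fatal error
	}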
@@ -136,34 +180,54 @@ namespace vk
 		m_allocation_flags = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT;
 	}
 
-	mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index, vmm_allocation_pool pool)
+	mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool)
 	{
+		VkResult error_code;
 		VkDeviceMemory memory;
 		VkMemoryAllocateInfo info = {};
 		info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
 		info.allocationSize = block_sz;
-		info.memoryTypeIndex = memory_type_index;
 
-		if (VkResult result = vkAllocateMemory(m_device, &info, nullptr, &memory); result != VK_SUCCESS)
+		auto do_vk_alloc = [&]() -> std::tuple<VkResult, u32>
 		{
-			if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY && vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+			for (const auto& memory_type_index : memory_type)
 			{
-				// If we just ran out of VRAM, attempt to release resources and try again
-				result = vkAllocateMemory(m_device, &info, nullptr, &memory);
+				info.memoryTypeIndex = memory_type_index;
+				error_code = vkAllocateMemory(m_device, &info, nullptr, &memory);
+				if (error_code == VK_SUCCESS)
+				{
+					return { error_code, memory_type_index };
+				}
 			}
 
-			if (result != VK_SUCCESS)
+			return { error_code, ~0u };
+		};
+
+		{
+			const auto [status, type] = do_vk_alloc();
+			if (status == VK_SUCCESS)
 			{
-				die_with_error(result);
-			}
-			else
-			{
-				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(memory, type, block_sz, pool);
+				return memory;
 			}
 		}
 
-		vmm_notify_memory_allocated(memory, memory_type_index, block_sz, pool);
-		return memory;
+		if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
+			vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+		{
+			// Out of memory. Try again.
+			const auto [status, type] = do_vk_alloc();
+			if (status == VK_SUCCESS)
+			{
+				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(memory, type, block_sz, pool);
+				return memory;
+			}
+		}
+
+		die_with_error(error_code);
+		fmt::throw_exception("Unreachable! Error_code=0x%x", static_cast<u32>(error_code));
 	}
 
 	void mem_allocator_vk::free(mem_handle_t mem_handle)


@@ -23,6 +23,24 @@ namespace vk
 	using namespace vk::vmm_allocation_pool_;
 
+	class memory_type_info
+	{
+		std::array<u32, 4> pools;
+		u32 num_entries = 0;
+
+	public:
+		memory_type_info() = default;
+		memory_type_info(u32 index);
+		void push(u32 index);
+
+		using iterator = u32*;
+		using const_iterator = const u32*;
+		const_iterator begin() const;
+		const_iterator end() const;
+
+		operator bool() const;
+	};
+
 	class mem_allocator_base
 	{
 	public:
@@ -33,7 +51,7 @@ namespace vk
 		virtual void destroy() = 0;
 
-		virtual mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool) = 0;
+		virtual mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool) = 0;
 		virtual void free(mem_handle_t mem_handle) = 0;
 		virtual void* map(mem_handle_t mem_handle, u64 offset, u64 size) = 0;
 		virtual void unmap(mem_handle_t mem_handle) = 0;
@@ -61,7 +79,7 @@ namespace vk
 		void destroy() override;
 
-		mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool) override;
+		mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool) override;
 		void free(mem_handle_t mem_handle) override;
 
 		void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override;
@@ -90,7 +108,7 @@ namespace vk
 		void destroy() override {}
 
-		mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index, vmm_allocation_pool pool) override;
+		mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool) override;
 		void free(mem_handle_t mem_handle) override;
 
 		void* map(mem_handle_t mem_handle, u64 offset, u64 size) override;
@@ -151,7 +169,7 @@ namespace vk
 	void vmm_notify_memory_freed(void* handle);
 	void vmm_reset();
 	void vmm_check_memory_usage();
-	u64 vmm_get_application_memory_usage(u32 memory_type);
+	u64 vmm_get_application_memory_usage(const memory_type_info& memory_type);
 	u64 vmm_get_application_pool_usage(vmm_allocation_pool pool);
 	bool vmm_handle_memory_pressure(rsx::problem_severity severity);
 	rsx::problem_severity vmm_determine_memory_load_severity();