rsx/vk: Implement flushing surface cache blocks to linear mem

Authored by kd-11 on 2022-08-23 16:34:24 +03:00; committed by kd-11
parent a71bdc761e
commit 1f9e04f72d
8 changed files with 339 additions and 125 deletions

View File

@@ -0,0 +1,118 @@
#pragma once
#include <util/types.hpp>
#include "Utilities/address_range.h"
namespace rsx
{
template <typename Traits, int BlockSize>
class surface_cache_dma
{
protected:
static inline u32 block_for(u32 address)
{
return address / BlockSize;
}
static inline u32 block_address(u32 block_id)
{
return block_id * BlockSize;
}
using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
using buffer_object_type = typename Traits::buffer_object_type;
struct memory_buffer_entry_t
{
u32 id;
buffer_object_storage_type bo;
u64 memory_tag = 0;
u32 base_address = 0;
inline buffer_object_type get() { return Traits::get(bo); }
inline operator bool () const { return base_address != 0; }
inline void release() { bo.release(); }
inline void acquire(buffer_object_type b) { bo = b; }
};
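// One entry per BlockSize-aligned block of the 4 GiB address space.
// The head block of a merged range owns the buffer object; the blocks
// it covers point back to it through base_address.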
using buffer_block_array = typename std::array<memory_buffer_entry_t, 0x100000000ull / BlockSize>;
buffer_block_array m_buffer_list;
public:
surface_cache_dma()
{
for (usz i = 0; i < m_buffer_list.size(); ++i)
{
m_buffer_list[i].id = i;
}
}
surface_cache_dma& with_range(Traits::command_list_type cmd, const utils::address_range& range)
{
// Prepare underlying memory so that the range specified is provisioned and contiguous
// 1. Check if we have a pre-existing bo layer
const auto& this_entry = m_buffer_list[block_for(range.start)];
if (this_entry)
{
const auto bo = this_entry.get();
const auto buffer_range = utils::address_range::start_length(this_entry.base_address, ::size32(*bo));
if (range.inside(buffer_range))
{
// All is well
return *this;
}
}
// Data does not exist or is not contiguous. Merge the layer
std::vector<buffer_object_type> bo_list;
const auto start_address = this_entry ? this_entry.base_address : block_address(this_entry.id);
for (u32 address = start_address; address <= range.end;)
{
auto& bo_storage = m_buffer_list[block_for(address)];
bo_storage.base_address = start_address;
if (auto bo = bo_storage.get())
{
bo_list.push_back(bo);
bo_storage.release();
address += ::size32(*bo);
continue;
}
bo_list.push_back(nullptr);
address += BlockSize;
}
auto unified = Traits::template merge_bo_list<BlockSize>(cmd, bo_list);
ensure(unified);
m_buffer_list[block_for(start_address)].acquire(unified);
return *this;
}
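// Expand a byte range to the enclosing BlockSize-aligned block range (inclusive bounds).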
utils::address_range to_block_range(const utils::address_range& range)
{
u32 start = block_address(block_for(range.start));
u32 end = block_address(block_for(range.end)) + BlockSize;
return utils::address_range::start_end(start, end - 1);
}
std::tuple<buffer_object_type, u32, u64> get(u32 address)
{
const auto& block = m_buffer_list[block_for(address)];
return { block.get(), address - block.base_address, block.memory_tag };
}
void touch(const utils::address_range& range)
{
const u64 stamp = rsx::get_shared_tag();
for (usz i = block_for(range.start); i <= block_for(range.end); i++)
{
m_buffer_list[i].memory_tag = stamp;
}
}
};
}
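A minimal usage sketch of the new helper, assuming a hypothetical dma_traits policy and the 4 MB block size the surface cache instantiates below: with_range() merges any partial buffer objects so the range is backed by one contiguous BO, get() returns that BO plus the offset of an address inside it, and touch() stamps the affected blocks. The names dma_traits and flush_blocks are illustrative, not part of this commit.

// Illustrative sketch only -- 'dma_traits' and 'flush_blocks' are hypothetical.
using dma_map = rsx::surface_cache_dma<dma_traits, 0x400000>;

void flush_blocks(dma_traits::command_list_type cmd, dma_map& dma,
                  const utils::address_range& dirty)
{
    // Round the dirty range out to whole 4 MB blocks
    const auto block_range = dma.to_block_range(dirty);

    // Guarantee one contiguous buffer object over the range, then fetch it
    // along with the offset of the range start inside that buffer
    auto [bo, offset, timestamp] = dma
        .with_range(cmd, block_range)
        .get(block_range.start);

    // ... record GPU copies of the overlapping surfaces into 'bo' here ...

    // Stamp the blocks so later flushes can skip surfaces that are older
    dma.touch(block_range);
}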

View File

@@ -1,117 +0,0 @@
#pragma once
#include "ranged_map.hpp"
namespace rsx
{
template <typename Traits, int BlockSize>
class surface_cache_data_map : public ranged_map<typename Traits::surface_storage_type, BlockSize>
{
#ifdef _MSC_VER
using super = ranged_map<typename Traits::surface_storage_type, BlockSize>;
#else
using super = class ranged_map<typename Traits::surface_storage_type, BlockSize>;
#endif
using metadata_t = typename super::block_metadata_t;
const metadata_t& find_head_block(u32 address)
{
auto& meta = super::m_metadata[address];
if (meta.head_block != umax)
{
return find_head_block(meta.head_block * BlockSize);
}
return meta;
}
public:
using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
using buffer_object_type = typename Traits::buffer_object_type;
struct buffer_object_t
{
buffer_object_storage_type bo;
u64 memory_tag = 0;
inline buffer_object_type get()
{
return Traits::get(bo);
}
inline void release()
{
bo.release();
}
inline void acquire(buffer_object_type obj)
{
ensure(!get());
bo = obj;
}
};
protected:
using buffer_block_array = typename std::array<buffer_object_t, 0x100000000ull / BlockSize>;
buffer_block_array m_buffer_list;
public:
surface_cache_data_map()
: super::ranged_map()
{}
surface_cache_data_map& with_range(const utils::address_range& range)
{
// Prepare underlying memory so that the range specified is provisioned and contiguous
const auto& head_block = find_head_block(range.start);
const auto start_address = block_address(head_block.id);
const auto& current = m_buffer_list[head_block.id];
if (auto bo = current.get())
{
if (::size32(*bo) >= (range.end - start_address))
{
return *this;
}
}
// Data does not exist or is not contiguous. Merge the layer
std::vector<buffer_object_type> bo_list;
for (u32 address = start_address; address <= range.end;)
{
auto& bo_storage = m_buffer_list[super::block_for(address)];
if (auto bo = bo_storage.get())
{
bo_list.push_back(bo);
bo_storage.release();
address += ::size32(*bo);
continue;
}
bo_list.push_back(nullptr);
address += BlockSize;
}
auto unified = Traits::merge_bo_list<BlockSize>(bo_list);
ensure(unified);
current.acquire(unified);
return *this;
}
void spill(const utils::address_range& range)
{
// Move VRAM to system RAM
const auto& meta = with_range(range).find_head_block(range.start);
auto& storage = m_buffer_list[meta.id];
Traits::spill_buffer(storage.bo);
}
void unspill(const utils::address_range& range)
{
// Move system RAM to VRAM
const auto& meta = with_range(range).find_head_block(range.start);
auto& storage = m_buffer_list[meta.id];
Traits::unspill_buffer(storage.bo);
}
};
}

View File

@@ -2,7 +2,8 @@
#include "surface_utils.h"
#include "simple_array.hpp"
#include "surface_cache_storage.hpp"
#include "ranged_map.hpp"
#include "surface_cache_dma.hpp"
#include "../gcm_enums.h"
#include "../rsx_utils.h"
#include <list>
@@ -45,7 +46,8 @@ namespace rsx
using surface_type = typename Traits::surface_type;
using command_list_type = typename Traits::command_list_type;
using surface_overlap_info = surface_overlap_info_t<surface_type>;
using surface_ranged_map = surface_cache_data_map<Traits, 0x400000>;
using surface_ranged_map = ranged_map<surface_storage_type, 0x400000>;
using surface_cache_dma_map = surface_cache_dma<Traits, 0x400000>;
protected:
surface_ranged_map m_render_targets_storage = {};
@@ -54,6 +56,8 @@ namespace rsx
rsx::address_range m_render_targets_memory_range;
rsx::address_range m_depth_stencil_memory_range;
surface_cache_dma_map m_dma_block;
bool m_invalidate_on_write = false;
rsx::surface_raster_type m_active_raster_type = rsx::surface_raster_type::linear;
@@ -856,6 +860,94 @@ namespace rsx
std::forward<Args>(extra_params)...);
}
std::tuple<std::vector<surface_type>, std::vector<surface_type>>
find_overlapping_set(const utils::address_range& range) const
{
std::vector<surface_type> color_result, depth_result;
if (m_render_targets_memory_range.valid() &&
range.overlaps(m_render_targets_memory_range))
{
for (auto it = m_render_targets_storage.begin_range(range); it != m_render_targets_storage.end(); ++it)
{
auto surface = Traits::get(it->second);
const auto surface_range = surface->get_memory_range();
if (!range.overlaps(surface_range))
continue;
color_result.push_back(surface);
}
}
if (m_depth_stencil_memory_range.valid() &&
range.overlaps(m_depth_stencil_memory_range))
{
for (auto it = m_depth_stencil_storage.begin_range(range); it != m_depth_stencil_storage.end(); ++it)
{
auto surface = Traits::get(it->second);
const auto surface_range = surface->get_memory_range();
if (!range.overlaps(surface_range))
continue;
depth_result.push_back(surface);
}
}
return { color_result, depth_result };
}
void write_to_dma_buffers(
command_list_type command_list,
const utils::address_range& range)
{
auto block_range = m_dma_block.to_block_range(range);
auto [color_data, depth_stencil_data] = find_overlapping_set(block_range);
auto [bo, offset, bo_timestamp] = m_dma_block
.with_range(command_list, block_range)
.get(block_range.start);
u64 src_offset, dst_offset, write_length;
auto block_length = block_range.length();
auto all_data = std::move(color_data);
all_data.insert(all_data.end(), depth_stencil_data.begin(), depth_stencil_data.end());
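// Process surfaces oldest-first so that where they overlap in memory,
// the most recently rendered data is written last and wins.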
if (all_data.size() > 1)
{
std::sort(all_data.begin(), all_data.end(), [](const auto& a, const auto& b)
{
return a->last_use_tag < b->last_use_tag;
});
}
for (const auto& surface : all_data)
{
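// Surfaces not written since the buffer was last stamped are already
// resident in the BO; skip them.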
if (surface->last_use_tag <= bo_timestamp)
{
continue;
}
const auto this_range = surface->get_memory_range();
const auto max_length = this_range.length();
if (this_range.start < block_range.start)
{
src_offset = block_range.start - this_range.start;
dst_offset = 0;
}
else
{
src_offset = 0;
dst_offset = this_range.start - block_range.start;
}
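// Worked example: with the block range starting at 0x00C00000, a surface
// spanning 0x00BF0000..0x00C0FFFF starts before the block, so src_offset =
// 0x10000 and dst_offset = 0; a surface starting at 0x00C40000 inside the
// range copies from src_offset = 0 into dst_offset = 0x40000. The length is
// then clamped to what remains of both the surface and the block range: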
write_length = std::min(max_length - src_offset, block_length - dst_offset);
Traits::write_render_target_to_memory(command_list, bo, surface, dst_offset, src_offset, write_length);
}
m_dma_block.touch(block_range);
}
public:
/**
* Update bound color and depth surface.

View File

@@ -359,7 +359,18 @@ struct gl_render_target_traits
}
static
gl::buffer* merge_bo_list(const std::vector<gl::buffer*>& /*list*/)
void write_render_target_to_memory(
gl::command_context&,
gl::buffer*,
gl::render_target*,
u64, u64, u64)
{
// TODO
}
template <int BlockSize>
static
gl::buffer* merge_bo_list(gl::command_context&, const std::vector<gl::buffer*>& /*list*/)
{
// TODO
return nullptr;

View File

@@ -3,6 +3,15 @@
namespace vk
{
namespace surface_cache_utils
{
void dispose(vk::buffer* buf)
{
auto obj = vk::disposable_t::make(buf);
vk::get_resource_manager()->dispose(obj);
}
}
void surface_cache::destroy()
{
invalidate_all();
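Design note: rather than deleting the buffer inline, dispose() wraps it as a disposable and queues it on the resource manager, deferring destruction until the GPU can no longer be referencing it; the VK merge_bo_list below uses this to retire each sub-block it folds into the unified buffer.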

View File

@@ -16,6 +16,11 @@
namespace vk
{
namespace surface_cache_utils
{
void dispose(vk::buffer* buf);
}
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
@@ -463,10 +468,106 @@ namespace vk
// TODO
}
static vk::buffer* merge_bo_list(const std::vector<vk::buffer*>& /*list*/)
static void write_render_target_to_memory(
vk::command_buffer& cmd,
vk::buffer* bo,
vk::render_target* surface,
u64 dst_offset_in_buffer,
u64 src_offset_in_buffer,
u64 max_copy_length)
{
// TODO
return nullptr;
surface->read_barrier(cmd);
vk::image* source = surface->get_surface(rsx::surface_access::transfer_read);
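// Surfaces rendered at a scaled internal resolution are first rescaled
// to guest dimensions through a typeless scratch image.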
const bool is_scaled = surface->width() != surface->surface_width;
if (is_scaled)
{
const areai src_rect = { 0, 0, source->width(), source->height() };
const areai dst_rect = { 0, 0, surface->get_surface_width<rsx::surface_metrics::samples>(), surface->get_surface_height<rsx::surface_metrics::samples>() };
auto scratch = vk::get_typeless_helper(source->format(), source->format_class(), dst_rect.x2, dst_rect.y2);
vk::copy_scaled_image(cmd, source, scratch, src_rect, dst_rect, 1, true, VK_FILTER_NEAREST);
source = scratch;
}
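// Only a simple, full-length color copy can be written directly into the
// target BO. Clipped copies, copies starting mid-buffer, and depth/stencil
// data are staged through a scratch buffer and copied into place below.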
auto dest = bo;
const auto transfer_size = surface->get_memory_range().length();
if (transfer_size > max_copy_length || src_offset_in_buffer || surface->is_depth_surface())
{
auto scratch = vk::get_scratch_buffer(cmd, transfer_size * 4);
dest = scratch;
}
VkBufferImageCopy region =
{
.bufferOffset = (dest == bo) ? dst_offset_in_buffer : 0,
.bufferRowLength = surface->rsx_pitch / surface->get_bpp(),
.bufferImageHeight = 0,
.imageSubresource = { source->aspect(), 0, 0, 1 },
.imageOffset = {},
.imageExtent = {
.width = source->width(),
.height = source->height(),
.depth = 1
}
};
vk::copy_image_to_buffer(cmd, source, dest, region);
vk::insert_buffer_memory_barrier(cmd,
dest->value, src_offset_in_buffer, max_copy_length,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
if (dest != bo)
{
VkBufferCopy copy = { src_offset_in_buffer, dst_offset_in_buffer, max_copy_length };
vkCmdCopyBuffer(cmd, dest->value, bo->value, 1, &copy);
vk::insert_buffer_memory_barrier(cmd,
bo->value, dst_offset_in_buffer, max_copy_length,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
}
}
template <int BlockSize>
static vk::buffer* merge_bo_list(vk::command_buffer& cmd, std::vector<vk::buffer*>& list)
{
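// Total size of the unified allocation. Blocks with no backing BO still
// reserve a full BlockSize so the merged buffer stays linearly addressable.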
u32 required_bo_size = 0;
for (auto& bo : list)
{
required_bo_size += (bo ? bo->size() : BlockSize);
}
// Create dst
auto pdev = cmd.get_command_pool().owner;
auto dst = new vk::buffer(*pdev,
required_bo_size,
pdev->get_memory_mapping().device_local, 0,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
0, VMM_ALLOCATION_POOL_SURFACE_CACHE);
// TODO: Initialize the buffer with system RAM contents
// Copy all the data over from the sub-blocks
u32 offset = 0;
for (auto& bo : list)
{
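// Null entries are gaps: they keep their BlockSize slot in the layout,
// but their contents are undefined until the TODO above fills them in
// from system RAM.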
if (!bo)
{
offset += BlockSize;
continue;
}
VkBufferCopy copy = { 0, offset, ::size32(*bo) };
offset += ::size32(*bo);
vkCmdCopyBuffer(cmd, bo->value, dst->value, 1, &copy);
// Cleanup
vk::surface_cache_utils::dispose(bo);
}
return dst;
}
template <typename T>

View File

@@ -516,7 +516,7 @@
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp" />
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp" />
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_cursor.h" />
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />

View File

@@ -2143,7 +2143,7 @@
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp">
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\CPU\sse2neon.h">