mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 03:32:55 +00:00
rsx/vk: Implement flushing surface cache blocks to linear mem
This commit is contained in:
parent
a71bdc761e
commit
1f9e04f72d
118
rpcs3/Emu/RSX/Common/surface_cache_dma.hpp
Normal file
118
rpcs3/Emu/RSX/Common/surface_cache_dma.hpp
Normal file
@ -0,0 +1,118 @@
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>
|
||||
#include "Utilities/address_range.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
template <typename Traits, int BlockSize>
|
||||
class surface_cache_dma
|
||||
{
|
||||
protected:
|
||||
static inline u32 block_for(u32 address)
|
||||
{
|
||||
return address / BlockSize;
|
||||
}
|
||||
|
||||
static inline u32 block_address(u32 block_id)
|
||||
{
|
||||
return block_id * BlockSize;
|
||||
}
|
||||
|
||||
using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
|
||||
using buffer_object_type = typename Traits::buffer_object_type;
|
||||
|
||||
struct memory_buffer_entry_t
|
||||
{
|
||||
u32 id;
|
||||
buffer_object_storage_type bo;
|
||||
u64 memory_tag = 0;
|
||||
u32 base_address = 0;
|
||||
|
||||
inline buffer_object_type get() { return Traits::get(bo); }
|
||||
inline operator bool () const { return base_address != 0; }
|
||||
|
||||
inline void release() { bo.release(); }
|
||||
inline void acquire(buffer_object_type b) { bo = b; }
|
||||
};
|
||||
|
||||
using buffer_block_array = typename std::array<memory_buffer_entry_t, 0x100000000ull / BlockSize>;
|
||||
buffer_block_array m_buffer_list;
|
||||
|
||||
public:
|
||||
surface_cache_dma()
|
||||
{
|
||||
for (usz i = 0; i < m_buffer_list.size(); ++i)
|
||||
{
|
||||
m_buffer_list[i].id = i;
|
||||
}
|
||||
}
|
||||
|
||||
surface_cache_dma& with_range(Traits::command_list_type cmd, const utils::address_range& range)
|
||||
{
|
||||
// Prepare underlying memory so that the range specified is provisioned and contiguous
|
||||
// 1. Check if we have a pre-existing bo layer
|
||||
const auto& this_entry = m_buffer_list[block_for(range.start)];
|
||||
if (this_entry)
|
||||
{
|
||||
const auto bo = this_entry.get();
|
||||
const auto buffer_range = utils::address_range::start_length(bo.base_address, ::size32(*bo));
|
||||
|
||||
if (range.inside(buffer_range))
|
||||
{
|
||||
// All is well
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
// Data does not exist or is not contiguous. Merge the layer
|
||||
std::vector<buffer_object_type> bo_list;
|
||||
const auto start_address = this_entry ? this_entry.base_address : block_address(this_entry.id);
|
||||
|
||||
for (u32 address = start_address; address <= range.end;)
|
||||
{
|
||||
auto& bo_storage = m_buffer_list[block_for(address)];
|
||||
bo_storage.base_address = start_address;
|
||||
|
||||
if (auto bo = bo_storage.get())
|
||||
{
|
||||
bo_list.push_back(bo);
|
||||
bo_storage.release();
|
||||
address += ::size32(*bo);
|
||||
continue;
|
||||
}
|
||||
|
||||
bo_list.push_back(nullptr);
|
||||
address += BlockSize;
|
||||
}
|
||||
|
||||
auto unified = Traits::merge_bo_list<BlockSize>(cmd, bo_list);
|
||||
ensure(unified);
|
||||
|
||||
m_buffer_list[block_for(start_address)].acquire(unified);
|
||||
return *this;
|
||||
}
|
||||
|
||||
utils::address_range to_block_range(const utils::address_range& range)
|
||||
{
|
||||
u32 start = block_address(block_for(range.start));
|
||||
u32 end = block_address(block_for(range.end + BlockSize - 1));
|
||||
return utils::address_range::start_end(start, end - 1);
|
||||
}
|
||||
|
||||
std::tuple<buffer_object_type, u32, u64> get(u32 address)
|
||||
{
|
||||
const auto& block = m_buffer_list[block_for(address)];
|
||||
return { block.get(), block.base_address - address };
|
||||
}
|
||||
|
||||
void touch(const utils::address_range& range)
|
||||
{
|
||||
const u64 stamp = rsx::get_shared_tag();
|
||||
for (usz i = block_for(range.start); i <= block_for(range.end); i++)
|
||||
{
|
||||
m_buffer_list[i].memory_tag = stamp;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
#pragma once
|
||||
#include "ranged_map.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
template <typename Traits, int BlockSize>
|
||||
class surface_cache_data_map : public ranged_map<typename Traits::surface_storage_type, BlockSize>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
using super = ranged_map<typename Traits::surface_storage_type, BlockSize>;
|
||||
#else
|
||||
using super = class ranged_map<typename Traits::surface_storage_type, BlockSize>;
|
||||
#endif
|
||||
using metadata_t = typename super::block_metadata_t;
|
||||
|
||||
const metadata_t& find_head_block(u32 address)
|
||||
{
|
||||
auto& meta = super::m_metadata[address];
|
||||
if (meta.head_block != umax)
|
||||
{
|
||||
return find_head_block(meta.head_block * BlockSize);
|
||||
}
|
||||
|
||||
return meta;
|
||||
}
|
||||
|
||||
public:
|
||||
using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
|
||||
using buffer_object_type = typename Traits::buffer_object_type;
|
||||
|
||||
struct buffer_object_t
|
||||
{
|
||||
buffer_object_storage_type bo;
|
||||
u64 memory_tag = 0;
|
||||
|
||||
inline buffer_object_type get()
|
||||
{
|
||||
return Traits::get(bo);
|
||||
}
|
||||
|
||||
inline void release()
|
||||
{
|
||||
bo.release();
|
||||
}
|
||||
|
||||
inline void acquire(buffer_object_type obj)
|
||||
{
|
||||
ensure(!get());
|
||||
bo = obj;
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
using buffer_block_array = typename std::array<buffer_object_t, 0x100000000ull / BlockSize>;
|
||||
buffer_block_array m_buffer_list;
|
||||
|
||||
public:
|
||||
surface_cache_data_map()
|
||||
: super::ranged_map()
|
||||
{}
|
||||
|
||||
surface_cache_data_map& with_range(const utils::address_range& range)
|
||||
{
|
||||
// Prepare underlying memory so that the range specified is provisioned and contiguous
|
||||
const auto& head_block = find_head_block(range.start);
|
||||
const auto start_address = block_address(head_block.id);
|
||||
|
||||
const auto& current = m_buffer_list[head_block.id];
|
||||
if (auto bo = current.get())
|
||||
{
|
||||
if (::size32(*bo) >= (range.end - start_address))
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
// Data does not exist or is not contiguous. Merge the layer
|
||||
std::vector<buffer_object_type> bo_list;
|
||||
for (u32 address = start_address; address <= range.end;)
|
||||
{
|
||||
auto& bo_storage = m_buffer_list[super::block_for(address)];
|
||||
if (auto bo = bo_storage.get())
|
||||
{
|
||||
bo_list.push_back(bo);
|
||||
bo_storage.release();
|
||||
address += ::size32(*bo);
|
||||
continue;
|
||||
}
|
||||
|
||||
bo_list.push_back(nullptr);
|
||||
address += BlockSize;
|
||||
}
|
||||
|
||||
auto unified = Traits::merge_bo_list<BlockSize>(bo_list);
|
||||
ensure(unified);
|
||||
|
||||
current.acquire(unified);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void spill(const utils::address_range& range)
|
||||
{
|
||||
// Move VRAM to system RAM
|
||||
const auto& meta = with_range(range).find_head_block(range.start);
|
||||
auto& storage = m_buffer_list[meta.id];
|
||||
Traits::spill_buffer(storage.bo);
|
||||
}
|
||||
|
||||
void unspill(const utils::address_range& range)
|
||||
{
|
||||
// Move system RAM to VRAM
|
||||
const auto& meta = with_range(range).find_head_block(range.start);
|
||||
auto& storage = m_buffer_list[meta.id];
|
||||
Traits::unspill_buffer(storage.bo);
|
||||
}
|
||||
};
|
||||
}
|
@ -2,7 +2,8 @@
|
||||
|
||||
#include "surface_utils.h"
|
||||
#include "simple_array.hpp"
|
||||
#include "surface_cache_storage.hpp"
|
||||
#include "ranged_map.hpp"
|
||||
#include "surface_cache_dma.hpp"
|
||||
#include "../gcm_enums.h"
|
||||
#include "../rsx_utils.h"
|
||||
#include <list>
|
||||
@ -45,7 +46,8 @@ namespace rsx
|
||||
using surface_type = typename Traits::surface_type;
|
||||
using command_list_type = typename Traits::command_list_type;
|
||||
using surface_overlap_info = surface_overlap_info_t<surface_type>;
|
||||
using surface_ranged_map = surface_cache_data_map<Traits, 0x400000>;
|
||||
using surface_ranged_map = ranged_map<surface_storage_type, 0x400000>;
|
||||
using surface_cache_dma_map = surface_cache_dma<Traits, 0x400000>;
|
||||
|
||||
protected:
|
||||
surface_ranged_map m_render_targets_storage = {};
|
||||
@ -54,6 +56,8 @@ namespace rsx
|
||||
rsx::address_range m_render_targets_memory_range;
|
||||
rsx::address_range m_depth_stencil_memory_range;
|
||||
|
||||
surface_cache_dma_map m_dma_block;
|
||||
|
||||
bool m_invalidate_on_write = false;
|
||||
|
||||
rsx::surface_raster_type m_active_raster_type = rsx::surface_raster_type::linear;
|
||||
@ -856,6 +860,94 @@ namespace rsx
|
||||
std::forward<Args>(extra_params)...);
|
||||
}
|
||||
|
||||
std::tuple<std::vector<surface_type>, std::vector<surface_type>>
|
||||
find_overlapping_set(const utils::address_range& range) const
|
||||
{
|
||||
std::vector<surface_type> color_result, depth_result;
|
||||
utils::address_range result_range;
|
||||
|
||||
if (m_render_targets_memory_range.valid() &&
|
||||
range.overlaps(m_render_targets_memory_range))
|
||||
{
|
||||
for (auto it = m_render_targets_storage.begin_range(range); it != m_render_targets_storage.end(); ++it)
|
||||
{
|
||||
auto surface = Traits::get(it->second);
|
||||
const auto surface_range = surface->get_memory_range();
|
||||
if (!range.overlaps(surface_range))
|
||||
continue;
|
||||
|
||||
color_result.push_back(surface);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_depth_stencil_memory_range.valid() &&
|
||||
range.overlaps(m_depth_stencil_memory_range))
|
||||
{
|
||||
for (auto it = m_depth_stencil_storage.begin_range(range); it != m_depth_stencil_storage.end(); ++it)
|
||||
{
|
||||
auto surface = Traits::get(it->second);
|
||||
const auto surface_range = surface->get_memory_range();
|
||||
if (!range.overlaps(surface_range))
|
||||
continue;
|
||||
|
||||
depth_result.push_back(surface);
|
||||
}
|
||||
}
|
||||
|
||||
return { color_result, depth_result, result_range };
|
||||
}
|
||||
|
||||
// Flushes every surface overlapping 'range' into the linear DMA buffer backing
// that range. Surfaces are written oldest-first so that the most recently used
// surface wins any overlap.
void write_to_dma_buffers(
	command_list_type command_list,
	const utils::address_range& range)
{
	const auto block_range = m_dma_block.to_block_range(range);
	auto [color_data, depth_stencil_data] = find_overlapping_set(block_range);
	auto [bo, offset, bo_timestamp] = m_dma_block
		.with_range(command_list, block_range)
		.get(block_range.start);

	const auto block_length = block_range.length();

	auto all_data = std::move(color_data);
	all_data.insert(all_data.end(), depth_stencil_data.begin(), depth_stencil_data.end());

	if (all_data.size() > 1)
	{
		// Oldest first; later (newer) writes overwrite earlier ones
		std::sort(all_data.begin(), all_data.end(), [](const auto& a, const auto& b)
		{
			return a->last_use_tag < b->last_use_tag;
		});
	}

	for (const auto& surface : all_data)
	{
		if (surface->last_use_tag <= bo_timestamp)
		{
			// The buffer already contains this surface's data
			continue;
		}

		const auto this_range = surface->get_memory_range();
		const u64 max_length = this_range.length();
		u64 src_offset, dst_offset;
		if (this_range.start < block_range.start)
		{
			// Surface begins before the block window; skip the leading bytes
			src_offset = block_range.start - this_range.start;
			dst_offset = 0;
		}
		else
		{
			src_offset = 0;
			dst_offset = this_range.start - block_range.start;
		}

		// FIX: clamp against the bytes actually remaining in the surface past
		// src_offset. The previous min(max_length, block_length - dst_offset)
		// could over-read the surface when it starts before the block range.
		// std::min<u64> also avoids a mixed u32/u64 std::min instantiation.
		const u64 write_length = std::min<u64>(max_length - src_offset, block_length - dst_offset);
		Traits::write_render_target_to_memory(command_list, bo, surface, dst_offset, src_offset, write_length);
	}

	m_dma_block.touch(block_range);
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Update bound color and depth surface.
|
||||
|
@ -359,7 +359,18 @@ struct gl_render_target_traits
|
||||
}
|
||||
|
||||
static
|
||||
gl::buffer* merge_bo_list(const std::vector<gl::buffer*>& /*list*/)
|
||||
void write_render_target_to_memory(
|
||||
gl::command_context&,
|
||||
gl::buffer*,
|
||||
gl::render_target*,
|
||||
u64, u64, u64)
|
||||
{
|
||||
// TODO
|
||||
}
|
||||
|
||||
template <int BlockSize>
|
||||
static
|
||||
gl::buffer* merge_bo_list(gl::command_context&, const std::vector<gl::buffer*>& /*list*/)
|
||||
{
|
||||
// TODO
|
||||
return nullptr;
|
||||
|
@ -3,6 +3,15 @@
|
||||
|
||||
namespace vk
|
||||
{
|
||||
namespace surface_cache_utils
|
||||
{
|
||||
void dispose(vk::buffer* buf)
|
||||
{
|
||||
auto obj = vk::disposable_t::make(buf);
|
||||
vk::get_resource_manager()->dispose(obj);
|
||||
}
|
||||
}
|
||||
|
||||
void surface_cache::destroy()
|
||||
{
|
||||
invalidate_all();
|
||||
|
@ -16,6 +16,11 @@
|
||||
|
||||
namespace vk
|
||||
{
|
||||
namespace surface_cache_utils
|
||||
{
|
||||
void dispose(vk::buffer* buf);
|
||||
}
|
||||
|
||||
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
|
||||
void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
|
||||
|
||||
@ -463,10 +468,106 @@ namespace vk
|
||||
// TODO
|
||||
}
|
||||
|
||||
static vk::buffer* merge_bo_list(const std::vector<vk::buffer*>& /*list*/)
|
||||
// Downloads a render target's contents into the linear DMA buffer 'bo'.
// dst_offset_in_buffer: byte offset in 'bo' to write to.
// src_offset_in_buffer: byte offset into the surface's linearized data to start from.
// max_copy_length: number of bytes to transfer.
static void write_render_target_to_memory(
	vk::command_buffer& cmd,
	vk::buffer* bo,
	vk::render_target* surface,
	u64 dst_offset_in_buffer,
	u64 src_offset_in_buffer,
	u64 max_copy_length)
{
	surface->read_barrier(cmd);
	vk::image* source = surface->get_surface(rsx::surface_access::transfer_read);
	const bool is_scaled = surface->width() != surface->surface_width;
	if (is_scaled)
	{
		// Downscale the AA-scaled surface back to native sample dimensions first
		const areai src_rect = { 0, 0, source->width(), source->height() };
		const areai dst_rect = { 0, 0, surface->get_surface_width<rsx::surface_metrics::samples>(), surface->get_surface_height<rsx::surface_metrics::samples>() };

		auto scratch = vk::get_typeless_helper(source->format(), source->format_class(), dst_rect.x2, dst_rect.y2);
		vk::copy_scaled_image(cmd, source, scratch, src_rect, dst_rect, 1, true, VK_FILTER_NEAREST);

		source = scratch;
	}

	// Route through a scratch buffer when the whole surface does not fit the
	// destination window, when the copy starts mid-surface, or for depth
	// formats (which need repacking before they can land in linear memory).
	auto dest = bo;
	const auto transfer_size = surface->get_memory_range().length();
	if (transfer_size > max_copy_length || src_offset_in_buffer || surface->is_depth_surface())
	{
		// NOTE(review): 4x headroom presumably covers depth repacking - confirm
		auto scratch = vk::get_scratch_buffer(cmd, transfer_size * 4);
		dest = scratch;
	}

	VkBufferImageCopy region =
	{
		.bufferOffset = (dest == bo) ? dst_offset_in_buffer : 0,
		.bufferRowLength = surface->rsx_pitch / surface->get_bpp(),
		.bufferImageHeight = 0,
		.imageSubresource = { source->aspect(), 0, 0, 1 },
		.imageOffset = {},
		.imageExtent = {
			.width = source->width(),
			.height = source->height(),
			.depth = 1
		}
	};

	vk::copy_image_to_buffer(cmd, source, dest, region);

	// FIX: the barrier must cover the region that was just written. When the
	// image was copied directly into 'bo' the write landed at
	// dst_offset_in_buffer; the old code unconditionally used
	// src_offset_in_buffer (the region the scratch path reads below).
	const u64 written_offset = (dest == bo) ? dst_offset_in_buffer : src_offset_in_buffer;
	vk::insert_buffer_memory_barrier(cmd,
		dest->value, written_offset, max_copy_length,
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

	if (dest != bo)
	{
		// Move the linearized window from scratch into the real DMA buffer
		VkBufferCopy copy = { src_offset_in_buffer, dst_offset_in_buffer, max_copy_length };
		vkCmdCopyBuffer(cmd, dest->value, bo->value, 1, &copy);

		vk::insert_buffer_memory_barrier(cmd,
			bo->value, dst_offset_in_buffer, max_copy_length,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
	}
}
|
||||
|
||||
template <int BlockSize>
|
||||
static vk::buffer* merge_bo_list(vk::command_buffer& cmd, std::vector<vk::buffer*>& list)
|
||||
{
|
||||
u32 required_bo_size = 0;
|
||||
for (auto& bo : list)
|
||||
{
|
||||
required_bo_size += (bo ? bo->size() : BlockSize);
|
||||
}
|
||||
|
||||
// Create dst
|
||||
auto pdev = cmd.get_command_pool().owner;
|
||||
auto dst = new vk::buffer(*pdev,
|
||||
required_bo_size,
|
||||
pdev->get_memory_mapping().device_local, 0,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
0, VMM_ALLOCATION_POOL_SURFACE_CACHE);
|
||||
|
||||
// TODO: Initialize the buffer with system RAM contents
|
||||
|
||||
// Copy all the data over from the sub-blocks
|
||||
u32 offset = 0;
|
||||
for (auto& bo : list)
|
||||
{
|
||||
if (!bo)
|
||||
{
|
||||
offset += BlockSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
VkBufferCopy copy = { 0, offset, ::size32(*bo) };
|
||||
offset += ::size32(*bo);
|
||||
vkCmdCopyBuffer(cmd, bo->value, dst->value, 1, ©);
|
||||
|
||||
// Cleanup
|
||||
vk::surface_cache_utils::dispose(bo);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -516,7 +516,7 @@
|
||||
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\time.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_cursor.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />
|
||||
|
@ -2143,7 +2143,7 @@
|
||||
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp">
|
||||
<Filter>Emu\GPU\RSX\Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp">
|
||||
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp">
|
||||
<Filter>Emu\GPU\RSX\Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\CPU\sse2neon.h">
|
||||
|
Loading…
x
Reference in New Issue
Block a user