Mirror of https://github.com/RPCS3/rpcs3.git

Texture cache cleanup, refactoring and fixes

parent 8b3d1c2c91
commit 35139ebf5d

File diff suppressed because it is too large

rpcs3/Emu/RSX/Common/texture_cache_checker.h (new file, 221 lines)
@@ -0,0 +1,221 @@
#pragma once

#include "../rsx_utils.h"

#ifdef TEXTURE_CACHE_DEBUG
namespace rsx {

    class tex_cache_checker_t {
        struct per_page_info_t {
            u8 prot = 0;
            u8 no = 0;
            u8 ro = 0;

            FORCE_INLINE utils::protection get_protection() const
            {
                return static_cast<utils::protection>(prot);
            }

            FORCE_INLINE void set_protection(utils::protection prot)
            {
                this->prot = static_cast<u8>(prot);
            }

            FORCE_INLINE void reset_refcount()
            {
                no = 0;
                ro = 0;
            }

            FORCE_INLINE u16 sum() const
            {
                return u16{ no } + ro;
            }

            FORCE_INLINE bool verify() const
            {
                const utils::protection prot = get_protection();
                switch (prot)
                {
                case utils::protection::no: return no > 0;
                case utils::protection::ro: return no == 0 && ro > 0;
                case utils::protection::rw: return no == 0 && ro == 0;
                default: ASSUME(0);
                }
            }

            FORCE_INLINE void add(utils::protection prot)
            {
                switch (prot)
                {
                case utils::protection::no: if (no++ == UINT8_MAX) fmt::throw_exception("add(protection::no) overflow with NO==%d", UINT8_MAX); return;
                case utils::protection::ro: if (ro++ == UINT8_MAX) fmt::throw_exception("add(protection::ro) overflow with RO==%d", UINT8_MAX); return;
                default: ASSUME(0);
                }
            }

            FORCE_INLINE void remove(utils::protection prot)
            {
                switch (prot)
                {
                case utils::protection::no: if (no-- == 0) fmt::throw_exception("remove(protection::no) overflow with NO==0"); return;
                case utils::protection::ro: if (ro-- == 0) fmt::throw_exception("remove(protection::ro) overflow with RO==0"); return;
                default: ASSUME(0);
                }
            }
        };
        static_assert(sizeof(per_page_info_t) <= 4, "page_info_elmnt must be less than 4-bytes in size");


        // 4GB memory space / 4096 bytes per page = 1048576 pages
        static constexpr size_t num_pages = 0x1'0000'0000 / 4096;
        per_page_info_t _info[num_pages];

        static constexpr size_t rsx_address_to_index(u32 address)
        {
            return (address / 4096);
        }

        static constexpr u32 index_to_rsx_address(size_t idx)
        {
            return static_cast<u32>(idx * 4096);
        }

        constexpr per_page_info_t* rsx_address_to_info_pointer(u32 address)
        {
            return &(_info[rsx_address_to_index(address)]);
        }

        constexpr const per_page_info_t* rsx_address_to_info_pointer(u32 address) const
        {
            return &(_info[rsx_address_to_index(address)]);
        }

        constexpr u32 info_pointer_to_address(const per_page_info_t* ptr) const
        {
            return index_to_rsx_address(static_cast<size_t>(ptr - _info));
        }

        std::string prot_to_str(utils::protection prot) const
        {
            switch (prot)
            {
            case utils::protection::no: return "NA";
            case utils::protection::ro: return "RO";
            case utils::protection::rw: return "RW";
            default: fmt::throw_exception("Unreachable " HERE);
            }
        }

    public:
        tex_cache_checker_t()
        {
            // Initialize array to all 0
            memset(&_info, 0, sizeof(_info));
        }
        static_assert(static_cast<u32>(utils::protection::rw) == 0, "utils::protection::rw must have value 0 for the above constructor to work");

        void set_protection(const address_range& range, utils::protection prot)
        {
            AUDIT(range.is_page_range());
            AUDIT(prot == utils::protection::no || prot == utils::protection::ro || prot == utils::protection::rw);

            for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
            {
                ptr->set_protection(prot);
            }
        }

        void discard(const address_range& range)
        {
            set_protection(range, utils::protection::rw);
        }

        void reset_refcount()
        {
            for (per_page_info_t* ptr = rsx_address_to_info_pointer(0); ptr <= rsx_address_to_info_pointer(0xFF'FF'FF'FF); ptr++)
            {
                ptr->reset_refcount();
            }
        }

        void add(const address_range& range, utils::protection prot)
        {
            AUDIT(range.is_page_range());
            AUDIT(prot == utils::protection::no || prot == utils::protection::ro);

            for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
            {
                ptr->add(prot);
            }
        }

        void remove(const address_range& range, utils::protection prot)
        {
            AUDIT(range.is_page_range());
            AUDIT(prot == utils::protection::no || prot == utils::protection::ro);

            for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
            {
                ptr->remove(prot);
            }
        }

        // Returns a lower bound on how many locked sections are known to be within the given range for each protection {NA, RO}
        // The assumption here is that the page in the given range with the largest number of refcounted sections represents the lower bound on how many there must be
        std::pair<u8,u8> get_minimum_number_of_sections(const address_range& range) const
        {
            AUDIT(range.is_page_range());

            u8 no = 0;
            u8 ro = 0;
            for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
            {
                no = std::max(no, ptr->no);
                ro = std::max(ro, ptr->ro);
            }

            return { no,ro };
        }

        void check_unprotected(const address_range& range, bool allow_ro = false, bool must_be_empty = true) const
        {
            AUDIT(range.is_page_range());
            for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
            {
                const auto prot = ptr->get_protection();
                if (prot != utils::protection::rw && (!allow_ro || prot != utils::protection::ro))
                {
                    const u32 addr = info_pointer_to_address(ptr);
                    fmt::throw_exception("Page at addr=0x%8x should be RW%s: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
                }

                if (must_be_empty && (
                        ptr->no > 0 ||
                        (!allow_ro && ptr->ro > 0)
                    ))
                {
                    const u32 addr = info_pointer_to_address(ptr);
                    fmt::throw_exception("Page at addr=0x%8x should not have any NA%s sections: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
                }
            }
        }

        void verify() const
        {
            for (size_t idx = 0; idx < num_pages; idx++)
            {
                auto &elmnt = _info[idx];
                if (!elmnt.verify())
                {
                    const u32 addr = index_to_rsx_address(idx);
                    const utils::protection prot = elmnt.get_protection();
                    fmt::throw_exception("Protection verification failed at addr=0x%x: Prot=%s, RO=%d, NA=%d", addr, prot_to_str(prot), elmnt.ro, elmnt.no);
                }
            }
        }
    };

    extern tex_cache_checker_t tex_cache_checker;
}; // namespace rsx
#endif //TEXTURE_CACHE_DEBUG
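For reference, a minimal usage sketch of the checker above (illustrative only, not part of the commit; the helper function and the concrete addresses are hypothetical, and utils::address_range::start_length() is assumed to behave as it is used elsewhere in this diff):

#ifdef TEXTURE_CACHE_DEBUG
void checker_example(rsx::tex_cache_checker_t& checker)
{
    // Two read-only sections that share the 4096-byte page at 0x1000
    const auto section_a = utils::address_range::start_length(0x0000, 0x2000); // pages 0x0000, 0x1000
    const auto section_b = utils::address_range::start_length(0x1000, 0x2000); // pages 0x1000, 0x2000

    checker.set_protection(section_a, utils::protection::ro);
    checker.add(section_a, utils::protection::ro);
    checker.set_protection(section_b, utils::protection::ro);
    checker.add(section_b, utils::protection::ro);

    checker.verify(); // every touched page must now satisfy ro > 0 and no == 0

    // The shared page carries two RO refcounts, so at least two locked RO sections
    // must intersect any queried range that contains it (the "lower bound" rule above)
    const auto counts = checker.get_minimum_number_of_sections(utils::address_range::start_length(0x0000, 0x3000));
    // counts.first == 0 (NA), counts.second == 2 (RO)
}
#endif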
rpcs3/Emu/RSX/Common/texture_cache_utils.h (new file, 1544 lines)
File diff suppressed because it is too large
@@ -1593,12 +1593,12 @@ void GLGSRender::flip(int buffer)
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());

m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), "RSX Load: " + std::to_string(get_load()) + "%");
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), "draw call setup: " + std::to_string(m_begin_time) + "us");
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", m_draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", m_begin_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", m_vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", m_textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", m_draw_time));

const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);

@@ -1606,9 +1606,9 @@ void GLGSRender::flip(int buffer)
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
}

m_frame->flip(m_context);

@@ -1640,8 +1640,11 @@ void GLGSRender::flip(int buffer)

bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
bool can_flush = (std::this_thread::get_id() == m_thread_id);
auto result = m_gl_texture_cache.invalidate_address(address, is_writing, can_flush);
const bool can_flush = (std::this_thread::get_id() == m_thread_id);
const rsx::invalidation_cause cause =
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
auto result = m_gl_texture_cache.invalidate_address(address, cause);

if (!result.violation_handled)
return false;

@@ -1664,12 +1667,15 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
return true;
}

void GLGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
//Discard all memory in that range without bothering with writeback (Force it for strict?)
if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled)
auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap));
AUDIT(data.empty());

if (data.violation_handled)
{
m_gl_texture_cache.purge_dirty();
m_gl_texture_cache.purge_unreleased_sections();
{
std::lock_guard lock(m_sampler_mutex);
m_samplers_dirty.store(true);
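The hunks above fold the old is_writing/can_flush booleans into a single rsx::invalidation_cause argument. A minimal sketch of that mapping as a standalone helper (illustrative only, not part of the commit; the function name is hypothetical, the four enumerators are the ones visible in this diff):

// can_flush is true only when the fault was raised on the RSX thread itself,
// so the cache may be flushed synchronously; otherwise the flush is deferred.
static rsx::invalidation_cause select_cause(bool is_writing, bool can_flush)
{
    if (is_writing)
        return can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write;
    else
        return can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read;
}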
@@ -390,7 +390,7 @@ protected:
void do_local_task(rsx::FIFO_state state) override;

bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(u32 address_base, u32 size) override;
void on_invalidate_memory_range(const utils::address_range &range) override;
void notify_tile_unbound(u32 tile) override;

std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
@@ -237,8 +237,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
old_format_found = true;
}

m_gl_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height);
const utils::address_range surface_range = m_surface_info[i].get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
}

if (std::get<0>(m_rtts.m_bound_render_targets[i]))

@@ -268,8 +269,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;

m_gl_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
}

auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);

@@ -381,8 +383,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;

const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * layout.aa_factors[1];
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
}
}

@@ -392,8 +394,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
depth_format_gl.format, depth_format_gl.type, true);
}
}

@@ -448,12 +450,11 @@ void GLGSRender::read_buffers()
if (!m_surface_info[i].pitch)
continue;

const u32 range = pitch * height;

rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));

bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i]));
const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height);
bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i]));

//Fall back to slower methods if the image could not be fetched from cache.
if (!success)

@@ -464,7 +465,7 @@ void GLGSRender::read_buffers()
}
else
{
m_gl_texture_cache.invalidate_range(texaddr, range, false, false, true);
m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read);

std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);

@@ -512,8 +513,9 @@ void GLGSRender::read_buffers()
if (!pitch)
return;

u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil));
const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height);
bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil));

if (in_cache)
return;
@@ -141,8 +141,10 @@ namespace gl
}
};

class cached_texture_section : public rsx::cached_texture_section
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section>
{
using baseclass = rsx::cached_texture_section<gl::cached_texture_section>;

private:
fence m_fence;
u32 pbo_id = 0;

@@ -226,7 +228,7 @@ namespace gl
void init_buffer()
{
const f32 resolution_scale = (context == rsx::texture_upload_context::framebuffer_storage? rsx::get_resolution_scale() : 1.f);
const u32 real_buffer_size = (resolution_scale <= 1.f) ? cpu_address_range : (u32)(resolution_scale * resolution_scale * cpu_address_range);
const u32 real_buffer_size = (resolution_scale <= 1.f) ? get_section_size() : (u32)(resolution_scale * resolution_scale * get_section_size());
const u32 buffer_size = align(real_buffer_size, 4096);

if (pbo_id)

@@ -249,13 +251,14 @@ namespace gl
}

public:
using baseclass::cached_texture_section;

void reset(u32 base, u32 size, bool /*flushable*/=false)
void reset(const utils::address_range &memory_range)
{
rsx::cached_texture_section::reset(base, size);

vram_texture = nullptr;
managed_texture.reset();

baseclass::reset(memory_range);
}

void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,

@@ -283,7 +286,7 @@ namespace gl
if (rsx_pitch > 0)
this->rsx_pitch = rsx_pitch;
else
this->rsx_pitch = cpu_address_range / height;
this->rsx_pitch = get_section_size() / height;

this->width = w;
this->height = h;

@@ -292,6 +295,9 @@ namespace gl
this->mipmaps = mipmaps;

set_format(gl_format, gl_type, swap_bytes);

// Notify baseclass
baseclass::on_section_resources_created();
}

void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps)

@@ -307,6 +313,9 @@ namespace gl

rsx_pitch = 0;
real_pitch = 0;

// Notify baseclass
baseclass::on_section_resources_created();
}

void make_flushable()

@@ -458,11 +467,12 @@ namespace gl
bool flush()
{
if (flushed) return true; //Already written, ignore
AUDIT( is_locked() );

bool result = true;
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture();

if (!synchronized)

@@ -480,11 +490,14 @@ namespace gl
m_fence.wait_for_signal();
flushed = true;

const auto valid_range = get_confirmed_range();
void *dst = get_raw_ptr(valid_range.first, true);
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT( valid_length > 0 );

void *dst = get_ptr_by_offset(valid_range.first, true);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_offset, valid_length, GL_MAP_READ_BIT);

//throw if map failed since we'll segfault anyway
verify(HERE), src != nullptr;

@@ -496,20 +509,20 @@ namespace gl
require_manual_shuffle = true;
}

if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(dst, src, valid_range.second);
memcpy(dst, src, valid_length);
}
else
{
if (valid_range.second % rsx_pitch)
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}

u8 *_src = (u8*)src;
u8 *_dst = (u8*)dst;
const auto num_rows = valid_range.second / rsx_pitch;
const auto num_rows = valid_length / rsx_pitch;
for (u32 row = 0; row < num_rows; ++row)
{
memcpy(_dst, _src, real_pitch);
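The flush path above copies row by row whenever the tightly packed pitch (real_pitch) differs from the RSX row stride (rsx_pitch). A self-contained sketch of the same copy rule (illustrative only, not part of the commit; the function name and the per-row pointer advances are assumptions, while the size checks mirror the diff):

// Copy valid_length bytes of tightly packed rows (real_pitch bytes of payload per row)
// into a destination whose rows are rsx_pitch bytes apart.
static void copy_rows(u8* dst, const u8* src, u32 valid_length, u32 real_pitch, u32 rsx_pitch)
{
    if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
    {
        // Rows are already contiguous (or there is only one row); a single memcpy suffices
        std::memcpy(dst, src, valid_length);
        return;
    }

    // valid_length is expected to be a whole number of destination rows
    const u32 num_rows = valid_length / rsx_pitch;
    for (u32 row = 0; row < num_rows; ++row)
    {
        std::memcpy(dst, src, real_pitch);
        src += real_pitch;   // assumed advance through the packed source
        dst += rsx_pitch;    // assumed advance through the strided destination
    }
}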
@@ -521,7 +534,7 @@ namespace gl
if (require_manual_shuffle)
{
//byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_range.second / rsx_pitch);
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_length / rsx_pitch);
}
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
{

@@ -537,7 +550,7 @@ namespace gl
case texture::type::ushort_1_5_5_5_rev:
case texture::type::ushort_5_5_5_1:
{
const u32 num_reps = valid_range.second / 2;
const u32 num_reps = valid_length / 2;
be_t<u16>* in = (be_t<u16>*)(dst);
u16* out = (u16*)dst;

@@ -556,7 +569,7 @@ namespace gl
case texture::type::uint_2_10_10_10_rev:
case texture::type::uint_8_8_8_8:
{
u32 num_reps = valid_range.second / 4;
u32 num_reps = valid_length / 4;
be_t<u32>* in = (be_t<u32>*)(dst);
u32* out = (u32*)dst;

@@ -575,7 +588,7 @@ namespace gl
}
}

flush_io(valid_range.first, valid_range.second);
flush_ptr_by_offset(valid_offset, valid_length);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);

@@ -586,13 +599,10 @@ namespace gl

void destroy()
{
if (!locked && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
//Already destroyed
return;

if (locked)
unprotect();

if (pbo_id == 0)
{
//Read-only texture, destroy texture memory
@@ -611,6 +621,13 @@ namespace gl

if (!m_fence.is_empty())
m_fence.destroy();

baseclass::on_section_resources_destroyed();
}

inline bool exists() const
{
return (vram_texture != nullptr);
}

texture::format get_format() const

@@ -618,16 +635,6 @@ namespace gl
return format;
}

bool exists() const
{
return vram_texture != nullptr;
}

bool is_flushable() const
{
return (protection == utils::protection::no);
}

bool is_flushed() const
{
return flushed;

@@ -683,9 +690,10 @@ namespace gl
}
};

class texture_cache : public rsx::texture_cache<void*, cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
class texture_cache : public rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
{
private:
using baseclass = rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>;

struct discardable_storage
{

@@ -717,30 +725,10 @@ namespace gl
blitter m_hw_blitter;
std::vector<discardable_storage> m_temporary_surfaces;

cached_texture_section& create_texture(gl::viewable_image* image, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps)
{
cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h, depth);
tex.reset(texaddr, texsize, false);
tex.create_read_only(image, w, h, depth, mipmaps);
read_only_range = tex.get_min_max(read_only_range);
return tex;
}

void clear()
{
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
tex.destroy();
}

range_data.data.resize(0);
}

baseclass::clear();
clear_temporary_subresources();
m_unreleased_texture_objects = 0;
}

void clear_temporary_subresources()

@@ -850,11 +838,6 @@ namespace gl

protected:

void free_texture_section(cached_texture_section& tex) override
{
tex.destroy();
}

gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{

@@ -946,7 +929,7 @@ namespace gl
dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
}

cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
{
auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);

@@ -954,15 +937,23 @@ namespace gl
const auto swizzle = get_component_mapping(gcm_format, flags);
image->set_native_component_layout(swizzle);

auto& cached = create_texture(image, rsx_address, rsx_size, width, height, depth, mipmaps);
cached.set_dirty(false);
auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps);
ASSERT(!cached.is_locked());

// Prepare section
cached.reset(rsx_range);
cached.set_view_flags(flags);
cached.set_context(context);
cached.set_gcm_format(gcm_format);
cached.set_image_type(type);
cached.set_gcm_format(gcm_format);

cached.create_read_only(image, width, height, depth, mipmaps);
cached.set_dirty(false);

if (context != rsx::texture_upload_context::blit_engine_dst)
{
AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always );
read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. Ask kd-11
cached.protect(utils::protection::ro);
}
else

@@ -998,8 +989,8 @@ namespace gl

//NOTE: Protection is handled by the caller
cached.make_flushable();
cached.set_dimensions(width, height, depth, (rsx_size / height));
no_access_range = cached.get_min_max(no_access_range);
cached.set_dimensions(width, height, depth, (rsx_range.length() / height));
no_access_range = cached.get_min_max(no_access_range, rsx::section_bounds::locked_range);
}

update_cache_tag();

@@ -1010,7 +1001,8 @@ namespace gl
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override
{
void* unused = nullptr;
auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
rsx::texture_create_flags::default_component_order);

gl::upload_texture(section->get_raw_texture()->id(), rsx_address, gcm_format, width, height, depth, mipmaps,

@@ -1082,9 +1074,7 @@ namespace gl

public:

texture_cache() {}

~texture_cache() {}
using baseclass::texture_cache;

void initialize()
{

@@ -1103,19 +1093,17 @@ namespace gl
{
reader_lock lock(m_cache_mutex);

auto found = m_cache.find(get_block_address(rsx_address));
if (found == m_cache.end())
auto &block = m_storage.block_for(rsx_address);

if (block.get_locked_count() == 0)
return false;

//if (found->second.valid_count == 0)
//return false;

for (auto& tex : found->second.data)
for (auto& tex : block)
{
if (tex.is_dirty())
continue;

if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
continue;

if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())

@@ -1127,9 +1115,9 @@ namespace gl

void on_frame_end() override
{
if (m_unreleased_texture_objects >= m_max_zombie_objects)
if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects)
{
purge_dirty();
purge_unreleased_sections();
}

clear_temporary_subresources();

@@ -1158,7 +1146,7 @@ namespace gl
gl::texture::format::depth_stencil : gl::texture::format::depth;
}

flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size);
flush_if_cache_miss_likely(fmt, result.to_address_range());
}

return true;
@@ -1350,34 +1350,12 @@ namespace rsx
{
if (!in_begin_end && state != FIFO_state::lock_wait)
{
if (!m_invalidated_memory_ranges.empty())
reader_lock lock(m_mtx_task);

if (m_invalidated_memory_range.valid())
{
std::lock_guard lock(m_mtx_task);

for (const auto& range : m_invalidated_memory_ranges)
{
on_invalidate_memory_range(range.first, range.second);

// Clean the main memory super_ptr cache if invalidated
const auto range_end = range.first + range.second;
for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
{
const auto mem_start = It->first;
const auto mem_end = mem_start + It->second.size();
const bool overlaps = (mem_start < range_end && range.first < mem_end);

if (overlaps)
{
It = main_super_memory_block.erase(It);
}
else
{
It++;
}
}
}

m_invalidated_memory_ranges.clear();
lock.upgrade();
handle_invalidated_memory_range();
}
}
}
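The rewritten do_local_task path above checks the pending range under a shared lock and escalates to an exclusive lock only when there is work to do. A minimal sketch of that locking pattern (illustrative only, not part of the commit; the function name is hypothetical, while reader_lock, upgrade() and the member names are used exactly as in the hunk above):

void poll_invalidated_range()
{
    reader_lock lock(m_mtx_task);              // cheap shared acquisition on the hot path

    if (!m_invalidated_memory_range.valid())
        return;                                // nothing queued, no exclusive lock taken

    lock.upgrade();                            // escalate to exclusive before mutating state
    handle_invalidated_memory_range();         // consumes and invalidates the pending range
}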
@@ -2676,15 +2654,32 @@ namespace rsx

void thread::on_notify_memory_mapped(u32 address, u32 size)
{
// TODO
// In the case where an unmap is followed shortly after by a remap of the same address space
// we must block until RSX has invalidated the memory
// or lock m_mtx_task and do it ourselves

if (m_rsx_thread_exiting)
return;

reader_lock lock(m_mtx_task);

const auto map_range = address_range::start_length(address, size);

if (!m_invalidated_memory_range.valid())
return;

if (m_invalidated_memory_range.overlaps(map_range))
{
lock.upgrade();
handle_invalidated_memory_range();
}
}


void thread::on_notify_memory_unmapped(u32 base_address, u32 size)
void thread::on_notify_memory_unmapped(u32 address, u32 size)
{
if (!m_rsx_thread_exiting && base_address < 0xC0000000)
if (!m_rsx_thread_exiting && address < 0xC0000000)
{
u32 ea = base_address >> 20, io = RSXIOMem.io[ea];
u32 ea = address >> 20, io = RSXIOMem.io[ea];

if (io < 512)
{
@@ -2704,11 +2699,56 @@ namespace rsx
}
}

// Queue up memory invalidation
std::lock_guard lock(m_mtx_task);
m_invalidated_memory_ranges.push_back({ base_address, size });
const bool existing_range_valid = m_invalidated_memory_range.valid();
const auto unmap_range = address_range::start_length(address, size);

if (existing_range_valid && m_invalidated_memory_range.touches(unmap_range))
{
// Merge range-to-invalidate in case of consecutive unmaps
m_invalidated_memory_range.set_min_max(unmap_range);
}
else
{
if (existing_range_valid)
{
// We can only delay consecutive unmaps.
// Otherwise, to avoid VirtualProtect failures, we need to do the invalidation here
handle_invalidated_memory_range();
}

m_invalidated_memory_range = unmap_range;
}
}
}

// NOTE: m_mtx_task lock must be acquired before calling this method
void thread::handle_invalidated_memory_range()
{
if (!m_invalidated_memory_range.valid())
return;

on_invalidate_memory_range(m_invalidated_memory_range);

// Clean the main memory super_ptr cache if invalidated
for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
{
const auto block_range = address_range::start_length(It->first, It->second.size());

if (m_invalidated_memory_range.overlaps(block_range))
{
It = main_super_memory_block.erase(It);
}
else
{
It++;
}
}

m_invalidated_memory_range.invalidate();
}

//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
void thread::pause()
{
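on_notify_memory_unmapped above coalesces touching unmaps into a single pending address_range and forces an immediate invalidation when a disjoint range is already queued. A compact sketch of that decision (illustrative only, not part of the commit; the function name is hypothetical, the address_range members are those used in the hunk above):

void queue_unmap(u32 address, u32 size)
{
    std::lock_guard lock(m_mtx_task);

    const auto unmap_range = address_range::start_length(address, size);

    if (m_invalidated_memory_range.valid() && m_invalidated_memory_range.touches(unmap_range))
    {
        // Consecutive or adjacent unmaps: grow the pending range and keep deferring
        m_invalidated_memory_range.set_min_max(unmap_range);
        return;
    }

    if (m_invalidated_memory_range.valid())
    {
        // A disjoint range is already pending; invalidate it now so at most one
        // deferred range ever exists
        handle_invalidated_memory_range();
    }

    m_invalidated_memory_range = unmap_range;
}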
@@ -347,7 +347,7 @@ namespace rsx
std::shared_ptr<rsx::overlays::display_manager> m_overlay_manager;

// Invalidated memory range
std::vector<std::pair<u32, u32>> m_invalidated_memory_ranges;
address_range m_invalidated_memory_range;

public:
RsxDmaControl* ctrl = nullptr;

@@ -468,6 +468,8 @@ namespace rsx
thread();
virtual ~thread();

void handle_invalidated_memory_range();

virtual void on_task() override;
virtual void on_exit() override;

@@ -495,7 +497,7 @@ namespace rsx
virtual void flip(int buffer) = 0;
virtual u64 timestamp();
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
virtual void on_invalidate_memory_range(u32 /*address*/, u32 /*range*/) {}
virtual void on_invalidate_memory_range(const address_range & /*range*/) {}
virtual void notify_tile_unbound(u32 /*tile*/) {}

// zcull
@@ -812,7 +812,9 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
vk::texture_cache::thrashed_set result;
{
std::lock_guard lock(m_secondary_cb_guard);
result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));

const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
}

if (!result.violation_handled)

@@ -893,13 +895,16 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
return false;
}

void VKGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
std::lock_guard lock(m_secondary_cb_guard);
if (m_texture_cache.invalidate_range(address_base, size, true, true, false,
m_secondary_command_buffer, m_swapchain->get_graphics_queue()).violation_handled)

auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
AUDIT(data.empty());

if (data.violation_handled)
{
m_texture_cache.purge_dirty();
m_texture_cache.purge_unreleased_sections();
{
std::lock_guard lock(m_sampler_mutex);
m_samplers_dirty.store(true);

@@ -2625,9 +2630,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (old_format == VK_FORMAT_UNDEFINED)
old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first;

m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height,
*m_current_command_buffer, m_swapchain->get_graphics_queue());
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}

m_surface_info[i].address = m_surface_info[i].pitch = 0;

@@ -2641,9 +2646,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
*m_current_command_buffer, m_swapchain->get_graphics_queue());
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}

m_depth_surface_info.address = m_depth_surface_info.pitch = 0;

@@ -2697,8 +2702,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;

const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * layout.aa_factors[1];
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
}
}

@@ -2708,8 +2713,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
}
}

@@ -3129,13 +3134,13 @@ void VKGSRender::flip(int buffer)

if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "RSX Load: " + std::to_string(get_load()) + "%");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", m_draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", m_setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", m_vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", m_textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", m_draw_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", m_flip_time));

const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);

@@ -3144,10 +3149,10 @@ void VKGSRender::flip(int buffer)
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
}

vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);
@@ -433,7 +433,7 @@ protected:
void notify_tile_unbound(u32 tile) override;

bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(u32 address_base, u32 size) override;
void on_invalidate_memory_range(const utils::address_range &range) override;

bool on_decompiler_task() override;
};
@@ -13,8 +13,10 @@ extern u64 get_system_time();

namespace vk
{
class cached_texture_section : public rsx::cached_texture_section
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section>
{
using baseclass = typename rsx::cached_texture_section<vk::cached_texture_section>;

std::unique_ptr<vk::viewable_image> managed_texture = nullptr;

//DMA relevant data

@@ -24,15 +26,14 @@ namespace vk
std::unique_ptr<vk::buffer> dma_buffer;

public:
using baseclass::cached_texture_section;

cached_texture_section() {}

void reset(u32 base, u32 length)
void reset(const utils::address_range &memory_range)
{
if (length > cpu_address_range)
if (memory_range.length() > get_section_size())
release_dma_resources();

rsx::cached_texture_section::reset(base, length);
baseclass::reset(memory_range);
}

void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)

@@ -56,13 +57,16 @@ namespace vk
if (rsx_pitch > 0)
this->rsx_pitch = rsx_pitch;
else
this->rsx_pitch = cpu_address_range / height;
this->rsx_pitch = get_section_size() / height;

//Even if we are managing the same vram section, we cannot guarantee contents are static
//The create method is only invoked when a new mangaged session is required
//The create method is only invoked when a new managed session is required
synchronized = false;
flushed = false;
sync_timestamp = 0ull;

// Notify baseclass
baseclass::on_section_resources_created();
}

void release_dma_resources()

@@ -81,11 +85,14 @@ namespace vk

void destroy()
{
m_tex_cache->on_section_destroyed(*this);
vram_texture = nullptr;
release_dma_resources();

baseclass::on_section_resources_destroyed();
}

bool exists() const
inline bool exists() const
{
return (vram_texture != nullptr);
}

@@ -115,12 +122,6 @@ namespace vk
return vram_texture->info.format;
}

bool is_flushable() const
{
//This section is active and can be flushed to cpu
return (protection == utils::protection::no);
}

bool is_flushed() const
{
//This memory section was flushable, but a flush has already removed protection

@@ -144,7 +145,7 @@ namespace vk
if (dma_buffer.get() == nullptr)
{
auto memory_type = m_device->get_memory_mapping().host_visible_coherent;
dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
}

if (manage_cb_lifetime)

@@ -246,18 +247,18 @@ namespace vk
{
verify (HERE), mem_target->value != dma_buffer->value;

vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

shuffle_kernel->run(cmd, mem_target, cpu_address_range);
shuffle_kernel->run(cmd, mem_target, get_section_size());

vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

VkBufferCopy copy = {};
copy.size = cpu_address_range;
copy.size = get_section_size();
vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
}
@@ -282,6 +283,7 @@ namespace vk
bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
{
if (flushed) return true;
AUDIT( is_locked() );

if (m_device == nullptr)
{

@@ -293,7 +295,7 @@ namespace vk

if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture(true, cmd, submit_queue);
result = false;
}

@@ -301,22 +303,26 @@ namespace vk
verify(HERE), real_pitch > 0;
flushed = true;

const auto valid_range = get_confirmed_range();
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
void* pixels_dst = get_raw_ptr(valid_range.first, true);
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT( valid_length > 0 );

if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
void* pixels_src = dma_buffer->map(valid_offset, valid_length);
void* pixels_dst = get_ptr_by_offset(valid_offset, true);

if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(pixels_dst, pixels_src, valid_range.second);
memcpy(pixels_dst, pixels_src, valid_length);
}
else
{
if (valid_range.second % rsx_pitch)
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}

const u32 num_rows = valid_range.second / rsx_pitch;
const u32 num_rows = valid_length / rsx_pitch;
auto _src = (u8*)pixels_src;
auto _dst = (u8*)pixels_dst;

@@ -328,7 +334,7 @@ namespace vk
}
}

flush_io(valid_range.first, valid_range.second);
flush_ptr_by_offset(valid_offset, valid_length);
dma_buffer->unmap();
reset_write_statistics();

@@ -405,9 +411,18 @@ namespace vk
}
};

class texture_cache : public rsx::texture_cache<vk::command_buffer, cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
class texture_cache : public rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
{
public:
virtual void on_section_destroyed(cached_texture_section& tex)
{
m_discarded_memory_size += tex.get_section_size();
m_discardable_storage.push_back(tex);
}

private:
using baseclass = rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>;

//Vulkan internals
vk::render_device* m_device;
vk::memory_type_mapping m_memory_types;
@@ -419,30 +434,11 @@ namespace vk
std::list<discarded_storage> m_discardable_storage;
std::atomic<u32> m_discarded_memory_size = { 0 };

void purge_cache()
void clear()
{
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (tex.exists())
{
m_discardable_storage.push_back(tex);
}

if (tex.is_locked())
tex.unprotect();

tex.release_dma_resources();
}

range_data.data.resize(0);
}
baseclass::clear();

m_discardable_storage.clear();
m_unreleased_texture_objects = 0;
m_texture_memory_in_use = 0;
m_discarded_memory_size = 0;
}

@@ -486,14 +482,6 @@ namespace vk
}

protected:

void free_texture_section(cached_texture_section& tex) override
{
m_discarded_memory_size += tex.get_section_size();
m_discardable_storage.push_back(tex);
tex.destroy();
}

vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
{

@@ -776,7 +764,7 @@ namespace vk
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
}

cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
{
const u16 section_depth = depth;

@@ -846,26 +834,30 @@ namespace vk

change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer });

cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
region.reset(rsx_address, rsx_size);
region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
region.set_dirty(false);
cached_texture_section& region = *find_cached_texture(rsx_range, true, true, width, height, section_depth);
ASSERT(!region.is_locked());

// New section, we must prepare it
region.reset(rsx_range);
region.set_context(context);
region.set_gcm_format(gcm_format);
region.set_image_type(type);

region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
region.set_dirty(false);

//Its not necessary to lock blit dst textures as they are just reused as necessary
if (context != rsx::texture_upload_context::blit_engine_dst)
{
region.protect(utils::protection::ro);
read_only_range = region.get_min_max(read_only_range);
read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range);
}
else
{
//TODO: Confirm byte swap patterns
//NOTE: Protection is handled by the caller
region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
||||
no_access_range = region.get_min_max(no_access_range);
|
||||
no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range);
|
||||
}
|
||||
|
||||
update_cache_tag();
|
||||
@ -875,7 +867,8 @@ namespace vk
|
||||
cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
|
||||
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override
|
||||
{
|
||||
auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
|
||||
const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
|
||||
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
|
||||
rsx::texture_create_flags::default_component_order);
|
||||
|
||||
auto image = section->get_raw_texture();
|
||||
@ -962,6 +955,7 @@ namespace vk
|
||||
}
|
||||
|
||||
public:
|
||||
using baseclass::texture_cache;
|
||||
|
||||
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
|
||||
{
|
||||
@ -974,26 +968,24 @@ namespace vk
|
||||
|
||||
void destroy() override
|
||||
{
|
||||
purge_cache();
|
||||
clear();
|
||||
}
|
||||
|
||||
bool is_depth_texture(u32 rsx_address, u32 rsx_size) override
|
||||
{
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
auto found = m_cache.find(get_block_address(rsx_address));
|
||||
if (found == m_cache.end())
|
||||
auto &block = m_storage.block_for(rsx_address);
|
||||
|
||||
if (block.get_locked_count() == 0)
|
||||
return false;
|
||||
|
||||
//if (found->second.valid_count == 0)
|
||||
//return false;
|
||||
|
||||
for (auto& tex : found->second.data)
|
||||
for (auto& tex : block)
|
||||
{
|
||||
if (tex.is_dirty())
|
||||
continue;
|
||||
|
||||
if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
|
||||
if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
|
||||
continue;
|
||||
|
||||
if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
|
||||
@ -1016,10 +1008,10 @@ namespace vk
|
||||
|
||||
void on_frame_end() override
|
||||
{
|
||||
if (m_unreleased_texture_objects >= m_max_zombie_objects ||
|
||||
if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects ||
|
||||
m_discarded_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with memory resources
|
||||
{
|
||||
purge_dirty();
|
||||
purge_unreleased_sections();
|
||||
}
|
||||
|
||||
const u64 last_complete_frame = vk::get_last_completed_frame_id();
|
||||
@ -1228,7 +1220,7 @@ namespace vk
|
||||
{
|
||||
if (reply.real_dst_size)
|
||||
{
|
||||
flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue);
|
||||
flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -1239,12 +1231,12 @@ namespace vk
|
||||
|
||||
const u32 get_unreleased_textures_count() const override
|
||||
{
|
||||
return m_unreleased_texture_objects + (u32)m_discardable_storage.size();
|
||||
return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
|
||||
}
|
||||
|
||||
const u32 get_texture_memory_in_use() const override
|
||||
{
|
||||
return m_texture_memory_in_use;
|
||||
return m_storage.m_texture_memory_in_use;
|
||||
}
|
||||
|
||||
const u32 get_temporary_memory_in_use()
|
||||
|
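The is_depth_texture() walk above keeps the arithmetic containment test against the section base and size. Written with the address_range helper this commit introduces, roughly the same check becomes a range-inside-range test; the sketch below uses a minimal stand-in type rather than the real utils::address_range:

#include <cassert>
#include <cstdint>

// Illustrative stand-in for utils::address_range (the real helper lives in
// Utilities/address_range.h); start and end are inclusive.
struct range32
{
    uint32_t start;
    uint32_t end;

    static range32 start_length(uint32_t s, uint32_t len) { return { s, s + len - 1 }; }
    bool inside(const range32& outer) const { return start >= outer.start && end <= outer.end; }
};

int main()
{
    const uint32_t section_base = 0xC0100000, section_size = 0x10000; // hypothetical cached section
    const uint32_t rsx_address = 0xC0104000, rsx_size = 0x2000;       // hypothetical lookup

    // Old form, as in the diff: (rsx_address + rsx_size - section_base) <= section_size
    const bool contained_old = (rsx_address + rsx_size - section_base) <= section_size;

    // Range form: the queried range lies inside the section's range
    const bool contained_new = range32::start_length(rsx_address, rsx_size)
                                   .inside(range32::start_length(section_base, section_size));

    assert(contained_old == contained_new);
}
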
@@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "Utilities/VirtualMemory.h"
#include "Utilities/hash.h"
#include "Emu/Memory/vm.h"
@@ -6,6 +6,7 @@
#include "Common/ProgramStateCache.h"
#include "Emu/Cell/Modules/cellMsgDialog.h"
#include "Emu/System.h"
#include "Common/texture_cache_checker.h"

#include "rsx_utils.h"
#include <thread>
@@ -19,109 +20,125 @@ namespace rsx
protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding
};

enum overlap_test_bounds
enum section_bounds
{
full_range,
protected_range,
locked_range,
confirmed_range
};

static inline void memory_protect(const address_range& range, utils::protection prot)
{
verify(HERE), range.is_page_range();

//LOG_ERROR(RSX, "memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
utils::memory_protect(vm::base(range.start), range.length(), prot);

#ifdef TEXTURE_CACHE_DEBUG
tex_cache_checker.set_protection(range, prot);
#endif
}

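memory_protect() above insists on a page-aligned range, and the section code below derives such ranges with to_page_range(), page_start(), next_page() and page_end(). A small sketch of what those helpers are assumed to do for 4096-byte pages (illustrative only, not the actual Utilities/address_range.h implementation):

#include <cassert>
#include <cstdint>

static constexpr uint32_t page_size = 4096;

static uint32_t page_start(uint32_t addr) { return addr & ~(page_size - 1); }            // start of the page containing addr
static uint32_t page_end(uint32_t addr)   { return page_start(addr) + (page_size - 1); } // last byte of that page
static uint32_t next_page(uint32_t addr)  { return page_start(addr) + page_size; }       // start of the following page

int main()
{
    // Expanding an arbitrary [start, end] range to full page granularity,
    // as to_page_range() is assumed to do before an mprotect-style call.
    const uint32_t start = 0x1234, end = 0x5678;
    const uint32_t locked_start = page_start(start);
    const uint32_t locked_end = page_end(end);

    assert(locked_start % page_size == 0);
    assert((locked_end + 1) % page_size == 0);
    assert(locked_start <= start && locked_end >= end);
}
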
class buffered_section
{
public:
static const protection_policy guard_policy = protect_policy_full_range;

private:
u32 locked_address_base = 0;
u32 locked_address_range = 0;
weak_ptr locked_memory_ptr;
std::pair<u32, u32> confirmed_range;

inline void tag_memory()
{
if (locked_memory_ptr)
{
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);

*first = cpu_address_base + confirmed_range.first;
*last = cpu_address_base + valid_limit - 4;

locked_memory_ptr.flush(confirmed_range.first, 4);
locked_memory_ptr.flush(valid_limit - 4, 4);
}
}

protected:
u32 cpu_address_base = 0;
u32 cpu_address_range = 0;
address_range locked_range;
address_range cpu_range = {};
address_range confirmed_range;
weak_ptr super_ptr;

utils::protection protection = utils::protection::rw;
protection_policy guard_policy;

bool locked = false;
bool dirty = false;

inline void init_lockable_range(u32 base, u32 length)
inline void init_lockable_range(const address_range &range)
{
locked_address_base = (base & ~4095);
locked_range = range.to_page_range();

if ((guard_policy != protect_policy_full_range) && (length >= 4096))
if ((guard_policy != protect_policy_full_range) && (range.length() >= 4096))
{
const u32 limit = base + length;
const u32 block_end = (limit & ~4095);
const u32 block_start = (locked_address_base < base) ? (locked_address_base + 4096) : locked_address_base;

locked_address_range = 4096;
const u32 block_start = (locked_range.start < range.start) ? (locked_range.start + 4096u) : locked_range.start;
const u32 block_end = locked_range.end;

if (block_start < block_end)
{
//Page boundaries cover at least one unique page
locked_address_base = block_start;
// protect unique page range
locked_range.start = block_start;
locked_range.end = block_end;
}

if (guard_policy == protect_policy_conservative)
{
//Protect full unique range
locked_address_range = (block_end - block_start);
}
if (guard_policy == protect_policy_one_page)
{
// protect exactly one page
locked_range.set_length(4096u);
}
}
else
locked_address_range = align(base + length, 4096) - locked_address_base;

verify(HERE), locked_address_range > 0;
AUDIT( (locked_range.start == page_start(range.start)) || (locked_range.start == next_page(range.start)) );
AUDIT( locked_range.end <= page_end(range.end) );
verify(HERE), locked_range.is_page_range();
}

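To summarise the lockable-range selection in init_lockable_range() above: the section's full page span is taken first, then trimmed to the pages no neighbouring object can share, and finally clamped to a single page under protect_policy_one_page. A compact sketch of that decision, using a stand-in range type and illustrative addresses:

#include <cstdint>
#include <cstdio>

static constexpr uint32_t page_size = 4096;

struct range32 { uint32_t start, end; }; // inclusive, stand-in for utils::address_range

static range32 to_page_range(range32 r)
{
    return { r.start & ~(page_size - 1), (r.end & ~(page_size - 1)) + page_size - 1 };
}

enum policy { one_page, conservative, full_range };

// Mirrors the shape of init_lockable_range(): pick the pages to protect for a section.
static range32 lockable_range(range32 section, policy p)
{
    range32 locked = to_page_range(section);
    if (p == full_range || (section.end - section.start + 1) < page_size)
        return locked;

    // First page that starts at or after the section start (unique to this section)
    const uint32_t block_start = (locked.start < section.start) ? locked.start + page_size : locked.start;
    const uint32_t block_end = locked.end;
    if (block_start < block_end)
        locked = { block_start, block_end };

    if (p == one_page)
        locked.end = locked.start + page_size - 1; // protect exactly one page
    return locked;
}

int main()
{
    const range32 section = { 0x10100, 0x13FFF }; // hypothetical section
    const range32 a = lockable_range(section, full_range);
    const range32 b = lockable_range(section, conservative);
    const range32 c = lockable_range(section, one_page);
    std::printf("full=[%x,%x] conservative=[%x,%x] one_page=[%x,%x]\n",
                (unsigned)a.start, (unsigned)a.end, (unsigned)b.start, (unsigned)b.end,
                (unsigned)c.start, (unsigned)c.end);
}
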
public:

buffered_section() {}
~buffered_section() {}
buffered_section() {};
~buffered_section() {};

void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range)
void reset(const address_range &memory_range)
{
verify(HERE), locked == false;
verify(HERE), memory_range.valid() && locked == false;

cpu_address_base = base;
cpu_address_range = length;
cpu_range = address_range(memory_range);
confirmed_range.invalidate();
locked_range.invalidate();

confirmed_range = { 0, 0 };
protection = utils::protection::rw;
guard_policy = protect_policy;
locked = false;

init_lockable_range(cpu_address_base, cpu_address_range);
super_ptr = {};

init_lockable_range(cpu_range);
}

void protect(utils::protection prot, bool force = false)
protected:
void invalidate_range()
{
if (prot == protection && !force) return;
ASSERT(!locked);

verify(HERE), locked_address_range > 0;
utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
protection = prot;
locked = prot != utils::protection::rw;
cpu_range.invalidate();
confirmed_range.invalidate();
locked_range.invalidate();
}

if (prot == utils::protection::no)
public:
void protect(utils::protection new_prot, bool force = false)
{
if (new_prot == protection && !force) return;

verify(HERE), locked_range.is_page_range();
AUDIT( !confirmed_range.valid() || confirmed_range.inside(cpu_range) );

#ifdef TEXTURE_CACHE_DEBUG
if (new_prot != protection || force)
{
locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range);
if (locked && !force) // When force=true, it is the responsibility of the caller to remove this section from the checker refcounting
tex_cache_checker.remove(locked_range, protection);
if (new_prot != utils::protection::rw)
tex_cache_checker.add(locked_range, new_prot);
}
#endif // TEXTURE_CACHE_DEBUG

rsx::memory_protect(locked_range, new_prot);
protection = new_prot;
locked = (protection != utils::protection::rw);

if (protection == utils::protection::no)
{
super_ptr = rsx::get_super_ptr(cpu_range);
verify(HERE), super_ptr;
tag_memory();
}
else
@@ -129,255 +146,274 @@ namespace rsx
if (!locked)
{
//Unprotect range also invalidates secured range
confirmed_range = { 0, 0 };
confirmed_range.invalidate();
}

locked_memory_ptr = {};
super_ptr = {};
}

}

void protect(utils::protection prot, const std::pair<u32, u32>& range_confirm)
void protect(utils::protection prot, const std::pair<u32, u32>& new_confirm)
{
// new_confirm.first is an offset after cpu_range.start
// new_confirm.second is the length (after cpu_range.start + new_confirm.first)

#ifdef TEXTURE_CACHE_DEBUG
// We need to remove the lockable range from page_info as we will be re-protecting with force==true
if (locked)
tex_cache_checker.remove(locked_range, protection);
#endif

if (prot != utils::protection::rw)
{
const auto old_prot = protection;
const auto old_locked_base = locked_address_base;
const auto old_locked_length = locked_address_range;

if (confirmed_range.second)
if (confirmed_range.valid())
{
const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second);
confirmed_range.first = std::min(confirmed_range.first, range_confirm.first);
confirmed_range.second = range_limit - confirmed_range.first;
confirmed_range.start = std::min(confirmed_range.start, cpu_range.start + new_confirm.first);
confirmed_range.end = std::max(confirmed_range.end, cpu_range.start + new_confirm.first + new_confirm.second - 1);
}
else
{
confirmed_range = range_confirm;
confirmed_range = address_range::start_length(cpu_range.start + new_confirm.first, new_confirm.second);
ASSERT(!locked || locked_range.inside(confirmed_range.to_page_range()));
}

init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);

verify(HERE), confirmed_range.inside(cpu_range);
init_lockable_range(confirmed_range);
}

protect(prot, true);
}

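The confirmed-range overload of protect() above receives an (offset, length) pair relative to the section start and unions it with any previously confirmed sub-range. A minimal sketch of that merge with hypothetical numbers:

#include <algorithm>
#include <cassert>
#include <cstdint>

struct range32 { uint32_t start, end; }; // inclusive, stand-in for utils::address_range

// Union of the existing confirmed range with a new (offset, length) pair,
// both expressed relative to section_start, as in protect(prot, new_confirm).
static range32 merge_confirm(range32 confirmed, uint32_t section_start,
                             uint32_t offset, uint32_t length)
{
    const uint32_t new_start = section_start + offset;
    const uint32_t new_end = new_start + length - 1;
    return { std::min(confirmed.start, new_start), std::max(confirmed.end, new_end) };
}

int main()
{
    const uint32_t section_start = 0xC0000000;
    range32 confirmed = { 0xC0001000, 0xC0001FFF }; // previously confirmed 4 KiB

    confirmed = merge_confirm(confirmed, section_start, 0x3000, 0x800);
    assert(confirmed.start == 0xC0001000 && confirmed.end == 0xC00037FF);
}
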
void unprotect()
inline void unprotect()
{
AUDIT(protection != utils::protection::rw);
protect(utils::protection::rw);
}

void discard()
inline void discard()
{
#ifdef TEXTURE_CACHE_DEBUG
if (locked)
tex_cache_checker.remove(locked_range, protection);
#endif

protection = utils::protection::rw;
dirty = true;
confirmed_range.invalidate();
super_ptr = {};
locked = false;

confirmed_range = { 0, 0 };
locked_memory_ptr = {};
}

/**
* Check if range overlaps with this section.
* ignore_protection_range - if true, the test should not check against the aligned protection range, instead
* tests against actual range of contents in memory
*/
bool overlaps(std::pair<u32, u32> range) const
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}

bool overlaps(u32 address, overlap_test_bounds bounds) const
inline const address_range& get_bounds(section_bounds bounds) const
{
switch (bounds)
{
case overlap_test_bounds::full_range:
{
return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range);
}
case overlap_test_bounds::protected_range:
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
case overlap_test_bounds::confirmed_range:
{
const auto range = get_confirmed_range();
return ((range.first + cpu_address_base) <= address && (address - range.first) < range.second);
}
case section_bounds::full_range:
return cpu_range;
case section_bounds::locked_range:
return locked_range;
case section_bounds::confirmed_range:
return confirmed_range.valid() ? confirmed_range : cpu_range;
default:
fmt::throw_exception("Unreachable" HERE);
ASSUME(0);
}
}

bool overlaps(const std::pair<u32, u32>& range, overlap_test_bounds bounds) const
{
switch (bounds)
{
case overlap_test_bounds::full_range:
{
return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
}
case overlap_test_bounds::protected_range:
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
case overlap_test_bounds::confirmed_range:
{
const auto test_range = get_confirmed_range();
return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second);
}
default:
fmt::throw_exception("Unreachable" HERE);
}
}

/**
* Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
* If true, returns the range <min, max> with updated invalid range
* Overlapping checks
*/
std::tuple<bool, std::pair<u32, u32>> overlaps_page(const std::pair<u32, u32>& old_range, u32 address, overlap_test_bounds bounds) const
inline bool overlaps(const u32 address, section_bounds bounds) const
{
const u32 page_base = address & ~4095;
const u32 page_limit = page_base + 4096;

const u32 compare_min = std::min(old_range.first, page_base);
const u32 compare_max = std::max(old_range.second, page_limit);

u32 memory_base, memory_range;
switch (bounds)
{
case overlap_test_bounds::full_range:
{
memory_base = (cpu_address_base & ~4095);
memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base;
break;
}
case overlap_test_bounds::protected_range:
{
memory_base = locked_address_base;
memory_range = locked_address_range;
break;
}
case overlap_test_bounds::confirmed_range:
{
const auto range = get_confirmed_range();
memory_base = (cpu_address_base + range.first) & ~4095;
memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base;
break;
}
default:
fmt::throw_exception("Unreachable" HERE);
}

if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max))
return std::make_tuple(false, old_range);

const u32 _min = std::min(memory_base, compare_min);
const u32 _max = std::max(memory_base + memory_range, compare_max);
return std::make_tuple(true, std::make_pair(_min, _max));
return get_bounds(bounds).overlaps(address);
}

bool is_locked() const
inline bool overlaps(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other);
}

inline bool overlaps(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other.get_bounds(bounds));
}

inline bool inside(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other);
}

inline bool inside(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other.get_bounds(bounds));
}

inline s32 signed_distance(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).signed_distance(other);
}

inline u32 distance(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).distance(other);
}

/**
* Utilities
*/
inline bool valid_range() const
{
return cpu_range.valid();
}

inline bool is_locked() const
{
return locked;
}

bool is_dirty() const
inline u32 get_section_base() const
{
return dirty;
return cpu_range.start;
}

void set_dirty(bool state)
inline u32 get_section_size() const
{
dirty = state;
return cpu_range.valid() ? cpu_range.length() : 0;
}

u32 get_section_base() const
inline const address_range& get_locked_range() const
{
return cpu_address_base;
AUDIT( locked );
return locked_range;
}

u32 get_section_size() const
inline const address_range& get_section_range() const
{
return cpu_address_range;
return cpu_range;
}

bool matches(u32 cpu_address, u32 size) const
const address_range& get_confirmed_range() const
{
return (cpu_address_base == cpu_address && cpu_address_range == size);
return confirmed_range.valid() ? confirmed_range : cpu_range;
}

std::pair<u32, u32> get_min_max(const std::pair<u32, u32>& current_min_max) const
const std::pair<u32, u32> get_confirmed_range_delta() const
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
if (!confirmed_range.valid())
return { 0, cpu_range.length() };

return std::make_pair(min, max);
return { confirmed_range.start - cpu_range.start, confirmed_range.length() };
}

utils::protection get_protection() const
inline bool matches(const address_range &range) const
{
return cpu_range.valid() && cpu_range == range;
}

inline utils::protection get_protection() const
{
return protection;
}

template <typename T = void>
T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
inline address_range get_min_max(const address_range& current_min_max, section_bounds bounds) const
{
verify(HERE), locked_memory_ptr;
return locked_memory_ptr.get<T>(offset, no_sync);
return get_bounds(bounds).get_min_max(current_min_max);
}

/**
* Super Pointer
*/
template <typename T = void>
inline T* get_ptr_by_offset(u32 offset = 0, bool no_sync = false)
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + sizeof(T));
return super_ptr.get<T>(offset, no_sync);
}

// specialization due to sizeof(void) being illegal
inline void* get_ptr_by_offset(u32 offset, bool no_sync)
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + 1);
return super_ptr.get<void>(offset, no_sync);
}

template <typename T = void>
inline T* get_ptr(u32 address, bool no_sync = false)
{
verify(HERE), cpu_range.start <= address; // super_ptr & sizeof(T) tests are done by get_ptr_by_offset
return get_ptr_by_offset<T>(address - cpu_range.start, no_sync);
}

inline void flush_ptr_by_offset(u32 offset = 0, u32 len = 0) const
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + len);
super_ptr.flush(offset, len);
}

inline void flush_ptr(u32 address, u32 len = 0) const
{
verify(HERE), cpu_range.start <= address; // super_ptr & length tests are done by flush_ptr_by_offset
return flush_ptr_by_offset(address - cpu_range.start, len);
}

inline void flush_ptr(const address_range &range) const
{
return flush_ptr(range.start, range.length());
}


/**
* Memory tagging
*/
private:
inline void tag_memory()
{
// We only need to tag memory if we are in full-range mode
if (guard_policy == protect_policy_full_range)
return;

AUDIT(locked && super_ptr);

const address_range& range = get_confirmed_range();

volatile u32* first = get_ptr<volatile u32>(range.start, true);
volatile u32* last = get_ptr<volatile u32>(range.end - 3, true);

*first = range.start;
*last = range.end;

flush_ptr(range.start, 4);
flush_ptr(range.end - 3, 4);
}

public:
bool test_memory_head()
{
if (!locked_memory_ptr)
{
return false;
}
if (guard_policy == protect_policy_full_range)
return true;

const u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
return (*first == (cpu_address_base + confirmed_range.first));
AUDIT(locked && super_ptr);

const auto& range = get_confirmed_range();
volatile const u32* first = get_ptr<volatile const u32>(range.start);
return (*first == range.start);
}

bool test_memory_tail()
{
if (!locked_memory_ptr)
{
return false;
}
if (guard_policy == protect_policy_full_range)
return true;

const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
const u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
return (*last == (cpu_address_base + valid_limit - 4));
}
AUDIT(locked && super_ptr);

void flush_io(u32 offset = 0, u32 len = 0) const
{
const auto write_length = len ? len : (cpu_address_range - offset);
locked_memory_ptr.flush(offset, write_length);
}

std::pair<u32, u32> get_protected_range() const
{
if (locked)
{
return { locked_address_base, locked_address_range };
}
else
{
return { 0, 0 };
}
}

std::pair<u32, u32> get_confirmed_range() const
{
if (confirmed_range.second == 0)
{
return { 0, cpu_address_range };
}

return confirmed_range;
const auto& range = get_confirmed_range();
volatile const u32* last = get_ptr<volatile const u32>(range.end-3);
return (*last == range.end);
}
};

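The tagging scheme above stamps the first and last 32-bit word of the confirmed range with their own addresses, so test_memory_head()/test_memory_tail() can later detect whether the guest overwrote them. A self-contained sketch of the idea over an ordinary buffer (the real code writes through the super pointer into guest memory):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Small memcpy-based helpers so the sketch avoids aliasing issues.
static void write_u32(std::vector<uint8_t>& mem, uint32_t off, uint32_t value)
{
    std::memcpy(&mem[off], &value, sizeof(value));
}

static uint32_t read_u32(const std::vector<uint8_t>& mem, uint32_t off)
{
    uint32_t v;
    std::memcpy(&v, &mem[off], sizeof(v));
    return v;
}

int main()
{
    std::vector<uint8_t> mem(0x1000, 0);
    const uint32_t start = 0x100, end = 0x8ff; // hypothetical confirmed range, inclusive

    // tag_memory(): stamp the first and last u32 of the range with their own addresses
    write_u32(mem, start, start);
    write_u32(mem, end - 3, end);

    // test_memory_head()/test_memory_tail(): tags intact means no guest write detected
    assert(read_u32(mem, start) == start);
    assert(read_u32(mem, end - 3) == end);

    // A guest write inside the tail tag is caught on the next test
    mem[end - 1] = 0xff;
    assert(read_u32(mem, end - 3) != end);
}
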
template <typename pipeline_storage_type, typename backend_storage>
class shaders_cache
{

@@ -76,6 +76,11 @@ namespace rsx
}
}

weak_ptr get_super_ptr(const address_range &range)
{
return get_super_ptr(range.start, range.length());
}

weak_ptr get_super_ptr(u32 addr, u32 len)
{
verify(HERE), g_current_renderer;
@@ -507,4 +512,8 @@ namespace rsx
++src_ptr;
}
}

#ifdef TEXTURE_CACHE_DEBUG
tex_cache_checker_t tex_cache_checker = {};
#endif
}

@@ -1,8 +1,11 @@
#pragma once
#pragma once

#include "../System.h"
#include "Utilities/address_range.h"
#include "Utilities/geometry.h"
#include "Utilities/asm.h"
#include "Utilities/VirtualMemory.h"
#include "Emu/Memory/vm.h"
#include "gcm_enums.h"
#include <atomic>
#include <memory>
@@ -16,6 +19,15 @@ extern "C"

namespace rsx
{
// Import address_range utilities
using utils::address_range;
using utils::address_range_vector;
using utils::page_for;
using utils::page_start;
using utils::page_end;
using utils::next_page;

// Definitions
class thread;
extern thread* g_current_renderer;

@@ -200,7 +212,14 @@ namespace rsx
}
};

//Holds information about a framebuffer
// Acquire memory mirror with r/w permissions
weak_ptr get_super_ptr(const address_range &range);
weak_ptr get_super_ptr(u32 addr, u32 size);

/**
* Holds information about a framebuffer
*/
struct gcm_framebuffer_info
{
u32 address = 0;
@@ -223,6 +242,11 @@ namespace rsx
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
{}

address_range get_memory_range(u32 aa_factor = 1) const
{
return address_range::start_length(address, pitch * height * aa_factor);
}
};

struct avconf
@@ -463,9 +487,6 @@ namespace rsx

std::array<float, 4> get_constant_blend_colors();

// Acquire memory mirror with r/w permissions
weak_ptr get_super_ptr(u32 addr, u32 size);

/**
* Shuffle texel layout from xyzw to wzyx
* TODO: Variable src/dst and optional se conversion
@@ -727,11 +748,6 @@ namespace rsx
return g_current_renderer;
}

static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
return (base1 < limit2 && base2 < limit1);
}

template <int N>
void unpack_bitset(std::bitset<N>& block, u64* values)
{
@@ -768,4 +784,4 @@ namespace rsx
}
}
}
}
}
@@ -528,6 +528,8 @@
<ClInclude Include="Emu\RSX\Common\GLSLCommon.h" />
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h" />
<ClInclude Include="Emu\RSX\gcm_enums.h" />
<ClInclude Include="Emu\RSX\gcm_printing.h" />
<ClInclude Include="Emu\RSX\Overlays\overlays.h" />

@@ -1444,5 +1444,11 @@
<ClInclude Include="..\Utilities\address_range.h">
<Filter>Utilities</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
</Project>