Texture cache cleanup, refactoring and fixes

Rui Pinheiro 2018-09-22 01:14:26 +01:00 committed by kd-11
parent 8b3d1c2c91
commit 35139ebf5d
17 changed files with 3209 additions and 1453 deletions

File diff suppressed because it is too large


@ -0,0 +1,221 @@
#pragma once
#include "../rsx_utils.h"
#ifdef TEXTURE_CACHE_DEBUG
namespace rsx {
class tex_cache_checker_t {
struct per_page_info_t {
u8 prot = 0;
u8 no = 0;
u8 ro = 0;
FORCE_INLINE utils::protection get_protection() const
{
return static_cast<utils::protection>(prot);
}
FORCE_INLINE void set_protection(utils::protection prot)
{
this->prot = static_cast<u8>(prot);
}
FORCE_INLINE void reset_refcount()
{
no = 0;
ro = 0;
}
FORCE_INLINE u16 sum() const
{
return u16{ no } + ro;
}
FORCE_INLINE bool verify() const
{
const utils::protection prot = get_protection();
switch (prot)
{
case utils::protection::no: return no > 0;
case utils::protection::ro: return no == 0 && ro > 0;
case utils::protection::rw: return no == 0 && ro == 0;
default: ASSUME(0);
}
}
FORCE_INLINE void add(utils::protection prot)
{
switch (prot)
{
case utils::protection::no: if (no++ == UINT8_MAX) fmt::throw_exception("add(protection::no) overflow with NO==%d", UINT8_MAX); return;
case utils::protection::ro: if (ro++ == UINT8_MAX) fmt::throw_exception("add(protection::ro) overflow with RO==%d", UINT8_MAX); return;
default: ASSUME(0);
}
}
FORCE_INLINE void remove(utils::protection prot)
{
switch (prot)
{
case utils::protection::no: if (no-- == 0) fmt::throw_exception("remove(protection::no) overflow with NO==0"); return;
case utils::protection::ro: if (ro-- == 0) fmt::throw_exception("remove(protection::ro) overflow with RO==0"); return;
default: ASSUME(0);
}
}
};
static_assert(sizeof(per_page_info_t) <= 4, "per_page_info_t must be no larger than 4 bytes");
// 4GB memory space / 4096 bytes per page = 1048576 pages
static constexpr size_t num_pages = 0x1'0000'0000 / 4096;
per_page_info_t _info[num_pages];
static constexpr size_t rsx_address_to_index(u32 address)
{
return (address / 4096);
}
static constexpr u32 index_to_rsx_address(size_t idx)
{
return static_cast<u32>(idx * 4096);
}
constexpr per_page_info_t* rsx_address_to_info_pointer(u32 address)
{
return &(_info[rsx_address_to_index(address)]);
}
constexpr const per_page_info_t* rsx_address_to_info_pointer(u32 address) const
{
return &(_info[rsx_address_to_index(address)]);
}
constexpr u32 info_pointer_to_address(const per_page_info_t* ptr) const
{
return index_to_rsx_address(static_cast<size_t>(ptr - _info));
}
std::string prot_to_str(utils::protection prot) const
{
switch (prot)
{
case utils::protection::no: return "NA";
case utils::protection::ro: return "RO";
case utils::protection::rw: return "RW";
default: fmt::throw_exception("Unreachable " HERE);
}
}
public:
tex_cache_checker_t()
{
// Initialize array to all 0
memset(&_info, 0, sizeof(_info));
}
static_assert(static_cast<u32>(utils::protection::rw) == 0, "utils::protection::rw must have value 0 for the above constructor to work");
void set_protection(const address_range& range, utils::protection prot)
{
AUDIT(range.is_page_range());
AUDIT(prot == utils::protection::no || prot == utils::protection::ro || prot == utils::protection::rw);
for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
{
ptr->set_protection(prot);
}
}
void discard(const address_range& range)
{
set_protection(range, utils::protection::rw);
}
void reset_refcount()
{
for (per_page_info_t* ptr = rsx_address_to_info_pointer(0); ptr <= rsx_address_to_info_pointer(0xFF'FF'FF'FF); ptr++)
{
ptr->reset_refcount();
}
}
void add(const address_range& range, utils::protection prot)
{
AUDIT(range.is_page_range());
AUDIT(prot == utils::protection::no || prot == utils::protection::ro);
for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
{
ptr->add(prot);
}
}
void remove(const address_range& range, utils::protection prot)
{
AUDIT(range.is_page_range());
AUDIT(prot == utils::protection::no || prot == utils::protection::ro);
for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
{
ptr->remove(prot);
}
}
// Returns a lower bound on how many locked sections with each protection {NA,RO} are known to lie within the given range
// The assumption is that the page in the range with the largest number of refcounted sections gives a lower bound on how many such sections must exist
std::pair<u8,u8> get_minimum_number_of_sections(const address_range& range) const
{
AUDIT(range.is_page_range());
u8 no = 0;
u8 ro = 0;
for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
{
no = std::max(no, ptr->no);
ro = std::max(ro, ptr->ro);
}
return { no,ro };
}
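// Illustrative example of the lower-bound reasoning (hypothetical layout): two RO sections overlap on one
// page of the range while a third RO section covers a different page. The overlapped page reports ro == 2
// and every other page reports ro <= 1, so this returns { no=0, ro=2 }: at least two RO sections must
// exist within the range, even though the true count here is three.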
void check_unprotected(const address_range& range, bool allow_ro = false, bool must_be_empty = true) const
{
AUDIT(range.is_page_range());
for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
{
const auto prot = ptr->get_protection();
if (prot != utils::protection::rw && (!allow_ro || prot != utils::protection::ro))
{
const u32 addr = info_pointer_to_address(ptr);
fmt::throw_exception("Page at addr=0x%8x should be RW%s: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
}
if (must_be_empty && (
ptr->no > 0 ||
(!allow_ro && ptr->ro > 0)
))
{
const u32 addr = info_pointer_to_address(ptr);
fmt::throw_exception("Page at addr=0x%8x should not have any NA%s sections: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
}
}
}
void verify() const
{
for (size_t idx = 0; idx < num_pages; idx++)
{
auto &elmnt = _info[idx];
if (!elmnt.verify())
{
const u32 addr = index_to_rsx_address(idx);
const utils::protection prot = elmnt.get_protection();
fmt::throw_exception("Protection verification failed at addr=0x%x: Prot=%s, RO=%d, NA=%d", addr, prot_to_str(prot), elmnt.ro, elmnt.no);
}
}
}
};
extern tex_cache_checker_t tex_cache_checker;
}; // namespace rsx
#endif //TEXTURE_CACHE_DEBUG
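
For orientation, a minimal usage sketch of the debug checker above; the addresses and the direct call sequence are illustrative assumptions, not taken from this diff (in the actual code these calls are made internally by rsx::memory_protect and buffered_section::protect, shown further below):

#ifdef TEXTURE_CACHE_DEBUG
// Hypothetical, page-aligned range guarded by a single read-only section
const auto pages = utils::address_range::start_length(0x00300000, 0x2000);      // two full pages
rsx::tex_cache_checker.add(pages, utils::protection::ro);                       // refcount: one RO section now guards these pages
rsx::tex_cache_checker.set_protection(pages, utils::protection::ro);            // mirror the real memory_protect call
rsx::tex_cache_checker.verify();                                                // RO pages must satisfy no == 0 && ro > 0
rsx::tex_cache_checker.remove(pages, utils::protection::ro);                    // the section unprotects...
rsx::tex_cache_checker.set_protection(pages, utils::protection::rw);            // ...and the pages return to RW
rsx::tex_cache_checker.verify();
#endif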

File diff suppressed because it is too large


@ -1593,12 +1593,12 @@ void GLGSRender::flip(int buffer)
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), "RSX Load: " + std::to_string(get_load()) + "%");
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), "draw call setup: " + std::to_string(m_begin_time) + "us");
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", m_draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", m_begin_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", m_vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", m_textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", m_draw_time));
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
@ -1606,9 +1606,9 @@ void GLGSRender::flip(int buffer)
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
}
m_frame->flip(m_context);
@ -1640,8 +1640,11 @@ void GLGSRender::flip(int buffer)
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
bool can_flush = (std::this_thread::get_id() == m_thread_id);
auto result = m_gl_texture_cache.invalidate_address(address, is_writing, can_flush);
const bool can_flush = (std::this_thread::get_id() == m_thread_id);
const rsx::invalidation_cause cause =
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
auto result = m_gl_texture_cache.invalidate_address(address, cause);
if (!result.violation_handled)
return false;
@ -1664,12 +1667,15 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
return true;
}
void GLGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
//Discard all memory in that range without bothering with writeback (Force it for strict?)
if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled)
auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap));
AUDIT(data.empty());
if (data.violation_handled)
{
m_gl_texture_cache.purge_dirty();
m_gl_texture_cache.purge_unreleased_sections();
{
std::lock_guard lock(m_sampler_mutex);
m_samplers_dirty.store(true);


@ -390,7 +390,7 @@ protected:
void do_local_task(rsx::FIFO_state state) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(u32 address_base, u32 size) override;
void on_invalidate_memory_range(const utils::address_range &range) override;
void notify_tile_unbound(u32 tile) override;
std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;


@ -237,8 +237,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
old_format_found = true;
}
m_gl_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height);
const utils::address_range surface_range = m_surface_info[i].get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
}
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
@ -268,8 +269,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;
m_gl_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
}
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
@ -381,8 +383,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * layout.aa_factors[1];
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
}
}
@ -392,8 +394,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
depth_format_gl.format, depth_format_gl.type, true);
}
}
@ -448,12 +450,11 @@ void GLGSRender::read_buffers()
if (!m_surface_info[i].pitch)
continue;
const u32 range = pitch * height;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i]));
const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height);
bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i]));
//Fall back to slower methods if the image could not be fetched from cache.
if (!success)
@ -464,7 +465,7 @@ void GLGSRender::read_buffers()
}
else
{
m_gl_texture_cache.invalidate_range(texaddr, range, false, false, true);
m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
@ -512,8 +513,9 @@ void GLGSRender::read_buffers()
if (!pitch)
return;
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil));
const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height);
bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil));
if (in_cache)
return;


@ -141,8 +141,10 @@ namespace gl
}
};
class cached_texture_section : public rsx::cached_texture_section
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section>
{
using baseclass = rsx::cached_texture_section<gl::cached_texture_section>;
private:
fence m_fence;
u32 pbo_id = 0;
@ -226,7 +228,7 @@ namespace gl
void init_buffer()
{
const f32 resolution_scale = (context == rsx::texture_upload_context::framebuffer_storage? rsx::get_resolution_scale() : 1.f);
const u32 real_buffer_size = (resolution_scale <= 1.f) ? cpu_address_range : (u32)(resolution_scale * resolution_scale * cpu_address_range);
const u32 real_buffer_size = (resolution_scale <= 1.f) ? get_section_size() : (u32)(resolution_scale * resolution_scale * get_section_size());
const u32 buffer_size = align(real_buffer_size, 4096);
if (pbo_id)
@ -249,13 +251,14 @@ namespace gl
}
public:
using baseclass::cached_texture_section;
void reset(u32 base, u32 size, bool /*flushable*/=false)
void reset(const utils::address_range &memory_range)
{
rsx::cached_texture_section::reset(base, size);
vram_texture = nullptr;
managed_texture.reset();
baseclass::reset(memory_range);
}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,
@ -283,7 +286,7 @@ namespace gl
if (rsx_pitch > 0)
this->rsx_pitch = rsx_pitch;
else
this->rsx_pitch = cpu_address_range / height;
this->rsx_pitch = get_section_size() / height;
this->width = w;
this->height = h;
@ -292,6 +295,9 @@ namespace gl
this->mipmaps = mipmaps;
set_format(gl_format, gl_type, swap_bytes);
// Notify baseclass
baseclass::on_section_resources_created();
}
void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps)
@ -307,6 +313,9 @@ namespace gl
rsx_pitch = 0;
real_pitch = 0;
// Notify baseclass
baseclass::on_section_resources_created();
}
void make_flushable()
@ -458,11 +467,12 @@ namespace gl
bool flush()
{
if (flushed) return true; //Already written, ignore
AUDIT( is_locked() );
bool result = true;
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture();
if (!synchronized)
@ -480,11 +490,14 @@ namespace gl
m_fence.wait_for_signal();
flushed = true;
const auto valid_range = get_confirmed_range();
void *dst = get_raw_ptr(valid_range.first, true);
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT( valid_length > 0 );
void *dst = get_ptr_by_offset(valid_range.first, true);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_offset, valid_length, GL_MAP_READ_BIT);
//throw if map failed since we'll segfault anyway
verify(HERE), src != nullptr;
@ -496,20 +509,20 @@ namespace gl
require_manual_shuffle = true;
}
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(dst, src, valid_range.second);
memcpy(dst, src, valid_length);
}
else
{
if (valid_range.second % rsx_pitch)
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}
u8 *_src = (u8*)src;
u8 *_dst = (u8*)dst;
const auto num_rows = valid_range.second / rsx_pitch;
const auto num_rows = valid_length / rsx_pitch;
for (u32 row = 0; row < num_rows; ++row)
{
memcpy(_dst, _src, real_pitch);
@ -521,7 +534,7 @@ namespace gl
if (require_manual_shuffle)
{
//byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_range.second / rsx_pitch);
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_length / rsx_pitch);
}
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
{
@ -537,7 +550,7 @@ namespace gl
case texture::type::ushort_1_5_5_5_rev:
case texture::type::ushort_5_5_5_1:
{
const u32 num_reps = valid_range.second / 2;
const u32 num_reps = valid_length / 2;
be_t<u16>* in = (be_t<u16>*)(dst);
u16* out = (u16*)dst;
@ -556,7 +569,7 @@ namespace gl
case texture::type::uint_2_10_10_10_rev:
case texture::type::uint_8_8_8_8:
{
u32 num_reps = valid_range.second / 4;
u32 num_reps = valid_length / 4;
be_t<u32>* in = (be_t<u32>*)(dst);
u32* out = (u32*)dst;
@ -575,7 +588,7 @@ namespace gl
}
}
flush_io(valid_range.first, valid_range.second);
flush_ptr_by_offset(valid_offset, valid_length);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
@ -586,13 +599,10 @@ namespace gl
void destroy()
{
if (!locked && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
//Already destroyed
return;
if (locked)
unprotect();
if (pbo_id == 0)
{
//Read-only texture, destroy texture memory
@ -611,6 +621,13 @@ namespace gl
if (!m_fence.is_empty())
m_fence.destroy();
baseclass::on_section_resources_destroyed();
}
inline bool exists() const
{
return (vram_texture != nullptr);
}
texture::format get_format() const
@ -618,16 +635,6 @@ namespace gl
return format;
}
bool exists() const
{
return vram_texture != nullptr;
}
bool is_flushable() const
{
return (protection == utils::protection::no);
}
bool is_flushed() const
{
return flushed;
@ -683,9 +690,10 @@ namespace gl
}
};
class texture_cache : public rsx::texture_cache<void*, cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
class texture_cache : public rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
{
private:
using baseclass = rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>;
struct discardable_storage
{
@ -717,30 +725,10 @@ namespace gl
blitter m_hw_blitter;
std::vector<discardable_storage> m_temporary_surfaces;
cached_texture_section& create_texture(gl::viewable_image* image, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps)
{
cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h, depth);
tex.reset(texaddr, texsize, false);
tex.create_read_only(image, w, h, depth, mipmaps);
read_only_range = tex.get_min_max(read_only_range);
return tex;
}
void clear()
{
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
tex.destroy();
}
range_data.data.resize(0);
}
baseclass::clear();
clear_temporary_subresources();
m_unreleased_texture_objects = 0;
}
void clear_temporary_subresources()
@ -850,11 +838,6 @@ namespace gl
protected:
void free_texture_section(cached_texture_section& tex) override
{
tex.destroy();
}
gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{
@ -946,7 +929,7 @@ namespace gl
dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
}
cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
{
auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);
@ -954,15 +937,23 @@ namespace gl
const auto swizzle = get_component_mapping(gcm_format, flags);
image->set_native_component_layout(swizzle);
auto& cached = create_texture(image, rsx_address, rsx_size, width, height, depth, mipmaps);
cached.set_dirty(false);
auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps);
ASSERT(!cached.is_locked());
// Prepare section
cached.reset(rsx_range);
cached.set_view_flags(flags);
cached.set_context(context);
cached.set_gcm_format(gcm_format);
cached.set_image_type(type);
cached.set_gcm_format(gcm_format);
cached.create_read_only(image, width, height, depth, mipmaps);
cached.set_dirty(false);
if (context != rsx::texture_upload_context::blit_engine_dst)
{
AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always );
read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. Ask kd-11
cached.protect(utils::protection::ro);
}
else
@ -998,8 +989,8 @@ namespace gl
//NOTE: Protection is handled by the caller
cached.make_flushable();
cached.set_dimensions(width, height, depth, (rsx_size / height));
no_access_range = cached.get_min_max(no_access_range);
cached.set_dimensions(width, height, depth, (rsx_range.length() / height));
no_access_range = cached.get_min_max(no_access_range, rsx::section_bounds::locked_range);
}
update_cache_tag();
@ -1010,7 +1001,8 @@ namespace gl
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override
{
void* unused = nullptr;
auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
rsx::texture_create_flags::default_component_order);
gl::upload_texture(section->get_raw_texture()->id(), rsx_address, gcm_format, width, height, depth, mipmaps,
@ -1082,9 +1074,7 @@ namespace gl
public:
texture_cache() {}
~texture_cache() {}
using baseclass::texture_cache;
void initialize()
{
@ -1103,19 +1093,17 @@ namespace gl
{
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(get_block_address(rsx_address));
if (found == m_cache.end())
auto &block = m_storage.block_for(rsx_address);
if (block.get_locked_count() == 0)
return false;
//if (found->second.valid_count == 0)
//return false;
for (auto& tex : found->second.data)
for (auto& tex : block)
{
if (tex.is_dirty())
continue;
if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
continue;
if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
@ -1127,9 +1115,9 @@ namespace gl
void on_frame_end() override
{
if (m_unreleased_texture_objects >= m_max_zombie_objects)
if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects)
{
purge_dirty();
purge_unreleased_sections();
}
clear_temporary_subresources();
@ -1158,7 +1146,7 @@ namespace gl
gl::texture::format::depth_stencil : gl::texture::format::depth;
}
flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size);
flush_if_cache_miss_likely(fmt, result.to_address_range());
}
return true;


@ -1350,34 +1350,12 @@ namespace rsx
{
if (!in_begin_end && state != FIFO_state::lock_wait)
{
if (!m_invalidated_memory_ranges.empty())
reader_lock lock(m_mtx_task);
if (m_invalidated_memory_range.valid())
{
std::lock_guard lock(m_mtx_task);
for (const auto& range : m_invalidated_memory_ranges)
{
on_invalidate_memory_range(range.first, range.second);
// Clean the main memory super_ptr cache if invalidated
const auto range_end = range.first + range.second;
for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
{
const auto mem_start = It->first;
const auto mem_end = mem_start + It->second.size();
const bool overlaps = (mem_start < range_end && range.first < mem_end);
if (overlaps)
{
It = main_super_memory_block.erase(It);
}
else
{
It++;
}
}
}
m_invalidated_memory_ranges.clear();
lock.upgrade();
handle_invalidated_memory_range();
}
}
}
@ -2676,15 +2654,32 @@ namespace rsx
void thread::on_notify_memory_mapped(u32 address, u32 size)
{
// TODO
// In the case where an unmap is followed shortly after by a remap of the same address space
// we must block until RSX has invalidated the memory
// or lock m_mtx_task and do it ourselves
if (m_rsx_thread_exiting)
return;
reader_lock lock(m_mtx_task);
const auto map_range = address_range::start_length(address, size);
if (!m_invalidated_memory_range.valid())
return;
if (m_invalidated_memory_range.overlaps(map_range))
{
lock.upgrade();
handle_invalidated_memory_range();
}
}
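// Hypothetical timeline of the unmap-then-remap case guarded against above (addresses illustrative):
//   1. The guest unmaps a main-memory range such as 0x30200000..0x302FFFFF -> on_notify_memory_unmapped()
//      records it in m_invalidated_memory_range and defers the cache invalidation.
//   2. The same range is remapped immediately -> the overlap check above fires, the reader lock is
//      upgraded and handle_invalidated_memory_range() flushes/unprotects the stale sections before
//      the new mapping can be used.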
void thread::on_notify_memory_unmapped(u32 base_address, u32 size)
void thread::on_notify_memory_unmapped(u32 address, u32 size)
{
if (!m_rsx_thread_exiting && base_address < 0xC0000000)
if (!m_rsx_thread_exiting && address < 0xC0000000)
{
u32 ea = base_address >> 20, io = RSXIOMem.io[ea];
u32 ea = address >> 20, io = RSXIOMem.io[ea];
if (io < 512)
{
@ -2704,11 +2699,56 @@ namespace rsx
}
}
// Queue up memory invalidation
std::lock_guard lock(m_mtx_task);
m_invalidated_memory_ranges.push_back({ base_address, size });
const bool existing_range_valid = m_invalidated_memory_range.valid();
const auto unmap_range = address_range::start_length(address, size);
if (existing_range_valid && m_invalidated_memory_range.touches(unmap_range))
{
// Merge range-to-invalidate in case of consecutive unmaps
m_invalidated_memory_range.set_min_max(unmap_range);
}
else
{
if (existing_range_valid)
{
// We can only delay consecutive unmaps.
// Otherwise, to avoid VirtualProtect failures, we need to do the invalidation here
handle_invalidated_memory_range();
}
m_invalidated_memory_range = unmap_range;
}
}
}
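// Illustrative merge behaviour (hypothetical addresses, assuming touches() also accepts directly
// adjacent ranges, as the "consecutive unmaps" comment implies):
//   unmap [0x30200000, 0x302FFFFF] -> m_invalidated_memory_range = [0x30200000, 0x302FFFFF]
//   unmap [0x30300000, 0x303FFFFF] -> touches the pending range; set_min_max() widens it to
//                                     [0x30200000, 0x303FFFFF] and invalidation stays deferred
//   unmap [0x50000000, 0x500FFFFF] -> does not touch the pending range; the pending range is
//                                     invalidated immediately and the new range becomes pending.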
// NOTE: m_mtx_task lock must be acquired before calling this method
void thread::handle_invalidated_memory_range()
{
if (!m_invalidated_memory_range.valid())
return;
on_invalidate_memory_range(m_invalidated_memory_range);
// Clean the main memory super_ptr cache if invalidated
for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
{
const auto block_range = address_range::start_length(It->first, It->second.size());
if (m_invalidated_memory_range.overlaps(block_range))
{
It = main_super_memory_block.erase(It);
}
else
{
It++;
}
}
m_invalidated_memory_range.invalidate();
}
//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
void thread::pause()
{


@ -347,7 +347,7 @@ namespace rsx
std::shared_ptr<rsx::overlays::display_manager> m_overlay_manager;
// Invalidated memory range
std::vector<std::pair<u32, u32>> m_invalidated_memory_ranges;
address_range m_invalidated_memory_range;
public:
RsxDmaControl* ctrl = nullptr;
@ -468,6 +468,8 @@ namespace rsx
thread();
virtual ~thread();
void handle_invalidated_memory_range();
virtual void on_task() override;
virtual void on_exit() override;
@ -495,7 +497,7 @@ namespace rsx
virtual void flip(int buffer) = 0;
virtual u64 timestamp();
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
virtual void on_invalidate_memory_range(u32 /*address*/, u32 /*range*/) {}
virtual void on_invalidate_memory_range(const address_range & /*range*/) {}
virtual void notify_tile_unbound(u32 /*tile*/) {}
// zcull


@ -812,7 +812,9 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
vk::texture_cache::thrashed_set result;
{
std::lock_guard lock(m_secondary_cb_guard);
result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
}
if (!result.violation_handled)
@ -893,13 +895,16 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
return false;
}
void VKGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
std::lock_guard lock(m_secondary_cb_guard);
if (m_texture_cache.invalidate_range(address_base, size, true, true, false,
m_secondary_command_buffer, m_swapchain->get_graphics_queue()).violation_handled)
auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
AUDIT(data.empty());
if (data.violation_handled)
{
m_texture_cache.purge_dirty();
m_texture_cache.purge_unreleased_sections();
{
std::lock_guard lock(m_sampler_mutex);
m_samplers_dirty.store(true);
@ -2625,9 +2630,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (old_format == VK_FORMAT_UNDEFINED)
old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first;
m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height,
*m_current_command_buffer, m_swapchain->get_graphics_queue());
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}
m_surface_info[i].address = m_surface_info[i].pitch = 0;
@ -2641,9 +2646,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
*m_current_command_buffer, m_swapchain->get_graphics_queue());
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@ -2697,8 +2702,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * layout.aa_factors[1];
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
}
}
@ -2708,8 +2713,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
}
}
@ -3129,13 +3134,13 @@ void VKGSRender::flip(int buffer)
if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "RSX Load: " + std::to_string(get_load()) + "%");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", m_draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", m_setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", m_vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", m_textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", m_draw_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", m_flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
@ -3144,10 +3149,10 @@ void VKGSRender::flip(int buffer)
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
}
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);


@ -433,7 +433,7 @@ protected:
void notify_tile_unbound(u32 tile) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(u32 address_base, u32 size) override;
void on_invalidate_memory_range(const utils::address_range &range) override;
bool on_decompiler_task() override;
};


@ -13,8 +13,10 @@ extern u64 get_system_time();
namespace vk
{
class cached_texture_section : public rsx::cached_texture_section
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section>
{
using baseclass = typename rsx::cached_texture_section<vk::cached_texture_section>;
std::unique_ptr<vk::viewable_image> managed_texture = nullptr;
//DMA relevant data
@ -24,15 +26,14 @@ namespace vk
std::unique_ptr<vk::buffer> dma_buffer;
public:
using baseclass::cached_texture_section;
cached_texture_section() {}
void reset(u32 base, u32 length)
void reset(const utils::address_range &memory_range)
{
if (length > cpu_address_range)
if (memory_range.length() > get_section_size())
release_dma_resources();
rsx::cached_texture_section::reset(base, length);
baseclass::reset(memory_range);
}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)
@ -56,13 +57,16 @@ namespace vk
if (rsx_pitch > 0)
this->rsx_pitch = rsx_pitch;
else
this->rsx_pitch = cpu_address_range / height;
this->rsx_pitch = get_section_size() / height;
//Even if we are managing the same vram section, we cannot guarantee contents are static
//The create method is only invoked when a new mangaged session is required
//The create method is only invoked when a new managed session is required
synchronized = false;
flushed = false;
sync_timestamp = 0ull;
// Notify baseclass
baseclass::on_section_resources_created();
}
void release_dma_resources()
@ -81,11 +85,14 @@ namespace vk
void destroy()
{
m_tex_cache->on_section_destroyed(*this);
vram_texture = nullptr;
release_dma_resources();
baseclass::on_section_resources_destroyed();
}
bool exists() const
inline bool exists() const
{
return (vram_texture != nullptr);
}
@ -115,12 +122,6 @@ namespace vk
return vram_texture->info.format;
}
bool is_flushable() const
{
//This section is active and can be flushed to cpu
return (protection == utils::protection::no);
}
bool is_flushed() const
{
//This memory section was flushable, but a flush has already removed protection
@ -144,7 +145,7 @@ namespace vk
if (dma_buffer.get() == nullptr)
{
auto memory_type = m_device->get_memory_mapping().host_visible_coherent;
dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
}
if (manage_cb_lifetime)
@ -246,18 +247,18 @@ namespace vk
{
verify (HERE), mem_target->value != dma_buffer->value;
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
shuffle_kernel->run(cmd, mem_target, cpu_address_range);
shuffle_kernel->run(cmd, mem_target, get_section_size());
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
VkBufferCopy copy = {};
copy.size = cpu_address_range;
copy.size = get_section_size();
vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
}
@ -282,6 +283,7 @@ namespace vk
bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
{
if (flushed) return true;
AUDIT( is_locked() );
if (m_device == nullptr)
{
@ -293,7 +295,7 @@ namespace vk
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture(true, cmd, submit_queue);
result = false;
}
@ -301,22 +303,26 @@ namespace vk
verify(HERE), real_pitch > 0;
flushed = true;
const auto valid_range = get_confirmed_range();
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
void* pixels_dst = get_raw_ptr(valid_range.first, true);
const auto valid_range = get_confirmed_range_delta();
const u32 valid_offset = valid_range.first;
const u32 valid_length = valid_range.second;
AUDIT( valid_length > 0 );
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
void* pixels_src = dma_buffer->map(valid_offset, valid_length);
void* pixels_dst = get_ptr_by_offset(valid_offset, true);
if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
{
memcpy(pixels_dst, pixels_src, valid_range.second);
memcpy(pixels_dst, pixels_src, valid_length);
}
else
{
if (valid_range.second % rsx_pitch)
if (valid_length % rsx_pitch)
{
fmt::throw_exception("Unreachable" HERE);
}
const u32 num_rows = valid_range.second / rsx_pitch;
const u32 num_rows = valid_length / rsx_pitch;
auto _src = (u8*)pixels_src;
auto _dst = (u8*)pixels_dst;
@ -328,7 +334,7 @@ namespace vk
}
}
flush_io(valid_range.first, valid_range.second);
flush_ptr_by_offset(valid_offset, valid_length);
dma_buffer->unmap();
reset_write_statistics();
@ -405,9 +411,18 @@ namespace vk
}
};
class texture_cache : public rsx::texture_cache<vk::command_buffer, cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
class texture_cache : public rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
{
public:
virtual void on_section_destroyed(cached_texture_section& tex)
{
m_discarded_memory_size += tex.get_section_size();
m_discardable_storage.push_back(tex);
}
private:
using baseclass = rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>;
//Vulkan internals
vk::render_device* m_device;
vk::memory_type_mapping m_memory_types;
@ -419,30 +434,11 @@ namespace vk
std::list<discarded_storage> m_discardable_storage;
std::atomic<u32> m_discarded_memory_size = { 0 };
void purge_cache()
void clear()
{
for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{
if (tex.exists())
{
m_discardable_storage.push_back(tex);
}
if (tex.is_locked())
tex.unprotect();
tex.release_dma_resources();
}
range_data.data.resize(0);
}
baseclass::clear();
m_discardable_storage.clear();
m_unreleased_texture_objects = 0;
m_texture_memory_in_use = 0;
m_discarded_memory_size = 0;
}
@ -486,14 +482,6 @@ namespace vk
}
protected:
void free_texture_section(cached_texture_section& tex) override
{
m_discarded_memory_size += tex.get_section_size();
m_discardable_storage.push_back(tex);
tex.destroy();
}
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
{
@ -776,7 +764,7 @@ namespace vk
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
}
cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
{
const u16 section_depth = depth;
@ -846,26 +834,30 @@ namespace vk
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer });
cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
region.reset(rsx_address, rsx_size);
region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
region.set_dirty(false);
cached_texture_section& region = *find_cached_texture(rsx_range, true, true, width, height, section_depth);
ASSERT(!region.is_locked());
// New section, we must prepare it
region.reset(rsx_range);
region.set_context(context);
region.set_gcm_format(gcm_format);
region.set_image_type(type);
region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
region.set_dirty(false);
//It's not necessary to lock blit dst textures as they are just reused as necessary
if (context != rsx::texture_upload_context::blit_engine_dst)
{
region.protect(utils::protection::ro);
read_only_range = region.get_min_max(read_only_range);
read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range);
}
else
{
//TODO: Confirm byte swap patterns
//NOTE: Protection is handled by the caller
region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
no_access_range = region.get_min_max(no_access_range);
no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range);
}
update_cache_tag();
@ -875,7 +867,8 @@ namespace vk
cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override
{
auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
rsx::texture_create_flags::default_component_order);
auto image = section->get_raw_texture();
@ -962,6 +955,7 @@ namespace vk
}
public:
using baseclass::texture_cache;
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
{
@ -974,26 +968,24 @@ namespace vk
void destroy() override
{
purge_cache();
clear();
}
bool is_depth_texture(u32 rsx_address, u32 rsx_size) override
{
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(get_block_address(rsx_address));
if (found == m_cache.end())
auto &block = m_storage.block_for(rsx_address);
if (block.get_locked_count() == 0)
return false;
//if (found->second.valid_count == 0)
//return false;
for (auto& tex : found->second.data)
for (auto& tex : block)
{
if (tex.is_dirty())
continue;
if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
continue;
if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
@ -1016,10 +1008,10 @@ namespace vk
void on_frame_end() override
{
if (m_unreleased_texture_objects >= m_max_zombie_objects ||
if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects ||
m_discarded_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with memory resources
{
purge_dirty();
purge_unreleased_sections();
}
const u64 last_complete_frame = vk::get_last_completed_frame_id();
@ -1228,7 +1220,7 @@ namespace vk
{
if (reply.real_dst_size)
{
flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue);
flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue);
}
return true;
@ -1239,12 +1231,12 @@ namespace vk
const u32 get_unreleased_textures_count() const override
{
return m_unreleased_texture_objects + (u32)m_discardable_storage.size();
return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
}
const u32 get_texture_memory_in_use() const override
{
return m_texture_memory_in_use;
return m_storage.m_texture_memory_in_use;
}
const u32 get_temporary_memory_in_use()


@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "Utilities/VirtualMemory.h"
#include "Utilities/hash.h"
#include "Emu/Memory/vm.h"
@ -6,6 +6,7 @@
#include "Common/ProgramStateCache.h"
#include "Emu/Cell/Modules/cellMsgDialog.h"
#include "Emu/System.h"
#include "Common/texture_cache_checker.h"
#include "rsx_utils.h"
#include <thread>
@ -19,109 +20,125 @@ namespace rsx
protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding
};
enum overlap_test_bounds
enum section_bounds
{
full_range,
protected_range,
locked_range,
confirmed_range
};
static inline void memory_protect(const address_range& range, utils::protection prot)
{
verify(HERE), range.is_page_range();
//LOG_ERROR(RSX, "memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
utils::memory_protect(vm::base(range.start), range.length(), prot);
#ifdef TEXTURE_CACHE_DEBUG
tex_cache_checker.set_protection(range, prot);
#endif
}
class buffered_section
{
public:
static const protection_policy guard_policy = protect_policy_full_range;
private:
u32 locked_address_base = 0;
u32 locked_address_range = 0;
weak_ptr locked_memory_ptr;
std::pair<u32, u32> confirmed_range;
inline void tag_memory()
{
if (locked_memory_ptr)
{
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);
*first = cpu_address_base + confirmed_range.first;
*last = cpu_address_base + valid_limit - 4;
locked_memory_ptr.flush(confirmed_range.first, 4);
locked_memory_ptr.flush(valid_limit - 4, 4);
}
}
protected:
u32 cpu_address_base = 0;
u32 cpu_address_range = 0;
address_range locked_range;
address_range cpu_range = {};
address_range confirmed_range;
weak_ptr super_ptr;
utils::protection protection = utils::protection::rw;
protection_policy guard_policy;
bool locked = false;
bool dirty = false;
inline void init_lockable_range(u32 base, u32 length)
inline void init_lockable_range(const address_range &range)
{
locked_address_base = (base & ~4095);
locked_range = range.to_page_range();
if ((guard_policy != protect_policy_full_range) && (length >= 4096))
if ((guard_policy != protect_policy_full_range) && (range.length() >= 4096))
{
const u32 limit = base + length;
const u32 block_end = (limit & ~4095);
const u32 block_start = (locked_address_base < base) ? (locked_address_base + 4096) : locked_address_base;
locked_address_range = 4096;
const u32 block_start = (locked_range.start < range.start) ? (locked_range.start + 4096u) : locked_range.start;
const u32 block_end = locked_range.end;
if (block_start < block_end)
{
//Page boundaries cover at least one unique page
locked_address_base = block_start;
// protect unique page range
locked_range.start = block_start;
locked_range.end = block_end;
}
if (guard_policy == protect_policy_conservative)
{
//Protect full unique range
locked_address_range = (block_end - block_start);
}
if (guard_policy == protect_policy_one_page)
{
// protect exactly one page
locked_range.set_length(4096u);
}
}
else
locked_address_range = align(base + length, 4096) - locked_address_base;
verify(HERE), locked_address_range > 0;
AUDIT( (locked_range.start == page_start(range.start)) || (locked_range.start == next_page(range.start)) );
AUDIT( locked_range.end <= page_end(range.end) );
verify(HERE), locked_range.is_page_range();
}
public:
buffered_section() {}
~buffered_section() {}
buffered_section() {};
~buffered_section() {};
void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range)
void reset(const address_range &memory_range)
{
verify(HERE), locked == false;
verify(HERE), memory_range.valid() && locked == false;
cpu_address_base = base;
cpu_address_range = length;
cpu_range = address_range(memory_range);
confirmed_range.invalidate();
locked_range.invalidate();
confirmed_range = { 0, 0 };
protection = utils::protection::rw;
guard_policy = protect_policy;
locked = false;
init_lockable_range(cpu_address_base, cpu_address_range);
super_ptr = {};
init_lockable_range(cpu_range);
}
void protect(utils::protection prot, bool force = false)
protected:
void invalidate_range()
{
if (prot == protection && !force) return;
ASSERT(!locked);
verify(HERE), locked_address_range > 0;
utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
protection = prot;
locked = prot != utils::protection::rw;
cpu_range.invalidate();
confirmed_range.invalidate();
locked_range.invalidate();
}
if (prot == utils::protection::no)
public:
void protect(utils::protection new_prot, bool force = false)
{
if (new_prot == protection && !force) return;
verify(HERE), locked_range.is_page_range();
AUDIT( !confirmed_range.valid() || confirmed_range.inside(cpu_range) );
#ifdef TEXTURE_CACHE_DEBUG
if (new_prot != protection || force)
{
locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range);
if (locked && !force) // When force=true, it is the responsibility of the caller to remove this section from the checker refcounting
tex_cache_checker.remove(locked_range, protection);
if (new_prot != utils::protection::rw)
tex_cache_checker.add(locked_range, new_prot);
}
#endif // TEXTURE_CACHE_DEBUG
rsx::memory_protect(locked_range, new_prot);
protection = new_prot;
locked = (protection != utils::protection::rw);
if (protection == utils::protection::no)
{
super_ptr = rsx::get_super_ptr(cpu_range);
verify(HERE), super_ptr;
tag_memory();
}
else
@ -129,255 +146,274 @@ namespace rsx
if (!locked)
{
//Unprotect range also invalidates secured range
confirmed_range = { 0, 0 };
confirmed_range.invalidate();
}
locked_memory_ptr = {};
super_ptr = {};
}
}
void protect(utils::protection prot, const std::pair<u32, u32>& range_confirm)
void protect(utils::protection prot, const std::pair<u32, u32>& new_confirm)
{
// new_confirm.first is an offset from cpu_range.start
// new_confirm.second is the length of the confirmed block, starting at cpu_range.start + new_confirm.first
#ifdef TEXTURE_CACHE_DEBUG
// We need to remove the lockable range from page_info as we will be re-protecting with force==true
if (locked)
tex_cache_checker.remove(locked_range, protection);
#endif
if (prot != utils::protection::rw)
{
const auto old_prot = protection;
const auto old_locked_base = locked_address_base;
const auto old_locked_length = locked_address_range;
if (confirmed_range.second)
if (confirmed_range.valid())
{
const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second);
confirmed_range.first = std::min(confirmed_range.first, range_confirm.first);
confirmed_range.second = range_limit - confirmed_range.first;
confirmed_range.start = std::min(confirmed_range.start, cpu_range.start + new_confirm.first);
confirmed_range.end = std::max(confirmed_range.end, cpu_range.start + new_confirm.first + new_confirm.second - 1);
}
else
{
confirmed_range = range_confirm;
confirmed_range = address_range::start_length(cpu_range.start + new_confirm.first, new_confirm.second);
ASSERT(!locked || locked_range.inside(confirmed_range.to_page_range()));
}
init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
verify(HERE), confirmed_range.inside(cpu_range);
init_lockable_range(confirmed_range);
}
protect(prot, true);
}
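// Usage sketch (hypothetical section and values): confirm that only 0x100 bytes
// starting 0x80 into this section are really used, then lock the section read-only.
// The pair is { offset from cpu_range.start, length }, as documented above:
//   section.protect(utils::protection::ro, { 0x80, 0x100 });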
void unprotect()
inline void unprotect()
{
AUDIT(protection != utils::protection::rw);
protect(utils::protection::rw);
}
void discard()
inline void discard()
{
#ifdef TEXTURE_CACHE_DEBUG
if (locked)
tex_cache_checker.remove(locked_range, protection);
#endif
protection = utils::protection::rw;
dirty = true;
confirmed_range.invalidate();
super_ptr = {};
locked = false;
confirmed_range = { 0, 0 };
locked_memory_ptr = {};
}
/**
* Check if range overlaps with this section.
* ignore_protection_range - if true, the test does not check against the aligned protection range;
* instead, it tests against the actual range of contents in memory
*/
bool overlaps(std::pair<u32, u32> range) const
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
bool overlaps(u32 address, overlap_test_bounds bounds) const
inline const address_range& get_bounds(section_bounds bounds) const
{
switch (bounds)
{
case overlap_test_bounds::full_range:
{
return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range);
}
case overlap_test_bounds::protected_range:
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
case overlap_test_bounds::confirmed_range:
{
const auto range = get_confirmed_range();
return ((range.first + cpu_address_base) <= address && (address - range.first) < range.second);
}
case section_bounds::full_range:
return cpu_range;
case section_bounds::locked_range:
return locked_range;
case section_bounds::confirmed_range:
return confirmed_range.valid() ? confirmed_range : cpu_range;
default:
fmt::throw_exception("Unreachable" HERE);
ASSUME(0);
}
}
bool overlaps(const std::pair<u32, u32>& range, overlap_test_bounds bounds) const
{
switch (bounds)
{
case overlap_test_bounds::full_range:
{
return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
}
case overlap_test_bounds::protected_range:
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
case overlap_test_bounds::confirmed_range:
{
const auto test_range = get_confirmed_range();
return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second);
}
default:
fmt::throw_exception("Unreachable" HERE);
}
}
/**
* Check if the page containing the address tramples this section. Also takes a previously trampled page range into account.
* If it overlaps, returns the <min, max> range updated with the new invalid range
* Overlapping checks
*/
std::tuple<bool, std::pair<u32, u32>> overlaps_page(const std::pair<u32, u32>& old_range, u32 address, overlap_test_bounds bounds) const
inline bool overlaps(const u32 address, section_bounds bounds) const
{
const u32 page_base = address & ~4095;
const u32 page_limit = page_base + 4096;
const u32 compare_min = std::min(old_range.first, page_base);
const u32 compare_max = std::max(old_range.second, page_limit);
u32 memory_base, memory_range;
switch (bounds)
{
case overlap_test_bounds::full_range:
{
memory_base = (cpu_address_base & ~4095);
memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base;
break;
}
case overlap_test_bounds::protected_range:
{
memory_base = locked_address_base;
memory_range = locked_address_range;
break;
}
case overlap_test_bounds::confirmed_range:
{
const auto range = get_confirmed_range();
memory_base = (cpu_address_base + range.first) & ~4095;
memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base;
break;
}
default:
fmt::throw_exception("Unreachable" HERE);
}
if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max))
return std::make_tuple(false, old_range);
const u32 _min = std::min(memory_base, compare_min);
const u32 _max = std::max(memory_base + memory_range, compare_max);
return std::make_tuple(true, std::make_pair(_min, _max));
return get_bounds(bounds).overlaps(address);
}
bool is_locked() const
inline bool overlaps(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other);
}
inline bool overlaps(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).overlaps(other.get_bounds(bounds));
}
inline bool inside(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other);
}
inline bool inside(const buffered_section &other, section_bounds bounds) const
{
return get_bounds(bounds).inside(other.get_bounds(bounds));
}
inline s32 signed_distance(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).signed_distance(other);
}
inline u32 distance(const address_range &other, section_bounds bounds) const
{
return get_bounds(bounds).distance(other);
}
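// Usage sketch (hypothetical section and range): the same query against different
// bounds. full_range tests the raw cpu range, locked_range the page-aligned
// protected pages, confirmed_range the confirmed sub-range (or the cpu range if
// nothing was confirmed):
//   const bool touches_data  = section.overlaps(range, section_bounds::full_range);
//   const bool touches_pages = section.overlaps(range, section_bounds::locked_range);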
/**
* Utilities
*/
inline bool valid_range() const
{
return cpu_range.valid();
}
inline bool is_locked() const
{
return locked;
}
bool is_dirty() const
inline u32 get_section_base() const
{
return dirty;
return cpu_range.start;
}
void set_dirty(bool state)
inline u32 get_section_size() const
{
dirty = state;
return cpu_range.valid() ? cpu_range.length() : 0;
}
u32 get_section_base() const
inline const address_range& get_locked_range() const
{
return cpu_address_base;
AUDIT( locked );
return locked_range;
}
u32 get_section_size() const
inline const address_range& get_section_range() const
{
return cpu_address_range;
return cpu_range;
}
bool matches(u32 cpu_address, u32 size) const
const address_range& get_confirmed_range() const
{
return (cpu_address_base == cpu_address && cpu_address_range == size);
return confirmed_range.valid() ? confirmed_range : cpu_range;
}
std::pair<u32, u32> get_min_max(const std::pair<u32, u32>& current_min_max) const
const std::pair<u32, u32> get_confirmed_range_delta() const
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
if (!confirmed_range.valid())
return { 0, cpu_range.length() };
return std::make_pair(min, max);
return { confirmed_range.start - cpu_range.start, confirmed_range.length() };
}
utils::protection get_protection() const
inline bool matches(const address_range &range) const
{
return cpu_range.valid() && cpu_range == range;
}
inline utils::protection get_protection() const
{
return protection;
}
template <typename T = void>
T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
inline address_range get_min_max(const address_range& current_min_max, section_bounds bounds) const
{
verify(HERE), locked_memory_ptr;
return locked_memory_ptr.get<T>(offset, no_sync);
return get_bounds(bounds).get_min_max(current_min_max);
}
/**
* Super Pointer
*/
template <typename T = void>
inline T* get_ptr_by_offset(u32 offset = 0, bool no_sync = false)
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + sizeof(T));
return super_ptr.get<T>(offset, no_sync);
}
// specialization due to sizeof(void) being illegal
inline void* get_ptr_by_offset(u32 offset, bool no_sync)
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + 1);
return super_ptr.get<void>(offset, no_sync);
}
template <typename T = void>
inline T* get_ptr(u32 address, bool no_sync = false)
{
verify(HERE), cpu_range.start <= address; // super_ptr & sizeof(T) tests are done by get_ptr_by_offset
return get_ptr_by_offset<T>(address - cpu_range.start, no_sync);
}
inline void flush_ptr_by_offset(u32 offset = 0, u32 len = 0) const
{
verify(HERE), super_ptr && cpu_range.length() >= (offset + len);
super_ptr.flush(offset, len);
}
inline void flush_ptr(u32 address, u32 len = 0) const
{
verify(HERE), cpu_range.start <= address; // super_ptr & length tests are done by flush_ptr_by_offset
return flush_ptr_by_offset(address - cpu_range.start, len);
}
inline void flush_ptr(const address_range &range) const
{
return flush_ptr(range.start, range.length());
}
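// Usage sketch (hypothetical section): while the pages are protected no-access,
// reads and writes must go through the super-pointer mirror instead of vm memory,
// and written ranges are flushed back explicitly:
//   u32* word = section.get_ptr<u32>(section.get_section_base() + 0x10);
//   *word = 0xDEADBEEF;
//   section.flush_ptr(section.get_section_base() + 0x10, sizeof(u32));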
/**
* Memory tagging
*/
private:
inline void tag_memory()
{
// Tagging is only needed when we do not guard the full range; a full-range guard already traps every write
if (guard_policy == protect_policy_full_range)
return;
AUDIT(locked && super_ptr);
const address_range& range = get_confirmed_range();
volatile u32* first = get_ptr<volatile u32>(range.start, true);
volatile u32* last = get_ptr<volatile u32>(range.end - 3, true);
*first = range.start;
*last = range.end;
flush_ptr(range.start, 4);
flush_ptr(range.end - 3, 4);
}
public:
bool test_memory_head()
{
if (!locked_memory_ptr)
{
return false;
}
if (guard_policy == protect_policy_full_range)
return true;
const u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
return (*first == (cpu_address_base + confirmed_range.first));
AUDIT(locked && super_ptr);
const auto& range = get_confirmed_range();
volatile const u32* first = get_ptr<volatile const u32>(range.start);
return (*first == range.start);
}
bool test_memory_tail()
{
if (!locked_memory_ptr)
{
return false;
}
if (guard_policy == protect_policy_full_range)
return true;
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
const u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
return (*last == (cpu_address_base + valid_limit - 4));
}
void flush_io(u32 offset = 0, u32 len = 0) const
{
const auto write_length = len ? len : (cpu_address_range - offset);
locked_memory_ptr.flush(offset, write_length);
}
std::pair<u32, u32> get_protected_range() const
{
if (locked)
{
return { locked_address_base, locked_address_range };
}
else
{
return { 0, 0 };
}
}
std::pair<u32, u32> get_confirmed_range() const
{
if (confirmed_range.second == 0)
{
return { 0, cpu_address_range };
}
return confirmed_range;
AUDIT(locked && super_ptr);
const auto& range = get_confirmed_range();
volatile const u32* last = get_ptr<volatile const u32>(range.end - 3);
return (*last == range.end);
}
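// Usage sketch (hypothetical caller): after unprotecting, the sentinel words
// written by tag_memory() reveal whether the guarded memory was overwritten
// behind the cache's back; a failed check means the cached copy is stale:
//   if (!section.test_memory_head() || !section.test_memory_tail())
//       /* treat the section contents as dirty and re-upload */;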
};
template <typename pipeline_storage_type, typename backend_storage>
class shaders_cache
{

View File

@ -76,6 +76,11 @@ namespace rsx
}
}
weak_ptr get_super_ptr(const address_range &range)
{
return get_super_ptr(range.start, range.length());
}
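// Equivalent calls (sketch, hypothetical page-aligned range): the new overload
// simply forwards start/length, so callers holding an address_range no longer
// need to unpack it at every call site:
//   auto a = get_super_ptr(range);
//   auto b = get_super_ptr(range.start, range.length());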
weak_ptr get_super_ptr(u32 addr, u32 len)
{
verify(HERE), g_current_renderer;
@ -507,4 +512,8 @@ namespace rsx
++src_ptr;
}
}
#ifdef TEXTURE_CACHE_DEBUG
tex_cache_checker_t tex_cache_checker = {};
#endif
}

View File

@ -1,8 +1,11 @@
#pragma once
#pragma once
#include "../System.h"
#include "Utilities/address_range.h"
#include "Utilities/geometry.h"
#include "Utilities/asm.h"
#include "Utilities/VirtualMemory.h"
#include "Emu/Memory/vm.h"
#include "gcm_enums.h"
#include <atomic>
#include <memory>
@ -16,6 +19,15 @@ extern "C"
namespace rsx
{
// Import address_range utilities
using utils::address_range;
using utils::address_range_vector;
using utils::page_for;
using utils::page_start;
using utils::page_end;
using utils::next_page;
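// Assumed semantics of the imported page helpers (4096-byte pages, inclusive
// range ends), e.g. for address 0x1234:
//   page_start(0x1234) -> 0x1000   first byte of the containing page
//   page_end(0x1234)   -> 0x1FFF   last byte of the containing page
//   next_page(0x1234)  -> 0x2000   first byte of the following page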
// Definitions
class thread;
extern thread* g_current_renderer;
@ -200,7 +212,14 @@ namespace rsx
}
};
//Holds information about a framebuffer
// Acquire memory mirror with r/w permissions
weak_ptr get_super_ptr(const address_range &range);
weak_ptr get_super_ptr(u32 addr, u32 size);
/**
* Holds information about a framebuffer
*/
struct gcm_framebuffer_info
{
u32 address = 0;
@ -223,6 +242,11 @@ namespace rsx
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
{}
address_range get_memory_range(u32 aa_factor = 1) const
{
return address_range::start_length(address, pitch * height * aa_factor);
}
};
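// Sketch (hypothetical values; illustrative helper, not part of the API): memory
// footprint of a 1280x720 surface with a 5120-byte pitch; the optional aa_factor
// scales the same range for multisampled modes.
static inline address_range example_fb_range()
{
    const gcm_framebuffer_info fb(0xC0200000, 5120, false,
        rsx::surface_color_format::a8r8g8b8, rsx::surface_depth_format::z24s8, 1280, 720);
    return fb.get_memory_range(); // 5120 * 720 bytes starting at 0xC0200000
}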
struct avconf
@ -463,9 +487,6 @@ namespace rsx
std::array<float, 4> get_constant_blend_colors();
// Acquire memory mirror with r/w permissions
weak_ptr get_super_ptr(u32 addr, u32 size);
/**
* Shuffle texel layout from xyzw to wzyx
* TODO: Variable src/dst and optional se conversion
@ -727,11 +748,6 @@ namespace rsx
return g_current_renderer;
}
static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
return (base1 < limit2 && base2 < limit1);
}
template <int N>
void unpack_bitset(std::bitset<N>& block, u64* values)
{
@ -768,4 +784,4 @@ namespace rsx
}
}
}
}
}

View File

@ -528,6 +528,8 @@
<ClInclude Include="Emu\RSX\Common\GLSLCommon.h" />
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h" />
<ClInclude Include="Emu\RSX\gcm_enums.h" />
<ClInclude Include="Emu\RSX\gcm_printing.h" />
<ClInclude Include="Emu\RSX\Overlays\overlays.h" />

View File

@ -1444,5 +1444,11 @@
<ClInclude Include="..\Utilities\address_range.h">
<Filter>Utilities</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
</Project>