mirror of https://github.com/RPCS3/rpcs3.git, synced 2025-01-30 03:32:55 +00:00

rsx: Rework section reuse logic

This commit is contained in:
parent 874d18f761
commit f3d3a1a4a5
@@ -325,8 +325,9 @@ namespace rsx
 	struct thrashed_set
 	{
 		bool violation_handled = false;
-		std::vector<section_storage_type*> sections_to_flush; //Sections to be flushed
-		std::vector<section_storage_type*> sections_to_unprotect; //These sections are to be unpotected and discarded by caller
+		std::vector<section_storage_type*> sections_to_flush; // Sections to be flushed
+		std::vector<section_storage_type*> sections_to_unprotect; // These sections are to be unprotected and discarded by the caller
+		std::vector<section_storage_type*> sections_to_exclude; // These sections are to be excluded from protection manipulation (subtracted from other sections)
 		int num_flushable = 0;
 		u64 cache_tag = 0;
 		u32 address_base = 0;
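For context, a minimal sketch (hypothetical caller, simplified types and names, not part of this commit) of how the reworked thrashed_set is meant to be consumed on the deferred-flush path: flush the flushable sections first, then hand the whole set to the new unprotect_set() introduced further down, so the excluded sections are honoured when protection is removed:

	// Hypothetical consumer of thrashed_set (simplified; flush() stands in
	// for the backend's actual flush operation and extras are omitted)
	thrashed_set result = invalidate_range_impl_base(addr, len,
		true  /* is_writing */, false /* discard_only */,
		false /* allow_flush -> defer the flush */);

	if (result.violation_handled && !result.sections_to_flush.empty())
	{
		for (auto* section : result.sections_to_flush)
			section->flush();   // write cached data back to guest memory

		unprotect_set(result);  // unprotect everything except excluded ranges
	}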
@@ -564,7 +565,133 @@ namespace rsx
 			return true;
 		}
 
-		//Get intersecting set - Returns all objects intersecting a given range and their owning blocks
+		// Subtractive intersect - Returns the protected ranges of marked_sections with the ranges of sections_to_exclude cut out
+		std::vector<std::pair<u32, u32>> subtractive_intersect(std::vector<section_storage_type*> marked_sections, std::vector<section_storage_type*> sections_to_exclude)
+		{
+			std::vector<std::pair<u32, u32>> result;
+			result.reserve(marked_sections.size());
+
+			auto in_range_inclusive = [](u32 base, u32 limit, u32 test) -> bool
+			{
+				return (base <= test && limit >= test);
+			};
+
+			for (const auto &section : marked_sections)
+			{
+				result.push_back(section->get_protected_range());
+			}
+
+			for (const auto &excluded : sections_to_exclude)
+			{
+				const auto exclusion_range = excluded->get_protected_range();
+				const auto exclude_start = exclusion_range.first;
+				const auto exclude_end = exclusion_range.first + exclusion_range.second;
+
+				for (int n = 0; n < result.size(); ++n)
+				{
+					auto &this_range = result[n];
+
+					if (!this_range.second)
+					{
+						// Null range
+						continue;
+					}
+
+					const auto range_start = this_range.first;
+					const auto range_end = this_range.second + range_start;
+
+					if (!region_overlaps(exclude_start, exclude_end, range_start, range_end))
+					{
+						// No overlap, skip
+						continue;
+					}
+
+					const auto head_excluded = in_range_inclusive(exclude_start, exclude_end, range_start); // This section has its start inside the excluded range
+					const auto tail_excluded = in_range_inclusive(exclude_start, exclude_end, range_end); // This section has its end inside the excluded range
+
+					if (head_excluded && tail_excluded)
+					{
+						// Cannot be salvaged, fully excluded
+						this_range = { 0, 0 };
+					}
+					else if (head_excluded)
+					{
+						// Head overlaps, truncate head; the stored length must shrink as well since ranges are {base, length} pairs
+						this_range.second = range_end - exclude_end;
+						this_range.first = exclude_end;
+					}
+					else if (tail_excluded)
+					{
+						// Tail overlaps, truncate tail
+						this_range.second = exclude_start - range_start;
+					}
+					else
+					{
+						verify(HERE), (exclude_start > range_start && exclude_end < range_end);
+
+						// Excluded range sits in the middle; split into a surviving head and tail
+						this_range.second = exclude_start - range_start; // Head
+						result.push_back({ exclude_end, range_end - exclude_end }); // Tail
+					}
+				}
+			}
+
+			return result;
+		}
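To make the subtraction concrete, here is a self-contained sketch of the same algorithm over plain {base, length} pairs (illustrative only, not part of the commit). With a marked range [0x1000, 0x3000) and an excluded range [0x1800, 0x2000), the head [0x1000, 0x1800) and the tail [0x2000, 0x3000) survive as two separate entries:

	#include <cstdint>
	#include <cstdio>
	#include <utility>
	#include <vector>

	using range_t = std::pair<uint32_t, uint32_t>; // {base, length}

	// Subtract one exclusion range from every range in the set
	std::vector<range_t> subtract(const std::vector<range_t>& ranges, range_t excl)
	{
		const uint32_t ex_start = excl.first;
		const uint32_t ex_end   = excl.first + excl.second;
		std::vector<range_t> out;

		for (const auto& r : ranges)
		{
			const uint32_t start = r.first, end = r.first + r.second;
			if (ex_end <= start || end <= ex_start) { out.push_back(r); continue; } // no overlap
			if (start < ex_start) out.push_back({ start, ex_start - start });       // surviving head
			if (ex_end < end)     out.push_back({ ex_end, end - ex_end });          // surviving tail
		}
		return out;
	}

	int main()
	{
		for (const auto& r : subtract({ { 0x1000, 0x2000 } }, { 0x1800, 0x800 }))
			std::printf("[0x%x, 0x%x)\n", r.first, r.first + r.second); // [0x1000, 0x1800) then [0x2000, 0x3000)
	}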
+
+		void unprotect_set(thrashed_set& data)
+		{
+			auto release_set = [this](std::vector<section_storage_type*>& _set)
+			{
+				for (auto& section : _set)
+				{
+					verify(HERE), section->is_flushed() || section->is_dirty();
+
+					section->unprotect();
+					m_cache[get_block_address(section->get_section_base())].remove_one();
+				}
+			};
+
+			auto discard_set = [this](std::vector<section_storage_type*>& _set)
+			{
+				for (auto& section : _set)
+				{
+					verify(HERE), section->is_flushed() || section->is_dirty();
+
+					const bool dirty = section->is_dirty();
+					section->discard();
+					section->set_dirty(dirty);
+					m_cache[get_block_address(section->get_section_base())].remove_one();
+				}
+			};
+
+			if (data.sections_to_exclude.empty())
+			{
+				// Fast path: no exclusions, release every section wholesale
+				release_set(data.sections_to_unprotect);
+				release_set(data.sections_to_flush);
+			}
+			else
+			{
+				auto removables = data.sections_to_unprotect;
+				if (!data.sections_to_flush.empty())
+				{
+					removables.resize(removables.size() + data.sections_to_flush.size());
+					std::copy(data.sections_to_flush.begin(), data.sections_to_flush.end(), removables.begin() + data.sections_to_unprotect.size());
+				}
+
+				// Reprotect to rw only the parts of the removable ranges that do not belong to excluded sections
+				const auto intersect_info = subtractive_intersect(removables, data.sections_to_exclude);
+				for (const auto &range : intersect_info)
+				{
+					if (range.second)
+					{
+						utils::memory_protect(vm::base(range.first), range.second, utils::protection::rw);
+					}
+				}
+
+				discard_set(data.sections_to_unprotect);
+				discard_set(data.sections_to_flush);
+			}
+		}
+
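A small aside on the concatenation above: the resize-plus-std::copy pair is equivalent to a single range insert, which avoids computing the destination offset by hand; an illustrative alternative:

	// Equivalent concatenation using vector::insert (illustrative only)
	auto removables = data.sections_to_unprotect;
	removables.insert(removables.end(),
		data.sections_to_flush.begin(), data.sections_to_flush.end());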
+		// Get intersecting set - Returns all objects intersecting a given range and their owning blocks
 		std::vector<std::pair<section_storage_type*, ranged_storage*>> get_intersecting_set(u32 address, u32 range)
 		{
 			std::vector<std::pair<section_storage_type*, ranged_storage*>> result;
@@ -585,7 +712,6 @@ namespace rsx
 			if (trampled_range.first <= trampled_range.second &&
 				!(trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second))
 			{
-
 				for (int i = 0; i < range_data.data.size(); i++)
 				{
 					auto &tex = range_data.data[i];
@@ -635,7 +761,7 @@ namespace rsx
 
 		//Invalidate range base implementation
 		template <typename ...Args>
-		thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool rebuild_cache, bool allow_flush, Args&&... extras)
+		thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool allow_flush, Args&&... extras)
 		{
 			if (!region_intersects_cache(address, range, is_writing))
 				return {};
@@ -644,52 +770,52 @@ namespace rsx
 
 			if (trampled_set.size() > 0)
 			{
+				const auto mem_base = (address & ~4095u);
+				const auto mem_end = (address + range + 4095u) & ~4095u;
+				const auto mem_range = std::make_pair(mem_base, mem_end - mem_base);
+
 				update_cache_tag();
 
 				bool deferred_flush = false;
+				bool allow_rebuild = true;
 
 				thrashed_set result = {};
 				result.violation_handled = true;
 
-				if (!discard_only && !allow_flush)
+				if (!discard_only)
 				{
 					for (auto &obj : trampled_set)
 					{
-						if (obj.first->is_flushable())
+						if (obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
 						{
-							deferred_flush = true;
+							// At least one section will introduce new data unconditionally
+							allow_rebuild = false;
 							break;
 						}
 					}
+
+					deferred_flush = !allow_rebuild && !allow_flush;
 				}
 
 				for (auto &obj : trampled_set)
 				{
 					if (!discard_only)
 					{
 						bool collateral = false;
-						if (!deferred_flush)
+
+						// NOTE: The memory test is page aligned to prevent continuously faulting in the page range
+						if (allow_rebuild && !obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
 						{
 							if (!is_writing && obj.first->get_protection() != utils::protection::no)
 							{
 								collateral = true;
 							}
-							else
-							{
-								if (rebuild_cache && allow_flush && obj.first->is_flushable())
-								{
-									const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
-									collateral = !std::get<0>(obj.first->overlaps_page(null_check, address, rsx::overlap_test_bounds::full_range));
-								}
-							}
 						}
 
 						if (collateral)
 						{
-							//False positive
+							// False positive
 							result.sections_to_exclude.push_back(obj.first);
 							continue;
 						}
-						else if (obj.first->is_flushable())
+
+						if (obj.first->is_flushable())
 						{
+							verify(HERE), !allow_rebuild;
+
 							//Write if and only if no one else has trashed section memory already
 							//TODO: Proper section management should prevent this from happening
 							//TODO: Blit engine section merge support and/or partial texture memory buffering
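A note on the new mem_base/mem_end computation above: the faulting range is rounded outward to 4 KiB page boundaries, matching the page granularity of memory protection; as the in-diff NOTE says, this prevents continuously faulting within the same page range. A quick standalone check of the arithmetic (assuming a 4096-byte page size):

	#include <cassert>
	#include <cstdint>

	int main()
	{
		const uint32_t address = 0x1234, range = 0x10;
		const uint32_t mem_base = address & ~4095u;                   // rounds down to 0x1000
		const uint32_t mem_end  = (address + range + 4095u) & ~4095u; // rounds up to 0x2000
		assert(mem_base == 0x1000 && mem_end == 0x2000);
	}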
@@ -702,8 +828,11 @@ namespace rsx
 					}
 
 					//Contents clobbered, destroy this
-					obj.first->set_dirty(true);
-					m_unreleased_texture_objects++;
+					if (!obj.first->is_dirty())
+					{
+						obj.first->set_dirty(true);
+						m_unreleased_texture_objects++;
+					}
 
 					result.sections_to_unprotect.push_back(obj.first);
 				}
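The new is_dirty() guard presumably protects against a section being visited by more than one overlapping invalidation before it is released: counting m_unreleased_texture_objects only on the clean-to-dirty transition keeps the tally from drifting upward. A standalone sketch of the pattern:

	#include <cassert>

	struct section { bool dirty = false; };

	int unreleased = 0;

	void mark_dirty(section& s)
	{
		if (!s.dirty) // count only the first, clean-to-dirty transition
		{
			s.dirty = true;
			++unreleased;
		}
	}

	int main()
	{
		section s;
		mark_dirty(s);
		mark_dirty(s); // second visit is a no-op
		assert(unreleased == 1);
	}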
@@ -738,34 +867,26 @@ namespace rsx
 
 						continue;
 					}
-					else if (deferred_flush)
+					else
 					{
-						//allow_flush = false and not synchronized
-						obj.first->set_dirty(true);
-						m_unreleased_texture_objects++;
+						if (!obj.first->is_dirty())
+						{
+							obj.first->set_dirty(true);
+							m_unreleased_texture_objects++;
+						}
 
 						result.sections_to_unprotect.push_back(obj.first);
 						continue;
 					}
 				}
 
-				if (!obj.first->is_flushable())
-				{
-					obj.first->set_dirty(true);
-					m_unreleased_texture_objects++;
-				}
+				verify(HERE), discard_only;
 
-				if (discard_only)
-				{
-					obj.first->discard();
-					obj.second->remove_one();
-				}
-				else
-				{
-					// Delay unprotect in case there are sections to flush
-					result.sections_to_unprotect.push_back(obj.first);
-				}
+				// Only unsynchronized (no-flush) sections should reach here, and only if the rendering thread is the caller
+				m_unreleased_texture_objects++;
+
+				obj.first->discard();
+				obj.second->remove_one();
 			}
 
 			if (deferred_flush && result.sections_to_flush.size())
@@ -778,13 +899,7 @@ namespace rsx
 			}
 			else
 			{
-				//Flushes happen in one go, now its time to remove protection
-				for (auto& section : result.sections_to_unprotect)
-				{
-					verify(HERE), section->is_flushed() || section->is_dirty();
-					section->unprotect();
-					m_cache[get_block_address(section->get_section_base())].remove_one();
-				}
+				unprotect_set(result);
 			}
 
 			//Everything has been handled
@@ -1065,10 +1180,13 @@ namespace rsx
 
 			m_flush_always_cache[memory_address] = memory_size;
 
-			//Test for invalidated sections from surface cache occupying same address range
-			const auto &overlapped = find_texture_from_range(memory_address, memory_size);
+			// Test for invalidated sections from surface cache occupying the same address range
+			const auto mem_base = (memory_address & ~4095u);
+			const auto mem_end = (memory_address + memory_size + 4095u) & ~4095u;
+			const auto &overlapped = find_texture_from_range(mem_base, mem_end - mem_base);
 			if (overlapped.size() > 1)
 			{
+				const auto mem_range = std::make_pair(memory_address, memory_size);
 				for (auto surface : overlapped)
 				{
 					if (surface == &region)
@@ -1081,17 +1199,26 @@ namespace rsx
 				else
 				{
 					if (surface->get_section_base() != memory_address)
-						//HACK: preserve other overlapped sections despite overlap unless new section is superseding
-						//TODO: write memory to cell or redesign sections to preserve the data
+						// HACK: preserve other overlapped sections despite overlap unless the new section is superseding
+						// TODO: write memory to cell or redesign sections to preserve the data
 						continue;
 				}
 
-				//Memory is shared with another surface
-				//Discard it - the backend should ensure memory contents are preserved if needed
+				// Memory is shared with another surface
+				// Discard it - the backend should ensure memory contents are preserved if needed
 				surface->set_dirty(true);
 
 				if (surface->is_locked())
 				{
+					if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail())
+					{
+						if (!surface->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
+						{
+							// TODO: This section contains data that should be flushed
+							LOG_TODO(RSX, "Flushable section data may have been lost (0x%x)", surface->get_section_base());
+						}
+					}
+
 					surface->unprotect();
 					m_cache[get_block_address(surface->get_section_base())].remove_one();
 				}
@@ -1219,7 +1346,7 @@ namespace rsx
 				return{};
 
 			writer_lock lock(m_cache_mutex);
-			return invalidate_range_impl_base(address, range, is_writing, false, true, allow_flush, std::forward<Args>(extras)...);
+			return invalidate_range_impl_base(address, range, is_writing, false, allow_flush, std::forward<Args>(extras)...);
 		}
 
 		template <typename ...Args>
@@ -1230,7 +1357,7 @@ namespace rsx
 				return {};
 
 			writer_lock lock(m_cache_mutex);
-			return invalidate_range_impl_base(address, range, is_writing, discard, false, allow_flush, std::forward<Args>(extras)...);
+			return invalidate_range_impl_base(address, range, is_writing, discard, allow_flush, std::forward<Args>(extras)...);
 		}
 
 		template <typename ...Args>
@@ -1267,28 +1394,12 @@ namespace rsx
 			}
 
-			//2. Release all obsolete sections
-			for (auto &tex : data.sections_to_unprotect)
-			{
-				if (tex->is_locked())
-				{
-					verify(HERE), tex->is_dirty();
-					tex->unprotect();
-					m_cache[get_block_address(tex->get_section_base())].remove_one();
-				}
-			}
-
-			//3. Release all flushed sections
-			for (auto &tex : data.sections_to_flush)
-			{
-				tex->unprotect();
-				m_cache[get_block_address(tex->get_section_base())].remove_one();
-			}
+			// 2. Release all obsolete and flushed sections in one go, honouring exclusions
+			unprotect_set(data);
 		}
 		else
 		{
-			//The cache contents have changed between the two readings. This means the data held is useless
-			update_cache_tag();
-			invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, true, std::forward<Args>(extras)...);
+			// The cache contents have changed between the two readings. This means the data held is useless
+			invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, std::forward<Args>(extras)...);
 		}
 
 		return true;
@@ -1930,7 +2041,7 @@ namespace rsx
 		}
 
-		//Invalidate with writing=false, discard=false, rebuild=false, native_flush=true
-		invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward<Args>(extras)...);
+		//Invalidate with writing=false, discard=false, native_flush=true
+		invalidate_range_impl_base(texaddr, tex_size, false, false, true, std::forward<Args>(extras)...);
 
 		//NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB
 		m_texture_memory_in_use += (tex_pitch * tex_height);
@@ -2073,8 +2184,8 @@ namespace rsx
 			const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
 
 			lock.upgrade();
-			invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, false, true, std::forward<Args>(extras)...);
-			invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, true, std::forward<Args>(extras)...);
 			memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
 			return true;
 		}
@@ -2208,7 +2319,7 @@ namespace rsx
 		{
 			lock.upgrade();
 
-			invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, true, std::forward<Args>(extras)...);
 
 			const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
 			std::vector<rsx_subresource_layout> subresource_layout;
@@ -2304,7 +2415,7 @@ namespace rsx
 			cached_dest->set_dirty(true);
 			m_unreleased_texture_objects++;
 
-			invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, true, std::forward<Args>(extras)...);
 
 			dest_texture = 0;
 			cached_dest = nullptr;
@@ -2312,7 +2423,7 @@ namespace rsx
 		else if (invalidate_dst_range)
 		{
 			lock.upgrade();
-			invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, true, std::forward<Args>(extras)...);
 		}
 
 		u32 gcm_format;

@@ -60,11 +60,6 @@ namespace rsx
 		bool locked = false;
 		bool dirty = false;
 
-		inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) const
-		{
-			return (base1 < limit2 && base2 < limit1);
-		}
-
 		inline void init_lockable_range(u32 base, u32 length)
 		{
 			locked_address_base = (base & ~4095);
@@ -115,9 +110,9 @@ namespace rsx
 			init_lockable_range(cpu_address_base, cpu_address_range);
 		}
 
-		void protect(utils::protection prot)
+		void protect(utils::protection prot, bool force = false)
 		{
-			if (prot == protection) return;
+			if (prot == protection && !force) return;
 
 			verify(HERE), locked_address_range > 0;
 			utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
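The force flag pairs with the change below: the cached protection value is no longer reset to rw before the lockable range is re-initialised, so prot can match the cached value even though the newly computed range has never actually been protected. Without force, protect() would early-out and leave the new range unprotected. A simplified sketch of the hazard, assuming only the members visible in this diff:

	// Simplified sketch: why protect(prot, true) is required after the lockable range changes
	void protect(utils::protection prot, bool force = false)
	{
		// The early-out is only safe when the underlying locked range is unchanged;
		// `force` overrides it after init_lockable_range() picks a new base/length,
		// where `prot` may equal the cached value while the new pages are unprotected.
		if (prot == protection && !force) return;

		verify(HERE), locked_address_range > 0;
		utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
		// ... cached protection state updated here ...
	}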
@@ -148,7 +143,6 @@ namespace rsx
 			const auto old_prot = protection;
 			const auto old_locked_base = locked_address_base;
 			const auto old_locked_length = locked_address_range;
-			protection = utils::protection::rw;
 
 			if (confirmed_range.second)
 			{
@@ -164,7 +158,7 @@ namespace rsx
 				init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
 			}
 
-			protect(prot);
+			protect(prot, true);
 		}
 
 		void unprotect()
@@ -358,6 +352,18 @@ namespace rsx
 			locked_memory_ptr.flush(offset, write_length);
 		}
 
+		std::pair<u32, u32> get_protected_range() const
+		{
+			if (locked)
+			{
+				return { locked_address_base, locked_address_range };
+			}
+			else
+			{
+				return { 0, 0 };
+			}
+		}
+
 		std::pair<u32, u32> get_confirmed_range() const
 		{
 			if (confirmed_range.second == 0)
@@ -733,6 +733,11 @@ namespace rsx
 		return g_current_renderer;
 	}
 
+	static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
+	{
+		return (base1 < limit2 && base2 < limit1);
+	}
+
 	template <int N>
 	void unpack_bitset(std::bitset<N>& block, u64* values)
 	{
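The relocated region_overlaps helper (moved out of the section class so both the cache and its sections can share it) treats ranges as half-open [base, limit): two ranges overlap only if each one starts before the other ends, so merely adjacent ranges do not count. A standalone check:

	#include <cassert>
	#include <cstdint>

	static inline bool region_overlaps(uint32_t base1, uint32_t limit1, uint32_t base2, uint32_t limit2)
	{
		return (base1 < limit2 && base2 < limit1);
	}

	int main()
	{
		assert(region_overlaps(0x1000, 0x2000, 0x1800, 0x2800));  // partial overlap
		assert(!region_overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // adjacent only, no overlap
	}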