rsx: Rework section reuse logic

Authored by kd-11 on 2018-08-22 16:59:15 +03:00; committed by kd-11
parent 874d18f761
commit f3d3a1a4a5
3 changed files with 220 additions and 98 deletions


@@ -325,8 +325,9 @@ namespace rsx
struct thrashed_set
{
bool violation_handled = false;
std::vector<section_storage_type*> sections_to_flush; //Sections to be flushed
std::vector<section_storage_type*> sections_to_unprotect; //These sections are to be unprotected and discarded by caller
std::vector<section_storage_type*> sections_to_flush; // Sections to be flushed
std::vector<section_storage_type*> sections_to_unprotect; // These sections are to be unprotected and discarded by caller
std::vector<section_storage_type*> sections_to_exclude; // These sections are to be excluded from protection manipulation (subtracted from other sections)
int num_flushable = 0;
u64 cache_tag = 0;
u32 address_base = 0;
@@ -564,7 +565,133 @@ namespace rsx
return true;
}
//Get intersecting set - Returns all objects intersecting a given range and their owning blocks
std::vector<std::pair<u32, u32>> subtractive_intersect(const std::vector<section_storage_type*>& marked_sections, const std::vector<section_storage_type*>& sections_to_exclude)
{
std::vector<std::pair<u32, u32>> result;
result.reserve(marked_sections.size());
auto in_range_inclusive = [](u32 base, u32 limit, u32 test) -> bool
{
return (base <= test && limit >= test);
};
for (const auto &section : marked_sections)
{
result.push_back(section->get_protected_range());
}
for (const auto &excluded : sections_to_exclude)
{
const auto exclusion_range = excluded->get_protected_range();
const auto exclude_start = exclusion_range.first;
const auto exclude_end = exclusion_range.first + exclusion_range.second;
for (int n = 0; n < result.size(); ++n)
{
auto &this_range = result[n];
if (!this_range.second)
{
// Null
continue;
}
const auto range_start = this_range.first;
const auto range_end = this_range.second + range_start;
if (!region_overlaps(exclude_start, exclude_end, range_start, range_end))
{
// No overlap, skip
continue;
}
const auto head_excluded = in_range_inclusive(exclude_start, exclude_end, range_start); // This section has its start inside excluded range
const auto tail_excluded = in_range_inclusive(exclude_start, exclude_end, range_end); // This section has its end inside excluded range
if (head_excluded && tail_excluded)
{
// Cannot be salvaged, fully excluded
this_range = { 0, 0 };
}
else if (head_excluded)
{
// Head overlaps, truncate head (shrink the length so the range end is unchanged)
this_range.second = range_end - exclude_end;
this_range.first = exclude_end;
}
else if (tail_excluded)
{
// Tail overlaps, truncate tail
this_range.second = exclude_start - range_start;
}
else
{
verify(HERE), (exclude_start > range_start && exclude_end < range_end);
// Section sits in the middle
this_range.second = exclude_start - range_start; // Head
result.push_back({ exclude_end, range_end - exclude_end }); // Tail
}
}
}
return result;
}
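// A minimal standalone sketch of the subtraction above, using plain
// {base, length} pairs in place of the cache's section types; the function
// name, addresses and driver below are illustrative, not part of the cache.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

using u32 = std::uint32_t;

static std::vector<std::pair<u32, u32>> subtract_range(std::vector<std::pair<u32, u32>> ranges, u32 ex_start, u32 ex_end)
{
    const std::size_t count = ranges.size(); // pieces appended below need no re-test

    for (std::size_t n = 0; n < count; ++n)
    {
        auto& r = ranges[n];
        if (!r.second)
            continue; // nulled by a previous exclusion

        const u32 start = r.first;
        const u32 end = start + r.second;

        if (ex_start >= end || start >= ex_end)
            continue; // no overlap

        if (ex_start <= start && ex_end >= end)
            r = { 0, 0 };                               // fully excluded
        else if (ex_start <= start)
            r = { ex_end, end - ex_end };               // truncate head
        else if (ex_end >= end)
            r.second = ex_start - start;                // truncate tail
        else
        {
            r.second = ex_start - start;                // keep head piece
            ranges.push_back({ ex_end, end - ex_end }); // keep tail piece
        }
    }

    return ranges;
}

int main()
{
    // An exclusion punched into the middle of one protected range splits it in two
    for (const auto& r : subtract_range({ { 0x10000, 0x3000 } }, 0x11000, 0x12000))
        std::printf("base=0x%x length=0x%x\n", r.first, r.second); // 0x10000+0x1000 and 0x12000+0x1000
}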
void unprotect_set(thrashed_set& data)
{
auto release_set = [this](std::vector<section_storage_type*>& _set)
{
for (auto& section : _set)
{
verify(HERE), section->is_flushed() || section->is_dirty();
section->unprotect();
m_cache[get_block_address(section->get_section_base())].remove_one();
}
};
auto discard_set = [this](std::vector<section_storage_type*>& _set)
{
for (auto& section : _set)
{
verify(HERE), section->is_flushed() || section->is_dirty();
const bool dirty = section->is_dirty();
section->discard();
section->set_dirty(dirty);
m_cache[get_block_address(section->get_section_base())].remove_one();
}
};
if (data.sections_to_exclude.empty())
{
release_set(data.sections_to_unprotect);
release_set(data.sections_to_flush);
}
else
{
auto removables = data.sections_to_unprotect;
removables.insert(removables.end(), data.sections_to_flush.begin(), data.sections_to_flush.end());
const auto intersect_info = subtractive_intersect(removables, data.sections_to_exclude);
for (const auto &range : intersect_info)
{
if (range.second)
{
utils::memory_protect(vm::base(range.first), range.second, utils::protection::rw);
}
}
discard_set(data.sections_to_unprotect);
discard_set(data.sections_to_flush);
}
}
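// Why the exclusion path discards instead of unprotecting: a blanket
// unprotect() on a released section would also strip the pages still owned
// by an excluded section. A minimal sketch of the subtractive rw restore for
// a single exclusion (addresses and names illustrative):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>

using u32 = std::uint32_t;

int main()
{
    const std::pair<u32, u32> removable = { 0x10000, 0x3000 }; // section being released
    const std::pair<u32, u32> excluded  = { 0x12000, 0x1000 }; // must stay protected

    const u32 r_start = removable.first, r_end = r_start + removable.second;
    const u32 e_start = excluded.first, e_end = e_start + excluded.second;

    // Pieces of the removable range that lie outside the exclusion (either may be empty)
    const u32 head_len = e_start > r_start ? std::min(e_start, r_end) - r_start : 0;
    const u32 tail_len = e_end < r_end ? r_end - std::max(e_end, r_start) : 0;

    if (head_len) std::printf("restore rw: 0x%x (+0x%x)\n", r_start, head_len);
    if (tail_len) std::printf("restore rw: 0x%x (+0x%x)\n", std::max(e_end, r_start), tail_len);

    // The released section itself is then discard()ed: bookkeeping is dropped
    // without touching page protections a second time.
}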
// Get intersecting set - Returns all objects intersecting a given range and their owning blocks
std::vector<std::pair<section_storage_type*, ranged_storage*>> get_intersecting_set(u32 address, u32 range)
{
std::vector<std::pair<section_storage_type*, ranged_storage*>> result;
@@ -585,7 +712,6 @@ namespace rsx
if (trampled_range.first <= trampled_range.second &&
!(trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second))
{
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
@@ -635,7 +761,7 @@ namespace rsx
//Invalidate range base implementation
template <typename ...Args>
thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool rebuild_cache, bool allow_flush, Args&&... extras)
thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool allow_flush, Args&&... extras)
{
if (!region_intersects_cache(address, range, is_writing))
return {};
@@ -644,52 +770,52 @@ namespace rsx
if (trampled_set.size() > 0)
{
const auto mem_base = (address & ~4095u);
const auto mem_end = (address + range + 4095u) & ~4095u;
const auto mem_range = std::make_pair(mem_base, mem_end - mem_base);
update_cache_tag();
bool deferred_flush = false;
bool allow_rebuild = true;
thrashed_set result = {};
result.violation_handled = true;
if (!discard_only && !allow_flush)
if (!discard_only)
{
for (auto &obj : trampled_set)
{
if (obj.first->is_flushable())
{
deferred_flush = true;
if (obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
{
// At least one section will introduce new data unconditionally
allow_rebuild = false;
break;
}
}
}
deferred_flush = !allow_rebuild && !allow_flush;
}
for (auto &obj : trampled_set)
{
if (!discard_only)
{
bool collateral = false;
if (!deferred_flush)
// NOTE: The memory test is page aligned to prevent continuously faulting in the page range
if (allow_rebuild && !obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
{
if (!is_writing && obj.first->get_protection() != utils::protection::no)
{
collateral = true;
}
else
{
if (rebuild_cache && allow_flush && obj.first->is_flushable())
{
const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
collateral = !std::get<0>(obj.first->overlaps_page(null_check, address, rsx::overlap_test_bounds::full_range));
}
}
}
if (collateral)
{
//False positive
// False positive
result.sections_to_exclude.push_back(obj.first);
continue;
}
else if (obj.first->is_flushable())
if (obj.first->is_flushable())
{
verify(HERE), !allow_rebuild;
//Write if and only if no one else has trashed section memory already
//TODO: Proper section management should prevent this from happening
//TODO: Blit engine section merge support and/or partial texture memory buffering
@@ -702,8 +828,11 @@ namespace rsx
}
//Contents clobbered, destroy this
if (!obj.first->is_dirty())
{
obj.first->set_dirty(true);
m_unreleased_texture_objects++;
}
result.sections_to_unprotect.push_back(obj.first);
}
@@ -738,35 +867,27 @@ namespace rsx
continue;
}
else if (deferred_flush)
else
{
//allow_flush = false and not synchronized
if (!obj.first->is_dirty())
{
obj.first->set_dirty(true);
m_unreleased_texture_objects++;
}
result.sections_to_unprotect.push_back(obj.first);
continue;
}
}
if (!obj.first->is_flushable())
{
obj.first->set_dirty(true);
m_unreleased_texture_objects++;
}
verify(HERE), discard_only;
m_unreleased_texture_objects++;
// Only unsynchronized (no-flush) sections should reach here, and only if the rendering thread is the caller
if (discard_only)
{
obj.first->discard();
obj.second->remove_one();
}
else
{
// Delay unprotect in case there are sections to flush
result.sections_to_unprotect.push_back(obj.first);
}
}
if (deferred_flush && result.sections_to_flush.size())
{
@@ -778,13 +899,7 @@ namespace rsx
}
else
{
//Flushes happen in one go, now it's time to remove protection
for (auto& section : result.sections_to_unprotect)
{
verify(HERE), section->is_flushed() || section->is_dirty();
section->unprotect();
m_cache[get_block_address(section->get_section_base())].remove_one();
}
unprotect_set(result);
}
//Everything has been handled
@@ -1065,10 +1180,13 @@ namespace rsx
m_flush_always_cache[memory_address] = memory_size;
//Test for invalidated sections from surface cache occupying same address range
const auto &overlapped = find_texture_from_range(memory_address, memory_size);
// Test for invalidated sections from surface cache occupying same address range
const auto mem_base = (memory_address & ~4095u);
const auto mem_end = (memory_address + memory_size + 4095u) & ~4095u;
const auto &overlapped = find_texture_from_range(mem_base, mem_end - mem_base);
if (overlapped.size() > 1)
{
const auto mem_range = std::make_pair(memory_address, memory_size);
for (auto surface : overlapped)
{
if (surface == &region)
@@ -1081,17 +1199,26 @@ namespace rsx
else
{
if (surface->get_section_base() != memory_address)
//HACK: preserve other overlapped sections despite overlap unless new section is superseding
//TODO: write memory to cell or redesign sections to preserve the data
// HACK: preserve other overlapped sections despite overlap unless new section is superseding
// TODO: write memory to cell or redesign sections to preserve the data
continue;
}
//Memory is shared with another surface
//Discard it - the backend should ensure memory contents are preserved if needed
// Memory is shared with another surface
// Discard it - the backend should ensure memory contents are preserved if needed
surface->set_dirty(true);
if (surface->is_locked())
{
if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail())
{
if (!surface->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
{
// TODO: This section contains data that should be flushed
LOG_TODO(RSX, "Flushable section data may have been lost (0x%x)", surface->get_section_base());
}
}
surface->unprotect();
m_cache[get_block_address(surface->get_section_base())].remove_one();
}
@@ -1219,7 +1346,7 @@ namespace rsx
return{};
writer_lock lock(m_cache_mutex);
return invalidate_range_impl_base(address, range, is_writing, false, true, allow_flush, std::forward<Args>(extras)...);
return invalidate_range_impl_base(address, range, is_writing, false, allow_flush, std::forward<Args>(extras)...);
}
template <typename ...Args>
@@ -1230,7 +1357,7 @@ namespace rsx
return {};
writer_lock lock(m_cache_mutex);
return invalidate_range_impl_base(address, range, is_writing, discard, false, allow_flush, std::forward<Args>(extras)...);
return invalidate_range_impl_base(address, range, is_writing, discard, allow_flush, std::forward<Args>(extras)...);
}
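// For reference, the positional flags in the reworked call sites above map to
// the new signature as follows (annotations deduced from the implementation):
//
//   invalidate_range_impl_base(address, range,
//                              is_writing,   // access type that triggered the invalidation
//                              discard_only, // true: drop contents without flushing back
//                              allow_flush,  // false: defer flushing to the caller
//                              extras...);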
template <typename ...Args>
@@ -1267,28 +1394,12 @@ namespace rsx
}
//2. Release all obsolete sections
for (auto &tex : data.sections_to_unprotect)
{
if (tex->is_locked())
{
verify(HERE), tex->is_dirty();
tex->unprotect();
m_cache[get_block_address(tex->get_section_base())].remove_one();
}
}
//3. Release all flushed sections
for (auto &tex : data.sections_to_flush)
{
tex->unprotect();
m_cache[get_block_address(tex->get_section_base())].remove_one();
}
unprotect_set(data);
}
else
{
//The cache contents have changed between the two readings. This means the data held is useless
update_cache_tag();
invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, true, std::forward<Args>(extras)...);
// The cache contents have changed between the two readings. This means the data held is useless
invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, std::forward<Args>(extras)...);
}
return true;
@@ -1930,7 +2041,7 @@ namespace rsx
}
//Invalidate with writing=false, discard=false, native_flush=true
invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(texaddr, tex_size, false, false, true, std::forward<Args>(extras)...);
//NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB
m_texture_memory_in_use += (tex_pitch * tex_height);
@@ -2073,8 +2184,8 @@ namespace rsx
const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
lock.upgrade();
invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, true, std::forward<Args>(extras)...);
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
@@ -2208,7 +2319,7 @@ namespace rsx
{
lock.upgrade();
invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, true, std::forward<Args>(extras)...);
const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
std::vector<rsx_subresource_layout> subresource_layout;
@@ -2304,7 +2415,7 @@ namespace rsx
cached_dest->set_dirty(true);
m_unreleased_texture_objects++;
invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, true, std::forward<Args>(extras)...);
dest_texture = 0;
cached_dest = nullptr;
@@ -2312,7 +2423,7 @@ namespace rsx
else if (invalidate_dst_range)
{
lock.upgrade();
invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, false, true, std::forward<Args>(extras)...);
invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, true, std::forward<Args>(extras)...);
}
u32 gcm_format;


@@ -60,11 +60,6 @@ namespace rsx
bool locked = false;
bool dirty = false;
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) const
{
return (base1 < limit2 && base2 < limit1);
}
inline void init_lockable_range(u32 base, u32 length)
{
locked_address_base = (base & ~4095);
@@ -115,9 +110,9 @@ namespace rsx
init_lockable_range(cpu_address_base, cpu_address_range);
}
void protect(utils::protection prot)
void protect(utils::protection prot, bool force = false)
{
if (prot == protection) return;
if (prot == protection && !force) return;
verify(HERE), locked_address_range > 0;
utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
@@ -148,7 +143,6 @@ namespace rsx
const auto old_prot = protection;
const auto old_locked_base = locked_address_base;
const auto old_locked_length = locked_address_range;
protection = utils::protection::rw;
if (confirmed_range.second)
{
@@ -164,7 +158,7 @@ namespace rsx
init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
}
protect(prot);
protect(prot, true);
}
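// The force flag matters because the confirmed-range path above re-derives the
// lockable range: the protection value may be unchanged while the pages it
// must cover are not, and the early-out would otherwise skip the reprotect.
// A simplified mock of that interaction (not the real buffered_section):

#include <cstdio>

enum class protection_mode { rw, ro, no };

struct toy_region
{
    unsigned base = 0, length = 0;
    protection_mode prot = protection_mode::rw;

    void protect(protection_mode p, bool force = false)
    {
        if (p == prot && !force)
            return; // safe only while the protected span itself is unchanged

        std::printf("mprotect 0x%x (+0x%x)\n", base, length);
        prot = p;
    }

    void confirm_range(unsigned new_base, unsigned new_length)
    {
        const auto old_prot = prot;
        base = new_base; // lockable range re-initialized
        length = new_length;
        protect(old_prot, true); // same enum value, different pages: must force
    }
};

int main()
{
    toy_region r{ 0x10000, 0x3000, protection_mode::no };
    r.confirm_range(0x11000, 0x1000); // re-applies ::no to the shrunk range
}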
void unprotect()
@@ -358,6 +352,18 @@ namespace rsx
locked_memory_ptr.flush(offset, write_length);
}
std::pair<u32, u32> get_protected_range() const
{
if (locked)
{
return { locked_address_base, locked_address_range };
}
else
{
return { 0, 0 };
}
}
std::pair<u32, u32> get_confirmed_range() const
{
if (confirmed_range.second == 0)


@@ -733,6 +733,11 @@ namespace rsx
return g_current_renderer;
}
static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
return (base1 < limit2 && base2 < limit1);
}
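// The predicate treats [base, limit) as half-open, so ranges that merely touch
// at a boundary do not overlap. Quick illustrative checks (standalone copy):

#include <cassert>
#include <cstdint>

using u32 = std::uint32_t;

static bool overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
    return (base1 < limit2 && base2 < limit1); // same predicate as above
}

int main()
{
    assert(overlaps(0x1000, 0x2000, 0x1800, 0x2800));  // partial overlap
    assert(!overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // adjacent only
    assert(overlaps(0x1000, 0x2000, 0x1000, 0x2000));  // identical ranges
}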
template <int N>
void unpack_bitset(std::bitset<N>& block, u64* values)
{