From f3d3a1a4a5d7d32f4fbcf2057d7272ea40beeb2b Mon Sep 17 00:00:00 2001
From: kd-11
Date: Wed, 22 Aug 2018 16:59:15 +0300
Subject: [PATCH] rsx: Rework section reuse logic

---
 rpcs3/Emu/RSX/Common/texture_cache.h | 289 ++++++++++++++++++---------
 rpcs3/Emu/RSX/rsx_cache.h            |  24 ++-
 rpcs3/Emu/RSX/rsx_utils.h            |   5 +
 3 files changed, 220 insertions(+), 98 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h
index f3ef02884c..49d41f16b5 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache.h
@@ -325,8 +325,9 @@ namespace rsx
 		struct thrashed_set
 		{
 			bool violation_handled = false;
-			std::vector<section_storage_type*> sections_to_flush; //Sections to be flushed
-			std::vector<section_storage_type*> sections_to_unprotect; //These sections are to be unpotected and discarded by caller
+			std::vector<section_storage_type*> sections_to_flush; // Sections to be flushed
+			std::vector<section_storage_type*> sections_to_unprotect; // These sections are to be unprotected and discarded by caller
+			std::vector<section_storage_type*> sections_to_exclude; // These sections are to be excluded from protection manipulation (subtracted from other sections)
 			int num_flushable = 0;
 			u64 cache_tag = 0;
 			u32 address_base = 0;
@@ -564,7 +565,133 @@ namespace rsx
 			return true;
 		}
 
-		//Get intersecting set - Returns all objects intersecting a given range and their owning blocks
+		std::vector<std::pair<u32, u32>> subtractive_intersect(std::vector<section_storage_type*> marked_sections, std::vector<section_storage_type*> sections_to_exclude)
+		{
+			std::vector<std::pair<u32, u32>> result;
+			result.reserve(marked_sections.size());
+
+			auto in_range_inclusive = [](u32 base, u32 limit, u32 test) -> bool
+			{
+				return (base <= test && limit >= test);
+			};
+
+			for (const auto &section : marked_sections)
+			{
+				result.push_back(section->get_protected_range());
+			}
+
+			for (const auto &excluded : sections_to_exclude)
+			{
+				const auto exclusion_range = excluded->get_protected_range();
+				const auto exclude_start = exclusion_range.first;
+				const auto exclude_end = exclusion_range.first + exclusion_range.second;
+
+				for (int n = 0; n < result.size(); ++n)
+				{
+					auto &this_range = result[n];
+
+					if (!this_range.second)
+					{
+						// Null
+						continue;
+					}
+
+					const auto range_start = this_range.first;
+					const auto range_end = this_range.second + range_start;
+
+					if (!region_overlaps(exclude_start, exclude_end, range_start, range_end))
+					{
+						// No overlap, skip
+						continue;
+					}
+
+					const auto head_excluded = in_range_inclusive(exclude_start, exclude_end, range_start); // This section has its start inside excluded range
+					const auto tail_excluded = in_range_inclusive(exclude_start, exclude_end, range_end); // This section has its end inside excluded range
+
+					if (head_excluded && tail_excluded)
+					{
+						// Cannot be salvaged, fully excluded
+						this_range = { 0, 0 };
+					}
+					else if (head_excluded)
+					{
+						// Head overlaps, truncate head; the length must shrink by the excluded amount
+						this_range.first = exclude_end;
+						this_range.second = range_end - exclude_end;
+					}
+					else if (tail_excluded)
+					{
+						// Tail overlaps, truncate tail
+						this_range.second = exclude_start - range_start;
+					}
+					else
+					{
+						verify(HERE), (exclude_start > range_start && exclude_end < range_end);
+
+						// Section sits in the middle
+						this_range.second = exclude_start - range_start; // Head
+						result.push_back({ exclude_end, range_end - exclude_end }); // Tail
+					}
+				}
+			}
+
+			return result;
+		}
+
+		void unprotect_set(thrashed_set& data)
+		{
+			auto release_set = [this](std::vector<section_storage_type*>& _set)
+			{
+				for (auto& section : _set)
+				{
+					verify(HERE), section->is_flushed() || section->is_dirty();
+
+					section->unprotect();
+					m_cache[get_block_address(section->get_section_base())].remove_one();
+				}
+			};
+
+			auto discard_set = [this](std::vector<section_storage_type*>& _set)
+			{
+				for (auto& section : _set)
+				{
+					verify(HERE), section->is_flushed() || section->is_dirty();
+
+					const bool dirty = section->is_dirty();
+					section->discard();
+					section->set_dirty(dirty);
+					m_cache[get_block_address(section->get_section_base())].remove_one();
+				}
+			};
+
+			if (data.sections_to_exclude.empty())
+			{
+				release_set(data.sections_to_unprotect);
+				release_set(data.sections_to_flush);
+			}
+			else
+			{
+				auto removables = data.sections_to_unprotect;
+				if (!data.sections_to_flush.empty())
+				{
+					removables.resize(removables.size() + data.sections_to_flush.size());
+					std::copy(data.sections_to_flush.begin(), data.sections_to_flush.end(), removables.begin() + data.sections_to_unprotect.size());
+				}
+
+				const auto intersect_info = subtractive_intersect(removables, data.sections_to_exclude);
+				for (const auto &range : intersect_info)
+				{
+					if (range.second)
+					{
+						utils::memory_protect(vm::base(range.first), range.second, utils::protection::rw);
+					}
+				}
+
+				discard_set(data.sections_to_unprotect);
+				discard_set(data.sections_to_flush);
+			}
+		}
+
+		// Get intersecting set - Returns all objects intersecting a given range and their owning blocks
 		std::vector<std::pair<section_storage_type*, ranged_storage*>> get_intersecting_set(u32 address, u32 range)
 		{
 			std::vector<std::pair<section_storage_type*, ranged_storage*>> result;
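Note: the subtraction above works on (base, length) pairs with inclusive boundary tests, where the exclusion end is one-past-the-end. The standalone sketch below mirrors the same head/tail/split cases with equivalent half-open arithmetic; all types, names and test values are illustrative stand-ins, not the cache's own.

```cpp
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using range_t = std::pair<uint32_t, uint32_t>; // (base, length); length 0 == null entry

// Subtract one exclusion [ex_start, ex_end) from every range in the set.
static void subtract(std::vector<range_t>& ranges, uint32_t ex_start, uint32_t ex_end)
{
    const size_t count = ranges.size(); // snapshot: tails appended below cannot overlap the same exclusion
    for (size_t n = 0; n < count; ++n)
    {
        auto& r = ranges[n];
        if (!r.second)
            continue; // already nulled out

        const uint32_t start = r.first, end = r.first + r.second;
        if (ex_start >= end || start >= ex_end)
            continue; // no overlap, skip

        const bool head = (ex_start <= start); // range start falls inside the exclusion
        const bool tail = (ex_end >= end);     // range end falls inside the exclusion

        if (head && tail)
        {
            r = { 0, 0 }; // fully covered, cannot be salvaged
        }
        else if (head)
        {
            r = { ex_end, end - ex_end }; // truncate head
        }
        else if (tail)
        {
            r = { start, ex_start - start }; // truncate tail
        }
        else
        {
            r.second = ex_start - start;                // exclusion sits in the middle: keep the head...
            ranges.push_back({ ex_end, end - ex_end }); // ...and append the surviving tail
        }
    }
}

int main()
{
    std::vector<range_t> rs = { { 0x1000, 0x3000 } }; // [0x1000, 0x4000)
    subtract(rs, 0x2000, 0x3000);                     // punch a hole in the middle
    assert(rs.size() == 2);
    assert(rs[0] == range_t(0x1000, 0x1000));         // surviving head
    assert(rs[1] == range_t(0x3000, 0x1000));         // surviving tail
}
```

On the appended-vector idiom in unprotect_set: resize-plus-std::copy is equivalent to a reserve followed by vector::insert of the flush set; only pointers are copied, so merging the two lists before the subtraction is cheap.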
@@ -585,7 +712,6 @@ namespace rsx
 				if (trampled_range.first <= trampled_range.second &&
 					!(trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second))
 				{
-
 					for (int i = 0; i < range_data.data.size(); i++)
 					{
 						auto &tex = range_data.data[i];
@@ -635,7 +761,7 @@ namespace rsx
 
 		//Invalidate range base implementation
 		template <typename ...Args>
-		thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool rebuild_cache, bool allow_flush, Args&&... extras)
+		thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool allow_flush, Args&&... extras)
 		{
 			if (!region_intersects_cache(address, range, is_writing))
 				return {};
@@ -644,52 +770,52 @@ namespace rsx
 
 			if (trampled_set.size() > 0)
 			{
+				const auto mem_base = (address & ~4095u);
+				const auto mem_end = (address + range + 4095u) & ~4095u;
+				const auto mem_range = std::make_pair(mem_base, mem_end - mem_base);
+
 				update_cache_tag();
+
 				bool deferred_flush = false;
+				bool allow_rebuild = true;
 
 				thrashed_set result = {};
 				result.violation_handled = true;
 
-				if (!discard_only && !allow_flush)
+				if (!discard_only)
 				{
 					for (auto &obj : trampled_set)
 					{
 						if (obj.first->is_flushable())
 						{
-							deferred_flush = true;
-							break;
+							if (obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
+							{
+								// At least one section will introduce new data unconditionally
+								allow_rebuild = false;
+								break;
+							}
 						}
 					}
+
+					deferred_flush = !allow_rebuild && !allow_flush;
 				}
 
 				for (auto &obj : trampled_set)
 				{
 					if (!discard_only)
 					{
-						bool collateral = false;
-						if (!deferred_flush)
+						// NOTE: The memory test is page aligned to prevent continuously faulting in the page range
+						if (allow_rebuild && !obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
 						{
-							if (!is_writing && obj.first->get_protection() != utils::protection::no)
-							{
-								collateral = true;
-							}
-							else
-							{
-								if (rebuild_cache && allow_flush && obj.first->is_flushable())
-								{
-									const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
-									collateral = !std::get<0>(obj.first->overlaps_page(null_check, address, rsx::overlap_test_bounds::full_range));
-								}
-							}
-						}
-
-						if (collateral)
-						{
-							//False positive
+							// False positive
+							result.sections_to_exclude.push_back(obj.first);
 							continue;
 						}
-						else if (obj.first->is_flushable())
+
+						if (obj.first->is_flushable())
 						{
+							verify(HERE), !allow_rebuild;
+
 							//Write if and only if no one else has trashed section memory already
 							//TODO: Proper section management should prevent this from happening
 							//TODO: Blit engine section merge support and/or partial texture memory buffering
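Note: a compact illustration of the two preparatory steps in the hunk above: page-aligning the invalidated byte range, and deriving the allow_rebuild/deferred_flush flags. The boolean inputs are hypothetical stand-ins for the trampled-set scan, not the cache's real queries.

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    // Page-align an arbitrary byte range the same way the patch does, so the
    // overlap/fault test never lands inside a partially covered page.
    const uint32_t address = 0x1234, range = 0x100;
    const uint32_t mem_base = address & ~4095u;                   // round down to page start
    const uint32_t mem_end  = (address + range + 4095u) & ~4095u; // round up to page end
    assert(mem_base == 0x1000 && mem_end == 0x2000);

    // Flag derivation: a rebuild is only allowed when no flushable section
    // fully overlaps the invalidated pages; the flush is deferred when a
    // rebuild is impossible but the caller forbids flushing right now.
    const bool flushable_overlap = true;  // stand-in for the trampled-set scan result
    const bool allow_flush = false;       // caller argument
    const bool allow_rebuild = !flushable_overlap;
    const bool deferred_flush = !allow_rebuild && !allow_flush;
    assert(!allow_rebuild && deferred_flush);
}
```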
@@ -702,8 +828,11 @@ namespace rsx
 							}
 
 							//Contents clobbered, destroy this
-							obj.first->set_dirty(true);
-							m_unreleased_texture_objects++;
+							if (!obj.first->is_dirty())
+							{
+								obj.first->set_dirty(true);
+								m_unreleased_texture_objects++;
+							}
 
 							result.sections_to_unprotect.push_back(obj.first);
 						}
@@ -738,34 +867,26 @@ namespace rsx
 							continue;
 						}
-						else if (deferred_flush)
+						else
 						{
 							//allow_flush = false and not synchronized
-							obj.first->set_dirty(true);
-							m_unreleased_texture_objects++;
+							if (!obj.first->is_dirty())
+							{
+								obj.first->set_dirty(true);
+								m_unreleased_texture_objects++;
+							}
 
 							result.sections_to_unprotect.push_back(obj.first);
 							continue;
 						}
 					}
 
-					if (!obj.first->is_flushable())
-					{
-						obj.first->set_dirty(true);
-						m_unreleased_texture_objects++;
-					}
+					verify(HERE), discard_only;
 
-					// Only unsynchronized (no-flush) sections should reach here, and only if the rendering thread is the caller
-					if (discard_only)
-					{
-						obj.first->discard();
-						obj.second->remove_one();
-					}
-					else
-					{
-						// Delay unprotect in case there are sections to flush
-						result.sections_to_unprotect.push_back(obj.first);
-					}
+					m_unreleased_texture_objects++;
+
+					obj.first->discard();
+					obj.second->remove_one();
 				}
 
 				if (deferred_flush && result.sections_to_flush.size())
@@ -778,13 +899,7 @@ namespace rsx
 				}
 				else
 				{
-					//Flushes happen in one go, now its time to remove protection
-					for (auto& section : result.sections_to_unprotect)
-					{
-						verify(HERE), section->is_flushed() || section->is_dirty();
-						section->unprotect();
-						m_cache[get_block_address(section->get_section_base())].remove_one();
-					}
+					unprotect_set(result);
 				}
 
 				//Everything has been handled
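Note: the is_dirty() guard introduced above keeps m_unreleased_texture_objects exact when the same section is trampled by several invalidations in a row. A minimal sketch of the pattern, with illustrative names only:

```cpp
#include <cassert>

// Simplified stand-ins for the section bookkeeping.
struct section { bool dirty = false; };
static int s_unreleased = 0;

static void mark_dirty(section& s)
{
    // Only count the transition into the dirty state, never the repeat hits.
    if (!s.dirty)
    {
        s.dirty = true;
        ++s_unreleased;
    }
}

int main()
{
    section s;
    mark_dirty(s);
    mark_dirty(s); // second hit must not double-count
    assert(s_unreleased == 1);
}
```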
@@ -1065,10 +1180,13 @@ namespace rsx
 
 			m_flush_always_cache[memory_address] = memory_size;
 
-			//Test for invalidated sections from surface cache occupying same address range
-			const auto &overlapped = find_texture_from_range(memory_address, memory_size);
+			// Test for invalidated sections from surface cache occupying same address range
+			const auto mem_base = (memory_address & ~4095u);
+			const auto mem_end = (memory_address + memory_size + 4095u) & ~4095u;
+			const auto &overlapped = find_texture_from_range(mem_base, mem_end - mem_base);
 			if (overlapped.size() > 1)
 			{
+				const auto mem_range = std::make_pair(memory_address, memory_size);
 				for (auto surface : overlapped)
 				{
 					if (surface == &region)
@@ -1081,17 +1199,26 @@ namespace rsx
 					else
 					{
 						if (surface->get_section_base() != memory_address)
-							//HACK: preserve other overlapped sections despite overlap unless new section is superseding
-							//TODO: write memory to cell or redesign sections to preserve the data
+							// HACK: preserve other overlapped sections despite overlap unless new section is superseding
+							// TODO: write memory to cell or redesign sections to preserve the data
 							continue;
 					}
 
-					//Memory is shared with another surface
-					//Discard it - the backend should ensure memory contents are preserved if needed
+					// Memory is shared with another surface
+					// Discard it - the backend should ensure memory contents are preserved if needed
 					surface->set_dirty(true);
 
 					if (surface->is_locked())
 					{
+						if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail())
+						{
+							if (!surface->overlaps(mem_range, rsx::overlap_test_bounds::full_range))
+							{
+								// TODO: This section contains data that should be flushed
+								LOG_TODO(RSX, "Flushable section data may have been lost (0x%x)", surface->get_section_base());
+							}
+						}
+
 						surface->unprotect();
 						m_cache[get_block_address(surface->get_section_base())].remove_one();
 					}
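Note: in the hunk above, the cache lookup is widened to page granularity while the data-loss test keeps the precise byte range. A small sketch of why both are needed; it reuses the namespace-level predicate for illustration, whereas the real test goes through the section's own overlaps() member:

```cpp
#include <cassert>
#include <cstdint>
#include <utility>

// Same predicate as rsx::region_overlaps in this patch: limits are exclusive.
static bool overlaps(uint32_t base1, uint32_t limit1, uint32_t base2, uint32_t limit2)
{
    return (base1 < limit2 && base2 < limit1);
}

int main()
{
    const uint32_t memory_address = 0x1080, memory_size = 0x40;

    // The lookup is widened to whole pages so any section protected on the
    // same page is found at all...
    const uint32_t mem_base = memory_address & ~4095u;
    const uint32_t mem_end  = (memory_address + memory_size + 4095u) & ~4095u;
    assert(mem_base == 0x1000 && mem_end == 0x2000);

    // ...but the data-loss check uses the precise byte range, so a surface
    // that merely shares the page (here [0x1800, 0x1900)) is not treated as
    // actually overlapped.
    const auto mem_range = std::make_pair(memory_address, memory_size);
    assert(!overlaps(0x1800, 0x1900, mem_range.first, mem_range.first + mem_range.second));
}
```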
@@ -1219,7 +1346,7 @@ namespace rsx
 				return{};
 
 			writer_lock lock(m_cache_mutex);
-			return invalidate_range_impl_base(address, range, is_writing, false, true, allow_flush, std::forward<Args>(extras)...);
+			return invalidate_range_impl_base(address, range, is_writing, false, allow_flush, std::forward<Args>(extras)...);
 		}
 
 		template <typename ...Args>
@@ -1230,7 +1357,7 @@ namespace rsx
 				return {};
 
 			writer_lock lock(m_cache_mutex);
-			return invalidate_range_impl_base(address, range, is_writing, discard, false, allow_flush, std::forward<Args>(extras)...);
+			return invalidate_range_impl_base(address, range, is_writing, discard, allow_flush, std::forward<Args>(extras)...);
 		}
 
 		template <typename ...Args>
@@ -1267,28 +1394,12 @@ namespace rsx
 				}
 
 				//2. Release all obsolete sections
-				for (auto &tex : data.sections_to_unprotect)
-				{
-					if (tex->is_locked())
-					{
-						verify(HERE), tex->is_dirty();
-						tex->unprotect();
-						m_cache[get_block_address(tex->get_section_base())].remove_one();
-					}
-				}
-
-				//3. Release all flushed sections
-				for (auto &tex : data.sections_to_flush)
-				{
-					tex->unprotect();
-					m_cache[get_block_address(tex->get_section_base())].remove_one();
-				}
+				unprotect_set(data);
 			}
 			else
 			{
-				//The cache contents have changed between the two readings. This means the data held is useless
-				update_cache_tag();
-				invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, true, std::forward<Args>(extras)...);
+				// The cache contents have changed between the two readings. This means the data held is useless
+				invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, std::forward<Args>(extras)...);
 			}
 
 			return true;
@@ -1930,7 +2041,7 @@ namespace rsx
 			}
 
 			//Invalidate with writing=false, discard=false, rebuild=false, native_flush=true
-			invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl_base(texaddr, tex_size, false, false, true, std::forward<Args>(extras)...);
 
 			//NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB
 			m_texture_memory_in_use += (tex_pitch * tex_height);
@@ -2073,8 +2184,8 @@ namespace rsx
 				const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
 
 				lock.upgrade();
-				invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, false, true, std::forward<Args>(extras)...);
-				invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, false, true, std::forward<Args>(extras)...);
+				invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, true, std::forward<Args>(extras)...);
+				invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, true, std::forward<Args>(extras)...);
 				memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
 				return true;
 			}
@@ -2208,7 +2319,7 @@ namespace rsx
 			{
 				lock.upgrade();
-				invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, false, true, std::forward<Args>(extras)...);
+				invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, true, std::forward<Args>(extras)...);
 
 				const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
 				std::vector<rsx_subresource_layout> subresource_layout;
@@ -2304,7 +2415,7 @@ namespace rsx
 				cached_dest->set_dirty(true);
 				m_unreleased_texture_objects++;
 
-				invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, false, true, std::forward<Args>(extras)...);
+				invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, true, std::forward<Args>(extras)...);
 
 				dest_texture = 0;
 				cached_dest = nullptr;
@@ -2312,7 +2423,7 @@ namespace rsx
 			else if (invalidate_dst_range)
 			{
 				lock.upgrade();
-				invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, false, true, std::forward<Args>(extras)...);
+				invalidate_range_impl_base(dst_address, dst.pitch * dst.height, true, false, true, std::forward<Args>(extras)...);
 			}
 
 			u32 gcm_format;
diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h
index 9838b36784..cc329f4049 100644
--- a/rpcs3/Emu/RSX/rsx_cache.h
+++ b/rpcs3/Emu/RSX/rsx_cache.h
@@ -60,11 +60,6 @@ namespace rsx
 		bool locked = false;
 		bool dirty = false;
 
-		inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2) const
-		{
-			return (base1 < limit2 && base2 < limit1);
-		}
-
 		inline void init_lockable_range(u32 base, u32 length)
 		{
 			locked_address_base = (base & ~4095);
@@ -115,9 +110,9 @@ namespace rsx
 			init_lockable_range(cpu_address_base, cpu_address_range);
 		}
 
-		void protect(utils::protection prot)
+		void protect(utils::protection prot, bool force = false)
 		{
-			if (prot == protection) return;
+			if (prot == protection && !force) return;
 
 			verify(HERE), locked_address_range > 0;
 			utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
@@ -148,7 +143,6 @@ namespace rsx
 			const auto old_prot = protection;
 			const auto old_locked_base = locked_address_base;
 			const auto old_locked_length = locked_address_range;
-			protection = utils::protection::rw;
 
 			if (confirmed_range.second)
 			{
@@ -164,7 +158,7 @@ namespace rsx
 				init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
 			}
 
-			protect(prot);
+			protect(prot, true);
 		}
 
 		void unprotect()
@@ -358,6 +352,18 @@ namespace rsx
 			locked_memory_ptr.flush(offset, write_length);
 		}
 
+		std::pair<u32, u32> get_protected_range() const
+		{
+			if (locked)
+			{
+				return { locked_address_base, locked_address_range };
+			}
+			else
+			{
+				return { 0, 0 };
+			}
+		}
+
 		std::pair<u32, u32> get_confirmed_range() const
 		{
 			if (confirmed_range.second == 0)
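Note: on the new force flag for protect(). The protection value is cached per section, but discard() and a confirmed range can move the lockable window, so the same-protection early-out would otherwise skip re-applying protection to the relocated range. A mock sketch of that failure mode, with simplified bookkeeping and illustrative names:

```cpp
#include <cassert>
#include <cstdint>

enum class prot { rw, ro, no };

struct mock_section
{
    prot protection = prot::rw;
    uint32_t locked_base = 0;
    int mprotect_calls = 0;

    void protect(prot p, bool force = false)
    {
        if (p == protection && !force) return; // early-out on cached state
        ++mprotect_calls;                      // stands in for utils::memory_protect
        protection = p;
    }
};

int main()
{
    mock_section s;
    s.protect(prot::no);           // first real protect call
    s.locked_base = 0x2000;        // a confirmed range moved the lockable window
    s.protect(prot::no);           // early-out fires: the NEW range is never protected
    assert(s.mprotect_calls == 1); // this stale early-out is what force avoids
    s.protect(prot::no, true);     // force re-applies protection to the moved range
    assert(s.mprotect_calls == 2);
}
```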
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 174e760d38..a6981ec7e3 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -733,6 +733,11 @@ namespace rsx
 		return g_current_renderer;
 	}
 
+	static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
+	{
+		return (base1 < limit2 && base2 < limit1);
+	}
+
 	template <int N>
 	void unpack_bitset(std::bitset<N>& block, u64* values)
 	{
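Note: region_overlaps, now hoisted from the section class to namespace scope so the texture cache can use it, treats both limits as exclusive; ranges that merely touch at a boundary do not overlap. A few spot checks of the relocated helper (test values are illustrative):

```cpp
#include <cassert>
#include <cstdint>

// The relocated helper, same body as in the patch: half-open ranges.
static inline bool region_overlaps(uint32_t base1, uint32_t limit1, uint32_t base2, uint32_t limit2)
{
    return (base1 < limit2 && base2 < limit1);
}

int main()
{
    assert(region_overlaps(0x1000, 0x2000, 0x1800, 0x2800));  // partial overlap
    assert(region_overlaps(0x1000, 0x2000, 0x1400, 0x1800));  // full containment
    assert(!region_overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // merely touching
    assert(!region_overlaps(0x1000, 0x2000, 0x3000, 0x4000)); // disjoint
}
```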