diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 86032973ab..73d9fe2e82 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2,6 +2,7 @@ #include "../rsx_cache.h" #include "../rsx_utils.h" +#include "texture_cache_utils.h" #include "TextureUtils.h" #include @@ -10,330 +11,25 @@ extern u64 get_system_time(); namespace rsx { - enum texture_create_flags - { - default_component_order = 0, - native_component_order = 1, - swapped_native_component_order = 2, - }; - - enum memory_read_flags - { - flush_always = 0, - flush_once = 1 - }; - - struct typeless_xfer - { - bool src_is_typeless = false; - bool dst_is_typeless = false; - bool src_is_depth = false; - bool dst_is_depth = false; - u32 src_gcm_format = 0; - u32 dst_gcm_format = 0; - f32 src_scaling_hint = 1.f; - f32 dst_scaling_hint = 1.f; - texture_upload_context src_context = texture_upload_context::blit_engine_src; - texture_upload_context dst_context = texture_upload_context::blit_engine_dst; - - void analyse() - { - if (src_is_typeless && dst_is_typeless) - { - if (src_scaling_hint == dst_scaling_hint && - src_scaling_hint != 1.f) - { - if (src_is_depth == dst_is_depth) - { - src_is_typeless = dst_is_typeless = false; - src_scaling_hint = dst_scaling_hint = 1.f; - } - } - } - } - }; - - struct cached_texture_section : public rsx::buffered_section - { - u16 width; - u16 height; - u16 depth; - u16 mipmaps; - - u16 real_pitch; - u16 rsx_pitch; - - u32 gcm_format = 0; - bool pack_unpack_swap_bytes = false; - - u64 sync_timestamp = 0; - bool synchronized = false; - bool flushed = false; - - u32 num_writes = 0; - std::deque read_history; - - u64 cache_tag = 0; - u64 last_write_tag = 0; - - memory_read_flags readback_behaviour = memory_read_flags::flush_once; - rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; - rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; - rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d; - - void reset(u32 rsx_address, u32 rsx_size) - { - rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? 
rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative; - rsx::buffered_section::reset(rsx_address, rsx_size, policy); - - flushed = false; - synchronized = false; - - sync_timestamp = 0ull; - last_write_tag = 0ull; - cache_tag = 0ull; - - // TODO: Fix write tracking and reset stats - } - - bool matches(u32 rsx_address, u32 rsx_size) - { - return rsx::buffered_section::matches(rsx_address, rsx_size); - } - - bool matches(u32 rsx_address, u32 width, u32 height, u32 depth, u32 mipmaps) - { - if (rsx_address == cpu_address_base) - { - if (!width && !height && !mipmaps) - return true; - - if (width && width != this->width) - return false; - - if (height && height != this->height) - return false; - - if (depth && depth != this->depth) - return false; - - if (mipmaps && mipmaps > this->mipmaps) - return false; - - return true; - } - - return false; - } - - void touch(u64 tag) - { - num_writes++; - last_write_tag = tag; - } - - void reset_write_statistics() - { - if (read_history.size() == 16) - { - read_history.pop_back(); - } - - read_history.push_front(num_writes); - num_writes = 0; - } - - void set_view_flags(rsx::texture_create_flags flags) - { - view_flags = flags; - } - - void set_context(rsx::texture_upload_context upload_context) - { - context = upload_context; - } - - void set_image_type(rsx::texture_dimension_extended type) - { - image_type = type; - } - - void set_gcm_format(u32 format) - { - gcm_format = format; - } - - void set_memory_read_flags(memory_read_flags flags) - { - readback_behaviour = flags; - } - - u16 get_width() const - { - return width; - } - - u16 get_height() const - { - return height; - } - - rsx::texture_create_flags get_view_flags() const - { - return view_flags; - } - - rsx::texture_upload_context get_context() const - { - return context; - } - - rsx::texture_dimension_extended get_image_type() const - { - return image_type; - } - - u32 get_gcm_format() const - { - return gcm_format; - } - - memory_read_flags get_memory_read_flags() const - { - return readback_behaviour; - } - - bool writes_likely_completed() const - { - // TODO: Move this to the miss statistics block - const auto num_records = read_history.size(); - - if (num_records == 0) - { - return false; - } - else if (num_records == 1) - { - return num_writes >= read_history.front(); - } - else - { - const u32 last = read_history.front(); - const u32 prev_last = read_history[1]; - - if (last == prev_last && num_records <= 3) - { - return num_writes >= last; - } - - u32 compare = UINT32_MAX; - for (u32 n = 1; n < num_records; n++) - { - if (read_history[n] == last) - { - // Uncertain, but possible - compare = read_history[n - 1]; - - if (num_records > (n + 1)) - { - if (read_history[n + 1] == prev_last) - { - // Confirmed with 2 values - break; - } - } - } - } - - return num_writes >= compare; - } - } - - void reprotect(utils::protection prot, const std::pair& range) - { - //Reset properties and protect again - flushed = false; - synchronized = false; - sync_timestamp = 0ull; - - protect(prot, range); - } - - void reprotect(utils::protection prot) - { - //Reset properties and protect again - flushed = false; - synchronized = false; - sync_timestamp = 0ull; - - protect(prot); - } - - u64 get_sync_timestamp() const - { - return sync_timestamp; - } - }; - template - class texture_cache + class texture_cache : public rsx::texture_cache_base { + static_assert(std::is_base_of, section_storage_type>::value, "section_storage_type must derive from 
rsx::cached_texture_section"); + + public: + using baseclass = typename rsx::texture_cache_base; + using ranged_storage = typename rsx::ranged_storage; + using ranged_storage_block = typename ranged_storage::block_type; + private: - - struct ranged_storage - { - std::vector data; //Stored data - std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks - u32 max_range = 0; //Largest stored block - u32 max_addr = 0; - u32 min_addr = UINT32_MAX; - - void notify(u32 addr, u32 data_size) - { - //verify(HERE), valid_count >= 0; - - const u32 addr_base = addr & ~0xfff; - const u32 block_sz = align(addr + data_size, 4096u) - addr_base; - - max_range = std::max(max_range, block_sz); - max_addr = std::max(max_addr, addr); - min_addr = std::min(min_addr, addr_base); - valid_count++; - } - - void notify() - { - //verify(HERE), valid_count >= 0; - valid_count++; - } - - void add(section_storage_type& section, u32 addr, u32 data_size) - { - data.push_back(std::move(section)); - notify(addr, data_size); - } - - void remove_one() - { - //verify(HERE), valid_count > 0; - valid_count--; - } - - bool overlaps(u32 addr, u32 range) const - { - const u32 limit = addr + range; - if (limit <= min_addr) return false; - - const u32 this_limit = max_addr + max_range; - return (this_limit > addr); - } - }; + /** + * Helper structs/enums + */ // Keep track of cache misses to pre-emptively flush some addresses struct framebuffer_memory_characteristics { u32 misses; - u32 block_size; texture_format format; }; @@ -342,13 +38,86 @@ namespace rsx struct thrashed_set { bool violation_handled = false; + bool flushed = false; + invalidation_cause cause; std::vector sections_to_flush; // Sections to be flushed std::vector sections_to_unprotect; // These sections are to be unpotected and discarded by caller std::vector sections_to_exclude; // These sections are do be excluded from protection manipulation (subtracted from other sections) - int num_flushable = 0; + u32 num_flushable = 0; u64 cache_tag = 0; - u32 address_base = 0; - u32 address_range = 0; + address_range fault_range; + address_range invalidate_range; + + void clear_sections() + { + sections_to_flush = {}; + sections_to_unprotect = {}; + sections_to_exclude = {}; + num_flushable = 0; + } + + bool empty() const + { + return sections_to_flush.empty() && sections_to_unprotect.empty() && sections_to_exclude.empty(); + } + + bool is_flushed() const + { + return flushed || sections_to_flush.empty(); + } + +#ifdef TEXTURE_CACHE_DEBUG + void check_pre_sanity() const + { + size_t flush_and_unprotect_count = sections_to_flush.size() + sections_to_unprotect.size(); + size_t exclude_count = sections_to_exclude.size(); + + //------------------------- + // It is illegal to have only exclusions except when reading from a range with only RO sections + ASSERT(flush_and_unprotect_count > 0 || exclude_count == 0 || !cause.is_write()); + if (flush_and_unprotect_count == 0 && exclude_count > 0) + { + // double-check that only RO sections exists + for (auto *tex : sections_to_exclude) + ASSERT(tex->get_protection() == utils::protection::ro); + } + + //------------------------- + // Check that the number of sections we "found" matches the sections known to be in the fault range + const auto min_overlap_fault_no_ro = tex_cache_checker.get_minimum_number_of_sections(fault_range); + const auto min_overlap_invalidate_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range); + + const u16 min_overlap_fault = min_overlap_fault_no_ro.first + (cause.is_write() 
? min_overlap_fault_no_ro.second : 0); + const u16 min_overlap_invalidate = min_overlap_invalidate_no_ro.first + (cause.is_write() ? min_overlap_invalidate_no_ro.second : 0); + AUDIT(min_overlap_fault <= min_overlap_invalidate); + + const u16 min_flush_or_unprotect = min_overlap_fault; + + // we must flush or unprotect *all* sections that partially overlap the fault range + ASSERT(flush_and_unprotect_count >= min_flush_or_unprotect); + + // result must contain *all* sections that overlap (completely or partially) the invalidation range + ASSERT(flush_and_unprotect_count + exclude_count >= min_overlap_invalidate); + } + + void check_post_sanity() const + { + AUDIT(is_flushed()); + + // Check that the number of sections we "found" matches the sections known to be in the fault range + tex_cache_checker.check_unprotected(fault_range, cause.is_read() && invalidation_keep_ro_during_read, true); + + // Check that the cache has the correct protections + tex_cache_checker.verify(); + } +#endif // TEXTURE_CACHE_DEBUG + }; + + struct intersecting_set + { + std::vector sections = {}; + address_range invalidate_range = {}; + bool has_flushables = false; }; struct copy_region_descriptor @@ -406,6 +175,11 @@ namespace rsx blit_op_result(bool success) : succeeded(success) {} + + inline address_range to_address_range() const + { + return address_range::start_length(real_dst_address, real_dst_size); + } }; struct sampled_image_descriptor : public sampled_image_descriptor_base @@ -452,18 +226,23 @@ namespace rsx } }; + protected: + /** + * Variable declarations + */ + shared_mutex m_cache_mutex; - std::unordered_map m_cache; + ranged_storage m_storage; std::unordered_multimap> m_temporary_subresource_cache; std::atomic m_cache_update_tag = {0}; - std::pair read_only_range = std::make_pair(0xFFFFFFFF, 0); - std::pair no_access_range = std::make_pair(0xFFFFFFFF, 0); + address_range read_only_range; + address_range no_access_range; - std::unordered_map m_cache_miss_statistics_table; + std::unordered_map m_cache_miss_statistics_table; //Map of messages to only emit once std::unordered_set m_once_only_messages_set; @@ -472,13 +251,11 @@ namespace rsx bool read_only_tex_invalidate = false; //Store of all objects in a flush_always state. 
A lazy readback is attempted every draw call - std::unordered_map m_flush_always_cache; + std::unordered_map m_flush_always_cache; u64 m_flush_always_update_timestamp = 0; //Memory usage - const s32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated - std::atomic m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory - std::atomic m_texture_memory_in_use = { 0 }; + const u32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated //Other statistics const u32 m_cache_miss_threshold = 8; // How many times an address can miss speculative writing before it is considered high priority @@ -487,11 +264,17 @@ namespace rsx std::atomic m_num_cache_speculative_writes = { 0 }; std::atomic m_num_cache_mispredictions = { 0 }; - /* Helpers */ - virtual void free_texture_section(section_storage_type&) = 0; + // Invalidation + static const bool invalidation_ignore_unsynchronized = true; // If true, unsynchronized sections don't get forcefully flushed unless they overlap the fault range + static const bool invalidation_keep_ro_during_read = true; // If true, RO sections are not invalidated during read faults + + + /** + * Virtual Methods + */ virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_resource_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) = 0; virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) = 0; - virtual section_storage_type* create_new_texture(commandbuffer_type&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, + virtual section_storage_type* create_new_texture(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, texture_create_flags flags) = 0; virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; @@ -503,9 +286,10 @@ namespace rsx virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0; virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0; - constexpr u32 get_block_size() const { return 0x1000000; } - inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } + /** + * Helpers + */ inline void update_cache_tag() { m_cache_update_tag++; @@ -536,135 +320,50 @@ namespace rsx logs::RSX.warning(fmt, params...); } - private: - //Internal implementation methods and helpers + /** + * Internal implementation methods and helpers + */ - std::pair get_memory_protection(u32 address) + inline bool region_intersects_cache(const address_range &test_range, bool is_writing) { - auto found = m_cache.find(get_block_address(address)); - if (found != m_cache.end()) - { - for (auto &tex : found->second.data) - { - if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) - return{ tex.get_protection(), &tex }; - } - } + AUDIT( test_range.valid() ); 
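+			// Fast rejection before taking m_cache_mutex: a read fault can only concern locked sections
+			// tracked inside no_access_range, while a write must also be tested against read_only_range.
+			// Only if one of these coarse ranges overlaps the test_range do we search the storage for an
+			// actual locked section.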
- //Get the preceding block and check if any hits are found - found = m_cache.find(get_block_address(address) - get_block_size()); - if (found != m_cache.end()) - { - for (auto &tex : found->second.data) - { - if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) - return{ tex.get_protection(), &tex }; - } - } - - return{ utils::protection::rw, nullptr }; - } - - inline bool region_intersects_cache(u32 address, u32 range, bool is_writing) const - { - std::pair test_range = std::make_pair(address, address + range); + // Quick range overlaps with cache tests if (!is_writing) { - if (no_access_range.first > no_access_range.second || - test_range.second < no_access_range.first || - test_range.first > no_access_range.second) + if (!no_access_range.valid() || !test_range.overlaps(no_access_range)) return false; } else { - if (test_range.second < read_only_range.first || - test_range.first > read_only_range.second) + if (!read_only_range.valid() || !test_range.overlaps(read_only_range)) { //Doesnt fall in the read_only textures range; check render targets - if (test_range.second < no_access_range.first || - test_range.first > no_access_range.second) + if (!no_access_range.valid() || !test_range.overlaps(no_access_range)) return false; } } + // Check that there is at least one valid (locked) section in the test_range + reader_lock lock(m_cache_mutex); + if (m_storage.range_begin(test_range, locked_range, true) == m_storage.range_end()) + return false; + + // We do intersect the cache return true; } - std::vector> subtractive_intersect(std::vector marked_sections, std::vector sections_to_exclude) - { - std::vector> result; - result.reserve(marked_sections.size()); - auto in_range_inclusive = [](u32 base, u32 limit, u32 test) -> bool - { - return (base <= test && limit >= test); - }; - - for (const auto §ion : marked_sections) - { - result.push_back(section->get_protected_range()); - } - - for (const auto &excluded : sections_to_exclude) - { - const auto exclusion_range = excluded->get_protected_range(); - const auto exclude_start = exclusion_range.first; - const auto exclude_end = exclusion_range.first + exclusion_range.second; - - for (int n = 0; n < result.size(); ++n) - { - auto &this_range = result[n]; - - if (!this_range.second) - { - // Null - continue; - } - - const auto range_start = this_range.first; - const auto range_end = this_range.second + range_start; - - if (!region_overlaps(exclude_start, exclude_end, range_start, range_end)) - { - // No overlap, skip - continue; - } - - const auto head_excluded = in_range_inclusive(exclude_start, exclude_end, range_start); // This section has its start inside excluded range - const auto tail_excluded = in_range_inclusive(exclude_start, exclude_end, range_end); // This section has its end inside excluded range - - if (head_excluded && tail_excluded) - { - // Cannot be salvaged, fully excluded - this_range = { 0, 0 }; - } - else if (head_excluded) - { - // Head overlaps, truncate head - this_range.first = exclude_end; - } - else if (tail_excluded) - { - // Tail overlaps, truncate tail - this_range.second = exclude_start - range_start; - } - else - { - verify(HERE), (exclude_start > range_start && exclude_end < range_end); - - // Section sits in the middle - this_range.second = exclude_start - range_start; // Head - result.push_back({ exclude_end, range_end - exclude_end }); // Tail - } - } - } - - return result; - } + /** + * Section invalidation + */ + private: template void flush_set(thrashed_set& data, Args&&... 
extras) { + AUDIT(!data.flushed); + if (data.sections_to_flush.size() > 1) { // Sort with oldest data first @@ -697,268 +396,450 @@ namespace rsx } m_num_flush_requests++; - data.sections_to_unprotect.push_back(surface); } - data.sections_to_flush.clear(); + data.flushed = true; } + + // Merges the protected ranges of the sections in "sections" into "result" + void merge_protected_ranges(address_range_vector &result, const std::vector §ions) + { + result.reserve(result.size() + sections.size()); + + // Copy ranges to result, merging them if possible + for (const auto §ion : sections) + { + const auto &new_range = section->get_locked_range(); + AUDIT( new_range.is_page_range() ); + + result.merge(new_range); + } + } + + // NOTE: It is *very* important that data contains exclusions for *all* sections that overlap sections_to_unprotect/flush + // Otherwise the page protections will end up incorrect and things will break! void unprotect_set(thrashed_set& data) { - auto release_set = [this](std::vector& _set) + auto protect_ranges = [this](address_range_vector& _set, utils::protection _prot) { - for (auto& section : _set) + u32 count = 0; + for (auto &range : _set) { - verify(HERE), section->is_flushed() || section->is_dirty(); - - section->unprotect(); - m_cache[get_block_address(section->get_section_base())].remove_one(); + if (range.valid()) + { + rsx::memory_protect(range, _prot); + count++; + } } + //LOG_ERROR(RSX, "Set protection of %d blocks to 0x%x", count, static_cast(prot)); }; auto discard_set = [this](std::vector& _set) { - for (auto& section : _set) + for (auto* section : _set) { verify(HERE), section->is_flushed() || section->is_dirty(); - const bool dirty = section->is_dirty(); - section->discard(); - section->set_dirty(dirty); - m_cache[get_block_address(section->get_section_base())].remove_one(); + section->discard(/*set_dirty*/ false); } }; - if (data.sections_to_exclude.empty()) - { - release_set(data.sections_to_unprotect); - release_set(data.sections_to_flush); - } - else - { - auto removables = data.sections_to_unprotect; - if (!data.sections_to_flush.empty()) - { - removables.resize(removables.size() + data.sections_to_flush.size()); - std::copy(data.sections_to_flush.begin(), data.sections_to_flush.end(), removables.begin() + data.sections_to_unprotect.size()); - } + // Sanity checks + AUDIT(data.fault_range.is_page_range()); + AUDIT(data.invalidate_range.is_page_range()); + AUDIT(data.is_flushed()); - const auto intersect_info = subtractive_intersect(removables, data.sections_to_exclude); - for (const auto &range : intersect_info) + // Merge ranges to unprotect + address_range_vector ranges_to_unprotect; + address_range_vector ranges_to_protect_ro; + ranges_to_unprotect.reserve(data.sections_to_unprotect.size() + data.sections_to_flush.size() + data.sections_to_exclude.size()); + + merge_protected_ranges(ranges_to_unprotect, data.sections_to_unprotect); + merge_protected_ranges(ranges_to_unprotect, data.sections_to_flush); + AUDIT(!ranges_to_unprotect.empty()); + + // Apply exclusions and collect ranges of excluded pages that need to be reprotected RO (i.e. 
only overlap RO regions) + if (!data.sections_to_exclude.empty()) + { + ranges_to_protect_ro.reserve(data.sections_to_exclude.size()); + + u32 no_access_count = 0; + for (const auto &excluded : data.sections_to_exclude) { - if (range.second) + address_range exclusion_range = excluded->get_locked_range(); + + // We need to make sure that the exclusion range is *inside* invalidate range + exclusion_range.intersect(data.invalidate_range); + + // Sanity checks + AUDIT( exclusion_range.is_page_range() ); + AUDIT(!exclusion_range.overlaps(data.fault_range)); + + // Apply exclusion + ranges_to_unprotect.exclude(exclusion_range); + + // Keep track of RO exclusions + // TODO ruipin: Bug here, we cannot add the whole exclusion range to ranges_to_reprotect, only the part inside invalidate_range + utils::protection prot = excluded->get_protection(); + if (prot == utils::protection::ro) { - utils::memory_protect(vm::base(range.first), range.second, utils::protection::rw); + ranges_to_protect_ro.merge(exclusion_range); + } + else if (prot == utils::protection::no) + { + no_access_count++; + } + else + { + fmt::throw_exception("Unreachable" HERE); } } - discard_set(data.sections_to_unprotect); - discard_set(data.sections_to_flush); - } - } - - // Get intersecting set - Returns all objects intersecting a given range and their owning blocks - std::vector> get_intersecting_set(u32 address, u32 range) - { - std::vector> result; - u32 last_dirty_block = UINT32_MAX; - bool repeat_loop = false; - const u64 cache_tag = get_system_time(); - - std::pair trampled_range = std::make_pair(address, address + range); - const bool strict_range_check = g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer; - - auto It = m_cache.begin(); - while (It != m_cache.end()) - { - const u32 base = It->first; - auto &range_data = It->second; - - // Ignore invalid or empty sets - if (trampled_range.first <= trampled_range.second && - !(trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second)) + // Exclude NA ranges from ranges_to_reprotect_ro + if (no_access_count > 0 && !ranges_to_protect_ro.empty()) { - for (int i = 0; i < range_data.data.size(); i++) + for (auto &exclusion : data.sections_to_exclude) { - auto &tex = range_data.data[i]; - if (tex.cache_tag == cache_tag) continue; //already processed - if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - - const auto bounds_test = (strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst) ? 
- rsx::overlap_test_bounds::full_range : - rsx::overlap_test_bounds::protected_range; - - auto overlapped = tex.overlaps_page(trampled_range, address, bounds_test); - if (std::get<0>(overlapped)) + if (exclusion->get_protection() != utils::protection::ro) { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - trampled_range = new_range; - repeat_loop = true; // we will need to repeat the loop again - last_dirty_block = base; // stop the repeat loop once we finish this block - } - - tex.cache_tag = cache_tag; - result.push_back({ &tex, &range_data }); + ranges_to_protect_ro.exclude(exclusion->get_locked_range()); } } } + } + AUDIT( !ranges_to_unprotect.empty() ); + + // Exclude the fault range if told to do so (this means the fault_range got unmapped or is otherwise invalid) + if (data.cause.exclude_fault_range()) + { + ranges_to_unprotect.exclude(data.fault_range); + ranges_to_protect_ro.exclude(data.fault_range); + + AUDIT(!ranges_to_unprotect.overlaps(data.fault_range)); + AUDIT(!ranges_to_protect_ro.overlaps(data.fault_range)); + } + else + { + AUDIT( ranges_to_unprotect.inside(data.invalidate_range) ); + AUDIT( ranges_to_protect_ro.inside(data.invalidate_range) ); + } + AUDIT( !ranges_to_protect_ro.overlaps(ranges_to_unprotect) ); + + // Unprotect and discard + protect_ranges(ranges_to_unprotect, utils::protection::rw); + protect_ranges(ranges_to_protect_ro, utils::protection::ro); + discard_set(data.sections_to_unprotect); + discard_set(data.sections_to_flush); + +#ifdef TEXTURE_CACHE_DEBUG + // Check that the cache looks sane + data.check_post_sanity(); +#endif // TEXTURE_CACHE_DEBUG + } + + // Return a set containing all sections that should be flushed/unprotected/reprotected + std::atomic m_last_section_cache_tag = 0; + intersecting_set get_intersecting_set(const address_range &fault_range, bool is_writing) + { + (void)is_writing; // silence unused formal parameter warning; used only for debug purposes if TEXTURE_CACHE_DEBUG is defined + + AUDIT( fault_range.is_page_range() ); + + const u64 cache_tag = ++m_last_section_cache_tag; + + intersecting_set result = {}; + address_range &invalidate_range = result.invalidate_range; + invalidate_range = fault_range; // Sections fully inside this range will be invalidated, others will be deemed false positives + + // Loop through cache and find pages that overlap the invalidate_range + u32 last_dirty_block = UINT32_MAX; + bool repeat_loop = false; + + // Not having full-range protections means some textures will check the confirmed range and not the locked range + const bool not_full_range_protected = (buffered_section::guard_policy != protection_policy::protect_policy_full_range); + section_bounds range_it_bounds = not_full_range_protected ? confirmed_range : locked_range; + + auto It = m_storage.range_begin(invalidate_range, range_it_bounds, true); // will iterate through locked sections only + while (It != m_storage.range_end()) + { + const u32 base = It.get_block().get_start(); // On the last loop, we stop once we're done with the last dirty block - if (!repeat_loop && base == last_dirty_block) + if (!repeat_loop && base > last_dirty_block) // note: blocks are iterated in order from lowest to highest base address break; + auto &tex = *It; + + AUDIT(tex.is_locked()); // we should be iterating locked sections only, but just to make sure... 
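+				// Sections already stamped with this pass's cache_tag were collected by a previous sweep and are skipped.
+				// Any other overlapping section is added to the result; if it also touches the fault range,
+				// invalidate_range is widened to also cover that section's pages and repeat_loop triggers
+				// another sweep, so sections that only overlap the enlarged range are collected as well.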
+ AUDIT(tex.cache_tag != cache_tag || last_dirty_block != UINT32_MAX); // cache tag should not match during the first loop + + if (tex.cache_tag != cache_tag) //flushable sections can be 'clean' but unlocked. TODO: Handle this better + { + const rsx::section_bounds bounds = tex.get_overlap_test_bounds(); + + if (range_it_bounds == bounds || tex.overlaps(invalidate_range, bounds)) + { + const auto new_range = tex.get_min_max(invalidate_range, bounds).to_page_range(); + AUDIT(new_range.is_page_range() && invalidate_range.inside(new_range)); + + const s32 signed_distance = tex.signed_distance(fault_range, section_bounds::locked_range); + const s32 distance = signed_distance < 0 ? -signed_distance : signed_distance; + const bool is_after_fault = (signed_distance >= 0); + + // The various chaining policies behave differently + bool extend_invalidate_range = tex.overlaps(fault_range, bounds); + + // Extend the various ranges + if (extend_invalidate_range && new_range != invalidate_range) + { + if (new_range.end > invalidate_range.end) + It.set_end(new_range.end); + + invalidate_range = new_range; + repeat_loop = true; // we will need to repeat the loop again + last_dirty_block = base; // stop the repeat loop once we finish this block + } + + // Add texture to result, and update its cache tag + tex.cache_tag = cache_tag; + result.sections.push_back(&tex); + + if (tex.is_flushable()) + { + result.has_flushables = true; + } + } + } + // Iterate It++; // repeat_loop==true means some blocks are still dirty and we need to repeat the loop again - if (repeat_loop && It == m_cache.end()) + if (repeat_loop && It == m_storage.range_end()) { - It = m_cache.begin(); + It = m_storage.range_begin(invalidate_range, range_it_bounds, true); repeat_loop = false; } } + AUDIT( result.invalidate_range.is_page_range() ); + +#ifdef TEXTURE_CACHE_DEBUG + // naive check that sections are not duplicated in the results + for (auto §ion1 : result.sections) + { + size_t count = 0; + for (auto §ion2 : result.sections) + { + if (section1 == section2) count++; + } + verify(HERE), count == 1; + } + + // Check that the number of sections we "found" matches the sections known to be in the invalidation range + const u32 count = static_cast(result.sections.size()); + const auto inv_min_no_ro = tex_cache_checker.get_minimum_number_of_sections(invalidate_range); + const u16 inv_min = inv_min_no_ro.first + (is_writing ? inv_min_no_ro.second : 0); + ASSERT(count >= inv_min); +#endif //TEXTURE_CACHE_DEBUG + return result; } + //Invalidate range base implementation template - thrashed_set invalidate_range_impl_base(u32 address, u32 range, bool is_writing, bool discard_only, bool allow_flush, Args&&... extras) + thrashed_set invalidate_range_impl_base(const address_range &fault_range_in, invalidation_cause cause, Args&&... 
extras) { - if (!region_intersects_cache(address, range, is_writing)) - return {}; +#ifdef TEXTURE_CACHE_DEBUG + // Check that the cache has the correct protections + tex_cache_checker.verify(); +#endif // TEXTURE_CACHE_DEBUG - auto trampled_set = get_intersecting_set(address, range); + AUDIT( cause.valid() ); + AUDIT( fault_range_in.valid() ); + address_range fault_range = fault_range_in.to_page_range(); - if (trampled_set.size() > 0) + auto trampled_set = std::move(get_intersecting_set(fault_range, cause.is_write())); + + thrashed_set result = {}; + result.cause = cause; + result.fault_range = fault_range; + result.invalidate_range = trampled_set.invalidate_range; + + if (cause == invalidation_cause::unmap && !trampled_set.sections.empty()) { - const auto mem_base = (address & ~4095u); - const auto mem_end = (address + range + 4095u) & ~4095u; - const auto mem_range = std::make_pair(mem_base, mem_end - mem_base); - - update_cache_tag(); - - bool deferred_flush = false; - bool allow_rebuild = true; - - thrashed_set result = {}; - result.violation_handled = true; - - if (!discard_only) + // We discard all sections fully inside fault_range + for (auto &obj : trampled_set.sections) { - for (auto &obj : trampled_set) + auto &tex = *obj; + if (tex.inside(fault_range, section_bounds::locked_range)) { - if (obj.first->is_flushable()) - { - if (obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range)) - { - // At least one section will introduce new data unconditionally - allow_rebuild = false; - break; - } - } - } - - deferred_flush = !allow_rebuild && !allow_flush; - } - - for (auto &obj : trampled_set) - { - if (!discard_only) - { - // NOTE: The memory test is page aligned to prevent continuously faulting in the page range - if (allow_rebuild && !obj.first->overlaps(mem_range, rsx::overlap_test_bounds::full_range)) - { - // False positive - result.sections_to_exclude.push_back(obj.first); - continue; - } - - if (obj.first->is_flushable()) - { - verify(HERE), !allow_rebuild; - - //Write if and only if no one else has trashed section memory already - //TODO: Proper section management should prevent this from happening - //TODO: Blit engine section merge support and/or partial texture memory buffering - if (!obj.first->test_memory_head() || !obj.first->test_memory_tail()) - { - if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always) - { - //Someone decided to overwrite memory specified as an active framebuffer - m_flush_always_cache.erase(obj.first->get_section_base()); - } - - //Contents clobbered, destroy this - if (!obj.first->is_dirty()) - { - obj.first->set_dirty(true); - m_unreleased_texture_objects++; - } - - result.sections_to_unprotect.push_back(obj.first); - } - else - { - result.sections_to_flush.push_back(obj.first); - } - - continue; - } - else - { - //allow_flush = false and not synchronized - if (!obj.first->is_dirty()) - { - obj.first->set_dirty(true); - m_unreleased_texture_objects++; - } - - result.sections_to_unprotect.push_back(obj.first); - continue; - } - } - - verify(HERE), discard_only; - - m_unreleased_texture_objects++; - - obj.first->discard(); - obj.second->remove_one(); - } - - if (!result.sections_to_flush.empty()) - { - if (deferred_flush) - { - result.num_flushable = static_cast(result.sections_to_flush.size()); - result.address_base = address; - result.address_range = range; - result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); - return result; + // Discard and destroy - this section won't be needed any more + 
tex.discard(); + tex.destroy(); } else { - verify(HERE), allow_flush; - flush_set(result, std::forward(extras)...); + if (tex.is_flushable()) + { + // TODO: Flush only the part outside the fault_range + LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (invalidate_range_impl_base)", tex.get_section_base()); + } + + tex.set_dirty(true); } } - unprotect_set(result); +#ifdef TEXTURE_CACHE_DEBUG + // Notify the checker that fault_range got discarded + tex_cache_checker.discard(fault_range); +#endif + + // If invalidate_range is fault_range, we can stop now + const address_range invalidate_range = trampled_set.invalidate_range; + if (invalidate_range == fault_range) + { + result.violation_handled = true; +#ifdef TEXTURE_CACHE_DEBUG + // Post-check the result + result.check_post_sanity(); +#endif + return result; + } + AUDIT(fault_range.inside(invalidate_range)); + + // There are textures that need to be invalidated, we now trigger another intersecting_set search on it + // "invalidate_range" contains the range of sections that overlaps the unmap boundary + // We set "fault_range = invalidate_range" to cause a forced invalidation over that whole range, + // and proceed as normal. + // NOTE: result.fault_range *must* stay the original fault_range otherwise everything breaks + fault_range = invalidate_range; + trampled_set = std::move(get_intersecting_set(fault_range, true)); + result.invalidate_range = trampled_set.invalidate_range; + } + + + // Decide which sections to flush, unprotect, and exclude + if (!trampled_set.sections.empty()) + { + update_cache_tag(); + + for (auto &obj : trampled_set.sections) + { + auto &tex = *obj; + + const rsx::section_bounds bounds = tex.get_overlap_test_bounds(); + + if ( + // RO sections during a read invalidation can be ignored (unless there are flushables in trampled_set, since those could overwrite RO data) + // TODO: Also exclude RO sections even if there are flushables + (invalidation_keep_ro_during_read && !trampled_set.has_flushables && !cause.is_write() && tex.get_protection() == utils::protection::ro) || + // Sections that are not fully contained in invalidate_range can be ignored + !tex.inside(trampled_set.invalidate_range, bounds) || + // Unsynchronized sections that do not overlap the fault range directly can also be ignored + (invalidation_ignore_unsynchronized && tex.is_flushable() && !tex.is_synchronized() && !tex.overlaps(fault_range, bounds)) + ) + { + // False positive + result.sections_to_exclude.push_back(&tex); + continue; + } + + if (tex.is_flushable()) + { + //Write if and only if no one else has trashed section memory already + //TODO: Proper section management should prevent this from happening + //TODO: Blit engine section merge support and/or partial texture memory buffering + if (tex.is_dirty() || !tex.test_memory_head() || !tex.test_memory_tail()) + { + //Contents clobbered, destroy this + if (!tex.is_dirty()) + { + tex.set_dirty(true); + } + + result.sections_to_unprotect.push_back(&tex); + } + else + { + result.sections_to_flush.push_back(&tex); + } + + continue; + } + else + { + //allow_flush = false and not synchronized + if (!tex.is_dirty()) + { + AUDIT( tex.get_memory_read_flags() != memory_read_flags::flush_always ); + tex.set_dirty(true); + } + + result.sections_to_unprotect.push_back(&tex); + continue; + } + + fmt::throw_exception("Unreachable " HERE); + } + - //Everything has been handled - result = {}; result.violation_handled = true; +#ifdef TEXTURE_CACHE_DEBUG + // Check that result makes sense + 
result.check_pre_sanity(); +#endif // TEXTURE_CACHE_DEBUG + + const bool has_flushables = !result.sections_to_flush.empty(); + const bool has_unprotectables = !result.sections_to_unprotect.empty(); + + if (cause.is_deferred() && has_flushables) + { + // There is something to flush, but we've been asked to defer it + result.num_flushable = static_cast(result.sections_to_flush.size()); + result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); + return result; + } + else if (has_flushables || has_unprotectables) + { + AUDIT(!has_flushables || cause.allow_flush()); + + // We have something to flush and are allowed to flush now + // or there is nothing to flush but we have something to unprotect + if (has_flushables) + { + flush_set(result, std::forward(extras)...); + } + + unprotect_set(result); + + //Everything has been handled + result.clear_sections(); + } + else + { + // This is a read and all overlapping sections were RO and were excluded + AUDIT(!cause.is_write() && !result.sections_to_exclude.empty()); + + // We did not handle this violation + result.clear_sections(); + result.violation_handled = false; + } + +#ifdef TEXTURE_CACHE_DEBUG + // Post-check the result + result.check_post_sanity(); +#endif // TEXTURE_CACHE_DEBUG + return result; } return {}; } + protected: inline bool is_hw_blit_engine_compatible(u32 format) const { switch (format) @@ -1049,30 +930,33 @@ namespace rsx public: - texture_cache() {} + texture_cache() : m_storage(this) {} ~texture_cache() {} virtual void destroy() = 0; virtual bool is_depth_texture(u32, u32) = 0; virtual void on_frame_end() = 0; - std::vector find_texture_from_range(u32 rsx_address, u32 range) + void clear() + { + m_storage.clear(); + } + + + std::vector find_texture_from_range(const address_range &test_range) { std::vector results; - auto test = std::make_pair(rsx_address, range); - for (auto &address_range : m_cache) + + for (auto It = m_storage.range_begin(test_range, full_range); It != m_storage.range_end(); It++) { - auto &range_data = address_range.second; - if (!range_data.overlaps(rsx_address, range)) continue; + auto &tex = *It; - for (auto &tex : range_data.data) - { - if (tex.get_section_base() > rsx_address) - continue; + // TODO ruipin: Removed as a workaround for a bug, will need to be fixed by kd-11 + //if (tex.get_section_base() > test_range.start) + // continue; - if (!tex.is_dirty() && tex.overlaps(test, rsx::overlap_test_bounds::full_range)) - results.push_back(&tex); - } + if (!tex.is_dirty()) + results.push_back(&tex); } return results; @@ -1080,227 +964,303 @@ namespace rsx section_storage_type *find_texture_from_dimensions(u32 rsx_address, u16 width = 0, u16 height = 0, u16 depth = 0, u16 mipmaps = 0) { - auto found = m_cache.find(get_block_address(rsx_address)); - if (found != m_cache.end()) + auto &block = m_storage.block_for(rsx_address); + for (auto &tex : block) { - auto &range_data = found->second; - for (auto &tex : range_data.data) + if (tex.matches(rsx_address, width, height, depth, mipmaps) && !tex.is_dirty()) { - if (tex.matches(rsx_address, width, height, depth, mipmaps) && !tex.is_dirty()) - { - return &tex; - } + return &tex; } } return nullptr; } - section_storage_type& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 depth = 0, u16 mipmaps = 0) + section_storage_type* find_cached_texture(const address_range &range, bool create_if_not_found, bool confirm_dimensions, u16 width = 0, u16 height = 0, u16 depth = 0, u16 mipmaps = 
0) { - const u32 block_address = get_block_address(rsx_address); + auto &block = m_storage.block_for(range); - auto found = m_cache.find(block_address); - if (found != m_cache.end()) + section_storage_type *best_fit = nullptr; + section_storage_type *first_dirty = nullptr; + section_storage_type *mismatch = nullptr; +#ifdef TEXTURE_CACHE_DEBUG + section_storage_type *res = nullptr; +#endif + + // Try to find match in block + for (auto &tex : block) { - auto &range_data = found->second; - std::pair best_fit = {}; - - for (auto &tex : range_data.data) + if (tex.matches(range)) { - if (tex.matches(rsx_address, rsx_size)) + if (!tex.is_dirty()) { - if (!tex.is_dirty()) + if (!confirm_dimensions || tex.matches_dimensions(width, height, depth, mipmaps)) { - if (!confirm_dimensions || tex.matches(rsx_address, width, height, depth, mipmaps)) - { - if (!tex.is_locked()) - { - //Data is valid from cache pov but region has been unlocked and flushed - if (tex.get_context() == texture_upload_context::framebuffer_storage || - tex.get_context() == texture_upload_context::blit_engine_dst) - range_data.notify(); - } - - return tex; - } - else - { - LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address); - LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height()); - } +#ifndef TEXTURE_CACHE_DEBUG + return &tex; +#else + ASSERT(res == nullptr); + res = &tex; +#endif } - else if (!best_fit.first) + else { - //By grabbing a ref to a matching entry, duplicates are avoided - best_fit = { &tex, &range_data }; + mismatch = &tex; } } + else if (best_fit == nullptr && tex.can_be_reused()) + { + //By grabbing a ref to a matching entry, duplicates are avoided + best_fit = &tex; + } } - - if (best_fit.first) + else if (first_dirty == nullptr && tex.can_be_reused()) { - if (best_fit.first->exists()) - { - if (best_fit.first->get_context() != rsx::texture_upload_context::framebuffer_storage) - m_texture_memory_in_use -= best_fit.first->get_section_size(); - - m_unreleased_texture_objects--; - free_texture_section(*best_fit.first); - } - - best_fit.second->notify(rsx_address, rsx_size); - return *best_fit.first; - } - - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) - { - if (tex.exists()) - { - if (tex.get_context() != rsx::texture_upload_context::framebuffer_storage) - m_texture_memory_in_use -= tex.get_section_size(); - - m_unreleased_texture_objects--; - free_texture_section(tex); - } - - range_data.notify(rsx_address, rsx_size); - return tex; - } + first_dirty = &tex; } } - section_storage_type tmp; +#ifdef TEXTURE_CACHE_DEBUG + if (res != nullptr) + return res; +#endif + + if (mismatch != nullptr) + { + auto &tex = *mismatch; + LOG_WARNING(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)", + range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps()); + } + + if (!create_if_not_found) + return nullptr; + + // If found, use the best fitting section + if (best_fit) + { + best_fit->destroy(); + + return best_fit; + } + + // Return the first dirty section found, if any + if (first_dirty != nullptr) + { + first_dirty->destroy(); + + return first_dirty; + } + + // Create and return a new section update_cache_tag(); - m_cache[block_address].add(tmp, rsx_address, rsx_size); - return m_cache[block_address].data.back(); + auto tex = &block.create_section(); + return 
tex; } - section_storage_type* find_flushable_section(u32 address, u32 range) + section_storage_type* find_flushable_section(const address_range &memory_range) { - auto found = m_cache.find(get_block_address(address)); - if (found != m_cache.end()) + auto &block = m_storage.block_for(memory_range); + for (auto &tex : block) { - auto &range_data = found->second; - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable() && !tex.is_flushed()) continue; + if (tex.is_dirty()) continue; + if (!tex.is_flushable() && !tex.is_flushed()) continue; - if (tex.matches(address, range)) - return &tex; - } + if (tex.matches(memory_range)) + return &tex; } return nullptr; } template - void lock_memory_region(image_storage_type* image, u32 memory_address, u32 memory_size, u32 width, u32 height, u32 pitch, Args&&... extras) + void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras) { - std::lock_guard lock(m_cache_mutex); - section_storage_type& region = find_cached_texture(memory_address, memory_size, false); + AUDIT( g_cfg.video.write_color_buffers ); // this method is only called when WCB is enabled - if (region.get_context() != texture_upload_context::framebuffer_storage && - region.exists()) + std::lock_guard lock(m_cache_mutex); + + section_storage_type& region = *find_cached_texture(rsx_range, true, false); + + if (region.get_context() != texture_upload_context::framebuffer_storage && region.exists()) { //This space was being used for other purposes other than framebuffer storage //Delete used resources before attaching it to framebuffer memory read_only_tex_invalidate = true; - free_texture_section(region); - m_texture_memory_in_use -= region.get_section_size(); + + // We are going to reprotect this section in a second, so discard it here + if (region.is_locked()) + region.discard(); + + // Destroy the resources + region.destroy(); } if (!region.is_locked()) { - region.reset(memory_address, memory_size); - region.set_dirty(false); - no_access_range = region.get_min_max(no_access_range); + // New region, we must prepare it + region.reset(rsx_range); + no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range); + region.set_context(texture_upload_context::framebuffer_storage); + region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); + } + else + { + // Re-using locked fbo region + AUDIT(region.matches(rsx_range)); + ASSERT(region.get_context() == texture_upload_context::framebuffer_storage); + ASSERT(region.get_image_type() == rsx::texture_dimension_extended::texture_dimension_2d); } region.create(width, height, 1, 1, image, pitch, false, std::forward(extras)...); - region.set_context(texture_upload_context::framebuffer_storage); - region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d); - region.set_memory_read_flags(memory_read_flags::flush_always); + region.set_dirty(false); region.touch(m_cache_update_tag); - m_flush_always_cache[memory_address] = memory_size; + // Add to flush always cache + if (region.get_memory_read_flags() != memory_read_flags::flush_always) + { + region.set_memory_read_flags(memory_read_flags::flush_always, false); + update_flush_always_cache(region, true); + } + else + { + AUDIT(m_flush_always_cache.find(region.get_section_range()) != m_flush_always_cache.end()); + } // Test for invalidated sections from surface cache occupying same address range - const auto mem_base = (memory_address & 
~4095u); - const auto mem_end = (memory_address + memory_size + 4095u) & ~4095u; - const auto &overlapped = find_texture_from_range(mem_base, mem_end - mem_base); - if (overlapped.size() > 1) - { - const auto mem_range = std::make_pair(memory_address, memory_size); - for (auto surface : overlapped) - { - if (surface == ®ion) - continue; + const auto mem_range = rsx_range.to_page_range(); + const auto &overlapped = find_texture_from_range(mem_range); - if (surface->get_context() != rsx::texture_upload_context::framebuffer_storage) + bool found_region = false; + for (auto* surface : overlapped) + { + if (surface == ®ion) + { + found_region = true; + continue; + } + + if (surface->get_context() == rsx::texture_upload_context::framebuffer_storage) + { + if (surface->get_section_base() != rsx_range.start) + // HACK: preserve other overlapped sections despite overlap unless new section is superseding + // TODO: write memory to cell or redesign sections to preserve the data + // TODO ruipin: can this be done now? + continue; + } + + // Memory is shared with another surface + // Discard it - the backend should ensure memory contents are preserved if needed + // TODO ruipin: This fails the protection checker. Refactor to use invalidate_range_impl_base + surface->set_dirty(true); + + if (surface->is_locked()) + { + AUDIT(surface->get_memory_read_flags() != memory_read_flags::flush_always); + + if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail()) { - m_unreleased_texture_objects++; + if (!surface->inside(rsx_range, rsx::section_bounds::full_range)) + { + // TODO: This section contains data that should be flushed + LOG_TODO(RSX, "Flushable section 0x%x data may have been lost (lock_memory_region)", surface->get_section_base()); + } + } + + if (surface->inside(region, rsx::section_bounds::locked_range)) + { + // Since this surface falls inside "region", we don't need to unlock and can simply discard + surface->discard(); } else { - if (surface->get_section_base() != memory_address) - // HACK: preserve other overlapped sections despite overlap unless new section is superseding - // TODO: write memory to cell or redesign sections to preserve the data - continue; - } - - // Memory is shared with another surface - // Discard it - the backend should ensure memory contents are preserved if needed - surface->set_dirty(true); - - if (surface->is_locked()) - { - if (surface->is_flushable() && surface->test_memory_head() && surface->test_memory_tail()) - { - if (!surface->overlaps(mem_range, rsx::overlap_test_bounds::full_range)) - { - // TODO: This section contains data that should be flushed - LOG_TODO(RSX, "Flushable section data may have been lost (0x%x)", surface->get_section_base()); - } - } - + // TODO: Exclude other NA/RO regions overlapping this one but not "region". 
+ // Exclude region to avoid having the region's locked_range unprotected for a split second + const auto &srfc_rng = surface->get_section_range(); + LOG_TODO(RSX, "Valid region data may have been incorrectly unprotected (0x%x-0x%x)", srfc_rng.start, srfc_rng.end); surface->unprotect(); - m_cache[get_block_address(surface->get_section_base())].remove_one(); } } } + ASSERT(found_region); // Delay protection until here in case the invalidation block above has unprotected pages in this range - region.reprotect(utils::protection::no, { 0, memory_size }); + region.reprotect(utils::protection::no, { 0, rsx_range.length() }); update_cache_tag(); + +#ifdef TEXTURE_CACHE_DEBUG + // Check that the cache makes sense + tex_cache_checker.verify(); +#endif // TEXTURE_CACHE_DEBUG } - void set_memory_read_flags(u32 memory_address, u32 memory_size, memory_read_flags flags) + void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags) { std::lock_guard lock(m_cache_mutex); - if (flags != memory_read_flags::flush_always) - m_flush_always_cache.erase(memory_address); - - section_storage_type& region = find_cached_texture(memory_address, memory_size, false); - - if (!region.exists() || region.get_context() != texture_upload_context::framebuffer_storage) + auto* region_ptr = find_cached_texture(memory_range, false, false); + if (region_ptr == nullptr) + { + AUDIT( m_flush_always_cache.find(memory_range) == m_flush_always_cache.end() ); + LOG_ERROR(RSX, "set_memory_flags(0x%x, 0x%x, %d): region_ptr == nullptr"); return; + } - if (flags == memory_read_flags::flush_always) - m_flush_always_cache[memory_address] = memory_size; + auto& region = *region_ptr; - region.set_memory_read_flags(flags); + if (region.is_dirty() || !region.exists() || region.get_context() != texture_upload_context::framebuffer_storage) + { +#ifdef TEXTURE_CACHE_DEBUG + if (!region.is_dirty()) + { + if (flags == memory_read_flags::flush_once) + verify(HERE), m_flush_always_cache.find(memory_range) == m_flush_always_cache.end(); + else + verify(HERE), m_flush_always_cache[memory_range] == ®ion; + } +#endif // TEXTURE_CACHE_DEBUG + return; + } + + update_flush_always_cache(region, flags == memory_read_flags::flush_always); + region.set_memory_read_flags(flags, false); } + virtual void on_memory_read_flags_changed(section_storage_type §ion, rsx::memory_read_flags flags) + { +#ifdef TEXTURE_CACHE_DEBUG + const auto &memory_range = section.get_section_range(); + if (flags == memory_read_flags::flush_once) + verify(HERE), m_flush_always_cache[memory_range] == §ion; + else + verify(HERE), m_flush_always_cache.find(memory_range) == m_flush_always_cache.end(); +#endif + update_flush_always_cache(section, flags == memory_read_flags::flush_always); + } + + private: + inline void update_flush_always_cache(section_storage_type §ion, bool add) + { + const address_range& range = section.get_section_range(); + if (add) + { + // Add to m_flush_always_cache + AUDIT(m_flush_always_cache.find(range) == m_flush_always_cache.end()); + m_flush_always_cache[range] = §ion; + } + else + { + // Remove from m_flush_always_cache + AUDIT(m_flush_always_cache[range] == §ion); + m_flush_always_cache.erase(range); + } + } + + public: template - bool flush_memory_to_cache(u32 memory_address, u32 memory_size, bool skip_synchronized, u32 allowed_types_mask, Args&&... extra) + bool flush_memory_to_cache(const address_range &memory_range, bool skip_synchronized, u32 allowed_types_mask, Args&&... 
extra) { std::lock_guard lock(m_cache_mutex); - section_storage_type* region = find_flushable_section(memory_address, memory_size); + section_storage_type* region = find_flushable_section(memory_range); //Check if section was released, usually if cell overwrites a currently bound render target if (region == nullptr) @@ -1321,10 +1281,10 @@ namespace rsx } template - bool load_memory_from_cache(u32 memory_address, u32 memory_size, Args&&... extras) + bool load_memory_from_cache(const address_range &memory_range, Args&&... extras) { reader_lock lock(m_cache_mutex); - section_storage_type *region = find_flushable_section(memory_address, memory_size); + section_storage_type *region = find_flushable_section(memory_range); if (region && !region->is_dirty()) { @@ -1336,76 +1296,27 @@ namespace rsx return false; } - std::tuple address_is_flushable(u32 address) - { - if (address < no_access_range.first || - address > no_access_range.second) - return std::make_tuple(false, nullptr); - - reader_lock lock(m_cache_mutex); - - auto found = m_cache.find(get_block_address(address)); - if (found != m_cache.end()) - { - auto &range_data = found->second; - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable()) continue; - - if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) - return std::make_tuple(true, &tex); - } - } - - for (auto &address_range : m_cache) - { - if (address_range.first == address) - continue; - - auto &range_data = address_range.second; - - //Quickly discard range - const u32 lock_base = address_range.first & ~0xfff; - const u32 lock_limit = align(range_data.max_range + address_range.first, 4096); - - if (address < lock_base || address >= lock_limit) - continue; - - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable()) continue; - - if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) - return std::make_tuple(true, &tex); - } - } - - return std::make_tuple(false, nullptr); - } - template - thrashed_set invalidate_address(u32 address, bool is_writing, bool allow_flush, Args&&... extras) + thrashed_set invalidate_address(u32 address, invalidation_cause cause, Args&&... extras) { //Test before trying to acquire the lock - const auto range = 4096 - (address & 4095); - if (!region_intersects_cache(address, range, is_writing)) + const auto range = page_for(address); + if (!region_intersects_cache(range, cause.is_write())) return{}; std::lock_guard lock(m_cache_mutex); - return invalidate_range_impl_base(address, range, is_writing, false, allow_flush, std::forward(extras)...); + return invalidate_range_impl_base(range, cause, std::forward(extras)...); } template - thrashed_set invalidate_range(u32 address, u32 range, bool is_writing, bool discard, bool allow_flush, Args&&... extras) + thrashed_set invalidate_range(const address_range &range, invalidation_cause cause, Args&&... 
extras) { //Test before trying to acquire the lock - if (!region_intersects_cache(address, range, is_writing)) + if (!region_intersects_cache(range, cause.is_write())) return {}; std::lock_guard lock(m_cache_mutex); - return invalidate_range_impl_base(address, range, is_writing, discard, allow_flush, std::forward(extras)...); + return invalidate_range_impl_base(range, cause, std::forward(extras)...); } template @@ -1413,6 +1324,9 @@ namespace rsx { std::lock_guard lock(m_cache_mutex); + AUDIT(data.cause.is_deferred()); + AUDIT(!data.flushed); + if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag) { //1. Write memory to cpu side @@ -1424,7 +1338,7 @@ namespace rsx else { // The cache contents have changed between the two readings. This means the data held is useless - invalidate_range_impl_base(data.address_base, data.address_range, true, false, true, std::forward(extras)...); + invalidate_range_impl_base(data.fault_range, data.cause.undefer(), std::forward(extras)...); } return true; @@ -1434,21 +1348,20 @@ namespace rsx { m_num_cache_misses++; - const u32 memory_address = tex.get_section_base(); - const u32 memory_size = tex.get_section_size(); + const auto& memory_range = tex.get_section_range(); const auto fmt = tex.get_format(); - auto It = m_cache_miss_statistics_table.find(memory_address); + auto It = m_cache_miss_statistics_table.find(memory_range); if (It == m_cache_miss_statistics_table.end()) { - m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; + m_cache_miss_statistics_table[memory_range] = { 1, fmt }; return; } auto &value = It->second; - if (value.format != fmt || value.block_size != memory_size) + if (value.format != fmt) { - m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; + value = { 1, fmt }; return; } @@ -1456,22 +1369,22 @@ namespace rsx } template - bool flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras) + bool flush_if_cache_miss_likely(texture_format fmt, const address_range &memory_range, Args&&... extras) { - auto It = m_cache_miss_statistics_table.find(memory_address); + auto It = m_cache_miss_statistics_table.find(memory_range); if (It == m_cache_miss_statistics_table.end()) { - m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; + m_cache_miss_statistics_table[memory_range] = { 0, fmt }; return false; } auto &value = It->second; - if (value.format != fmt || value.block_size < memory_size) + if (value.format != fmt) { //Reset since the data has changed //TODO: Keep track of all this information together - m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; + value = { 0, fmt }; } // By default, blit targets are always to be tested for readback @@ -1481,7 +1394,7 @@ namespace rsx if (value.misses >= m_cache_miss_threshold) { // Disable prediction if memory is flagged as flush_always - if (m_flush_always_cache.find(memory_address) == m_flush_always_cache.end()) + if (m_flush_always_cache.find(memory_range) == m_flush_always_cache.end()) { // TODO: Determine better way of setting threshold // Allow all types @@ -1489,7 +1402,7 @@ namespace rsx } } - if (!flush_memory_to_cache(memory_address, memory_size, true, flush_mask, std::forward(extras)...) && + if (!flush_memory_to_cache(memory_range, true, flush_mask, std::forward(extras)...) 
&& value.misses > 0) { value.misses--; @@ -1498,42 +1411,11 @@ namespace rsx return true; } - void purge_dirty() + void purge_unreleased_sections() { std::lock_guard lock(m_cache_mutex); - //Reclaims all graphics memory consumed by dirty textures - std::vector empty_addresses; - empty_addresses.resize(32); - - for (auto &address_range : m_cache) - { - auto &range_data = address_range.second; - - //if (range_data.valid_count == 0) - //empty_addresses.push_back(address_range.first); - - for (auto &tex : range_data.data) - { - if (!tex.is_dirty()) - continue; - - if (tex.exists() && - tex.get_context() != rsx::texture_upload_context::framebuffer_storage) - { - free_texture_section(tex); - m_texture_memory_in_use -= tex.get_section_size(); - } - } - } - - //Free descriptor objects as well - for (const auto &address : empty_addresses) - { - m_cache.erase(address); - } - - m_unreleased_texture_objects = 0; + m_storage.purge_unreleased_sections(); } image_view_type create_temporary_subresource(commandbuffer_type &cmd, deferred_subresource& desc) @@ -1740,6 +1622,8 @@ namespace rsx auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); bool requires_merging = false; + // TODO ruipin: This AUDIT fails due to a bug that kd will have to fix + //AUDIT( !overlapping.empty() ); if (overlapping.size() > 1) { // The returned values are sorted with oldest first and newest last @@ -1865,10 +1749,11 @@ namespace rsx { const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); const u32 tex_size = (u32)get_placed_texture_storage_size(tex, 256, 512); + const address_range tex_range = address_range::start_length(texaddr, tex_size); const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); const bool is_compressed_format = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT23 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT45); - if (!texaddr || !tex_size) + if (!texaddr || !tex_size || !tex_range.valid()) { LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X, w=%d, h=%d, p=%d, format=0x%X)", texaddr, tex_size, tex.width(), tex.height(), tex.pitch(), tex.format()); return {}; @@ -1913,7 +1798,7 @@ namespace rsx else { m_rtts.invalidate_surface_address(texaddr, false); - invalidate_address(texaddr, false, true, std::forward(extras)...); + invalidate_address(texaddr, invalidation_cause::read, std::forward(extras)...); } } @@ -1928,7 +1813,7 @@ namespace rsx else { m_rtts.invalidate_surface_address(texaddr, true); - invalidate_address(texaddr, false, true, std::forward(extras)...); + invalidate_address(texaddr, invalidation_cause::read, std::forward(extras)...); } } } @@ -1950,7 +1835,7 @@ namespace rsx if (!test_framebuffer(rsc.base_address)) { m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface); - invalidate_address(rsc.base_address, false, true, std::forward(extras)...); + invalidate_address(rsc.base_address, invalidation_cause::read, std::forward(extras)...); } else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) @@ -1993,8 +1878,8 @@ namespace rsx { if (!cached_texture->is_locked()) { + lock.upgrade(); cached_texture->set_dirty(true); - m_unreleased_texture_objects++; } } else @@ -2009,13 +1894,13 @@ namespace rsx if (is_hw_blit_engine_compatible(format)) { //Find based on range instead - auto overlapping_surfaces = 
find_texture_from_range(texaddr, tex_size); + auto overlapping_surfaces = find_texture_from_range(tex_range); if (!overlapping_surfaces.empty()) { for (const auto &surface : overlapping_surfaces) { if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst || - !surface->overlaps(std::make_pair(texaddr, tex_size), rsx::overlap_test_bounds::confirmed_range)) + !surface->overlaps(tex_range, rsx::section_bounds::confirmed_range)) continue; if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) @@ -2047,32 +1932,33 @@ namespace rsx } } } + + //Do direct upload from CPU as the last resort + const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); + auto subresources_layout = get_subresources_layout(tex); + + bool is_depth_format = false; + switch (format) + { + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + is_depth_format = true; + break; + } + + // Upgrade lock + lock.upgrade(); + + //Invalidate + invalidate_range_impl_base(tex_range, invalidation_cause::read, std::forward(extras)...); + + //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB + return{ upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, + texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled)->get_view(tex.remap(), tex.decoded_remap()), + texture_upload_context::shader_read, is_depth_format, scale_x, scale_y, extended_dimension }; } - - //Do direct upload from CPU as the last resort - std::lock_guard lock(m_cache_mutex); - const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); - auto subresources_layout = get_subresources_layout(tex); - - bool is_depth_format = false; - switch (format) - { - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - is_depth_format = true; - break; - } - - //Invalidate with writing=false, discard=false, rebuild=false, native_flush=true - invalidate_range_impl_base(texaddr, tex_size, false, false, true, std::forward(extras)...); - - //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB - m_texture_memory_in_use += (tex_pitch * tex_height); - return{ upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, - texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled)->get_view(tex.remap(), tex.decoded_remap()), - texture_upload_context::shader_read, is_depth_format, scale_x, scale_y, extended_dimension }; } template @@ -2136,14 +2022,14 @@ namespace rsx if (src_is_render_target && !test_framebuffer(src_subres.base_address)) { m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); - invalidate_address(src_subres.base_address, false, true, std::forward(extras)...); + invalidate_address(src_subres.base_address, invalidation_cause::read, std::forward(extras)...); src_is_render_target = false; } if (dst_is_render_target && !test_framebuffer(dst_subres.base_address)) { m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); - invalidate_address(dst_subres.base_address, false, true, std::forward(extras)...); + invalidate_address(dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); dst_is_render_target = false; } @@ 
-2209,8 +2095,8 @@ namespace rsx const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; lock.upgrade(); - invalidate_range_impl_base(src_address, memcpy_bytes_length, false, false, true, std::forward(extras)...); - invalidate_range_impl_base(dst_address, memcpy_bytes_length, true, false, true, std::forward(extras)...); + invalidate_range_impl_base(address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward(extras)...); + invalidate_range_impl_base(address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward(extras)...); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; } @@ -2239,14 +2125,14 @@ namespace rsx if (!dst_is_render_target) { // Check for any available region that will fit this one - auto overlapping_surfaces = find_texture_from_range(dst_address, dst.pitch * dst.clip_height); + auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height)); for (const auto &surface : overlapping_surfaces) { if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) continue; - if (surface->rsx_pitch != dst.pitch) + if (surface->get_rsx_pitch() != dst.pitch) continue; const auto old_dst_area = dst_area; @@ -2302,7 +2188,7 @@ namespace rsx //Create source texture if does not exist if (!src_is_render_target) { - auto overlapping_surfaces = find_texture_from_range(src_address, src.pitch * src.height); + auto overlapping_surfaces = find_texture_from_range(address_range::start_length(src_address, src.pitch * src.height)); auto old_src_area = src_area; for (const auto &surface : overlapping_surfaces) @@ -2312,7 +2198,7 @@ namespace rsx surface->get_context() == rsx::texture_upload_context::framebuffer_storage) continue; - if (surface->rsx_pitch != src.pitch) + if (surface->get_rsx_pitch() != src.pitch) continue; if (const u32 address_offset = src_address - surface->get_section_base()) @@ -2343,7 +2229,7 @@ namespace rsx { lock.upgrade(); - invalidate_range_impl_base(src_address, src.pitch * src.slice_h, false, false, true, std::forward(extras)...); + invalidate_range_impl_base(address_range::start_length(src_address, src.pitch * src.slice_h), invalidation_cause::read, std::forward(extras)...); const u16 pitch_in_block = src_is_argb8 ? 
src.pitch >> 2 : src.pitch >> 1; std::vector subresource_layout; @@ -2359,7 +2245,6 @@ namespace rsx vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src, subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled)->get_raw_texture(); - m_texture_memory_in_use += src.pitch * src.slice_h; typeless_info.src_context = texture_upload_context::blit_engine_src; } } @@ -2425,10 +2310,15 @@ namespace rsx } //TODO: Check for other types of format mismatch + const address_range dst_range = address_range::start_length(dst_address, dst.pitch * dst.height); + AUDIT( cached_dest == nullptr || cached_dest->overlaps(dst_range, section_bounds::full_range) ); if (format_mismatch) { lock.upgrade(); - invalidate_range_impl_base(cached_dest->get_section_base(), cached_dest->get_section_size(), true, false, true, std::forward(extras)...); + + // Invalidate as the memory is not reusable now + invalidate_range_impl_base(cached_dest->get_section_range(), invalidation_cause::write, std::forward(extras)...); + AUDIT(!cached_dest->is_locked()); dest_texture = 0; cached_dest = nullptr; @@ -2479,7 +2369,7 @@ namespace rsx dst_dimensions.height = section_length / dst.pitch; lock.upgrade(); - invalidate_range_impl_base(dst.rsx_address, section_length, true, false, true, std::forward(extras)...); + invalidate_range_impl_base(address_range::start_length(dst.rsx_address, section_length), invalidation_cause::write, std::forward(extras)...); const u16 pitch_in_block = dst_is_argb8 ? dst.pitch >> 2 : dst.pitch >> 1; std::vector subresource_layout; @@ -2504,19 +2394,13 @@ namespace rsx dest_texture = cached_dest->get_raw_texture(); typeless_info.dst_context = texture_upload_context::blit_engine_dst; - m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } if (cached_dest) { lock.upgrade(); - if (!cached_dest->is_locked()) - { - // Notify - m_cache[get_block_address(cached_dest->get_section_base())].notify(); - } - else if (cached_dest->is_synchronized()) + if (cached_dest->is_locked() && cached_dest->is_synchronized()) { // Premature readback m_num_cache_mispredictions++; @@ -2612,26 +2496,26 @@ namespace rsx for (const auto &It : m_flush_always_cache) { - auto& section = find_cached_texture(It.first, It.second); + auto& section = *(It.second); if (section.get_protection() != utils::protection::no) { - if (section.exists()) - { - //NOTE: find_cached_texture will increment block ctr - section.reprotect(utils::protection::no); - tag_framebuffer(It.first); - update_tag = true; - } - else - { - //This should never happen - LOG_ERROR(RSX, "Reprotection attempted on destroyed framebuffer section @ 0x%x+0x%x", It.first, It.second); - } + verify(HERE), section.exists(); + AUDIT(section.get_context() == texture_upload_context::framebuffer_storage); + AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always); + + section.reprotect(utils::protection::no); + tag_framebuffer(section.get_section_base()); + update_tag = true; } } if (update_tag) update_cache_tag(); m_flush_always_update_timestamp = m_cache_update_tag.load(std::memory_order_consume); + +#ifdef TEXTURE_CACHE_DEBUG + // Check that the cache has the correct protections + m_storage.verify_protection(); +#endif // TEXTURE_CACHE_DEBUG } } } @@ -2646,12 +2530,12 @@ namespace rsx virtual const u32 get_unreleased_textures_count() const { - return m_unreleased_texture_objects; + return m_storage.m_unreleased_texture_objects; } virtual const 
u32 get_texture_memory_in_use() const { - return m_texture_memory_in_use; + return m_storage.m_texture_memory_in_use; } virtual u32 get_num_flush_requests() const @@ -2691,13 +2575,16 @@ namespace rsx void tag_framebuffer(u32 texaddr) { - auto ptr = rsx::get_super_ptr(texaddr, 4).get(); + auto super_ptr = rsx::get_super_ptr(texaddr, 4); + volatile u32 *ptr = super_ptr.get(); *ptr = texaddr; + super_ptr.flush(0, 4); } bool test_framebuffer(u32 texaddr) { - auto ptr = rsx::get_super_ptr(texaddr, 4).get(); + auto super_ptr = rsx::get_super_ptr(texaddr, 4); + volatile const u32 *ptr = super_ptr.get(); return *ptr == texaddr; } }; diff --git a/rpcs3/Emu/RSX/Common/texture_cache_checker.h b/rpcs3/Emu/RSX/Common/texture_cache_checker.h new file mode 100644 index 0000000000..1030591c12 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/texture_cache_checker.h @@ -0,0 +1,221 @@ +#pragma once + +#include "../rsx_utils.h" + +#ifdef TEXTURE_CACHE_DEBUG +namespace rsx { + + class tex_cache_checker_t { + struct per_page_info_t { + u8 prot = 0; + u8 no = 0; + u8 ro = 0; + + FORCE_INLINE utils::protection get_protection() const + { + return static_cast(prot); + } + + FORCE_INLINE void set_protection(utils::protection prot) + { + this->prot = static_cast(prot); + } + + FORCE_INLINE void reset_refcount() + { + no = 0; + ro = 0; + } + + FORCE_INLINE u16 sum() const + { + return u16{ no } + ro; + } + + FORCE_INLINE bool verify() const + { + const utils::protection prot = get_protection(); + switch (prot) + { + case utils::protection::no: return no > 0; + case utils::protection::ro: return no == 0 && ro > 0; + case utils::protection::rw: return no == 0 && ro == 0; + default: ASSUME(0); + } + } + + FORCE_INLINE void add(utils::protection prot) + { + switch (prot) + { + case utils::protection::no: if (no++ == UINT8_MAX) fmt::throw_exception("add(protection::no) overflow with NO==%d", UINT8_MAX); return; + case utils::protection::ro: if (ro++ == UINT8_MAX) fmt::throw_exception("add(protection::ro) overflow with RO==%d", UINT8_MAX); return; + default: ASSUME(0); + } + } + + FORCE_INLINE void remove(utils::protection prot) + { + switch (prot) + { + case utils::protection::no: if (no-- == 0) fmt::throw_exception("remove(protection::no) overflow with NO==0"); return; + case utils::protection::ro: if (ro-- == 0) fmt::throw_exception("remove(protection::ro) overflow with RO==0"); return; + default: ASSUME(0); + } + } + }; + static_assert(sizeof(per_page_info_t) <= 4, "page_info_elmnt must be less than 4-bytes in size"); + + + // 4GB memory space / 4096 bytes per page = 1048576 pages + static constexpr size_t num_pages = 0x1'0000'0000 / 4096; + per_page_info_t _info[num_pages]; + + static constexpr size_t rsx_address_to_index(u32 address) + { + return (address / 4096); + } + + static constexpr u32 index_to_rsx_address(size_t idx) + { + return static_cast(idx * 4096); + } + + constexpr per_page_info_t* rsx_address_to_info_pointer(u32 address) + { + return &(_info[rsx_address_to_index(address)]); + } + + constexpr const per_page_info_t* rsx_address_to_info_pointer(u32 address) const + { + return &(_info[rsx_address_to_index(address)]); + } + + constexpr u32 info_pointer_to_address(const per_page_info_t* ptr) const + { + return index_to_rsx_address(static_cast(ptr - _info)); + } + + std::string prot_to_str(utils::protection prot) const + { + switch (prot) + { + case utils::protection::no: return "NA"; + case utils::protection::ro: return "RO"; + case utils::protection::rw: return "RW"; + default: fmt::throw_exception("Unreachable " 
HERE); + } + } + + public: + tex_cache_checker_t() + { + // Initialize array to all 0 + memset(&_info, 0, sizeof(_info)); + } + static_assert(static_cast(utils::protection::rw) == 0, "utils::protection::rw must have value 0 for the above constructor to work"); + + void set_protection(const address_range& range, utils::protection prot) + { + AUDIT(range.is_page_range()); + AUDIT(prot == utils::protection::no || prot == utils::protection::ro || prot == utils::protection::rw); + + for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++) + { + ptr->set_protection(prot); + } + } + + void discard(const address_range& range) + { + set_protection(range, utils::protection::rw); + } + + void reset_refcount() + { + for (per_page_info_t* ptr = rsx_address_to_info_pointer(0); ptr <= rsx_address_to_info_pointer(0xFF'FF'FF'FF); ptr++) + { + ptr->reset_refcount(); + } + } + + void add(const address_range& range, utils::protection prot) + { + AUDIT(range.is_page_range()); + AUDIT(prot == utils::protection::no || prot == utils::protection::ro); + + for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++) + { + ptr->add(prot); + } + } + + void remove(const address_range& range, utils::protection prot) + { + AUDIT(range.is_page_range()); + AUDIT(prot == utils::protection::no || prot == utils::protection::ro); + + for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++) + { + ptr->remove(prot); + } + } + + // Returns the a lower bound as to how many locked sections are known to be within the given range with each protection {NA,RO} + // The assumption here is that the page in the given range with the largest number of refcounted sections represents the lower bound to how many there must be + std::pair get_minimum_number_of_sections(const address_range& range) const + { + AUDIT(range.is_page_range()); + + u8 no = 0; + u8 ro = 0; + for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++) + { + no = std::max(no, ptr->no); + ro = std::max(ro, ptr->ro); + } + + return { no,ro }; + } + + void check_unprotected(const address_range& range, bool allow_ro = false, bool must_be_empty = true) const + { + AUDIT(range.is_page_range()); + for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++) + { + const auto prot = ptr->get_protection(); + if (prot != utils::protection::rw && (!allow_ro || prot != utils::protection::ro)) + { + const u32 addr = info_pointer_to_address(ptr); + fmt::throw_exception("Page at addr=0x%8x should be RW%s: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no); + } + + if (must_be_empty && ( + ptr->no > 0 || + (!allow_ro && ptr->ro > 0) + )) + { + const u32 addr = info_pointer_to_address(ptr); + fmt::throw_exception("Page at addr=0x%8x should not have any NA%s sections: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? 
" or RO" : "", prot_to_str(prot), ptr->ro, ptr->no); + } + } + } + + void verify() const + { + for (size_t idx = 0; idx < num_pages; idx++) + { + auto &elmnt = _info[idx]; + if (!elmnt.verify()) + { + const u32 addr = index_to_rsx_address(idx); + const utils::protection prot = elmnt.get_protection(); + fmt::throw_exception("Protection verification failed at addr=0x%x: Prot=%s, RO=%d, NA=%d", addr, prot_to_str(prot), elmnt.ro, elmnt.no); + } + } + } + }; + + extern tex_cache_checker_t tex_cache_checker; +}; // namespace rsx +#endif //TEXTURE_CACHE_DEBUG \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h new file mode 100644 index 0000000000..da26b6b082 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -0,0 +1,1544 @@ +#pragma once + +#include "../rsx_cache.h" +#include "../rsx_utils.h" +#include "TextureUtils.h" + +#include +#include + + +namespace rsx +{ + /** + * Helper enums/structs + */ + enum invalidation_chain_policy + { + invalidation_chain_none, // No chaining: Only sections that overlap the faulting page get invalidated. + invalidation_chain_full, // Full chaining: Sections overlapping the faulting page get invalidated, as well as any sections overlapping invalidated sections. + invalidation_chain_nearby // Invalidations chain if they are near to the fault ( + class ranged_storage_block_list + { + static_assert(array_size > 0, "array_elements must be positive non-zero"); + + public: + using value_type = section_storage_type; + using array_type = typename std::array; + using list_type = typename std::list; + using size_type = u32; + + // Iterator + template + class iterator_tmpl + { + public: + // Traits + using value_type = T; + using pointer = T * ; + using difference_type = int; + using reference = T & ; + using iterator_category = std::forward_iterator_tag; + + // Constructors + iterator_tmpl() = default; + iterator_tmpl(block_list *_block) : + block(_block), + list_it(_block->m_data.begin()), + idx(0) + { + if (_block->empty()) + idx = UINT32_MAX; + } + + private: + // Members + block_list *block; + list_iterator list_it = {}; + size_type idx = UINT32_MAX; + size_type array_idx = 0; + + inline void next() + { + idx++; + if (idx >= block->size()) + { + idx = UINT32_MAX; + return; + } + + array_idx++; + if (array_idx >= array_size) + { + array_idx = 0; + list_it++; + } + } + + public: + inline reference operator*() const { return (*list_it)[array_idx]; } + inline pointer operator->() const { return &((*list_it)[array_idx]); } + inline reference operator++() { next(); return **this; } + inline reference operator++(int) { auto &res = **this; next(); return res; } + inline bool operator==(const iterator_tmpl &rhs) const { return idx == rhs.idx; } + inline bool operator!=(const iterator_tmpl &rhs) const { return idx != rhs.idx; } + }; + + using iterator = iterator_tmpl; + using const_iterator = iterator_tmpl; + + // Members + size_type m_size = 0; + list_type m_data; + typename list_type::iterator m_data_it; + size_type m_array_idx; + size_type m_capacity; + + // Helpers + inline void next_array() + { + if (m_data_it == m_data.end() || ++m_data_it == m_data.end()) + { + m_data_it = m_data.emplace(m_data_it); + m_capacity += array_size; + } + + m_array_idx = 0; + } + + public: + // Constructor, Destructor + ranged_storage_block_list() : + m_data_it(m_data.end()), + m_array_idx(UINT32_MAX) + {} + + // Iterator + inline iterator begin() noexcept { return { this }; } + inline const_iterator 
begin() const noexcept { return { this }; } + constexpr iterator end() noexcept { return {}; } + constexpr const_iterator end() const noexcept { return {}; } + + // Operators + inline value_type& front() + { + AUDIT(!empty()); + return m_data.front()[0]; + } + + inline value_type& back() + { + AUDIT(m_data_it != m_data.end() && m_array_idx < array_size); + return (*m_data_it)[m_array_idx]; + } + + // Other operations on data + inline size_type size() const { return m_size; } + inline size_type capacity() const { return m_capacity; } + inline bool empty() const { return m_size == 0; } + + inline void clear() + { + m_size = 0; + m_array_idx = 0; + m_data_it = m_data.begin(); + } + + inline void free() + { + m_size = 0; + m_array_idx = 0; + m_capacity = 0; + m_data.resize(0); + m_data_it = m_data.end(); + } + + inline void reserve(size_type new_size) + { + if (new_size <= m_capacity) return; + size_type new_num_arrays = ((new_size - 1) / array_size) + 1; + m_data.reserve(new_num_arrays); + m_capacity = new_num_arrays * array_size; + } + + template <typename ...Args> + inline value_type& emplace_back(Args&&... args) + { + if (m_array_idx >= array_size) + { + next_array(); + } + + ASSERT(m_capacity > 0 && m_array_idx < array_size && m_data_it != m_data.end()); + + value_type *dest = &((*m_data_it)[m_array_idx++]); + new (dest) value_type(std::forward<Args>(args)...); + m_size++; + return *dest; + } + }; + + + + /** + * Ranged storage + */ + template + class texture_cache_base + { + public: + virtual void on_memory_read_flags_changed(section_storage_type &section, rsx::memory_read_flags flags) = 0; + virtual void on_section_destroyed(section_storage_type & /*section*/) {}; + }; + + + template + class ranged_storage_block + { + public: + using ranged_storage_type = _ranged_storage_type; + using section_storage_type = typename ranged_storage_type::section_storage_type; + using texture_cache_type = typename ranged_storage_type::texture_cache_type; + + //using block_container_type = std::list; + using block_container_type = ranged_storage_block_list; + using iterator = typename block_container_type::iterator; + using const_iterator = typename block_container_type::const_iterator; + + using size_type = typename block_container_type::size_type; + + static constexpr u32 num_blocks = ranged_storage_type::num_blocks; + static constexpr u32 block_size = ranged_storage_type::block_size; + + using unowned_container_type = std::unordered_set; + using unowned_iterator = typename unowned_container_type::iterator; + using unowned_const_iterator = typename unowned_container_type::const_iterator; + + private: + u32 index = 0; + address_range range = {}; + block_container_type sections = {}; + unowned_container_type unowned; // pointers to sections from other blocks that overlap this block + std::atomic exists_count = 0; + std::atomic locked_count = 0; + std::atomic unreleased_count = 0; + ranged_storage_type *m_storage = nullptr; + + inline void add_owned_section_overlaps(section_storage_type &section) + { + u32 end = section.get_section_range().end; + for (auto *block = next_block(); block != nullptr && end >= block->get_start(); block = block->next_block()) + { + block->add_unowned_section(section); + } + } + + inline void remove_owned_section_overlaps(section_storage_type &section) + { + u32 end = section.get_section_range().end; + for (auto *block = next_block(); block != nullptr && end >= block->get_start(); block = block->next_block()) + { + block->remove_unowned_section(section); + } + } + + public: + // Construction + ranged_storage_block()
= default; + + void initialize(u32 _index, ranged_storage_type *storage) + { + verify(HERE), m_storage == nullptr && storage != nullptr; + AUDIT(index < num_blocks); + + m_storage = storage; + index = _index; + range = address_range::start_length(index * block_size, block_size); + + AUDIT(range.is_page_range() && get_start() / block_size == index); + } + + /** + * Wrappers + */ + constexpr iterator begin() noexcept { return sections.begin(); } + constexpr const_iterator begin() const noexcept { return sections.begin(); } + inline iterator end() noexcept { return sections.end(); } + inline const_iterator end() const noexcept { return sections.end(); } + inline iterator at(size_type pos) { return sections.data(pos); } + inline const_iterator at(size_type pos) const { return sections.data(pos); } + inline bool empty() const { return sections.empty(); } + inline size_type size() const { return sections.size(); } + inline u32 get_exists_count() const { return exists_count; } + inline u32 get_locked_count() const { return locked_count; } + inline u32 get_unreleased_count() const { return unreleased_count; } + + /** + * Utilities + */ + ranged_storage_type& get_storage() const + { + AUDIT(m_storage != nullptr); + return *m_storage; + } + + texture_cache_type& get_texture_cache() const + { + return get_storage().get_texture_cache(); + } + + inline section_storage_type& create_section() + { + auto &res = sections.emplace_back(this); + return res; + } + + inline void clear() + { + for (auto &section : *this) + { + section.destroy(); + } + + AUDIT(exists_count == 0); + AUDIT(unreleased_count == 0); + AUDIT(locked_count == 0); + sections.clear(); + } + + inline bool is_first_block() const + { + return index == 0; + } + + inline bool is_last_block() const + { + return index == num_blocks - 1; + } + + inline ranged_storage_block* prev_block() const + { + if (is_first_block()) return nullptr; + return &get_storage()[index - 1]; + } + + inline ranged_storage_block* next_block() const + { + if (is_last_block()) return nullptr; + return &get_storage()[index + 1]; + } + + // Address range + inline const address_range& get_range() const { return range; } + inline u32 get_start() const { return range.start; } + inline u32 get_end() const { return range.end; } + inline u32 get_index() const { return index; } + inline bool overlaps(const section_storage_type& section, section_bounds bounds = full_range) const { return section.overlaps(range, bounds); } + inline bool overlaps(const address_range& _range) const { return range.overlaps(_range); } + + /** + * Section callbacks + */ + inline void on_section_protected(const section_storage_type &section) + { + (void)section; // silence unused warning without _AUDIT + AUDIT(section.is_locked()); + locked_count++; + } + + inline void on_section_unprotected(const section_storage_type &section) + { + (void)section; // silence unused warning without _AUDIT + AUDIT(!section.is_locked()); + u32 prev_locked = locked_count--; + ASSERT(prev_locked > 0); + } + + inline void on_section_range_valid(section_storage_type &section) + { + AUDIT(section.valid_range()); + AUDIT(range.overlaps(section.get_section_base())); + add_owned_section_overlaps(section); + } + + inline void on_section_range_invalid(section_storage_type &section) + { + AUDIT(section.valid_range()); + AUDIT(range.overlaps(section.get_section_base())); + remove_owned_section_overlaps(section); + } + + inline void on_section_resources_created(const section_storage_type &section) + { + (void)section; // silence unused warning without _AUDIT +
AUDIT(section.exists()); + + u32 prev_exists = exists_count++; + + if (prev_exists == 0) + { + m_storage->on_ranged_block_first_section_created(*this); + } + } + + inline void on_section_resources_destroyed(const section_storage_type &section) + { + (void)section; // silence unused warning without _AUDIT + AUDIT(!section.exists()); + + u32 prev_exists = exists_count--; + ASSERT(prev_exists > 0); + + if (prev_exists == 1) + { + m_storage->on_ranged_block_last_section_destroyed(*this); + } + } + + void on_section_released(const section_storage_type &/*section*/) + { + u32 prev_unreleased = unreleased_count--; + ASSERT(prev_unreleased > 0); + } + + void on_section_unreleased(const section_storage_type &/*section*/) + { + unreleased_count++; + } + + + /** + * Overlapping sections + */ + inline bool contains_unowned(section_storage_type &section) const + { + return (unowned.find(&section) != unowned.end()); + } + + inline void add_unowned_section(section_storage_type &section) + { + AUDIT(overlaps(section)); + AUDIT(section.get_section_base() < range.start); + AUDIT(!contains_unowned(section)); + unowned.insert(&section); + } + + inline void remove_unowned_section(section_storage_type &section) + { + AUDIT(overlaps(section)); + AUDIT(section.get_section_base() < range.start); + AUDIT(contains_unowned(section)); + unowned.erase(&section); + } + + inline unowned_iterator unowned_begin() { return unowned.begin(); } + inline unowned_const_iterator unowned_begin() const { return unowned.begin(); } + inline unowned_iterator unowned_end() { return unowned.end(); } + inline unowned_const_iterator unowned_end() const { return unowned.end(); } + inline bool unowned_empty() const { return unowned.empty(); } + }; + + + template + class ranged_storage + { + public: + static constexpr u32 block_size = 0x100'0000; + static_assert(block_size % 4096u == 0, "block_size must be a multiple of the page size"); + static constexpr u32 num_blocks = u32{0x1'0000'0000ull / block_size}; + static_assert((num_blocks > 0) && (u64{num_blocks} *block_size == 0x1'0000'0000ull), "Invalid block_size/num_blocks"); + + using section_storage_type = _section_storage_type; + using texture_cache_type = texture_cache_base; + using block_type = ranged_storage_block; + + private: + block_type blocks[num_blocks]; + texture_cache_type *m_tex_cache; + std::unordered_set m_in_use; + bool m_purging = false; + + public: + std::atomic m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory + std::atomic m_texture_memory_in_use = { 0 }; + + // Constructor + ranged_storage(texture_cache_type *tex_cache) : + m_tex_cache(tex_cache) + { + // Initialize blocks + for (u32 i = 0; i < num_blocks; i++) + { + blocks[i].initialize(i, this); + } + } + + /** + * Iterators + */ + + constexpr auto begin() { return std::begin(blocks); } + constexpr auto begin() const { return std::begin(blocks); } + constexpr auto end() { return std::end(blocks); } + constexpr auto end() const { return std::end(blocks); } + + /** + * Utilities + */ + inline block_type& block_for(u32 address) + { + return blocks[address / block_size]; + } + + inline const block_type& block_for(u32 address) const + { + return blocks[address / block_size]; + } + + inline block_type& block_for(const address_range &range) + { + AUDIT(range.valid()); + return block_for(range.start); + } + + inline block_type& block_for(const section_storage_type &section) + { + return block_for(section.get_section_base()); + } + + inline block_type& operator[](size_t pos) + { + AUDIT(pos < num_blocks); +
return blocks[pos]; + } + + inline texture_cache_type& get_texture_cache() const + { + AUDIT(m_tex_cache != nullptr); + return *m_tex_cache; + } + + + /** + * Blocks + */ + + void clear() + { + for (auto &block : *this) + { + block.clear(); + } + + m_in_use.clear(); + + AUDIT(m_unreleased_texture_objects == 0); + AUDIT(m_texture_memory_in_use == 0); + } + + void purge_unreleased_sections() + { + // We will be iterating through m_in_use + // do not allow the callbacks to touch m_in_use to avoid invalidating the iterator + m_purging = true; + + //Reclaims all graphics memory consumed by dirty textures + for (auto it = m_in_use.begin(); it != m_in_use.end();) + { + auto *block = *it; + + if (block->get_unreleased_count() > 0) + { + for (auto &tex : *block) + { + if (!tex.is_unreleased()) + continue; + + ASSERT(!tex.is_locked()); + + tex.destroy(); + } + } + + if (block->get_exists_count() == 0) + { + it = m_in_use.erase(it); + } + else + { + it++; + } + } + + m_purging = false; + AUDIT(m_unreleased_texture_objects == 0); + } + + + /** + * Callbacks + */ + void on_section_released(const section_storage_type &/*section*/) + { + u32 prev_unreleased = m_unreleased_texture_objects--; + ASSERT(prev_unreleased > 0); + } + + void on_section_unreleased(const section_storage_type &/*section*/) + { + m_unreleased_texture_objects++; + } + + void on_section_resources_created(const section_storage_type &section) + { + m_texture_memory_in_use += section.get_section_size(); + } + + void on_section_resources_destroyed(const section_storage_type &section) + { + u32 size = section.get_section_size(); + u32 prev_size = m_texture_memory_in_use.fetch_sub(size); + ASSERT(prev_size >= size); + } + + void on_ranged_block_first_section_created(block_type& block) + { + AUDIT(!m_purging); + AUDIT(m_in_use.find(&block) == m_in_use.end()); + m_in_use.insert(&block); + } + + void on_ranged_block_last_section_destroyed(block_type& block) + { + if (m_purging) + return; + + AUDIT(m_in_use.find(&block) != m_in_use.end()); + m_in_use.erase(&block); + } + + /** + * Ranged Iterator + */ + // Iterator + template + class range_iterator_tmpl + { + public: + // Traits + using value_type = T; + using pointer = T * ; + using difference_type = int; + using reference = T & ; + using iterator_category = std::forward_iterator_tag; + + // Constructors + range_iterator_tmpl() = default; // end iterator + explicit range_iterator_tmpl(parent_type &storage, const address_range &_range, section_bounds _bounds, bool _locked_only) : + range(_range), + bounds(_bounds), + block(&storage.block_for(range.start)), + unowned_it(block->unowned_begin()), + unowned_remaining(true), + cur_block_it(block->begin()), + locked_only(_locked_only) + { + // do a "fake" iteration to ensure the internal state is consistent + next(false); + } + + private: + // Members + address_range range; + section_bounds bounds; + + block_type *block = nullptr; + bool needs_overlap_check = true; + bool unowned_remaining = false; + unowned_iterator unowned_it = {}; + section_iterator cur_block_it = {}; + pointer obj = nullptr; + bool locked_only = false; + + inline void next(bool iterate = true) + { + AUDIT(block != nullptr); + + if (unowned_remaining) + { + do + { + // Still have "unowned" sections from blocks before the range to loop through + auto blk_end = block->unowned_end(); + if (iterate && unowned_it != blk_end) + { + unowned_it++; + } + + if (unowned_it != blk_end) + { + obj = *unowned_it; + if (obj->valid_range() && (!locked_only || obj->is_locked()) && obj->overlaps(range,
bounds)) + return; + + iterate = true; + continue; + } + + // No more unowned sections remaining + unowned_remaining = false; + iterate = false; + break; + + } while (true); + } + + // Go to next block + do + { + // Iterate current block + do + { + auto blk_end = block->end(); + if (iterate && cur_block_it != blk_end) + { + cur_block_it++; + } + + if (cur_block_it != blk_end) + { + obj = &(*cur_block_it); + if (obj->valid_range() && (!locked_only || obj->is_locked()) && (!needs_overlap_check || obj->overlaps(range, bounds))) + return; + + iterate = true; + continue; + } + break; + + } while (true); + + // Move to next block(s) + do + { + block = block->next_block(); + if (block == nullptr || block->get_start() > range.end) // Reached end + { + block = nullptr; + obj = nullptr; + return; + } + + needs_overlap_check = (block->get_end() > range.end); + cur_block_it = block->begin(); + iterate = false; + } while (locked_only && block->get_locked_count() == 0); // find a block with locked sections + + } while (true); + } + + public: + inline reference operator*() const { return *obj; } + inline pointer operator->() const { return obj; } + inline reference operator++() { next(); return *obj; } + inline reference operator++(int) { auto *ptr = obj; next(); return *ptr; } + inline bool operator==(const range_iterator_tmpl &rhs) const { return obj == rhs.obj && unowned_remaining == rhs.unowned_remaining; } + inline bool operator!=(const range_iterator_tmpl &rhs) const { return !operator==(rhs); } + + inline void set_end(u32 new_end) + { + range.end = new_end; + + // If we've exceeded the new end, invalidate iterator + if (block->get_start() > range.end) + { + block = nullptr; + } + } + + inline block_type& get_block() const + { + AUDIT(block != nullptr); + return *block; + } + + inline section_bounds get_bounds() const + { + return bounds; + } + }; + + using range_iterator = range_iterator_tmpl; + using range_const_iterator = range_iterator_tmpl; + + inline range_iterator range_begin(const address_range &range, section_bounds bounds, bool locked_only = false) { + return range_iterator(*this, range, bounds, locked_only); + } + + inline range_const_iterator range_begin(const address_range &range, section_bounds bounds, bool locked_only = false) const { + return range_const_iterator(*this, range, bounds, locked_only); + } + + inline range_const_iterator range_begin(u32 address, section_bounds bounds, bool locked_only = false) const { + return range_const_iterator(*this, address_range::start_length(address, 1), bounds, locked_only); + } + + constexpr range_iterator range_end() + { + return range_iterator(); + } + + constexpr range_const_iterator range_end() const + { + return range_const_iterator(); + } + + /** + * Debug + */ +#ifdef TEXTURE_CACHE_DEBUG + void verify_protection(bool recount = false) + { + if (recount) + { + // Reset calculated part of the page_info struct + tex_cache_checker.reset_refcount(); + + // Go through all blocks and update calculated values + for (auto &block : *this) + { + for (auto &tex : block) + { + if (tex.is_locked()) + { + tex_cache_checker.add(tex.get_locked_range(), tex.get_protection()); + } + } + } + } + + // Verify + tex_cache_checker.verify(); + } +#endif //TEXTURE_CACHE_DEBUG + + }; + + + + /** + * Cached Texture Section + */ + template + class cached_texture_section : public rsx::buffered_section + { + public: + using ranged_storage_type = ranged_storage; + using ranged_storage_block_type = ranged_storage_block; + using texture_cache_type = typename 
ranged_storage_type::texture_cache_type; + + protected: + ranged_storage_type *m_storage = nullptr; + ranged_storage_block_type *m_block = nullptr; + texture_cache_type *m_tex_cache = nullptr; + + private: + constexpr derived_type* derived() + { + return static_cast(this); + } + + constexpr const derived_type* derived() const + { + return static_cast(this); + } + + bool dirty = true; + bool triggered_exists_callbacks = false; + bool triggered_unreleased_callbacks = false; + + protected: + + u16 width; + u16 height; + u16 depth; + u16 mipmaps; + + u16 real_pitch; + u16 rsx_pitch; + + u32 gcm_format = 0; + bool pack_unpack_swap_bytes = false; + + u64 sync_timestamp = 0; + bool synchronized = false; + bool flushed = false; + + u32 num_writes = 0; + std::deque read_history; + + rsx::memory_read_flags readback_behaviour = rsx::memory_read_flags::flush_once; + rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; + rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; + rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d; + + public: + u64 cache_tag = 0; + u64 last_write_tag = 0; + + ~cached_texture_section() + { + AUDIT(!exists()); + } + + cached_texture_section() = default; + cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache()) + { + update_unreleased(); + } + + void initialize(ranged_storage_block_type *block) + { + verify(HERE), m_block == nullptr && m_tex_cache == nullptr && m_storage == nullptr; + m_block = block; + m_storage = &block->get_storage(); + m_tex_cache = &block->get_texture_cache(); + + update_unreleased(); + } + + + /** + * Reset + */ + void reset(const address_range &memory_range) + { + AUDIT(memory_range.valid()); + AUDIT(!is_locked()); + + // Invalidate if necessary + invalidate_range(); + + // Superclass + rsx::buffered_section::reset(memory_range); + + // Reset member variables to the default + width = 0; + height = 0; + depth = 0; + mipmaps = 0; + + real_pitch = 0; + rsx_pitch = 0; + + gcm_format = 0; + pack_unpack_swap_bytes = false; + + sync_timestamp = 0ull; + synchronized = false; + flushed = false; + + cache_tag = 0ull; + last_write_tag = 0ull; + + num_writes = 0; + read_history.clear(); + + readback_behaviour = rsx::memory_read_flags::flush_once; + view_flags = rsx::texture_create_flags::default_component_order; + context = rsx::texture_upload_context::shader_read; + image_type = rsx::texture_dimension_extended::texture_dimension_2d; + + // Set to dirty + set_dirty(true); + + // Notify that our CPU range is now valid + notify_range_valid(); + } + + + + /** + * Destroyed Flag + */ + inline bool is_destroyed() const { return !exists(); } // this section is currently destroyed + + inline bool can_destroy() const { + return !is_destroyed() && is_tracked(); + } // This section may be destroyed + + protected: + void on_section_resources_created() + { + AUDIT(exists()); + AUDIT(valid_range()); + + if (triggered_exists_callbacks) return; + triggered_exists_callbacks = true; + + // Callbacks + m_block->on_section_resources_created(*derived()); + m_storage->on_section_resources_created(*derived()); + } + + void on_section_resources_destroyed() + { + if (!triggered_exists_callbacks) return; + triggered_exists_callbacks = false; + + AUDIT(valid_range()); + + // Set dirty + set_dirty(true); + + // Unlock + if (is_locked()) + { + unprotect(); + } + + // Trigger callbacks + 
m_block->on_section_resources_destroyed(*derived()); + m_storage->on_section_resources_destroyed(*derived()); + + // Invalidate range + invalidate_range(); + } + + public: + /** + * Dirty/Unreleased Flag + */ + inline bool is_dirty() const { return dirty; } // this section is dirty and will need to be reuploaded + + void set_dirty(bool new_dirty) + { + if (new_dirty == false && !is_locked() && context == texture_upload_context::shader_read) + return; + + dirty = new_dirty; + + AUDIT(dirty || (!dirty && exists())); + + update_unreleased(); + } + + private: + void update_unreleased() + { + bool unreleased = is_unreleased(); + + if (unreleased && !triggered_unreleased_callbacks) + { + triggered_unreleased_callbacks = true; + m_block->on_section_unreleased(*derived()); + m_storage->on_section_unreleased(*derived()); + } + else if (!unreleased && triggered_unreleased_callbacks) + { + triggered_unreleased_callbacks = false; + m_block->on_section_released(*derived()); + m_storage->on_section_released(*derived()); + } + } + + + /** + * Valid Range + */ + + void notify_range_valid() + { + AUDIT(valid_range()); + + // Callbacks + m_block->on_section_range_valid(*derived()); + //m_storage->on_section_range_valid(*derived()); + + // Reset texture_cache m_flush_always_cache + if (readback_behaviour == memory_read_flags::flush_always) + { + m_tex_cache->on_memory_read_flags_changed(*derived(), memory_read_flags::flush_always); + } + } + + void invalidate_range() + { + if (!valid_range()) + return; + + // Reset texture_cache m_flush_always_cache + if (readback_behaviour == memory_read_flags::flush_always) + { + m_tex_cache->on_memory_read_flags_changed(*derived(), memory_read_flags::flush_once); + } + + // Notify the storage block that we are now invalid + m_block->on_section_range_invalid(*derived()); + //m_storage->on_section_range_invalid(*derived()); + + buffered_section::invalidate_range(); + } + + public: + /** + * Misc. 
+ */ + bool is_tracked() const + { + return !exists() || (get_context() != framebuffer_storage); + } + + bool is_unreleased() const + { + return is_tracked() && exists() && is_dirty() && !is_locked(); + } + + bool can_be_reused() const + { + return !exists() || (is_dirty() && !is_locked()); + } + + bool is_flushable() const + { + //This section is active and can be flushed to cpu + return (get_protection() == utils::protection::no); + } + + + private: + /** + * Protection + */ + void post_protect(utils::protection old_prot, utils::protection prot) + { + if (old_prot != utils::protection::rw && prot == utils::protection::rw) + { + AUDIT(!is_locked()); + + m_block->on_section_unprotected(*derived()); + + // Blit and framebuffers may be unprotected and clean + if (context == texture_upload_context::shader_read) + { + set_dirty(true); + } + } + else if (old_prot == utils::protection::rw && prot != utils::protection::rw) + { + AUDIT(is_locked()); + + m_block->on_section_protected(*derived()); + + set_dirty(false); + } + } + + public: + inline void protect(utils::protection prot) + { + utils::protection old_prot = get_protection(); + rsx::buffered_section::protect(prot); + post_protect(old_prot, prot); + } + + inline void protect(utils::protection prot, const std::pair& range_confirm) + { + utils::protection old_prot = get_protection(); + rsx::buffered_section::protect(prot, range_confirm); + post_protect(old_prot, prot); + } + + inline void unprotect() + { + utils::protection old_prot = get_protection(); + rsx::buffered_section::unprotect(); + post_protect(old_prot, utils::protection::rw); + } + + inline void discard(bool set_dirty = true) + { + utils::protection old_prot = get_protection(); + rsx::buffered_section::discard(); + post_protect(old_prot, utils::protection::rw); + + if (set_dirty) + { + this->set_dirty(true); + } + } + + void reprotect(const utils::protection prot) + { + //Reset properties and protect again + flushed = false; + synchronized = false; + sync_timestamp = 0ull; + + protect(prot); + } + + void reprotect(const utils::protection prot, const std::pair& range) + { + //Reset properties and protect again + flushed = false; + synchronized = false; + sync_timestamp = 0ull; + + protect(prot, range); + } + + + /** + * Misc + */ + void touch(u64 tag) + { + num_writes++; + last_write_tag = tag; + } + + void reset_write_statistics() + { + if (read_history.size() == 16) + { + read_history.pop_back(); + } + + read_history.push_front(num_writes); + num_writes = 0; + } + + void set_view_flags(rsx::texture_create_flags flags) + { + view_flags = flags; + } + + void set_context(rsx::texture_upload_context upload_context) + { + AUDIT(!exists() || !is_locked() || context == upload_context); + context = upload_context; + } + + void set_image_type(rsx::texture_dimension_extended type) + { + image_type = type; + } + + void set_gcm_format(u32 format) + { + gcm_format = format; + } + + void set_memory_read_flags(memory_read_flags flags, bool notify_texture_cache = true) + { + const bool changed = (flags != readback_behaviour); + readback_behaviour = flags; + + if (notify_texture_cache && changed && valid_range()) + { + m_tex_cache->on_memory_read_flags_changed(*derived(), flags); + } + } + + u16 get_width() const + { + return width; + } + + u16 get_height() const + { + return height; + } + + u16 get_depth() const + { + return depth; + } + + u16 get_mipmaps() const + { + return mipmaps; + } + + u16 get_rsx_pitch() const + { + return rsx_pitch; + } + + rsx::texture_create_flags get_view_flags() 
const + { + return view_flags; + } + + rsx::texture_upload_context get_context() const + { + return context; + } + + rsx::section_bounds get_overlap_test_bounds() const + { + if (guard_policy == protection_policy::protect_policy_full_range) + return rsx::section_bounds::locked_range; + + const bool strict_range_check = g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer; + return (strict_range_check || get_context() == rsx::texture_upload_context::blit_engine_dst) ? + rsx::section_bounds::confirmed_range : + rsx::section_bounds::locked_range; + } + + rsx::texture_dimension_extended get_image_type() const + { + return image_type; + } + + u32 get_gcm_format() const + { + return gcm_format; + } + + memory_read_flags get_memory_read_flags() const + { + return readback_behaviour; + } + + bool writes_likely_completed() const + { + // TODO: Move this to the miss statistics block + const auto num_records = read_history.size(); + + if (num_records == 0) + { + return false; + } + else if (num_records == 1) + { + return num_writes >= read_history.front(); + } + else + { + const u32 last = read_history.front(); + const u32 prev_last = read_history[1]; + + if (last == prev_last && num_records <= 3) + { + return num_writes >= last; + } + + u32 compare = UINT32_MAX; + for (u32 n = 1; n < num_records; n++) + { + if (read_history[n] == last) + { + // Uncertain, but possible + compare = read_history[n - 1]; + + if (num_records > (n + 1)) + { + if (read_history[n + 1] == prev_last) + { + // Confirmed with 2 values + break; + } + } + } + } + + return num_writes >= compare; + } + } + + u64 get_sync_timestamp() const + { + return sync_timestamp; + } + + /** + * Comparison + */ + inline bool matches(const address_range &memory_range) + { + return valid_range() && rsx::buffered_section::matches(memory_range); + } + + bool matches_dimensions(u32 width, u32 height, u32 depth, u32 mipmaps) + { + if (!valid_range()) + return false; + + if (!width && !height && !depth && !mipmaps) + return true; + + if (width && width != this->width) + return false; + + if (height && height != this->height) + return false; + + if (depth && depth != this->depth) + return false; + + if (mipmaps && mipmaps > this->mipmaps) + return false; + + return true; + } + + bool matches(u32 rsx_address, u32 width, u32 height, u32 depth, u32 mipmaps) + { + if (!valid_range()) + return false; + + if (rsx_address != get_section_base()) + return false; + + return matches_dimensions(width, height, depth, mipmaps); + } + + bool matches(const address_range& memory_range, u32 width, u32 height, u32 depth, u32 mipmaps) + { + if (!valid_range()) + return false; + + if (!rsx::buffered_section::matches(memory_range)) + return false; + + return matches_dimensions(width, height, depth, mipmaps); + } + + + /** + * Derived wrappers + */ + inline void destroy() + { + derived()->destroy(); + } + + inline bool exists() const + { + return derived()->exists(); + } + }; + +} // namespace rsx \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 09c2a810c3..e4b08c88a3 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1593,12 +1593,12 @@ void GLGSRender::flip(int buffer) gl::screen.bind(); glViewport(0, 0, m_frame->client_width(), m_frame->client_height()); - m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), "RSX Load: " + std::to_string(get_load()) + "%"); - m_text_printer.print_text(0, 18, m_frame->client_width(), 
m_frame->client_height(), "draw calls: " + std::to_string(m_draw_calls)); - m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), "draw call setup: " + std::to_string(m_begin_time) + "us"); - m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us"); - m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us"); - m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); + m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load())); + m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", m_draw_calls)); + m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", m_begin_time)); + m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", m_vertex_upload_time)); + m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", m_textures_upload_time)); + m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", m_draw_time)); const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count(); const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024); @@ -1606,9 +1606,9 @@ void GLGSRender::flip(int buffer) const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes(); const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100); - m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); - m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M"); - m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); + m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures)); + m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size)); + m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); } m_frame->flip(m_context); @@ -1640,8 +1640,11 @@ void GLGSRender::flip(int buffer) bool GLGSRender::on_access_violation(u32 address, bool is_writing) { - bool can_flush = (std::this_thread::get_id() == m_thread_id); - auto result = m_gl_texture_cache.invalidate_address(address, is_writing, can_flush); + const bool can_flush = (std::this_thread::get_id() == m_thread_id); + const rsx::invalidation_cause cause = + is_writing ? 
(can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) + : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); + auto result = m_gl_texture_cache.invalidate_address(address, cause); if (!result.violation_handled) return false; @@ -1664,12 +1667,15 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) return true; } -void GLGSRender::on_invalidate_memory_range(u32 address_base, u32 size) +void GLGSRender::on_invalidate_memory_range(const utils::address_range &range) { //Discard all memory in that range without bothering with writeback (Force it for strict?) - if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled) + auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap)); + AUDIT(data.empty()); + + if (data.violation_handled) { - m_gl_texture_cache.purge_dirty(); + m_gl_texture_cache.purge_unreleased_sections(); { std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 1b2f3d2548..29b359e655 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -390,7 +390,7 @@ protected: void do_local_task(rsx::FIFO_state state) override; bool on_access_violation(u32 address, bool is_writing) override; - void on_invalidate_memory_range(u32 address_base, u32 size) override; + void on_invalidate_memory_range(const utils::address_range &range) override; void notify_tile_unbound(u32 tile) override; std::array, 4> copy_render_targets_to_memory() override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 85fbcfb126..e0f13931c7 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -237,8 +237,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk old_format_found = true; } - m_gl_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height); + const utils::address_range surface_range = m_surface_info[i].get_memory_range(); + m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); + m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range); } if (std::get<0>(m_rtts.m_bound_render_targets[i])) @@ -268,8 +269,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width; auto old_format = (bpp == 2) ? 
gl::texture::format::depth : gl::texture::format::depth_stencil; - m_gl_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height); + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); + m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); + m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range); } auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); @@ -381,8 +383,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * layout.aa_factors[1]; - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, + const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, color_format.format, color_format.type, color_format.swap_bytes); } } @@ -392,8 +394,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (m_depth_surface_info.address && m_depth_surface_info.pitch) { const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); - const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1]; - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, depth_format_gl.format, depth_format_gl.type, true); } } @@ -448,12 +450,11 @@ void GLGSRender::read_buffers() if (!m_surface_info[i].pitch) continue; - const u32 range = pitch * height; - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i])); + const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height); + bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i])); //Fall back to slower methods if the image could not be fetched from cache. 
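// Illustrative sketch, not part of the patch: the ranges passed to
// set_memory_read_flags / lock_memory_region / load_memory_from_cache above are
// all derived from a surface's byte footprint, i.e. pitch * height, optionally
// scaled by an AA factor (presumably the vertical one, as in
// get_memory_range(layout.aa_factors[1])). The struct and helper below are
// hypothetical stand-ins used only to show the arithmetic; the real code uses
// m_surface_info[i] and utils::address_range.
#include <cstdint>

struct demo_surface
{
    std::uint32_t address; // base address of the surface in guest memory
    std::uint32_t pitch;   // bytes per row (>= width * bytes-per-pixel)
    std::uint32_t height;  // number of rows
};

// Length in bytes of the region a surface occupies, starting at demo_surface::address
constexpr std::uint32_t surface_byte_length(const demo_surface& s, std::uint32_t aa_factor_y = 1)
{
    return s.pitch * s.height * aa_factor_y;
}

// Example: a 1280x720 ARGB8 target (pitch = 1280 * 4 = 5120 bytes) covers
// 5120 * 720 = 3686400 bytes, so its range is start_length(address, 3686400).
static_assert(surface_byte_length({0u, 5120u, 720u}) == 3686400u, "pitch * height footprint");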
if (!success) @@ -464,7 +465,7 @@ void GLGSRender::read_buffers() } else { - m_gl_texture_cache.invalidate_range(texaddr, range, false, false, true); + m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read); std::unique_ptr buffer(new u8[pitch * height]); color_buffer.read(buffer.get(), width, height, pitch); @@ -512,8 +513,9 @@ void GLGSRender::read_buffers() if (!pitch) return; - u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil)); + const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); + const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height); + bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil)); if (in_cache) return; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 256ee0101e..21b2762d5c 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -141,8 +141,10 @@ namespace gl } }; - class cached_texture_section : public rsx::cached_texture_section + class cached_texture_section : public rsx::cached_texture_section { + using baseclass = rsx::cached_texture_section; + private: fence m_fence; u32 pbo_id = 0; @@ -226,7 +228,7 @@ namespace gl void init_buffer() { const f32 resolution_scale = (context == rsx::texture_upload_context::framebuffer_storage? rsx::get_resolution_scale() : 1.f); - const u32 real_buffer_size = (resolution_scale <= 1.f) ? cpu_address_range : (u32)(resolution_scale * resolution_scale * cpu_address_range); + const u32 real_buffer_size = (resolution_scale <= 1.f) ? get_section_size() : (u32)(resolution_scale * resolution_scale * get_section_size()); const u32 buffer_size = align(real_buffer_size, 4096); if (pbo_id) @@ -249,13 +251,14 @@ namespace gl } public: + using baseclass::cached_texture_section; - void reset(u32 base, u32 size, bool /*flushable*/=false) + void reset(const utils::address_range &memory_range) { - rsx::cached_texture_section::reset(base, size); - vram_texture = nullptr; managed_texture.reset(); + + baseclass::reset(memory_range); } void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only, @@ -283,7 +286,7 @@ namespace gl if (rsx_pitch > 0) this->rsx_pitch = rsx_pitch; else - this->rsx_pitch = cpu_address_range / height; + this->rsx_pitch = get_section_size() / height; this->width = w; this->height = h; @@ -292,6 +295,9 @@ namespace gl this->mipmaps = mipmaps; set_format(gl_format, gl_type, swap_bytes); + + // Notify baseclass + baseclass::on_section_resources_created(); } void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps) @@ -307,6 +313,9 @@ namespace gl rsx_pitch = 0; real_pitch = 0; + + // Notify baseclass + baseclass::on_section_resources_created(); } void make_flushable() @@ -458,11 +467,12 @@ namespace gl bool flush() { if (flushed) return true; //Already written, ignore + AUDIT( is_locked() ); bool result = true; if (!synchronized) { - LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); + LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", get_section_base()); copy_texture(); if (!synchronized) @@ -480,11 +490,14 @@ namespace gl m_fence.wait_for_signal(); flushed = true; - const auto valid_range = get_confirmed_range(); - void *dst = get_raw_ptr(valid_range.first, true); + const auto valid_range = get_confirmed_range_delta(); + const u32 valid_offset = valid_range.first; + const u32 valid_length = valid_range.second; + AUDIT( valid_length > 0 ); + void *dst = get_ptr_by_offset(valid_range.first, true); glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT); + void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_offset, valid_length, GL_MAP_READ_BIT); //throw if map failed since we'll segfault anyway verify(HERE), src != nullptr; @@ -496,20 +509,20 @@ namespace gl require_manual_shuffle = true; } - if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch) + if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch) { - memcpy(dst, src, valid_range.second); + memcpy(dst, src, valid_length); } else { - if (valid_range.second % rsx_pitch) + if (valid_length % rsx_pitch) { fmt::throw_exception("Unreachable" HERE); } u8 *_src = (u8*)src; u8 *_dst = (u8*)dst; - const auto num_rows = valid_range.second / rsx_pitch; + const auto num_rows = valid_length / rsx_pitch; for (u32 row = 0; row < num_rows; ++row) { memcpy(_dst, _src, real_pitch); @@ -521,7 +534,7 @@ namespace gl if (require_manual_shuffle) { //byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty - rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, valid_range.second / rsx_pitch); + rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, valid_length / rsx_pitch); } else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD) { @@ -537,7 +550,7 @@ namespace gl case texture::type::ushort_1_5_5_5_rev: case texture::type::ushort_5_5_5_1: { - const u32 num_reps = valid_range.second / 2; + const u32 num_reps = valid_length / 2; be_t* in = (be_t*)(dst); u16* out = (u16*)dst; @@ -556,7 +569,7 @@ namespace gl case texture::type::uint_2_10_10_10_rev: case texture::type::uint_8_8_8_8: { - u32 num_reps = valid_range.second / 4; + u32 num_reps = valid_length / 4; be_t* in = (be_t*)(dst); u32* out = (u32*)dst; @@ -575,7 +588,7 @@ namespace gl } } - flush_io(valid_range.first, valid_range.second); + flush_ptr_by_offset(valid_offset, valid_length); glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); @@ -586,13 +599,10 @@ namespace gl void destroy() { - if (!locked && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty()) + if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty()) //Already destroyed return; - if (locked) - unprotect(); - if (pbo_id == 0) { //Read-only texture, destroy texture memory @@ -611,6 +621,13 @@ namespace gl if (!m_fence.is_empty()) m_fence.destroy(); + + baseclass::on_section_resources_destroyed(); + } + + inline bool exists() const + { + return (vram_texture != nullptr); } texture::format get_format() const @@ -618,16 +635,6 @@ namespace gl return format; } - bool exists() const - { - return vram_texture != nullptr; - } - - bool is_flushable() const - { - return (protection == utils::protection::no); - } - bool is_flushed() const { return flushed; @@ -683,9 +690,10 @@ namespace gl } }; - class texture_cache : public rsx::texture_cache + class texture_cache : public rsx::texture_cache { private: + using baseclass = 
rsx::texture_cache; struct discardable_storage { @@ -717,30 +725,10 @@ namespace gl blitter m_hw_blitter; std::vector m_temporary_surfaces; - cached_texture_section& create_texture(gl::viewable_image* image, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps) - { - cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h, depth); - tex.reset(texaddr, texsize, false); - tex.create_read_only(image, w, h, depth, mipmaps); - read_only_range = tex.get_min_max(read_only_range); - return tex; - } - void clear() { - for (auto &address_range : m_cache) - { - auto &range_data = address_range.second; - for (auto &tex : range_data.data) - { - tex.destroy(); - } - - range_data.data.resize(0); - } - + baseclass::clear(); clear_temporary_subresources(); - m_unreleased_texture_objects = 0; } void clear_temporary_subresources() @@ -850,11 +838,6 @@ namespace gl protected: - void free_texture_section(cached_texture_section& tex) override - { - tex.destroy(); - } - gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { @@ -946,7 +929,7 @@ namespace gl dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); } - cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, + cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override { auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type); @@ -954,15 +937,23 @@ namespace gl const auto swizzle = get_component_mapping(gcm_format, flags); image->set_native_component_layout(swizzle); - auto& cached = create_texture(image, rsx_address, rsx_size, width, height, depth, mipmaps); - cached.set_dirty(false); + auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps); + ASSERT(!cached.is_locked()); + + // Prepare section + cached.reset(rsx_range); cached.set_view_flags(flags); cached.set_context(context); - cached.set_gcm_format(gcm_format); cached.set_image_type(type); + cached.set_gcm_format(gcm_format); + + cached.create_read_only(image, width, height, depth, mipmaps); + cached.set_dirty(false); if (context != rsx::texture_upload_context::blit_engine_dst) { + AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always ); + read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. 
Ask kd-11 cached.protect(utils::protection::ro); } else @@ -998,8 +989,8 @@ namespace gl //NOTE: Protection is handled by the caller cached.make_flushable(); - cached.set_dimensions(width, height, depth, (rsx_size / height)); - no_access_range = cached.get_min_max(no_access_range); + cached.set_dimensions(width, height, depth, (rsx_range.length() / height)); + no_access_range = cached.get_min_max(no_access_range, rsx::section_bounds::locked_range); } update_cache_tag(); @@ -1010,7 +1001,8 @@ namespace gl rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override { void* unused = nullptr; - auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type, + const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height); + auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, gcm_format, context, type, rsx::texture_create_flags::default_component_order); gl::upload_texture(section->get_raw_texture()->id(), rsx_address, gcm_format, width, height, depth, mipmaps, @@ -1082,9 +1074,7 @@ namespace gl public: - texture_cache() {} - - ~texture_cache() {} + using baseclass::texture_cache; void initialize() { @@ -1103,19 +1093,17 @@ namespace gl { reader_lock lock(m_cache_mutex); - auto found = m_cache.find(get_block_address(rsx_address)); - if (found == m_cache.end()) + auto &block = m_storage.block_for(rsx_address); + + if (block.get_locked_count() == 0) return false; - //if (found->second.valid_count == 0) - //return false; - - for (auto& tex : found->second.data) + for (auto& tex : block) { if (tex.is_dirty()) continue; - if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range)) + if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range)) continue; if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) @@ -1127,9 +1115,9 @@ namespace gl void on_frame_end() override { - if (m_unreleased_texture_objects >= m_max_zombie_objects) + if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects) { - purge_dirty(); + purge_unreleased_sections(); } clear_temporary_subresources(); @@ -1158,7 +1146,7 @@ namespace gl gl::texture::format::depth_stencil : gl::texture::format::depth; } - flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size); + flush_if_cache_miss_likely(fmt, result.to_address_range()); } return true; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 9eba7b4e71..b65e3fc924 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1350,34 +1350,12 @@ namespace rsx { if (!in_begin_end && state != FIFO_state::lock_wait) { - if (!m_invalidated_memory_ranges.empty()) + reader_lock lock(m_mtx_task); + + if (m_invalidated_memory_range.valid()) { - std::lock_guard lock(m_mtx_task); - - for (const auto& range : m_invalidated_memory_ranges) - { - on_invalidate_memory_range(range.first, range.second); - - // Clean the main memory super_ptr cache if invalidated - const auto range_end = range.first + range.second; - for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();) - { - const auto mem_start = It->first; - const auto mem_end = mem_start + It->second.size(); - const bool overlaps = (mem_start < range_end && range.first < mem_end); - - if (overlaps) - { - It = main_super_memory_block.erase(It); - } - else - { - It++; - } - } - } - - 
m_invalidated_memory_ranges.clear(); + lock.upgrade(); + handle_invalidated_memory_range(); } } } @@ -2676,15 +2654,32 @@ namespace rsx void thread::on_notify_memory_mapped(u32 address, u32 size) { - // TODO + // In the case where an unmap is followed shortly after by a remap of the same address space + // we must block until RSX has invalidated the memory + // or lock m_mtx_task and do it ourselves + + if (m_rsx_thread_exiting) + return; + + reader_lock lock(m_mtx_task); + + const auto map_range = address_range::start_length(address, size); + + if (!m_invalidated_memory_range.valid()) + return; + + if (m_invalidated_memory_range.overlaps(map_range)) + { + lock.upgrade(); + handle_invalidated_memory_range(); + } } - - void thread::on_notify_memory_unmapped(u32 base_address, u32 size) + void thread::on_notify_memory_unmapped(u32 address, u32 size) { - if (!m_rsx_thread_exiting && base_address < 0xC0000000) + if (!m_rsx_thread_exiting && address < 0xC0000000) { - u32 ea = base_address >> 20, io = RSXIOMem.io[ea]; + u32 ea = address >> 20, io = RSXIOMem.io[ea]; if (io < 512) { @@ -2704,11 +2699,56 @@ namespace rsx } } + // Queue up memory invalidation std::lock_guard lock(m_mtx_task); - m_invalidated_memory_ranges.push_back({ base_address, size }); + const bool existing_range_valid = m_invalidated_memory_range.valid(); + const auto unmap_range = address_range::start_length(address, size); + + if (existing_range_valid && m_invalidated_memory_range.touches(unmap_range)) + { + // Merge range-to-invalidate in case of consecutive unmaps + m_invalidated_memory_range.set_min_max(unmap_range); + } + else + { + if (existing_range_valid) + { + // We can only delay consecutive unmaps. + // Otherwise, to avoid VirtualProtect failures, we need to do the invalidation here + handle_invalidated_memory_range(); + } + + m_invalidated_memory_range = unmap_range; + } } } + // NOTE: m_mtx_task lock must be acquired before calling this method + void thread::handle_invalidated_memory_range() + { + if (!m_invalidated_memory_range.valid()) + return; + + on_invalidate_memory_range(m_invalidated_memory_range); + + // Clean the main memory super_ptr cache if invalidated + for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();) + { + const auto block_range = address_range::start_length(It->first, It->second.size()); + + if (m_invalidated_memory_range.overlaps(block_range)) + { + It = main_super_memory_block.erase(It); + } + else + { + It++; + } + } + + m_invalidated_memory_range.invalidate(); + } + //Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself! 
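// Illustrative sketch, not part of the patch: how the single pending
// m_invalidated_memory_range replaces the old vector of ranges. The type below
// is a simplified stand-in for utils::address_range (inclusive end, as implied
// by the "start + length - 1" arithmetic elsewhere in the patch); the names and
// exact semantics are assumptions for illustration only.
#include <algorithm>
#include <cstdint>

struct demo_range
{
    std::uint32_t start = 1; // start > end marks an invalid (empty) range
    std::uint32_t end = 0;

    static demo_range start_length(std::uint32_t s, std::uint32_t len) { return {s, s + len - 1}; }

    bool valid() const { return start <= end; }
    void invalidate() { *this = {}; }

    // Overlapping or directly adjacent ranges can be merged into one
    bool touches(const demo_range& o) const
    {
        return valid() && o.valid() && start <= o.end + 1 && o.start <= end + 1;
    }

    // Grow this range so that it covers both ranges
    void set_min_max(const demo_range& o)
    {
        start = std::min(start, o.start);
        end = std::max(end, o.end);
    }
};

// Mirrors the queuing policy in on_notify_memory_unmapped(): consecutive unmaps
// are merged into the pending range, anything else forces the pending
// invalidation to be handled immediately before the new range is queued.
template <typename Flush>
void queue_unmap(demo_range& pending, demo_range unmapped, Flush&& handle_pending)
{
    if (pending.valid() && pending.touches(unmapped))
    {
        pending.set_min_max(unmapped); // consecutive unmaps become one bigger range
    }
    else
    {
        if (pending.valid())
            handle_pending(pending);   // a non-adjacent unmap cannot be safely delayed

        pending = unmapped;
    }
}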
void thread::pause() { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 663ffaa51f..2089eb1bb8 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -347,7 +347,7 @@ namespace rsx std::shared_ptr m_overlay_manager; // Invalidated memory range - std::vector> m_invalidated_memory_ranges; + address_range m_invalidated_memory_range; public: RsxDmaControl* ctrl = nullptr; @@ -468,6 +468,8 @@ namespace rsx thread(); virtual ~thread(); + void handle_invalidated_memory_range(); + virtual void on_task() override; virtual void on_exit() override; @@ -495,7 +497,7 @@ namespace rsx virtual void flip(int buffer) = 0; virtual u64 timestamp(); virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; } - virtual void on_invalidate_memory_range(u32 /*address*/, u32 /*range*/) {} + virtual void on_invalidate_memory_range(const address_range & /*range*/) {} virtual void notify_tile_unbound(u32 /*tile*/) {} // zcull diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 869aa770a0..9f63c8c910 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -812,7 +812,9 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) vk::texture_cache::thrashed_set result; { std::lock_guard lock(m_secondary_cb_guard); - result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); + + const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read; + result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); } if (!result.violation_handled) @@ -893,13 +895,16 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) return false; } -void VKGSRender::on_invalidate_memory_range(u32 address_base, u32 size) +void VKGSRender::on_invalidate_memory_range(const utils::address_range &range) { std::lock_guard lock(m_secondary_cb_guard); - if (m_texture_cache.invalidate_range(address_base, size, true, true, false, - m_secondary_command_buffer, m_swapchain->get_graphics_queue()).violation_handled) + + auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); + AUDIT(data.empty()); + + if (data.violation_handled) { - m_texture_cache.purge_dirty(); + m_texture_cache.purge_unreleased_sections(); { std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); @@ -2625,9 +2630,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (old_format == VK_FORMAT_UNDEFINED) old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first; - m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, - *m_current_command_buffer, m_swapchain->get_graphics_queue()); + const utils::address_range rsx_range = m_surface_info[i].get_memory_range(); + m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once); + m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_surface_info[i].address = 
m_surface_info[i].pitch = 0; @@ -2641,9 +2646,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer) { auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format); - m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, - *m_current_command_buffer, m_swapchain->get_graphics_queue()); + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); + m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); + m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; @@ -2697,8 +2702,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * layout.aa_factors[1]; - m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, + const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); + m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second); } } @@ -2708,8 +2713,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (m_depth_surface_info.address && m_depth_surface_info.pitch) { const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? 
CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; - const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1]; - m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false); } } @@ -3129,13 +3134,13 @@ void VKGSRender::flip(int buffer) if (g_cfg.video.overlay) { - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "RSX Load: " + std::to_string(get_load()) + "%"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load())); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", m_draw_calls)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", m_setup_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", m_vertex_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", m_textures_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", m_draw_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", m_flip_time)); const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); @@ -3144,10 +3149,10 @@ void VKGSRender::flip(int buffer) const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes(); const auto cache_miss_ratio = 
(u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M"); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); } vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 4e506f3d64..0ecd134361 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -433,7 +433,7 @@ protected: void notify_tile_unbound(u32 tile) override; bool on_access_violation(u32 address, bool is_writing) override; - void on_invalidate_memory_range(u32 address_base, u32 size) override; + void on_invalidate_memory_range(const utils::address_range &range) override; bool on_decompiler_task() override; }; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 36b65bf668..ffa90fdc20 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -13,8 +13,10 @@ extern u64 get_system_time(); namespace vk { - class cached_texture_section : public rsx::cached_texture_section + class cached_texture_section : public rsx::cached_texture_section { + using baseclass = typename rsx::cached_texture_section; + std::unique_ptr managed_texture = nullptr; //DMA relevant data @@ -24,15 +26,14 @@ namespace vk std::unique_ptr dma_buffer; public: + using baseclass::cached_texture_section; - cached_texture_section() {} - - void reset(u32 base, u32 length) + void reset(const utils::address_range &memory_range) { - if (length > cpu_address_range) + if (memory_range.length() > get_section_size()) release_dma_resources(); - rsx::cached_texture_section::reset(base, length); + baseclass::reset(memory_range); } void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false) @@ -56,13 +57,16 @@ namespace vk if (rsx_pitch > 0) 
this->rsx_pitch = rsx_pitch; else - this->rsx_pitch = cpu_address_range / height; + this->rsx_pitch = get_section_size() / height; //Even if we are managing the same vram section, we cannot guarantee contents are static - //The create method is only invoked when a new mangaged session is required + //The create method is only invoked when a new managed session is required synchronized = false; flushed = false; sync_timestamp = 0ull; + + // Notify baseclass + baseclass::on_section_resources_created(); } void release_dma_resources() @@ -81,11 +85,14 @@ namespace vk void destroy() { + m_tex_cache->on_section_destroyed(*this); vram_texture = nullptr; release_dma_resources(); + + baseclass::on_section_resources_destroyed(); } - bool exists() const + inline bool exists() const { return (vram_texture != nullptr); } @@ -115,12 +122,6 @@ namespace vk return vram_texture->info.format; } - bool is_flushable() const - { - //This section is active and can be flushed to cpu - return (protection == utils::protection::no); - } - bool is_flushed() const { //This memory section was flushable, but a flush has already removed protection @@ -144,7 +145,7 @@ namespace vk if (dma_buffer.get() == nullptr) { auto memory_type = m_device->get_memory_mapping().host_visible_coherent; - dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); + dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); } if (manage_cb_lifetime) @@ -246,18 +247,18 @@ namespace vk { verify (HERE), mem_target->value != dma_buffer->value; - vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range, + vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - shuffle_kernel->run(cmd, mem_target, cpu_address_range); + shuffle_kernel->run(cmd, mem_target, get_section_size()); - vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range, + vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); VkBufferCopy copy = {}; - copy.size = cpu_address_range; + copy.size = get_section_size(); vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, ©); } @@ -282,6 +283,7 @@ namespace vk bool flush(vk::command_buffer& cmd, VkQueue submit_queue) { if (flushed) return true; + AUDIT( is_locked() ); if (m_device == nullptr) { @@ -293,7 +295,7 @@ namespace vk if (!synchronized) { - LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); + LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", get_section_base()); copy_texture(true, cmd, submit_queue); result = false; } @@ -301,22 +303,26 @@ namespace vk verify(HERE), real_pitch > 0; flushed = true; - const auto valid_range = get_confirmed_range(); - void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second); - void* pixels_dst = get_raw_ptr(valid_range.first, true); + const auto valid_range = get_confirmed_range_delta(); + const u32 valid_offset = valid_range.first; + const u32 valid_length = valid_range.second; + AUDIT( valid_length > 0 ); - if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch) + void* pixels_src = dma_buffer->map(valid_offset, valid_length); + void* pixels_dst = get_ptr_by_offset(valid_offset, true); + + if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch) { - memcpy(pixels_dst, pixels_src, valid_range.second); + memcpy(pixels_dst, pixels_src, valid_length); } else { - if (valid_range.second % rsx_pitch) + if (valid_length % rsx_pitch) { fmt::throw_exception("Unreachable" HERE); } - const u32 num_rows = valid_range.second / rsx_pitch; + const u32 num_rows = valid_length / rsx_pitch; auto _src = (u8*)pixels_src; auto _dst = (u8*)pixels_dst; @@ -328,7 +334,7 @@ namespace vk } } - flush_io(valid_range.first, valid_range.second); + flush_ptr_by_offset(valid_offset, valid_length); dma_buffer->unmap(); reset_write_statistics(); @@ -405,9 +411,18 @@ namespace vk } }; - class texture_cache : public rsx::texture_cache + class texture_cache : public rsx::texture_cache { + public: + virtual void on_section_destroyed(cached_texture_section& tex) + { + m_discarded_memory_size += tex.get_section_size(); + m_discardable_storage.push_back(tex); + } + private: + using baseclass = rsx::texture_cache; + //Vulkan internals vk::render_device* m_device; vk::memory_type_mapping m_memory_types; @@ -419,30 +434,11 @@ namespace vk std::list m_discardable_storage; std::atomic m_discarded_memory_size = { 0 }; - void purge_cache() + void clear() { - for (auto &address_range : m_cache) - { - auto &range_data = address_range.second; - for (auto &tex : range_data.data) - { - if (tex.exists()) - { - m_discardable_storage.push_back(tex); - } - - if (tex.is_locked()) - tex.unprotect(); - - tex.release_dma_resources(); - } - - range_data.data.resize(0); - } + baseclass::clear(); m_discardable_storage.clear(); - m_unreleased_texture_objects = 0; - m_texture_memory_in_use = 0; m_discarded_memory_size = 0; } @@ -486,14 +482,6 @@ namespace vk } protected: - - void free_texture_section(cached_texture_section& tex) override - { - m_discarded_memory_size += tex.get_section_size(); - m_discardable_storage.push_back(tex); - tex.destroy(); - } - vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) { @@ -776,7 +764,7 @@ namespace vk vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); } - cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, + cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) 
override { const u16 section_depth = depth; @@ -846,26 +834,30 @@ namespace vk change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer }); - cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth); - region.reset(rsx_address, rsx_size); - region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format); - region.set_dirty(false); + cached_texture_section& region = *find_cached_texture(rsx_range, true, true, width, height, section_depth); + ASSERT(!region.is_locked()); + + // New section, we must prepare it + region.reset(rsx_range); region.set_context(context); region.set_gcm_format(gcm_format); region.set_image_type(type); + region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format); + region.set_dirty(false); + //Its not necessary to lock blit dst textures as they are just reused as necessary if (context != rsx::texture_upload_context::blit_engine_dst) { region.protect(utils::protection::ro); - read_only_range = region.get_min_max(read_only_range); + read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range); } else { //TODO: Confirm byte swap patterns //NOTE: Protection is handled by the caller region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT); - no_access_range = region.get_min_max(no_access_range); + no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range); } update_cache_tag(); @@ -875,7 +867,8 @@ namespace vk cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override { - auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type, + const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height); + auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, gcm_format, context, type, rsx::texture_create_flags::default_component_order); auto image = section->get_raw_texture(); @@ -962,6 +955,7 @@ namespace vk } public: + using baseclass::texture_cache; void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap) { @@ -974,26 +968,24 @@ namespace vk void destroy() override { - purge_cache(); + clear(); } bool is_depth_texture(u32 rsx_address, u32 rsx_size) override { reader_lock lock(m_cache_mutex); - auto found = m_cache.find(get_block_address(rsx_address)); - if (found == m_cache.end()) + auto &block = m_storage.block_for(rsx_address); + + if (block.get_locked_count() == 0) return false; - //if (found->second.valid_count == 0) - //return false; - - for (auto& tex : found->second.data) + for (auto& tex : block) { if (tex.is_dirty()) continue; - if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range)) + if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range)) continue; if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) @@ -1016,10 +1008,10 @@ namespace vk void on_frame_end() override { - if (m_unreleased_texture_objects >= m_max_zombie_objects || + if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects || m_discarded_memory_size > 0x4000000) //If already holding over 64M in 
discardable memory, be frugal with memory resources { - purge_dirty(); + purge_unreleased_sections(); } const u64 last_complete_frame = vk::get_last_completed_frame_id(); @@ -1228,7 +1220,7 @@ namespace vk { if (reply.real_dst_size) { - flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue); + flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue); } return true; @@ -1239,12 +1231,12 @@ namespace vk const u32 get_unreleased_textures_count() const override { - return m_unreleased_texture_objects + (u32)m_discardable_storage.size(); + return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size(); } const u32 get_texture_memory_in_use() const override { - return m_texture_memory_in_use; + return m_storage.m_texture_memory_in_use; } const u32 get_temporary_memory_in_use() diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index a977d6d1a7..f254b81477 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "Utilities/VirtualMemory.h" #include "Utilities/hash.h" #include "Emu/Memory/vm.h" @@ -6,6 +6,7 @@ #include "Common/ProgramStateCache.h" #include "Emu/Cell/Modules/cellMsgDialog.h" #include "Emu/System.h" +#include "Common/texture_cache_checker.h" #include "rsx_utils.h" #include @@ -19,109 +20,125 @@ namespace rsx protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding }; - enum overlap_test_bounds + enum section_bounds { full_range, - protected_range, + locked_range, confirmed_range }; + static inline void memory_protect(const address_range& range, utils::protection prot) + { + verify(HERE), range.is_page_range(); + + //LOG_ERROR(RSX, "memory_protect(0x%x, 0x%x, %x)", static_cast(range.start), static_cast(range.length()), static_cast(prot)); + utils::memory_protect(vm::base(range.start), range.length(), prot); + +#ifdef TEXTURE_CACHE_DEBUG + tex_cache_checker.set_protection(range, prot); +#endif + } + class buffered_section { + public: + static const protection_policy guard_policy = protect_policy_full_range; + private: - u32 locked_address_base = 0; - u32 locked_address_range = 0; - weak_ptr locked_memory_ptr; - std::pair confirmed_range; - - inline void tag_memory() - { - if (locked_memory_ptr) - { - const u32 valid_limit = (confirmed_range.second) ? 
confirmed_range.first + confirmed_range.second : cpu_address_range;
-			u32* first = locked_memory_ptr.get(confirmed_range.first, true);
-			u32* last = locked_memory_ptr.get(valid_limit - 4, true);
-
-			*first = cpu_address_base + confirmed_range.first;
-			*last = cpu_address_base + valid_limit - 4;
-
-			locked_memory_ptr.flush(confirmed_range.first, 4);
-			locked_memory_ptr.flush(valid_limit - 4, 4);
-		}
-	}
-
-	protected:
-		u32 cpu_address_base = 0;
-		u32 cpu_address_range = 0;
+		address_range locked_range;
+		address_range cpu_range = {};
+		address_range confirmed_range;
+		weak_ptr super_ptr;

 		utils::protection protection = utils::protection::rw;
-		protection_policy guard_policy;

 		bool locked = false;
-		bool dirty = false;

-		inline void init_lockable_range(u32 base, u32 length)
+		inline void init_lockable_range(const address_range &range)
 		{
-			locked_address_base = (base & ~4095);
+			locked_range = range.to_page_range();

-			if ((guard_policy != protect_policy_full_range) && (length >= 4096))
+			if ((guard_policy != protect_policy_full_range) && (range.length() >= 4096))
 			{
-				const u32 limit = base + length;
-				const u32 block_end = (limit & ~4095);
-				const u32 block_start = (locked_address_base < base) ? (locked_address_base + 4096) : locked_address_base;
-
-				locked_address_range = 4096;
+				const u32 block_start = (locked_range.start < range.start) ? (locked_range.start + 4096u) : locked_range.start;
+				const u32 block_end = locked_range.end;

 				if (block_start < block_end)
 				{
-					//Page boundaries cover at least one unique page
-					locked_address_base = block_start;
+					// protect unique page range
+					locked_range.start = block_start;
+					locked_range.end = block_end;
+				}

-					if (guard_policy == protect_policy_conservative)
-					{
-						//Protect full unique range
-						locked_address_range = (block_end - block_start);
-					}
+				if (guard_policy == protect_policy_one_page)
+				{
+					// protect exactly one page
+					locked_range.set_length(4096u);
 				}
 			}
-			else
-				locked_address_range = align(base + length, 4096) - locked_address_base;

-			verify(HERE), locked_address_range > 0;
+			AUDIT( (locked_range.start == page_start(range.start)) || (locked_range.start == next_page(range.start)) );
+			AUDIT( locked_range.end <= page_end(range.end) );
+			verify(HERE), locked_range.is_page_range();
 		}

 	public:
-		buffered_section() {}
-		~buffered_section() {}
+		buffered_section() {};
+		~buffered_section() {};

-		void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range)
+		void reset(const address_range &memory_range)
 		{
-			verify(HERE), locked == false;
+			verify(HERE), memory_range.valid() && locked == false;

-			cpu_address_base = base;
-			cpu_address_range = length;
+			cpu_range = address_range(memory_range);
+			confirmed_range.invalidate();
+			locked_range.invalidate();

-			confirmed_range = { 0, 0 };
 			protection = utils::protection::rw;
-			guard_policy = protect_policy;
 			locked = false;

-			init_lockable_range(cpu_address_base, cpu_address_range);
+			super_ptr = {};
+
+			init_lockable_range(cpu_range);
 		}

-		void protect(utils::protection prot, bool force = false)
+	protected:
+		void invalidate_range()
 		{
-			if (prot == protection && !force) return;
+			ASSERT(!locked);

-			verify(HERE), locked_address_range > 0;
-			utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
-			protection = prot;
-			locked = prot != utils::protection::rw;
+			cpu_range.invalidate();
+			confirmed_range.invalidate();
+			locked_range.invalidate();
+		}

-			if (prot == utils::protection::no)
+	public:
+		void protect(utils::protection new_prot, bool force = false)
+		{
+			if (new_prot == protection && !force) return;
+
+			verify(HERE), locked_range.is_page_range();
+			AUDIT( !confirmed_range.valid() || confirmed_range.inside(cpu_range) );
+
+#ifdef TEXTURE_CACHE_DEBUG
+			if (new_prot != protection || force)
 			{
-				locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range);
+				if (locked && !force) // When force=true, it is the responsibility of the caller to remove this section from the checker refcounting
+					tex_cache_checker.remove(locked_range, protection);
+				if (new_prot != utils::protection::rw)
+					tex_cache_checker.add(locked_range, new_prot);
+			}
+#endif // TEXTURE_CACHE_DEBUG
+
+			rsx::memory_protect(locked_range, new_prot);
+			protection = new_prot;
+			locked = (protection != utils::protection::rw);
+
+			if (protection == utils::protection::no)
+			{
+				super_ptr = rsx::get_super_ptr(cpu_range);
+				verify(HERE), super_ptr;
 				tag_memory();
 			}
 			else
@@ -129,255 +146,274 @@ namespace rsx
 			if (!locked)
 			{
 				//Unprotect range also invalidates secured range
-				confirmed_range = { 0, 0 };
+				confirmed_range.invalidate();
 			}
-			locked_memory_ptr = {};
+			super_ptr = {};
 			}
+		}

-		void protect(utils::protection prot, const std::pair& range_confirm)
+		void protect(utils::protection prot, const std::pair& new_confirm)
 		{
+			// new_confirm.first is an offset after cpu_range.start
+			// new_confirm.second is the length (after cpu_range.start + new_confirm.first)
+
+#ifdef TEXTURE_CACHE_DEBUG
+			// We need to remove the lockable range from page_info as we will be re-protecting with force==true
+			if (locked)
+				tex_cache_checker.remove(locked_range, protection);
+#endif
+
 			if (prot != utils::protection::rw)
 			{
-				const auto old_prot = protection;
-				const auto old_locked_base = locked_address_base;
-				const auto old_locked_length = locked_address_range;
-
-				if (confirmed_range.second)
+				if (confirmed_range.valid())
 				{
-					const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second);
-					confirmed_range.first = std::min(confirmed_range.first, range_confirm.first);
-					confirmed_range.second = range_limit - confirmed_range.first;
+					confirmed_range.start = std::min(confirmed_range.start, cpu_range.start + new_confirm.first);
+					confirmed_range.end = std::max(confirmed_range.end, cpu_range.start + new_confirm.first + new_confirm.second - 1);
 				}
 				else
 				{
-					confirmed_range = range_confirm;
+					confirmed_range = address_range::start_length(cpu_range.start + new_confirm.first, new_confirm.second);
+					ASSERT(!locked || locked_range.inside(confirmed_range.to_page_range()));
 				}
-
-				init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
+
+				verify(HERE), confirmed_range.inside(cpu_range);
+				init_lockable_range(confirmed_range);
 			}

 			protect(prot, true);
 		}

-		void unprotect()
+		inline void unprotect()
 		{
+			AUDIT(protection != utils::protection::rw);
 			protect(utils::protection::rw);
 		}

-		void discard()
+		inline void discard()
 		{
+#ifdef TEXTURE_CACHE_DEBUG
+			if (locked)
+				tex_cache_checker.remove(locked_range, protection);
+#endif
+
 			protection = utils::protection::rw;
-			dirty = true;
+			confirmed_range.invalidate();
+			super_ptr = {};
 			locked = false;
-
-			confirmed_range = { 0, 0 };
-			locked_memory_ptr = {};
 		}

-		/**
-		 * Check if range overlaps with this section.
-		 * ignore_protection_range - if true, the test should not check against the aligned protection range, instead
-		 * tests against actual range of contents in memory
-		 */
-		bool overlaps(std::pair range) const
-		{
-			return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
-		}
-
-		bool overlaps(u32 address, overlap_test_bounds bounds) const
+		inline const address_range& get_bounds(section_bounds bounds) const
 		{
 			switch (bounds)
 			{
-			case overlap_test_bounds::full_range:
-			{
-				return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range);
-			}
-			case overlap_test_bounds::protected_range:
-			{
-				return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
-			}
-			case overlap_test_bounds::confirmed_range:
-			{
-				const auto range = get_confirmed_range();
-				return ((range.first + cpu_address_base) <= address && (address - range.first) < range.second);
-			}
+			case section_bounds::full_range:
+				return cpu_range;
+			case section_bounds::locked_range:
+				return locked_range;
+			case section_bounds::confirmed_range:
+				return confirmed_range.valid() ? confirmed_range : cpu_range;
 			default:
-				fmt::throw_exception("Unreachable" HERE);
+				ASSUME(0);
 			}
 		}

-		bool overlaps(const std::pair& range, overlap_test_bounds bounds) const
-		{
-			switch (bounds)
-			{
-			case overlap_test_bounds::full_range:
-			{
-				return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
-			}
-			case overlap_test_bounds::protected_range:
-			{
-				return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
-			}
-			case overlap_test_bounds::confirmed_range:
-			{
-				const auto test_range = get_confirmed_range();
-				return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second);
-			}
-			default:
-				fmt::throw_exception("Unreachable" HERE);
-			}
-		}

 		/**
-		 * Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
-		 * If true, returns the range with updated invalid range
+		 * Overlapping checks
 		 */
-		std::tuple> overlaps_page(const std::pair& old_range, u32 address, overlap_test_bounds bounds) const
+		inline bool overlaps(const u32 address, section_bounds bounds) const
 		{
-			const u32 page_base = address & ~4095;
-			const u32 page_limit = page_base + 4096;
-
-			const u32 compare_min = std::min(old_range.first, page_base);
-			const u32 compare_max = std::max(old_range.second, page_limit);
-
-			u32 memory_base, memory_range;
-			switch (bounds)
-			{
-			case overlap_test_bounds::full_range:
-			{
-				memory_base = (cpu_address_base & ~4095);
-				memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base;
-				break;
-			}
-			case overlap_test_bounds::protected_range:
-			{
-				memory_base = locked_address_base;
-				memory_range = locked_address_range;
-				break;
-			}
-			case overlap_test_bounds::confirmed_range:
-			{
-				const auto range = get_confirmed_range();
-				memory_base = (cpu_address_base + range.first) & ~4095;
-				memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base;
-				break;
-			}
-			default:
-				fmt::throw_exception("Unreachable" HERE);
-			}
-
-			if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max))
-				return std::make_tuple(false, old_range);
-
-			const u32 _min = std::min(memory_base, compare_min);
-			const u32 _max = std::max(memory_base + memory_range, compare_max);
-			return std::make_tuple(true, std::make_pair(_min, _max));
+			return get_bounds(bounds).overlaps(address);
 		}

-		bool is_locked() const
+		inline bool overlaps(const address_range &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).overlaps(other);
+		}
+
+		inline bool overlaps(const buffered_section &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).overlaps(other.get_bounds(bounds));
+		}
+
+		inline bool inside(const address_range &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).inside(other);
+		}
+
+		inline bool inside(const buffered_section &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).inside(other.get_bounds(bounds));
+		}
+
+		inline s32 signed_distance(const address_range &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).signed_distance(other);
+		}
+
+		inline u32 distance(const address_range &other, section_bounds bounds) const
+		{
+			return get_bounds(bounds).distance(other);
+		}
+
+		/**
+		 * Utilities
+		 */
+		inline bool valid_range() const
+		{
+			return cpu_range.valid();
+		}
+
+		inline bool is_locked() const
 		{
 			return locked;
 		}

-		bool is_dirty() const
+		inline u32 get_section_base() const
 		{
-			return dirty;
+			return cpu_range.start;
 		}

-		void set_dirty(bool state)
+		inline u32 get_section_size() const
 		{
-			dirty = state;
+			return cpu_range.valid() ? cpu_range.length() : 0;
 		}

-		u32 get_section_base() const
+		inline const address_range& get_locked_range() const
 		{
-			return cpu_address_base;
+			AUDIT( locked );
+			return locked_range;
 		}

-		u32 get_section_size() const
+		inline const address_range& get_section_range() const
 		{
-			return cpu_address_range;
+			return cpu_range;
 		}

-		bool matches(u32 cpu_address, u32 size) const
+		const address_range& get_confirmed_range() const
 		{
-			return (cpu_address_base == cpu_address && cpu_address_range == size);
+			return confirmed_range.valid() ? confirmed_range : cpu_range;
 		}

-		std::pair get_min_max(const std::pair& current_min_max) const
+		const std::pair get_confirmed_range_delta() const
 		{
-			u32 min = std::min(current_min_max.first, locked_address_base);
-			u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
+			if (!confirmed_range.valid())
+				return { 0, cpu_range.length() };

-			return std::make_pair(min, max);
+			return { confirmed_range.start - cpu_range.start, confirmed_range.length() };
 		}

-		utils::protection get_protection() const
+		inline bool matches(const address_range &range) const
+		{
+			return cpu_range.valid() && cpu_range == range;
+		}
+
+		inline utils::protection get_protection() const
 		{
 			return protection;
 		}

-		template
-		T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
+		inline address_range get_min_max(const address_range& current_min_max, section_bounds bounds) const
 		{
-			verify(HERE), locked_memory_ptr;
-			return locked_memory_ptr.get(offset, no_sync);
+			return get_bounds(bounds).get_min_max(current_min_max);
 		}

+		/**
+		 * Super Pointer
+		 */
+		template
+		inline T* get_ptr_by_offset(u32 offset = 0, bool no_sync = false)
+		{
+			verify(HERE), super_ptr && cpu_range.length() >= (offset + sizeof(T));
+			return super_ptr.get(offset, no_sync);
+		}
+
+		// specialization due to sizeof(void) being illegal
+		inline void* get_ptr_by_offset(u32 offset, bool no_sync)
+		{
+			verify(HERE), super_ptr && cpu_range.length() >= (offset + 1);
+			return super_ptr.get(offset, no_sync);
+		}
+
+		template
+		inline T* get_ptr(u32 address, bool no_sync = false)
+		{
+			verify(HERE), cpu_range.start <= address; // super_ptr & sizeof(T) tests are done by get_ptr_by_offset
+			return get_ptr_by_offset(address - cpu_range.start, no_sync);
+		}
+
+		inline void flush_ptr_by_offset(u32 offset = 0, u32 len = 0) const
+		{
+			verify(HERE), super_ptr && cpu_range.length() >= (offset + len);
+			super_ptr.flush(offset, len);
+		}
+
+		inline void flush_ptr(u32 address, u32 len = 0) const
+		{
+			verify(HERE), cpu_range.start <= address; // super_ptr & length tests are done by flush_ptr_by_offset
+			return flush_ptr_by_offset(address - cpu_range.start, len);
+		}
+
+		inline void flush_ptr(const address_range &range) const
+		{
+			return flush_ptr(range.start, range.length());
+		}
+
+
+		/**
+		 * Memory tagging
+		 */
+	private:
+		inline void tag_memory()
+		{
+			// We only need to tag memory if we are in full-range mode
+			if (guard_policy == protect_policy_full_range)
+				return;
+
+			AUDIT(locked && super_ptr);
+
+			const address_range& range = get_confirmed_range();
+
+			volatile u32* first = get_ptr(range.start, true);
+			volatile u32* last = get_ptr(range.end - 3, true);
+
+			*first = range.start;
+			*last = range.end;
+
+			flush_ptr(range.start, 4);
+			flush_ptr(range.end - 3, 4);
+		}
+
+	public:
 		bool test_memory_head()
 		{
-			if (!locked_memory_ptr)
-			{
-				return false;
-			}
+			if (guard_policy == protect_policy_full_range)
+				return true;

-			const u32* first = locked_memory_ptr.get(confirmed_range.first);
-			return (*first == (cpu_address_base + confirmed_range.first));
+			AUDIT(locked && super_ptr);
+
+			const auto& range = get_confirmed_range();
+			volatile const u32* first = get_ptr(range.start);
+			return (*first == range.start);
 		}

 		bool test_memory_tail()
 		{
-			if (!locked_memory_ptr)
-			{
-				return false;
-			}
+			if (guard_policy == protect_policy_full_range)
+				return true;

-			const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
-			const u32* last = locked_memory_ptr.get(valid_limit - 4);
-			return (*last == (cpu_address_base + valid_limit - 4));
-		}
+			AUDIT(locked && super_ptr);

-		void flush_io(u32 offset = 0, u32 len = 0) const
-		{
-			const auto write_length = len ? len : (cpu_address_range - offset);
-			locked_memory_ptr.flush(offset, write_length);
-		}
-
-		std::pair get_protected_range() const
-		{
-			if (locked)
-			{
-				return { locked_address_base, locked_address_range };
-			}
-			else
-			{
-				return { 0, 0 };
-			}
-		}
-
-		std::pair get_confirmed_range() const
-		{
-			if (confirmed_range.second == 0)
-			{
-				return { 0, cpu_address_range };
-			}
-
-			return confirmed_range;
+			const auto& range = get_confirmed_range();
+			volatile const u32* last = get_ptr(range.end-3);
+			return (*last == range.end);
 		}
 	};
+
+
 	template
 	class shaders_cache
 	{
diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp
index e2c32c1f14..fd813357d5 100644
--- a/rpcs3/Emu/RSX/rsx_utils.cpp
+++ b/rpcs3/Emu/RSX/rsx_utils.cpp
@@ -76,6 +76,11 @@ namespace rsx
 		}
 	}

+	weak_ptr get_super_ptr(const address_range &range)
+	{
+		return get_super_ptr(range.start, range.length());
+	}
+
 	weak_ptr get_super_ptr(u32 addr, u32 len)
 	{
 		verify(HERE), g_current_renderer;
@@ -507,4 +512,8 @@ namespace rsx
 			++src_ptr;
 		}
 	}
+
+#ifdef TEXTURE_CACHE_DEBUG
+	tex_cache_checker_t tex_cache_checker = {};
+#endif
 }
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 221561336d..1019d643e9 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -1,8 +1,11 @@
-#pragma once
+#pragma once

 #include "../System.h"
+#include "Utilities/address_range.h"
 #include "Utilities/geometry.h"
 #include "Utilities/asm.h"
+#include "Utilities/VirtualMemory.h"
+#include "Emu/Memory/vm.h"
 #include "gcm_enums.h"
 #include
 #include
@@ -16,6 +19,15 @@ extern "C"

 namespace rsx
 {
+	// Import address_range utilities
+	using utils::address_range;
+	using utils::address_range_vector;
+	using utils::page_for;
+	using utils::page_start;
+	using utils::page_end;
+	using utils::next_page;
+
+	// Definitions
 	class thread;
 	extern thread* g_current_renderer;
@@ -200,7 +212,14 @@
 		}
 	};

-	//Holds information about a framebuffer
+	// Acquire memory mirror with r/w permissions
+	weak_ptr get_super_ptr(const address_range &range);
+	weak_ptr get_super_ptr(u32 addr, u32 size);
+
+
+	/**
+	 * Holds information about a framebuffer
+	 */
 	struct gcm_framebuffer_info
 	{
 		u32 address = 0;
@@ -223,6 +242,11 @@
 		gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
 			:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
 		{}
+
+		address_range get_memory_range(u32 aa_factor = 1) const
+		{
+			return address_range::start_length(address, pitch * height * aa_factor);
+		}
 	};

 	struct avconf
@@ -463,9 +487,6 @@

 	std::array get_constant_blend_colors();

-	// Acquire memory mirror with r/w permissions
-	weak_ptr get_super_ptr(u32 addr, u32 size);
-
 	/**
 	 * Shuffle texel layout from xyzw to wzyx
 	 * TODO: Variable src/dst and optional se conversion
@@ -727,11 +748,6 @@
 		return g_current_renderer;
 	}

-	static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
-	{
-		return (base1 < limit2 && base2 < limit1);
-	}
-
 	template
 	void unpack_bitset(std::bitset& block, u64* values)
 	{
@@ -768,4 +784,4 @@ namespace rsx
 			}
 		}
 	}
-}
+}
\ No newline at end of file
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index fe6489dab8..416be20263 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -528,6 +528,8 @@
+
+
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index c8c8aa801c..79039e3a37 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -1444,5 +1444,11 @@
 			Utilities
+
+			Emu\GPU\RSX\Common
+
+
+			Emu\GPU\RSX\Common
+
\ No newline at end of file
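
Note on the reworked init_lockable_range in the rsx_cache.h hunk above: the lockable range is now derived from an inclusive address_range through the page helpers imported in rsx_utils.h (page_start, page_end, next_page) instead of manual base/length arithmetic. The following is a minimal standalone sketch of that derivation, assuming 4 KiB pages; the range struct and policy enum are simplified stand-ins for the emulator's utils::address_range and protection_policy types, not the real API.

#include <cassert>
#include <cstdint>

// Simplified stand-ins for utils::address_range and the protection policies.
struct range
{
	std::uint32_t start, end;                                   // inclusive bounds
	std::uint32_t length() const { return end - start + 1; }
};

enum class policy { full_range, conservative, one_page };

constexpr std::uint32_t page_size = 4096;
constexpr std::uint32_t page_start(std::uint32_t a) { return a & ~(page_size - 1); }
constexpr std::uint32_t next_page(std::uint32_t a)  { return page_start(a) + page_size; }
constexpr std::uint32_t page_end(std::uint32_t a)   { return next_page(a) - 1; }

// Derive the page-aligned range that will actually be locked for a section
// covering r, following the shape of init_lockable_range in the patch.
range lockable_range(const range& r, policy p)
{
	range locked{ page_start(r.start), page_end(r.end) };       // full page span of the section

	if (p != policy::full_range && r.length() >= page_size)
	{
		// Skip the first page if the section does not own it exclusively.
		const std::uint32_t block_start = (locked.start < r.start) ? next_page(r.start) : locked.start;

		if (block_start < locked.end)
			locked.start = block_start;

		if (p == policy::one_page)
			locked.end = locked.start + page_size - 1;          // clamp to exactly one page
	}

	assert(locked.start % page_size == 0 && (locked.end + 1) % page_size == 0);
	return locked;
}

Under the conservative policy the section keeps every page it covers except a leading page shared with a neighbour, the one-page policy clamps the result to a single page, and full-range mode simply locks every page the section touches.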
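The two-argument protect(prot, new_confirm) overload now treats new_confirm as an {offset, length} pair relative to cpu_range.start and merges it into confirmed_range with min/max over inclusive bounds. A small sketch of that merge under the same convention, using a simplified range type (illustrative only, not the emulator's types):

#include <algorithm>
#include <cstdint>
#include <optional>
#include <utility>

// Inclusive-bounds range, mirroring how utils::address_range is used in the patch.
struct range
{
	std::uint32_t start, end;
	static range start_length(std::uint32_t s, std::uint32_t len) { return { s, s + len - 1 }; }
};

// Merge a newly confirmed {offset, length} pair (relative to the section base)
// into the running confirmed range, as the protect(prot, new_confirm) overload does.
range merge_confirmed(std::uint32_t section_base,
                      const std::optional<range>& confirmed,
                      std::pair<std::uint32_t, std::uint32_t> new_confirm)
{
	const std::uint32_t abs_start = section_base + new_confirm.first;
	const std::uint32_t abs_end   = abs_start + new_confirm.second - 1;

	if (!confirmed)
		return range::start_length(abs_start, new_confirm.second);   // first confirmation

	return { std::min(confirmed->start, abs_start),                   // grow, never shrink
	         std::max(confirmed->end, abs_end) };
}

The confirmed range can only grow, which is why the patch can re-derive the lockable range from it and then re-protect with force=true.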
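tag_memory, test_memory_head and test_memory_tail implement a cheap write-detection scheme for the non-full-range policies: the first and last 32-bit words of the confirmed range are stamped with their own guest addresses, and a later mismatch means something has written over them. A self-contained sketch of the idea on an ordinary byte buffer, assuming the tagged range is at least 8 bytes long; the real implementation goes through the super pointer and flushes the two words.

#include <cstdint>
#include <cstring>
#include <vector>

// 'base' is the guest address mapped to buf[0]; [start, end] is the inclusive tagged range.
void tag_memory(std::vector<std::uint8_t>& buf, std::uint32_t base, std::uint32_t start, std::uint32_t end)
{
	const std::uint32_t head_value = start;   // first u32 holds the range start
	const std::uint32_t tail_value = end;     // last u32 holds the inclusive range end
	std::memcpy(buf.data() + (start - base), &head_value, sizeof(head_value));
	std::memcpy(buf.data() + (end - 3 - base), &tail_value, sizeof(tail_value));
}

// True while both tags are intact, i.e. no write has landed on the head or tail word.
bool test_memory(const std::vector<std::uint8_t>& buf, std::uint32_t base, std::uint32_t start, std::uint32_t end)
{
	std::uint32_t head = 0, tail = 0;
	std::memcpy(&head, buf.data() + (start - base), sizeof(head));
	std::memcpy(&tail, buf.data() + (end - 3 - base), sizeof(tail));
	return head == start && tail == end;
}

The check is deliberately coarse: only the two tagged words are inspected, so it can cheaply confirm that the guarded data still looks untouched, but it cannot see writes that land strictly between the head and tail words.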