diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index d567145933..a6102cebf3 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -716,7 +716,7 @@ namespace rsx } template - std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp) + std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp, rsx::surface_access access) { std::vector result; std::vector> dirty; @@ -739,12 +739,6 @@ namespace rsx if ((this_address + texture_size) <= texaddr) continue; - if (surface->read_barrier(cmd); !surface->test()) - { - dirty.emplace_back(this_address, is_depth); - continue; - } - surface_overlap_info info; info.surface = surface; info.base_address = this_address; @@ -789,6 +783,13 @@ namespace rsx info.height = std::min(required_height, normalized_surface_height - info.src_y); } + // Delay this as much as possible to avoid side-effects of spamming barrier + if (surface->memory_barrier(cmd, access); !surface->test()) + { + dirty.emplace_back(this_address, is_depth); + continue; + } + info.is_clipped = (info.width < required_width || info.height < required_height); if (auto surface_bpp = surface->get_bpp(); UNLIKELY(surface_bpp != required_bpp)) @@ -943,5 +944,26 @@ namespace rsx rtt = std::make_pair(0, nullptr); } } + + void invalidate_range(const rsx::address_range& range) + { + for (auto &rtt : m_render_targets_storage) + { + if (range.overlaps(rtt.second->get_memory_range())) + { + rtt.second->clear_rw_barrier(); + rtt.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } + + for (auto &ds : m_depth_stencil_storage) + { + if (range.overlaps(ds.second->get_memory_range())) + { + ds.second->clear_rw_barrier(); + ds.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } + } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 146f2d43ea..7b8bfea5d5 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2084,7 +2084,7 @@ namespace rsx // NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here const auto bpp = get_format_block_size_in_bytes(format); - const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp); + const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp, rsx::surface_access::read); if (!overlapping_fbos.empty() || !overlapping_locals.empty()) { @@ -2342,7 +2342,7 @@ namespace rsx auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info { - const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp); + const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer); if (list.empty()) { return {}; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 278f5d4376..c55164d06b 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1629,7 +1629,7 @@ void GLGSRender::flip(int buffer, bool emu_flip) else { gl::command_context cmd = { gl_state }; - const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 22a3306cdc..7df1454894 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -223,8 +223,6 @@ struct gl_render_target_traits prev.target = sink.get(); - sink->sync_tag(); - if (!sink->old_contents.empty()) { // Deal with this, likely only needs to clear diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index fd81810347..be391dd13b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3224,7 +3224,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { // Confirmed to be the newest data source in that range diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index c5b18d65ca..744f1db558 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -13,59 +13,42 @@ namespace vk void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); - struct render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor + class render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor { - u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid - - using viewable_image::viewable_image; - - vk::viewable_image* get_surface(rsx::surface_access access_type) override + // Get the linear resolve target bound to this surface. Initialize if none exists + vk::viewable_image* get_resolve_target_safe(vk::command_buffer& cmd) { - if (samples() == 1 || access_type == rsx::surface_access::write) + if (!resolve_surface) { - return this; + // Create a resolve surface + auto pdev = vk::get_current_renderer(); + const auto resolve_w = width() * samples_x; + const auto resolve_h = height() * samples_y; + + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + resolve_surface.reset(new vk::viewable_image( + *pdev, + pdev->get_memory_mapping().device_local, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, + format(), + resolve_w, resolve_h, 1, 1, 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, + usage, + 0)); + + resolve_surface->native_component_map = native_component_map; + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); } - // A read barrier should have been called before this! - verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve); return resolve_surface.get(); } - bool is_depth_surface() const override - { - return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); - } - - void release_ref(vk::viewable_image* t) const override - { - static_cast(t)->release(); - } - - bool matches_dimensions(u16 _width, u16 _height) const - { - //Use forward scaling to account for rounding and clamping errors - return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); - } - - image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, - VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override - { - if (remap_encoding != 0xDEADBEEF && resolve_surface) - { - return resolve_surface->get_view(remap_encoding, remap, mask); - } - else - { - if (remap_encoding == 0xDEADBEEF) - { - // Special encoding to skip the resolve target fetch - remap_encoding = 0xAAE4; - } - - return vk::viewable_image::get_view(remap_encoding, remap, mask); - } - } - + // Resolve the planar MSAA data into a linear block void resolve(vk::command_buffer& cmd) { VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; @@ -140,6 +123,7 @@ namespace vk msaa_flags &= ~(rsx::surface_state_flags::require_resolve); } + // Unresolve the linear data into planar MSAA data void unresolve(vk::command_buffer& cmd) { verify(HERE), !(msaa_flags & rsx::surface_state_flags::require_resolve); @@ -212,94 +196,229 @@ namespace vk msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); } - void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) + // Default-initialize memory without loading + void clear_memory(vk::command_buffer& cmd, vk::image *surface) { - // Helper to optionally clear/initialize memory contents depending on barrier type - auto clear_surface_impl = [&cmd, this](vk::image* surface) + const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? + VK_IMAGE_LAYOUT_GENERAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + surface->push_layout(cmd, optimal_layout); + + VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; + if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) { - const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? - VK_IMAGE_LAYOUT_GENERAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + VkClearColorValue color = { 0.f, 0.f, 0.f, 1.f }; + vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); + } + else + { + VkClearDepthStencilValue clear{ 1.f, 255 }; + vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); + } - surface->push_layout(cmd, optimal_layout); + surface->pop_layout(cmd); - VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; - if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + if (surface == this) + { + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + } + + // Load memory from cell and use to initialize the surface + void load_memory(vk::command_buffer& cmd) + { + auto& upload_heap = *vk::get_upload_heap(); + + u32 gcm_format; + if (is_depth_surface()) + { + gcm_format = get_compatible_gcm_format(format_info.gcm_depth_format).first; + } + else + { + auto fmt = get_compatible_gcm_format(format_info.gcm_color_format); + if (fmt.second) { - VkClearColorValue color = { 0.f, 0.f, 0.f, 1.f }; - vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); + switch (fmt.first) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + //Hack + gcm_format = CELL_GCM_TEXTURE_X32_FLOAT; + break; + default: + gcm_format = fmt.first; + break; + } } else { - VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); + gcm_format = fmt.first; } + } - surface->pop_layout(cmd); + rsx_subresource_layout subres{}; + subres.width_in_block = surface_width * samples_x; + subres.height_in_block = surface_height * samples_y; + subres.pitch_in_block = rsx_pitch / get_bpp(); + subres.depth = 1; + subres.data = { (const gsl::byte*)vm::get_super_ptr(base_addr), s32(rsx_pitch * surface_height * samples_y) }; - if (surface == this) - { - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - } - }; - - auto get_resolve_target = [&]() + if (LIKELY(g_cfg.video.resolution_scale_percent == 100 && samples() == 1)) { - if (!resolve_surface) + push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vk::copy_mipmaped_image_using_buffer(cmd, this, { subres }, gcm_format, false, 1, aspect(), upload_heap, rsx_pitch); + pop_layout(cmd); + } + else + { + vk::image* content = nullptr; + vk::image* final_dst = (samples() > 1) ? get_resolve_target_safe(cmd) : this; + + if (LIKELY(g_cfg.video.resolution_scale_percent == 100)) { - // Create a resolve surface - auto pdev = vk::get_current_renderer(); - const auto resolve_w = width() * samples_x; - const auto resolve_h = height() * samples_y; - - VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - resolve_surface.reset(new vk::viewable_image( - *pdev, - pdev->get_memory_mapping().device_local, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, - format(), - resolve_w, resolve_h, 1, 1, 1, - VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, - usage, - 0)); - - resolve_surface->native_component_map = native_component_map; - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + verify(HERE), samples() > 1; + content = get_resolve_target_safe(cmd); + } + else + { + content = vk::get_typeless_helper(format(), subres.width_in_block, subres.height_in_block); + content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } - return resolve_surface.get(); - }; + // Load Cell data into temp buffer + vk::copy_mipmaped_image_using_buffer(cmd, content, { subres }, gcm_format, false, 1, aspect(), upload_heap, rsx_pitch); + // Write into final image + if (content != final_dst) + { + vk::copy_scaled_image(cmd, content->value, final_dst->value, content->current_layout, final_dst->current_layout, + { 0, 0, subres.width_in_block, subres.height_in_block }, { 0, 0, (s32)final_dst->width(), (s32)final_dst->height() }, + 1, aspect(), true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, + format(), format()); + } + + if (samples() > 1) + { + // Trigger unresolve + msaa_flags = rsx::surface_state_flags::require_unresolve; + } + } + + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + + void initialize_memory(vk::command_buffer& cmd, bool read_access) + { + const bool memory_load = is_depth_surface() ? + !!g_cfg.video.read_depth_buffer : + !!g_cfg.video.read_color_buffers; + + if (!memory_load) + { + clear_memory(cmd, this); + + if (read_access && samples() > 1) + { + // Only clear the resolve surface if reading from it, otherwise it's a waste + clear_memory(cmd, get_resolve_target_safe(cmd)); + } + + msaa_flags = rsx::surface_state_flags::ready; + } + else + { + load_memory(cmd); + } + } + + public: + u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid + using viewable_image::viewable_image; + + vk::viewable_image* get_surface(rsx::surface_access access_type) override + { + if (samples() == 1 || access_type == rsx::surface_access::write) + { + return this; + } + + // A read barrier should have been called before this! + verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve); + return resolve_surface.get(); + } + + bool is_depth_surface() const override + { + return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); + } + + void release_ref(vk::viewable_image* t) const override + { + static_cast(t)->release(); + } + + bool matches_dimensions(u16 _width, u16 _height) const + { + //Use forward scaling to account for rounding and clamping errors + return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); + } + + image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, + VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override + { + if (remap_encoding != 0xDEADBEEF && resolve_surface) + { + return resolve_surface->get_view(remap_encoding, remap, mask); + } + else + { + if (remap_encoding == 0xDEADBEEF) + { + // Special encoding to skip the resolve target fetch + remap_encoding = 0xAAE4; + } + + return vk::viewable_image::get_view(remap_encoding, remap, mask); + } + } + + void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) + { const bool read_access = (access != rsx::surface_access::write); - if (samples() > 1 && read_access) + const bool is_depth = is_depth_surface(); + + if ((g_cfg.video.read_color_buffers && !is_depth) || + (g_cfg.video.read_depth_buffer && is_depth)) { - get_resolve_target(); + // TODO: Decide what to do when memory loads are disabled but the underlying has memory changed + // NOTE: Assume test() is expensive when in a pinch + if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test()) + { + // TODO: Figure out why merely returning and failing the test does not work when reading (TLoU) + // The result should have been the same either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + } } if (LIKELY(old_contents.empty())) { if (state_flags & rsx::surface_state_flags::erase_bkgnd) { - clear_surface_impl(this); + // NOTE: This step CAN introduce MSAA flags! + initialize_memory(cmd, read_access); - if (resolve_surface && read_access) - { - // Only clear the resolve surface if reading from it, otherwise it's a waste - clear_surface_impl(resolve_surface.get()); - } - - on_write(rsx::get_shared_tag(), rsx::surface_state_flags::ready); + verify(HERE), state_flags == rsx::surface_state_flags::ready; + on_write(rsx::get_shared_tag(), static_cast(msaa_flags)); } - else if (msaa_flags & rsx::surface_state_flags::require_resolve) + + if (msaa_flags & rsx::surface_state_flags::require_resolve) { if (read_access) { // Only do this step when read access is required + get_resolve_target_safe(cmd); resolve(cmd); } } @@ -308,6 +427,7 @@ namespace vk if (!read_access) { // Only do this step when it is needed to start rendering + verify(HERE), resolve_surface; unresolve(cmd); } } @@ -316,11 +436,13 @@ namespace vk } // Memory transfers - vk::image *target_image = (samples() > 1) ? get_resolve_target() : this; + vk::image *target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this; vk::blitter hw_blitter; - bool optimize_copy = true; const auto dst_bpp = get_bpp(); + unsigned first = prepare_rw_barrier_for_transfer(this); + bool optimize_copy = true; + bool any_valid_writes = false; for (auto i = first; i < old_contents.size(); ++i) { @@ -328,10 +450,19 @@ namespace vk auto src_texture = static_cast(section.source); src_texture->read_barrier(cmd); + if (LIKELY(src_texture->test())) + { + any_valid_writes = true; + } + else + { + continue; + } + const auto src_bpp = src_texture->get_bpp(); rsx::typeless_xfer typeless_info{}; - if (src_texture->info.format == info.format) + if (LIKELY(src_texture->info.format == info.format)) { verify(HERE), src_bpp == dst_bpp; } @@ -343,7 +474,7 @@ namespace vk typeless_info.src_is_typeless = true; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; typeless_info.src_native_format_override = (u32)info.format; - typeless_info.src_is_depth = src_texture->is_depth_surface(); + typeless_info.src_is_depth = is_depth; typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; } } @@ -371,12 +502,12 @@ namespace vk } else if (state_flags & rsx::surface_state_flags::erase_bkgnd) { - clear_surface_impl(target_image); - - state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); - msaa_flags = rsx::surface_state_flags::ready; + // Might introduce MSAA flags + initialize_memory(cmd, false); + verify(HERE), state_flags == rsx::surface_state_flags::ready; } - else if (msaa_flags & rsx::surface_state_flags::require_resolve) + + if (msaa_flags & rsx::surface_state_flags::require_resolve) { // Need to forward resolve this resolve(cmd); @@ -393,6 +524,24 @@ namespace vk optimize_copy = optimize_copy && !memory_load; } + if (UNLIKELY(!any_valid_writes)) + { + LOG_WARNING(RSX, "Surface at 0x%x inherited stale references", base_addr); + + clear_rw_barrier(); + shuffle_tag(); + + if (!read_access) + { + // This will be modified either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + memory_barrier(cmd, access); + } + + return; + } + + // NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA. on_write_copy(0, optimize_copy); if (!read_access && samples() > 1) @@ -592,8 +741,6 @@ namespace rsx prev.target = sink.get(); sink->rsx_pitch = ref->get_rsx_pitch(); - sink->sync_tag(); - if (!sink->old_contents.empty()) { // Deal with this, likely only needs to clear