From 31b07f2c5cf1566d560e6939a241748b7973e4ff Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 29 Oct 2017 19:34:55 +0300 Subject: [PATCH] rsx: Tweaks - Optimize get_surface_subresource - Add check_program_status time to draw call setup statistics. It can slow down games significantly --- rpcs3/Emu/RSX/Common/surface_store.h | 143 +++++++++++++++++---------- rpcs3/Emu/RSX/Common/texture_cache.h | 4 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 7 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 11 ++- 4 files changed, 105 insertions(+), 60 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index e3971ccce2..5295ac1098 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -557,52 +557,63 @@ namespace rsx if (surface_address > texaddr) return false; - u32 offset = texaddr - surface_address; - if (texaddr >= surface_address) + const u32 offset = texaddr - surface_address; + if (offset == 0) { - if (offset == 0) - { - is_subslice = true; - } - else - { - surface_format_info info; - Traits::get_surface_info(surface, &info); + *x = 0; + *y = 0; + return true; + } + else + { + surface_format_info info; + Traits::get_surface_info(surface, &info); - u32 range = info.rsx_pitch * info.surface_height; - if (double_height) range *= 2; + u32 range = info.rsx_pitch * info.surface_height; + if (double_height) range <<= 1; - if (offset < range) + if (offset < range) + { + const u32 y = (offset / info.rsx_pitch); + u32 x = (offset % info.rsx_pitch) / info.bpp; + + if (scale_to_fit) { - const u32 y = (offset / info.rsx_pitch); - u32 x = (offset % info.rsx_pitch) / info.bpp; - - if (scale_to_fit) - { - const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch; - x = (u32)((f32)x / x_scale); - } - - x_offset = x; - y_offset = y; - - if (double_height) y_offset /= 2; - is_subslice = true; + const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch; + x = (u32)((f32)x / x_scale); } - } - if (is_subslice) - { - *x = x_offset; - *y = y_offset; + x_offset = x; + y_offset = y; - return true; + if (double_height) y_offset /= 2; + is_subslice = true; } } + if (is_subslice) + { + *x = x_offset; + *y = y_offset; + + return true; + } + return false; } + //Fast hit test + inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr) + { + if (surface_address > texaddr) + return false; + + const u32 offset = texaddr - surface_address; + const u32 range = surface->get_rsx_pitch() * surface->get_surface_height(); + + return (offset < range); + } + bool address_is_bound(u32 address, bool is_depth) const { if (is_depth) @@ -629,7 +640,8 @@ namespace rsx return true; } - surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false) + surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, + bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false, bool double_height = false) { auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped) { @@ -638,12 +650,6 @@ namespace rsx surface_format_info info; Traits::get_surface_info(surface, &info); - if (info.rsx_pitch != requested_pitch) - return false; - - if (requested_width == 0 || requested_height == 0) - return true; - u16 real_width = requested_width; if (scale_to_fit) @@ -696,26 +702,55 @@ namespace rsx u16 w; u16 h; - for (auto &tex_info : m_render_targets_storage) + if (!ignore_color_formats) { - u32 this_address = std::get<0>(tex_info); - surface = std::get<1>(tex_info).get(); + for (auto &tex_info : m_render_targets_storage) + { + const u32 this_address = std::get<0>(tex_info); + if (texaddr < this_address) + continue; - if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) - return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped }; + surface = std::get<1>(tex_info).get(); + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + if (requested_width == 0 || requested_height == 0) + { + if (!surface_overlaps_address_fast(surface, this_address, texaddr)) + continue; + else + return{ surface, 0, 0, 0, 0, false, false, false }; + } + + if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) + return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped }; + } } - if (ignore_depth_formats) - return{}; - - //Check depth surfaces for overlap - for (auto &tex_info : m_depth_stencil_storage) + if (!ignore_depth_formats) { - u32 this_address = std::get<0>(tex_info); - surface = std::get<1>(tex_info).get(); + //Check depth surfaces for overlap + for (auto &tex_info : m_depth_stencil_storage) + { + const u32 this_address = std::get<0>(tex_info); + if (texaddr < this_address) + continue; - if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) - return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped }; + surface = std::get<1>(tex_info).get(); + if (surface->get_rsx_pitch() != requested_pitch) + continue; + + if (requested_width == 0 || requested_height == 0) + { + if (!surface_overlaps_address_fast(surface, this_address, texaddr)) + continue; + else + return{ surface, 0, 0, 0, 0, false, true, false }; + } + + if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) + return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped }; + } } return{}; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index d81759848b..2428f02dad 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1087,7 +1087,7 @@ namespace rsx } //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y); + auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, false, dst.compressed_y); dst_is_render_target = dst_subres.surface != nullptr; if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch) @@ -1099,7 +1099,7 @@ namespace rsx } //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, src.compressed_y); + auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, false, src.compressed_y); src_is_render_target = src_subres.surface != nullptr; if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 679721f3e4..c599a16d4d 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -318,12 +318,17 @@ namespace void GLGSRender::end() { + std::chrono::time_point state_check_start = steady_clock::now(); + if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state()) { rsx::thread::end(); return; } + std::chrono::time_point state_check_end = steady_clock::now(); + m_begin_time += (u32)std::chrono::duration_cast(state_check_end - state_check_start).count(); + if (manually_flush_ring_buffers) { //Use approximations to reseve space. This path is mostly for debug purposes anyway @@ -964,7 +969,7 @@ bool GLGSRender::check_program_state() if (dirty_framebuffer) return std::make_tuple(false, 0); - auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); + auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth); if (!rsc.surface || rsc.is_depth_surface != is_depth) return std::make_tuple(false, 0); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index a7c88c39e9..4910103089 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -995,6 +995,8 @@ void VKGSRender::end() return; } + std::chrono::time_point state_check_start = steady_clock::now(); + //Load program here since it is dependent on vertex state if (!check_program_status()) { @@ -1003,14 +1005,17 @@ void VKGSRender::end() return; } + std::chrono::time_point state_check_end = steady_clock::now(); + m_setup_time += (u32)std::chrono::duration_cast(state_check_end - state_check_start).count(); + //Programs data is dependent on vertex state - std::chrono::time_point vertex_start = steady_clock::now(); + std::chrono::time_point vertex_start = state_check_end; auto upload_info = upload_vertex_data(); std::chrono::time_point vertex_end = steady_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(vertex_end - vertex_start).count(); //Load program - std::chrono::time_point program_start = steady_clock::now(); + std::chrono::time_point program_start = vertex_end; load_program(std::get<2>(upload_info), std::get<3>(upload_info)); std::chrono::time_point program_stop = steady_clock::now(); m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); @@ -1841,7 +1846,7 @@ bool VKGSRender::check_program_status() if (dirty_framebuffer) return std::make_tuple(false, 0); - auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); + auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth); if (!rsc.surface || rsc.is_depth_surface != is_depth) return std::make_tuple(false, 0);