diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 8b76297108..81f226b9a0 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -173,6 +173,12 @@ namespace void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) { + if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context)) + { + // Clear commands affect contested memory + m_rtts_dirty = true; + } + if (draw_fbo && !m_rtts_dirty) { set_viewport(); @@ -180,221 +186,41 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk } m_rtts_dirty = false; - zcull_surface_active = false; - - const u16 clip_horizontal = rsx::method_registers.surface_clip_width(); - const u16 clip_vertical = rsx::method_registers.surface_clip_height(); - const u16 clip_x = rsx::method_registers.surface_clip_origin_x(); - const u16 clip_y = rsx::method_registers.surface_clip_origin_y(); - framebuffer_status_valid = false; m_framebuffer_state_contested = false; - if (clip_horizontal == 0 || clip_vertical == 0) + const auto layout = get_framebuffer_layout(context); + if (!framebuffer_status_valid) { - LOG_ERROR(RSX, "Invalid framebuffer setup, w=%d, h=%d", clip_horizontal, clip_vertical); return; } - auto surface_addresses = get_color_surface_addresses(); - auto depth_address = get_zeta_surface_address(); - - const auto pitchs = get_pitchs(); - const auto zeta_pitch = rsx::method_registers.surface_z_pitch(); - const auto surface_format = rsx::method_registers.surface_color(); - const auto depth_format = rsx::method_registers.surface_depth_fmt(); - const auto target = rsx::method_registers.surface_color_target(); - - const auto aa_mode = rsx::method_registers.surface_antialias(); - const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; - const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; - - //NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case - //Seen in Nier (color) and GT HD concept (z buffer) - //Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries - const auto required_zeta_pitch = std::max((u32)(depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4) * aa_factor_u, 64u); - const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(surface_format, clip_horizontal) * aa_factor_u, 64u); - const bool stencil_test_enabled = depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); - const auto lg2w = rsx::method_registers.surface_log2_width(); - const auto lg2h = rsx::method_registers.surface_log2_height(); - const auto clipw_log2 = (u32)floor(log2(clip_horizontal)); - const auto cliph_log2 = (u32)floor(log2(clip_vertical)); - - if (depth_address) + if (draw_fbo && layout.ignore_change) { - if (!rsx::method_registers.depth_test_enabled() && - !stencil_test_enabled && - target != rsx::surface_target::none) + // Nothing has changed, we're still using the same framebuffer + // Update flags to match current + for (u32 index = 0; index < 4; index++) { - //Disable depth buffer if depth testing is not enabled, unless a clear command is targeting the depth buffer - const bool is_depth_clear = !!(context & rsx::framebuffer_creation_context::context_clear_depth); - if (!is_depth_clear) + if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) { - depth_address = 0; - m_framebuffer_state_contested = true; + surface->write_aa_mode = layout.aa_mode; } } - if (depth_address && zeta_pitch < required_zeta_pitch) + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) { - if (lg2w < clipw_log2 || lg2h < cliph_log2) - { - //Cannot fit - depth_address = 0; - - if (lg2w > 0 || lg2h > 0) - { - //Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); - } - } - else - { - LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); - } - } - } - - for (const auto &index : rsx::utility::get_rtt_indexes(target)) - { - if (pitchs[index] < required_color_pitch) - { - if (lg2w < clipw_log2 || lg2h < cliph_log2) - { - surface_addresses[index] = 0; - - if (lg2w > 0 || lg2h > 0) - { - //Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); - } - } - else - { - LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_horizontal, clip_vertical); - } + ds->write_aa_mode = layout.aa_mode; } - if (surface_addresses[index] == depth_address) - { - LOG_TRACE(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, zeta_pitch = %d, color_pitch=%d", depth_address, zeta_pitch, pitchs[index]); - //TODO: Research clearing both depth AND color - //TODO: If context is creation_draw, deal with possibility of a lost buffer clear - if (context == rsx::framebuffer_creation_context::context_clear_depth || - rsx::method_registers.depth_test_enabled() || stencil_test_enabled || - (!rsx::method_registers.color_write_enabled() && rsx::method_registers.depth_write_enabled())) - { - // Use address for depth data - surface_addresses[index] = 0; - } - else - { - // Use address for color data - depth_address = 0; - m_framebuffer_state_contested = true; - break; - } - } - - if (surface_addresses[index]) - framebuffer_status_valid = true; - } - - if (!framebuffer_status_valid && !depth_address) - { - LOG_WARNING(RSX, "Framebuffer setup failed. Draw calls may have been lost"); return; } - //Window (raster) offsets - const auto window_offset_x = rsx::method_registers.window_offset_x(); - const auto window_offset_y = rsx::method_registers.window_offset_y(); - const auto window_clip_width = rsx::method_registers.window_clip_horizontal(); - const auto window_clip_height = rsx::method_registers.window_clip_vertical(); - - const auto bpp = get_format_block_size_in_bytes(surface_format); - - if (window_offset_x || window_offset_y) - { - //Window offset is what affects the raster position! - //Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers - //Sampling behavior clearly indicates the addresses are expected to have changed - if (auto clip_type = rsx::method_registers.window_clip_type()) - LOG_ERROR(RSX, "Unknown window clip type 0x%X" HERE, clip_type); - - for (const auto &index : rsx::utility::get_rtt_indexes(target)) - { - if (surface_addresses[index]) - { - const u32 window_offset_bytes = (std::max(pitchs[index], required_color_pitch) * window_offset_y) + ((aa_factor_u * bpp) * window_offset_x); - surface_addresses[index] += window_offset_bytes; - } - } - - if (depth_address) - { - const auto depth_bpp = depth_format == rsx::surface_depth_format::z16 ? 2 : 4; - depth_address += (std::max(zeta_pitch, required_zeta_pitch) * window_offset_y) + ((aa_factor_u * depth_bpp) * window_offset_x); - } - } - - if ((window_clip_width && window_clip_width < clip_horizontal) || - (window_clip_height && window_clip_height < clip_vertical)) - { - LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d", - window_clip_width, window_clip_height, clip_horizontal, clip_vertical); - } - - if (draw_fbo) - { - bool really_changed = false; - auto sz = draw_fbo.get_extents(); - - if (sz.width == clip_horizontal && sz.height == clip_vertical) - { - for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) - { - if (m_surface_info[i].address != surface_addresses[i]) - { - really_changed = true; - break; - } - } - - if (!really_changed) - { - if (depth_address == m_depth_surface_info.address) - { - // Nothing has changed, we're still using the same framebuffer - // Update flags to match current - - const auto aa_mode = rsx::method_registers.surface_antialias(); - - for (u32 index = 0; index < 4; index++) - { - if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) - { - surface->write_aa_mode = aa_mode; - } - } - - if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) - { - ds->write_aa_mode = aa_mode; - } - - return; - } - } - } - } - - m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical, - target, surface_addresses, depth_address); + m_rtts.prepare_render_target(nullptr, layout.color_format, layout.depth_format, layout.width, layout.height, + layout.target, layout.color_addresses, layout.zeta_address); draw_fbo.recreate(); draw_fbo.bind(); - draw_fbo.set_extents({ (int)clip_horizontal, (int)clip_vertical }); + draw_fbo.set_extents({ (int)layout.width, (int)layout.height }); bool old_format_found = false; gl::texture::format old_format; @@ -421,13 +247,13 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk auto rtt = std::get<1>(m_rtts.m_bound_render_targets[i]); draw_fbo.color[i] = *rtt; - rtt->set_rsx_pitch(pitchs[i]); - m_surface_info[i] = { surface_addresses[i], std::max(pitchs[i], required_color_pitch), false, surface_format, depth_format, clip_horizontal, clip_vertical }; + rtt->set_rsx_pitch(layout.color_pitch[i]); + m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height }; rtt->tile = find_tile(color_offsets[i], color_locations[i]); - rtt->write_aa_mode = aa_mode; - m_gl_texture_cache.notify_surface_changed(surface_addresses[i]); - m_gl_texture_cache.tag_framebuffer(surface_addresses[i]); + rtt->write_aa_mode = layout.aa_mode; + m_gl_texture_cache.notify_surface_changed(m_surface_info[i].address); + m_gl_texture_cache.tag_framebuffer(m_surface_info[i].address); } else m_surface_info[i] = {}; @@ -445,18 +271,17 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk } auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (depth_format == rsx::surface_depth_format::z24s8) + if (layout.depth_format == rsx::surface_depth_format::z24s8) draw_fbo.depth_stencil = *ds; else draw_fbo.depth = *ds; std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); - m_depth_surface_info = { depth_address, std::max(zeta_pitch, required_zeta_pitch), true, surface_format, depth_format, clip_horizontal, clip_vertical }; + m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height }; - ds->write_aa_mode = aa_mode; - m_gl_texture_cache.notify_surface_changed(depth_address); - - m_gl_texture_cache.tag_framebuffer(depth_address); + ds->write_aa_mode = layout.aa_mode; + m_gl_texture_cache.notify_surface_changed(layout.zeta_address); + m_gl_texture_cache.tag_framebuffer(layout.zeta_address); } else m_depth_surface_info = {}; @@ -502,13 +327,13 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk //Mark buffer regions as NO_ACCESS on Cell visible side if (g_cfg.video.write_color_buffers) { - auto color_format = rsx::internals::surface_color_format_to_gl(surface_format); + auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format); for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor_v; + const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * layout.aa_factors[1]; m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, color_format.format, color_format.type, color_format.swap_bytes); } @@ -518,8 +343,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format); - const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * aa_factor_v; + const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); + const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1]; m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, depth_format_gl.format, depth_format_gl.type, true); } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index f611ca9b37..3bece0580c 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -8,6 +8,7 @@ #include "Common/BufferUtils.h" #include "Common/texture_cache.h" +#include "Common/surface_store.h" #include "Capture/rsx_capture.h" #include "rsx_methods.h" #include "rsx_utils.h" @@ -1443,6 +1444,240 @@ namespace rsx return rsx::get_address(offset_zeta, m_context_dma_z); } + framebuffer_layout thread::get_framebuffer_layout(rsx::framebuffer_creation_context context) + { + framebuffer_layout layout; + memset(&layout, 0, sizeof(layout)); + + layout.ignore_change = true; + layout.width = rsx::method_registers.surface_clip_width(); + layout.height = rsx::method_registers.surface_clip_height(); + + framebuffer_status_valid = false; + m_framebuffer_state_contested = false; + m_framebuffer_contest_type = context; + + if (layout.width == 0 || layout.height == 0) + { + LOG_TRACE(RSX, "Invalid framebuffer setup, w=%d, h=%d", layout.width, layout.height); + return layout; + } + + const u16 clip_x = rsx::method_registers.surface_clip_origin_x(); + const u16 clip_y = rsx::method_registers.surface_clip_origin_y(); + + layout.color_addresses = get_color_surface_addresses(); + layout.zeta_address = get_zeta_surface_address(); + layout.zeta_pitch = rsx::method_registers.surface_z_pitch(); + layout.color_pitch = + { + rsx::method_registers.surface_a_pitch(), + rsx::method_registers.surface_b_pitch(), + rsx::method_registers.surface_c_pitch(), + rsx::method_registers.surface_d_pitch(), + }; + + layout.color_format = rsx::method_registers.surface_color(); + layout.depth_format = rsx::method_registers.surface_depth_fmt(); + layout.target = rsx::method_registers.surface_color_target(); + + const auto aa_mode = rsx::method_registers.surface_antialias(); + const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; + const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; + + // NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case + // Seen in Nier (color) and GT HD concept (z buffer) + // Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries + const auto required_zeta_pitch = std::max((u32)(layout.depth_format == rsx::surface_depth_format::z16 ? layout.width * 2 : layout.width * 4) * aa_factor_u, 64u); + const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(layout.color_format, layout.width) * aa_factor_u, 64u); + const bool color_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_color) ? true : rsx::method_registers.color_write_enabled(); + const bool depth_write_enabled = (context & rsx::framebuffer_creation_context::context_clear_depth) ? true : rsx::method_registers.depth_write_enabled(); + const auto lg2w = rsx::method_registers.surface_log2_width(); + const auto lg2h = rsx::method_registers.surface_log2_height(); + const auto clipw_log2 = (u32)floor(log2(layout.width)); + const auto cliph_log2 = (u32)floor(log2(layout.height)); + + const bool stencil_test_enabled = layout.depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); + const bool depth_test_enabled = rsx::method_registers.depth_test_enabled(); + const bool ignore_depth = (context == rsx::framebuffer_creation_context::context_clear_color); + const bool ignore_color = (context == rsx::framebuffer_creation_context::context_clear_depth); + + if (layout.zeta_address) + { + if (!depth_test_enabled && + !stencil_test_enabled && + layout.target != rsx::surface_target::none) + { + // Disable depth buffer if depth testing is not enabled, unless a clear command is targeting the depth buffer + const bool is_depth_clear = !!(context & rsx::framebuffer_creation_context::context_clear_depth); + if (!is_depth_clear) + { + layout.zeta_address = 0; + m_framebuffer_state_contested = true; + } + } + + if (layout.zeta_address && layout.zeta_pitch < required_zeta_pitch) + { + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { + // Cannot fit + layout.zeta_address = 0; + + if (lg2w > 0 || lg2h > 0) + { + // Something was actually declared for the swizzle context dimensions + LOG_WARNING(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); + } + } + + if (layout.zeta_address) + { + // Still exists? Unlikely to get discarded + layout.actual_zeta_pitch = std::max(layout.zeta_pitch, required_zeta_pitch); + } + } + + for (const auto &index : rsx::utility::get_rtt_indexes(layout.target)) + { + if (layout.color_pitch[index] < required_color_pitch) + { + if (lg2w < clipw_log2 || lg2h < cliph_log2) + { + layout.color_addresses[index] = 0; + + if (lg2w > 0 || lg2h > 0) + { + // Something was actually declared for the swizzle context dimensions + LOG_WARNING(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); + } + } + else + { + LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, layout.width, layout.height); + } + } + + if (layout.zeta_address && (layout.color_addresses[index] == layout.zeta_address)) + { + LOG_TRACE(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d", + layout.zeta_address, index, layout.zeta_pitch, layout.color_pitch[index], (u32)context); + + // TODO: Research clearing both depth AND color + // TODO: If context is creation_draw, deal with possibility of a lost buffer clear + if (!ignore_depth && + (ignore_color || depth_test_enabled || stencil_test_enabled || + (!color_write_enabled && depth_write_enabled))) + { + // Use address for depth data + layout.color_addresses[index] = 0; + } + else + { + // Use address for color data + layout.zeta_address = 0; + m_framebuffer_state_contested = true; + } + } + + if (layout.color_addresses[index]) + { + layout.actual_color_pitch[index] = std::max(layout.color_pitch[index], required_color_pitch); + framebuffer_status_valid = true; + } + } + + if (!framebuffer_status_valid && !layout.zeta_address) + { + LOG_WARNING(RSX, "Framebuffer setup failed. Draw calls may have been lost"); + return layout; + } + + // At least one attachment exists + framebuffer_status_valid = true; + + // Window (raster) offsets + const auto window_offset_x = rsx::method_registers.window_offset_x(); + const auto window_offset_y = rsx::method_registers.window_offset_y(); + const auto window_clip_width = rsx::method_registers.window_clip_horizontal(); + const auto window_clip_height = rsx::method_registers.window_clip_vertical(); + + const auto bpp = get_format_block_size_in_bytes(layout.color_format); + + if (window_offset_x || window_offset_y) + { + // Window offset is what affects the raster position! + // Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers + // Sampling behavior clearly indicates the addresses are expected to have changed + if (auto clip_type = rsx::method_registers.window_clip_type()) + LOG_ERROR(RSX, "Unknown window clip type 0x%X" HERE, clip_type); + + for (const auto &index : rsx::utility::get_rtt_indexes(layout.target)) + { + if (layout.color_addresses[index]) + { + const u32 window_offset_bytes = (std::max(layout.color_pitch[index], required_color_pitch) * window_offset_y) + ((aa_factor_u * bpp) * window_offset_x); + layout.color_addresses[index] += window_offset_bytes; + } + } + + if (layout.zeta_address) + { + const auto depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16) ? 2 : 4; + layout.zeta_address += (std::max(layout.zeta_pitch, required_zeta_pitch) * window_offset_y) + ((aa_factor_u * depth_bpp) * window_offset_x); + } + } + + if ((window_clip_width && window_clip_width < layout.width) || + (window_clip_height && window_clip_height < layout.height)) + { + LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d", + window_clip_width, window_clip_height, layout.width, layout.height); + } + + layout.aa_mode = aa_mode; + layout.aa_factors[0] = aa_factor_u; + layout.aa_factors[1] = aa_factor_v; + + bool really_changed = false; + + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) + { + if (m_surface_info[i].address != layout.color_addresses[i]) + { + really_changed = true; + break; + } + + if (layout.color_addresses[i]) + { + if (m_surface_info[i].width != layout.width || + m_surface_info[i].height != layout.height) + { + really_changed = true; + break; + } + } + } + + if (!really_changed) + { + if (layout.zeta_address == m_depth_surface_info.address) + { + // Same target is reused + return layout; + } + } + + layout.ignore_change = false; + return layout; + } + void thread::get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures, bool skip_vertex_inputs) { if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty)) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index fbfdd0f814..0d60415f74 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -213,6 +213,24 @@ namespace rsx std::array attribute_placement; }; + struct framebuffer_layout + { + u16 width; + u16 height; + std::array color_addresses; + std::array color_pitch; + std::array actual_color_pitch; + u32 zeta_address; + u32 zeta_pitch; + u32 actual_zeta_pitch; + rsx::surface_target target; + rsx::surface_color_format color_format; + rsx::surface_depth_format depth_format; + rsx::surface_antialiasing aa_mode; + u32 aa_factors[2]; + bool ignore_change; + }; + namespace reports { struct occlusion_query_info @@ -391,6 +409,8 @@ namespace rsx bool m_textures_dirty[16]; bool m_vertex_textures_dirty[4]; bool m_framebuffer_state_contested = false; + rsx::framebuffer_creation_context m_framebuffer_contest_type = rsx::framebuffer_creation_context::context_draw; + u32 m_graphics_state = 0; u64 ROP_sync_timestamp = 0; @@ -401,6 +421,8 @@ namespace rsx std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; + framebuffer_layout get_framebuffer_layout(rsx::framebuffer_creation_context context); + /** * Analyze vertex inputs and group all interleaved blocks */ diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 3aee2013fb..e809294110 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2516,231 +2516,55 @@ void VKGSRender::open_command_buffer() m_current_command_buffer->begin(); } - void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { + if (m_framebuffer_state_contested && (m_framebuffer_contest_type != context)) + { + // Clear commands affect contested memory + m_rtts_dirty = true; + } + if (m_draw_fbo && !m_rtts_dirty) return; m_rtts_dirty = false; - - u32 clip_width = rsx::method_registers.surface_clip_width(); - u32 clip_height = rsx::method_registers.surface_clip_height(); - u32 clip_x = rsx::method_registers.surface_clip_origin_x(); - u32 clip_y = rsx::method_registers.surface_clip_origin_y(); - framebuffer_status_valid = false; m_framebuffer_state_contested = false; - if (clip_width == 0 || clip_height == 0) + const auto layout = get_framebuffer_layout(context); + if (!framebuffer_status_valid) { - LOG_ERROR(RSX, "Invalid framebuffer setup, w=%d, h=%d", clip_width, clip_height); return; } - auto surface_addresses = get_color_surface_addresses(); - auto zeta_address = get_zeta_surface_address(); - - const auto zeta_pitch = rsx::method_registers.surface_z_pitch(); - const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(), - rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() }; - - const auto color_fmt = rsx::method_registers.surface_color(); - const auto depth_fmt = rsx::method_registers.surface_depth_fmt(); - const auto target = rsx::method_registers.surface_color_target(); - - const auto aa_mode = rsx::method_registers.surface_antialias(); - const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; - const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; - - //NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case - //Seen in Nier (color) and GT HD concept (z buffer) - //Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries - const auto required_zeta_pitch = std::max((u32)(depth_fmt == rsx::surface_depth_format::z16 ? clip_width * 2 : clip_width * 4) * aa_factor_u, 64u); - const auto required_color_pitch = std::max((u32)rsx::utility::get_packed_pitch(color_fmt, clip_width) * aa_factor_v, 64u); - const bool stencil_test_enabled = depth_fmt == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled(); - const auto lg2w = rsx::method_registers.surface_log2_width(); - const auto lg2h = rsx::method_registers.surface_log2_height(); - const auto clipw_log2 = (u32)floor(log2(clip_width)); - const auto cliph_log2 = (u32)floor(log2(clip_height)); - - if (zeta_address) + if (m_draw_fbo && layout.ignore_change) { - if (!rsx::method_registers.depth_test_enabled() && - !stencil_test_enabled && - target != rsx::surface_target::none) + // Nothing has changed, we're still using the same framebuffer + // Update flags to match current + + const auto aa_mode = rsx::method_registers.surface_antialias(); + + for (u32 index = 0; index < 4; index++) { - //Disable depth buffer if depth testing is not enabled, unless a clear command is targeting the depth buffer - const bool is_depth_clear = !!(context & rsx::framebuffer_creation_context::context_clear_depth); - if (!is_depth_clear) + if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) { - zeta_address = 0; - m_framebuffer_state_contested = true; + surface->write_aa_mode = layout.aa_mode; } } - if (zeta_address && zeta_pitch < required_zeta_pitch) + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) { - if (lg2w < clipw_log2 || lg2h < cliph_log2) - { - //Cannot fit - zeta_address = 0; - - if (lg2w > 0 || lg2h > 0) - { - //Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context depth surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); - } - } - else - { - LOG_TRACE(RSX, "Swizzled context depth surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); - } - } - } - - for (const auto &index : rsx::utility::get_rtt_indexes(target)) - { - if (surface_pitchs[index] < required_color_pitch) - { - if (lg2w < clipw_log2 || lg2h < cliph_log2) - { - surface_addresses[index] = 0; - - if (lg2w > 0 || lg2h > 0) - { - //Something was actually declared for the swizzle context dimensions - LOG_WARNING(RSX, "Invalid swizzled context color surface dims, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); - } - } - else - { - LOG_TRACE(RSX, "Swizzled context color surface, LG2W=%d, LG2H=%d, clip_w=%d, clip_h=%d", lg2w, lg2h, clip_width, clip_height); - } + ds->write_aa_mode = layout.aa_mode; } - if (surface_addresses[index] == zeta_address) - { - LOG_TRACE(RSX, "Framebuffer at 0x%X has aliasing color/depth targets, zeta_pitch = %d, color_pitch=%d", zeta_address, zeta_pitch, surface_pitchs[index]); - if (context == rsx::framebuffer_creation_context::context_clear_depth || - rsx::method_registers.depth_test_enabled() || stencil_test_enabled || - (!rsx::method_registers.color_write_enabled() && rsx::method_registers.depth_write_enabled())) - { - // Use address for depth data - // TODO: create a temporary render buffer for this to keep MRT outputs aligned - surface_addresses[index] = 0; - } - else - { - // Use address for color data - zeta_address = 0; - m_framebuffer_state_contested = true; - break; - } - } - - if (surface_addresses[index]) - framebuffer_status_valid = true; - } - - if (!framebuffer_status_valid && !zeta_address) - { - LOG_WARNING(RSX, "Framebuffer setup failed. Draw calls may have been lost"); return; } - //At least one attachment exists - framebuffer_status_valid = true; - - const auto fbo_width = rsx::apply_resolution_scale(clip_width, true); - const auto fbo_height = rsx::apply_resolution_scale(clip_height, true); - const auto bpp = get_format_block_size_in_bytes(color_fmt); - - //Window (raster) offsets - const auto window_offset_x = rsx::method_registers.window_offset_x(); - const auto window_offset_y = rsx::method_registers.window_offset_y(); - const auto window_clip_width = rsx::method_registers.window_clip_horizontal(); - const auto window_clip_height = rsx::method_registers.window_clip_vertical(); - - if (window_offset_x || window_offset_y) - { - //Window offset is what affects the raster position! - //Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers - //Sampling behavior clearly indicates the addresses are expected to have changed - if (auto clip_type = rsx::method_registers.window_clip_type()) - LOG_ERROR(RSX, "Unknown window clip type 0x%X" HERE, clip_type); - - for (const auto &index : rsx::utility::get_rtt_indexes(target)) - { - if (surface_addresses[index]) - { - const u32 window_offset_bytes = (std::max(surface_pitchs[index], required_color_pitch) * window_offset_y) + ((aa_factor_u * bpp) * window_offset_x); - surface_addresses[index] += window_offset_bytes; - } - } - - if (zeta_address) - { - const auto depth_bpp = (depth_fmt == rsx::surface_depth_format::z16 ? 2 : 4); - zeta_address += (std::max(zeta_pitch, required_zeta_pitch) * window_offset_y) + ((aa_factor_u * depth_bpp) * window_offset_x); - } - } - - if ((window_clip_width && window_clip_width < clip_width) || - (window_clip_height && window_clip_height < clip_height)) - { - LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d", - window_clip_width, window_clip_height, clip_width, clip_height); - } - - if (m_draw_fbo) - { - bool really_changed = false; - - if (m_draw_fbo->width() == fbo_width && m_draw_fbo->height() == fbo_height) - { - for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) - { - if (m_surface_info[i].address != surface_addresses[i]) - { - really_changed = true; - break; - } - } - - if (!really_changed) - { - if (zeta_address == m_depth_surface_info.address) - { - // Nothing has changed, we're still using the same framebuffer - // Update flags to match current - - const auto aa_mode = rsx::method_registers.surface_antialias(); - - for (u32 index = 0; index < 4; index++) - { - if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index])) - { - surface->write_aa_mode = aa_mode; - } - } - - if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) - { - ds->write_aa_mode = aa_mode; - } - - return; - } - } - } - } - m_rtts.prepare_render_target(&*m_current_command_buffer, - color_fmt, depth_fmt, - clip_width, clip_height, - target, - surface_addresses, zeta_address, + layout.color_format, layout.depth_format, + layout.width, layout.height, + layout.target, + layout.color_addresses, layout.zeta_address, (*m_device), &*m_current_command_buffer); //Reset framebuffer information @@ -2759,9 +2583,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } m_surface_info[i].address = m_surface_info[i].pitch = 0; - m_surface_info[i].width = clip_width; - m_surface_info[i].height = clip_height; - m_surface_info[i].color_format = color_fmt; + m_surface_info[i].width = layout.width; + m_surface_info[i].height = layout.height; + m_surface_info[i].color_format = layout.color_format; } //Process depth surface as well @@ -2775,13 +2599,13 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; - m_depth_surface_info.width = clip_width; - m_depth_surface_info.height = clip_height; - m_depth_surface_info.depth_format = depth_fmt; + m_depth_surface_info.width = layout.width; + m_depth_surface_info.height = layout.height; + m_depth_surface_info.depth_format = layout.depth_format; } //Bind created rtts as current fbo... - std::vector draw_buffers = rsx::utility::get_rtt_indexes(target); + const auto draw_buffers = rsx::utility::get_rtt_indexes(layout.target); m_draw_buffers_count = 0; std::vector bound_images; @@ -2793,14 +2617,13 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { bound_images.push_back(surface); - m_surface_info[index].address = surface_addresses[index]; - m_surface_info[index].pitch = std::max(surface_pitchs[index], required_color_pitch); - surface->rsx_pitch = surface_pitchs[index]; + m_surface_info[index].address = layout.color_addresses[index]; + m_surface_info[index].pitch = layout.color_pitch[index]; + surface->rsx_pitch = layout.color_pitch[index]; - surface->write_aa_mode = aa_mode; - m_texture_cache.notify_surface_changed(surface_addresses[index]); - - m_texture_cache.tag_framebuffer(surface_addresses[index]); + surface->write_aa_mode = layout.aa_mode; + m_texture_cache.notify_surface_changed(layout.color_addresses[index]); + m_texture_cache.tag_framebuffer(layout.color_addresses[index]); m_draw_buffers_count++; } } @@ -2810,26 +2633,25 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); bound_images.push_back(ds); - m_depth_surface_info.address = zeta_address; - m_depth_surface_info.pitch = std::max(zeta_pitch, required_zeta_pitch); - ds->rsx_pitch = m_depth_surface_info.pitch; + m_depth_surface_info.address = layout.zeta_address; + m_depth_surface_info.pitch = layout.zeta_pitch; + ds->rsx_pitch = layout.zeta_pitch; - ds->write_aa_mode = aa_mode; - m_texture_cache.notify_surface_changed(zeta_address); - - m_texture_cache.tag_framebuffer(zeta_address); + ds->write_aa_mode = layout.aa_mode; + m_texture_cache.notify_surface_changed(layout.zeta_address); + m_texture_cache.tag_framebuffer(layout.zeta_address); } if (g_cfg.video.write_color_buffers) { - const auto color_fmt_info = vk::get_compatible_gcm_format(color_fmt); + const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format); for (u8 index : draw_buffers) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor_v; + const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * layout.aa_factors[1]; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, - m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second); + m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second); } } @@ -2838,17 +2660,19 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (m_depth_surface_info.address && m_depth_surface_info.pitch) { const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; - const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * aa_factor_v; + const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1]; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, - m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, false); + m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false); } } - auto vk_depth_format = (zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), depth_fmt); - m_current_renderpass_id = vk::get_render_pass_location(vk::get_compatible_surface_format(color_fmt).first, vk_depth_format, m_draw_buffers_count); + auto vk_depth_format = (layout.zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), layout.depth_format); + m_current_renderpass_id = vk::get_render_pass_location(vk::get_compatible_surface_format(layout.color_format).first, vk_depth_format, m_draw_buffers_count); //Search old framebuffers for this same configuration bool framebuffer_found = false; + const auto fbo_width = rsx::apply_resolution_scale(layout.width, true); + const auto fbo_height = rsx::apply_resolution_scale(layout.height, true); for (auto &fbo : m_framebuffers_to_clean) { diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 166652d3d3..9362adc173 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -2688,8 +2688,12 @@ public: void unmap() { - //mapped = false; - //heap->unmap(); + if (g_cfg.video.disable_vulkan_mem_allocator) + { + heap->unmap(); + mapped = false; + _ptr = nullptr; + } } }; }