rsx: Fix region pitch inaccuracy

- Region pitch of 64 (disabled) can be used to indicate packed contents - do not assume it is the actual pitch!
- Also fixes interaction of AA factors with lockable_region size
This commit is contained in:
kd-11 2018-06-19 20:54:18 +03:00 committed by kd-11
parent c776ff5020
commit 8f1c36d79f
2 changed files with 31 additions and 43 deletions

View File

@ -208,11 +208,15 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto depth_format = rsx::method_registers.surface_depth_fmt();
const auto target = rsx::method_registers.surface_color_target();
const auto aa_mode = rsx::method_registers.surface_antialias();
const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case
//Seen in Nier (color) and GT HD concept (z buffer)
//Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries
const auto required_zeta_pitch = std::max<u32>((u32)(depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4), 64u);
const auto required_color_pitch = std::max<u32>((u32)rsx::utility::get_packed_pitch(surface_format, clip_horizontal), 64u);
const auto required_zeta_pitch = std::max<u32>((u32)(depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4) * aa_factor_u, 64u);
const auto required_color_pitch = std::max<u32>((u32)rsx::utility::get_packed_pitch(surface_format, clip_horizontal) * aa_factor_u, 64u);
const bool stencil_test_enabled = depth_format == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled();
const auto lg2w = rsx::method_registers.surface_log2_width();
const auto lg2h = rsx::method_registers.surface_log2_height();
@ -305,16 +309,14 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
return;
}
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(surface_format);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//Window (raster) offsets
const auto window_offset_x = rsx::method_registers.window_offset_x();
const auto window_offset_y = rsx::method_registers.window_offset_y();
const auto window_clip_width = rsx::method_registers.window_clip_horizontal();
const auto window_clip_height = rsx::method_registers.window_clip_vertical();
const auto bpp = get_format_block_size_in_bytes(surface_format);
if (window_offset_x || window_offset_y)
{
//Window offset is what affects the raster position!
@ -327,7 +329,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (surface_addresses[index])
{
const u32 window_offset_bytes = (std::max<u32>(pitchs[index], clip_horizontal * aa_factor * bpp) * window_offset_y) + ((aa_factor * bpp) * window_offset_x);
const u32 window_offset_bytes = (std::max<u32>(pitchs[index], required_color_pitch) * window_offset_y) + ((aa_factor_u * bpp) * window_offset_x);
surface_addresses[index] += window_offset_bytes;
}
}
@ -335,7 +337,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (depth_address)
{
const auto depth_bpp = depth_format == rsx::surface_depth_format::z16 ? 2 : 4;
depth_address += (std::max<u32>(zeta_pitch, clip_horizontal * aa_factor * depth_bpp) * window_offset_y) + ((aa_factor * depth_bpp) * window_offset_x);
depth_address += (std::max<u32>(zeta_pitch, required_zeta_pitch) * window_offset_y) + ((aa_factor_u * depth_bpp) * window_offset_x);
}
}
@ -423,7 +425,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
draw_fbo.color[i] = *rtt;
rtt->set_rsx_pitch(pitchs[i]);
m_surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };
m_surface_info[i] = { surface_addresses[i], std::max(pitchs[i], required_color_pitch), false, surface_format, depth_format, clip_horizontal, clip_vertical };
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->write_aa_mode = aa_mode;
@ -451,9 +453,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
else
draw_fbo.depth = *ds;
const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch();
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
m_depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };
m_depth_surface_info = { depth_address, std::max(zeta_pitch, required_zeta_pitch), true, surface_format, depth_format, clip_horizontal, clip_vertical };
ds->write_aa_mode = aa_mode;
m_gl_texture_cache.notify_surface_changed(depth_address);
@ -510,7 +511,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor;
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor_v;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
}
@ -520,13 +521,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);
u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 range = pitch * m_depth_surface_info.height * aa_factor;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch,
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * aa_factor_v;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
depth_format_gl.format, depth_format_gl.type, true);
}
}
@ -706,7 +703,7 @@ void GLGSRender::write_buffers()
*/
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height;
__glcheck m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true, 0xFF);
m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true, 0xFF);
}
};
@ -715,12 +712,9 @@ void GLGSRender::write_buffers()
if (g_cfg.video.write_depth_buffer)
{
//TODO: use pitch
if (m_depth_surface_info.pitch == 0) return;
u32 range = m_depth_surface_info.width * m_depth_surface_info.height * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height;
m_gl_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, range, true, 0xFF);
}
}

View File

@ -2535,11 +2535,15 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const auto depth_fmt = rsx::method_registers.surface_depth_fmt();
const auto target = rsx::method_registers.surface_color_target();
const auto aa_mode = rsx::method_registers.surface_antialias();
const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//NOTE: Its is possible that some renders are done on a swizzled context. Pitch is meaningless in that case
//Seen in Nier (color) and GT HD concept (z buffer)
//Restriction is that the dimensions are powers of 2. Also, dimensions are passed via log2w and log2h entries
const auto required_zeta_pitch = std::max<u32>((u32)(depth_fmt == rsx::surface_depth_format::z16 ? clip_width * 2 : clip_width * 4), 64u);
const auto required_color_pitch = std::max<u32>((u32)rsx::utility::get_packed_pitch(color_fmt, clip_width), 64u);
const auto required_zeta_pitch = std::max<u32>((u32)(depth_fmt == rsx::surface_depth_format::z16 ? clip_width * 2 : clip_width * 4) * aa_factor_u, 64u);
const auto required_color_pitch = std::max<u32>((u32)rsx::utility::get_packed_pitch(color_fmt, clip_width) * aa_factor_v, 64u);
const bool stencil_test_enabled = depth_fmt == rsx::surface_depth_format::z24s8 && rsx::method_registers.stencil_test_enabled();
const auto lg2w = rsx::method_registers.surface_log2_width();
const auto lg2h = rsx::method_registers.surface_log2_height();
@ -2636,9 +2640,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const auto fbo_width = rsx::apply_resolution_scale(clip_width, true);
const auto fbo_height = rsx::apply_resolution_scale(clip_height, true);
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(color_fmt);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//Window (raster) offsets
const auto window_offset_x = rsx::method_registers.window_offset_x();
@ -2658,7 +2660,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (surface_addresses[index])
{
const u32 window_offset_bytes = (std::max<u32>(surface_pitchs[index], clip_width * aa_factor * bpp) * window_offset_y) + ((aa_factor * bpp) * window_offset_x);
const u32 window_offset_bytes = (std::max<u32>(surface_pitchs[index], required_color_pitch) * window_offset_y) + ((aa_factor_u * bpp) * window_offset_x);
surface_addresses[index] += window_offset_bytes;
}
}
@ -2666,7 +2668,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (zeta_address)
{
const auto depth_bpp = (depth_fmt == rsx::surface_depth_format::z16 ? 2 : 4);
zeta_address += (std::max<u32>(zeta_pitch, clip_width * aa_factor * depth_bpp) * window_offset_y) + ((aa_factor * depth_bpp) * window_offset_x);
zeta_address += (std::max<u32>(zeta_pitch, required_zeta_pitch) * window_offset_y) + ((aa_factor_u * depth_bpp) * window_offset_x);
}
}
@ -2778,7 +2780,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
bound_images.push_back(surface);
m_surface_info[index].address = surface_addresses[index];
m_surface_info[index].pitch = surface_pitchs[index];
m_surface_info[index].pitch = std::max(surface_pitchs[index], required_color_pitch);
surface->rsx_pitch = surface_pitchs[index];
surface->write_aa_mode = aa_mode;
@ -2795,7 +2797,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
bound_images.push_back(ds);
m_depth_surface_info.address = zeta_address;
m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch();
m_depth_surface_info.pitch = std::max(zeta_pitch, required_zeta_pitch);
ds->rsx_pitch = m_depth_surface_info.pitch;
ds->write_aa_mode = aa_mode;
@ -2811,7 +2813,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor_v;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second);
}
@ -2821,16 +2823,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
u32 pitch = m_depth_surface_info.width * 2;
u32 gcm_format = CELL_GCM_TEXTURE_DEPTH16;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)
{
gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8;
pitch *= 2;
}
const u32 range = pitch * m_depth_surface_info.height * aa_factor;
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * aa_factor_v;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, false);
}