rsx: wcb scaling fixes

This commit is contained in:
kd-11 2017-11-29 19:08:16 +03:00
parent 9d27ac359b
commit 08b829dc22
6 changed files with 101 additions and 67 deletions

View File

@ -248,6 +248,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_locations = get_locations();
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(surface_format);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
@ -354,7 +355,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
@ -370,7 +370,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
const u32 range = pitch * m_depth_surface_info.height * aa_factor;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, true);

View File

@ -442,12 +442,10 @@ namespace gl
}
else
{
//TODO: Use compression hint from the gcm tile information
//TODO: Fall back to bilinear filtering if samples > 2
const u8 pixel_size = get_pixel_size(format, type);
const u8 samples = rsx_pitch / real_pitch;
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples);
const u8 samples_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
const u8 samples_v = (aa_mode == rsx::surface_antialiasing::square_centered_4_samples || aa_mode == rsx::surface_antialiasing::square_rotated_4_samples) ? 2 : 1;
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v);
}
/* switch (gcm_format)

View File

@ -2481,6 +2481,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const auto fbo_height = rsx::apply_resolution_scale(clip_height, true);
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(color_fmt);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
if (m_draw_fbo)
{
@ -2588,8 +2589,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
for (u8 index : draw_buffers)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second);
}
@ -2608,7 +2609,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
pitch *= 2;
}
const u32 range = pitch * m_depth_surface_info.height;
const u32 range = pitch * m_depth_surface_info.height * aa_factor;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true);
}

View File

@ -308,9 +308,20 @@ namespace vk
{
//Scale image to fit
//usually we can just get away with nearest filtering
const u8 samples = rsx_pitch / real_pitch;
u8 samples_u = 1, samples_v = 1;
switch (static_cast<vk::render_target*>(vram_texture)->aa_mode)
{
case rsx::surface_antialiasing::diagonal_centered_2_samples:
samples_u = 2;
break;
case rsx::surface_antialiasing::square_centered_4_samples:
case rsx::surface_antialiasing::square_rotated_4_samples:
samples_u = 2;
samples_v = 2;
break;
}
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes);
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes);
}
dma_buffer->unmap();

View File

@ -140,18 +140,19 @@ namespace rsx
* N - Sample count
*/
template <typename T, typename U>
void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{
u32 dst_offset = 0;
u32 src_offset = 0;
u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof(T);
u32 padding = (dst_pitch - (src_pitch * samples_u)) / sizeof(T);
for (u16 h = 0; h < src_height; ++h)
{
const auto row_start = dst_offset;
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < samples; ++n)
for (u8 n = 0; n < samples_u; ++n)
{
dst[dst_offset++] = src[src_offset];
}
@ -160,51 +161,57 @@ namespace rsx
}
dst_offset += padding;
for (int n = 1; n < samples_v; ++n)
{
memcpy(&dst[dst_offset], &dst[row_start], dst_pitch);
dst_offset += dst_pitch;
}
}
}
// Generic (non byte-swapping) scaling entry point.
// Selects the element type (u8, u16, u32, u64, u128) from pixel_size (1, 2, 4, 8, 16)
// and forwards every argument unchanged to scale_image_fallback_impl using the same
// type for source and destination. Any other pixel_size raises through fmt::throw_exception.
// NOTE(review): this listing appears to be a rendered diff; paired lines are the old
// form (single `samples` argument) followed by the new form (`samples_u`, `samples_v`).
void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{
// pixel_size is the size of one pixel in bytes and picks the template instantiation
switch (pixel_size)
{
case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 2:
scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 4:
scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 8:
scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 16:
scale_image_fallback_impl<u128, u128>((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u128, u128>((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
default:
// No element type is provided for this pixel size
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{
switch (pixel_size)
{
case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 2:
scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 4:
scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 8:
scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
case 16:
scale_image_fallback_impl<u128, be_t<u128>>((u128*)dst, (const be_t<u128>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
scale_image_fallback_impl<u128, be_t<u128>>((u128*)dst, (const be_t<u128>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
@ -279,61 +286,79 @@ namespace rsx
}
}
// Scales an image by pixel repetition, choosing between the fixed-factor
// scale_image_fast* templates and the generic scale_image_fallback* routines.
//  samples_u  - horizontal repeat count; selects the template instantiation
//  samples_v  - vertical repeat count; any value other than 1 goes straight to the fallback
//  swap_bytes - selects the *_with_byte_swap variants
// NOTE(review): this listing appears to be a rendered diff; where a line using `samples`
// is followed by one using `samples_u`/`samples_v`, the first is the pre-change form.
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes)
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes)
{
//Scale this image by repeating pixel data
//The repeat counts are supplied by the caller as samples_u (per row) and samples_v (rows)
//Use of fixed argument templates for performance reasons
// Destination row length in pixels, and the pixels left over after the scaled source row
const u16 dst_width = dst_pitch / pixel_size;
const u16 padding = dst_width - (src_width * samples);
const u16 padding = dst_width - (src_width * samples_u);
if (!swap_bytes)
{
switch (samples)
if (samples_v == 1)
{
case 2:
scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
// Horizontal-only scaling: dispatch on samples_u to a fixed-factor instantiation
switch (samples_u)
{
case 1:
scale_image_fast<1>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 2:
scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
// No fixed-factor instantiation for this samples_u; use the generic routine with one vertical sample
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1);
}
}
else
{
// Vertical repetition requested: only the generic routine takes samples_v
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
}
}
else
{
switch (samples)
if (samples_v == 1)
{
case 2:
scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
// Same dispatch as the non-swapping branch, using the byte-swapping variants
switch (samples_u)
{
case 1:
scale_image_fast_with_byte_swap<1>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 2:
scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
// No fixed-factor instantiation for this samples_u; use the generic routine with one vertical sample
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1);
}
}
else
{
// Vertical repetition requested: only the generic routine takes samples_v
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
}
}
}

View File

@ -143,7 +143,7 @@ namespace rsx
}
}
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false);
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes = false);
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);