From 527b1bb071408fe0c6d6409c03dcaf6b6a232937 Mon Sep 17 00:00:00 2001 From: Eladash Date: Sat, 10 Aug 2019 07:07:01 +0300 Subject: [PATCH] rsx: Fix overlapping transfer of nv3089::image_in when out_pitch != in_pitch or out_pitch != out_bpp * out_w --- rpcs3/Emu/RSX/rsx_methods.cpp | 94 ++++++++++++++++++++++++++--------- rpcs3/Emu/RSX/rsx_utils.cpp | 28 ++++++++++- rpcs3/Emu/RSX/rsx_utils.h | 1 + 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 1a2eaae417..ea103b1aed 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -1067,23 +1067,86 @@ namespace rsx if (method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d) { - if (need_convert || need_clip) + if (!need_convert) { - if (need_clip) + const bool is_overlapping = scale_x > 0 && scale_y > 0 && dst_dma == src_dma && [&]() -> bool { - if (need_convert) + const u32 src_max = src_offset + in_pitch * (in_h - 1) + (in_bpp * in_w); + const u32 dst_max = dst_offset + out_pitch * (out_h - 1) + (out_bpp * out_w); + return (src_offset >= dst_offset && src_offset < dst_max) || + (dst_offset >= src_offset && dst_offset < src_max); + }(); + + if (is_overlapping) + { + if (need_clip) { - temp2.resize(out_pitch * std::max(convert_h, (u32)clip_h)); + temp2.resize(out_pitch * clip_h); - convert_scale_image(temp2.data(), out_format, convert_w, convert_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter == blit_engine::transfer_interpolator::foh); + clip_image_may_overlap(pixels_dst, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch, temp2.data()); + } + else if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) + { + const u32 buffer_pitch = out_bpp * out_w; + temp2.resize(buffer_pitch * out_h); + std::add_pointer_t buf = temp2.data(), pixels = pixels_src; - clip_image(pixels_dst, temp2.data(), clip_x, clip_y, clip_w, clip_h, 
out_bpp, out_pitch, out_pitch); + // Read the whole buffer from source + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(buf, pixels, buffer_pitch); + pixels += in_pitch; + buf += buffer_pitch; + } + + buf = temp2.data(), pixels = pixels_dst; + + // Write to destination + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(pixels, buf, buffer_pitch); + pixels += out_pitch; + buf += buffer_pitch; + } } else + { + std::memmove(pixels_dst, pixels_src, out_pitch * out_h); + } + } + else + { + if (need_clip) { clip_image(pixels_dst, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); } + else if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) + { + u8 *dst = pixels_dst, *src = pixels_src; + + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(dst, src, out_w * out_bpp); + dst += out_pitch; + src += in_pitch; + } + } + else + { + std::memcpy(pixels_dst, pixels_src, out_pitch * out_h); + } + } + } + else + { + if (need_clip) + { + temp2.resize(out_pitch * std::max(convert_h, (u32)clip_h)); + + convert_scale_image(temp2.data(), out_format, convert_w, convert_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter == blit_engine::transfer_interpolator::foh); + + clip_image(pixels_dst, temp2.data(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); } else { @@ -1091,23 +1154,6 @@ namespace rsx pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter == blit_engine::transfer_interpolator::foh); } } - else - { - if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) - { - for (u32 y = 0; y < out_h; ++y) - { - u8 *dst = pixels_dst + out_pitch * y; - u8 *src = pixels_src + in_pitch * y; - - std::memmove(dst, src, out_w * out_bpp); - } - } - else - { - std::memmove(pixels_dst, pixels_src, out_pitch * out_h); - } - } } else { diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index d285e1410e..9e2eec6f35 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ 
-32,12 +32,49 @@ namespace rsx
 
 		for (int y = 0; y < clip_h; ++y)
 		{
-			std::memmove(pixels_dst, pixels_src, row_length);
+			std::memcpy(pixels_dst, pixels_src, row_length);
 			pixels_src += src_pitch;
 			pixels_dst += dst_pitch;
 		}
 	}
 
+	// Copies the clip_w x clip_h rectangle that starts at (clip_x, clip_y) in src to the start of dst.
+	// All rows are first read into buffer and only then written out, so the copy stays correct
+	// when the source and destination regions overlap.
+	// buffer must provide at least bpp * clip_w * clip_h bytes of scratch space.
+	void clip_image_may_overlap(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch, u8 *buffer)
+	{
+		// Nothing to copy; also keeps a negative extent from wrapping into a huge unsigned size below
+		if (clip_w <= 0 || clip_h <= 0)
+		{
+			return;
+		}
+
+		src += clip_y * src_pitch + clip_x * bpp;
+
+		const u32 buffer_pitch = bpp * clip_w;
+		u8* buf = buffer;
+
+		// Read the whole buffer from source
+		// (signed row counter to match the type of clip_h, as in clip_image)
+		for (int y = 0; y < clip_h; ++y)
+		{
+			std::memcpy(buf, src, buffer_pitch);
+			src += src_pitch;
+			buf += buffer_pitch;
+		}
+
+		buf = buffer;
+
+		// Write to destination
+		for (int y = 0; y < clip_h; ++y)
+		{
+			std::memcpy(dst, buf, buffer_pitch);
+			dst += dst_pitch;
+			buf += buffer_pitch;
+		}
+	}
+
 	//Convert decoded integer values for CONSTANT_BLEND_FACTOR into f32 array in 0-1 range
 	std::array get_constant_blend_colors()
 	{
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index e2892712f2..f97f91b49c 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -425,6 +425,7 @@ namespace rsx
 		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
 
 	void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
+	void clip_image_may_overlap(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch, u8* buffer);
 	void convert_le_f32_to_be_d24(void *dst, void *src, u32 row_length_in_texels, u32 num_rows);
 	void convert_le_d24x8_to_be_d24x8(void *dst, void *src, u32 row_length_in_texels, u32 num_rows);
 