rsx: share the integral nearest-filter image scaling routines

Removes the scale_image* helpers from GLTextureCache.h and adds them to
rsx_utils.cpp behind rsx::scale_image_nearest(), which gains an optional
swap_bytes mode. The GL texture cache calls the shared entry point in its
existing scaling branch; the VK texture cache calls it with swap_bytes = true
when pitch != native_pitch and keeps do_memory_transfer for the equal-pitch copy.

diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index 136b9c232b..a23a384563 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -16,6 +16,8 @@
 #include "../../Memory/vm.h"
 #include "Utilities/Config.h"
 
+#include "../rsx_utils.h"
+
 class GLGSRender;
 
 extern cfg::bool_entry g_cfg_rsx_write_color_buffers;
@@ -113,76 +115,6 @@ namespace gl
 			return size;
 		}
 
-		//TODO: Move swscale routines to RSX shared
-		void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
-		{
-			u32 dst_offset = 0;
-			u32 src_offset = 0;
-			u32 padding = dst_pitch - (src_pitch * samples);
-
-			for (u16 h = 0; h < src_height; ++h)
-			{
-				for (u16 w = 0; w < src_width; ++w)
-				{
-					for (u8 n = 0; n < samples; ++n)
-					{
-						memcpy(&dst[dst_offset], &src[src_offset], pixel_size);
-						dst_offset += pixel_size;
-					}
-
-					src_offset += pixel_size;
-				}
-
-				dst_offset += padding;
-			}
-		}
-
-		template <typename T, int N>
-		void scale_image_impl(T* dst, const T* src, u16 src_width, u16 src_height, u16 padding)
-		{
-			u32 dst_offset = 0;
-			u32 src_offset = 0;
-
-			for (u16 h = 0; h < src_height; ++h)
-			{
-				for (u16 w = 0; w < src_width; ++w)
-				{
-					for (u8 n = 0; n < N; ++n)
-					{
-						dst[dst_offset++] = src[src_offset];
-					}
-
-					//Fetch next pixel
-					src_offset++;
-				}
-
-				//Pad this row
-				dst_offset += padding;
-			}
-		}
-
-		template <int N>
-		void scale_image(void *dst, void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
-		{
-			switch (pixel_size)
-			{
-			case 1:
-				scale_image_impl<u8, N>((u8*)dst, (u8*)src, current_width, current_height, padding);
-				break;
-			case 2:
-				scale_image_impl<u16, N>((u16*)dst, (u16*)src, current_width, current_height, padding);
-				break;
-			case 4:
-				scale_image_impl<u32, N>((u32*)dst, (u32*)src, current_width, current_height, padding);
-				break;
-			case 8:
-				scale_image_impl<u64, N>((u64*)dst, (u64*)src, current_width, current_height, padding);
-				break;
-			default:
-				fmt::throw_exception("unsupported rtt format 0x%X" HERE, (u32)format);
-			}
-		}
-
 		void init_buffer()
 		{
 			if (pbo_id)
@@ -341,36 +273,11 @@ namespace gl
 			else
 			{
 				//TODO: Use compression hint from the gcm tile information
-				//Scale this image by repeating pixel data n times
-				//n = expected_pitch / real_pitch
-				//Use of fixed argument templates for performance reasons
+				//TODO: Fall back to bilinear filtering if samples > 2
 
-				const u16 pixel_size = get_pixel_size(format, type);
-				const u16 dst_width = current_pitch / pixel_size;
-				const u16 sample_count = current_pitch / real_pitch;
-				const u16 padding = dst_width - (current_width * sample_count);
-
-				switch (sample_count)
-				{
-				case 2:
-					scale_image<2>(dst, data, pixel_size, current_width, current_height, padding);
-					break;
-				case 3:
-					scale_image<3>(dst, data, pixel_size, current_width, current_height, padding);
-					break;
-				case 4:
-					scale_image<4>(dst, data, pixel_size, current_width, current_height, padding);
-					break;
-				case 8:
-					scale_image<8>(dst, data, pixel_size, current_width, current_height, padding);
-					break;
-				case 16:
-					scale_image<16>(dst, data, pixel_size, current_width, current_height, padding);
-					break;
-				default:
-					LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch);
-					scale_image_fallback(dst, static_cast<u8*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count);
-				}
+				const u8 pixel_size = get_pixel_size(format, type);
+				const u8 samples = current_pitch / real_pitch;
+				rsx::scale_image_nearest(dst, const_cast<const void*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, samples);
 			}
 
 			glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 4cf8d07adc..8d792599f1 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -3,6 +3,7 @@
 #include "VKRenderTargets.h"
 #include "VKGSRender.h"
 #include "../Common/TextureUtils.h"
+#include "../rsx_utils.h"
 
 namespace vk
 {
@@ -195,58 +196,19 @@ namespace vk
 		}
 
 		template <typename T>
-		void do_memory_transfer(void *pixels_dst, void *pixels_src)
+		void do_memory_transfer(void *pixels_dst, const void *pixels_src)
 		{
-			//LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
-			if (pitch == native_pitch)
-			{
-				if (sizeof T == 1)
-					memcpy(pixels_dst, pixels_src, cpu_address_range);
-				else
-				{
-					const u32 block_size = width * height;
-
-					auto typed_dst = (be_t<T> *)pixels_dst;
-					auto typed_src = (T *)pixels_src;
-
-					for (u32 px = 0; px < block_size; ++px)
-						typed_dst[px] = typed_src[px];
-				}
-			}
+			if (sizeof(T) == 1)
+				memcpy(pixels_dst, pixels_src, cpu_address_range);
 			else
 			{
-				if (sizeof T == 1)
-				{
-					u8 *typed_dst = (u8 *)pixels_dst;
-					u8 *typed_src = (u8 *)pixels_src;
+				const u32 block_size = width * height;
+
+				auto typed_dst = (be_t<T> *)pixels_dst;
+				auto typed_src = (T *)pixels_src;
 
-					//TODO: Scaling
-					for (u16 row = 0; row < height; ++row)
-					{
-						memcpy(typed_dst, typed_src, native_pitch);
-						typed_dst += pitch;
-						typed_src += native_pitch;
-					}
-				}
-				else
-				{
-					const u32 src_step = native_pitch / sizeof T;
-					const u32 dst_step = pitch / sizeof T;
-
-					auto typed_dst = (be_t<T> *)pixels_dst;
-					auto typed_src = (T *)pixels_src;
-
-					for (u16 row = 0; row < height; ++row)
-					{
-						for (u16 px = 0; px < width; ++px)
-						{
-							typed_dst[px] = typed_src[px];
-						}
-
-						typed_dst += dst_step;
-						typed_src += src_step;
-					}
-				}
+				for (u32 px = 0; px < block_size; ++px)
+					typed_dst[px] = typed_src[px];
 			}
 		}
 
@@ -263,29 +225,39 @@ namespace vk
 
 			protect(utils::protection::rw);
 
-			//TODO: Image scaling, etc
 			void* pixels_src = dma_buffer->map(0, cpu_address_range);
 			void* pixels_dst = vm::base(cpu_address_base);
 
-			//We have to do our own byte swapping since the driver doesnt do it for us
 			const u8 bpp = native_pitch / width;
 
-			switch (bpp)
+			if (pitch == native_pitch)
 			{
-			default:
-				LOG_ERROR(RSX, "Invalid bpp %d", bpp);
-			case 1:
-				do_memory_transfer<u8>(pixels_dst, pixels_src);
-				break;
-			case 2:
-				do_memory_transfer<u16>(pixels_dst, pixels_src);
-				break;
-			case 4:
-				do_memory_transfer<u32>(pixels_dst, pixels_src);
-				break;
-			case 8:
-				do_memory_transfer<u64>(pixels_dst, pixels_src);
-				break;
+				//We have to do our own byte swapping since the driver doesnt do it for us
+				switch (bpp)
+				{
+				default:
+					LOG_ERROR(RSX, "Invalid bpp %d", bpp); //Falls through to the 1 byte copy below
+				case 1:
+					do_memory_transfer<u8>(pixels_dst, pixels_src);
+					break;
+				case 2:
+					do_memory_transfer<u16>(pixels_dst, pixels_src);
+					break;
+				case 4:
+					do_memory_transfer<u32>(pixels_dst, pixels_src);
+					break;
+				case 8:
+					do_memory_transfer<u64>(pixels_dst, pixels_src);
+					break;
+				}
+			}
+			else
+			{
+				//Scale image to fit
+				//usually we can just get away with nearest filtering
+				const u8 samples = pitch / native_pitch;
+
+				rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, pitch, native_pitch, bpp, samples, true);
 			}
 
 			dma_buffer->unmap();
diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp
index bd1506076d..192fa5b1e1 100644
--- a/rpcs3/Emu/RSX/rsx_utils.cpp
+++ b/rpcs3/Emu/RSX/rsx_utils.cpp
@@ -132,4 +132,216 @@ namespace rsx
 			return { blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f };
 		}
 	}
+
+	/* Fast image scaling routines
+	 * Only uses fast nearest scaling and integral scaling factors
+	 * T - Dst type
+	 * U - Src type
+	 * N - Sample count
+	 */
+	template <typename T, typename U>
+	void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
+	{
+		u32 dst_offset = 0;
+		u32 src_offset = 0;
+
+		u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof(T);
+
+		for (u16 h = 0; h < src_height; ++h)
+		{
+			for (u16 w = 0; w < src_width; ++w)
+			{
+				for (u8 n = 0; n < samples; ++n)
+				{
+					dst[dst_offset++] = src[src_offset];
+				}
+
+				src_offset++;
+			}
+
+			dst_offset += padding;
+		}
+	}
+
+	void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
+	{
+		switch (pixel_size)
+		{
+		case 1:
+			scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 2:
+			scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 4:
+			scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 8:
+			scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		default:
+			fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
+		}
+	}
+
+	void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
+	{
+		switch (pixel_size)
+		{
+		case 1:
+			scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 2:
+			scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 4:
+			scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		case 8:
+			scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			break;
+		default:
+			fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
+		}
+	}
+
+	template <typename T, typename U, int N>
+	void scale_image_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 padding)
+	{
+		u32 dst_offset = 0;
+		u32 src_offset = 0;
+
+		for (u16 h = 0; h < src_height; ++h)
+		{
+			for (u16 w = 0; w < src_width; ++w)
+			{
+				for (u8 n = 0; n < N; ++n)
+				{
+					dst[dst_offset++] = src[src_offset];
+				}
+
+				//Fetch next pixel
+				src_offset++;
+			}
+
+			//Pad this row
+			dst_offset += padding;
+		}
+	}
+
+	template <int N>
+	void scale_image_fast(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
+	{
+		switch (pixel_size)
+		{
+		case 1:
+			scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
+			break;
+		case 2:
+			scale_image_impl<u16, u16, N>((u16*)dst, (const u16*)src, src_width, src_height, padding);
+			break;
+		case 4:
+			scale_image_impl<u32, u32, N>((u32*)dst, (const u32*)src, src_width, src_height, padding);
+			break;
+		case 8:
+			scale_image_impl<u64, u64, N>((u64*)dst, (const u64*)src, src_width, src_height, padding);
+			break;
+		default:
+			fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
+		}
+	}
+
+	template <int N>
+	void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
+	{
+		switch (pixel_size)
+		{
+		case 1:
+			scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
+			break;
+		case 2:
+			scale_image_impl<u16, be_t<u16>, N>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, padding);
+			break;
+		case 4:
+			scale_image_impl<u32, be_t<u32>, N>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, padding);
+			break;
+		case 8:
+			scale_image_impl<u64, be_t<u64>, N>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, padding);
+			break;
+		default:
+			fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
+		}
+	}
+
+	/* Nearest-filter upscale of an image along the horizontal axis only
+	 * Each source pixel is written 'samples' times; the number of rows is unchanged
+	 * dst        - Destination buffer
+	 * src        - Source buffer, read as consecutive pixels (src_pitch is not used to step rows)
+	 * src_width  - Source width in pixels
+	 * src_height - Source height in rows
+	 * dst_pitch  - Destination row pitch in bytes, used to derive the padding after each output row
+	 * src_pitch  - Source row pitch in bytes, used by the generic fallback and the error log
+	 * pixel_size - Bytes per pixel: 1, 2, 4 or 8, anything else reaches fmt::throw_exception
+	 * samples    - Repeat count: 2, 3, 4, 8 and 16 use the unrolled templates,
+	 *              other values log an error and use the generic fallback
+	 * swap_bytes - Read multi-byte source pixels as be_t<> (presumably to byte swap them, confirm against be_t)
+	 */
+	void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes)
+	{
+		//Scale this image by repeating pixel data n times
+		//n = expected_pitch / real_pitch
+		//Use of fixed argument templates for performance reasons
+
+		const u16 dst_width = dst_pitch / pixel_size;
+		const u16 padding = dst_width - (src_width * samples);
+
+		if (!swap_bytes)
+		{
+			switch (samples)
+			{
+			case 2:
+				scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 3:
+				scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 4:
+				scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 8:
+				scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 16:
+				scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			default:
+				LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
+				scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			}
+		}
+		else
+		{
+			switch (samples)
+			{
+			case 2:
+				scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 3:
+				scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 4:
+				scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 8:
+				scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			case 16:
+				scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
+				break;
+			default:
+				LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
+				scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
+			}
+		}
+	}
 }
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 28e22d17ad..ac0276cbe3 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -133,6 +133,8 @@ namespace rsx
 		}
 	}
 
+	void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false);
+
 	void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
 		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
 