rsx: Refactor image scaling code; Scale downloaded surfaces with Vulkan

kd-11 2017-04-23 12:32:37 +03:00
parent c7db322873
commit e1a75deb25
4 changed files with 244 additions and 164 deletions

View File

@ -16,6 +16,8 @@
#include "../../Memory/vm.h"
#include "Utilities/Config.h"
#include "../rsx_utils.h"
class GLGSRender;
extern cfg::bool_entry g_cfg_rsx_write_color_buffers;
@ -113,76 +115,6 @@ namespace gl
return size;
}
//TODO: Move swscale routines to RSX shared
void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
u32 dst_offset = 0;
u32 src_offset = 0;
u32 padding = dst_pitch - (src_pitch * samples);
for (u16 h = 0; h < src_height; ++h)
{
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < samples; ++n)
{
memcpy(&dst[dst_offset], &src[src_offset], pixel_size);
dst_offset += pixel_size;
}
src_offset += pixel_size;
}
dst_offset += padding;
}
}
template <typename T, int N>
void scale_image_impl(T* dst, const T* src, u16 src_width, u16 src_height, u16 padding)
{
u32 dst_offset = 0;
u32 src_offset = 0;
for (u16 h = 0; h < src_height; ++h)
{
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < N; ++n)
{
dst[dst_offset++] = src[src_offset];
}
//Fetch next pixel
src_offset++;
}
//Pad this row
dst_offset += padding;
}
}
template <int N>
void scale_image(void *dst, void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
{
switch (pixel_size)
{
case 1:
scale_image_impl<u8, N>((u8*)dst, (u8*)src, src_width, src_height, padding);
break;
case 2:
scale_image_impl<u16, N>((u16*)dst, (u16*)src, src_width, src_height, padding);
break;
case 4:
scale_image_impl<u32, N>((u32*)dst, (u32*)src, src_width, src_height, padding);
break;
case 8:
scale_image_impl<u64, N>((u64*)dst, (u64*)src, src_width, src_height, padding);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
void init_buffer()
{
if (pbo_id)
@ -341,36 +273,11 @@ namespace gl
else
{
//TODO: Use compression hint from the gcm tile information
//Scale this image by repeating pixel data n times
//n = expected_pitch / real_pitch
//Use of fixed argument templates for performance reasons
//TODO: Fall back to bilinear filtering if samples > 2
const u16 pixel_size = get_pixel_size(format, type);
const u16 dst_width = current_pitch / pixel_size;
const u16 sample_count = current_pitch / real_pitch;
const u16 padding = dst_width - (current_width * sample_count);
switch (sample_count)
{
case 2:
scale_image<2>(dst, data, pixel_size, current_width, current_height, padding);
break;
case 3:
scale_image<3>(dst, data, pixel_size, current_width, current_height, padding);
break;
case 4:
scale_image<4>(dst, data, pixel_size, current_width, current_height, padding);
break;
case 8:
scale_image<8>(dst, data, pixel_size, current_width, current_height, padding);
break;
case 16:
scale_image<16>(dst, data, pixel_size, current_width, current_height, padding);
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch);
scale_image_fallback(dst, static_cast<u8*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count);
}
const u8 pixel_size = get_pixel_size(format, type);
const u8 samples = current_pitch / real_pitch;
rsx::scale_image_nearest(dst, const_cast<const void*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, samples);
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
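
For illustration, a minimal standalone sketch of the pitch arithmetic the call above relies on (numbers are hypothetical, not taken from the commit): samples is the integral scale factor derived from the two pitches, and any leftover destination pixels per row become padding.

#include <cstdint>
#include <cstdio>

int main()
{
	// Hypothetical surface: 320 pixels wide, RGBA8 (4 bytes per pixel),
	// downloaded tightly packed but expected by the guest at twice the pitch.
	const uint16_t current_width = 320;
	const uint8_t  pixel_size    = 4;
	const uint16_t real_pitch    = current_width * pixel_size; // 1280 bytes, packed
	const uint16_t current_pitch = 2560;                       // pitch the guest expects

	const uint8_t  samples   = current_pitch / real_pitch;          // 2 -> repeat each pixel twice
	const uint16_t dst_width = current_pitch / pixel_size;          // 640 destination pixels
	const uint16_t padding   = dst_width - current_width * samples; // 0 trailing pixels per row

	std::printf("samples=%d dst_width=%d padding=%d\n", samples, dst_width, padding);
	return 0;
}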

View File

@ -3,6 +3,7 @@
#include "VKRenderTargets.h"
#include "VKGSRender.h"
#include "../Common/TextureUtils.h"
#include "../rsx_utils.h"
namespace vk
{
@ -195,58 +196,19 @@ namespace vk
}
template<typename T>
void do_memory_transfer(void *pixels_dst, void *pixels_src)
void do_memory_transfer(void *pixels_dst, const void *pixels_src)
{
//LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
if (pitch == native_pitch)
{
if (sizeof(T) == 1)
memcpy(pixels_dst, pixels_src, cpu_address_range);
else
{
const u32 block_size = width * height;
auto typed_dst = (be_t<T> *)pixels_dst;
auto typed_src = (T *)pixels_src;
for (u32 px = 0; px < block_size; ++px)
typed_dst[px] = typed_src[px];
}
}
if (sizeof(T) == 1)
memcpy(pixels_dst, pixels_src, cpu_address_range);
else
{
if (sizeof(T) == 1)
{
u8 *typed_dst = (u8 *)pixels_dst;
u8 *typed_src = (u8 *)pixels_src;
const u32 block_size = width * height;
auto typed_dst = (be_t<T> *)pixels_dst;
auto typed_src = (T *)pixels_src;
//TODO: Scaling
for (u16 row = 0; row < height; ++row)
{
memcpy(typed_dst, typed_src, native_pitch);
typed_dst += pitch;
typed_src += native_pitch;
}
}
else
{
const u32 src_step = native_pitch / sizeof(T);
const u32 dst_step = pitch / sizeof(T);
auto typed_dst = (be_t<T> *)pixels_dst;
auto typed_src = (T *)pixels_src;
for (u16 row = 0; row < height; ++row)
{
for (u16 px = 0; px < width; ++px)
{
typed_dst[px] = typed_src[px];
}
typed_dst += dst_step;
typed_src += src_step;
}
}
for (u32 px = 0; px < block_size; ++px)
typed_dst[px] = typed_src[px];
}
}
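
For reference, a minimal standalone sketch of the swap-on-store idea that the be_t<> assignment above performs implicitly (plain stdint types and an explicit byteswap instead of RPCS3's be_t wrapper; illustrative only):

#include <cstdint>

// Store native (little-endian) 32-bit values into guest memory in
// big-endian byte order, element by element.
static inline uint32_t byteswap_u32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
	       ((v << 8) & 0x00FF0000u) | (v << 24);
}

static void copy_swapped_u32(uint32_t* dst, const uint32_t* src, uint32_t count)
{
	for (uint32_t i = 0; i < count; ++i)
		dst[i] = byteswap_u32(src[i]); // what be_t<u32> dst = u32 src does on a LE host
}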
@ -263,29 +225,39 @@ namespace vk
protect(utils::protection::rw);
//TODO: Image scaling, etc
void* pixels_src = dma_buffer->map(0, cpu_address_range);
void* pixels_dst = vm::base(cpu_address_base);
//We have to do our own byte swapping since the driver doesn't do it for us
const u8 bpp = native_pitch / width;
switch (bpp)
if (pitch == native_pitch)
{
default:
LOG_ERROR(RSX, "Invalid bpp %d", bpp);
case 1:
do_memory_transfer<u8>(pixels_dst, pixels_src);
break;
case 2:
do_memory_transfer<u16>(pixels_dst, pixels_src);
break;
case 4:
do_memory_transfer<u32>(pixels_dst, pixels_src);
break;
case 8:
do_memory_transfer<u64>(pixels_dst, pixels_src);
break;
//We have to do our own byte swapping since the driver doesn't do it for us
switch (bpp)
{
default:
LOG_ERROR(RSX, "Invalid bpp %d", bpp);
case 1:
do_memory_transfer<u8>(pixels_dst, pixels_src);
break;
case 2:
do_memory_transfer<u16>(pixels_dst, pixels_src);
break;
case 4:
do_memory_transfer<u32>(pixels_dst, pixels_src);
break;
case 8:
do_memory_transfer<u64>(pixels_dst, pixels_src);
break;
}
}
else
{
//Scale image to fit
//usually we can just get away with nearest filtering
const u8 samples = pitch / native_pitch;
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, pitch, native_pitch, bpp, samples, true);
}
dma_buffer->unmap();
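
As a worked example (hypothetical numbers): for a 640-pixel-wide surface with bpp = 4, native_pitch is 2560 bytes; if the guest-visible pitch is also 2560, the first branch does a plain byte-swapping copy, while a pitch of 5120 gives samples = 5120 / 2560 = 2 and scale_image_nearest writes every source pixel twice per row.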

View File

@ -132,4 +132,203 @@ namespace rsx
return { blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f };
}
}
/* Fast image scaling routines
* Only uses fast nearest scaling and integral scaling factors
* T - Dst type
* U - Src type
* N - Sample count
*/
template <typename T, typename U>
void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
u32 dst_offset = 0;
u32 src_offset = 0;
u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof(T);
for (u16 h = 0; h < src_height; ++h)
{
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < samples; ++n)
{
dst[dst_offset++] = src[src_offset];
}
src_offset++;
}
dst_offset += padding;
}
}
void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
switch (pixel_size)
{
case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 2:
scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 4:
scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 8:
scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
switch (pixel_size)
{
case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 2:
scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 4:
scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
case 8:
scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
template <typename T, typename U, int N>
void scale_image_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 padding)
{
u32 dst_offset = 0;
u32 src_offset = 0;
for (u16 h = 0; h < src_height; ++h)
{
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < N; ++n)
{
dst[dst_offset++] = src[src_offset];
}
//Fetch next pixel
src_offset++;
}
//Pad this row
dst_offset += padding;
}
}
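
To make the repetition concrete, a self-contained sketch (illustrative values only) that widens one 4-pixel row to 8 pixels with N = 2, mirroring the loop structure above:

#include <cstdint>
#include <cstdio>

int main()
{
	// One source row of four 8-bit pixels, scaled 2x horizontally by repetition.
	const uint8_t src[4] = { 10, 20, 30, 40 };
	uint8_t dst[8] = {};

	uint32_t dst_offset = 0;
	for (uint16_t w = 0; w < 4; ++w)
	{
		for (uint8_t n = 0; n < 2; ++n) // N = 2 samples per source pixel
			dst[dst_offset++] = src[w];
	}

	for (uint8_t px : dst)
		std::printf("%d ", px); // prints: 10 10 20 20 30 30 40 40
	std::printf("\n");
	return 0;
}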
template <int N>
void scale_image_fast(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
{
switch (pixel_size)
{
case 1:
scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
break;
case 2:
scale_image_impl<u16, u16, N>((u16*)dst, (const u16*)src, src_width, src_height, padding);
break;
case 4:
scale_image_impl<u32, u32, N>((u32*)dst, (const u32*)src, src_width, src_height, padding);
break;
case 8:
scale_image_impl<u64, u64, N>((u64*)dst, (const u64*)src, src_width, src_height, padding);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
template <int N>
void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
{
switch (pixel_size)
{
case 1:
scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
break;
case 2:
scale_image_impl<u16, be_t<u16>, N>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, padding);
break;
case 4:
scale_image_impl<u32, be_t<u32>, N>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, padding);
break;
case 8:
scale_image_impl<u64, be_t<u64>, N>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, padding);
break;
default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
}
}
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes)
{
//Scale this image by repeating pixel data n times
//n = expected_pitch / real_pitch
//Use of fixed argument templates for performance reasons
const u16 dst_width = dst_pitch / pixel_size;
const u16 padding = dst_width - (src_width * samples);
if (!swap_bytes)
{
switch (samples)
{
case 2:
scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
}
}
else
{
switch (samples)
{
case 2:
scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 3:
scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 4:
scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 8:
scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 16:
scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
}
}
}
}

View File

@ -133,6 +133,8 @@ namespace rsx
}
}
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false);
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
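
A hedged usage sketch of the swscale-backed helper declared above (sizes, the wrapper function, and the pixel format choice are illustrative only; AV_PIX_FMT_RGBA comes from FFmpeg's libavutil, and u8 is RPCS3's byte typedef, spelled out here for self-containment):

#include <cstdint>
#include <vector>
extern "C" {
#include <libavutil/pixfmt.h>
}
// assumes "rsx_utils.h" is included for the rsx::convert_scale_image declaration above

using u8 = std::uint8_t;

// Hypothetical example: downscale a 1280x720 RGBA surface to 640x360
// with bilinear filtering via rsx::convert_scale_image.
void example_downscale(const u8* src)
{
	const int src_w = 1280, src_h = 720, src_pitch = src_w * 4;
	const int dst_w = 640,  dst_h = 360, dst_pitch = dst_w * 4;

	std::vector<u8> dst(dst_pitch * dst_h);
	rsx::convert_scale_image(dst.data(), AV_PIX_FMT_RGBA, dst_w, dst_h, dst_pitch,
		src, AV_PIX_FMT_RGBA, src_w, src_h, src_pitch, src_h, true /* bilinear */);
}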