diff --git a/gfx/filters/Makefile b/gfx/filters/Makefile
index 33a94ec101..d27971912a 100644
--- a/gfx/filters/Makefile
+++ b/gfx/filters/Makefile
@@ -1,6 +1,7 @@
 compiler := gcc
 extra_flags :=
 use_neon := 0
+release := debug
 
 ifndef platform
 platform := $(shell $(compiler) -dumpmachine)
@@ -37,7 +38,7 @@ ASMFLAGS := -INEON/asm
 asflags += -mfpu=neon
 endif
 
-objects += 2xsai.so 2xbr.so darken.so scale2x.so
+objects += 2xsai.so 2xbr.so darken.so hq2x.so scale2x.so
 
 all: build;
 
diff --git a/gfx/filters/hq2x.c b/gfx/filters/hq2x.c
new file mode 100644
index 0000000000..4b92ce24ab
--- /dev/null
+++ b/gfx/filters/hq2x.c
@@ -0,0 +1,484 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// Compile: gcc -o hq2x.so -shared hq2x.c -std=c99 -O3 -Wall -pedantic -fPIC
+
+#include "softfilter.h"
+#include <stdlib.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation hq2x_get_implementation
+#endif
+
+#define HQ2X_SCALE 2
+
+/* Fills the lookup tables; defined further down, next to the tables. */
+static void initialize(void);
+
+/* Returns the input pixel formats this filter accepts (RGB565 only). */
+static unsigned hq2x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565;
+}
+
+/* The output format is always the same as the input format. */
+static unsigned hq2x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts;
+}
+
+/* Returns the thread count the filter was created with. */
+static unsigned hq2x_generic_threads(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   return filt->threads;
+}
+
+/* Allocates the filter state plus one worker slot per thread and returns it,
+ * or NULL if an allocation fails. The lookup tables are filled here, before
+ * any work packet runs, so the workers only ever read them. */
+static void *hq2x_generic_create(unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd)
+{
+   struct filter_data *filt;
+
+   (void)out_fmt;
+   (void)max_width;
+   (void)max_height;
+   (void)simd;
+
+   filt = (struct filter_data*)calloc(1, sizeof(*filt));
+   if (!filt)
+      return NULL;
+
+   filt->workers = (struct softfilter_thread_data*)calloc(threads, sizeof(struct softfilter_thread_data));
+   if (!filt->workers)
+   {
+      free(filt);
+      return NULL;
+   }
+
+   filt->threads = threads;
+   filt->in_fmt  = in_fmt;
+
+   initialize();
+   return filt;
+}
+
+/* Reports the output size: both dimensions are scaled by HQ2X_SCALE. */
+static void hq2x_generic_output(void *data, unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   (void)data;
+   *out_width  = width * HQ2X_SCALE;
+   *out_height = height * HQ2X_SCALE;
+}
+
+/* Frees what hq2x_generic_create() allocated. A NULL handle is ignored. */
+static void hq2x_generic_destroy(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   if (!filt)
+      return;
+   free(filt->workers);
+   free(filt);
+}
+
+/* One YUV table entry per possible 16-bit pixel value, so that every
+ * RGB565 pixel is a valid index. */
+#define HQ2X_YUV_ENTRIES 65536
+
+/* Unsigned literals: 0x440 << 21 is not representable in a signed int. */
+#define DIFF_OFFSET ((0x440u << 21) + (0x207u << 11) + 0x407u)
+#define DIFF_MASK   ((0x380u << 21) + (0x1f0u << 11) + 0x3f0u)
+
+#define HQ2X_565_MASK   (0x7e0f81f)
+#define HQ2X_4444_MASK  (0xf0f0f0f)
+#define HQ2X_565_SHIFT  (16)
+#define HQ2X_4444_SHIFT (16)
+
+/* Packed Y/U/V value of every RGB565 pixel; filled by initialize(). */
+static uint32_t yuvTable[HQ2X_YUV_ENTRIES];
+/* Bit permutation applied to a neighbour pattern between output pixels. */
+static uint8_t rotate[256];
+
+/* Blend rule (see blend()) for each 8-bit neighbour pattern. */
+static const uint8_t hqTable[256] = {
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 15, 12, 5,  3, 17, 13,
+   4, 4, 6, 18, 4, 4, 6, 18, 5,  3, 12, 12, 5,  3,  1, 12,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 17, 13, 5,  3, 16, 14,
+   4, 4, 6, 18, 4, 4, 6, 18, 5,  3, 16, 12, 5,  3,  1, 14,
+   4, 4, 6,  2, 4, 4, 6,  2, 5, 19, 12, 12, 5, 19, 16, 12,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 12,
+   4, 4, 6,  2, 4, 4, 6,  2, 5, 19,  1, 12, 5, 19,  1, 14,
+   4, 4, 6,  2, 4, 4, 6, 18, 5,  3, 16, 12, 5, 19,  1, 14,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 15, 12, 5,  3, 17, 13,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 12,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 17, 13, 5,  3, 16, 14,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 13, 5,  3,  1, 14,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3, 16, 13,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3,  1, 12,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3, 16, 12, 5,  3,  1, 14,
+   4, 4, 6,  2, 4, 4, 6,  2, 5,  3,  1, 12, 5,  3,  1, 14,
+};
+
+/* Fills yuvTable and rotate on the first call; later calls return at once.
+ * Not safe to run concurrently, hence it is called from
+ * hq2x_generic_create() rather than from the worker callbacks. */
+static void initialize(void)
+{
+   static int initialized = 0;
+   unsigned i, n;
+
+   if (initialized)
+      return;
+
+   for (i = 0; i < HQ2X_YUV_ENTRIES; ++i)
+   {
+      /* Split the RGB565 pixel into its channels. */
+      const unsigned R = (i >> 11) & 0x1f;
+      const unsigned G = (i >>  5) & 0x3f;
+      const unsigned B = (i >>  0) & 0x1f;
+
+      /* rgb565->rgb888 */
+      const double r = (R << 3) | (R >> 2);
+      const double g = (G << 2) | (G >> 4);
+      const double b = (B << 3) | (B >> 2);
+
+      /* rgb888->yuv888 */
+      const double y = (r + g + b) * (0.25f * (63.5f / 48.0f));
+      const double u = ((r - b) * 0.25f + 128.0f) * (7.5f / 7.0f);
+      const double v = ((g * 2.0f - r - b) * 0.125f + 128.0f) * (7.5f / 6.0f);
+
+      yuvTable[i] = ((unsigned)y << 21) + ((unsigned)u << 11) + ((unsigned)v);
+   }
+
+   for (n = 0; n < 256; ++n)
+   {
+      rotate[n] = ((n >> 2) & 0x11) | ((n << 2) & 0x88)
+                | ((n & 0x01) << 5) | ((n & 0x08) << 3)
+                | ((n & 0x10) >> 3) | ((n & 0x80) >> 5);
+   }
+
+   initialized = 1;
+}
+
+/* Non-zero when no DIFF_MASK bit is set in the offset difference of the two
+ * pixels' table entries, i.e. the pixels count as similar. */
+static inline uint8_t same(uint16_t x, uint16_t y)
+{
+   return !((yuvTable[x] - yuvTable[y] + DIFF_OFFSET) & DIFF_MASK);
+}
+
+/* Returns 1 when pixel y differs from the reference x, else 0. x is expected
+ * to be yuvTable[pixel] + DIFF_OFFSET. The result is normalised to 0/1:
+ * most DIFF_MASK bits lie above bit 7 and would be dropped by a plain
+ * conversion to uint8_t, and the caller shifts the result into a bit mask. */
+static inline uint8_t diff(uint32_t x, uint16_t y)
+{
+   return ((x - yuvTable[y]) & DIFF_MASK) != 0;
+}
+
+/* Duplicates *n `shift` bits higher and keeps only the `mask` bits, leaving
+ * spare bits above each channel for the weighted sums in blendN(). */
+static inline void grow(uint32_t *n, unsigned shift, uint32_t mask)
+{
+   *n |= (*n << shift);
+   *n &= mask;
+}
+
+/* Reverses grow(): folds the masked channels back into a 16-bit pixel. */
+static inline uint16_t pack(uint32_t n, uint32_t mask, unsigned shift)
+{
+   n &= mask;
+   return (n | (n >> shift));
+}
+
+/* (3*A + B) / 4 per channel. */
+static uint16_t blend1(uint32_t A, uint32_t B, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   A = (A * 3 + B) >> 2;
+   return pack(A, mask, shift);
+}
+
+/* (2*A + B + C) / 4 per channel. */
+static uint16_t blend2(uint32_t A, uint32_t B, uint32_t C, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   grow(&C, shift, mask);
+   return pack((A * 2 + B + C) >> 2, mask, shift);
+}
+
+/* (5*A + 2*B + C) / 8 per channel. */
+static uint16_t blend3(uint32_t A, uint32_t B, uint32_t C, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   grow(&C, shift, mask);
+   return pack((A * 5 + B * 2 + C) >> 3, mask, shift);
+}
+
+/* (6*A + B + C) / 8 per channel. */
+static uint16_t blend4(uint32_t A, uint32_t B, uint32_t C, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   grow(&C, shift, mask);
+   return pack((A * 6 + B + C) >> 3, mask, shift);
+}
+
+/* (2*A + 3*B + 3*C) / 8 per channel. */
+static uint16_t blend5(uint32_t A, uint32_t B, uint32_t C, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   grow(&C, shift, mask);
+   return pack((A * 2 + (B + C) * 3) >> 3, mask, shift);
+}
+
+/* (14*A + B + C) / 16 per channel. */
+static uint16_t blend6(uint32_t A, uint32_t B, uint32_t C, unsigned shift, uint32_t mask)
+{
+   grow(&A, shift, mask);
+   grow(&B, shift, mask);
+   grow(&C, shift, mask);
+   return pack((A * 14 + B + C) >> 4, mask, shift);
+}
+
+typedef uint16_t (*blend_16bit)(unsigned rule, uint16_t E, uint16_t A,
+      uint16_t B, uint16_t D, uint16_t F, uint16_t H);
+
+/* Produces one output pixel from the centre pixel E and its neighbours
+ * according to `rule`, a value taken from hqTable. Unknown rules return E
+ * unchanged; unknown colour formats are treated as RGB565. */
+static uint16_t blend(unsigned colfmt, unsigned rule, uint16_t E, uint16_t A,
+      uint16_t B, uint16_t D, uint16_t F, uint16_t H)
+{
+   /* RGB565 defaults, so mask/shift are never read uninitialised. */
+   uint32_t mask  = HQ2X_565_MASK;
+   unsigned shift = HQ2X_565_SHIFT;
+
+   switch (colfmt)
+   {
+      case SOFTFILTER_FMT_RGB565:
+      default:
+         break;
+#if 0
+      case SOFTFILTER_FMT_RGB4444:
+         mask  = HQ2X_4444_MASK;
+         shift = HQ2X_4444_SHIFT;
+         break;
+#endif
+   }
+
+   switch (rule)
+   {
+      default:
+      case 0:
+         return E;
+      case 1:
+         return blend1(E, A, shift, mask);
+      case 2:
+         return blend1(E, D, shift, mask);
+      case 3:
+         return blend1(E, B, shift, mask);
+      case 4:
+         return blend2(E, D, B, shift, mask);
+      case 5:
+         return blend2(E, A, B, shift, mask);
+      case 6:
+         return blend2(E, A, D, shift, mask);
+      case 7:
+         return blend3(E, B, D, shift, mask);
+      case 8:
+         return blend3(E, D, B, shift, mask);
+      case 9:
+         return blend4(E, D, B, shift, mask);
+      case 10:
+         return blend5(E, D, B, shift, mask);
+      case 11:
+         return blend6(E, D, B, shift, mask);
+      case 12:
+         return same(B, D) ? blend2(E, D, B, shift, mask) : E;
+      case 13:
+         return same(B, D) ? blend5(E, D, B, shift, mask) : E;
+      case 14:
+         return same(B, D) ? blend6(E, D, B, shift, mask) : E;
+      case 15:
+         return same(B, D) ? blend2(E, D, B, shift, mask) : blend1(E, A, shift, mask);
+      case 16:
+         return same(B, D) ? blend4(E, D, B, shift, mask) : blend1(E, A, shift, mask);
+      case 17:
+         return same(B, D) ? blend5(E, D, B, shift, mask) : blend1(E, A, shift, mask);
+      case 18:
+         return same(B, F) ? blend3(E, B, D, shift, mask) : blend1(E, D, shift, mask);
+      case 19:
+         return same(D, H) ? blend3(E, D, B, shift, mask) : blend1(E, B, shift, mask);
+   }
+}
+
+/* Scales `height` rows of `width` RGB565 pixels from src to a 2x image at
+ * dst. Strides are in pixels. `first` / `last` tell whether this slice
+ * starts at the top / ends at the bottom of the whole frame; only then is
+ * the row above / below the slice off limits. The leftmost and rightmost
+ * source columns are written as black (0). */
+static void hq2x_16bit_generic(unsigned width, unsigned height,
+      int first, int last,
+      uint16_t *src, unsigned src_stride,
+      uint16_t *dst, unsigned dst_stride,
+      unsigned colfmt)
+{
+   unsigned x, y;
+
+   /* Nothing to draw; also keeps the column loop bounds from wrapping. */
+   if (width == 0)
+      return;
+
+   for (y = 0; y < height; ++y)
+   {
+      const uint16_t *in = src + y * src_stride;
+      uint16_t *out0     = dst + y * dst_stride * 2;
+      uint16_t *out1     = out0 + dst_stride;
+
+      const int prevline = ((y == 0) && first) ? 0 : (int)src_stride;
+      const int nextline = ((y == height - 1) && last) ? 0 : (int)src_stride;
+
+      /* Left border column. */
+      in++;
+      *out0++ = 0; *out0++ = 0;
+      *out1++ = 0; *out1++ = 0;
+
+      for (x = 1; x + 1 < width; ++x)
+      {
+         const uint16_t A = *(in - prevline - 1);
+         const uint16_t B = *(in - prevline + 0);
+         const uint16_t C = *(in - prevline + 1);
+         const uint16_t D = *(in - 1);
+         const uint16_t E = *(in + 0);
+         const uint16_t F = *(in + 1);
+         const uint16_t G = *(in + nextline - 1);
+         const uint16_t H = *(in + nextline + 0);
+         const uint16_t I = *(in + nextline + 1);
+         const uint32_t e = yuvTable[E] + DIFF_OFFSET;
+
+         uint8_t pattern;
+         pattern  = diff(e, A) << 0;
+         pattern |= diff(e, B) << 1;
+         pattern |= diff(e, C) << 2;
+         pattern |= diff(e, D) << 3;
+         pattern |= diff(e, F) << 4;
+         pattern |= diff(e, G) << 5;
+         pattern |= diff(e, H) << 6;
+         pattern |= diff(e, I) << 7;
+
+         *(out0 + 0) = blend(colfmt, hqTable[pattern], E, A, B, D, F, H);
+         pattern = rotate[pattern];
+         *(out0 + 1) = blend(colfmt, hqTable[pattern], E, C, F, B, H, D);
+         pattern = rotate[pattern];
+         *(out1 + 1) = blend(colfmt, hqTable[pattern], E, I, H, F, D, B);
+         pattern = rotate[pattern];
+         *(out1 + 0) = blend(colfmt, hqTable[pattern], E, G, D, H, B, F);
+
+         in++;
+         out0 += 2;
+         out1 += 2;
+      }
+
+      /* Right border column; a one-pixel-wide row has only the left one. */
+      if (width > 1)
+      {
+         *out0++ = 0; *out0++ = 0;
+         *out1++ = 0; *out1++ = 0;
+      }
+   }
+}
+
+/* Work-packet callback: runs the filter on one thread's slice. */
+static void hq2x_work_cb_rgb565(void *data, void *thread_data)
+{
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   uint16_t *input  = (uint16_t*)thr->in_data;
+   uint16_t *output = (uint16_t*)thr->out_data;
+   unsigned width   = thr->width;
+   unsigned height  = thr->height;
+
+   (void)data;
+
+   hq2x_16bit_generic(width, height,
+         thr->first, thr->last, input, thr->in_pitch / SOFTFILTER_BPP_RGB565, output, thr->out_pitch / SOFTFILTER_BPP_RGB565,
+         thr->colfmt);
+}
+
+/* Splits the frame into one horizontal slice per thread and fills in a work
+ * packet for each. Pitches are in bytes. */
+static void hq2x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   unsigned i;
+   for (i = 0; i < filt->threads; i++)
+   {
+      struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[i];
+
+      unsigned y_start = (height * i) / filt->threads;
+      unsigned y_end = (height * (i + 1)) / filt->threads;
+      thr->out_data = (uint8_t*)output + y_start * HQ2X_SCALE * output_stride;
+      thr->in_data = (const uint8_t*)input + y_start * input_stride;
+      thr->out_pitch = output_stride;
+      thr->in_pitch = input_stride;
+      thr->width = width;
+      thr->height = y_end - y_start;
+
+      /* Workers need to know whether their slice touches the top or bottom
+       * of the frame: only there must they stay inside their own rows. */
+      thr->first = y_start == 0;
+      thr->last = y_end == height;
+      thr->colfmt = SOFTFILTER_FMT_RGB565;
+
+      if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
+         packets[i].work = hq2x_work_cb_rgb565;
+      packets[i].thread_data = thr;
+   }
+}
+
+static const struct softfilter_implementation hq2x_generic = {
+   hq2x_generic_input_fmts,
+   hq2x_generic_output_fmts,
+
+   hq2x_generic_create,
+   hq2x_generic_destroy,
+
+   hq2x_generic_threads,
+   hq2x_generic_output,
+   hq2x_generic_packets,
+   "HQ2x",
+   SOFTFILTER_API_VERSION,
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(softfilter_simd_mask_t simd)
+{
+   (void)simd;
+   return &hq2x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#endif
diff --git a/gfx/filters/softfilter.h b/gfx/filters/softfilter.h
index ee83bfd97f..c82d1e9fbc 100644
--- a/gfx/filters/softfilter.h
+++ b/gfx/filters/softfilter.h
@@ -104,6 +104,7 @@ struct softfilter_thread_data
    const void *in_data;
    size_t out_pitch;
    size_t in_pitch;
+   unsigned colfmt;
    unsigned width;
    unsigned height;
    int first;