diff --git a/Makefile b/Makefile index c4905ede83..a751dc6fa6 100644 --- a/Makefile +++ b/Makefile @@ -126,6 +126,7 @@ endif ifeq ($(HAVE_SDL), 1) OBJ += gfx/sdl_gfx.o gfx/context/sdl_ctx.o input/sdl_input.o audio/sdl_audio.o fifo_buffer.o + OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o DEFINES += $(SDL_CFLAGS) $(BSD_LOCAL_INC) LIBS += $(SDL_LIBS) diff --git a/Makefile.win b/Makefile.win index c26b4fbbd2..bebb04744c 100644 --- a/Makefile.win +++ b/Makefile.win @@ -62,6 +62,7 @@ endif ifeq ($(HAVE_SDL), 1) OBJ += gfx/sdl_gfx.o gfx/gl.o gfx/math/matrix.o gfx/fonts/freetype.o gfx/context/sdl_ctx.o input/sdl_input.o audio/sdl_audio.o fifo_buffer.o + OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o LIBS += -lSDL DEFINES += -ISDL -DHAVE_SDL endif diff --git a/gfx/scaler/filter.c b/gfx/scaler/filter.c new file mode 100644 index 0000000000..9c7e5516f0 --- /dev/null +++ b/gfx/scaler/filter.c @@ -0,0 +1,250 @@ +#include "filter.h" +#include +#include +#include + +static bool allocate_filters(struct scaler_ctx *ctx) +{ + ctx->horiz.filter = (int16_t*)scaler_alloc(sizeof(int16_t), ctx->horiz.filter_stride * ctx->out_width); + ctx->horiz.filter_pos = (int*)scaler_alloc(sizeof(int), ctx->out_width); + + ctx->vert.filter = (int16_t*)scaler_alloc(sizeof(int16_t), ctx->vert.filter_stride * ctx->out_height); + ctx->vert.filter_pos = (int*)scaler_alloc(sizeof(int), ctx->out_height); + + return ctx->horiz.filter && ctx->vert.filter; +} + +static void gen_filter_point_sub(struct scaler_filter *filter, int len, int pos, int step) +{ + for (int i = 0; i < len; i++, pos += step) + { + filter->filter_pos[i] = pos >> 16; + filter->filter[i] = FILTER_UNITY; + } +} + +static bool gen_filter_point(struct scaler_ctx *ctx) +{ + ctx->horiz.filter_len = 1; + ctx->horiz.filter_stride = 1; + ctx->vert.filter_len = 1; + ctx->vert.filter_stride = 1; + + if (!allocate_filters(ctx)) + return false; + + int x_pos = 
// Rounds v up to the next power of two. A value that already is a power of
// two is returned unchanged.
//
// 0 maps to 1: the bit-smearing trick below would otherwise wrap around and
// return 0, which is not a power of two and would yield a zero-length filter.
// The shifts cover 32 bits; inputs above 2^31 still wrap to 0.
static inline unsigned next_pow2(unsigned v)
{
   if (v == 0)
      return 1;

   // Smear the highest set bit of (v - 1) into every lower bit, then add one.
   v--;
   v |= v >> 1;
   v |= v >> 2;
   v |= v >> 4;
   v |= v >> 8;
   v |= v >> 16;
   v++;

   return v;
}
sinc_phase = M_PI * ((double)((sinc_size << 15) + (pos & 0xffff)) / 0x10000 - j); + double lanczos_phase = sinc_phase / ((sinc_size >> 1)); + int16_t sinc_val = FILTER_UNITY * sinc(sinc_phase * phase_mul) * sinc(lanczos_phase) * phase_mul; + //sinc_sum += sinc_val; + + filter->filter[i * sinc_size + j] = sinc_val; + } + //fprintf(stderr, "Sinc sum = %.3lf\n", (double)sinc_sum / FILTER_UNITY); + } +} + +static bool gen_filter_sinc(struct scaler_ctx *ctx) +{ + // Need to expand the filter when downsampling to get a proper low-pass effect. + const int sinc_size = 8 * (ctx->in_width > ctx->out_width ? next_pow2(ctx->in_width / ctx->out_width) : 1); + ctx->horiz.filter_len = sinc_size; + ctx->horiz.filter_stride = sinc_size; + ctx->vert.filter_len = sinc_size; + ctx->vert.filter_stride = sinc_size; + + if (!allocate_filters(ctx)) + return false; + + int x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15) - (sinc_size << 15); + int x_step = (1 << 16) * ctx->in_width / ctx->out_width; + int y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15) - (sinc_size << 15); + int y_step = (1 << 16) * ctx->in_height / ctx->out_height; + + double phase_mul_horiz = ctx->in_width > ctx->out_width ? (double)ctx->out_width / ctx->in_width : 1.0; + double phase_mul_vert = ctx->in_height > ctx->out_height ? 
(double)ctx->out_height / ctx->in_height : 1.0; + + gen_filter_sinc_sub(&ctx->horiz, ctx->out_width, x_pos, x_step, phase_mul_horiz); + gen_filter_sinc_sub(&ctx->vert, ctx->out_height, y_pos, y_step, phase_mul_vert); + + return true; +} + + +static bool validate_filter(struct scaler_ctx *ctx) +{ + int max_w_pos = ctx->in_width - ctx->horiz.filter_len; + for (int i = 0; i < ctx->out_width; i++) + { + if (ctx->horiz.filter_pos[i] > max_w_pos || ctx->horiz.filter_pos[i] < 0) + { + fprintf(stderr, "Out X = %d => In X = %d\n", i, ctx->horiz.filter_pos[i]); + return false; + } + } + + int max_h_pos = ctx->in_height - ctx->vert.filter_len; + for (int i = 0; i < ctx->out_height; i++) + { + if (ctx->vert.filter_pos[i] > max_h_pos || ctx->vert.filter_pos[i] < 0) + { + fprintf(stderr, "Out Y = %d => In Y = %d\n", i, ctx->vert.filter_pos[i]); + return false; + } + } + + return true; +} + +static void fixup_filter_sub(struct scaler_filter *filter, int out_len, int in_len) +{ + int max_pos = in_len - filter->filter_len; + + for (int i = 0; i < out_len; i++) + { + int postsample = filter->filter_pos[i] - max_pos; + int presample = -filter->filter_pos[i]; + + if (postsample > 0) + { + filter->filter_pos[i] -= postsample; + + int16_t *base_filter = filter->filter + i * filter->filter_stride; + + if (postsample > (int)filter->filter_len) + memset(base_filter, 0, filter->filter_len * sizeof(int16_t)); + else + { + memmove(base_filter + postsample, base_filter, (filter->filter_len - postsample) * sizeof(int16_t)); + memset(base_filter, 0, postsample * sizeof(int16_t)); + } + } + + if (presample > 0) + { + filter->filter_pos[i] += presample; + int16_t *base_filter = filter->filter + i * filter->filter_stride; + + if (presample > (int)filter->filter_len) + memset(base_filter, 0, filter->filter_len * sizeof(int16_t)); + else + { + memmove(base_filter, base_filter + presample, (filter->filter_len - presample) * sizeof(int16_t)); + memset(base_filter + (filter->filter_len - presample), 0, 
presample * sizeof(int16_t)); + } + } + } +} + +// Makes sure that we never sample outside our rectangle. +static void fixup_filter(struct scaler_ctx *ctx) +{ + fixup_filter_sub(&ctx->horiz, ctx->out_width, ctx->in_width); + fixup_filter_sub(&ctx->vert, ctx->out_height, ctx->in_height); +} + + +bool scaler_gen_filter(struct scaler_ctx *ctx) +{ + bool ret = true; + + switch (ctx->scaler_type) + { + case SCALER_TYPE_POINT: + ret = gen_filter_point(ctx); + break; + + case SCALER_TYPE_BILINEAR: + ret = gen_filter_bilinear(ctx); + break; + + case SCALER_TYPE_SINC: + ret = gen_filter_sinc(ctx); + break; + + default: + return false; + } + + if (!ret) + return false; + + fixup_filter(ctx); + + return validate_filter(ctx); +} + diff --git a/gfx/scaler/filter.h b/gfx/scaler/filter.h new file mode 100644 index 0000000000..0614c6d31a --- /dev/null +++ b/gfx/scaler/filter.h @@ -0,0 +1,10 @@ +#ifndef FILTER_H__ +#define FILTER_H__ + +#include +#include "scaler.h" + +bool scaler_gen_filter(struct scaler_ctx *ctx); + +#endif + diff --git a/gfx/scaler/main.c b/gfx/scaler/main.c new file mode 100644 index 0000000000..2edf1226a6 --- /dev/null +++ b/gfx/scaler/main.c @@ -0,0 +1,171 @@ +#include "scaler.h" +#include +#include +#include +#include +#include +#include +#include + +static float g_horiz_scale = 1.0f; +static float g_vert_scale = 1.0f; + +static enum scaler_type g_scaler_type = SCALER_TYPE_SINC; + +static char *g_in_path; +static char *g_out_path; + +static void print_help(void) +{ + fprintf(stderr, "Usage: scale [...options...]\n"); + fprintf(stderr, "\t-i/--input: Input file\n"); + fprintf(stderr, "\t-o/--output: Output file\n"); + fprintf(stderr, "\t-x/--xscale: Relative scale in X\n"); + fprintf(stderr, "\t-y/--yscale: Relative scale in Y\n"); + fprintf(stderr, "\t-s/--scale: Relative scale in both X/Y\n"); + fprintf(stderr, "\t-t/--type: Filter type. 
Valid ones are:\n"); + fprintf(stderr, "\t\tsinc, point, bilinear\n"); + fprintf(stderr, "\t-h/--help: Prints this help\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + const struct option opts[] = { + { "xscale", 1, NULL, 'x' }, + { "yscale", 1, NULL, 'y' }, + { "scale", 1, NULL, 's' }, + { "input", 1, NULL, 'i' }, + { "output", 1, NULL, 'o' }, + { "type", 1, NULL, 't' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + const char *optstring = "x:y:i:o:t:s:h"; + + for (;;) + { + int c = getopt_long(argc, argv, optstring, opts, NULL); + if (c == -1) + break; + + switch (c) + { + case 'h': + print_help(); + exit(EXIT_SUCCESS); + + case 's': + g_horiz_scale = strtof(optarg, NULL); + g_vert_scale = g_horiz_scale; + break; + + case 'x': + g_horiz_scale = strtof(optarg, NULL); + break; + + case 'y': + g_vert_scale = strtof(optarg, NULL); + break; + + case 'i': + g_in_path = strdup(optarg); + break; + + case 'o': + g_out_path = strdup(optarg); + break; + + case '?': + print_help(); + return false; + + case 't': + if (strcmp(optarg, "sinc") == 0) + g_scaler_type = SCALER_TYPE_SINC; + else if (strcmp(optarg, "bilinear") == 0) + g_scaler_type = SCALER_TYPE_BILINEAR; + else if (strcmp(optarg, "point") == 0) + g_scaler_type = SCALER_TYPE_POINT; + else + { + print_help(); + return false; + } + break; + } + } + + if (!g_in_path || !g_out_path) + { + print_help(); + return false; + } + + if (optind < argc) + { + print_help(); + return false; + } + + return true; +} + +int main(int argc, char *argv[]) +{ + if (!parse_args(argc, argv)) + return EXIT_FAILURE; + + Imlib_Image img = imlib_load_image(g_in_path); + if (!img) + return EXIT_FAILURE; + + imlib_context_set_image(img); + + struct scaler_ctx ctx = {0}; + ctx.in_width = imlib_image_get_width(); + ctx.in_height = imlib_image_get_height(); + ctx.out_width = (int)(imlib_image_get_width() * g_horiz_scale); + ctx.out_height = (int)(imlib_image_get_height() * g_vert_scale); + ctx.in_stride = 
imlib_image_get_width() * sizeof(uint32_t); + ctx.out_stride = (int)(imlib_image_get_width() * g_horiz_scale) * sizeof(uint32_t); + ctx.in_fmt = SCALER_FMT_ARGB8888; + ctx.out_fmt = SCALER_FMT_ARGB8888; + ctx.scaler_type = g_scaler_type; + + assert(scaler_ctx_gen_filter(&ctx)); + + uint32_t *scale_buf = (uint32_t*)calloc(sizeof(uint32_t), ctx.out_width * ctx.out_height); + + //struct timespec tv[2]; + //clock_gettime(CLOCK_MONOTONIC, &tv[0]); + scaler_ctx_scale(&ctx, scale_buf, imlib_image_get_data_for_reading_only()); + //clock_gettime(CLOCK_MONOTONIC, &tv[1]); + + //double time_ms = (tv[1].tv_sec - tv[0].tv_sec) * 1000.0 + (tv[1].tv_nsec - tv[0].tv_nsec) / 1000000.0; + //double ns_per_pix = (1000000.0 * time_ms) / (ctx.out_width * ctx.out_height); + //printf("Time: %.3lf ms, %.3lf ns / pixel\n", time_ms, ns_per_pix); + + Imlib_Image new_img = imlib_create_image_using_data(ctx.out_width, ctx.out_height, + scale_buf); + + imlib_free_image(); + imlib_context_set_image(new_img); + + const char *fmt = strrchr(g_out_path, '.'); + if (fmt) + fmt++; + else + fmt = "png"; + + imlib_image_set_format(fmt); + imlib_save_image(g_out_path); + imlib_free_image(); + + free(scale_buf); + free(g_in_path); + free(g_out_path); + + scaler_ctx_gen_reset(&ctx); +} + diff --git a/gfx/scaler/pixconv.c b/gfx/scaler/pixconv.c new file mode 100644 index 0000000000..791c9ca13f --- /dev/null +++ b/gfx/scaler/pixconv.c @@ -0,0 +1,131 @@ +#include "pixconv.h" +#include +#include +#include + +void conv_0rgb1555_argb8888(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint32_t *output = (uint32_t*)output_; + + for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1) + { + for (int w = 0; w < width; w++) + { + uint32_t col = input[w]; + uint32_t r = (col >> 10) & 0x1f; + uint32_t g = (col >> 5) & 0x1f; + uint32_t b = (col >> 0) & 0x1f; + r = (r << 3) | (r >> 2); + g = (g 
// Converts packed 24-bit pixels (byte order B, G, R) to 32-bit ARGB8888 words.
// Alpha is forced to 0xff.
//
// width/height are in pixels, strides are in bytes. The output pointer is
// advanced by out_stride >> 2 uint32_t elements per row, so out_stride is
// expected to be a multiple of 4.
void conv_bgr24_argb8888(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint8_t *input = (const uint8_t*)input_;
   uint32_t *output = (uint32_t*)output_;

   for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride)
   {
      const uint8_t *inp = input;
      for (int w = 0; w < width; w++)
      {
         uint32_t b = *inp++;
         uint32_t g = *inp++;
         uint32_t r = *inp++;

         // 0xffu: shifting a signed 0xff into bit 31 would overflow int.
         output[w] = (0xffu << 24) | (r << 16) | (g << 8) | (b << 0);
      }
   }
}
// Row-by-row copy for the case where input and output share a pixel format.
//
// Each row copies min(|out_stride|, |in_stride|) bytes; the width argument is
// not consulted. Strides are in bytes and may differ between the two buffers.
void conv_copy(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const int out_bytes = abs(out_stride);
   const int in_bytes  = abs(in_stride);
   const int row_bytes = in_bytes < out_bytes ? in_bytes : out_bytes;

   uint8_t *dst       = (uint8_t*)output_;
   const uint8_t *src = (const uint8_t*)input_;

   for (int rows_left = height; rows_left > 0; rows_left--)
   {
      memcpy(dst, src, row_bytes);
      dst += out_stride;
      src += in_stride;
   }
}
... +void *scaler_alloc(size_t elem_size, size_t size) +{ + return calloc(elem_size, size); +} + +void scaler_free(void *ptr) +{ + free(ptr); +} + +static bool allocate_frames(struct scaler_ctx *ctx) +{ + ctx->scaled.stride = ((ctx->out_width + 7) & ~7) * sizeof(uint64_t); + ctx->scaled.width = ctx->out_width; + ctx->scaled.height = ctx->in_height; + ctx->scaled.frame = (uint64_t*)scaler_alloc(sizeof(uint64_t), (ctx->scaled.stride * ctx->scaled.height) >> 3); + if (!ctx->scaled.frame) + return false; + + if (ctx->in_fmt != SCALER_FMT_ARGB8888) + { + ctx->input.stride = ((ctx->in_width + 7) & ~7) * sizeof(uint32_t); + ctx->input.frame = (uint32_t*)scaler_alloc(sizeof(uint32_t), (ctx->input.stride * ctx->in_height) >> 2); + if (!ctx->input.frame) + return false; + } + + if (ctx->out_fmt != SCALER_FMT_ARGB8888) + { + ctx->output.stride = ((ctx->out_width + 7) & ~7) * sizeof(uint32_t); + ctx->output.frame = (uint32_t*)scaler_alloc(sizeof(uint32_t), (ctx->output.stride * ctx->out_height) >> 2); + if (!ctx->output.frame) + return false; + } + + return true; +} + +static bool set_direct_pix_conv(struct scaler_ctx *ctx) +{ + if (ctx->in_fmt == ctx->out_fmt) + ctx->direct_pixconv = conv_copy; + else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_ARGB8888) + ctx->direct_pixconv = conv_0rgb1555_argb8888; + else if (ctx->in_fmt == SCALER_FMT_BGR24 && ctx->out_fmt == SCALER_FMT_ARGB8888) + ctx->direct_pixconv = conv_bgr24_argb8888; + else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_0RGB1555) + ctx->direct_pixconv = conv_argb8888_0rgb1555; + else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_BGR24) + ctx->direct_pixconv = conv_argb8888_bgr24; + else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_BGR24) + ctx->direct_pixconv = conv_0rgb1555_bgr24; + else + return false; + + return true; +} + +static bool set_pix_conv(struct scaler_ctx *ctx) +{ + switch (ctx->in_fmt) + { + case 
SCALER_FMT_ARGB8888: + // No need to convert :D + break; + + case SCALER_FMT_0RGB1555: + ctx->in_pixconv = conv_0rgb1555_argb8888; + break; + + case SCALER_FMT_BGR24: + ctx->in_pixconv = conv_bgr24_argb8888; + break; + + default: + return false; + } + + switch (ctx->out_fmt) + { + case SCALER_FMT_ARGB8888: + // No need to convert :D + break; + + case SCALER_FMT_0RGB1555: + ctx->out_pixconv = conv_argb8888_0rgb1555; + break; + + case SCALER_FMT_BGR24: + ctx->out_pixconv = conv_argb8888_bgr24; + break; + + default: + return false; + } + + return true; +} + +bool scaler_ctx_gen_filter(struct scaler_ctx *ctx) +{ + scaler_ctx_gen_reset(ctx); + + if (ctx->in_width == ctx->out_width && ctx->in_height == ctx->out_height) + ctx->unscaled = true; // Only pixel format conversion ... + else + { + ctx->scaler_horiz = scaler_argb8888_horiz; + ctx->scaler_vert = scaler_argb8888_vert; + ctx->unscaled = false; + } + + if (!allocate_frames(ctx)) + return false; + + if (ctx->unscaled) + { + if (!set_direct_pix_conv(ctx)) + return false; + } + else + { + if (!set_pix_conv(ctx)) + return false; + } + + if (!ctx->unscaled && !scaler_gen_filter(ctx)) + return false; + + return true; +} + +void scaler_ctx_gen_reset(struct scaler_ctx *ctx) +{ + scaler_free(ctx->horiz.filter); + scaler_free(ctx->horiz.filter_pos); + scaler_free(ctx->vert.filter); + scaler_free(ctx->vert.filter_pos); + scaler_free(ctx->scaled.frame); + scaler_free(ctx->input.frame); + scaler_free(ctx->output.frame); + + memset(&ctx->horiz, 0, sizeof(ctx->horiz)); + memset(&ctx->vert, 0, sizeof(ctx->vert)); + memset(&ctx->scaled, 0, sizeof(ctx->scaled)); + memset(&ctx->input, 0, sizeof(ctx->input)); + memset(&ctx->output, 0, sizeof(ctx->output)); +} + +void scaler_ctx_scale(const struct scaler_ctx *ctx, + void *output, const void *input) +{ + if (ctx->unscaled) + { + ctx->direct_pixconv(output, input, + ctx->out_width, ctx->out_height, + ctx->out_stride, ctx->in_stride); + } + else + { + if (ctx->in_fmt != 
// State of one scaler instance. The caller fills in the dimensions, strides,
// formats and scaler_type, then calls scaler_ctx_gen_filter(); the remaining
// members are set up by the scaler itself.
struct scaler_ctx
{
   // Input frame: size in pixels, stride in bytes.
   int in_width;
   int in_height;
   int in_stride;

   // Output frame: size in pixels, stride in bytes.
   int out_width;
   int out_height;
   int out_stride;

   enum scaler_pix_fmt in_fmt;
   enum scaler_pix_fmt out_fmt;
   enum scaler_type scaler_type;

   // Horizontal pass: reads ARGB8888 rows (pointer, stride) and writes scaled.frame.
   void (*scaler_horiz)(const struct scaler_ctx*,
         const void*, int);
   // Vertical pass: reads scaled.frame and writes ARGB8888 rows (pointer, stride).
   void (*scaler_vert)(const struct scaler_ctx*,
         void*, int);

   // Pixel converters, called as (output, input, width, height, out_stride, in_stride).
   // in_pixconv/out_pixconv convert to/from ARGB8888 around the scaling passes
   // and are only used when the respective format is not ARGB8888.
   // direct_pixconv is used on its own when no scaling takes place.
   void (*in_pixconv)(void*, const void*, int, int, int, int);
   void (*out_pixconv)(void*, const void*, int, int, int, int);
   void (*direct_pixconv)(void*, const void*, int, int, int, int);

   // True when input and output dimensions are identical.
   bool unscaled;
   // Filter tables for the horizontal and vertical pass.
   struct scaler_filter horiz, vert;

   // ARGB8888 copy of the input; allocated when in_fmt is not ARGB8888.
   struct
   {
      uint32_t *frame;
      int stride;
   } input;

   // Result of the horizontal pass: out_width x in_height pixels,
   // four 16-bit channels packed into each uint64_t. Stride is in bytes.
   struct
   {
      uint64_t *frame;
      int width;
      int height;
      int stride;
   } scaled;

   // ARGB8888 result of the vertical pass; allocated when out_fmt is not ARGB8888.
   struct
   {
      uint32_t *frame;
      int stride;
   } output;
};
// Saturates a signed 16-bit channel value to the 0..255 range.
static inline uint8_t clamp_8bit(int16_t col)
{
   const int16_t floored = col < 0 ? 0 : col;
   return (uint8_t)(floored > 255 ? 255 : floored);
}
+ +#if defined(__SSE2__) +void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride) +{ + const uint64_t *input = ctx->scaled.frame; + uint32_t *output = (uint32_t*)output_; + + const int16_t *filter_vert = ctx->vert.filter; + + for (int h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2) + { + const uint64_t *input_base = input + ctx->vert.filter_pos[h] * (ctx->scaled.stride >> 3); + + for (int w = 0; w < ctx->out_width; w++) + { + __m128i res = _mm_setzero_si128(); + + const uint64_t *input_base_y = input_base + w; + + size_t y; + for (y = 0; (y + 1) < ctx->vert.filter_len; y += 2, input_base_y += (ctx->scaled.stride >> 2)) + { + __m128i coeff = _mm_set_epi64x(filter_vert[y + 1] * 0x0001000100010001ll, filter_vert[y + 0] * 0x0001000100010001ll); + __m128i col = _mm_set_epi64x(input_base_y[ctx->scaled.stride >> 3], input_base_y[0]); + + res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); + } + + for (; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3)) + { + __m128i coeff = _mm_set_epi64x(0, filter_vert[y] * 0x0001000100010001ll); + __m128i col = _mm_set_epi64x(0, input_base_y[0]); + + res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); + } + + res = _mm_adds_epi16(_mm_srli_si128(res, 8), res); + res = _mm_srai_epi16(res, (7 - 2 - 2)); + + __m128i final = _mm_packus_epi16(res, res); + + output[w] = _mm_cvtsi128_si32(final); + } + } +} +#else +void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride) +{ + const uint64_t *input = ctx->scaled.frame; + uint32_t *output = output_; + + const int16_t *filter_vert = ctx->vert.filter; + + for (int h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2) + { + const uint64_t *input_base = input + ctx->vert.filter_pos[h] * (ctx->scaled.stride >> 3); + + for (int w = 0; w < ctx->out_width; w++) + { + int16_t res_a = 0; + int16_t res_r = 0; + int16_t res_g = 0; + 
int16_t res_b = 0; + + const uint64_t *input_base_y = input_base + w; + for (size_t y = 0; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3)) + { + uint64_t col = *input_base_y; + + int16_t a = (col >> 48) & 0xffff; + int16_t r = (col >> 32) & 0xffff; + int16_t g = (col >> 16) & 0xffff; + int16_t b = (col >> 0) & 0xffff; + + int16_t coeff = filter_vert[y]; + + res_a += (a * coeff) >> 16; + res_r += (r * coeff) >> 16; + res_g += (g * coeff) >> 16; + res_b += (b * coeff) >> 16; + } + + res_a >>= (7 - 2 - 2); + res_r >>= (7 - 2 - 2); + res_g >>= (7 - 2 - 2); + res_b >>= (7 - 2 - 2); + + output[w] = (clamp_8bit(res_a) << 24) | (clamp_8bit(res_r) << 16) | (clamp_8bit(res_g) << 8) | (clamp_8bit(res_b) << 0); + } + } +} +#endif + +#if defined(__SSE2__) +void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride) +{ + const uint32_t *input = (const uint32_t*)input_; + uint64_t *output = ctx->scaled.frame; + + for (int h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3) + { + const int16_t *filter_horiz = ctx->horiz.filter; + + for (int w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride) + { + __m128i res = _mm_setzero_si128(); + + const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w]; + + size_t x; + for (x = 0; (x + 1) < ctx->horiz.filter_len; x += 2) + { + __m128i coeff = _mm_set_epi64x(filter_horiz[x + 1] * 0x0001000100010001ll, filter_horiz[x + 0] * 0x0001000100010001ll); + + __m128i col = _mm_unpacklo_epi8(_mm_set_epi64x(0, + ((uint64_t)input_base_x[x + 1] << 32) | input_base_x[x + 0]), _mm_setzero_si128()); + + col = _mm_slli_epi16(col, 7); + res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); + } + + for (; x < ctx->horiz.filter_len; x++) + { + __m128i coeff = _mm_set_epi64x(0, filter_horiz[x] * 0x0001000100010001ll); + __m128i col = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, 0, input_base_x[x]), _mm_setzero_si128()); + + col = 
_mm_slli_epi16(col, 7); + res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); + } + + res = _mm_adds_epi16(_mm_srli_si128(res, 8), res); + output[w] = _mm_cvtsi128_si64(res); + } + } +} +#else +void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride) +{ + const uint32_t *input = input_; + uint64_t *output = ctx->scaled.frame; + + for (int h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3) + { + const int16_t *filter_horiz = ctx->horiz.filter; + + for (int w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride) + { + const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w]; + + int16_t res_a = 0; + int16_t res_r = 0; + int16_t res_g = 0; + int16_t res_b = 0; + + for (size_t x = 0; x < ctx->horiz.filter_len; x++) + { + uint32_t col = input_base_x[x]; + + int16_t a = (col >> (24 - 7)) & (0xff << 7); + int16_t r = (col >> (16 - 7)) & (0xff << 7); + int16_t g = (col >> ( 8 - 7)) & (0xff << 7); + int16_t b = (col << ( 0 + 7)) & (0xff << 7); + + int16_t coeff = filter_horiz[x]; + + res_a += (a * coeff) >> 16; + res_r += (r * coeff) >> 16; + res_g += (g * coeff) >> 16; + res_b += (b * coeff) >> 16; + } + + output[w] = build_argb64(res_a, res_r, res_g, res_b); + } + } +} +#endif + diff --git a/gfx/scaler/scaler_int.h b/gfx/scaler/scaler_int.h new file mode 100644 index 0000000000..1d20616edf --- /dev/null +++ b/gfx/scaler/scaler_int.h @@ -0,0 +1,10 @@ +#ifndef SCALER_INT_H__ +#define SCALER_INT_H__ + +#include "scaler.h" + +void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output, int stride); +void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input, int stride); + +#endif + diff --git a/gfx/sdl_gfx.c b/gfx/sdl_gfx.c index 686a32191c..2543d3bc66 100644 --- a/gfx/sdl_gfx.c +++ b/gfx/sdl_gfx.c @@ -19,6 +19,7 @@ #include #include "../general.h" #include "../input/rarch_sdl_input.h" +#include "scaler/scaler.h" #include "gfx_common.h" #include 
"gfx_context.h" @@ -53,6 +54,10 @@ typedef struct sdl_video uint8_t font_g; uint8_t font_b; #endif + + struct scaler_ctx scaler; + unsigned last_width; + unsigned last_height; } sdl_video_t; static void sdl_gfx_free(void *data) @@ -71,6 +76,8 @@ static void sdl_gfx_free(void *data) font_renderer_free(vid->font); #endif + scaler_ctx_gen_reset(&vid->scaler); + free(vid); } @@ -268,23 +275,18 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu if (!video->fullscreen) RARCH_LOG("Creating window @ %ux%u\n", video->width, video->height); - vid->render32 = video->rgb32 && !g_settings.video.force_16bit; + vid->render32 = !g_settings.video.force_16bit; vid->screen = SDL_SetVideoMode(video->width, video->height, vid->render32 ? 32 : 15, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? SDL_FULLSCREEN : 0)); - if (!vid->screen && !g_settings.video.force_16bit && !video->rgb32) - { - vid->upsample = true; - vid->screen = SDL_SetVideoMode(video->width, video->height, 32, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? SDL_FULLSCREEN : 0)); - RARCH_WARN("SDL: 15-bit colors failed, attempting 32-bit colors.\n"); - vid->render32 = true; - } - if (!vid->screen) { RARCH_ERR("Failed to init SDL surface: %s\n", SDL_GetError()); goto error; } + if (!video->rgb32 && vid->render32) + vid->upsample = true; + SDL_ShowCursor(SDL_DISABLE); #ifdef HAVE_X11 @@ -358,6 +360,10 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu vid->convert_32_func = convert_32bit_32bit_shift; } + vid->scaler.scaler_type = video->smooth ? SCALER_TYPE_BILINEAR : SCALER_TYPE_POINT; + vid->scaler.in_fmt = vid->render32 ? 
SCALER_FMT_ARGB8888 : SCALER_FMT_0RGB1555; + vid->scaler.out_fmt = vid->scaler.in_fmt; + return vid; error: @@ -375,13 +381,20 @@ static inline uint16_t conv_pixel_32_15(uint32_t pix, const SDL_PixelFormat *fmt static inline uint32_t conv_pixel_15_32(uint16_t pix, const SDL_PixelFormat *fmt) { - uint32_t r = ((pix >> 10) & 0x1f) << (fmt->Rshift + 3); - uint32_t g = ((pix >> 5) & 0x1f) << (fmt->Gshift + 3); - uint32_t b = ((pix >> 0) & 0x1f) << (fmt->Bshift + 3); - return r | g | b; + uint32_t r = (pix >> 10) & 0x1f; + uint32_t g = (pix >> 5) & 0x1f; + uint32_t b = (pix >> 0) & 0x1f; + + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + + return (r << fmt->Rshift) | (g << fmt->Gshift) | (b << fmt->Bshift); } -static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, + const uint32_t *input, unsigned width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { @@ -393,7 +406,9 @@ static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, const uint32_t } } -static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, + const uint16_t *input, unsigned width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { @@ -405,7 +420,9 @@ static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, const uint16_t } } -static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, + const uint16_t *input, unsigned 
width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { @@ -416,7 +433,9 @@ static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, const u (void)fmt; } -static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, + const uint32_t *input, unsigned width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { @@ -427,12 +446,15 @@ static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, const u (void)fmt; } -static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, + const uint16_t *input, unsigned width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { - uint16_t *dest = out + ((y * outpitch) >> 1); + uint16_t *dest = out + ((y * outpitch) >> 1); const uint16_t *src = input + ((y * pitch) >> 1); + for (unsigned x = 0; x < width; x++) { uint16_t color = src[x]; @@ -444,12 +466,15 @@ static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, const ui } } -static void convert_32bit_32bit_shift(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) +static void convert_32bit_32bit_shift(uint32_t *out, unsigned outpitch, + const uint32_t *input, unsigned width, unsigned height, + unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { - uint32_t *dest = out + ((y * outpitch) >> 2); + uint32_t *dest = out + ((y * outpitch) >> 2); const uint32_t *src = input + ((y * pitch) >> 2); + for 
(unsigned x = 0; x < width; x++) { uint32_t color = src[x]; @@ -488,43 +513,51 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne if (SDL_MUSTLOCK(vid->buffer)) SDL_LockSurface(vid->buffer); - // :( - // 15-bit -> 32-bit (Sometimes 15-bit won't work on "modern" OSes :\) + // 15-bit -> 32-bit. if (vid->upsample) convert_15bit_32bit((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); // 15-bit -> 15-bit else if (!vid->rgb32) vid->convert_15_func((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); // 32-bit -> 15-bit - else if (vid->rgb32 && g_settings.video.force_16bit) + else if (vid->rgb32 && !vid->render32) convert_32bit_15bit((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format); // 32-bit -> 32-bit else vid->convert_32_func((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format); + if (width != vid->last_width || height != vid->last_height) + { + vid->scaler.in_width = width; + vid->scaler.in_height = height; + vid->scaler.in_stride = vid->buffer->pitch; + + vid->scaler.out_width = vid->screen->w; + vid->scaler.out_height = vid->screen->h; + vid->scaler.out_stride = vid->screen->pitch; + + scaler_ctx_gen_filter(&vid->scaler); + + vid->last_width = width; + vid->last_height = height; + } + + if (SDL_MUSTLOCK(vid->screen)) + SDL_LockSurface(vid->screen); + + scaler_ctx_scale(&vid->scaler, vid->screen->pixels, vid->buffer->pixels); + if (SDL_MUSTLOCK(vid->buffer)) SDL_UnlockSurface(vid->buffer); - - SDL_Rect src = {0}; - src.x = 0; - src.y = 0; - src.w = width; - src.h = height; - - SDL_Rect dest = {0}; - dest.x = 0; - dest.y = 0; - dest.w = vid->screen->w; - dest.h = vid->screen->h; - - SDL_SoftStretch(vid->buffer, &src, vid->screen, &dest); + if 
(SDL_MUSTLOCK(vid->screen)) + SDL_UnlockSurface(vid->screen); if (msg) { - if ((!vid->rgb32 || g_settings.video.force_16bit) && !vid->upsample) - sdl_render_msg_15(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); - else + if (vid->render32) sdl_render_msg_32(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); + else + sdl_render_msg_15(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); } char buf[128];