mirror of
https://github.com/libretro/RetroArch
synced 2025-01-26 09:35:21 +00:00
Add custom scaling routines.
Implemented for point, bilinear, lanczos. Partly optimized for SSE2.
This commit is contained in:
parent
22e43d4d84
commit
19fa31f17d
1
Makefile
1
Makefile
@ -126,6 +126,7 @@ endif
|
||||
|
||||
ifeq ($(HAVE_SDL), 1)
|
||||
OBJ += gfx/sdl_gfx.o gfx/context/sdl_ctx.o input/sdl_input.o audio/sdl_audio.o fifo_buffer.o
|
||||
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
|
||||
DEFINES += $(SDL_CFLAGS) $(BSD_LOCAL_INC)
|
||||
LIBS += $(SDL_LIBS)
|
||||
|
||||
|
@ -62,6 +62,7 @@ endif
|
||||
|
||||
ifeq ($(HAVE_SDL), 1)
|
||||
OBJ += gfx/sdl_gfx.o gfx/gl.o gfx/math/matrix.o gfx/fonts/freetype.o gfx/context/sdl_ctx.o input/sdl_input.o audio/sdl_audio.o fifo_buffer.o
|
||||
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
|
||||
LIBS += -lSDL
|
||||
DEFINES += -ISDL -DHAVE_SDL
|
||||
endif
|
||||
|
250
gfx/scaler/filter.c
Normal file
250
gfx/scaler/filter.c
Normal file
@ -0,0 +1,250 @@
|
||||
#include "filter.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
static bool allocate_filters(struct scaler_ctx *ctx)
|
||||
{
|
||||
ctx->horiz.filter = (int16_t*)scaler_alloc(sizeof(int16_t), ctx->horiz.filter_stride * ctx->out_width);
|
||||
ctx->horiz.filter_pos = (int*)scaler_alloc(sizeof(int), ctx->out_width);
|
||||
|
||||
ctx->vert.filter = (int16_t*)scaler_alloc(sizeof(int16_t), ctx->vert.filter_stride * ctx->out_height);
|
||||
ctx->vert.filter_pos = (int*)scaler_alloc(sizeof(int), ctx->out_height);
|
||||
|
||||
return ctx->horiz.filter && ctx->vert.filter;
|
||||
}
|
||||
|
||||
static void gen_filter_point_sub(struct scaler_filter *filter, int len, int pos, int step)
|
||||
{
|
||||
for (int i = 0; i < len; i++, pos += step)
|
||||
{
|
||||
filter->filter_pos[i] = pos >> 16;
|
||||
filter->filter[i] = FILTER_UNITY;
|
||||
}
|
||||
}
|
||||
|
||||
static bool gen_filter_point(struct scaler_ctx *ctx)
|
||||
{
|
||||
ctx->horiz.filter_len = 1;
|
||||
ctx->horiz.filter_stride = 1;
|
||||
ctx->vert.filter_len = 1;
|
||||
ctx->vert.filter_stride = 1;
|
||||
|
||||
if (!allocate_filters(ctx))
|
||||
return false;
|
||||
|
||||
int x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15);
|
||||
int x_step = (1 << 16) * ctx->in_width / ctx->out_width;
|
||||
int y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15);
|
||||
int y_step = (1 << 16) * ctx->in_height / ctx->out_height;
|
||||
|
||||
gen_filter_point_sub(&ctx->horiz, ctx->out_width, x_pos, x_step);
|
||||
gen_filter_point_sub(&ctx->vert, ctx->out_height, y_pos, y_step);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gen_filter_bilinear_sub(struct scaler_filter *filter, int len, int pos, int step)
|
||||
{
|
||||
for (int i = 0; i < len; i++, pos += step)
|
||||
{
|
||||
filter->filter_pos[i] = pos >> 16;
|
||||
filter->filter[i * 2 + 1] = (pos & 0xffff) >> 2;
|
||||
filter->filter[i * 2 + 0] = FILTER_UNITY - filter->filter[i * 2 + 1];
|
||||
}
|
||||
}
|
||||
|
||||
static bool gen_filter_bilinear(struct scaler_ctx *ctx)
|
||||
{
|
||||
ctx->horiz.filter_len = 2;
|
||||
ctx->horiz.filter_stride = 2;
|
||||
ctx->vert.filter_len = 2;
|
||||
ctx->vert.filter_stride = 2;
|
||||
|
||||
if (!allocate_filters(ctx))
|
||||
return false;
|
||||
|
||||
int x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15);
|
||||
int x_step = (1 << 16) * ctx->in_width / ctx->out_width;
|
||||
int y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15);
|
||||
int y_step = (1 << 16) * ctx->in_height / ctx->out_height;
|
||||
|
||||
gen_filter_bilinear_sub(&ctx->horiz, ctx->out_width, x_pos, x_step);
|
||||
gen_filter_bilinear_sub(&ctx->vert, ctx->out_height, y_pos, y_step);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline double sinc(double phase)
|
||||
{
|
||||
if (fabs(phase) < 0.0001)
|
||||
return 1.0;
|
||||
else
|
||||
return sin(phase) / phase;
|
||||
}
|
||||
|
||||
// Rounds v up to the next power of two (a power of two maps to itself).
// Works by smearing the highest set bit of v - 1 into every lower bit and
// adding one. An input of 0 wraps around and yields 0.
static inline unsigned next_pow2(unsigned v)
{
   v -= 1;

   for (unsigned shift = 1; shift <= 16; shift <<= 1)
      v |= v >> shift;

   return v + 1;
}
|
||||
|
||||
static void gen_filter_sinc_sub(struct scaler_filter *filter, int len, int pos, int step, double phase_mul)
|
||||
{
|
||||
const int sinc_size = filter->filter_len;
|
||||
|
||||
for (int i = 0; i < len; i++, pos += step)
|
||||
{
|
||||
filter->filter_pos[i] = pos >> 16;
|
||||
|
||||
//int16_t sinc_sum = 0;
|
||||
for (int j = 0; j < sinc_size; j++)
|
||||
{
|
||||
double sinc_phase = M_PI * ((double)((sinc_size << 15) + (pos & 0xffff)) / 0x10000 - j);
|
||||
double lanczos_phase = sinc_phase / ((sinc_size >> 1));
|
||||
int16_t sinc_val = FILTER_UNITY * sinc(sinc_phase * phase_mul) * sinc(lanczos_phase) * phase_mul;
|
||||
//sinc_sum += sinc_val;
|
||||
|
||||
filter->filter[i * sinc_size + j] = sinc_val;
|
||||
}
|
||||
//fprintf(stderr, "Sinc sum = %.3lf\n", (double)sinc_sum / FILTER_UNITY);
|
||||
}
|
||||
}
|
||||
|
||||
static bool gen_filter_sinc(struct scaler_ctx *ctx)
|
||||
{
|
||||
// Need to expand the filter when downsampling to get a proper low-pass effect.
|
||||
const int sinc_size = 8 * (ctx->in_width > ctx->out_width ? next_pow2(ctx->in_width / ctx->out_width) : 1);
|
||||
ctx->horiz.filter_len = sinc_size;
|
||||
ctx->horiz.filter_stride = sinc_size;
|
||||
ctx->vert.filter_len = sinc_size;
|
||||
ctx->vert.filter_stride = sinc_size;
|
||||
|
||||
if (!allocate_filters(ctx))
|
||||
return false;
|
||||
|
||||
int x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15) - (sinc_size << 15);
|
||||
int x_step = (1 << 16) * ctx->in_width / ctx->out_width;
|
||||
int y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15) - (sinc_size << 15);
|
||||
int y_step = (1 << 16) * ctx->in_height / ctx->out_height;
|
||||
|
||||
double phase_mul_horiz = ctx->in_width > ctx->out_width ? (double)ctx->out_width / ctx->in_width : 1.0;
|
||||
double phase_mul_vert = ctx->in_height > ctx->out_height ? (double)ctx->out_height / ctx->in_height : 1.0;
|
||||
|
||||
gen_filter_sinc_sub(&ctx->horiz, ctx->out_width, x_pos, x_step, phase_mul_horiz);
|
||||
gen_filter_sinc_sub(&ctx->vert, ctx->out_height, y_pos, y_step, phase_mul_vert);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool validate_filter(struct scaler_ctx *ctx)
|
||||
{
|
||||
int max_w_pos = ctx->in_width - ctx->horiz.filter_len;
|
||||
for (int i = 0; i < ctx->out_width; i++)
|
||||
{
|
||||
if (ctx->horiz.filter_pos[i] > max_w_pos || ctx->horiz.filter_pos[i] < 0)
|
||||
{
|
||||
fprintf(stderr, "Out X = %d => In X = %d\n", i, ctx->horiz.filter_pos[i]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int max_h_pos = ctx->in_height - ctx->vert.filter_len;
|
||||
for (int i = 0; i < ctx->out_height; i++)
|
||||
{
|
||||
if (ctx->vert.filter_pos[i] > max_h_pos || ctx->vert.filter_pos[i] < 0)
|
||||
{
|
||||
fprintf(stderr, "Out Y = %d => In Y = %d\n", i, ctx->vert.filter_pos[i]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void fixup_filter_sub(struct scaler_filter *filter, int out_len, int in_len)
|
||||
{
|
||||
int max_pos = in_len - filter->filter_len;
|
||||
|
||||
for (int i = 0; i < out_len; i++)
|
||||
{
|
||||
int postsample = filter->filter_pos[i] - max_pos;
|
||||
int presample = -filter->filter_pos[i];
|
||||
|
||||
if (postsample > 0)
|
||||
{
|
||||
filter->filter_pos[i] -= postsample;
|
||||
|
||||
int16_t *base_filter = filter->filter + i * filter->filter_stride;
|
||||
|
||||
if (postsample > (int)filter->filter_len)
|
||||
memset(base_filter, 0, filter->filter_len * sizeof(int16_t));
|
||||
else
|
||||
{
|
||||
memmove(base_filter + postsample, base_filter, (filter->filter_len - postsample) * sizeof(int16_t));
|
||||
memset(base_filter, 0, postsample * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
|
||||
if (presample > 0)
|
||||
{
|
||||
filter->filter_pos[i] += presample;
|
||||
int16_t *base_filter = filter->filter + i * filter->filter_stride;
|
||||
|
||||
if (presample > (int)filter->filter_len)
|
||||
memset(base_filter, 0, filter->filter_len * sizeof(int16_t));
|
||||
else
|
||||
{
|
||||
memmove(base_filter, base_filter + presample, (filter->filter_len - presample) * sizeof(int16_t));
|
||||
memset(base_filter + (filter->filter_len - presample), 0, presample * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Makes sure that we never sample outside our rectangle.
|
||||
static void fixup_filter(struct scaler_ctx *ctx)
|
||||
{
|
||||
fixup_filter_sub(&ctx->horiz, ctx->out_width, ctx->in_width);
|
||||
fixup_filter_sub(&ctx->vert, ctx->out_height, ctx->in_height);
|
||||
}
|
||||
|
||||
|
||||
bool scaler_gen_filter(struct scaler_ctx *ctx)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
switch (ctx->scaler_type)
|
||||
{
|
||||
case SCALER_TYPE_POINT:
|
||||
ret = gen_filter_point(ctx);
|
||||
break;
|
||||
|
||||
case SCALER_TYPE_BILINEAR:
|
||||
ret = gen_filter_bilinear(ctx);
|
||||
break;
|
||||
|
||||
case SCALER_TYPE_SINC:
|
||||
ret = gen_filter_sinc(ctx);
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
fixup_filter(ctx);
|
||||
|
||||
return validate_filter(ctx);
|
||||
}
|
||||
|
10
gfx/scaler/filter.h
Normal file
10
gfx/scaler/filter.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef FILTER_H__
|
||||
#define FILTER_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "scaler.h"
|
||||
|
||||
bool scaler_gen_filter(struct scaler_ctx *ctx);
|
||||
|
||||
#endif
|
||||
|
171
gfx/scaler/main.c
Normal file
171
gfx/scaler/main.c
Normal file
@ -0,0 +1,171 @@
|
||||
#include "scaler.h"
|
||||
#include <Imlib2.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
#include <getopt.h>
|
||||
#include <string.h>
|
||||
|
||||
static float g_horiz_scale = 1.0f;
|
||||
static float g_vert_scale = 1.0f;
|
||||
|
||||
static enum scaler_type g_scaler_type = SCALER_TYPE_SINC;
|
||||
|
||||
static char *g_in_path;
|
||||
static char *g_out_path;
|
||||
|
||||
// Writes the command line usage summary to stderr.
static void print_help(void)
{
   static const char *const lines[] = {
      "Usage: scale [...options...]\n",
      "\t-i/--input: Input file\n",
      "\t-o/--output: Output file\n",
      "\t-x/--xscale: Relative scale in X\n",
      "\t-y/--yscale: Relative scale in Y\n",
      "\t-s/--scale: Relative scale in both X/Y\n",
      "\t-t/--type: Filter type. Valid ones are:\n",
      "\t\tsinc, point, bilinear\n",
      "\t-h/--help: Prints this help\n",
   };

   for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
      fputs(lines[i], stderr);
}
|
||||
|
||||
static bool parse_args(int argc, char *argv[])
|
||||
{
|
||||
const struct option opts[] = {
|
||||
{ "xscale", 1, NULL, 'x' },
|
||||
{ "yscale", 1, NULL, 'y' },
|
||||
{ "scale", 1, NULL, 's' },
|
||||
{ "input", 1, NULL, 'i' },
|
||||
{ "output", 1, NULL, 'o' },
|
||||
{ "type", 1, NULL, 't' },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ NULL, 0, NULL, 0 },
|
||||
};
|
||||
|
||||
const char *optstring = "x:y:i:o:t:s:h";
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int c = getopt_long(argc, argv, optstring, opts, NULL);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case 'h':
|
||||
print_help();
|
||||
exit(EXIT_SUCCESS);
|
||||
|
||||
case 's':
|
||||
g_horiz_scale = strtof(optarg, NULL);
|
||||
g_vert_scale = g_horiz_scale;
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
g_horiz_scale = strtof(optarg, NULL);
|
||||
break;
|
||||
|
||||
case 'y':
|
||||
g_vert_scale = strtof(optarg, NULL);
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
g_in_path = strdup(optarg);
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
g_out_path = strdup(optarg);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
print_help();
|
||||
return false;
|
||||
|
||||
case 't':
|
||||
if (strcmp(optarg, "sinc") == 0)
|
||||
g_scaler_type = SCALER_TYPE_SINC;
|
||||
else if (strcmp(optarg, "bilinear") == 0)
|
||||
g_scaler_type = SCALER_TYPE_BILINEAR;
|
||||
else if (strcmp(optarg, "point") == 0)
|
||||
g_scaler_type = SCALER_TYPE_POINT;
|
||||
else
|
||||
{
|
||||
print_help();
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!g_in_path || !g_out_path)
|
||||
{
|
||||
print_help();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (optind < argc)
|
||||
{
|
||||
print_help();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (!parse_args(argc, argv))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
Imlib_Image img = imlib_load_image(g_in_path);
|
||||
if (!img)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
imlib_context_set_image(img);
|
||||
|
||||
struct scaler_ctx ctx = {0};
|
||||
ctx.in_width = imlib_image_get_width();
|
||||
ctx.in_height = imlib_image_get_height();
|
||||
ctx.out_width = (int)(imlib_image_get_width() * g_horiz_scale);
|
||||
ctx.out_height = (int)(imlib_image_get_height() * g_vert_scale);
|
||||
ctx.in_stride = imlib_image_get_width() * sizeof(uint32_t);
|
||||
ctx.out_stride = (int)(imlib_image_get_width() * g_horiz_scale) * sizeof(uint32_t);
|
||||
ctx.in_fmt = SCALER_FMT_ARGB8888;
|
||||
ctx.out_fmt = SCALER_FMT_ARGB8888;
|
||||
ctx.scaler_type = g_scaler_type;
|
||||
|
||||
assert(scaler_ctx_gen_filter(&ctx));
|
||||
|
||||
uint32_t *scale_buf = (uint32_t*)calloc(sizeof(uint32_t), ctx.out_width * ctx.out_height);
|
||||
|
||||
//struct timespec tv[2];
|
||||
//clock_gettime(CLOCK_MONOTONIC, &tv[0]);
|
||||
scaler_ctx_scale(&ctx, scale_buf, imlib_image_get_data_for_reading_only());
|
||||
//clock_gettime(CLOCK_MONOTONIC, &tv[1]);
|
||||
|
||||
//double time_ms = (tv[1].tv_sec - tv[0].tv_sec) * 1000.0 + (tv[1].tv_nsec - tv[0].tv_nsec) / 1000000.0;
|
||||
//double ns_per_pix = (1000000.0 * time_ms) / (ctx.out_width * ctx.out_height);
|
||||
//printf("Time: %.3lf ms, %.3lf ns / pixel\n", time_ms, ns_per_pix);
|
||||
|
||||
Imlib_Image new_img = imlib_create_image_using_data(ctx.out_width, ctx.out_height,
|
||||
scale_buf);
|
||||
|
||||
imlib_free_image();
|
||||
imlib_context_set_image(new_img);
|
||||
|
||||
const char *fmt = strrchr(g_out_path, '.');
|
||||
if (fmt)
|
||||
fmt++;
|
||||
else
|
||||
fmt = "png";
|
||||
|
||||
imlib_image_set_format(fmt);
|
||||
imlib_save_image(g_out_path);
|
||||
imlib_free_image();
|
||||
|
||||
free(scale_buf);
|
||||
free(g_in_path);
|
||||
free(g_out_path);
|
||||
|
||||
scaler_ctx_gen_reset(&ctx);
|
||||
}
|
||||
|
131
gfx/scaler/pixconv.c
Normal file
131
gfx/scaler/pixconv.c
Normal file
@ -0,0 +1,131 @@
|
||||
#include "pixconv.h"
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Converts a 0RGB1555 image to ARGB8888.
//
// output_/input_: destination (32-bit pixels) and source (16-bit pixels).
// width/height:   image size in pixels.
// out_stride/in_stride: distance between rows in bytes.
//
// Each 5-bit channel is expanded to 8 bits by replicating its top bits into
// the low bits; alpha is set to 0xff.
void conv_0rgb1555_argb8888(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *input = (const uint16_t*)input_;
   uint32_t *output      = (uint32_t*)output_;

   for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1)
   {
      for (int w = 0; w < width; w++)
      {
         uint32_t col = input[w];
         uint32_t r   = (col >> 10) & 0x1f;
         uint32_t g   = (col >>  5) & 0x1f;
         uint32_t b   = (col >>  0) & 0x1f;
         r = (r << 3) | (r >> 2);
         g = (g << 3) | (g >> 2);
         b = (b << 3) | (b >> 2);

         // Unsigned constant: shifting a signed 0xff left by 24 overflows int.
         output[w] = (UINT32_C(0xff) << 24) | (r << 16) | (g << 8) | (b << 0);
      }
   }
}
|
||||
|
||||
// Converts a 0RGB1555 image to packed 24-bit pixels stored as B, G, R bytes.
//
// width/height are in pixels, the strides are row distances in bytes.
// Each 5-bit channel is widened to 8 bits by replicating its top bits.
void conv_0rgb1555_bgr24(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *src_row = (const uint16_t*)input_;
   uint8_t *dst_row        = (uint8_t*)output_;

   for (int y = 0; y < height; y++)
   {
      for (int x = 0; x < width; x++)
      {
         const uint32_t pix = src_row[x];
         const uint32_t b5  = (pix >>  0) & 0x1f;
         const uint32_t g5  = (pix >>  5) & 0x1f;
         const uint32_t r5  = (pix >> 10) & 0x1f;

         dst_row[3 * x + 0] = (b5 << 3) | (b5 >> 2);
         dst_row[3 * x + 1] = (g5 << 3) | (g5 >> 2);
         dst_row[3 * x + 2] = (r5 << 3) | (r5 >> 2);
      }

      dst_row += out_stride;
      src_row += in_stride >> 1;
   }
}
|
||||
|
||||
// Converts packed 24-bit pixels (stored as B, G, R bytes) to ARGB8888.
//
// width/height are in pixels, the strides are row distances in bytes.
// Alpha is set to 0xff.
void conv_bgr24_argb8888(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint8_t *input = (const uint8_t*)input_;
   uint32_t *output     = (uint32_t*)output_;

   for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride)
   {
      const uint8_t *inp = input;
      for (int w = 0; w < width; w++)
      {
         uint32_t b = *inp++;
         uint32_t g = *inp++;
         uint32_t r = *inp++;

         // Unsigned constant: shifting a signed 0xff left by 24 overflows int.
         output[w] = (UINT32_C(0xff) << 24) | (r << 16) | (g << 8) | (b << 0);
      }
   }
}
|
||||
|
||||
// Converts an ARGB8888 image to 0RGB1555 by keeping the top five bits of
// each colour channel; alpha is discarded.
//
// width/height are in pixels, the strides are row distances in bytes.
void conv_argb8888_0rgb1555(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint32_t *src_row = (const uint32_t*)input_;
   uint16_t *dst_row       = (uint16_t*)output_;

   for (int y = 0; y < height; y++)
   {
      for (int x = 0; x < width; x++)
      {
         const uint32_t pix = src_row[x];
         const uint32_t r5  = (pix >> 19) & 0x1f;
         const uint32_t g5  = (pix >> 11) & 0x1f;
         const uint32_t b5  = (pix >>  3) & 0x1f;

         dst_row[x] = (uint16_t)((r5 << 10) | (g5 << 5) | b5);
      }

      dst_row += out_stride >> 1;
      src_row += in_stride >> 2;
   }
}
|
||||
|
||||
// Converts an ARGB8888 image to packed 24-bit pixels stored as B, G, R
// bytes; alpha is discarded.
//
// width/height are in pixels, the strides are row distances in bytes.
void conv_argb8888_bgr24(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint32_t *src_row = (const uint32_t*)input_;
   uint8_t *dst_row        = (uint8_t*)output_;

   for (int y = 0; y < height; y++)
   {
      for (int x = 0; x < width; x++)
      {
         const uint32_t pix = src_row[x];

         dst_row[3 * x + 0] = (uint8_t)(pix >>  0);
         dst_row[3 * x + 1] = (uint8_t)(pix >>  8);
         dst_row[3 * x + 2] = (uint8_t)(pix >> 16);
      }

      dst_row += out_stride;
      src_row += in_stride >> 2;
   }
}
|
||||
|
||||
// Copies an image row by row without converting pixels.
//
// The number of bytes copied per row is the smaller of |out_stride| and
// |in_stride|; width is not used. The strides are row distances in bytes.
void conv_copy(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const int out_len   = abs(out_stride);
   const int in_len    = abs(in_stride);
   const int row_bytes = (in_len < out_len) ? in_len : out_len;

   const uint8_t *src = (const uint8_t*)input_;
   uint8_t *dst       = (uint8_t*)output_;

   for (int row = 0; row < height; row++)
   {
      memcpy(dst, src, row_bytes);
      dst += out_stride;
      src += in_stride;
   }
}
|
||||
|
29
gfx/scaler/pixconv.h
Normal file
29
gfx/scaler/pixconv.h
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef PIXCONV_H__
|
||||
#define PIXCONV_H__
|
||||
|
||||
void conv_0rgb1555_argb8888(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_bgr24_argb8888(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_argb8888_0rgb1555(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_argb8888_bgr24(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_0rgb1555_bgr24(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_copy(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
#endif
|
||||
|
195
gfx/scaler/scaler.c
Normal file
195
gfx/scaler/scaler.c
Normal file
@ -0,0 +1,195 @@
|
||||
#include "scaler.h"
|
||||
#include "scaler_int.h"
|
||||
#include "filter.h"
|
||||
#include "pixconv.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
// Allocation hook for all scaler buffers, kept as a single choke point in
// case aligned allocations are needed later.
// Returns zero-initialised memory for the two given size factors, or NULL
// on failure.
void *scaler_alloc(size_t elem_size, size_t size)
{
   void *mem = calloc(elem_size, size);
   return mem;
}
|
||||
|
||||
// Counterpart of scaler_alloc(): releases a buffer obtained from it.
// Passing NULL is harmless, as with free().
void scaler_free(void *ptr)
{
   free(ptr);
}
|
||||
|
||||
static bool allocate_frames(struct scaler_ctx *ctx)
|
||||
{
|
||||
ctx->scaled.stride = ((ctx->out_width + 7) & ~7) * sizeof(uint64_t);
|
||||
ctx->scaled.width = ctx->out_width;
|
||||
ctx->scaled.height = ctx->in_height;
|
||||
ctx->scaled.frame = (uint64_t*)scaler_alloc(sizeof(uint64_t), (ctx->scaled.stride * ctx->scaled.height) >> 3);
|
||||
if (!ctx->scaled.frame)
|
||||
return false;
|
||||
|
||||
if (ctx->in_fmt != SCALER_FMT_ARGB8888)
|
||||
{
|
||||
ctx->input.stride = ((ctx->in_width + 7) & ~7) * sizeof(uint32_t);
|
||||
ctx->input.frame = (uint32_t*)scaler_alloc(sizeof(uint32_t), (ctx->input.stride * ctx->in_height) >> 2);
|
||||
if (!ctx->input.frame)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ctx->out_fmt != SCALER_FMT_ARGB8888)
|
||||
{
|
||||
ctx->output.stride = ((ctx->out_width + 7) & ~7) * sizeof(uint32_t);
|
||||
ctx->output.frame = (uint32_t*)scaler_alloc(sizeof(uint32_t), (ctx->output.stride * ctx->out_height) >> 2);
|
||||
if (!ctx->output.frame)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool set_direct_pix_conv(struct scaler_ctx *ctx)
|
||||
{
|
||||
if (ctx->in_fmt == ctx->out_fmt)
|
||||
ctx->direct_pixconv = conv_copy;
|
||||
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_ARGB8888)
|
||||
ctx->direct_pixconv = conv_0rgb1555_argb8888;
|
||||
else if (ctx->in_fmt == SCALER_FMT_BGR24 && ctx->out_fmt == SCALER_FMT_ARGB8888)
|
||||
ctx->direct_pixconv = conv_bgr24_argb8888;
|
||||
else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_0RGB1555)
|
||||
ctx->direct_pixconv = conv_argb8888_0rgb1555;
|
||||
else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_BGR24)
|
||||
ctx->direct_pixconv = conv_argb8888_bgr24;
|
||||
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_BGR24)
|
||||
ctx->direct_pixconv = conv_0rgb1555_bgr24;
|
||||
else
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool set_pix_conv(struct scaler_ctx *ctx)
|
||||
{
|
||||
switch (ctx->in_fmt)
|
||||
{
|
||||
case SCALER_FMT_ARGB8888:
|
||||
// No need to convert :D
|
||||
break;
|
||||
|
||||
case SCALER_FMT_0RGB1555:
|
||||
ctx->in_pixconv = conv_0rgb1555_argb8888;
|
||||
break;
|
||||
|
||||
case SCALER_FMT_BGR24:
|
||||
ctx->in_pixconv = conv_bgr24_argb8888;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (ctx->out_fmt)
|
||||
{
|
||||
case SCALER_FMT_ARGB8888:
|
||||
// No need to convert :D
|
||||
break;
|
||||
|
||||
case SCALER_FMT_0RGB1555:
|
||||
ctx->out_pixconv = conv_argb8888_0rgb1555;
|
||||
break;
|
||||
|
||||
case SCALER_FMT_BGR24:
|
||||
ctx->out_pixconv = conv_argb8888_bgr24;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scaler_ctx_gen_filter(struct scaler_ctx *ctx)
|
||||
{
|
||||
scaler_ctx_gen_reset(ctx);
|
||||
|
||||
if (ctx->in_width == ctx->out_width && ctx->in_height == ctx->out_height)
|
||||
ctx->unscaled = true; // Only pixel format conversion ...
|
||||
else
|
||||
{
|
||||
ctx->scaler_horiz = scaler_argb8888_horiz;
|
||||
ctx->scaler_vert = scaler_argb8888_vert;
|
||||
ctx->unscaled = false;
|
||||
}
|
||||
|
||||
if (!allocate_frames(ctx))
|
||||
return false;
|
||||
|
||||
if (ctx->unscaled)
|
||||
{
|
||||
if (!set_direct_pix_conv(ctx))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!set_pix_conv(ctx))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ctx->unscaled && !scaler_gen_filter(ctx))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void scaler_ctx_gen_reset(struct scaler_ctx *ctx)
|
||||
{
|
||||
scaler_free(ctx->horiz.filter);
|
||||
scaler_free(ctx->horiz.filter_pos);
|
||||
scaler_free(ctx->vert.filter);
|
||||
scaler_free(ctx->vert.filter_pos);
|
||||
scaler_free(ctx->scaled.frame);
|
||||
scaler_free(ctx->input.frame);
|
||||
scaler_free(ctx->output.frame);
|
||||
|
||||
memset(&ctx->horiz, 0, sizeof(ctx->horiz));
|
||||
memset(&ctx->vert, 0, sizeof(ctx->vert));
|
||||
memset(&ctx->scaled, 0, sizeof(ctx->scaled));
|
||||
memset(&ctx->input, 0, sizeof(ctx->input));
|
||||
memset(&ctx->output, 0, sizeof(ctx->output));
|
||||
}
|
||||
|
||||
void scaler_ctx_scale(const struct scaler_ctx *ctx,
|
||||
void *output, const void *input)
|
||||
{
|
||||
if (ctx->unscaled)
|
||||
{
|
||||
ctx->direct_pixconv(output, input,
|
||||
ctx->out_width, ctx->out_height,
|
||||
ctx->out_stride, ctx->in_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ctx->in_fmt != SCALER_FMT_ARGB8888)
|
||||
{
|
||||
ctx->in_pixconv(ctx->input.frame, input,
|
||||
ctx->in_width, ctx->in_height,
|
||||
ctx->input.stride, ctx->in_stride);
|
||||
|
||||
ctx->scaler_horiz(ctx, ctx->input.frame, ctx->input.stride);
|
||||
}
|
||||
else
|
||||
ctx->scaler_horiz(ctx, input, ctx->in_stride);
|
||||
|
||||
if (ctx->out_fmt != SCALER_FMT_ARGB8888)
|
||||
{
|
||||
ctx->scaler_vert(ctx, ctx->output.frame, ctx->output.stride);
|
||||
|
||||
ctx->out_pixconv(output, ctx->output.frame,
|
||||
ctx->out_width, ctx->out_height,
|
||||
ctx->out_stride, ctx->output.stride);
|
||||
}
|
||||
else
|
||||
ctx->scaler_vert(ctx, output, ctx->out_stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
90
gfx/scaler/scaler.h
Normal file
90
gfx/scaler/scaler.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef SCALER_H__
|
||||
#define SCALER_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define FILTER_UNITY (1 << 14)
|
||||
|
||||
// Pixel formats understood by the scaler.
enum scaler_pix_fmt
{
   SCALER_FMT_ARGB8888 = 0,   // 32 bits per pixel; the format the scaling passes work in
   SCALER_FMT_0RGB1555,       // 16 bits per pixel, 5 bits per colour channel
   SCALER_FMT_BGR24           // 24 bits per pixel, stored as B, G, R bytes
};
|
||||
|
||||
// Resampling filter used when the input and output sizes differ.
enum scaler_type
{
   SCALER_TYPE_UNKNOWN = 0,   // not set; filter generation fails
   SCALER_TYPE_POINT,         // one tap per output sample
   SCALER_TYPE_BILINEAR,      // two taps, linear interpolation
   SCALER_TYPE_SINC           // windowed sinc, eight or more taps
};
|
||||
|
||||
// Filter for one axis: a set of fixed-point coefficients (FILTER_UNITY is
// 1.0) and a source start position for every output sample.
struct scaler_filter
{
   int16_t *filter;        // coefficients, filter_stride entries per output sample
   size_t   filter_len;    // number of taps applied per output sample
   size_t   filter_stride; // distance, in coefficients, between consecutive output samples
   int     *filter_pos;    // index of the first source sample for each output sample
};
|
||||
|
||||
struct scaler_ctx
|
||||
{
|
||||
int in_width;
|
||||
int in_height;
|
||||
int in_stride;
|
||||
|
||||
int out_width;
|
||||
int out_height;
|
||||
int out_stride;
|
||||
|
||||
enum scaler_pix_fmt in_fmt;
|
||||
enum scaler_pix_fmt out_fmt;
|
||||
enum scaler_type scaler_type;
|
||||
|
||||
void (*scaler_horiz)(const struct scaler_ctx*,
|
||||
const void*, int);
|
||||
void (*scaler_vert)(const struct scaler_ctx*,
|
||||
void*, int);
|
||||
|
||||
void (*in_pixconv)(void*, const void*, int, int, int, int);
|
||||
void (*out_pixconv)(void*, const void*, int, int, int, int);
|
||||
void (*direct_pixconv)(void*, const void*, int, int, int, int);
|
||||
|
||||
bool unscaled;
|
||||
struct scaler_filter horiz, vert;
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t *frame;
|
||||
int stride;
|
||||
} input;
|
||||
|
||||
struct
|
||||
{
|
||||
uint64_t *frame;
|
||||
int width;
|
||||
int height;
|
||||
int stride;
|
||||
} scaled;
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t *frame;
|
||||
int stride;
|
||||
} output;
|
||||
};
|
||||
|
||||
bool scaler_ctx_gen_filter(struct scaler_ctx *ctx);
|
||||
void scaler_ctx_gen_reset(struct scaler_ctx *ctx);
|
||||
|
||||
void scaler_ctx_scale(const struct scaler_ctx *ctx,
|
||||
void *output, const void *input);
|
||||
|
||||
void *scaler_alloc(size_t elem_size, size_t size);
|
||||
void scaler_free(void *ptr);
|
||||
|
||||
#endif
|
||||
|
214
gfx/scaler/scaler_int.c
Normal file
214
gfx/scaler/scaler_int.c
Normal file
@ -0,0 +1,214 @@
|
||||
#include "scaler_int.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
// Packs four 16-bit channels into one 64-bit pixel, alpha in the top 16
// bits, followed by red, green and blue.
static inline uint64_t build_argb64(uint16_t a, uint16_t r, uint16_t g, uint16_t b)
{
   uint64_t pix = a;

   pix = (pix << 16) | r;
   pix = (pix << 16) | g;
   pix = (pix << 16) | b;

   return pix;
}
|
||||
|
||||
// Saturates a signed 16-bit value to the 0..255 range.
static inline uint8_t clamp_8bit(int16_t col)
{
   if (col < 0)
      return 0;

   return (col > 255) ? 255 : (uint8_t)col;
}
|
||||
|
||||
// ARGB8888 scaler is split in two:
|
||||
//
|
||||
// First, horizontal scaler is applied.
|
||||
// Here, all 8-bit channels are expanded to 16-bit. Values are then shifted 7 to left to occupy 15 bits.
|
||||
// The sign bit is kept empty as we have to do signed multiplication for the filter.
|
||||
// A mulhi [(a * b) >> 16] is applied which loses some precision, but is very efficient for SIMD.
|
||||
// It is accurate enough for 8-bit purposes.
|
||||
//
|
||||
// The fixed point 1.0 for filter is (1 << 14). After horizontal scale, the output is kept
|
||||
// with 16-bit channels, and will now have 13 bits of precision as [(a * (1 << 14)) >> 16] is effectively a right shift by 2.
|
||||
//
|
||||
// Vertical scaler takes the 13 bit channels, and performs the same mulhi steps.
|
||||
// Another 2 bits of precision is lost, which ends up as 11 bits.
|
||||
// Scaling is now complete. Channels are shifted right by 3, and saturated into 8-bit values.
|
||||
//
|
||||
// The C version of scalers perform the exact same operations as the SIMD code for testing purposes.
|
||||
|
||||
#if defined(__SSE2__)
|
||||
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride)
|
||||
{
|
||||
const uint64_t *input = ctx->scaled.frame;
|
||||
uint32_t *output = (uint32_t*)output_;
|
||||
|
||||
const int16_t *filter_vert = ctx->vert.filter;
|
||||
|
||||
for (int h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2)
|
||||
{
|
||||
const uint64_t *input_base = input + ctx->vert.filter_pos[h] * (ctx->scaled.stride >> 3);
|
||||
|
||||
for (int w = 0; w < ctx->out_width; w++)
|
||||
{
|
||||
__m128i res = _mm_setzero_si128();
|
||||
|
||||
const uint64_t *input_base_y = input_base + w;
|
||||
|
||||
size_t y;
|
||||
for (y = 0; (y + 1) < ctx->vert.filter_len; y += 2, input_base_y += (ctx->scaled.stride >> 2))
|
||||
{
|
||||
__m128i coeff = _mm_set_epi64x(filter_vert[y + 1] * 0x0001000100010001ll, filter_vert[y + 0] * 0x0001000100010001ll);
|
||||
__m128i col = _mm_set_epi64x(input_base_y[ctx->scaled.stride >> 3], input_base_y[0]);
|
||||
|
||||
res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res);
|
||||
}
|
||||
|
||||
for (; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3))
|
||||
{
|
||||
__m128i coeff = _mm_set_epi64x(0, filter_vert[y] * 0x0001000100010001ll);
|
||||
__m128i col = _mm_set_epi64x(0, input_base_y[0]);
|
||||
|
||||
res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res);
|
||||
}
|
||||
|
||||
res = _mm_adds_epi16(_mm_srli_si128(res, 8), res);
|
||||
res = _mm_srai_epi16(res, (7 - 2 - 2));
|
||||
|
||||
__m128i final = _mm_packus_epi16(res, res);
|
||||
|
||||
output[w] = _mm_cvtsi128_si32(final);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride)
|
||||
{
|
||||
const uint64_t *input = ctx->scaled.frame;
|
||||
uint32_t *output = output_;
|
||||
|
||||
const int16_t *filter_vert = ctx->vert.filter;
|
||||
|
||||
for (int h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2)
|
||||
{
|
||||
const uint64_t *input_base = input + ctx->vert.filter_pos[h] * (ctx->scaled.stride >> 3);
|
||||
|
||||
for (int w = 0; w < ctx->out_width; w++)
|
||||
{
|
||||
int16_t res_a = 0;
|
||||
int16_t res_r = 0;
|
||||
int16_t res_g = 0;
|
||||
int16_t res_b = 0;
|
||||
|
||||
const uint64_t *input_base_y = input_base + w;
|
||||
for (size_t y = 0; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3))
|
||||
{
|
||||
uint64_t col = *input_base_y;
|
||||
|
||||
int16_t a = (col >> 48) & 0xffff;
|
||||
int16_t r = (col >> 32) & 0xffff;
|
||||
int16_t g = (col >> 16) & 0xffff;
|
||||
int16_t b = (col >> 0) & 0xffff;
|
||||
|
||||
int16_t coeff = filter_vert[y];
|
||||
|
||||
res_a += (a * coeff) >> 16;
|
||||
res_r += (r * coeff) >> 16;
|
||||
res_g += (g * coeff) >> 16;
|
||||
res_b += (b * coeff) >> 16;
|
||||
}
|
||||
|
||||
res_a >>= (7 - 2 - 2);
|
||||
res_r >>= (7 - 2 - 2);
|
||||
res_g >>= (7 - 2 - 2);
|
||||
res_b >>= (7 - 2 - 2);
|
||||
|
||||
output[w] = (clamp_8bit(res_a) << 24) | (clamp_8bit(res_r) << 16) | (clamp_8bit(res_g) << 8) | (clamp_8bit(res_b) << 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride)
|
||||
{
|
||||
const uint32_t *input = (const uint32_t*)input_;
|
||||
uint64_t *output = ctx->scaled.frame;
|
||||
|
||||
for (int h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3)
|
||||
{
|
||||
const int16_t *filter_horiz = ctx->horiz.filter;
|
||||
|
||||
for (int w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride)
|
||||
{
|
||||
__m128i res = _mm_setzero_si128();
|
||||
|
||||
const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w];
|
||||
|
||||
size_t x;
|
||||
for (x = 0; (x + 1) < ctx->horiz.filter_len; x += 2)
|
||||
{
|
||||
__m128i coeff = _mm_set_epi64x(filter_horiz[x + 1] * 0x0001000100010001ll, filter_horiz[x + 0] * 0x0001000100010001ll);
|
||||
|
||||
__m128i col = _mm_unpacklo_epi8(_mm_set_epi64x(0,
|
||||
((uint64_t)input_base_x[x + 1] << 32) | input_base_x[x + 0]), _mm_setzero_si128());
|
||||
|
||||
col = _mm_slli_epi16(col, 7);
|
||||
res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res);
|
||||
}
|
||||
|
||||
for (; x < ctx->horiz.filter_len; x++)
|
||||
{
|
||||
__m128i coeff = _mm_set_epi64x(0, filter_horiz[x] * 0x0001000100010001ll);
|
||||
__m128i col = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, 0, input_base_x[x]), _mm_setzero_si128());
|
||||
|
||||
col = _mm_slli_epi16(col, 7);
|
||||
res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res);
|
||||
}
|
||||
|
||||
res = _mm_adds_epi16(_mm_srli_si128(res, 8), res);
|
||||
output[w] = _mm_cvtsi128_si64(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride)
|
||||
{
|
||||
const uint32_t *input = input_;
|
||||
uint64_t *output = ctx->scaled.frame;
|
||||
|
||||
for (int h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3)
|
||||
{
|
||||
const int16_t *filter_horiz = ctx->horiz.filter;
|
||||
|
||||
for (int w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride)
|
||||
{
|
||||
const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w];
|
||||
|
||||
int16_t res_a = 0;
|
||||
int16_t res_r = 0;
|
||||
int16_t res_g = 0;
|
||||
int16_t res_b = 0;
|
||||
|
||||
for (size_t x = 0; x < ctx->horiz.filter_len; x++)
|
||||
{
|
||||
uint32_t col = input_base_x[x];
|
||||
|
||||
int16_t a = (col >> (24 - 7)) & (0xff << 7);
|
||||
int16_t r = (col >> (16 - 7)) & (0xff << 7);
|
||||
int16_t g = (col >> ( 8 - 7)) & (0xff << 7);
|
||||
int16_t b = (col << ( 0 + 7)) & (0xff << 7);
|
||||
|
||||
int16_t coeff = filter_horiz[x];
|
||||
|
||||
res_a += (a * coeff) >> 16;
|
||||
res_r += (r * coeff) >> 16;
|
||||
res_g += (g * coeff) >> 16;
|
||||
res_b += (b * coeff) >> 16;
|
||||
}
|
||||
|
||||
output[w] = build_argb64(res_a, res_r, res_g, res_b);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
10
gfx/scaler/scaler_int.h
Normal file
10
gfx/scaler/scaler_int.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef SCALER_INT_H__
#define SCALER_INT_H__

#include "scaler.h"

/* Vertical pass: filters the intermediate frame held in ctx and writes
 * packed 32-bit ARGB pixels to output; stride is the output row pitch
 * in bytes. */
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output, int stride);

/* Horizontal pass: filters 32-bit ARGB pixels from input into the
 * intermediate frame held in ctx; stride is the input row pitch in bytes. */
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input, int stride);

#endif
|
||||
|
115
gfx/sdl_gfx.c
115
gfx/sdl_gfx.c
@ -19,6 +19,7 @@
|
||||
#include <string.h>
|
||||
#include "../general.h"
|
||||
#include "../input/rarch_sdl_input.h"
|
||||
#include "scaler/scaler.h"
|
||||
#include "gfx_common.h"
|
||||
#include "gfx_context.h"
|
||||
|
||||
@ -53,6 +54,10 @@ typedef struct sdl_video
|
||||
uint8_t font_g;
|
||||
uint8_t font_b;
|
||||
#endif
|
||||
|
||||
struct scaler_ctx scaler;
|
||||
unsigned last_width;
|
||||
unsigned last_height;
|
||||
} sdl_video_t;
|
||||
|
||||
static void sdl_gfx_free(void *data)
|
||||
@ -71,6 +76,8 @@ static void sdl_gfx_free(void *data)
|
||||
font_renderer_free(vid->font);
|
||||
#endif
|
||||
|
||||
scaler_ctx_gen_reset(&vid->scaler);
|
||||
|
||||
free(vid);
|
||||
}
|
||||
|
||||
@ -268,23 +275,18 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu
|
||||
if (!video->fullscreen)
|
||||
RARCH_LOG("Creating window @ %ux%u\n", video->width, video->height);
|
||||
|
||||
vid->render32 = video->rgb32 && !g_settings.video.force_16bit;
|
||||
vid->render32 = !g_settings.video.force_16bit;
|
||||
vid->screen = SDL_SetVideoMode(video->width, video->height, vid->render32 ? 32 : 15, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? SDL_FULLSCREEN : 0));
|
||||
|
||||
if (!vid->screen && !g_settings.video.force_16bit && !video->rgb32)
|
||||
{
|
||||
vid->upsample = true;
|
||||
vid->screen = SDL_SetVideoMode(video->width, video->height, 32, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? SDL_FULLSCREEN : 0));
|
||||
RARCH_WARN("SDL: 15-bit colors failed, attempting 32-bit colors.\n");
|
||||
vid->render32 = true;
|
||||
}
|
||||
|
||||
if (!vid->screen)
|
||||
{
|
||||
RARCH_ERR("Failed to init SDL surface: %s\n", SDL_GetError());
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!video->rgb32 && vid->render32)
|
||||
vid->upsample = true;
|
||||
|
||||
SDL_ShowCursor(SDL_DISABLE);
|
||||
|
||||
#ifdef HAVE_X11
|
||||
@ -358,6 +360,10 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu
|
||||
vid->convert_32_func = convert_32bit_32bit_shift;
|
||||
}
|
||||
|
||||
vid->scaler.scaler_type = video->smooth ? SCALER_TYPE_BILINEAR : SCALER_TYPE_POINT;
|
||||
vid->scaler.in_fmt = vid->render32 ? SCALER_FMT_ARGB8888 : SCALER_FMT_0RGB1555;
|
||||
vid->scaler.out_fmt = vid->scaler.in_fmt;
|
||||
|
||||
return vid;
|
||||
|
||||
error:
|
||||
@ -375,13 +381,20 @@ static inline uint16_t conv_pixel_32_15(uint32_t pix, const SDL_PixelFormat *fmt
|
||||
|
||||
static inline uint32_t conv_pixel_15_32(uint16_t pix, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
uint32_t r = ((pix >> 10) & 0x1f) << (fmt->Rshift + 3);
|
||||
uint32_t g = ((pix >> 5) & 0x1f) << (fmt->Gshift + 3);
|
||||
uint32_t b = ((pix >> 0) & 0x1f) << (fmt->Bshift + 3);
|
||||
return r | g | b;
|
||||
uint32_t r = (pix >> 10) & 0x1f;
|
||||
uint32_t g = (pix >> 5) & 0x1f;
|
||||
uint32_t b = (pix >> 0) & 0x1f;
|
||||
|
||||
r = (r << 3) | (r >> 2);
|
||||
g = (g << 3) | (g >> 2);
|
||||
b = (b << 3) | (b >> 2);
|
||||
|
||||
return (r << fmt->Rshift) | (g << fmt->Gshift) | (b << fmt->Bshift);
|
||||
}
|
||||
|
||||
static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_32bit_15bit(uint16_t *out, unsigned outpitch,
|
||||
const uint32_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
@ -393,7 +406,9 @@ static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, const uint32_t
|
||||
}
|
||||
}
|
||||
|
||||
static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_15bit_32bit(uint32_t *out, unsigned outpitch,
|
||||
const uint16_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
@ -405,7 +420,9 @@ static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, const uint16_t
|
||||
}
|
||||
}
|
||||
|
||||
static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch,
|
||||
const uint16_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
@ -416,7 +433,9 @@ static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, const u
|
||||
(void)fmt;
|
||||
}
|
||||
|
||||
static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch,
|
||||
const uint32_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
@ -427,12 +446,15 @@ static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, const u
|
||||
(void)fmt;
|
||||
}
|
||||
|
||||
static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch,
|
||||
const uint16_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
uint16_t *dest = out + ((y * outpitch) >> 1);
|
||||
uint16_t *dest = out + ((y * outpitch) >> 1);
|
||||
const uint16_t *src = input + ((y * pitch) >> 1);
|
||||
|
||||
for (unsigned x = 0; x < width; x++)
|
||||
{
|
||||
uint16_t color = src[x];
|
||||
@ -444,12 +466,15 @@ static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, const ui
|
||||
}
|
||||
}
|
||||
|
||||
static void convert_32bit_32bit_shift(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
static void convert_32bit_32bit_shift(uint32_t *out, unsigned outpitch,
|
||||
const uint32_t *input, unsigned width, unsigned height,
|
||||
unsigned pitch, const SDL_PixelFormat *fmt)
|
||||
{
|
||||
for (unsigned y = 0; y < height; y++)
|
||||
{
|
||||
uint32_t *dest = out + ((y * outpitch) >> 2);
|
||||
uint32_t *dest = out + ((y * outpitch) >> 2);
|
||||
const uint32_t *src = input + ((y * pitch) >> 2);
|
||||
|
||||
for (unsigned x = 0; x < width; x++)
|
||||
{
|
||||
uint32_t color = src[x];
|
||||
@ -488,43 +513,51 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne
|
||||
if (SDL_MUSTLOCK(vid->buffer))
|
||||
SDL_LockSurface(vid->buffer);
|
||||
|
||||
// :(
|
||||
// 15-bit -> 32-bit (Sometimes 15-bit won't work on "modern" OSes :\)
|
||||
// 15-bit -> 32-bit.
|
||||
if (vid->upsample)
|
||||
convert_15bit_32bit((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format);
|
||||
// 15-bit -> 15-bit
|
||||
else if (!vid->rgb32)
|
||||
vid->convert_15_func((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format);
|
||||
// 32-bit -> 15-bit
|
||||
else if (vid->rgb32 && g_settings.video.force_16bit)
|
||||
else if (vid->rgb32 && !vid->render32)
|
||||
convert_32bit_15bit((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format);
|
||||
// 32-bit -> 32-bit
|
||||
else
|
||||
vid->convert_32_func((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format);
|
||||
|
||||
if (width != vid->last_width || height != vid->last_height)
|
||||
{
|
||||
vid->scaler.in_width = width;
|
||||
vid->scaler.in_height = height;
|
||||
vid->scaler.in_stride = vid->buffer->pitch;
|
||||
|
||||
vid->scaler.out_width = vid->screen->w;
|
||||
vid->scaler.out_height = vid->screen->h;
|
||||
vid->scaler.out_stride = vid->screen->pitch;
|
||||
|
||||
scaler_ctx_gen_filter(&vid->scaler);
|
||||
|
||||
vid->last_width = width;
|
||||
vid->last_height = height;
|
||||
}
|
||||
|
||||
if (SDL_MUSTLOCK(vid->screen))
|
||||
SDL_LockSurface(vid->screen);
|
||||
|
||||
scaler_ctx_scale(&vid->scaler, vid->screen->pixels, vid->buffer->pixels);
|
||||
|
||||
if (SDL_MUSTLOCK(vid->buffer))
|
||||
SDL_UnlockSurface(vid->buffer);
|
||||
|
||||
SDL_Rect src = {0};
|
||||
src.x = 0;
|
||||
src.y = 0;
|
||||
src.w = width;
|
||||
src.h = height;
|
||||
|
||||
SDL_Rect dest = {0};
|
||||
dest.x = 0;
|
||||
dest.y = 0;
|
||||
dest.w = vid->screen->w;
|
||||
dest.h = vid->screen->h;
|
||||
|
||||
SDL_SoftStretch(vid->buffer, &src, vid->screen, &dest);
|
||||
if (SDL_MUSTLOCK(vid->screen))
|
||||
SDL_UnlockSurface(vid->screen);
|
||||
|
||||
if (msg)
|
||||
{
|
||||
if ((!vid->rgb32 || g_settings.video.force_16bit) && !vid->upsample)
|
||||
sdl_render_msg_15(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format);
|
||||
else
|
||||
if (vid->render32)
|
||||
sdl_render_msg_32(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format);
|
||||
else
|
||||
sdl_render_msg_15(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format);
|
||||
}
|
||||
|
||||
char buf[128];
|
||||
|
Loading…
x
Reference in New Issue
Block a user