Merge pull request #11480 from jdgleaver/scale2x-refactor

Refactor Scale2x video filter (crash fix + slight performance improvement)
This commit is contained in:
Autechre 2020-10-24 01:33:41 +02:00 committed by GitHub
commit d9af8ea9ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,6 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2011-2017 - Daniel De Matteis
* Copyright (C) 2011-2018 - Daniel De Matteis
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
@ -18,6 +18,7 @@
#include "softfilter.h"
#include <stdlib.h>
#include <string.h>
#ifdef RARCH_INTERNAL
#define softfilter_get_implementation scale2x_get_implementation
@ -25,8 +26,6 @@
#define filter_data scale2x_filter_data
#endif
#define SCALE2X_SCALE 2
struct softfilter_thread_data
{
void *out_data;
@ -47,67 +46,6 @@ struct filter_data
unsigned in_fmt;
};
#define SCALE2X_GENERIC(typename_t, width, height, first, last, src, src_stride, dst, dst_stride, out0, out1) \
for (y = 0; y < height; ++y) \
{ \
const int prevline = ((y == 0) && first) ? 0 : src_stride; \
const int nextline = ((y == height - 1) && last) ? 0 : src_stride; \
\
for (x = 0; x < width; ++x) \
{ \
const typename_t A = *(src - prevline); \
const typename_t B = (x > 0) ? *(src - 1) : *src; \
const typename_t C = *src; \
const typename_t D = (x < width - 1) ? *(src + 1) : *src; \
const typename_t E = *(src++ + nextline); \
\
if (A != E && B != D) \
{ \
*out0++ = (A == B ? A : C); \
*out0++ = (A == D ? A : C); \
*out1++ = (E == B ? E : C); \
*out1++ = (E == D ? E : C); \
} \
else \
{ \
*out0++ = C; \
*out0++ = C; \
*out1++ = C; \
*out1++ = C; \
} \
} \
\
src += src_stride - width; \
out0 += dst_stride + dst_stride - (width * SCALE2X_SCALE); \
out1 += dst_stride + dst_stride - (width * SCALE2X_SCALE); \
}
static void scale2x_generic_rgb565(unsigned width, unsigned height,
int first, int last,
const uint16_t *src, unsigned src_stride,
uint16_t *dst, unsigned dst_stride)
{
unsigned x, y;
uint16_t *out0, *out1;
out0 = (uint16_t*)dst;
out1 = (uint16_t*)(dst + dst_stride);
SCALE2X_GENERIC(uint16_t, width, height, first, last,
src, src_stride, dst, dst_stride, out0, out1);
}
static void scale2x_generic_xrgb8888(unsigned width, unsigned height,
int first, int last,
const uint32_t *src, unsigned src_stride,
uint32_t *dst, unsigned dst_stride)
{
unsigned x, y;
uint32_t *out0 = (uint32_t*)dst;
uint32_t *out1 = (uint32_t*)(dst + dst_stride);
SCALE2X_GENERIC(uint32_t, width, height, first, last,
src, src_stride, dst, dst_stride, out0, out1);
}
static unsigned scale2x_generic_input_fmts(void)
{
return SOFTFILTER_FMT_XRGB8888 | SOFTFILTER_FMT_RGB565;
@ -133,14 +71,16 @@ static void *scale2x_generic_create(const struct softfilter_config *config,
(void)simd;
(void)config;
(void)userdata;
if (!filt)
if (!filt) {
return NULL;
filt->workers = (struct softfilter_thread_data*)
calloc(threads, sizeof(struct softfilter_thread_data));
}
/* Apparently the code is not thread-safe,
* so force single threaded operation... */
filt->workers = (struct softfilter_thread_data*)calloc(1, sizeof(struct softfilter_thread_data));
filt->threads = 1;
filt->in_fmt = in_fmt;
if (!filt->workers)
{
if (!filt->workers) {
free(filt);
return NULL;
}
@ -151,51 +91,114 @@ static void scale2x_generic_output(void *data,
unsigned *out_width, unsigned *out_height,
unsigned width, unsigned height)
{
*out_width = width * SCALE2X_SCALE;
*out_height = height * SCALE2X_SCALE;
*out_width = width << 1;
*out_height = height << 1;
}
static void scale2x_generic_destroy(void *data)
{
struct filter_data *filt = (struct filter_data*)data;
if (!filt)
if (!filt) {
return;
}
free(filt->workers);
free(filt);
}
static void scale2x_work_cb_xrgb8888(void *data, void *thread_data)
{
struct softfilter_thread_data *thr =
(struct softfilter_thread_data*)thread_data;
const uint32_t *input = (const uint32_t*)thr->in_data;
uint32_t *output = (uint32_t*)thr->out_data;
unsigned width = thr->width;
unsigned height = thr->height;
struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
uint32_t in_stride = (uint32_t)(thr->in_pitch >> 2);
uint32_t out_stride = (uint32_t)(thr->out_pitch >> 2);
const uint32_t *input = (const uint32_t*)thr->in_data;
uint32_t *output0 = (uint32_t*)thr->out_data;
uint32_t *output1 = (uint32_t*)thr->out_data + out_stride;
uint16_t x, y;
scale2x_generic_xrgb8888(width, height,
thr->first, thr->last, input,
(unsigned)(thr->in_pitch / SOFTFILTER_BPP_XRGB8888),
output,
(unsigned)(thr->out_pitch / SOFTFILTER_BPP_XRGB8888));
for (y = 0; y < thr->height; y++)
{
/* Determine offsets of previous/next source lines */
uint32_t line_prev = (y == 0) ? 0 : in_stride;
uint32_t line_next = (y == thr->height - 1) ? 0 : in_stride;
for (x = 0; x < thr->width; x++)
{
/* Get sample points */
uint32_t A = *(input - line_prev);
uint32_t B = (x > 0) ? *(input - 1) : *input;
uint32_t C = *input;
uint32_t D = (x < thr->width - 1) ? *(input + 1) : *input;
uint32_t E = *(input++ + line_next);
/* Apply pixel expansion algorithm */
if (A != E && B != D)
{
*output0++ = (A == B ? A : C);
*output0++ = (A == D ? A : C);
*output1++ = (E == B ? E : C);
*output1++ = (E == D ? E : C);
}
else
{
*output0++ = C;
*output0++ = C;
*output1++ = C;
*output1++ = C;
}
}
input += in_stride - thr->width;
output0 += (out_stride << 1) - (thr->width << 1);
output1 += (out_stride << 1) - (thr->width << 1);
}
}
static void scale2x_work_cb_rgb565(void *data, void *thread_data)
{
struct softfilter_thread_data *thr =
(struct softfilter_thread_data*)thread_data;
const uint16_t *input = (const uint16_t*)thr->in_data;
uint16_t *output = (uint16_t*)thr->out_data;
unsigned width = thr->width;
unsigned height = thr->height;
struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
uint32_t in_stride = (uint32_t)(thr->in_pitch >> 1);
uint32_t out_stride = (uint32_t)(thr->out_pitch >> 1);
const uint16_t *input = (const uint16_t*)thr->in_data;
uint16_t *output0 = (uint16_t*)thr->out_data;
uint16_t *output1 = (uint16_t*)thr->out_data + out_stride;
uint16_t x, y;
scale2x_generic_rgb565(width, height,
thr->first, thr->last, input,
(unsigned)(thr->in_pitch / SOFTFILTER_BPP_RGB565),
output,
(unsigned)(thr->out_pitch / SOFTFILTER_BPP_RGB565));
for (y = 0; y < thr->height; y++)
{
/* Determine offsets of previous/next source lines */
uint32_t line_prev = (y == 0) ? 0 : in_stride;
uint32_t line_next = (y == thr->height - 1) ? 0 : in_stride;
for (x = 0; x < thr->width; x++)
{
/* Get sample points */
uint16_t A = *(input - line_prev);
uint16_t B = (x > 0) ? *(input - 1) : *input;
uint16_t C = *input;
uint16_t D = (x < thr->width - 1) ? *(input + 1) : *input;
uint16_t E = *(input++ + line_next);
/* Apply pixel expansion algorithm */
if (A != E && B != D)
{
*output0++ = (A == B ? A : C);
*output0++ = (A == D ? A : C);
*output1++ = (E == B ? E : C);
*output1++ = (E == D ? E : C);
}
else
{
*output0++ = C;
*output0++ = C;
*output1++ = C;
*output1++ = C;
}
}
input += in_stride - thr->width;
output0 += (out_stride << 1) - (thr->width << 1);
output1 += (out_stride << 1) - (thr->width << 1);
}
}
static void scale2x_generic_packets(void *data,
@ -203,34 +206,27 @@ static void scale2x_generic_packets(void *data,
void *output, size_t output_stride,
const void *input, unsigned width, unsigned height, size_t input_stride)
{
/* We are guaranteed single threaded operation
* (filt->threads = 1) so we don't need to loop
* over threads and can cull some code. This only
* makes the tiniest performance difference, but
* every little helps when running on an o3DS... */
struct filter_data *filt = (struct filter_data*)data;
unsigned i;
for (i = 0; i < filt->threads; i++)
{
struct softfilter_thread_data *thr =
(struct softfilter_thread_data*)&filt->workers[i];
struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0];
unsigned y_start = (height * i) / filt->threads;
unsigned y_end = (height * (i + 1)) / filt->threads;
thr->out_data = (uint8_t*)output + y_start *
SCALE2X_SCALE * output_stride;
thr->in_data = (const uint8_t*)input + y_start * input_stride;
thr->out_pitch = output_stride;
thr->in_pitch = input_stride;
thr->width = width;
thr->height = y_end - y_start;
thr->out_data = (uint8_t*)output;
thr->in_data = (const uint8_t*)input;
thr->out_pitch = output_stride;
thr->in_pitch = input_stride;
thr->width = width;
thr->height = height;
/* Workers need to know if they can access pixels
* outside their given buffer. */
thr->first = y_start;
thr->last = y_end == height;
if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888)
packets[i].work = scale2x_work_cb_xrgb8888;
else if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
packets[i].work = scale2x_work_cb_rgb565;
packets[i].thread_data = thr;
if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888) {
packets[0].work = scale2x_work_cb_xrgb8888;
} else if (filt->in_fmt == SOFTFILTER_FMT_RGB565) {
packets[0].work = scale2x_work_cb_rgb565;
}
packets[0].thread_data = thr;
}
static const struct softfilter_implementation scale2x_generic = {
@ -243,6 +239,7 @@ static const struct softfilter_implementation scale2x_generic = {
scale2x_generic_threads,
scale2x_generic_output,
scale2x_generic_packets,
SOFTFILTER_API_VERSION,
"Scale2x",
"scale2x",