mirror of
https://github.com/libretro/RetroArch
synced 2025-04-25 09:02:44 +00:00
(scaler_int) Cleanups
This commit is contained in:
parent
372c51f5b9
commit
1a04875a6c
@ -53,7 +53,6 @@
|
|||||||
* The C version of scalers perform the exact same operations as the SIMD code for testing purposes.
|
* The C version of scalers perform the exact same operations as the SIMD code for testing purposes.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
|
||||||
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride)
|
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride)
|
||||||
{
|
{
|
||||||
int h, w, y;
|
int h, w, y;
|
||||||
@ -68,11 +67,11 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid
|
|||||||
|
|
||||||
for (w = 0; w < ctx->out_width; w++)
|
for (w = 0; w < ctx->out_width; w++)
|
||||||
{
|
{
|
||||||
|
const uint64_t *input_base_y = input_base + w;
|
||||||
|
#if defined(__SSE2__)
|
||||||
__m128i final;
|
__m128i final;
|
||||||
__m128i res = _mm_setzero_si128();
|
__m128i res = _mm_setzero_si128();
|
||||||
|
|
||||||
const uint64_t *input_base_y = input_base + w;
|
|
||||||
|
|
||||||
for (y = 0; (y + 1) < ctx->vert.filter_len; y += 2, input_base_y += (ctx->scaled.stride >> 2))
|
for (y = 0; (y + 1) < ctx->vert.filter_len; y += 2, input_base_y += (ctx->scaled.stride >> 2))
|
||||||
{
|
{
|
||||||
__m128i coeff = _mm_set_epi64x(filter_vert[y + 1] * 0x0001000100010001ll, filter_vert[y + 0] * 0x0001000100010001ll);
|
__m128i coeff = _mm_set_epi64x(filter_vert[y + 1] * 0x0001000100010001ll, filter_vert[y + 0] * 0x0001000100010001ll);
|
||||||
@ -95,30 +94,12 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid
|
|||||||
final = _mm_packus_epi16(res, res);
|
final = _mm_packus_epi16(res, res);
|
||||||
|
|
||||||
output[w] = _mm_cvtsi128_si32(final);
|
output[w] = _mm_cvtsi128_si32(final);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int stride)
|
|
||||||
{
|
|
||||||
int h, w, y;
|
|
||||||
const uint64_t *input = ctx->scaled.frame;
|
|
||||||
uint32_t *output = (uint32_t*)output_;
|
|
||||||
|
|
||||||
const int16_t *filter_vert = ctx->vert.filter;
|
|
||||||
|
|
||||||
for (h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2)
|
|
||||||
{
|
|
||||||
const uint64_t *input_base = input + ctx->vert.filter_pos[h] * (ctx->scaled.stride >> 3);
|
|
||||||
|
|
||||||
for (w = 0; w < ctx->out_width; w++)
|
|
||||||
{
|
|
||||||
int16_t res_a = 0;
|
int16_t res_a = 0;
|
||||||
int16_t res_r = 0;
|
int16_t res_r = 0;
|
||||||
int16_t res_g = 0;
|
int16_t res_g = 0;
|
||||||
int16_t res_b = 0;
|
int16_t res_b = 0;
|
||||||
|
|
||||||
const uint64_t *input_base_y = input_base + w;
|
|
||||||
for (y = 0; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3))
|
for (y = 0; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3))
|
||||||
{
|
{
|
||||||
uint64_t col = *input_base_y;
|
uint64_t col = *input_base_y;
|
||||||
@ -141,17 +122,24 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid
|
|||||||
res_g >>= (7 - 2 - 2);
|
res_g >>= (7 - 2 - 2);
|
||||||
res_b >>= (7 - 2 - 2);
|
res_b >>= (7 - 2 - 2);
|
||||||
|
|
||||||
output[w] = (clamp_8bit(res_a) << 24) | (clamp_8bit(res_r) << 16) | (clamp_8bit(res_g) << 8) | (clamp_8bit(res_b) << 0);
|
output[w] = (clamp_8bit(res_a) << 24) | (clamp_8bit(res_r) << 16) |
|
||||||
|
(clamp_8bit(res_g) << 8) | (clamp_8bit(res_b) << 0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(__SSE2__)
|
||||||
|
static INLINE uint64_t build_argb64(uint16_t a, uint16_t r, uint16_t g, uint16_t b)
|
||||||
|
{
|
||||||
|
return ((uint64_t)a << 48) | ((uint64_t)r << 32) | ((uint64_t)g << 16) | ((uint64_t)b << 0);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
|
||||||
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride)
|
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride)
|
||||||
{
|
{
|
||||||
int h, w, x;
|
int h, w, x;
|
||||||
const uint32_t *input = (const uint32_t*)input_;
|
const uint32_t *input = (uint32_t*)input_;
|
||||||
uint64_t *output = ctx->scaled.frame;
|
uint64_t *output = ctx->scaled.frame;
|
||||||
|
|
||||||
for (h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3)
|
for (h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3)
|
||||||
@ -160,9 +148,9 @@ void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int
|
|||||||
|
|
||||||
for (w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride)
|
for (w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride)
|
||||||
{
|
{
|
||||||
__m128i res = _mm_setzero_si128();
|
|
||||||
|
|
||||||
const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w];
|
const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w];
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
__m128i res = _mm_setzero_si128();
|
||||||
|
|
||||||
for (x = 0; (x + 1) < ctx->horiz.filter_len; x += 2)
|
for (x = 0; (x + 1) < ctx->horiz.filter_len; x += 2)
|
||||||
{
|
{
|
||||||
@ -198,29 +186,7 @@ void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int
|
|||||||
u.u32[0] = _mm_cvtsi128_si32(res);
|
u.u32[0] = _mm_cvtsi128_si32(res);
|
||||||
u.u32[1] = _mm_cvtsi128_si32(_mm_srli_si128(res, 4));
|
u.u32[1] = _mm_cvtsi128_si32(_mm_srli_si128(res, 4));
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
static INLINE uint64_t build_argb64(uint16_t a, uint16_t r, uint16_t g, uint16_t b)
|
|
||||||
{
|
|
||||||
return ((uint64_t)a << 48) | ((uint64_t)r << 32) | ((uint64_t)g << 16) | ((uint64_t)b << 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int stride)
|
|
||||||
{
|
|
||||||
int h, w, x;
|
|
||||||
const uint32_t *input = (uint32_t*)input_;
|
|
||||||
uint64_t *output = ctx->scaled.frame;
|
|
||||||
|
|
||||||
for (h = 0; h < ctx->scaled.height; h++, input += stride >> 2, output += ctx->scaled.stride >> 3)
|
|
||||||
{
|
|
||||||
const int16_t *filter_horiz = ctx->horiz.filter;
|
|
||||||
|
|
||||||
for (w = 0; w < ctx->scaled.width; w++, filter_horiz += ctx->horiz.filter_stride)
|
|
||||||
{
|
|
||||||
const uint32_t *input_base_x = input + ctx->horiz.filter_pos[w];
|
|
||||||
|
|
||||||
int16_t res_a = 0;
|
int16_t res_a = 0;
|
||||||
int16_t res_r = 0;
|
int16_t res_r = 0;
|
||||||
int16_t res_g = 0;
|
int16_t res_g = 0;
|
||||||
@ -244,10 +210,10 @@ void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
output[w] = build_argb64(res_a, res_r, res_g, res_b);
|
output[w] = build_argb64(res_a, res_r, res_g, res_b);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
void scaler_argb8888_point_special(const struct scaler_ctx *ctx,
|
void scaler_argb8888_point_special(const struct scaler_ctx *ctx,
|
||||||
void *output_, const void *input_,
|
void *output_, const void *input_,
|
||||||
@ -256,23 +222,18 @@ void scaler_argb8888_point_special(const struct scaler_ctx *ctx,
|
|||||||
int out_stride, int in_stride)
|
int out_stride, int in_stride)
|
||||||
{
|
{
|
||||||
int h, w;
|
int h, w;
|
||||||
const uint32_t *input = NULL;
|
|
||||||
uint32_t *output = NULL;
|
|
||||||
int x_pos = (1 << 15) * in_width / out_width - (1 << 15);
|
int x_pos = (1 << 15) * in_width / out_width - (1 << 15);
|
||||||
int x_step = (1 << 16) * in_width / out_width;
|
int x_step = (1 << 16) * in_width / out_width;
|
||||||
int y_pos = (1 << 15) * in_height / out_height - (1 << 15);
|
int y_pos = (1 << 15) * in_height / out_height - (1 << 15);
|
||||||
int y_step = (1 << 16) * in_height / out_height;
|
int y_step = (1 << 16) * in_height / out_height;
|
||||||
|
const uint32_t *input = (const uint32_t*)input_;
|
||||||
(void)ctx;
|
uint32_t *output = (uint32_t*)output_;
|
||||||
|
|
||||||
if (x_pos < 0)
|
if (x_pos < 0)
|
||||||
x_pos = 0;
|
x_pos = 0;
|
||||||
if (y_pos < 0)
|
if (y_pos < 0)
|
||||||
y_pos = 0;
|
y_pos = 0;
|
||||||
|
|
||||||
input = (const uint32_t*)input_;
|
|
||||||
output = (uint32_t*)output_;
|
|
||||||
|
|
||||||
for (h = 0; h < out_height; h++, y_pos += y_step, output += out_stride >> 2)
|
for (h = 0; h < out_height; h++, y_pos += y_step, output += out_stride >> 2)
|
||||||
{
|
{
|
||||||
int x = x_pos;
|
int x = x_pos;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user