mirror of
https://github.com/libretro/RetroArch
synced 2025-04-17 02:43:03 +00:00
Optimize sinc resampler a bit.
This commit is contained in:
parent
bb824b5679
commit
0496ffc007
151
audio/sinc.c
151
audio/sinc.c
@ -19,6 +19,7 @@
|
|||||||
// Only suitable as an upsampler, as there is no low-pass filter stage.
|
// Only suitable as an upsampler, as there is no low-pass filter stage.
|
||||||
|
|
||||||
#include "resampler.h"
|
#include "resampler.h"
|
||||||
|
#include "../general.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -29,6 +30,10 @@
|
|||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if __SSE3__
|
||||||
|
#include <pmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define PHASE_BITS 8
|
#define PHASE_BITS 8
|
||||||
#define SUBPHASE_BITS 16
|
#define SUBPHASE_BITS 16
|
||||||
|
|
||||||
@ -41,11 +46,14 @@
|
|||||||
#define FRAMES_SHIFT (PHASE_BITS + SUBPHASE_BITS)
|
#define FRAMES_SHIFT (PHASE_BITS + SUBPHASE_BITS)
|
||||||
|
|
||||||
#define SIDELOBES 8
|
#define SIDELOBES 8
|
||||||
|
#define TAPS (SIDELOBES * 2)
|
||||||
|
|
||||||
|
#define PHASE_INDEX 0
|
||||||
|
#define DELTA_INDEX 1
|
||||||
|
|
||||||
struct ssnes_resampler
|
struct ssnes_resampler
|
||||||
{
|
{
|
||||||
float phase_table[PHASES + 1][SIDELOBES];
|
float phase_table[PHASES][2][2 * SIDELOBES];
|
||||||
float delta_table[PHASES + 1][SIDELOBES];
|
|
||||||
float buffer_l[2 * SIDELOBES];
|
float buffer_l[2 * SIDELOBES];
|
||||||
float buffer_r[2 * SIDELOBES];
|
float buffer_r[2 * SIDELOBES];
|
||||||
|
|
||||||
@ -75,19 +83,26 @@ static inline double blackman(double index)
|
|||||||
|
|
||||||
static void init_sinc_table(ssnes_resampler_t *resamp)
|
static void init_sinc_table(ssnes_resampler_t *resamp)
|
||||||
{
|
{
|
||||||
for (unsigned i = 0; i <= PHASES; i++)
|
// Sinc phases: [..., p + 3, p + 2, p + 1, p + 0, p - 1, p - 2, p - 3, p - 4, ...]
|
||||||
|
for (int i = 0; i < PHASES; i++)
|
||||||
{
|
{
|
||||||
for (unsigned j = 0; j < SIDELOBES; j++)
|
for (int j = 0; j < 2 * SIDELOBES; j++)
|
||||||
{
|
{
|
||||||
double sinc_phase = M_PI * ((double)i / PHASES + (double)j);
|
double p = (double)i / PHASES;
|
||||||
resamp->phase_table[i][j] = sinc(sinc_phase) * blackman(sinc_phase / SIDELOBES);
|
double sinc_phase = M_PI * (p + (SIDELOBES - 1 - j));
|
||||||
|
resamp->phase_table[i][PHASE_INDEX][j] = sinc(sinc_phase) * blackman(sinc_phase / SIDELOBES);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optimize linear interpolation.
|
// Optimize linear interpolation.
|
||||||
for (unsigned i = 0; i < PHASES; i++)
|
for (int i = 0; i < PHASES - 1; i++)
|
||||||
for (unsigned j = 0; j < SIDELOBES; j++)
|
{
|
||||||
resamp->delta_table[i][j] = resamp->phase_table[i + 1][j] - resamp->phase_table[i][j];
|
for (int j = 0; j < 2 * SIDELOBES; j++)
|
||||||
|
{
|
||||||
|
resamp->phase_table[i][DELTA_INDEX][j] =
|
||||||
|
(resamp->phase_table[i + 1][PHASE_INDEX][j] - resamp->phase_table[i][PHASE_INDEX][j]) / SUBPHASES;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ssnes_resampler_t *resampler_new(void)
|
ssnes_resampler_t *resampler_new(void)
|
||||||
@ -99,6 +114,15 @@ ssnes_resampler_t *resampler_new(void)
|
|||||||
memset(re, 0, sizeof(*re));
|
memset(re, 0, sizeof(*re));
|
||||||
|
|
||||||
init_sinc_table(re);
|
init_sinc_table(re);
|
||||||
|
|
||||||
|
#if __SSE3__
|
||||||
|
SSNES_LOG("Sinc resampler [SSE3]\n");
|
||||||
|
#elif __SSE__
|
||||||
|
SSNES_LOG("Sinc resampler [SSE]\n");
|
||||||
|
#else
|
||||||
|
SSNES_LOG("Sinc resampler [C]\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,68 +137,41 @@ static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
|
|||||||
|
|
||||||
unsigned phase = resamp->time >> PHASES_SHIFT;
|
unsigned phase = resamp->time >> PHASES_SHIFT;
|
||||||
unsigned delta = (resamp->time >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
unsigned delta = (resamp->time >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
||||||
__m128 delta_f = _mm_set1_ps((float)delta / SUBPHASES);
|
__m128 delta_f = _mm_set1_ps(delta);
|
||||||
|
|
||||||
const float *phase_table = resamp->phase_table[phase];
|
const float *phase_table = resamp->phase_table[phase][PHASE_INDEX];
|
||||||
const float *delta_table = resamp->delta_table[phase];
|
const float *delta_table = resamp->phase_table[phase][DELTA_INDEX];
|
||||||
|
|
||||||
__m128 sinc_vals[SIDELOBES / 4];
|
for (unsigned i = 0; i < TAPS; i += 4)
|
||||||
for (unsigned i = 0; i < SIDELOBES; i += 4)
|
|
||||||
{
|
{
|
||||||
|
__m128 buf_l = _mm_load_ps(buffer_l + i);
|
||||||
|
__m128 buf_r = _mm_load_ps(buffer_r + i);
|
||||||
|
|
||||||
__m128 phases = _mm_load_ps(phase_table + i);
|
__m128 phases = _mm_load_ps(phase_table + i);
|
||||||
__m128 deltas = _mm_load_ps(delta_table + i);
|
__m128 deltas = _mm_load_ps(delta_table + i);
|
||||||
sinc_vals[i / 4] = _mm_add_ps(phases, _mm_mul_ps(deltas, delta_f));
|
|
||||||
|
__m128 sinc = _mm_add_ps(phases, _mm_mul_ps(deltas, delta_f));
|
||||||
|
|
||||||
|
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, sinc));
|
||||||
|
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, sinc));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Older data.
|
#if __SSE3__
|
||||||
for (unsigned i = 0; i < SIDELOBES; i += 4)
|
__m128 res = _mm_hadd_ps(_mm_hadd_ps(sum_l, sum_r), _mm_setzero_ps());
|
||||||
{
|
_mm_storeu_ps(out_buffer, res); // Overwriting, but this is safe.
|
||||||
__m128 buf_l = _mm_loadr_ps(buffer_l + SIDELOBES - 4 - i);
|
#else // Meh, compiler should optimize this to something sane.
|
||||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, sinc_vals[i / 4]));
|
|
||||||
|
|
||||||
__m128 buf_r = _mm_loadr_ps(buffer_r + SIDELOBES - 4 - i);
|
|
||||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, sinc_vals[i / 4]));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Newer data.
|
|
||||||
unsigned reverse_phase = PHASES_WRAP - resamp->time;
|
|
||||||
phase = reverse_phase >> PHASES_SHIFT;
|
|
||||||
delta = (reverse_phase >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
|
||||||
delta_f = _mm_set1_ps((float)delta / SUBPHASES);
|
|
||||||
|
|
||||||
phase_table = resamp->phase_table[phase];
|
|
||||||
delta_table = resamp->delta_table[phase];
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < SIDELOBES; i += 4)
|
|
||||||
{
|
|
||||||
__m128 phases = _mm_load_ps(phase_table + i);
|
|
||||||
__m128 deltas = _mm_load_ps(delta_table + i);
|
|
||||||
sinc_vals[i / 4] = _mm_add_ps(phases, _mm_mul_ps(deltas, delta_f));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < SIDELOBES; i += 4)
|
|
||||||
{
|
|
||||||
__m128 buf_l = _mm_load_ps(buffer_l + SIDELOBES + i);
|
|
||||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, sinc_vals[i / 4]));
|
|
||||||
|
|
||||||
__m128 buf_r = _mm_load_ps(buffer_r + SIDELOBES + i);
|
|
||||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, sinc_vals[i / 4]));
|
|
||||||
}
|
|
||||||
|
|
||||||
// This can be done faster with _mm_hadd_ps(), but it's SSE3 :(
|
|
||||||
__m128 sum_shuf_l = _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(1, 0, 1, 0));
|
|
||||||
__m128 sum_shuf_r = _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2));
|
|
||||||
__m128 sum = _mm_add_ps(sum_shuf_l, sum_shuf_r);
|
|
||||||
|
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
float f[4];
|
float f[4];
|
||||||
__m128 v;
|
__m128 v;
|
||||||
} u;
|
} u[2] = {
|
||||||
u.v = sum;
|
[0] = { .v = sum_l },
|
||||||
|
[1] = { .v = sum_r },
|
||||||
|
};
|
||||||
|
|
||||||
out_buffer[0] = u.f[0] + u.f[1];
|
out_buffer[0] = u[0].f[0] + u[0].f[1] + u[0].f[2] + u[0].f[3];
|
||||||
out_buffer[1] = u.f[2] + u.f[3];
|
out_buffer[1] = u[1].f[0] + u[1].f[1] + u[1].f[2] + u[1].f[3];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#else // Plain ol' C99
|
#else // Plain ol' C99
|
||||||
static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
|
static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
|
||||||
@ -186,38 +183,16 @@ static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
|
|||||||
|
|
||||||
unsigned phase = resamp->time >> PHASES_SHIFT;
|
unsigned phase = resamp->time >> PHASES_SHIFT;
|
||||||
unsigned delta = (resamp->time >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
unsigned delta = (resamp->time >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
||||||
float delta_f = (float)delta / SUBPHASES;
|
float delta_f = (float)delta;
|
||||||
|
|
||||||
const float *phase_table = resamp->phase_table[phase];
|
const float *phase_table = resamp->phase_table[phase][PHASE_INDEX];
|
||||||
const float *delta_table = resamp->delta_table[phase];
|
const float *delta_table = resamp->phase_table[phase][DELTA_INDEX];
|
||||||
|
|
||||||
float sinc_vals[SIDELOBES];
|
for (unsigned i = 0; i < TAPS; i++)
|
||||||
for (unsigned i = 0; i < SIDELOBES; i++)
|
|
||||||
sinc_vals[i] = phase_table[i] + delta_f * delta_table[i];
|
|
||||||
|
|
||||||
// Older data.
|
|
||||||
for (unsigned i = 0; i < SIDELOBES; i++)
|
|
||||||
{
|
{
|
||||||
sum_l += buffer_l[SIDELOBES - 1 - i] * sinc_vals[i];
|
float sinc_val = phase_table[i] + delta_f * delta_table[i];
|
||||||
sum_r += buffer_r[SIDELOBES - 1 - i] * sinc_vals[i];
|
sum_l += buffer_l[i] * sinc_val;
|
||||||
}
|
sum_r += buffer_r[i] * sinc_val;
|
||||||
|
|
||||||
// Newer data.
|
|
||||||
unsigned reverse_phase = PHASES_WRAP - resamp->time;
|
|
||||||
phase = reverse_phase >> PHASES_SHIFT;
|
|
||||||
delta = (reverse_phase >> SUBPHASES_SHIFT) & SUBPHASES_MASK;
|
|
||||||
delta_f = (float)delta / SUBPHASES;
|
|
||||||
|
|
||||||
phase_table = resamp->phase_table[phase];
|
|
||||||
delta_table = resamp->delta_table[phase];
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < SIDELOBES; i++)
|
|
||||||
sinc_vals[i] = phase_table[i] + delta_f * delta_table[i];
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < SIDELOBES; i++)
|
|
||||||
{
|
|
||||||
sum_l += buffer_l[SIDELOBES + i] * sinc_vals[i];
|
|
||||||
sum_r += buffer_r[SIDELOBES + i] * sinc_vals[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out_buffer[0] = sum_l;
|
out_buffer[0] = sum_l;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user