mirror of
https://github.com/libretro/RetroArch
synced 2025-01-30 12:32:52 +00:00
(resamplers) Turn some 1-line inline functions into macros
This commit is contained in:
parent
7bcd4dd04d
commit
33feed8aa7
@ -419,10 +419,7 @@ static INLINE float cc_int(float x, float b)
|
|||||||
return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val;
|
return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE float cc_kernel(float x, float b)
|
#define cc_kernel(x, b) ((cc_int((x) + 0.5, (b)) - cc_int((x) - 0.5, (b))) / (2.0 * M_PI))
|
||||||
{
|
|
||||||
return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b)) / (2.0 * M_PI);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
static INLINE float cc_int(float x, float b)
|
static INLINE float cc_int(float x, float b)
|
||||||
{
|
{
|
||||||
@ -433,10 +430,7 @@ static INLINE float cc_int(float x, float b)
|
|||||||
return (val > 0.5) ? 0.5 : (val < -0.5) ? -0.5 : val;
|
return (val > 0.5) ? 0.5 : (val < -0.5) ? -0.5 : val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE float cc_kernel(float x, float b)
|
#define cc_kernel(x, b) ((cc_int((x) + 0.5, (b)) - cc_int((x) - 0.5, (b))))
|
||||||
{
|
|
||||||
return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b));
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static INLINE void add_to(const audio_frame_float_t *source,
|
static INLINE void add_to(const audio_frame_float_t *source,
|
||||||
@ -503,15 +497,15 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
|
|||||||
while (re->distance < 1.0)
|
while (re->distance < 1.0)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float temp;
|
|
||||||
outp->l = 0.0;
|
outp->l = 0.0;
|
||||||
outp->r = 0.0;
|
outp->r = 0.0;
|
||||||
|
|
||||||
for (i = 0; i < 4; i++)
|
for (i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
temp = cc_kernel(re->distance + 1.0 - i, b);
|
float temp = cc_kernel(re->distance + 1.0 - i, b);
|
||||||
outp->l += re->buffer[i].l * temp;
|
outp->l += re->buffer[i].l * temp;
|
||||||
outp->r += re->buffer[i].r * temp;
|
outp->r += re->buffer[i].r * temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
re->distance += ratio;
|
re->distance += ratio;
|
||||||
|
@ -121,10 +121,7 @@ static INLINE double sinc(double val)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(SINC_WINDOW_LANCZOS)
|
#if defined(SINC_WINDOW_LANCZOS)
|
||||||
static INLINE double window_function(double idx)
|
#define window_function(idx) (sinc(M_PI * (idx)))
|
||||||
{
|
|
||||||
return sinc(M_PI * idx);
|
|
||||||
}
|
|
||||||
#elif defined(SINC_WINDOW_KAISER)
|
#elif defined(SINC_WINDOW_KAISER)
|
||||||
/* Modified Bessel function of first order.
|
/* Modified Bessel function of first order.
|
||||||
* Check Wiki for mathematical definition ... */
|
* Check Wiki for mathematical definition ... */
|
||||||
@ -153,10 +150,7 @@ static INLINE double besseli0(double x)
|
|||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE double window_function(double idx)
|
#define window_function(idx) (besseli0(SINC_WINDOW_KAISER_BETA * sqrt(1 - (idx) * (idx))))
|
||||||
{
|
|
||||||
return besseli0(SINC_WINDOW_KAISER_BETA * sqrt(1 - idx * idx));
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
#error "No SINC window function defined."
|
#error "No SINC window function defined."
|
||||||
#endif
|
#endif
|
||||||
@ -247,17 +241,17 @@ static INLINE void process_sinc_C(rarch_sinc_resampler_t *resamp,
|
|||||||
float *out_buffer)
|
float *out_buffer)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
float sum_l = 0.0f;
|
float sum_l = 0.0f;
|
||||||
float sum_r = 0.0f;
|
float sum_r = 0.0f;
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
unsigned taps = resamp->taps;
|
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
||||||
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
|
||||||
#if SINC_COEFF_LERP
|
#if SINC_COEFF_LERP
|
||||||
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
const float *delta_table = phase_table + taps;
|
const float *delta_table = phase_table + taps;
|
||||||
float delta = (float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD;
|
float delta = (float)
|
||||||
|
(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD;
|
||||||
#else
|
#else
|
||||||
const float *phase_table = resamp->phase_table + phase * taps;
|
const float *phase_table = resamp->phase_table + phase * taps;
|
||||||
#endif
|
#endif
|
||||||
@ -283,18 +277,18 @@ static INLINE void process_sinc_C(rarch_sinc_resampler_t *resamp,
|
|||||||
static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
__m256 sum_l = _mm256_setzero_ps();
|
__m256 sum_l = _mm256_setzero_ps();
|
||||||
__m256 sum_r = _mm256_setzero_ps();
|
__m256 sum_r = _mm256_setzero_ps();
|
||||||
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
|
||||||
unsigned taps = resamp->taps;
|
unsigned taps = resamp->taps;
|
||||||
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
||||||
#if SINC_COEFF_LERP
|
#if SINC_COEFF_LERP
|
||||||
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
const float *delta_table = phase_table + taps;
|
const float *delta_table = phase_table + taps;
|
||||||
__m256 delta = _mm256_set1_ps((float)
|
__m256 delta = _mm256_set1_ps((float)
|
||||||
(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
|
(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
|
||||||
#else
|
#else
|
||||||
const float *phase_table = resamp->phase_table + phase * taps;
|
const float *phase_table = resamp->phase_table + phase * taps;
|
||||||
@ -302,28 +296,28 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
|||||||
|
|
||||||
for (i = 0; i < taps; i += 8)
|
for (i = 0; i < taps; i += 8)
|
||||||
{
|
{
|
||||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||||
|
|
||||||
#if SINC_COEFF_LERP
|
#if SINC_COEFF_LERP
|
||||||
__m256 deltas = _mm256_load_ps(delta_table + i);
|
__m256 deltas = _mm256_load_ps(delta_table + i);
|
||||||
__m256 sinc = _mm256_add_ps(_mm256_load_ps(phase_table + i),
|
__m256 sinc = _mm256_add_ps(_mm256_load_ps(phase_table + i),
|
||||||
_mm256_mul_ps(deltas, delta));
|
_mm256_mul_ps(deltas, delta));
|
||||||
#else
|
#else
|
||||||
__m256 sinc = _mm256_load_ps(phase_table + i);
|
__m256 sinc = _mm256_load_ps(phase_table + i);
|
||||||
#endif
|
#endif
|
||||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* hadd on AVX is weird, and acts on low-lanes
|
/* hadd on AVX is weird, and acts on low-lanes
|
||||||
* and high-lanes separately. */
|
* and high-lanes separately. */
|
||||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||||
|
|
||||||
/* This is optimized to mov %xmmN, [mem].
|
/* This is optimized to mov %xmmN, [mem].
|
||||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||||
@ -335,23 +329,23 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
|||||||
static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
__m128 sum_l = _mm_setzero_ps();
|
__m128 sum;
|
||||||
__m128 sum_r = _mm_setzero_ps();
|
__m128 sum_l = _mm_setzero_ps();
|
||||||
|
__m128 sum_r = _mm_setzero_ps();
|
||||||
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
|
||||||
unsigned taps = resamp->taps;
|
unsigned taps = resamp->taps;
|
||||||
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
||||||
#if SINC_COEFF_LERP
|
#if SINC_COEFF_LERP
|
||||||
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
const float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
const float *delta_table = phase_table + taps;
|
const float *delta_table = phase_table + taps;
|
||||||
__m128 delta = _mm_set1_ps((float)
|
__m128 delta = _mm_set1_ps((float)
|
||||||
(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
|
(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
|
||||||
#else
|
#else
|
||||||
const float *phase_table = resamp->phase_table + phase * taps;
|
const float *phase_table = resamp->phase_table + phase * taps;
|
||||||
#endif
|
#endif
|
||||||
__m128 sum;
|
|
||||||
|
|
||||||
for (i = 0; i < taps; i += 4)
|
for (i = 0; i < taps; i += 4)
|
||||||
{
|
{
|
||||||
@ -360,13 +354,13 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
|
|||||||
|
|
||||||
#if SINC_COEFF_LERP
|
#if SINC_COEFF_LERP
|
||||||
__m128 deltas = _mm_load_ps(delta_table + i);
|
__m128 deltas = _mm_load_ps(delta_table + i);
|
||||||
__m128 _sinc = _mm_add_ps(_mm_load_ps(phase_table + i),
|
__m128 _sinc = _mm_add_ps(_mm_load_ps(phase_table + i),
|
||||||
_mm_mul_ps(deltas, delta));
|
_mm_mul_ps(deltas, delta));
|
||||||
#else
|
#else
|
||||||
__m128 _sinc = _mm_load_ps(phase_table + i);
|
__m128 _sinc = _mm_load_ps(phase_table + i);
|
||||||
#endif
|
#endif
|
||||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Them annoying shuffles.
|
/* Them annoying shuffles.
|
||||||
@ -413,11 +407,11 @@ void process_sinc_neon_asm(float *out, const float *left,
|
|||||||
static void process_sinc_neon(rarch_sinc_resampler_t *resamp,
|
static void process_sinc_neon(rarch_sinc_resampler_t *resamp,
|
||||||
float *out_buffer)
|
float *out_buffer)
|
||||||
{
|
{
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
|
||||||
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
unsigned phase = resamp->time >> SUBPHASE_BITS;
|
||||||
unsigned taps = resamp->taps;
|
unsigned taps = resamp->taps;
|
||||||
const float *phase_table = resamp->phase_table + phase * taps;
|
const float *phase_table = resamp->phase_table + phase * taps;
|
||||||
|
|
||||||
process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table, taps);
|
process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table, taps);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user