mirror of
https://github.com/libretro/RetroArch
synced 2025-01-30 21:32:45 +00:00
Rollback sinc resampler changes - themaister wasn't sure if these were
actual optimizations
This commit is contained in:
parent
bba924e8d2
commit
1906a741c2
@ -37,12 +37,6 @@
|
|||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#define MEMBER(v, i) ((v).m128_f32[(i)])
|
|
||||||
#else
|
|
||||||
#define MEMBER(v, i) ((v)[(i)])
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX__)
|
#if defined(__AVX__)
|
||||||
@ -81,15 +75,16 @@ typedef struct rarch_sinc_resampler
|
|||||||
float *phase_table;
|
float *phase_table;
|
||||||
float *buffer_l;
|
float *buffer_l;
|
||||||
float *buffer_r;
|
float *buffer_r;
|
||||||
|
unsigned enable_avx;
|
||||||
unsigned phase_bits;
|
unsigned phase_bits;
|
||||||
unsigned subphase_bits;
|
unsigned subphase_bits;
|
||||||
unsigned subphase_mask;
|
unsigned subphase_mask;
|
||||||
unsigned taps;
|
unsigned taps;
|
||||||
unsigned ptr;
|
unsigned ptr;
|
||||||
unsigned num_channels;
|
|
||||||
uint32_t time;
|
uint32_t time;
|
||||||
float subphase_mod;
|
float subphase_mod;
|
||||||
float kaiser_beta;
|
float kaiser_beta;
|
||||||
|
enum sinc_window window_type;
|
||||||
} rarch_sinc_resampler_t;
|
} rarch_sinc_resampler_t;
|
||||||
|
|
||||||
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
|
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
|
||||||
@ -159,89 +154,6 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX__)
|
#if defined(__AVX__)
|
||||||
static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
|
|
||||||
{
|
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
|
||||||
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
|
|
||||||
|
|
||||||
uint32_t ratio = phases / data->ratio;
|
|
||||||
const float *input = data->data_in;
|
|
||||||
float *output = data->data_out;
|
|
||||||
size_t frames = data->input_frames;
|
|
||||||
size_t out_frames = 0;
|
|
||||||
|
|
||||||
while (frames)
|
|
||||||
{
|
|
||||||
while (frames && resamp->time >= phases)
|
|
||||||
{
|
|
||||||
/* Push in reverse to make filter more obvious. */
|
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
|
||||||
|
|
||||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
|
||||||
float *delta_table = phase_table + taps;
|
|
||||||
__m256 delta = _mm256_set1_ps((float)
|
|
||||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
|
||||||
|
|
||||||
__m256 sum_l = _mm256_setzero_ps();
|
|
||||||
__m256 sum_r = _mm256_setzero_ps();
|
|
||||||
|
|
||||||
for (i = 0; i < taps; i += 8)
|
|
||||||
{
|
|
||||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
|
||||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
|
||||||
__m256 deltas = _mm256_load_ps(delta_table + i);
|
|
||||||
__m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
|
|
||||||
_mm256_mul_ps(deltas, delta));
|
|
||||||
|
|
||||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
|
||||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* hadd on AVX is weird, and acts on low-lanes
|
|
||||||
* and high-lanes separately. */
|
|
||||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
|
||||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
|
||||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
|
||||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
|
||||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
|
||||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
|
||||||
|
|
||||||
/* This is optimized to mov %xmmN, [mem].
|
|
||||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
|
||||||
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
|
||||||
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
|
||||||
|
|
||||||
output += 2;
|
|
||||||
out_frames++;
|
|
||||||
resamp->time += ratio;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data->output_frames = out_frames;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
||||||
{
|
{
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
||||||
@ -253,66 +165,140 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
|||||||
size_t frames = data->input_frames;
|
size_t frames = data->input_frames;
|
||||||
size_t out_frames = 0;
|
size_t out_frames = 0;
|
||||||
|
|
||||||
while (frames)
|
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||||
{
|
{
|
||||||
while (frames && resamp->time >= phases)
|
while (frames)
|
||||||
{
|
{
|
||||||
/* Push in reverse to make filter more obvious. */
|
while (frames && resamp->time >= phases)
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
{
|
||||||
unsigned i;
|
/* Push in reverse to make filter more obvious. */
|
||||||
__m256 delta;
|
if (!resamp->ptr)
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
resamp->ptr = resamp->taps;
|
||||||
float *phase_table = resamp->phase_table + phase * taps;
|
resamp->ptr--;
|
||||||
|
|
||||||
__m256 sum_l = _mm256_setzero_ps();
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
__m256 sum_r = _mm256_setzero_ps();
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
|
|
||||||
for (i = 0; i < taps; i += 8)
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
|
|
||||||
|
resamp->time -= phases;
|
||||||
|
frames--;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
{
|
{
|
||||||
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
unsigned i;
|
||||||
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
__m256 sinc = _mm256_load_ps((const float*)phase_table + i);
|
|
||||||
|
|
||||||
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
float *delta_table = phase_table + taps;
|
||||||
|
__m256 delta = _mm256_set1_ps((float)
|
||||||
|
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
||||||
|
|
||||||
|
__m256 sum_l = _mm256_setzero_ps();
|
||||||
|
__m256 sum_r = _mm256_setzero_ps();
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i += 8)
|
||||||
|
{
|
||||||
|
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||||
|
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||||
|
__m256 deltas = _mm256_load_ps(delta_table + i);
|
||||||
|
__m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
|
||||||
|
_mm256_mul_ps(deltas, delta));
|
||||||
|
|
||||||
|
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||||
|
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hadd on AVX is weird, and acts on low-lanes
|
||||||
|
* and high-lanes separately. */
|
||||||
|
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||||
|
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||||
|
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||||
|
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||||
|
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||||
|
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||||
|
|
||||||
|
/* This is optimized to mov %xmmN, [mem].
|
||||||
|
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||||
|
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
||||||
|
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (frames)
|
||||||
|
{
|
||||||
|
while (frames && resamp->time >= phases)
|
||||||
|
{
|
||||||
|
/* Push in reverse to make filter more obvious. */
|
||||||
|
if (!resamp->ptr)
|
||||||
|
resamp->ptr = resamp->taps;
|
||||||
|
resamp->ptr--;
|
||||||
|
|
||||||
/* hadd on AVX is weird, and acts on low-lanes
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
* and high-lanes separately. */
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
|
||||||
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
|
||||||
res_l = _mm256_hadd_ps(res_l, res_l);
|
|
||||||
res_r = _mm256_hadd_ps(res_r, res_r);
|
|
||||||
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
|
||||||
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
|
||||||
|
|
||||||
/* This is optimized to mov %xmmN, [mem].
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
|
||||||
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
|
||||||
|
|
||||||
output += 2;
|
resamp->time -= phases;
|
||||||
out_frames++;
|
frames--;
|
||||||
resamp->time += ratio;
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
__m256 delta;
|
||||||
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
|
float *phase_table = resamp->phase_table + phase * taps;
|
||||||
|
|
||||||
|
__m256 sum_l = _mm256_setzero_ps();
|
||||||
|
__m256 sum_r = _mm256_setzero_ps();
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i += 8)
|
||||||
|
{
|
||||||
|
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
|
||||||
|
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
|
||||||
|
__m256 sinc = _mm256_load_ps((const float*)phase_table + i);
|
||||||
|
|
||||||
|
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
|
||||||
|
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hadd on AVX is weird, and acts on low-lanes
|
||||||
|
* and high-lanes separately. */
|
||||||
|
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
|
||||||
|
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
|
||||||
|
res_l = _mm256_hadd_ps(res_l, res_l);
|
||||||
|
res_r = _mm256_hadd_ps(res_r, res_r);
|
||||||
|
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
|
||||||
|
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
|
||||||
|
|
||||||
|
/* This is optimized to mov %xmmN, [mem].
|
||||||
|
* There doesn't seem to be any _mm256_store_ss intrinsic. */
|
||||||
|
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
|
||||||
|
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -322,75 +308,6 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SSE__)
|
#if defined(__SSE__)
|
||||||
static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data)
|
|
||||||
{
|
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
|
||||||
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
|
|
||||||
|
|
||||||
uint32_t ratio = phases / data->ratio;
|
|
||||||
const float *input = data->data_in;
|
|
||||||
float *output = data->data_out;
|
|
||||||
size_t frames = data->input_frames;
|
|
||||||
size_t out_frames = 0;
|
|
||||||
|
|
||||||
while (frames)
|
|
||||||
{
|
|
||||||
while (frames && resamp->time >= phases)
|
|
||||||
{
|
|
||||||
/* Push in reverse to make filter more obvious. */
|
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
|
||||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
|
||||||
float *delta_table = phase_table + taps;
|
|
||||||
__m128 delta = _mm_set1_ps((float)
|
|
||||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
|
||||||
|
|
||||||
__m128 sum_l = _mm_setzero_ps();
|
|
||||||
__m128 sum_r = _mm_setzero_ps();
|
|
||||||
|
|
||||||
for (i = 0; i < taps; i += 4)
|
|
||||||
{
|
|
||||||
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
|
||||||
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
|
||||||
__m128 deltas = _mm_load_ps(delta_table + i);
|
|
||||||
__m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
|
|
||||||
_mm_mul_ps(deltas, delta));
|
|
||||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
|
||||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
|
||||||
}
|
|
||||||
|
|
||||||
*(output++) = _mm_cvtss_f32(sum_l) + MEMBER(sum_l, 1) + MEMBER(sum_l, 2) + MEMBER(sum_l, 3);
|
|
||||||
*(output++) = _mm_cvtss_f32(sum_r) + MEMBER(sum_r, 1) + MEMBER(sum_r, 2) + MEMBER(sum_r, 3);
|
|
||||||
|
|
||||||
out_frames++;
|
|
||||||
resamp->time += ratio;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data->output_frames = out_frames;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
||||||
{
|
{
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
||||||
@ -402,52 +319,160 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
|||||||
size_t frames = data->input_frames;
|
size_t frames = data->input_frames;
|
||||||
size_t out_frames = 0;
|
size_t out_frames = 0;
|
||||||
|
|
||||||
while (frames)
|
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||||
{
|
{
|
||||||
while (frames && resamp->time >= phases)
|
while (frames)
|
||||||
{
|
{
|
||||||
/* Push in reverse to make filter more obvious. */
|
while (frames && resamp->time >= phases)
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
{
|
||||||
unsigned i;
|
/* Push in reverse to make filter more obvious. */
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
if (!resamp->ptr)
|
||||||
float *phase_table = resamp->phase_table + phase * taps;
|
resamp->ptr = resamp->taps;
|
||||||
|
resamp->ptr--;
|
||||||
|
|
||||||
__m128 sum_l = _mm_setzero_ps();
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
__m128 sum_r = _mm_setzero_ps();
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
|
|
||||||
for (i = 0; i < taps; i += 4)
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
|
|
||||||
|
resamp->time -= phases;
|
||||||
|
frames--;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
{
|
{
|
||||||
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
unsigned i;
|
||||||
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
__m128 sum;
|
||||||
__m128 _sinc = _mm_load_ps((const float*)phase_table + i);
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
float *delta_table = phase_table + taps;
|
||||||
|
__m128 delta = _mm_set1_ps((float)
|
||||||
|
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
|
||||||
|
|
||||||
|
__m128 sum_l = _mm_setzero_ps();
|
||||||
|
__m128 sum_r = _mm_setzero_ps();
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i += 4)
|
||||||
|
{
|
||||||
|
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
||||||
|
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
||||||
|
__m128 deltas = _mm_load_ps(delta_table + i);
|
||||||
|
__m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
|
||||||
|
_mm_mul_ps(deltas, delta));
|
||||||
|
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||||
|
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Them annoying shuffles.
|
||||||
|
* sum_l = { l3, l2, l1, l0 }
|
||||||
|
* sum_r = { r3, r2, r1, r0 }
|
||||||
|
*/
|
||||||
|
|
||||||
|
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
|
||||||
|
_MM_SHUFFLE(1, 0, 1, 0)),
|
||||||
|
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||||
|
|
||||||
|
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
|
||||||
|
* sum = { R1, R0, L1, L0 }
|
||||||
|
*/
|
||||||
|
|
||||||
|
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
|
||||||
|
|
||||||
|
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
|
||||||
|
* sum = { X, R, X, L }
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Store L */
|
||||||
|
_mm_store_ss(output + 0, sum);
|
||||||
|
|
||||||
|
/* movehl { X, R, X, L } == { X, R, X, R } */
|
||||||
|
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (frames)
|
||||||
|
{
|
||||||
|
while (frames && resamp->time >= phases)
|
||||||
|
{
|
||||||
|
/* Push in reverse to make filter more obvious. */
|
||||||
|
if (!resamp->ptr)
|
||||||
|
resamp->ptr = resamp->taps;
|
||||||
|
resamp->ptr--;
|
||||||
|
|
||||||
*(output++) = _mm_cvtss_f32(sum_l) + MEMBER(sum_l, 1) + MEMBER(sum_l, 2) + MEMBER(sum_l, 3);
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
*(output++) = _mm_cvtss_f32(sum_r) + MEMBER(sum_r, 1) + MEMBER(sum_r, 2) + MEMBER(sum_r, 3);
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
|
|
||||||
out_frames++;
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
resamp->time += ratio;
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
|
|
||||||
|
resamp->time -= phases;
|
||||||
|
frames--;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
__m128 sum;
|
||||||
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
|
float *phase_table = resamp->phase_table + phase * taps;
|
||||||
|
|
||||||
|
__m128 sum_l = _mm_setzero_ps();
|
||||||
|
__m128 sum_r = _mm_setzero_ps();
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i += 4)
|
||||||
|
{
|
||||||
|
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
|
||||||
|
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
|
||||||
|
__m128 _sinc = _mm_load_ps((const float*)phase_table + i);
|
||||||
|
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
|
||||||
|
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Them annoying shuffles.
|
||||||
|
* sum_l = { l3, l2, l1, l0 }
|
||||||
|
* sum_r = { r3, r2, r1, r0 }
|
||||||
|
*/
|
||||||
|
|
||||||
|
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
|
||||||
|
_MM_SHUFFLE(1, 0, 1, 0)),
|
||||||
|
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||||
|
|
||||||
|
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
|
||||||
|
* sum = { R1, R0, L1, L0 }
|
||||||
|
*/
|
||||||
|
|
||||||
|
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
|
||||||
|
|
||||||
|
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
|
||||||
|
* sum = { X, R, X, L }
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Store L */
|
||||||
|
_mm_store_ss(output + 0, sum);
|
||||||
|
|
||||||
|
/* movehl { X, R, X, L } == { X, R, X, R } */
|
||||||
|
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -456,72 +481,6 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data)
|
|
||||||
{
|
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
|
||||||
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
|
|
||||||
|
|
||||||
uint32_t ratio = phases / data->ratio;
|
|
||||||
const float *input = data->data_in;
|
|
||||||
float *output = data->data_out;
|
|
||||||
size_t frames = data->input_frames;
|
|
||||||
size_t out_frames = 0;
|
|
||||||
|
|
||||||
while (frames)
|
|
||||||
{
|
|
||||||
while (frames && resamp->time >= phases)
|
|
||||||
{
|
|
||||||
/* Push in reverse to make filter more obvious. */
|
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
float sum_l = 0.0f;
|
|
||||||
float sum_r = 0.0f;
|
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
|
||||||
float *phase_table = resamp->phase_table + phase * taps * 2;
|
|
||||||
float *delta_table = phase_table + taps;
|
|
||||||
float delta = (float)
|
|
||||||
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
|
|
||||||
|
|
||||||
for (i = 0; i < taps; i++)
|
|
||||||
{
|
|
||||||
float sinc_val = phase_table[i] + delta_table[i] * delta;
|
|
||||||
|
|
||||||
sum_l += buffer_l[i] * sinc_val;
|
|
||||||
sum_r += buffer_r[i] * sinc_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
*output++ = sum_l;
|
|
||||||
*output++ = sum_r;
|
|
||||||
|
|
||||||
out_frames++;
|
|
||||||
resamp->time += ratio;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
data->output_frames = out_frames;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
||||||
{
|
{
|
||||||
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
|
||||||
@ -533,53 +492,112 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
|
|||||||
size_t frames = data->input_frames;
|
size_t frames = data->input_frames;
|
||||||
size_t out_frames = 0;
|
size_t out_frames = 0;
|
||||||
|
|
||||||
while (frames)
|
if (resamp->window_type == SINC_WINDOW_KAISER)
|
||||||
{
|
{
|
||||||
while (frames && resamp->time >= phases)
|
while (frames)
|
||||||
{
|
{
|
||||||
/* Push in reverse to make filter more obvious. */
|
while (frames && resamp->time >= phases)
|
||||||
if (!resamp->ptr)
|
|
||||||
resamp->ptr = resamp->taps;
|
|
||||||
resamp->ptr--;
|
|
||||||
|
|
||||||
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_l[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
|
||||||
resamp->buffer_r[resamp->ptr] = *input++;
|
|
||||||
|
|
||||||
resamp->time -= phases;
|
|
||||||
frames--;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
|
||||||
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
|
||||||
unsigned taps = resamp->taps;
|
|
||||||
while (resamp->time < phases)
|
|
||||||
{
|
{
|
||||||
unsigned i;
|
/* Push in reverse to make filter more obvious. */
|
||||||
float sum_l = 0.0f;
|
if (!resamp->ptr)
|
||||||
float sum_r = 0.0f;
|
resamp->ptr = resamp->taps;
|
||||||
unsigned phase = resamp->time >> resamp->subphase_bits;
|
resamp->ptr--;
|
||||||
float *phase_table = resamp->phase_table + phase * taps;
|
|
||||||
|
|
||||||
for (i = 0; i < taps; i++)
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
{
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
float sinc_val = phase_table[i];
|
|
||||||
|
|
||||||
sum_l += buffer_l[i] * sinc_val;
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
sum_r += buffer_r[i] * sinc_val;
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
}
|
|
||||||
|
|
||||||
*output++ = sum_l;
|
resamp->time -= phases;
|
||||||
*output++ = sum_r;
|
frames--;
|
||||||
|
|
||||||
out_frames++;
|
|
||||||
resamp->time += ratio;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
float sum_l = 0.0f;
|
||||||
|
float sum_r = 0.0f;
|
||||||
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
|
float *phase_table = resamp->phase_table + phase * taps * 2;
|
||||||
|
float *delta_table = phase_table + taps;
|
||||||
|
float delta = (float)
|
||||||
|
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i++)
|
||||||
|
{
|
||||||
|
float sinc_val = phase_table[i] + delta_table[i] * delta;
|
||||||
|
|
||||||
|
sum_l += buffer_l[i] * sinc_val;
|
||||||
|
sum_r += buffer_r[i] * sinc_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
output[0] = sum_l;
|
||||||
|
output[1] = sum_r;
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (frames)
|
||||||
|
{
|
||||||
|
while (frames && resamp->time >= phases)
|
||||||
|
{
|
||||||
|
/* Push in reverse to make filter more obvious. */
|
||||||
|
if (!resamp->ptr)
|
||||||
|
resamp->ptr = resamp->taps;
|
||||||
|
resamp->ptr--;
|
||||||
|
|
||||||
|
resamp->buffer_l[resamp->ptr + resamp->taps] =
|
||||||
|
resamp->buffer_l[resamp->ptr] = *input++;
|
||||||
|
|
||||||
|
resamp->buffer_r[resamp->ptr + resamp->taps] =
|
||||||
|
resamp->buffer_r[resamp->ptr] = *input++;
|
||||||
|
|
||||||
|
resamp->time -= phases;
|
||||||
|
frames--;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const float *buffer_l = resamp->buffer_l + resamp->ptr;
|
||||||
|
const float *buffer_r = resamp->buffer_r + resamp->ptr;
|
||||||
|
unsigned taps = resamp->taps;
|
||||||
|
while (resamp->time < phases)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
float sum_l = 0.0f;
|
||||||
|
float sum_r = 0.0f;
|
||||||
|
unsigned phase = resamp->time >> resamp->subphase_bits;
|
||||||
|
float *phase_table = resamp->phase_table + phase * taps;
|
||||||
|
|
||||||
|
for (i = 0; i < taps; i++)
|
||||||
|
{
|
||||||
|
float sinc_val = phase_table[i];
|
||||||
|
|
||||||
|
sum_l += buffer_l[i] * sinc_val;
|
||||||
|
sum_r += buffer_r[i] * sinc_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
output[0] = sum_l;
|
||||||
|
output[1] = sum_r;
|
||||||
|
|
||||||
|
output += 2;
|
||||||
|
out_frames++;
|
||||||
|
resamp->time += ratio;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data->output_frames = out_frames;
|
data->output_frames = out_frames;
|
||||||
@ -717,14 +735,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
size_t phase_elems = 0;
|
size_t phase_elems = 0;
|
||||||
size_t elems = 0;
|
size_t elems = 0;
|
||||||
unsigned sidelobes = 0;
|
unsigned sidelobes = 0;
|
||||||
unsigned enable_avx = 0;
|
|
||||||
enum sinc_window window_type = SINC_WINDOW_NONE;
|
|
||||||
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)
|
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)
|
||||||
calloc(1, sizeof(*re));
|
calloc(1, sizeof(*re));
|
||||||
|
|
||||||
if (!re)
|
if (!re)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
re->window_type = SINC_WINDOW_NONE;
|
||||||
|
|
||||||
switch (quality)
|
switch (quality)
|
||||||
{
|
{
|
||||||
case RESAMPLER_QUALITY_LOWEST:
|
case RESAMPLER_QUALITY_LOWEST:
|
||||||
@ -732,32 +750,34 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
sidelobes = 2;
|
sidelobes = 2;
|
||||||
re->phase_bits = 12;
|
re->phase_bits = 12;
|
||||||
re->subphase_bits = 10;
|
re->subphase_bits = 10;
|
||||||
window_type = SINC_WINDOW_LANCZOS;
|
re->window_type = SINC_WINDOW_LANCZOS;
|
||||||
|
re->enable_avx = 0;
|
||||||
break;
|
break;
|
||||||
case RESAMPLER_QUALITY_LOWER:
|
case RESAMPLER_QUALITY_LOWER:
|
||||||
cutoff = 0.98;
|
cutoff = 0.98;
|
||||||
sidelobes = 4;
|
sidelobes = 4;
|
||||||
re->phase_bits = 12;
|
re->phase_bits = 12;
|
||||||
re->subphase_bits = 10;
|
re->subphase_bits = 10;
|
||||||
window_type = SINC_WINDOW_LANCZOS;
|
re->window_type = SINC_WINDOW_LANCZOS;
|
||||||
|
re->enable_avx = 0;
|
||||||
break;
|
break;
|
||||||
case RESAMPLER_QUALITY_HIGHER:
|
case RESAMPLER_QUALITY_HIGHER:
|
||||||
cutoff = 0.90;
|
cutoff = 0.90;
|
||||||
sidelobes = 32;
|
sidelobes = 32;
|
||||||
re->phase_bits = 10;
|
re->phase_bits = 10;
|
||||||
re->subphase_bits = 14;
|
re->subphase_bits = 14;
|
||||||
|
re->window_type = SINC_WINDOW_KAISER;
|
||||||
re->kaiser_beta = 10.5;
|
re->kaiser_beta = 10.5;
|
||||||
enable_avx = 1;
|
re->enable_avx = 1;
|
||||||
window_type = SINC_WINDOW_KAISER;
|
|
||||||
break;
|
break;
|
||||||
case RESAMPLER_QUALITY_HIGHEST:
|
case RESAMPLER_QUALITY_HIGHEST:
|
||||||
cutoff = 0.962;
|
cutoff = 0.962;
|
||||||
sidelobes = 128;
|
sidelobes = 128;
|
||||||
re->phase_bits = 10;
|
re->phase_bits = 10;
|
||||||
re->subphase_bits = 14;
|
re->subphase_bits = 14;
|
||||||
|
re->window_type = SINC_WINDOW_KAISER;
|
||||||
re->kaiser_beta = 14.5;
|
re->kaiser_beta = 14.5;
|
||||||
enable_avx = 1;
|
re->enable_avx = 1;
|
||||||
window_type = SINC_WINDOW_KAISER;
|
|
||||||
break;
|
break;
|
||||||
case RESAMPLER_QUALITY_NORMAL:
|
case RESAMPLER_QUALITY_NORMAL:
|
||||||
case RESAMPLER_QUALITY_DONTCARE:
|
case RESAMPLER_QUALITY_DONTCARE:
|
||||||
@ -765,14 +785,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
sidelobes = 8;
|
sidelobes = 8;
|
||||||
re->phase_bits = 8;
|
re->phase_bits = 8;
|
||||||
re->subphase_bits = 16;
|
re->subphase_bits = 16;
|
||||||
|
re->window_type = SINC_WINDOW_KAISER;
|
||||||
re->kaiser_beta = 5.5;
|
re->kaiser_beta = 5.5;
|
||||||
window_type = SINC_WINDOW_KAISER;
|
re->enable_avx = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
re->subphase_mask = (1 << re->subphase_bits) - 1;
|
re->subphase_mask = (1 << re->subphase_bits) - 1;
|
||||||
re->subphase_mod = 1.0f / (1 << re->subphase_bits);
|
re->subphase_mod = 1.0f / (1 << re->subphase_bits);
|
||||||
re->num_channels = 2;
|
|
||||||
re->taps = sidelobes * 2;
|
re->taps = sidelobes * 2;
|
||||||
|
|
||||||
/* Downsampling, must lower cutoff, and extend number of
|
/* Downsampling, must lower cutoff, and extend number of
|
||||||
@ -785,7 +805,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
|
|
||||||
/* Be SIMD-friendly. */
|
/* Be SIMD-friendly. */
|
||||||
#if defined(__AVX__)
|
#if defined(__AVX__)
|
||||||
if (enable_avx)
|
if (re->enable_avx)
|
||||||
re->taps = (re->taps + 7) & ~7;
|
re->taps = (re->taps + 7) & ~7;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
@ -798,7 +818,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
}
|
}
|
||||||
|
|
||||||
phase_elems = ((1 << re->phase_bits) * re->taps);
|
phase_elems = ((1 << re->phase_bits) * re->taps);
|
||||||
if (window_type == SINC_WINDOW_KAISER)
|
if (re->window_type == SINC_WINDOW_KAISER)
|
||||||
phase_elems = phase_elems * 2;
|
phase_elems = phase_elems * 2;
|
||||||
elems = phase_elems + 4 * re->taps;
|
elems = phase_elems + 4 * re->taps;
|
||||||
|
|
||||||
@ -812,7 +832,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
re->buffer_l = re->main_buffer + phase_elems;
|
re->buffer_l = re->main_buffer + phase_elems;
|
||||||
re->buffer_r = re->buffer_l + 2 * re->taps;
|
re->buffer_r = re->buffer_l + 2 * re->taps;
|
||||||
|
|
||||||
switch (window_type)
|
switch (re->window_type)
|
||||||
{
|
{
|
||||||
case SINC_WINDOW_LANCZOS:
|
case SINC_WINDOW_LANCZOS:
|
||||||
sinc_init_table_lanczos(re, cutoff, re->phase_table,
|
sinc_init_table_lanczos(re, cutoff, re->phase_table,
|
||||||
@ -826,30 +846,24 @@ static void *resampler_sinc_new(const struct resampler_config *config,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
sinc_resampler.process = resampler_sinc_process_c;
|
sinc_resampler.process = resampler_sinc_process_c;
|
||||||
if (window_type == SINC_WINDOW_KAISER)
|
|
||||||
sinc_resampler.process = resampler_sinc_process_c_kaiser;
|
|
||||||
|
|
||||||
if (mask & RESAMPLER_SIMD_AVX && enable_avx)
|
if (mask & RESAMPLER_SIMD_AVX && re->enable_avx)
|
||||||
{
|
{
|
||||||
#if defined(__AVX__)
|
#if defined(__AVX__)
|
||||||
sinc_resampler.process = resampler_sinc_process_avx;
|
sinc_resampler.process = resampler_sinc_process_avx;
|
||||||
if (window_type == SINC_WINDOW_KAISER)
|
|
||||||
sinc_resampler.process = resampler_sinc_process_avx_kaiser;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if (mask & RESAMPLER_SIMD_SSE)
|
else if (mask & RESAMPLER_SIMD_SSE)
|
||||||
{
|
{
|
||||||
#if defined(__SSE__)
|
#if defined(__SSE__)
|
||||||
sinc_resampler.process = resampler_sinc_process_sse;
|
sinc_resampler.process = resampler_sinc_process_sse;
|
||||||
if (window_type == SINC_WINDOW_KAISER)
|
|
||||||
sinc_resampler.process = resampler_sinc_process_sse_kaiser;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if (mask & RESAMPLER_SIMD_NEON && window_type != SINC_WINDOW_KAISER)
|
else if (mask & RESAMPLER_SIMD_NEON && re->window_type != SINC_WINDOW_KAISER)
|
||||||
{
|
{
|
||||||
#if defined(WANT_NEON)
|
#if defined(WANT_NEON)
|
||||||
sinc_resampler.process = resampler_sinc_process_neon;
|
sinc_resampler.process = resampler_sinc_process_neon;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user