Rollback changes for now until we can get this compiling on MSVC 2005 on

the command line (it builds with the VS solution file but not from the CLI)
This commit is contained in:
twinaphex 2021-06-09 16:39:43 +02:00
parent dc50ff584e
commit 08481e2a68

View File

@ -75,15 +75,16 @@ typedef struct rarch_sinc_resampler
float *phase_table;
float *buffer_l;
float *buffer_r;
unsigned enable_avx;
unsigned phase_bits;
unsigned subphase_bits;
unsigned subphase_mask;
unsigned taps;
unsigned ptr;
unsigned num_channels;
uint32_t time;
float subphase_mod;
float kaiser_beta;
enum sinc_window window_type;
} rarch_sinc_resampler_t;
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
@ -153,7 +154,7 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
#endif
#if defined(__AVX__)
static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -164,6 +165,8 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
size_t frames = data->input_frames;
size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames)
{
while (frames && resamp->time >= phases)
@ -232,21 +235,9 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
}
}
}
data->output_frames = out_frames;
}
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
else
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames)
{
while (frames && resamp->time >= phases)
@ -310,13 +301,14 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
}
}
}
}
data->output_frames = out_frames;
}
#endif
#if defined(__SSE__)
static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data)
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -327,6 +319,8 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
size_t frames = data->input_frames;
size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames)
{
while (frames && resamp->time >= phases)
@ -353,9 +347,7 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
while (resamp->time < phases)
{
unsigned i;
#if 0
__m128 sum;
#endif
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
@ -376,55 +368,40 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
#ifdef HAVE_GRIFFIN
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output++, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output++, _mm_movehl_ps(sum, sum));
#else
#ifdef _MSC_VER
*(output++) = _mm_cvtss_f32(sum_l) + sum_l.m128_f32[1] + sum_l.m128_f32[2] + sum_l.m128_f32[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r.m128_f32[1] + sum_r.m128_f32[2] + sum_r.m128_f32[3];
#else
*(output++) = _mm_cvtss_f32(sum_l) + sum_l[1] + sum_l[2] + sum_l[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r[1] + sum_r[2] + sum_r[3];
#endif
#endif
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
data->output_frames = out_frames;
}
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
else
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames)
{
while (frames && resamp->time >= phases)
@ -451,9 +428,7 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
while (resamp->time < phases)
{
unsigned i;
#if 0
__m128 sum;
#endif
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
@ -469,46 +444,44 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
#if 0
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output++, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output++, _mm_movehl_ps(sum, sum));
#else
#ifdef _MSC_VER
*(output++) = _mm_cvtss_f32(sum_l) + sum_l.m128_f32[1] + sum_l.m128_f32[2] + sum_l.m128_f32[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r.m128_f32[1] + sum_r.m128_f32[2] + sum_r.m128_f32[3];
#else
*(output++) = _mm_cvtss_f32(sum_l) + sum_l[1] + sum_l[2] + sum_l[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r[1] + sum_r[2] + sum_r[3];
#endif
#endif
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
#endif
static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data)
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -519,6 +492,8 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
size_t frames = data->input_frames;
size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames)
{
while (frames && resamp->time >= phases)
@ -561,30 +536,19 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
sum_r += buffer_r[i] * sinc_val;
}
*output++ = sum_l;
*output++ = sum_r;
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
data->output_frames = out_frames;
}
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
else
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames)
{
while (frames && resamp->time >= phases)
@ -624,15 +588,17 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
sum_r += buffer_r[i] * sinc_val;
}
*output++ = sum_l;
*output++ = sum_r;
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
@ -769,14 +735,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
size_t phase_elems = 0;
size_t elems = 0;
unsigned sidelobes = 0;
unsigned enable_avx = 0;
enum sinc_window window_type = SINC_WINDOW_NONE;
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)
calloc(1, sizeof(*re));
if (!re)
return NULL;
re->window_type = SINC_WINDOW_NONE;
switch (quality)
{
case RESAMPLER_QUALITY_LOWEST:
@ -784,32 +750,34 @@ static void *resampler_sinc_new(const struct resampler_config *config,
sidelobes = 2;
re->phase_bits = 12;
re->subphase_bits = 10;
window_type = SINC_WINDOW_LANCZOS;
re->window_type = SINC_WINDOW_LANCZOS;
re->enable_avx = 0;
break;
case RESAMPLER_QUALITY_LOWER:
cutoff = 0.98;
sidelobes = 4;
re->phase_bits = 12;
re->subphase_bits = 10;
window_type = SINC_WINDOW_LANCZOS;
re->window_type = SINC_WINDOW_LANCZOS;
re->enable_avx = 0;
break;
case RESAMPLER_QUALITY_HIGHER:
cutoff = 0.90;
sidelobes = 32;
re->phase_bits = 10;
re->subphase_bits = 14;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 10.5;
enable_avx = 1;
window_type = SINC_WINDOW_KAISER;
re->enable_avx = 1;
break;
case RESAMPLER_QUALITY_HIGHEST:
cutoff = 0.962;
sidelobes = 128;
re->phase_bits = 10;
re->subphase_bits = 14;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 14.5;
enable_avx = 1;
window_type = SINC_WINDOW_KAISER;
re->enable_avx = 1;
break;
case RESAMPLER_QUALITY_NORMAL:
case RESAMPLER_QUALITY_DONTCARE:
@ -817,14 +785,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
sidelobes = 8;
re->phase_bits = 8;
re->subphase_bits = 16;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 5.5;
window_type = SINC_WINDOW_KAISER;
re->enable_avx = 0;
break;
}
re->subphase_mask = (1 << re->subphase_bits) - 1;
re->subphase_mod = 1.0f / (1 << re->subphase_bits);
re->num_channels = 2;
re->taps = sidelobes * 2;
/* Downsampling, must lower cutoff, and extend number of
@ -837,7 +805,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
/* Be SIMD-friendly. */
#if defined(__AVX__)
if (enable_avx)
if (re->enable_avx)
re->taps = (re->taps + 7) & ~7;
else
#endif
@ -850,7 +818,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
}
phase_elems = ((1 << re->phase_bits) * re->taps);
if (window_type == SINC_WINDOW_KAISER)
if (re->window_type == SINC_WINDOW_KAISER)
phase_elems = phase_elems * 2;
elems = phase_elems + 4 * re->taps;
@ -864,7 +832,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
re->buffer_l = re->main_buffer + phase_elems;
re->buffer_r = re->buffer_l + 2 * re->taps;
switch (window_type)
switch (re->window_type)
{
case SINC_WINDOW_LANCZOS:
sinc_init_table_lanczos(re, cutoff, re->phase_table,
@ -879,26 +847,20 @@ static void *resampler_sinc_new(const struct resampler_config *config,
}
sinc_resampler.process = resampler_sinc_process_c;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_c_kaiser;
if (mask & RESAMPLER_SIMD_AVX && enable_avx)
if (mask & RESAMPLER_SIMD_AVX && re->enable_avx)
{
#if defined(__AVX__)
sinc_resampler.process = resampler_sinc_process_avx;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_avx_kaiser;
#endif
}
else if (mask & RESAMPLER_SIMD_SSE)
{
#if defined(__SSE__)
sinc_resampler.process = resampler_sinc_process_sse;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_sse_kaiser;
#endif
}
else if (mask & RESAMPLER_SIMD_NEON && window_type != SINC_WINDOW_KAISER)
else if (mask & RESAMPLER_SIMD_NEON && re->window_type != SINC_WINDOW_KAISER)
{
#if defined(WANT_NEON)
sinc_resampler.process = resampler_sinc_process_neon;