Stylistic cleanups in CC resampler.

This commit is contained in:
Themaister 2014-03-23 14:14:42 +01:00
parent 0c57a1726b
commit 4d9ff7d147
2 changed files with 164 additions and 199 deletions

View File

@ -1,10 +1,21 @@
/*
* Convoluted Cosine Resampler
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* licence: GPLv3
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Convoluted Cosine Resampler
#include "resampler.h"
#include "../libretro.h"
#include "../performance.h"
@ -19,206 +30,162 @@
#define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__)
#endif
// One audio frame made of two float channels.
// l / r: the two channel samples (presumably left and right).
typedef struct audio_frame_float
{
float l;
float r;
} audio_frame_float_t;
// One audio frame made of two signed 16-bit channels.
// l / r: the two channel samples (presumably left and right).
typedef struct audio_frame_int16
{
int16_t l;
int16_t r;
} audio_frame_int16_t;
#ifdef _MIPS_ARCH_ALLEGREX1
// Resampler state for the _MIPS_ARCH_ALLEGREX1 build.
// It only carries a placeholder member; no field of it is read by the
// VFPU code in this section.
typedef struct rarch_CC_resampler
{
int dummy;
}rarch_CC_resampler_t;
// Resample one batch of frames using inline VFPU assembly.
//
// re_  : unused here (explicitly discarded).
// data : data_in / input_frames give the input frames, ratio is loaded into
//        the VFPU, data_out receives the produced frames and output_frames
//        is set to the number of frames written.
//
// NOTE(review): this span comes from a rendered diff, so lines of the
// pre-cleanup and post-cleanup revisions appear back to back (repeated asm
// lines, two loop headers, two output_frames assignments). The comments
// describe what the individual statements do.
static void resampler_CC_process(void *re_, struct resampler_data *data)
{
(void)re_;
// rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
float ratio,fraction;
typedef struct audio_frame_float
{
float l;
float r;
}audio_frame_float_t;
typedef struct audio_frame_int16
{
int16_t l;
int16_t r;
}audio_frame_int16_t;
float ratio, fraction;
// View the input and output buffers as arrays of two-float frames.
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
audio_frame_float_t *inp_max = inp + data->input_frames;
audio_frame_float_t *outp = (audio_frame_float_t*)data->data_out;
// Load data->ratio into s700, derive the per-call constants, then read back
// ratio (from s701 = 1.0 / b) and fraction (from s730).
__asm__ (
".set push\n"
".set noreorder\n"
".set push\n"
".set noreorder\n"
"mtv %2, s700 \n" // 700 = data->ratio = b
// "vsat0.s s700, s700 \n"
"vrcp.s s701, s700 \n" // 701 = 1.0 / b
"vadd.s s702, s700, s700 \n" // 702 = 2 * b
"vmul.s s703, s700, s710 \n" // 703 = b * pi
"mtv %2, s700 \n" // 700 = data->ratio = b
// "vsat0.s s700, s700 \n"
"vrcp.s s701, s700 \n" // 701 = 1.0 / b
"vadd.s s702, s700, s700 \n" // 702 = 2 * b
"vmul.s s703, s700, s710 \n" // 703 = b * pi
"mfv %0, s701 \n"
"mfv %1, s730 \n"
"mfv %0, s701 \n"
"mfv %1, s730 \n"
".set pop\n"
:"=r"(ratio),"=r"(fraction): "r"((float)data->ratio)
".set pop\n"
: "=r"(ratio), "=r"(fraction)
: "r"((float)data->ratio)
);
// Outer loop has no exit condition of its own; it is left through
// "goto done" once the input is exhausted.
while(true)
for (;;)
{
// Consume input frames while fraction < ratio. Each asm pass loads the two
// floats at inp, accumulates into c720 and returns the updated s730 in
// fraction.
while ((fraction < ratio))
while (fraction < ratio)
{
__asm__ (
".set push \n"
".set noreorder \n"
".set push \n"
".set noreorder \n"
"lv.s s620, 0(%1) \n"
"lv.s s621, 4(%1) \n"
"lv.s s620, 0(%1) \n"
"lv.s s621, 4(%1) \n"
"vsub.s s731, s701, s730 \n"
"vsub.s s731, s701, s730 \n"
"vadd.q c600, c730[-X,Y,-X,Y], c730[1/2,1/2,-1/2,-1/2]\n"
"vadd.q c600, c730[-X,Y,-X,Y], c730[1/2,1/2,-1/2,-1/2]\n"
"vmul.q c610, c600, c700[Z,Z,Z,Z] \n" //*2*b
"vmul.q c600, c600, c700[W,W,W,W] \n" //*b*pi
"vsin.q c610, c610 \n"
"vadd.q c600, c600, c610 \n"
"vmul.q c610, c600, c700[Z,Z,Z,Z] \n" //*2*b
"vmul.q c600, c600, c700[W,W,W,W] \n" //*b*pi
"vsin.q c610, c610 \n"
"vadd.q c600, c600, c610 \n"
"vmul.q c600[-1:1,-1:1,-1:1,-1:1], c600, c710[Y,Y,Y,Y] \n"
"vmul.q c600[-1:1,-1:1,-1:1,-1:1], c600, c710[Y,Y,Y,Y] \n"
"vsub.p c600, c600, c602 \n"
"vsub.p c600, c600, c602 \n"
"vmul.q c620, c620[X,Y,X,Y], c600[X,X,Y,Y] \n"
"vmul.q c620, c620[X,Y,X,Y], c600[X,X,Y,Y] \n"
"vadd.q c720, c720, c620 \n"
"vadd.q c720, c720, c620 \n"
"vadd.s s730, s730, s730[1] \n"
"mfv %0, s730 \n"
"vadd.s s730, s730, s730[1] \n"
"mfv %0, s730 \n"
".set pop \n"
: "=r"(fraction)
: "r"(inp));
".set pop \n"
:"=r"(fraction): "r"(inp)
);
inp++;
// Stop as soon as the last input frame has been consumed.
if (inp == inp_max)
goto done;
}
// Emit one output frame: store s720/s721 to the two floats at outp, shift
// the accumulator, subtract s701 from s730 and refresh fraction from s730.
__asm__ (
".set push \n"
".set noreorder \n"
".set push \n"
".set noreorder \n"
"vmul.p c720, c720, c720[1/2,1/2] \n"
"sv.s s720, 0(%1) \n"
"sv.s s721, 4(%1) \n"
"vmov.q c720, c720[Z,W,0,0] \n"
"vsub.s s730, s730, s701 \n"
"mfv %0, s730 \n"
"vmul.p c720, c720, c720[1/2,1/2] \n"
"sv.s s720, 0(%1) \n"
"sv.s s721, 4(%1) \n"
"vmov.q c720, c720[Z,W,0,0] \n"
"vsub.s s730, s730, s701 \n"
"mfv %0, s730 \n"
".set pop \n"
: "=r"(fraction)
: "r"(outp));
".set pop \n"
:"=r"(fraction): "r"(outp)
);
outp++;
}
// The VFPU state is assumed to remain intact in-between calls to resampler_CC_process.
done:
// Number of frames written = distance outp advanced from the buffer start.
data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
data->output_frames = outp - (audio_frame_float_t*)data->data_out;
}
// Release the resampler handle passed as re_.
// NOTE(review): rendered diff span - the cast/free lines and the
// "(void)re_" line come from different revisions of this function.
static void resampler_CC_free(void *re_)
{
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
if (re)
free(re);
(void)re_;
}
// Initialise the VFPU registers used by the resampler and log its name.
// Sets s710 = pi and s711 = 1.0 / pi, and zeroes c720 and c730.
// bandwidth_mod is not read anywhere in this block.
// NOTE(review): rendered diff span - the calloc/"return re" lines and the
// "return (void*)-1" line come from different revisions of this function.
static void *resampler_CC_init(double bandwidth_mod)
{
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)calloc(1, sizeof(rarch_CC_resampler_t));
if (!re)
return NULL;
__asm__ (
".set push\n"
".set noreorder\n"
".set push\n"
".set noreorder\n"
"vcst.s s710, VFPU_PI \n" // 710 = pi
"vcst.s s711, VFPU_1_PI \n" // 711 = 1.0 / (pi)
"vcst.s s710, VFPU_PI \n" // 710 = pi
"vcst.s s711, VFPU_1_PI \n" // 711 = 1.0 / (pi)
"vzero.q c720 \n"
"vzero.q c730 \n"
"vzero.q c720 \n"
"vzero.q c730 \n"
".set pop\n"
);
".set pop\n");
RARCH_LOG("\nConvoluted Cosine resampler (VFPU): \n");
return re;
return (void*)-1;
}
#else
//#define HAVE_SSE_MATHFUN_H
#if defined(__SSE2__) && defined(HAVE_SSE_MATHFUN_H)
#define USE_SSE2
#include "sse_mathfun.h"
// Scalar sine evaluated through the SSE2 sin_ps() routine of sse_mathfun.h.
// x is broadcast to all four lanes, sin_ps() is applied and lane 0 is returned.
//
// The result is extracted with _mm_cvtss_f32() instead of _mm_store1_ps():
// _mm_store1_ps() writes four floats to a 16-byte-aligned address, which
// overran the single static float used before. Dropping the static also
// makes the helper reentrant.
static inline float _mm_sin(float x)
{
   __m128 vector = _mm_set1_ps(x);
   vector = sin_ps(vector);
   return _mm_cvtss_f32(vector);
}
// Scalar cosine evaluated through the SSE2 cos_ps() routine of sse_mathfun.h.
// x is broadcast to all four lanes, cos_ps() is applied and lane 0 is returned.
//
// The result is extracted with _mm_cvtss_f32() instead of _mm_store1_ps():
// _mm_store1_ps() writes four floats to a 16-byte-aligned address, which
// overran the single static float used before. Dropping the static also
// makes the helper reentrant.
static inline float _mm_cos(float x)
{
   __m128 vector = _mm_set1_ps(x);
   vector = cos_ps(vector);
   return _mm_cvtss_f32(vector);
}
#define sin(x) _mm_sin(x)
#define cos(x) _mm_cos(x)
#endif
typedef struct audio_frame_float
{
float l;
float r;
}audio_frame_float_t;
// C reference version. Not optimized.
// Resampler state for the C reference implementation.
// NOTE(review): rendered diff span - the two closing lines are the same
// line from different revisions.
typedef struct rarch_CC_resampler
{
// Four frames of working storage; zeroed in resampler_CC_init.
audio_frame_float_t buffer[4];
// Running position value updated by the down/upsample routines.
float distance;
// Processing routine selected in resampler_CC_init.
void (*process)(void *re, struct resampler_data *data);
} rarch_CC_resampler_t;
}rarch_CC_resampler_t;
// Returns x*b*pi + sin(x*b*pi), clamped to the range [-pi, pi].
// NOTE(review): rendered diff span - the sin() lines and the sinf() lines
// are the same function from different revisions.
static inline float cc_int(float x, float b){
float val = x * b * M_PI + sin(x * b * M_PI);
return (val > M_PI)? M_PI : (val < -M_PI)? -M_PI : val;
static inline float cc_int(float x, float b)
{
float val = x * b * M_PI + sinf(x * b * M_PI);
return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val;
}
// Kernel weight at offset x for cutoff b: the difference of cc_int() taken
// at x + 0.5 and x - 0.5, divided by 2*pi.
// NOTE(review): rendered diff span - both signature lines are the same
// line from different revisions.
static inline float cc_kernel(float x, float b){
static inline float cc_kernel(float x, float b)
{
return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b)) / (2.0 * M_PI);
}
// Accumulates source scaled by ratio into target, channel by channel.
// source is only read; target is modified in place.
// NOTE(review): rendered diff span - both signature lines are the same
// line from different revisions.
static inline void add_to(const audio_frame_float_t* source,audio_frame_float_t* target, float ratio){
static inline void add_to(const audio_frame_float_t *source, audio_frame_float_t *target, float ratio)
{
target->l += source->l * ratio;
target->r += source->r * ratio;
}
static void resampler_CC_downsample(void *re_, struct resampler_data *data)
{
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
@ -227,9 +194,9 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
float ratio = 1.0 / data->ratio;
float b = data->ratio; // cutoff frequency
float b = data->ratio; // cutoff frequency
while(inp != inp_max)
while (inp != inp_max)
{
add_to(inp, re->buffer + 0, cc_kernel(re->distance, b));
add_to(inp, re->buffer + 1, cc_kernel(re->distance - ratio, b));
@ -240,7 +207,7 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
if (re->distance > (ratio + 0.5))
{
*outp=re->buffer[0];
*outp = re->buffer[0];
re->buffer[0] = re->buffer[1];
re->buffer[1] = re->buffer[2];
@ -248,12 +215,12 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
re->buffer[2].l = 0.0;
re->buffer[2].r = 0.0;
re->distance-=ratio;
re->distance -= ratio;
outp++;
}
}
data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
data->output_frames = outp - (audio_frame_float_t*)data->data_out;
}
#ifndef min
@ -262,17 +229,16 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
static void resampler_CC_upsample(void *re_, struct resampler_data *data)
{
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
audio_frame_float_t *inp_max = inp + data->input_frames;
audio_frame_float_t *outp = (audio_frame_float_t*)data->data_out;
float b = min(data->ratio, 1.00); // cutoff frequency
float b = min(data->ratio, 1.00); // cutoff frequency
float ratio = 1.0 / data->ratio;
while(inp != inp_max)
while (inp != inp_max)
{
re->buffer[0] = re->buffer[1];
re->buffer[1] = re->buffer[2];
@ -286,7 +252,7 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
outp->l = 0.0;
outp->r = 0.0;
for (i=0; i!=4; i++)
for (i = 0; i < 4; i++)
{
temp = cc_kernel(re->distance + 1.0 - i, b);
outp->l += re->buffer[i].l * temp;
@ -297,12 +263,11 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
outp++;
}
re->distance-= 1.0;
re->distance -= 1.0;
inp++;
}
data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
data->output_frames = outp - (audio_frame_float_t*)data->data_out;
}
static void resampler_CC_process(void *re_, struct resampler_data *data)
@ -325,15 +290,15 @@ static void *resampler_CC_init(double bandwidth_mod)
if (!re)
return NULL;
for (i=0; i!=4 ; i++)
for (i = 0; i < 4; i++)
{
re->buffer[i].l=0.0;
re->buffer[i].r=0.0;
re->buffer[i].l = 0.0;
re->buffer[i].r = 0.0;
}
RARCH_LOG("Convoluted Cosine resampler (C) : ");
if (bandwidth_mod < 0.75) // variations of data->ratio around 0.75 are safer than around 1.0 for both up/downsampler.
if (bandwidth_mod < 0.75) // variations of data->ratio around 0.75 are safer than around 1.0 for both up/downsampler.
{
RARCH_LOG("CC_downsample @%f \n", bandwidth_mod);
re->process = resampler_CC_downsample;
@ -356,3 +321,4 @@ const rarch_resampler_t CC_resampler = {
resampler_CC_free,
"CC",
};

View File

@ -1,5 +1,6 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
@ -167,59 +168,58 @@ static void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_
// Convert `samples` signed 16-bit values from `in` to floats in `out`,
// scaling by gain / 0x8000.
//
// out     : destination buffer; asserted 16-byte aligned in DEBUG builds.
// in      : source samples.
// samples : number of samples to convert.
// gain    : scale factor, divided by 0x8000 before use.
//
// Blocks of 16 samples go through the VFPU asm loop; the remainder is
// converted by the scalar loop at the end.
// NOTE(review): rendered diff span - lines of the pre-cleanup and
// post-cleanup revisions appear back to back (repeated asm lines, two
// loop headers, two asserts).
void audio_convert_s16_to_float_ALLEGREX(float *out,
const int16_t *in, size_t samples, float gain)
{
#ifdef DEBUG
// make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
rarch_assert(((uint32_t)out & 0xF) == 0);
// Make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
// Only the output buffer can be assumed to be 16-byte aligned.
rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif
size_t i;
gain = gain / 0x8000;
// Load the scaled gain into VFPU register s200 for the block loop below.
__asm__ (
".set push \n"
".set noreorder \n"
"mtv %0, s200 \n"
".set pop \n"
::"r"(gain)
);
".set push \n"
".set noreorder \n"
"mtv %0, s200 \n"
".set pop \n"
::"r"(gain));
// Block loop: 16 samples per pass, read from in + i and written to out + i.
for (i = 0; (i+16) <= samples; i+=16)
for (i = 0; i + 16 <= samples; i += 16)
{
__asm__ (
".set push \n"
".set noreorder \n"
".set push \n"
".set noreorder \n"
"lv.s s100, 0(%0) \n"
"lv.s s101, 4(%0) \n"
"lv.s s110, 8(%0) \n"
"lv.s s111, 12(%0) \n"
"lv.s s120, 16(%0) \n"
"lv.s s121, 20(%0) \n"
"lv.s s130, 24(%0) \n"
"lv.s s131, 28(%0) \n"
"lv.s s100, 0(%0) \n"
"lv.s s101, 4(%0) \n"
"lv.s s110, 8(%0) \n"
"lv.s s111, 12(%0) \n"
"lv.s s120, 16(%0) \n"
"lv.s s121, 20(%0) \n"
"lv.s s130, 24(%0) \n"
"lv.s s131, 28(%0) \n"
"vs2i.p c100, c100 \n"
"vs2i.p c110, c110 \n"
"vs2i.p c120, c120 \n"
"vs2i.p c130, c130 \n"
"vs2i.p c100, c100 \n"
"vs2i.p c110, c110 \n"
"vs2i.p c120, c120 \n"
"vs2i.p c130, c130 \n"
"vi2f.q c100, c100, 16 \n"
"vi2f.q c110, c110, 16 \n"
"vi2f.q c120, c120, 16 \n"
"vi2f.q c130, c130, 16 \n"
"vi2f.q c100, c100, 16 \n"
"vi2f.q c110, c110, 16 \n"
"vi2f.q c120, c120, 16 \n"
"vi2f.q c130, c130, 16 \n"
"vmscl.q e100, e100, s200 \n"
"vmscl.q e100, e100, s200 \n"
"sv.q c100, 0(%1) \n"
"sv.q c110, 16(%1) \n"
"sv.q c120, 32(%1) \n"
"sv.q c130, 48(%1) \n"
"sv.q c100, 0(%1) \n"
"sv.q c110, 16(%1) \n"
"sv.q c120, 32(%1) \n"
"sv.q c130, 48(%1) \n"
".set pop \n"
::"r"(in+i),"r"(out+i)
);
".set pop \n"
:: "r"(in + i), "r"(out + i));
}
// Scalar tail: convert whatever is left after the last full block of 16.
for (;i != samples; i++)
for (; i < samples; i++)
out[i] = (float)in[i] * gain;
}
@ -227,39 +227,38 @@ void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
const float *in, size_t samples)
{
#ifdef DEBUG
// make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
rarch_assert(((uint32_t)in & 0xF) == 0);
rarch_assert(((uint32_t)out & 0xF) == 0);
// Make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
// Both buffers are allocated by RetroArch, so can assume alignment.
rarch_assert(((uintptr_t)in & 0xf) == 0);
rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif
size_t i;
for (i = 0; (i+8) <= samples; i+=8)
for (i = 0; i + 8 <= samples; i += 8)
{
__asm__ (
".set push \n"
".set noreorder \n"
".set push \n"
".set noreorder \n"
"lv.q c100, 0(%0) \n"
"lv.q c110, 16(%0) \n"
"lv.q c100, 0(%0) \n"
"lv.q c110, 16(%0) \n"
"vf2in.q c100, c100, 31 \n"
"vf2in.q c110, c110, 31 \n"
"vi2s.q c100, c100 \n"
"vi2s.q c102, c110 \n"
"vf2in.q c100, c100, 31 \n"
"vf2in.q c110, c110, 31 \n"
"vi2s.q c100, c100 \n"
"vi2s.q c102, c110 \n"
"sv.q c100, 0(%1) \n"
"sv.q c100, 0(%1) \n"
".set pop \n"
::"r"(in+i),"r"(out+i)
);
".set pop \n"
:: "r"(in + i), "r"(out + i));
}
for (;i != samples; i++)
for (; i < samples; i++)
{
int32_t val = (int32_t)(in[i] * 0x8000);
out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);
}
}
#endif