Stylistic cleanups in CC resampler.

This commit is contained in:
Themaister 2014-03-23 14:14:42 +01:00
parent 0c57a1726b
commit 4d9ff7d147
2 changed files with 164 additions and 199 deletions

View File

@ -1,10 +1,21 @@
/* /* RetroArch - A frontend for libretro.
* Convoluted Cosine Resampler * Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com ) * Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
* *
* licence: GPLv3 * RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/ */
// Convoluted Cosine Resampler
#include "resampler.h" #include "resampler.h"
#include "../libretro.h" #include "../libretro.h"
#include "../performance.h" #include "../performance.h"
@ -19,19 +30,6 @@
#define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__) #define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__)
#endif #endif
#ifdef _MIPS_ARCH_ALLEGREX1
typedef struct rarch_CC_resampler
{
int dummy;
}rarch_CC_resampler_t;
static void resampler_CC_process(void *re_, struct resampler_data *data)
{
(void)re_;
// rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
float ratio,fraction;
typedef struct audio_frame_float typedef struct audio_frame_float
{ {
float l; float l;
@ -44,6 +42,11 @@ static void resampler_CC_process(void *re_, struct resampler_data *data)
int16_t r; int16_t r;
} audio_frame_int16_t; } audio_frame_int16_t;
#ifdef _MIPS_ARCH_ALLEGREX1
static void resampler_CC_process(void *re_, struct resampler_data *data)
{
(void)re_;
float ratio, fraction;
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in; audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
audio_frame_float_t *inp_max = inp + data->input_frames; audio_frame_float_t *inp_max = inp + data->input_frames;
@ -63,12 +66,13 @@ static void resampler_CC_process(void *re_, struct resampler_data *data)
"mfv %1, s730 \n" "mfv %1, s730 \n"
".set pop\n" ".set pop\n"
:"=r"(ratio),"=r"(fraction): "r"((float)data->ratio) : "=r"(ratio), "=r"(fraction)
: "r"((float)data->ratio)
); );
while(true) for (;;)
{ {
while ((fraction < ratio)) while (fraction < ratio)
{ {
__asm__ ( __asm__ (
".set push \n" ".set push \n"
@ -99,8 +103,9 @@ static void resampler_CC_process(void *re_, struct resampler_data *data)
"mfv %0, s730 \n" "mfv %0, s730 \n"
".set pop \n" ".set pop \n"
:"=r"(fraction): "r"(inp) : "=r"(fraction)
); : "r"(inp));
inp++; inp++;
if (inp == inp_max) if (inp == inp_max)
goto done; goto done;
@ -117,29 +122,26 @@ static void resampler_CC_process(void *re_, struct resampler_data *data)
"mfv %0, s730 \n" "mfv %0, s730 \n"
".set pop \n" ".set pop \n"
:"=r"(fraction): "r"(outp) : "=r"(fraction)
); : "r"(outp));
outp++; outp++;
} }
// The VFPU state is assumed to remain intact in-between calls to resampler_CC_process.
done: done:
data->output_frames = (outp - (audio_frame_float_t*)data->data_out); data->output_frames = outp - (audio_frame_float_t*)data->data_out;
} }
static void resampler_CC_free(void *re_) static void resampler_CC_free(void *re_)
{ {
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_; (void)re_;
if (re)
free(re);
} }
static void *resampler_CC_init(double bandwidth_mod) static void *resampler_CC_init(double bandwidth_mod)
{ {
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)calloc(1, sizeof(rarch_CC_resampler_t));
if (!re)
return NULL;
__asm__ ( __asm__ (
".set push\n" ".set push\n"
".set noreorder\n" ".set noreorder\n"
@ -150,75 +152,40 @@ static void *resampler_CC_init(double bandwidth_mod)
"vzero.q c720 \n" "vzero.q c720 \n"
"vzero.q c730 \n" "vzero.q c730 \n"
".set pop\n" ".set pop\n");
);
RARCH_LOG("\nConvoluted Cosine resampler (VFPU): \n"); RARCH_LOG("\nConvoluted Cosine resampler (VFPU): \n");
return re; return (void*)-1;
} }
#else #else
// C reference version. Not optimized.
//#define HAVE_SSE_MATHFUN_H
#if defined(__SSE2__) && defined(HAVE_SSE_MATHFUN_H)
#define USE_SSE2
#include "sse_mathfun.h"
static inline float _mm_sin(float x)
{
static float temp;
__m128 vector = _mm_set1_ps(x);
vector = sin_ps(vector);
_mm_store1_ps(&temp,vector);
return temp;
}
static inline float _mm_cos(float x)
{
static float temp;
__m128 vector = _mm_set1_ps(x);
vector = cos_ps(vector);
_mm_store1_ps(&temp,vector);
return temp;
}
#define sin(x) _mm_sin(x)
#define cos(x) _mm_cos(x)
#endif
typedef struct audio_frame_float
{
float l;
float r;
}audio_frame_float_t;
typedef struct rarch_CC_resampler typedef struct rarch_CC_resampler
{ {
audio_frame_float_t buffer[4]; audio_frame_float_t buffer[4];
float distance; float distance;
void (*process)(void *re, struct resampler_data *data); void (*process)(void *re, struct resampler_data *data);
} rarch_CC_resampler_t; } rarch_CC_resampler_t;
static inline float cc_int(float x, float b)
{
static inline float cc_int(float x, float b){ float val = x * b * M_PI + sinf(x * b * M_PI);
float val = x * b * M_PI + sin(x * b * M_PI);
return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val; return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val;
} }
static inline float cc_kernel(float x, float b){ static inline float cc_kernel(float x, float b)
{
return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b)) / (2.0 * M_PI); return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b)) / (2.0 * M_PI);
} }
static inline void add_to(const audio_frame_float_t* source,audio_frame_float_t* target, float ratio){ static inline void add_to(const audio_frame_float_t *source, audio_frame_float_t *target, float ratio)
{
target->l += source->l * ratio; target->l += source->l * ratio;
target->r += source->r * ratio; target->r += source->r * ratio;
} }
static void resampler_CC_downsample(void *re_, struct resampler_data *data) static void resampler_CC_downsample(void *re_, struct resampler_data *data)
{ {
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_; rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in; audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
@ -253,7 +220,7 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
} }
} }
data->output_frames = (outp - (audio_frame_float_t*)data->data_out); data->output_frames = outp - (audio_frame_float_t*)data->data_out;
} }
#ifndef min #ifndef min
@ -262,7 +229,6 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
static void resampler_CC_upsample(void *re_, struct resampler_data *data) static void resampler_CC_upsample(void *re_, struct resampler_data *data)
{ {
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_; rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in; audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
@ -286,7 +252,7 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
outp->l = 0.0; outp->l = 0.0;
outp->r = 0.0; outp->r = 0.0;
for (i=0; i!=4; i++) for (i = 0; i < 4; i++)
{ {
temp = cc_kernel(re->distance + 1.0 - i, b); temp = cc_kernel(re->distance + 1.0 - i, b);
outp->l += re->buffer[i].l * temp; outp->l += re->buffer[i].l * temp;
@ -301,8 +267,7 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
inp++; inp++;
} }
data->output_frames = (outp - (audio_frame_float_t*)data->data_out); data->output_frames = outp - (audio_frame_float_t*)data->data_out;
} }
static void resampler_CC_process(void *re_, struct resampler_data *data) static void resampler_CC_process(void *re_, struct resampler_data *data)
@ -325,7 +290,7 @@ static void *resampler_CC_init(double bandwidth_mod)
if (!re) if (!re)
return NULL; return NULL;
for (i=0; i!=4 ; i++) for (i = 0; i < 4; i++)
{ {
re->buffer[i].l = 0.0; re->buffer[i].l = 0.0;
re->buffer[i].r = 0.0; re->buffer[i].r = 0.0;
@ -356,3 +321,4 @@ const rarch_resampler_t CC_resampler = {
resampler_CC_free, resampler_CC_free,
"CC", "CC",
}; };

View File

@ -1,5 +1,6 @@
/* RetroArch - A frontend for libretro. /* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen * Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
* *
* RetroArch is free software: you can redistribute it and/or modify it under the terms * RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found- * of the GNU General Public License as published by the Free Software Found-
@ -167,11 +168,12 @@ static void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_
void audio_convert_s16_to_float_ALLEGREX(float *out, void audio_convert_s16_to_float_ALLEGREX(float *out,
const int16_t *in, size_t samples, float gain) const int16_t *in, size_t samples, float gain)
{ {
#ifdef DEBUG #ifdef DEBUG
// make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK // Make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
rarch_assert(((uint32_t)out & 0xF) == 0); // Only the output buffer can be assumed to be 16-byte aligned.
rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif #endif
size_t i; size_t i;
gain = gain / 0x8000; gain = gain / 0x8000;
__asm__ ( __asm__ (
@ -179,10 +181,9 @@ void audio_convert_s16_to_float_ALLEGREX(float *out,
".set noreorder \n" ".set noreorder \n"
"mtv %0, s200 \n" "mtv %0, s200 \n"
".set pop \n" ".set pop \n"
::"r"(gain) ::"r"(gain));
);
for (i = 0; (i+16) <= samples; i+=16) for (i = 0; i + 16 <= samples; i += 16)
{ {
__asm__ ( __asm__ (
".set push \n" ".set push \n"
@ -215,11 +216,10 @@ void audio_convert_s16_to_float_ALLEGREX(float *out,
"sv.q c130, 48(%1) \n" "sv.q c130, 48(%1) \n"
".set pop \n" ".set pop \n"
::"r"(in+i),"r"(out+i) :: "r"(in + i), "r"(out + i));
);
} }
for (;i != samples; i++) for (; i < samples; i++)
out[i] = (float)in[i] * gain; out[i] = (float)in[i] * gain;
} }
@ -227,13 +227,14 @@ void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
const float *in, size_t samples) const float *in, size_t samples)
{ {
#ifdef DEBUG #ifdef DEBUG
// make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK // Make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
rarch_assert(((uint32_t)in & 0xF) == 0); // Both buffers are allocated by RetroArch, so can assume alignment.
rarch_assert(((uint32_t)out & 0xF) == 0); rarch_assert(((uintptr_t)in & 0xf) == 0);
rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif #endif
size_t i; size_t i;
for (i = 0; (i+8) <= samples; i+=8) for (i = 0; i + 8 <= samples; i += 8)
{ {
__asm__ ( __asm__ (
".set push \n" ".set push \n"
@ -250,16 +251,14 @@ void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
"sv.q c100, 0(%1) \n" "sv.q c100, 0(%1) \n"
".set pop \n" ".set pop \n"
::"r"(in+i),"r"(out+i) :: "r"(in + i), "r"(out + i));
);
} }
for (;i != samples; i++) for (; i < samples; i++)
{ {
int32_t val = (int32_t)(in[i] * 0x8000); int32_t val = (int32_t)(in[i] * 0x8000);
out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val); out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);
} }
} }
#endif #endif