Merge pull request #606 from aliaspider/master

(PSP) VFPU optimized audio resampler and s16 <-> float conversions
This commit is contained in:
Twinaphex 2014-03-14 17:02:04 +01:00
commit 7fa8920357
8 changed files with 286 additions and 1 deletions

167
audio/cc_resampler.c Normal file
View File

@ -0,0 +1,167 @@
/*
* Convoluted Cosine Resampler
* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
*
* licence: GPLv3
*/
#include "resampler.h"
#include "../libretro.h"
#include "../performance.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../general.h"
/* Handle type returned by resampler_CC_init() and released by
 * resampler_CC_free(). The process routine ignores the handle, so the
 * struct only carries a placeholder member. */
typedef struct rarch_CC_resampler
{
   int dummy;   /* placeholder so the struct is not empty */
} rarch_CC_resampler_t;
/* One interleaved stereo frame of float samples; used to step through
 * the resampler's input and output buffers a frame at a time. */
typedef struct audio_frame_float
{
   float l;   /* left channel sample  */
   float r;   /* right channel sample */
} audio_frame_float_t;
/* One interleaved stereo frame of signed 16-bit samples.
 * NOTE(review): not referenced by the code visible in this file. */
typedef struct audio_frame_int16
{
   int16_t l;   /* left channel sample  */
   int16_t r;   /* right channel sample */
} audio_frame_int16_t;
#ifdef _MIPS_ARCH_ALLEGREX
/*
 * Resamples interleaved stereo float frames from data->data_in into
 * data->data_out using the PSP VFPU, and stores the number of frames
 * written in data->output_frames.
 *
 * re_   : resampler handle; unused, all state lives in VFPU registers.
 * data  : reads data_in, input_frames and ratio; writes data_out and
 *         output_frames.
 *
 * VFPU registers carried between calls (set up by resampler_CC_init()):
 *   s710 = pi, s711 = 1/pi, c720 = output accumulators,
 *   s730 = running fraction.
 * They are deliberately left untouched on exit so that consecutive calls
 * continue the same stream.
 *
 * An empty input buffer produces zero output frames and leaves the VFPU
 * state unchanged (the loop below always consumes at least one input
 * frame before it re-checks the end pointer, so it must not be entered
 * with nothing to read).
 */
static void resampler_CC_process(void *re_, struct resampler_data *data)
{
   float ratio, fraction;
   audio_frame_float_t *inp     = (audio_frame_float_t*)data->data_in;
   audio_frame_float_t *inp_max = inp + data->input_frames;
   audio_frame_float_t *outp    = (audio_frame_float_t*)data->data_out;

   (void)re_;

   // Nothing to consume: bail out before touching *inp.
   if (inp == inp_max)
   {
      data->output_frames = 0;
      return;
   }

   // Load the per-call constants. Note that the C variable `ratio`
   // receives s701, i.e. 1.0 / data->ratio, and `fraction` receives the
   // fraction (s730) carried over from the previous call.
   __asm__ __volatile__ (
      ".set push\n"
      ".set noreorder\n"
      "mtv %2, s700 \n" // 700 = data->ratio = b
      "vrcp.s s701, s700 \n" // 701 = 1.0 / b
      "vadd.s s702, s700, s700 \n" // 702 = 2 * b
      "vmul.s s703, s700, s710 \n" // 703 = b * pi
      "mfv %0, s701 \n"
      "mfv %1, s730 \n"
      ".set pop\n"
      :"=r"(ratio),"=r"(fraction): "r"((float)data->ratio)
   );

   for (;;)
   {
      // Consume input frames, accumulating their weighted contribution
      // into c720, until the fraction reaches 1.0 / data->ratio.
      while ((fraction < ratio))
      {
         __asm__ __volatile__ (
            ".set push \n"
            ".set noreorder \n"
            "lv.s s620, 0(%1) \n" // 620 = inp->l
            "lv.s s621, 4(%1) \n" // 621 = inp->r
            "vsub.s s731, s701, s730 \n"
            "vadd.q c600, c730[-X,Y,-X,Y], c730[1/2,1/2,-1/2,-1/2]\n"
            "vmul.q c610, c600, c700[Z,Z,Z,Z] \n" //*2*b
            "vmul.q c600, c600, c700[W,W,W,W] \n" //*b*pi
            "vsin.q c610, c610 \n"
            "vadd.q c600, c600, c610 \n"
            "vmul.q c600[-1:1,-1:1,-1:1,-1:1], c600, c710[Y,Y,Y,Y] \n"
            "vsub.p c600, c600, c602 \n"
            "vmul.q c620, c620[X,Y,X,Y], c600[X,X,Y,Y] \n"
            "vadd.q c720, c720, c620 \n" // accumulate weighted l/r
            "vadd.s s730, s730, s730[1] \n" // fraction += 1
            "mfv %0, s730 \n"
            ".set pop \n"
            :"=r"(fraction): "r"(inp)
            :"memory" // the asm reads *inp behind the compiler's back
         );
         inp++;
         if (inp == inp_max)
            goto done;
      }

      // Emit one output frame, shift the second accumulator pair down
      // and step the fraction back by 1.0 / data->ratio.
      __asm__ __volatile__ (
         ".set push \n"
         ".set noreorder \n"
         "vmul.p c720, c720, c720[1/2,1/2] \n"
         "sv.s s720, 0(%1) \n" // outp->l
         "sv.s s721, 4(%1) \n" // outp->r
         "vmov.q c720, c720[Z,W,0,0] \n"
         "vsub.s s730, s730, s701 \n"
         "mfv %0, s730 \n"
         ".set pop \n"
         :"=r"(fraction): "r"(outp)
         :"memory" // the asm writes *outp behind the compiler's back
      );
      outp++;
   }

done:
   data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
}
#else
#error "platform not supported"
#endif
/*
 * Releases a handle obtained from resampler_CC_init().
 *
 * re_ : handle to free; may be NULL, in which case nothing happens
 *       (free(NULL) is a no-op, so no explicit guard is needed).
 */
static void resampler_CC_free(void *re_)
{
   free(re_);
}
/*
 * Allocates a CC resampler handle and resets the VFPU registers that
 * resampler_CC_process() relies on.
 *
 * bandwidth_mod : not referenced by this implementation.
 *
 * Returns the zero-initialised handle, or NULL if the allocation fails
 * (in which case the VFPU registers are left untouched).
 *
 * NOTE(review): the resampler state is kept in VFPU registers rather than
 * in the handle, so it presumably cannot be shared between several live
 * instances and must not be clobbered by other VFPU code using matrix 7
 * -- confirm against the rest of the PSP port.
 */
static void *resampler_CC_init(double bandwidth_mod)
{
rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)calloc(1, sizeof(rarch_CC_resampler_t));
if (!re)
return NULL;
// Load the constants and clear the accumulator / fraction registers
// that resampler_CC_process() reads without initialising itself.
__asm__ (
".set push\n"
".set noreorder\n"
"vcst.s s710, VFPU_PI \n" // 710 = pi
"vcst.s s711, VFPU_1_PI \n" // 711 = 1.0 / (pi)
"vzero.q c720 \n" // c720 = output accumulators
"vzero.q c730 \n" // s730 = running fraction
".set pop\n"
);
return re;
}
// Backend descriptor for the "CC" resampler. The initialisers are
// positional; their order must match the field order of rarch_resampler_t
// (declared in resampler.h, not visible here).
const rarch_resampler_t CC_resampler = {
resampler_CC_init,    // creates the handle and resets VFPU state
resampler_CC_process, // resamples one buffer
resampler_CC_free,    // releases the handle
"CC",                 // backend name
};

View File

@ -24,6 +24,9 @@
// NULL-terminated list of the resampler backends compiled into this build.
// The sinc resampler is always listed first; the VFPU-based CC resampler
// is only available on PSP.
static const rarch_resampler_t *backends[] = {
&sinc_resampler,
#if defined(PSP)
&CC_resampler,
#endif
NULL,
};

View File

@ -51,6 +51,9 @@ typedef struct rarch_resampler
} rarch_resampler_t;
// Resampler backend descriptors, defined in their respective source files.
extern const rarch_resampler_t sinc_resampler;
#if defined(PSP)
// VFPU-based resampler; only built for PSP.
extern const rarch_resampler_t CC_resampler;
#endif
// Reallocs resampler. Will free previous handle before allocating a new one.
// If ident is NULL, first resampler will be used.

View File

@ -163,6 +163,104 @@ static void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_
audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
samples - aligned_samples);
}
#elif defined(_MIPS_ARCH_ALLEGREX)
void audio_convert_s16_to_float_ALLEGREX(float *out,
const int16_t *in, size_t samples, float gain)
{
#ifdef DEBUG
// make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
rarch_assert(((uint32_t)out & 0xF) == 0);
#endif
size_t i;
gain = gain / 0x8000;
__asm__ (
".set push \n"
".set noreorder \n"
"mtv %0, s200 \n"
".set pop \n"
::"r"(gain)
);
for (i = 0; (i+16) <= samples; i+=16)
{
__asm__ (
".set push \n"
".set noreorder \n"
"lv.s s100, 0(%0) \n"
"lv.s s101, 4(%0) \n"
"lv.s s110, 8(%0) \n"
"lv.s s111, 12(%0) \n"
"lv.s s120, 16(%0) \n"
"lv.s s121, 20(%0) \n"
"lv.s s130, 24(%0) \n"
"lv.s s131, 28(%0) \n"
"vs2i.p c100, c100 \n"
"vs2i.p c110, c110 \n"
"vs2i.p c120, c120 \n"
"vs2i.p c130, c130 \n"
"vi2f.q c100, c100, 16 \n"
"vi2f.q c110, c110, 16 \n"
"vi2f.q c120, c120, 16 \n"
"vi2f.q c130, c130, 16 \n"
"vmscl.q e100, e100, s200 \n"
"sv.q c100, 0(%1) \n"
"sv.q c110, 16(%1) \n"
"sv.q c120, 32(%1) \n"
"sv.q c130, 48(%1) \n"
".set pop \n"
::"r"(in+i),"r"(out+i)
);
}
for (;i != samples; i++)
out[i] = (float)in[i] * gain;
}
void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
const float *in, size_t samples)
{
#ifdef DEBUG
// make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
rarch_assert(((uint32_t)in & 0xF) == 0);
rarch_assert(((uint32_t)out & 0xF) == 0);
#endif
size_t i;
for (i = 0; (i+8) <= samples; i+=8)
{
__asm__ (
".set push \n"
".set noreorder \n"
"lv.q c100, 0(%0) \n"
"lv.q c110, 16(%0) \n"
"vf2in.q c100, c100, 31 \n"
"vf2in.q c110, c110, 31 \n"
"vi2s.q c100, c100 \n"
"vi2s.q c102, c110 \n"
"sv.q c100, 0(%1) \n"
".set pop \n"
::"r"(in+i),"r"(out+i)
);
}
for (;i != samples; i++)
{
int32_t val = (int32_t)(in[i] * 0x8000);
out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);
}
}
#endif
void audio_convert_init_simd(void)

View File

@ -51,7 +51,14 @@ void (*audio_convert_s16_to_float_arm)(float *out,
const int16_t *in, size_t samples, float gain);
void (*audio_convert_float_to_s16_arm)(int16_t *out,
const float *in, size_t samples);
#elif defined(_MIPS_ARCH_ALLEGREX)
// On Allegrex (PSP) the generic conversion names map directly onto the
// VFPU implementations; no runtime dispatch is involved.
#define audio_convert_s16_to_float audio_convert_s16_to_float_ALLEGREX
#define audio_convert_float_to_s16 audio_convert_float_to_s16_ALLEGREX
// Converts `samples` int16_t values from `in` to float in `out`, scaled by
// gain / 0x8000.
void audio_convert_s16_to_float_ALLEGREX(float *out,
const int16_t *in, size_t samples, float gain);
// Converts `samples` floats from `in` to int16_t in `out`.
void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
const float *in, size_t samples);
#else
#define audio_convert_s16_to_float audio_convert_s16_to_float_C
#define audio_convert_float_to_s16 audio_convert_float_to_s16_C

View File

@ -447,7 +447,11 @@ static const int out_latency = 64;
static const bool audio_sync = true;
// Default resampler
// PSP builds default to the VFPU-based "CC" resampler; every other
// platform defaults to "sinc". The string is the backend's name.
#if defined(PSP)
static const char *audio_resampler = "CC";
#else
static const char *audio_resampler = "sinc";
#endif
// Experimental rate control
#if defined(GEKKO) || !defined(RARCH_CONSOLE)

View File

@ -27,7 +27,7 @@
#include "../../psp/sdk_defines.h"
PSP_MODULE_INFO("RetroArch PSP", 0, 1, 1);
// NOTE(review): these two lines look like the removed/added pair of a diff
// hunk whose -/+ markers were lost; presumably only the second one (with
// THREAD_ATTR_VFPU) is meant to remain. The VFPU flag is presumably what
// allows the main thread to execute the VFPU audio code -- confirm
// against the PSPSDK documentation.
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER);
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER|THREAD_ATTR_VFPU);
PSP_HEAP_SIZE_MAX();
static int exit_callback(int arg1, int arg2, void *common)

View File

@ -358,6 +358,9 @@ AUDIO RESAMPLER
============================================================ */
#include "../audio/resampler.c"
#include "../audio/sinc.c"
#ifdef PSP
#include "../audio/cc_resampler.c"
#endif
/*============================================================
CAMERA