2017-01-22 13:58:20 +01:00
|
|
|
/* Copyright (C) 2010-2017 The RetroArch team
|
2012-04-07 13:26:27 +02:00
|
|
|
*
|
2016-11-08 17:55:23 +01:00
|
|
|
* ---------------------------------------------------------------------------------------
|
|
|
|
* The following license statement only applies to this file (s16_to_float.c).
|
|
|
|
* ---------------------------------------------------------------------------------------
|
2012-04-07 13:26:27 +02:00
|
|
|
*
|
2016-11-08 17:55:23 +01:00
|
|
|
* Permission is hereby granted, free of charge,
|
|
|
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation the rights to
|
|
|
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
|
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
2012-04-07 13:26:27 +02:00
|
|
|
*
|
2016-11-08 17:55:23 +01:00
|
|
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
|
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
2012-04-07 13:26:27 +02:00
|
|
|
*/
|
2012-12-05 23:17:07 +01:00
|
|
|
#if defined(__SSE2__)
|
2011-12-02 01:34:02 +01:00
|
|
|
#include <emmintrin.h>
|
2012-12-05 23:17:07 +01:00
|
|
|
#elif defined(__ALTIVEC__)
|
2011-12-02 01:34:02 +01:00
|
|
|
#include <altivec.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-10 20:31:27 +02:00
|
|
|
#include <boolean.h>
|
|
|
|
#include <features/features_cpu.h>
|
2016-12-12 13:02:29 +01:00
|
|
|
#include <audio/conversion/s16_to_float.h>
|
2016-05-10 20:31:27 +02:00
|
|
|
|
2017-02-25 02:15:15 +01:00
|
|
|
#if defined(__ARM_NEON__)
|
2017-01-31 07:58:58 +01:00
|
|
|
static bool s16_to_float_neon_enabled = false;
|
|
|
|
|
2017-01-31 07:33:58 +01:00
|
|
|
/* Avoid potential hard-float/soft-float ABI issues. */
|
|
|
|
void convert_s16_float_asm(float *out, const int16_t *in,
|
|
|
|
size_t samples, const float *gain);
|
|
|
|
#endif
|
|
|
|
|
2015-01-09 01:43:57 +01:00
|
|
|
/**
 * convert_s16_to_float:
 * @out               : output buffer
 * @in                : input buffer
 * @samples           : size of samples to be converted
 * @gain              : gain applied (e.g. audio volume)
 *
 * Converts from signed integer 16-bit
 * to floating point.
 *
 * Every output sample is in[i] * gain / 0x8000. A SIMD fast path
 * (selected at compile time) handles the bulk of the buffer and a
 * scalar loop finishes whatever is left over.
 **/
void convert_s16_to_float(float *out,
      const int16_t *in, size_t samples, float gain)
{
   size_t i      = 0;

#if defined(__SSE2__)
   /* Samples are unpacked into the upper 16 bits of each 32-bit lane
    * (zero goes into the lower half), i.e. they are pre-multiplied by
    * 0x10000. The factor therefore divides by 2^31 rather than 2^15. */
   float fgain   = gain / UINT32_C(0x80000000);
   __m128 factor = _mm_set1_ps(fgain);

   for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
   {
      __m128i input    = _mm_loadu_si128((const __m128i *)in);
      __m128i regs_l   = _mm_unpacklo_epi16(_mm_setzero_si128(), input);
      __m128i regs_r   = _mm_unpackhi_epi16(_mm_setzero_si128(), input);
      __m128 output_l  = _mm_mul_ps(_mm_cvtepi32_ps(regs_l), factor);
      __m128 output_r  = _mm_mul_ps(_mm_cvtepi32_ps(regs_r), factor);

      _mm_storeu_ps(out + 0, output_l);
      _mm_storeu_ps(out + 4, output_r);
   }

   /* in/out were advanced by the loop, so restart the index at zero
    * and only count the samples that are still pending. */
   samples = samples - i;
   i       = 0;
#elif defined(__ALTIVEC__)
   /* Unaligned loads/store is a bit expensive, so we
    * optimize for the good path (very likely). */
   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
   {
      const vector float gain_vec = { gain, gain , gain, gain };
      const vector float zero_vec = { 0.0f, 0.0f, 0.0f, 0.0f};

      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
      {
         vector signed short input = vec_ld(0, in);
         vector signed int hi      = vec_unpackh(input);
         vector signed int lo      = vec_unpackl(input);
         vector float out_hi       = vec_madd(vec_ctf(hi, 15), gain_vec, zero_vec);
         vector float out_lo       = vec_madd(vec_ctf(lo, 15), gain_vec, zero_vec);

         vec_st(out_hi,  0, out);
         vec_st(out_lo, 16, out);
      }

      /* in/out were advanced by the loop, so restart the index at zero
       * and only count the samples that are still pending. */
      samples = samples - i;
      i       = 0;
   }
#elif defined(__ARM_NEON__)
   if (s16_to_float_neon_enabled)
   {
      /* The assembly routine is only given whole groups of 8 samples. */
      size_t aligned_samples = samples & ~(size_t)7;
      if (aligned_samples)
         convert_s16_float_asm(out, in, aligned_samples, &gain);

      /* Could do all conversion in ASM, but keep it simple for now. */
      out     = out + aligned_samples;
      in      = in  + aligned_samples;
      samples = samples - aligned_samples;
      i       = 0;
   }
#elif defined(_MIPS_ARCH_ALLEGREX)
#ifdef DEBUG
   /* Make sure the buffer is 16 byte aligned, this should be the
    * default behaviour of malloc in the PSPSDK.
    * Only the output buffer can be assumed to be 16-byte aligned. */
   retro_assert(((uintptr_t)out & 0xf) == 0);
#endif

   {
      /* Pre-scaled copy for the VFPU only. `gain` itself must stay
       * untouched here: the shared scalar tail below divides it by
       * 0x8000, and dividing it in this branch as well would scale
       * the trailing samples twice. */
      float vfpu_gain = gain / 0x8000;

      __asm__ (
            ".set push \n"
            ".set noreorder \n"
            "mtv %0, s200 \n"
            ".set pop \n"
            ::"r"(vfpu_gain));
   }

   /* 16 samples per iteration; in/out are NOT advanced, the tail loop
    * below continues from index i.
    * NOTE(review): the asm stores through %1 without declaring a
    * "memory" clobber - confirm this is safe with the PSP toolchain. */
   for (i = 0; i + 16 <= samples; i += 16)
   {
      __asm__ (
            ".set push \n"
            ".set noreorder \n"

            "lv.s s100, 0(%0) \n"
            "lv.s s101, 4(%0) \n"
            "lv.s s110, 8(%0) \n"
            "lv.s s111, 12(%0) \n"
            "lv.s s120, 16(%0) \n"
            "lv.s s121, 20(%0) \n"
            "lv.s s130, 24(%0) \n"
            "lv.s s131, 28(%0) \n"

            "vs2i.p c100, c100 \n"
            "vs2i.p c110, c110 \n"
            "vs2i.p c120, c120 \n"
            "vs2i.p c130, c130 \n"

            "vi2f.q c100, c100, 16 \n"
            "vi2f.q c110, c110, 16 \n"
            "vi2f.q c120, c120, 16 \n"
            "vi2f.q c130, c130, 16 \n"

            "vmscl.q e100, e100, s200 \n"

            "sv.q c100, 0(%1) \n"
            "sv.q c110, 16(%1) \n"
            "sv.q c120, 32(%1) \n"
            "sv.q c130, 48(%1) \n"

            ".set pop \n"
            :: "r"(in + i), "r"(out + i));
   }
#endif

   /* Scalar tail (and the complete conversion when no fast path ran):
    * normalise the gain exactly once, then convert sample by sample. */
   gain = gain / 0x8000;

   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
}
|
2011-12-02 01:34:02 +01:00
|
|
|
|
2015-01-09 01:43:57 +01:00
|
|
|
/**
 * convert_s16_to_float_init_simd:
 *
 * Queries the CPU features once and, on builds compiled
 * with __ARM_NEON__, enables the NEON conversion path of
 * convert_s16_to_float() when RETRO_SIMD_NEON is reported.
 * On every other build this function does nothing.
 **/
void convert_s16_to_float_init_simd(void)
{
#if defined(__ARM_NEON__)
   const unsigned features = cpu_features_get();

   /* The flag is only ever raised here, never cleared again. */
   if ((features & RETRO_SIMD_NEON) != 0)
      s16_to_float_neon_enabled = true;
#endif
}
|