/* Copyright (C) 2010-2016 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (float_to_s16.c). * --------------------------------------------------------------------------------------- * * Permission is hereby granted, free of charge, * to any person obtaining a copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #if defined(__SSE2__) #include #elif defined(__ALTIVEC__) #include #endif #include #include /** * convert_float_to_s16_C: * @out : output buffer * @in : input buffer * @samples : size of samples to be converted * * Converts floating point * to signed integer 16-bit. * * C implementation callback function. **/ void convert_float_to_s16_C(int16_t *out, const float *in, size_t samples) { size_t i; for (i = 0; i < samples; i++) { int32_t val = (int32_t)(in[i] * 0x8000); out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val); } } #if defined(__SSE2__) /** * convert_float_to_s16_SSE2: * @out : output buffer * @in : input buffer * @samples : size of samples to be converted * * Converts floating point * to signed integer 16-bit. * * SSE2 implementation callback function. **/ void convert_float_to_s16_SSE2(int16_t *out, const float *in, size_t samples) { size_t i; __m128 factor = _mm_set1_ps((float)0x8000); for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) { __m128 input_l = _mm_loadu_ps(in + 0); __m128 input_r = _mm_loadu_ps(in + 4); __m128 res_l = _mm_mul_ps(input_l, factor); __m128 res_r = _mm_mul_ps(input_r, factor); __m128i ints_l = _mm_cvtps_epi32(res_l); __m128i ints_r = _mm_cvtps_epi32(res_r); __m128i packed = _mm_packs_epi32(ints_l, ints_r); _mm_storeu_si128((__m128i *)out, packed); } convert_float_to_s16_C(out, in, samples - i); } #elif defined(__ALTIVEC__) /** * convert_float_to_s16_altivec: * @out : output buffer * @in : input buffer * @samples : size of samples to be converted * * Converts floating point * to signed integer 16-bit. * * AltiVec implementation callback function. **/ void convert_float_to_s16_altivec(int16_t *out, const float *in, size_t samples) { int samples_in = samples; /* Unaligned loads/store is a bit expensive, * so we optimize for the good path (very likely). */ if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0) { size_t i; for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) { vector float input0 = vec_ld( 0, in); vector float input1 = vec_ld(16, in); vector signed int result0 = vec_cts(input0, 15); vector signed int result1 = vec_cts(input1, 15); vec_st(vec_packs(result0, result1), 0, out); } samples_in -= i; } convert_float_to_s16_C(out, in, samples_in); } #elif defined(__ARM_NEON__) && !defined(VITA) void convert_float_s16_asm(int16_t *out, const float *in, size_t samples); /** * convert_float_to_s16_neon: * @out : output buffer * @in : input buffer * @samples : size of samples to be converted * * Converts floating point * to signed integer 16-bit. * * ARM NEON implementation callback function. **/ static void convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples) { size_t aligned_samples = samples & ~7; if (aligned_samples) convert_float_s16_asm(out, in, aligned_samples); convert_float_to_s16_C(out + aligned_samples, in + aligned_samples, samples - aligned_samples); } #elif defined(_MIPS_ARCH_ALLEGREX) /** * convert_float_to_s16_ALLEGREX: * @out : output buffer * @in : input buffer * @samples : size of samples to be converted * * Converts floating point * to signed integer 16-bit. * * MIPS ALLEGREX implementation callback function. **/ void convert_float_to_s16_ALLEGREX(int16_t *out, const float *in, size_t samples) { size_t i; #ifdef DEBUG /* Make sure the buffers are 16 byte aligned, this should be * the default behaviour of malloc in the PSPSDK. * Assume alignment. */ retro_assert(((uintptr_t)in & 0xf) == 0); retro_assert(((uintptr_t)out & 0xf) == 0); #endif for (i = 0; i + 8 <= samples; i += 8) { __asm__ ( ".set push \n" ".set noreorder \n" "lv.q c100, 0(%0) \n" "lv.q c110, 16(%0) \n" "vf2in.q c100, c100, 31 \n" "vf2in.q c110, c110, 31 \n" "vi2s.q c100, c100 \n" "vi2s.q c102, c110 \n" "sv.q c100, 0(%1) \n" ".set pop \n" :: "r"(in + i), "r"(out + i)); } for (; i < samples; i++) { int32_t val = (int32_t)(in[i] * 0x8000); out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val); } } #endif /** * convert_float_to_s16_init_simd: * * Sets up function pointers for conversion * functions based on CPU features. **/ void convert_float_to_s16_init_simd(void) { unsigned cpu = cpu_features_get(); (void)cpu; #if defined(__ARM_NEON__) && !defined(VITA) convert_float_to_s16_arm = (cpu & RETRO_SIMD_NEON) ? convert_float_to_s16_neon : convert_float_to_s16_C; #endif }