diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk
index de472c0a25..b36ac2b57f 100644
--- a/android/native/jni/Android.mk
+++ b/android/native/jni/Android.mk
@@ -20,6 +20,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 
 ifeq ($(HAVE_NEON),1)
 LOCAL_CFLAGS += -DHAVE_NEON
+LOCAL_SRC_FILES += ../../../audio/utils_neon.S.neon
 endif
 
 ifeq ($(HAVE_SINC),1)
diff --git a/audio/utils.c b/audio/utils.c
index 4fa8a3f89e..4d7b202592 100644
--- a/audio/utils.c
+++ b/audio/utils.c
@@ -15,9 +15,9 @@
 
 #include "utils.h"
 
-#if __SSE2__
+#if defined(__SSE2__)
 #include <emmintrin.h>
-#elif __ALTIVEC__
+#elif defined(__ALTIVEC__)
 #include <altivec.h>
 #endif
 
@@ -39,7 +39,7 @@ void audio_convert_float_to_s16_C(int16_t *out,
    }
 }
 
-#if __SSE2__
+#if defined(__SSE2__)
 void audio_convert_s16_to_float_SSE2(float *out,
       const int16_t *in, size_t samples, float gain)
 {
@@ -84,7 +84,7 @@ void audio_convert_float_to_s16_SSE2(int16_t *out,
    audio_convert_float_to_s16_C(out, in, samples - i);
 }
 
-#elif __ALTIVEC__
+#elif defined(__ALTIVEC__)
 void audio_convert_s16_to_float_altivec(float *out,
       const int16_t *in, size_t samples, float gain)
 {
@@ -133,6 +133,45 @@ void audio_convert_float_to_s16_altivec(int16_t *out,
    else
       audio_convert_float_to_s16_C(out, in, samples);
 }
+#elif defined(HAVE_NEON)
+/* NEON kernels from audio/utils_neon.S. Each converts exactly `samples`
+ * elements in batches of 8 and only tests the counter after a batch, so
+ * `samples` must be a non-zero multiple of 8. The gain is passed by address
+ * so the kernel does not depend on the float argument-passing convention
+ * (softfp vs. hard-float). */
+void audio_convert_s16_float_asm(float *out, const int16_t *in,
+      size_t samples, const float *gain);
+void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
+
+/* Converts signed 16-bit samples to float, applying gain.
+ * The largest multiple of 8 samples goes through the NEON kernel,
+ * the remaining 0-7 samples through the portable C routine. */
+void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples,
+      float gain)
+{
+   size_t aligned_samples = samples & ~(size_t)7;
+
+   /* A zero count would wrap in the kernel's bottom-tested loop. */
+   if (aligned_samples)
+      audio_convert_s16_float_asm(out, in, aligned_samples, &gain);
+
+   audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
+         samples - aligned_samples, gain);
+}
+
+/* Converts float samples to signed 16-bit.
+ * Same split as above: NEON for the multiple-of-8 part, C for the tail. */
+void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples)
+{
+   size_t aligned_samples = samples & ~(size_t)7;
+
+   /* A zero count would wrap in the kernel's bottom-tested loop. */
+   if (aligned_samples)
+      audio_convert_float_s16_asm(out, in, aligned_samples);
+
+   audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
+         samples - aligned_samples);
+}
 #endif
 
 
diff --git a/audio/utils.h b/audio/utils.h
index 26238881cc..6b5d356926 100644
--- a/audio/utils.h
+++ b/audio/utils.h
@@ -19,7 +19,7 @@
 #include <stdint.h>
 #include <stddef.h>
 
-#if __SSE2__
+#if defined(__SSE2__)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_SSE2
 #define audio_convert_float_to_s16 audio_convert_float_to_s16_SSE2
 
@@ -29,7 +29,7 @@ void audio_convert_s16_to_float_SSE2(float *out,
 void audio_convert_float_to_s16_SSE2(int16_t *out,
       const float *in, size_t samples);
 
-#elif __ALTIVEC__
+#elif defined(__ALTIVEC__)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_altivec
 #define audio_convert_float_to_s16 audio_convert_float_to_s16_altivec
 
@@ -39,6 +39,16 @@ void audio_convert_s16_to_float_altivec(float *out,
 void audio_convert_float_to_s16_altivec(int16_t *out,
       const float *in, size_t samples);
 
+#elif defined(HAVE_NEON)
+#define audio_convert_s16_to_float audio_convert_s16_to_float_neon
+#define audio_convert_float_to_s16 audio_convert_float_to_s16_neon
+
+void audio_convert_s16_to_float_neon(float *out,
+      const int16_t *in, size_t samples, float gain);
+
+void audio_convert_float_to_s16_neon(int16_t *out,
+      const float *in, size_t samples);
+
 #else
 #define audio_convert_s16_to_float audio_convert_s16_to_float_C
 #define audio_convert_float_to_s16 audio_convert_float_to_s16_C
diff --git a/audio/utils_neon.S b/audio/utils_neon.S
new file mode 100644
index 0000000000..c4834a9249
--- /dev/null
+++ b/audio/utils_neon.S
@@ -0,0 +1,97 @@
+/* RetroArch - A frontend for libretro.
+ * Copyright (C) 2010-2012 - Hans-Kristian Arntzen
+ *
+ * RetroArch is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with RetroArch.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+.arm
+
+.align 4
+.global audio_convert_s16_float_asm
+.type audio_convert_s16_float_asm, %function
+# audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples, const float *gain)
+# r0 = out, r1 = in, r2 = samples (non-zero multiple of 8), r3 = pointer to gain.
+# Only d0-d6 and d16-d17 are written; d8-d15 are callee-saved under the AAPCS.
+audio_convert_s16_float_asm:
+   # Hacky way to get a constant of 2^-15.
+   # Might be faster to just load a constant from memory.
+   # It's just done once however ...
+   vmov.f32 q8, #0.25
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vadd.f32 q8, q8, q8
+
+   # Fold the gain into the scale factor: q8 = gain * 2^-15 in every lane.
+   vld1.f32 {d6[0]}, [r3]
+   vmul.f32 q8, q8, d6[0]
+
+1:
+   # Preload here?
+   vld1.s16 {q0}, [r1]!
+
+   # Widen to 32-bit
+   vmovl.s16 q1, d0
+   vmovl.s16 q2, d1
+
+   # Convert to float
+   vcvt.f32.s32 q1, q1
+   vcvt.f32.s32 q2, q2
+
+   vmul.f32 q1, q1, q8
+   vmul.f32 q2, q2, q8
+
+   vst1.f32 {q1-q2}, [r0]!
+
+   # Bottom-tested loop: the caller must pass a non-zero multiple of 8.
+   subs r2, r2, #8
+   bne 1b
+
+   bx lr
+
+.align 4
+.global audio_convert_float_s16_asm
+.type audio_convert_float_s16_asm, %function
+# audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
+# r0 = out, r1 = in, r2 = samples (non-zero multiple of 8).
+# Only d0-d5 and d16-d19 are written; d8-d15 are callee-saved under the AAPCS.
+audio_convert_float_s16_asm:
+   # Hacky way to get a constant of 2^15.
+   # ((2^4)^2)^2 * 0.5 = 2^15
+   vmov.f32 q8, #16.0
+   vmov.f32 q9, #0.5
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q9
+
+1:
+   # Preload here?
+   vld1.f32 {q0-q1}, [r1]!
+
+   vmul.f32 q0, q0, q8
+   vmul.f32 q1, q1, q8
+
+   # Float -> s32 (rounds toward zero), then saturating narrow to s16.
+   vcvt.s32.f32 q0, q0
+   vcvt.s32.f32 q1, q1
+
+   vqmovn.s32 d4, q0
+   vqmovn.s32 d5, q1
+
+   vst1.s16 {d4-d5}, [r0]!
+
+   # Bottom-tested loop: the caller must pass a non-zero multiple of 8.
+   subs r2, r2, #8
+   bne 1b
+
+   bx lr
+