From 92753e8b8a384df005af778db456683fea1481f7 Mon Sep 17 00:00:00 2001 From: Themaister Date: Sun, 29 Dec 2013 12:59:57 +0100 Subject: [PATCH] Add volume control to NEON ASM. --- audio/utils.c | 8 +++----- audio/utils_neon.S | 6 +++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/audio/utils.c b/audio/utils.c index 3b4e7963b1..09f18baa74 100644 --- a/audio/utils.c +++ b/audio/utils.c @@ -140,19 +140,17 @@ void audio_convert_float_to_s16_altivec(int16_t *out, audio_convert_float_to_s16_C(out, in, samples); } #elif defined(HAVE_NEON) -void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples); +void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples, const float *gain); // Avoid potential hard-float/soft-float ABI issues. static void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples, float gain) { - (void)gain; // gain is ignored for now. - size_t aligned_samples = samples & ~7; if (aligned_samples) - audio_convert_s16_float_asm(out, in, aligned_samples); + audio_convert_s16_float_asm(out, in, aligned_samples, &gain); // Could do all conversion in ASM, but keep it simple for now. audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples, - samples - aligned_samples, 1.0f); + samples - aligned_samples, gain); } void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples); diff --git a/audio/utils_neon.S b/audio/utils_neon.S index cf433fd90e..c0fc53ed75 100644 --- a/audio/utils_neon.S +++ b/audio/utils_neon.S @@ -17,7 +17,7 @@ .align 4 .globl audio_convert_s16_float_asm -# audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples) +# audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples, const float *gain) audio_convert_s16_float_asm: # Hacky way to get a constant of 2^-15. # Might be faster to just load a constant from memory. @@ -28,6 +28,10 @@ audio_convert_s16_float_asm: vmul.f32 q8, q8, q8 vadd.f32 q8, q8, q8 + # Apply gain + vld1.f32 {d6[0]}, [r3] + vmul.f32 q8, q8, d6[0] + 1: # Preload here? vld1.s16 {q0}, [r1]!