Stylistic cleanups in CC resampler.

2025-04-17 02:43:03 +00:00 · 2014-03-23 14:14:42 +01:00 · 2014-03-23 14:14:42 +01:00 · 4d9ff7d147
commit 4d9ff7d147
parent 0c57a1726b
2 changed files with 164 additions and 199 deletions
--- a/audio/cc_resampler.c
+++ b/audio/cc_resampler.c
@@ -1,10 +1,21 @@
-/*
- * Convoluted Cosine Resampler
- * Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
+ * 
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
 *
- * licence: GPLv3
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
 */

+// Convoluted Cosine Resampler
+
 #include "resampler.h"
 #include "../libretro.h"
 #include "../performance.h"
@@ -19,206 +30,162 @@
 #define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__)
 #endif

+typedef struct audio_frame_float
+{
+   float l;
+   float r;
+} audio_frame_float_t;
+
+typedef struct audio_frame_int16
+{
+   int16_t l;
+   int16_t r;
+} audio_frame_int16_t;

 #ifdef _MIPS_ARCH_ALLEGREX1
-typedef struct rarch_CC_resampler
-{
-   int dummy;
-}rarch_CC_resampler_t;
-
 static void resampler_CC_process(void *re_, struct resampler_data *data)
 {
   (void)re_;
-//   rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
-   float ratio,fraction;
-
-   typedef struct audio_frame_float
-   {
-      float l;
-      float r;
-   }audio_frame_float_t;
-
-   typedef struct audio_frame_int16
-   {
-      int16_t l;
-      int16_t r;
-   }audio_frame_int16_t;
-
+   float ratio, fraction;

   audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
   audio_frame_float_t *inp_max = inp + data->input_frames;
   audio_frame_float_t *outp = (audio_frame_float_t*)data->data_out;

   __asm__ (
-    ".set      push\n"
-    ".set      noreorder\n"
+         ".set      push\n"
+         ".set      noreorder\n"

-    "mtv       %2,   s700              \n"   // 700 = data->ratio = b
-//    "vsat0.s   s700, s700              \n"
-    "vrcp.s    s701, s700              \n"   // 701 = 1.0 / b
-    "vadd.s    s702, s700, s700        \n"   // 702 = 2 * b
-    "vmul.s    s703, s700, s710        \n"   // 703 = b * pi
+         "mtv       %2,   s700              \n"   // 700 = data->ratio = b
+         //    "vsat0.s   s700, s700              \n"
+         "vrcp.s    s701, s700              \n"   // 701 = 1.0 / b
+         "vadd.s    s702, s700, s700        \n"   // 702 = 2 * b
+         "vmul.s    s703, s700, s710        \n"   // 703 = b * pi

-    "mfv       %0,   s701              \n"
-    "mfv       %1,   s730              \n"
+         "mfv       %0,   s701              \n"
+         "mfv       %1,   s730              \n"

-    ".set      pop\n"
-    :"=r"(ratio),"=r"(fraction): "r"((float)data->ratio)
+         ".set      pop\n"
+         : "=r"(ratio), "=r"(fraction)
+         : "r"((float)data->ratio)
   );

-   while(true)
+   for (;;)
   {
-      while ((fraction < ratio))
+      while (fraction < ratio)
      {
         __asm__ (
-         ".set      push               \n"
-         ".set      noreorder          \n"
+               ".set      push               \n"
+               ".set      noreorder          \n"

-         "lv.s    s620, 0(%1)             \n"
-         "lv.s    s621, 4(%1)             \n"
+               "lv.s    s620, 0(%1)             \n"
+               "lv.s    s621, 4(%1)             \n"

-         "vsub.s  s731, s701, s730     \n"
+               "vsub.s  s731, s701, s730     \n"

-         "vadd.q  c600, c730[-X,Y,-X,Y], c730[1/2,1/2,-1/2,-1/2]\n"
+               "vadd.q  c600, c730[-X,Y,-X,Y], c730[1/2,1/2,-1/2,-1/2]\n"

-         "vmul.q  c610, c600, c700[Z,Z,Z,Z]  \n"   //*2*b
-         "vmul.q  c600, c600, c700[W,W,W,W]  \n"   //*b*pi
-         "vsin.q  c610, c610                 \n"
-         "vadd.q  c600, c600, c610           \n"
+               "vmul.q  c610, c600, c700[Z,Z,Z,Z]  \n"   //*2*b
+               "vmul.q  c600, c600, c700[W,W,W,W]  \n"   //*b*pi
+               "vsin.q  c610, c610                 \n"
+               "vadd.q  c600, c600, c610           \n"

-         "vmul.q  c600[-1:1,-1:1,-1:1,-1:1], c600, c710[Y,Y,Y,Y]	\n"
+               "vmul.q  c600[-1:1,-1:1,-1:1,-1:1], c600, c710[Y,Y,Y,Y]	\n"

-         "vsub.p  c600, c600, c602           \n"
+               "vsub.p  c600, c600, c602           \n"

-         "vmul.q  c620, c620[X,Y,X,Y], c600[X,X,Y,Y]  \n"
+               "vmul.q  c620, c620[X,Y,X,Y], c600[X,X,Y,Y]  \n"

-         "vadd.q  c720, c720, c620           \n"
+               "vadd.q  c720, c720, c620           \n"


-         "vadd.s  s730, s730, s730[1]  \n"
-         "mfv     %0,   s730           \n"
+               "vadd.s  s730, s730, s730[1]  \n"
+               "mfv     %0,   s730           \n"
+
+               ".set      pop         \n"
+               : "=r"(fraction)
+               : "r"(inp));

-         ".set      pop         \n"
-         :"=r"(fraction): "r"(inp)
-         );
         inp++;
         if (inp == inp_max)
            goto done;
      }
      __asm__ (
-      ".set    push                       \n"
-      ".set    noreorder                  \n"
+            ".set    push                       \n"
+            ".set    noreorder                  \n"

-      "vmul.p  c720, c720, c720[1/2,1/2]  \n"
-      "sv.s    s720, 0(%1)                \n"
-      "sv.s    s721, 4(%1)                \n"
-      "vmov.q  c720, c720[Z,W,0,0]        \n"
-      "vsub.s  s730, s730, s701           \n"
-      "mfv     %0,   s730                 \n"
+            "vmul.p  c720, c720, c720[1/2,1/2]  \n"
+            "sv.s    s720, 0(%1)                \n"
+            "sv.s    s721, 4(%1)                \n"
+            "vmov.q  c720, c720[Z,W,0,0]        \n"
+            "vsub.s  s730, s730, s701           \n"
+            "mfv     %0,   s730                 \n"
+
+            ".set    pop                        \n"
+            : "=r"(fraction)
+            : "r"(outp));

-      ".set    pop                        \n"
-      :"=r"(fraction): "r"(outp)
-      );
      outp++;
   }

+   // The VFPU state is assumed to remain intact in-between calls to resampler_CC_process.
+
 done:
-   data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
+   data->output_frames = outp - (audio_frame_float_t*)data->data_out;
 }


 static void resampler_CC_free(void *re_)
 {
-   rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;
-   if (re)
-      free(re);
+   (void)re_;
 }

 static void *resampler_CC_init(double bandwidth_mod)
 {
-   rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)calloc(1, sizeof(rarch_CC_resampler_t));
-   if (!re)
-      return NULL;
-
   __asm__ (
-    ".set      push\n"
-    ".set      noreorder\n"
+         ".set      push\n"
+         ".set      noreorder\n"

-    "vcst.s    s710, VFPU_PI           \n"   // 710 = pi
-    "vcst.s    s711, VFPU_1_PI         \n"   // 711 = 1.0 / (pi)
+         "vcst.s    s710, VFPU_PI           \n"   // 710 = pi
+         "vcst.s    s711, VFPU_1_PI         \n"   // 711 = 1.0 / (pi)

-    "vzero.q   c720                    \n"
-    "vzero.q   c730                    \n"
+         "vzero.q   c720                    \n"
+         "vzero.q   c730                    \n"

-    ".set      pop\n"
-   );
+         ".set      pop\n");

   RARCH_LOG("\nConvoluted Cosine resampler (VFPU): \n");
-   return re;
+   return (void*)-1;
 }
 #else

-
-//#define HAVE_SSE_MATHFUN_H
-#if defined(__SSE2__) && defined(HAVE_SSE_MATHFUN_H)
-#define USE_SSE2
-#include "sse_mathfun.h"
-
-static inline float _mm_sin(float x)
-{
-   static float temp;
-   __m128 vector =  _mm_set1_ps(x);
-   vector = sin_ps(vector);
-   _mm_store1_ps(&temp,vector);
-   return temp;
-}
-static inline float _mm_cos(float x)
-{
-   static float temp;
-   __m128 vector =  _mm_set1_ps(x);
-   vector = cos_ps(vector);
-   _mm_store1_ps(&temp,vector);
-   return temp;
-}
-
-#define sin(x) _mm_sin(x)
-#define cos(x) _mm_cos(x)
-#endif
-
-
-typedef struct audio_frame_float
-{
-   float l;
-   float r;
-}audio_frame_float_t;
-
+// C reference version. Not optimized.
 typedef struct rarch_CC_resampler
 {
   audio_frame_float_t buffer[4];
   float distance;
   void (*process)(void *re, struct resampler_data *data);
+} rarch_CC_resampler_t;

-}rarch_CC_resampler_t;
-
-
-
-static inline float cc_int(float x, float b){
-   float val = x * b * M_PI + sin(x * b * M_PI);
-   return (val > M_PI)? M_PI : (val < -M_PI)? -M_PI : val;
+static inline float cc_int(float x, float b)
+{
+   float val = x * b * M_PI + sinf(x * b * M_PI);
+   return (val > M_PI) ? M_PI : (val < -M_PI) ? -M_PI : val;
 }

-static inline float cc_kernel(float x, float b){
+static inline float cc_kernel(float x, float b)
+{
   return (cc_int(x + 0.5, b) - cc_int(x - 0.5, b)) / (2.0 * M_PI);
 }

-static inline void add_to(const audio_frame_float_t* source,audio_frame_float_t* target, float ratio){
+static inline void add_to(const audio_frame_float_t *source, audio_frame_float_t *target, float ratio)
+{
   target->l += source->l * ratio;
   target->r += source->r * ratio;
 }

 static void resampler_CC_downsample(void *re_, struct resampler_data *data)
 {
-
   rarch_CC_resampler_t *re     = (rarch_CC_resampler_t*)re_;

   audio_frame_float_t *inp     = (audio_frame_float_t*)data->data_in;
@@ -227,9 +194,9 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)

   float ratio = 1.0 / data->ratio;

-   float b = data->ratio;  // cutoff frequency
+   float b = data->ratio; // cutoff frequency

-   while(inp != inp_max)
+   while (inp != inp_max)
   {
      add_to(inp, re->buffer + 0, cc_kernel(re->distance, b));
      add_to(inp, re->buffer + 1, cc_kernel(re->distance - ratio, b));
@@ -240,7 +207,7 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)

      if (re->distance > (ratio + 0.5))
      {
-         *outp=re->buffer[0];
+         *outp = re->buffer[0];

         re->buffer[0] = re->buffer[1];
         re->buffer[1] = re->buffer[2];
@@ -248,12 +215,12 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)
         re->buffer[2].l = 0.0;
         re->buffer[2].r = 0.0;

-         re->distance-=ratio;
+         re->distance -= ratio;
         outp++;
      }
   }

-   data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
+   data->output_frames = outp - (audio_frame_float_t*)data->data_out;
 }

 #ifndef min
@@ -262,17 +229,16 @@ static void resampler_CC_downsample(void *re_, struct resampler_data *data)

 static void resampler_CC_upsample(void *re_, struct resampler_data *data)
 {
-
   rarch_CC_resampler_t *re = (rarch_CC_resampler_t*)re_;

   audio_frame_float_t *inp = (audio_frame_float_t*)data->data_in;
   audio_frame_float_t *inp_max = inp + data->input_frames;
   audio_frame_float_t *outp = (audio_frame_float_t*)data->data_out;

-   float b = min(data->ratio, 1.00);  // cutoff frequency
+   float b = min(data->ratio, 1.00); // cutoff frequency
   float ratio = 1.0 / data->ratio;

-   while(inp != inp_max)
+   while (inp != inp_max)
   {
      re->buffer[0] = re->buffer[1];
      re->buffer[1] = re->buffer[2];
@@ -286,7 +252,7 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
         outp->l = 0.0;
         outp->r = 0.0;

-         for (i=0; i!=4; i++)
+         for (i = 0; i < 4; i++)
         {
            temp = cc_kernel(re->distance + 1.0 - i, b);
            outp->l += re->buffer[i].l * temp;
@@ -297,12 +263,11 @@ static void resampler_CC_upsample(void *re_, struct resampler_data *data)
         outp++;
      }

-      re->distance-= 1.0;
+      re->distance -= 1.0;
      inp++;
   }

-   data->output_frames = (outp - (audio_frame_float_t*)data->data_out);
-
+   data->output_frames = outp - (audio_frame_float_t*)data->data_out;
 }

 static void resampler_CC_process(void *re_, struct resampler_data *data)
@@ -325,15 +290,15 @@ static void *resampler_CC_init(double bandwidth_mod)
   if (!re)
      return NULL;

-   for (i=0; i!=4 ; i++)
+   for (i = 0; i < 4; i++)
   {
-      re->buffer[i].l=0.0;
-      re->buffer[i].r=0.0;
+      re->buffer[i].l = 0.0;
+      re->buffer[i].r = 0.0;
   }

   RARCH_LOG("Convoluted Cosine resampler (C) : ");

-   if (bandwidth_mod < 0.75)  // variations of data->ratio around 0.75 are safer than around 1.0 for both up/downsampler.
+   if (bandwidth_mod < 0.75) // variations of data->ratio around 0.75 are safer than around 1.0 for both up/downsampler.
   {
      RARCH_LOG("CC_downsample @%f \n", bandwidth_mod);
      re->process = resampler_CC_downsample;
@@ -356,3 +321,4 @@ const rarch_resampler_t CC_resampler = {
   resampler_CC_free,
   "CC",
 };
+
--- a/audio/utils.c
+++ b/audio/utils.c
@@ -1,5 +1,6 @@
 /*  RetroArch - A frontend for libretro.
 *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
 *
 *  RetroArch is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU General Public License as published by the Free Software Found-
@@ -167,59 +168,58 @@ static void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_
 void audio_convert_s16_to_float_ALLEGREX(float *out,
      const int16_t *in, size_t samples, float gain)
 {
-
 #ifdef DEBUG
-//   make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
-   rarch_assert(((uint32_t)out & 0xF) == 0);
+   // Make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
+   // Only the output buffer can be assumed to be 16-byte aligned.
+   rarch_assert(((uintptr_t)out & 0xf) == 0);
 #endif
+
   size_t i;
   gain = gain / 0x8000;
   __asm__ (
-   ".set    push                    \n"
-   ".set    noreorder               \n"
-   "mtv     %0, s200                \n"
-   ".set    pop                     \n"
-   ::"r"(gain)
-   );
+         ".set    push                    \n"
+         ".set    noreorder               \n"
+         "mtv     %0, s200                \n"
+         ".set    pop                     \n"
+         ::"r"(gain));

-   for (i = 0; (i+16) <= samples; i+=16)
+   for (i = 0; i + 16 <= samples; i += 16)
   {
      __asm__ (
-      ".set    push                 \n"
-      ".set    noreorder            \n"
+            ".set    push                 \n"
+            ".set    noreorder            \n"

-      "lv.s    s100,  0(%0)         \n"
-      "lv.s    s101,  4(%0)         \n"
-      "lv.s    s110,  8(%0)         \n"
-      "lv.s    s111, 12(%0)         \n"
-      "lv.s    s120, 16(%0)         \n"
-      "lv.s    s121, 20(%0)         \n"
-      "lv.s    s130, 24(%0)         \n"
-      "lv.s    s131, 28(%0)         \n"
+            "lv.s    s100,  0(%0)         \n"
+            "lv.s    s101,  4(%0)         \n"
+            "lv.s    s110,  8(%0)         \n"
+            "lv.s    s111, 12(%0)         \n"
+            "lv.s    s120, 16(%0)         \n"
+            "lv.s    s121, 20(%0)         \n"
+            "lv.s    s130, 24(%0)         \n"
+            "lv.s    s131, 28(%0)         \n"

-      "vs2i.p  c100, c100           \n"
-      "vs2i.p  c110, c110           \n"
-      "vs2i.p  c120, c120           \n"
-      "vs2i.p  c130, c130           \n"
+            "vs2i.p  c100, c100           \n"
+            "vs2i.p  c110, c110           \n"
+            "vs2i.p  c120, c120           \n"
+            "vs2i.p  c130, c130           \n"

-      "vi2f.q  c100, c100, 16       \n"
-      "vi2f.q  c110, c110, 16       \n"
-      "vi2f.q  c120, c120, 16       \n"
-      "vi2f.q  c130, c130, 16       \n"
+            "vi2f.q  c100, c100, 16       \n"
+            "vi2f.q  c110, c110, 16       \n"
+            "vi2f.q  c120, c120, 16       \n"
+            "vi2f.q  c130, c130, 16       \n"

-      "vmscl.q e100, e100, s200     \n"
+            "vmscl.q e100, e100, s200     \n"

-      "sv.q    c100,  0(%1)         \n"
-      "sv.q    c110, 16(%1)         \n"
-      "sv.q    c120, 32(%1)         \n"
-      "sv.q    c130, 48(%1)         \n"
+            "sv.q    c100,  0(%1)         \n"
+            "sv.q    c110, 16(%1)         \n"
+            "sv.q    c120, 32(%1)         \n"
+            "sv.q    c130, 48(%1)         \n"

-      ".set    pop                  \n"
-      ::"r"(in+i),"r"(out+i)
-      );
+            ".set    pop                  \n"
+            :: "r"(in + i), "r"(out + i));
   }

-   for (;i != samples; i++)
+   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
 }

@@ -227,39 +227,38 @@ void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
      const float *in, size_t samples)
 {
 #ifdef DEBUG
-//   make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK
-   rarch_assert(((uint32_t)in  & 0xF) == 0);
-   rarch_assert(((uint32_t)out & 0xF) == 0);
+   // Make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
+   // Both buffers are allocated by RetroArch, so can assume alignment.
+   rarch_assert(((uintptr_t)in  & 0xf) == 0);
+   rarch_assert(((uintptr_t)out & 0xf) == 0);
 #endif

   size_t i;
-   for (i = 0; (i+8) <= samples; i+=8)
+   for (i = 0; i + 8 <= samples; i += 8)
   {
      __asm__ (
-      ".set    push                 \n"
-      ".set    noreorder            \n"
+            ".set    push                 \n"
+            ".set    noreorder            \n"

-      "lv.q    c100,  0(%0)         \n"
-      "lv.q    c110,  16(%0)        \n"
+            "lv.q    c100,  0(%0)         \n"
+            "lv.q    c110,  16(%0)        \n"

-      "vf2in.q c100, c100, 31       \n"
-      "vf2in.q c110, c110, 31       \n"
-      "vi2s.q  c100, c100           \n"
-      "vi2s.q  c102, c110           \n"
+            "vf2in.q c100, c100, 31       \n"
+            "vf2in.q c110, c110, 31       \n"
+            "vi2s.q  c100, c100           \n"
+            "vi2s.q  c102, c110           \n"

-      "sv.q    c100,  0(%1)         \n"
+            "sv.q    c100,  0(%1)         \n"

-      ".set    pop                  \n"
-      ::"r"(in+i),"r"(out+i)
-      );
+            ".set    pop                  \n"
+            :: "r"(in + i), "r"(out + i));
   }

-   for (;i != samples; i++)
+   for (; i < samples; i++)
   {
      int32_t val = (int32_t)(in[i] * 0x8000);
      out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);
   }
-
 }
 #endif