Fix downsampling for SINC.

2025-04-15 23:42:30 +00:00 · 2013-02-13 12:15:09 +01:00 · 2013-02-13 12:15:09 +01:00 · 11d919b9e8
commit 11d919b9e8
parent 27748e6c3a
10 changed files with 132 additions and 79 deletions
--- a/audio/hermite.c
+++ b/audio/hermite.c
@ -18,13 +18,20 @@
 #include "resampler.h"
 #include <stdlib.h>
 #include <math.h>
+#include <stdio.h>
 #include "../boolean.h"
-#include "../general.h"

 #ifdef HAVE_CONFIG_H
 #include "../config.h"
 #endif

+#ifndef RESAMPLER_TEST
+#include "../general.h"
+#else
+#define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__)
+#define RARCH_WARN(...) fprintf(stderr, __VA_ARGS__)
+#endif
+
 #define CHANNELS 2

 typedef struct rarch_hermite_resampler
@ -51,8 +58,11 @@ static inline float hermite_kernel(float mu1, float a, float b, float c, float d
   return (a0 * b) + (a1 * m0) + (a2 * m1) + (a3 * c);
 }

-void *resampler_hermite_new(void)
+void *resampler_hermite_new(double bandwidth_mod)
 {
+   if (bandwidth_mod < 1.0)
+      RARCH_WARN("Hermite resampler is likely to sound absolutely terrible when downsampling.\n");
+
 #ifndef RESAMPLER_TEST
   RARCH_LOG("Hermite resampler [C]\n");
 #endif
--- a/audio/resampler.c
+++ b/audio/resampler.c
@ -29,7 +29,7 @@ static const rarch_resampler_t *backends[] = {
   &hermite_resampler,
 };

-bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident)
+bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident, double bw_ratio)
 {
   if (*re && *backend)
      (*backend)->free(*re);
@ -54,7 +54,7 @@ bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const
   if (!*backend)
      return false;

-   *re = (*backend)->init();
+   *re = (*backend)->init(bw_ratio);
   if (!*re)
   {
      *backend = NULL;
--- a/audio/resampler.h
+++ b/audio/resampler.h
@ -46,7 +46,7 @@ struct resampler_data

 typedef struct rarch_resampler
 {
-   void *(*init)(void);
+   void *(*init)(double bandwidth_mod); // Bandwidth factor. Will be < 1.0 for downsampling, > 1.0 for upsamling. Corresponds to expected resampling ratio.
   void (*process)(void *re, struct resampler_data *data);
   void (*free)(void *re);
   const char *ident;
@ -57,7 +57,7 @@ extern const rarch_resampler_t sinc_resampler;

 // Reallocs resampler. Will free previous handle before allocating a new one.
 // If ident is NULL, first resampler will be used.
-bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident);
+bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident, double bw_ratio);

 // Convenience macros.
 // freep makes sure to set handles to NULL to avoid double-free in rarch_resampler_realloc.
--- a/audio/sinc.c
+++ b/audio/sinc.c
@ -45,7 +45,7 @@
 #if defined(SINC_LOWEST_QUALITY)
 #define SINC_WINDOW_LANCZOS
 #define CUTOFF 0.98
-#define PHASE_BITS 11
+#define PHASE_BITS 12
 #define SINC_COEFF_LERP 0
 #define SUBPHASE_BITS 10
 #define SIDELOBES 2
@ -104,18 +104,18 @@

 typedef struct rarch_sinc_resampler
 {
-#if SINC_COEFF_LERP
-   sample_t phase_table[1 << PHASE_BITS][TAPS * 2];
-#else
-   sample_t phase_table[1 << PHASE_BITS][TAPS];
-#endif
-   sample_t buffer_l[2 * TAPS];
-   sample_t buffer_r[2 * TAPS];
+   sample_t *phase_table;
+   sample_t *buffer_l;
+   sample_t *buffer_r;

   unsigned taps;

   unsigned ptr;
   uint32_t time;
+
+   // A buffer for phase_table, buffer_l and buffer_r are created in a single calloc().
+   // Ensure that we get as good cache locality as we can hope for.
+   sample_t *main_buffer;
 } rarch_sinc_resampler_t;

 static inline double sinc(double val)
@ -173,6 +173,7 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff,
   double window_mod = window_function(0.0); // Need to normalize w(0) to 1.0.
   int stride = calculate_delta ? 2 : 1;

+   double sidelobes = taps / 2.0;
   for (int i = 0; i < phases; i++)
   {
      for (int j = 0; j < taps; j++)
@ -180,7 +181,7 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff,
         int n = j * phases + i;
         double window_phase = (double)n / (phases * taps); // [0, 1).
         window_phase = 2.0 * window_phase - 1.0; // [-1, 1)
-         double sinc_phase = SIDELOBES * window_phase;
+         double sinc_phase = sidelobes * window_phase;

         float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * window_function(window_phase) / window_mod;
         phase_table[i * stride * taps + j] = val;
@ -189,26 +190,26 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff,

   if (calculate_delta)
   {
-      for (int i = 0; i < phases - 1; i++)
+      for (int p = 0; p < phases - 1; p++)
      {
         for (int j = 0; j < taps; j++)
         {
-            float delta = phase_table[(i + 1) * stride * taps + j] - phase_table[i * stride * taps + j];
-            phase_table[(i * stride + 1) * taps + j] = delta;
+            float delta = phase_table[(p + 1) * stride * taps + j] - phase_table[p * stride * taps + j];
+            phase_table[(p * stride + 1) * taps + j] = delta;
         }
      }

-      int i = phases - 1;
+      int phase = phases - 1;
      for (int j = 0; j < taps; j++)
      {
-         int n = j * phases + (i + 1);
-         double window_phase = (double)n / (phases * taps); // [0, 1).
-         window_phase = 2.0 * window_phase - 1.0; // [-1, 1)
-         double sinc_phase = SIDELOBES * window_phase;
+         int n = j * phases + (phase + 1);
+         double window_phase = (double)n / (phases * taps); // (0, 1].
+         window_phase = 2.0 * window_phase - 1.0; // (-1, 1]
+         double sinc_phase = sidelobes * window_phase;

         float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * window_function(window_phase) / window_mod;
-         float delta = (val - phase_table[i * stride * taps + j]);
-         phase_table[(i * stride + 1) * taps + j] = delta;
+         float delta = (val - phase_table[phase * stride * taps + j]);
+         phase_table[(phase * stride + 1) * taps + j] = delta;
      }
   }
 }
@ -240,14 +241,16 @@ static inline void process_sinc_C(rarch_sinc_resampler_t *resamp, float *out_buf
   const float *buffer_l = resamp->buffer_l + resamp->ptr;
   const float *buffer_r = resamp->buffer_r + resamp->ptr;

+   unsigned taps  = resamp->taps;
   unsigned phase = resamp->time >> SUBPHASE_BITS;
-   const float *phase_table = resamp->phase_table[phase];
 #if SINC_COEFF_LERP
-   const float *delta_table = phase_table + TAPS;
+   const float *phase_table = resamp->phase_table + phase * taps * 2;
+   const float *delta_table = phase_table + taps;
   float delta = (float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD;
+#else
+   const float *phase_table = resamp->phase_table + phase * taps;
 #endif

-   unsigned taps = resamp->taps;
   for (unsigned i = 0; i < taps; i++)
   {
 #if SINC_COEFF_LERP
@ -273,14 +276,16 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
   const float *buffer_l = resamp->buffer_l + resamp->ptr;
   const float *buffer_r = resamp->buffer_r + resamp->ptr;

+   unsigned taps = resamp->taps;
   unsigned phase = resamp->time >> SUBPHASE_BITS;
-   const float *phase_table = resamp->phase_table[phase];
 #if SINC_COEFF_LERP
-   const float *delta_table = phase_table + TAPS;
+   const float *phase_table = resamp->phase_table + phase * taps * 2;
+   const float *delta_table = phase_table + taps;
   __m256 delta = _mm256_set1_ps((float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
+#else
+   const float *phase_table = resamp->phase_table + phase * taps;
 #endif

-   unsigned taps = resamp->taps;
   for (unsigned i = 0; i < taps; i += 8)
   {
      __m256 buf_l = _mm256_loadu_ps(buffer_l + i);
@ -321,10 +326,12 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)

   unsigned taps = resamp->taps;
   unsigned phase = resamp->time >> SUBPHASE_BITS;
-   const float *phase_table = resamp->phase_table[phase];
 #if SINC_COEFF_LERP
+   const float *phase_table = resamp->phase_table + phase * taps * 2;
   const float *delta_table = phase_table + taps;
   __m128 delta = _mm_set1_ps((float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD);
+#else
+   const float *phase_table = resamp->phase_table + phase * taps;
 #endif

   for (unsigned i = 0; i < taps; i += 4)
@ -365,10 +372,6 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
 }
 #elif defined(HAVE_NEON)

-#if TAPS < 8
-#error "NEON asm requires at least 8 taps (for now)."
-#endif
-
 #if SINC_COEFF_LERP
 #error "NEON asm does not support SINC lerp."
 #endif
@ -398,9 +401,6 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data)
 {
   rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;

-   // If data->ratio is < 1, we are downsampling.
-   // The sinc table is not set up for this, as it always assumes upsampling.
-   // Downsampling will work, but with some added noise due to aliasing might be present.
   uint32_t ratio = PHASES / data->ratio;

   const sample_t *input = data->data_in;
@ -412,9 +412,13 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data)
   {
      while (frames && re->time >= PHASES)
      {
-         re->ptr = (re->ptr - 1) & (TAPS - 1);
-         re->buffer_l[re->ptr + TAPS] = re->buffer_l[re->ptr] = *input++;
-         re->buffer_r[re->ptr + TAPS] = re->buffer_r[re->ptr] = *input++;
+         // Push in reverse to make filter more obvious.
+         if (!re->ptr)
+            re->ptr = re->taps;
+         re->ptr--;
+
+         re->buffer_l[re->ptr + re->taps] = re->buffer_l[re->ptr] = *input++;
+         re->buffer_r[re->ptr + re->taps] = re->buffer_r[re->ptr] = *input++;

         re->time -= PHASES;
         frames--;
@ -434,19 +438,52 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data)

 static void resampler_sinc_free(void *re)
 {
-   aligned_free__(re);
+   rarch_sinc_resampler_t *resampler = (rarch_sinc_resampler_t*)re;
+   if (resampler)
+      aligned_free__(resampler->main_buffer);
+   free(resampler);
 }

-static void *resampler_sinc_new(void)
+static void *resampler_sinc_new(double bandwidth_mod)
 {
-   rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)aligned_alloc__(128, sizeof(*re));
+   rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)calloc(1, sizeof(*re));
   if (!re)
      return NULL;

   memset(re, 0, sizeof(*re));

   re->taps = TAPS;
-   init_sinc_table(re, CUTOFF, &re->phase_table[0][0], 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP);
+   double cutoff = CUTOFF;
+
+   // Downsampling, must lower cutoff, and extend number of taps accordingly to keep same stopband attenuation.
+   if (bandwidth_mod < 1.0)
+   {
+      cutoff *= bandwidth_mod;
+      re->taps = (unsigned)ceil(re->taps / bandwidth_mod);
+   }
+
+   // Be SIMD-friendly.
+#if (defined(__AVX__) && ENABLE_AVX) || defined(HAVE_NEON)
+   re->taps = (re->taps + 7) & ~7;
+#else
+   re->taps = (re->taps + 3) & ~3;
+#endif
+
+   size_t phase_elems = (1 << PHASE_BITS) * re->taps;
+#if SINC_COEFF_LERP
+   phase_elems *= 2;
+#endif
+   size_t elems = phase_elems + 4 * re->taps;
+
+   re->main_buffer = (sample_t*)aligned_alloc__(128, sizeof(sample_t) * elems);
+   if (!re->main_buffer)
+      goto error;
+
+   re->phase_table = re->main_buffer;
+   re->buffer_l = re->main_buffer + phase_elems;
+   re->buffer_r = re->buffer_l + 2 * re->taps;
+
+   init_sinc_table(re, cutoff, re->phase_table, 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP);

 #if defined(__AVX__) && ENABLE_AVX
   RARCH_LOG("Sinc resampler [AVX]\n");
@ -462,8 +499,11 @@ static void *resampler_sinc_new(void)
 #endif

   RARCH_LOG("SINC params (%u phase bits, %u taps).\n", PHASE_BITS, re->taps);
-
   return re;
+
+error:
+   resampler_sinc_free(re);
+   return NULL;
 }

 const rarch_resampler_t sinc_resampler = {
--- a/audio/test/Makefile
+++ b/audio/test/Makefile
@ -16,10 +16,10 @@ LDFLAGS += -lm

 all: $(TESTS)

-test-hermite: ../hermite.o ../utils.o main.o resampler-hermite.o
+test-hermite: hermite.o ../utils.o main.o resampler-hermite.o
 	$(CC) -o $@ $^ $(LDFLAGS)

-test-snr-hermite: ../hermite.o ../utils.o snr.o resampler-hermite.o
+test-snr-hermite: hermite.o ../utils.o snr.o resampler-hermite.o
 	$(CC) -o $@ $^ $(LDFLAGS)

 resampler-sinc.o: ../resampler.c
@ -28,6 +28,9 @@ resampler-sinc.o: ../resampler.c
 resampler-hermite.o: ../resampler.c
 	$(CC) -c -o $@ $< $(CFLAGS)

+hermite.o: ../hermite.c
+	$(CC) -c -o $@ $< $(CFLAGS)
+
 sinc-lowest.o: ../sinc.c
 	$(CC) -c -o $@ $< $(CFLAGS) -DHAVE_SINC -DSINC_LOWEST_QUALITY

--- a/audio/test/main.c
+++ b/audio/test/main.c
@ -47,7 +47,7 @@ int main(int argc, char *argv[])

   const rarch_resampler_t *resampler = NULL;
   void *re = NULL;
-   if (!rarch_resampler_realloc(&re, &resampler, NULL))
+   if (!rarch_resampler_realloc(&re, &resampler, NULL, out_rate / in_rate))
   {
      fprintf(stderr, "Failed to allocate resampler ...\n");
      return 1;
--- a/audio/test/sinc_test.m
+++ b/audio/test/sinc_test.m
@ -2,14 +2,16 @@
 close all;

 % 4-tap and 8-tap are Lanczos windowed, but include here for completeness.
-sidelobes = [2 4 8 32 128];
-taps = sidelobes * 2;
-
 phases = 256;
-cutoffs = [0.65 0.75 0.825 0.90 0.95];
+bw = 0.375;
+downsample = round(phases / bw);
+cutoffs = bw * [0.65 0.75 0.825 0.90 0.95];
 betas = [2.0 3.0 5.5 10.5 14.5];

-freqs = 0.05 : 0.05 : 0.99;
+sidelobes = round([2 4 8 32 128] / bw);
+taps = sidelobes * 2;
+
+freqs = 0.05 : 0.02 : 0.99;

 filters = length(taps);
 for i = 1 : filters
@ -21,17 +23,17 @@ for i = 1 : filters
    win = kaiser(filter_length, betas(i))';
    filter = s .* win;

-    impulse_response_half = 0.5 * upfirdn(1, filter, phases, phases / 2);
+    impulse_response_half = 0.5 * upfirdn(1, filter, phases, downsample);
    figure('name', sprintf('Response SINC: %d taps', taps(i)));
    freqz(impulse_response_half);
    ylim([-200 0]);

-    signal = zeros(1, 4001);
+    signal = zeros(1, 8001);
    for freq = freqs
-        signal = signal + sin(pi * freq * (0 : 4000));
+        signal = signal + sin(pi * freq * (0 : 8000));
    end
    
-    resampled = upfirdn(signal, filter, phases, round(phases * 44100 / 48000));
+    resampled = upfirdn(signal, filter, phases, downsample);
    figure('name', sprintf('Kaiser SINC: %d taps, w = %.f', taps(i), freq));
    freqz(resampled .* kaiser(length(resampled), 40.0)');
    ylim([-80 70]);
--- a/audio/test/snr.c
+++ b/audio/test/snr.c
@ -23,6 +23,9 @@
 #include <assert.h>
 #include <stdbool.h>

+#undef min
+#define min(a, b) ((a) < (b)) ? (a) : (b)
+
 static void gen_signal(float *out, double omega, double bias_samples, size_t samples)
 {
   for (size_t i = 0; i < samples; i += 2)
@ -244,15 +247,9 @@ int main(int argc, char *argv[])

   const unsigned fft_samples = 1024 * 128;
   unsigned out_rate = fft_samples / 2;
-   unsigned in_rate = out_rate / ratio;
+   unsigned in_rate = round(out_rate / ratio);
   ratio = (double)out_rate / in_rate;

-   if (ratio <= 1.0)
-   {
-      fprintf(stderr, "Ratio too low ...\n");
-      return 1;
-   }
-
   static const float freq_list[] = {
      0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
      0.010, 0.015, 0.020, 0.025, 0.030, 0.035, 0.040, 0.045, 0.050,
@ -265,19 +262,18 @@ int main(int argc, char *argv[])

   unsigned samples = in_rate * 4;
   float *input = calloc(sizeof(float), samples);
-   float *output = calloc(sizeof(float), (fft_samples + 1) * 2);
+   float *output = calloc(sizeof(float), (fft_samples + 16) * 2);
   complex double *butterfly_buf = calloc(sizeof(complex double), fft_samples / 2);
   assert(input);
   assert(output);

   void *re = NULL;
   const rarch_resampler_t *resampler = NULL;
-   if (!rarch_resampler_realloc(&re, &resampler, NULL))
+   if (!rarch_resampler_realloc(&re, &resampler, NULL, ratio))
      return 1;

   test_fft();

-   printf("Omega,SNR,Gain\n");
   for (unsigned i = 0; i < sizeof(freq_list) / sizeof(freq_list[0]); i++)
   {
      unsigned freq = freq_list[i] * in_rate;
@ -293,12 +289,13 @@ int main(int argc, char *argv[])

      rarch_resampler_process(resampler, re, &data);

-      unsigned out_samples = data.output_frames * 2;
-      assert(out_samples >= fft_samples * 2);
-
      // We generate 2 seconds worth of audio, however, only the last second is considered so phase has stabilized.
      struct snr_result res;
-      calculate_snr(&res, freq, in_rate / 2, output + fft_samples, butterfly_buf, fft_samples);
+      unsigned max_freq = min(in_rate, out_rate) / 2;
+      if (freq > max_freq)
+         continue;
+
+      calculate_snr(&res, freq, max_freq, output + fft_samples - 2048, butterfly_buf, fft_samples);

      printf("SNR @ w = %5.3f : %6.2lf dB, Gain: %6.1lf dB\n",
            freq_list[i], res.snr, res.gain);
--- a/driver.c
+++ b/driver.c
@ -406,9 +406,13 @@ void init_audio(void)
      g_extern.audio_data.chunk_size = g_extern.audio_data.nonblock_chunk_size;
   }

+   g_extern.audio_data.orig_src_ratio =
+      g_extern.audio_data.src_ratio =
+      (double)g_settings.audio.out_rate / g_settings.audio.in_rate;
+
   const char *resampler = *g_settings.audio.resampler ? g_settings.audio.resampler : NULL;
   if (!rarch_resampler_realloc(&g_extern.audio_data.resampler_data, &g_extern.audio_data.resampler,
-         resampler))
+         resampler, g_extern.audio_data.orig_src_ratio))
   {
      RARCH_ERR("Failed to initialize resampler \"%s\".\n", resampler ? resampler : "(default)");
      g_extern.audio_active = false;
@ -421,10 +425,6 @@ void init_audio(void)
   rarch_assert(g_settings.audio.out_rate < g_settings.audio.in_rate * AUDIO_MAX_RATIO);
   rarch_assert(g_extern.audio_data.outsamples = (sample_t*)malloc(outsamples_max * sizeof(sample_t)));

-   g_extern.audio_data.orig_src_ratio =
-      g_extern.audio_data.src_ratio =
-      (double)g_settings.audio.out_rate / g_settings.audio.in_rate;
-
   if (g_extern.audio_active && g_settings.audio.rate_control)
   {
      if (driver.audio->buffer_size && driver.audio->write_avail)
--- a/record/ffemu.c
+++ b/record/ffemu.c
@ -281,7 +281,8 @@ static bool ffemu_init_audio(ffemu_t *handle)

      rarch_resampler_realloc(&audio->resampler_data,
            &audio->resampler,
-            *g_settings.audio.resampler ? g_settings.audio.resampler : NULL);
+            *g_settings.audio.resampler ? g_settings.audio.resampler : NULL,
+            audio->ratio);
   }
   else
   {