diff --git a/audio/ext/Makefile b/audio/ext/Makefile deleted file mode 100644 index fa73ce35ca..0000000000 --- a/audio/ext/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -TARGET = ssnes_iir_filter.so -OBJ = ssnes_iir_filter.o - -all: $(TARGET) - -$(TARGET): $(OBJ) - $(CC) -o $@ -shared $(OBJ) -lm - -%.o: %.c - $(CC) -c -o $@ $< -g -std=c99 -fPIC -O3 -march=native $(CFLAGS) - -clean: - rm -f $(TARGET) - rm -f $(OBJ) - -.PHONY: clean diff --git a/audio/ext/Makefile.test b/audio/ext/Makefile.test deleted file mode 100644 index 643289a237..0000000000 --- a/audio/ext/Makefile.test +++ /dev/null @@ -1,16 +0,0 @@ -TARGET = iir_filter -OBJ = ssnes_iir_filter.o - -all: $(TARGET) - -$(TARGET): $(OBJ) - $(CC) -o $@ $(OBJ) -lm - -%.o: %.c - $(CC) -c -o $@ $< -g -std=c99 -O3 -march=native $(CFLAGS) -DTEST -DLOG - -clean: - rm -f $(TARGET) - rm -f $(OBJ) - -.PHONY: clean diff --git a/audio/ext/ssnes_iir_filter.c b/audio/ext/ssnes_iir_filter.c deleted file mode 100644 index 4c6b3f8685..0000000000 --- a/audio/ext/ssnes_iir_filter.c +++ /dev/null @@ -1,418 +0,0 @@ -#include "ssnes_dsp.h" -#include -#include -#include -#include -#include -#include - -// Simple IIR/EQ filter implementation, optimized for SSE3. - -#ifdef __SSE3__ // Build with -march=native or -msse3 to let this be detected. D: -#define USE_SSE3 -#endif - -#ifdef USE_SSE3 -#include -#endif - -// Make a test build with standalone main() -//#define TEST - -#ifdef TEST -#include -#include -#endif - -// Log filter coeffs for debugging -//#define LOG - -#define min(x, y) ((x) < (y) ? (x) : (y)) - -#ifndef M_PI -#define M_PI 3.14159265 -#endif - -// Taps needs to be power-of-two. -#define TAPS (2048) - -static const float lp_freq = 5000.0; -static const float hp_freq = 500.0; -static const float bp_freq = 300.0; -static const unsigned bp_q = 500; - -#ifdef USE_SSE3 -typedef union -{ - __m128 vec[TAPS / 4 + 1]; // Wraparound for unaligned reads. - float f[TAPS + 4]; -} vec_filt; -#else -typedef float vec_filt[TAPS + 4]; -#endif - -typedef struct -{ - vec_filt buffer[2]; - vec_filt iir_buffer[2]; - vec_filt fir_coeff; - vec_filt iir_coeff; - - float out_buffer[4096]; - unsigned buf_ptr; -} dsp_state; - -// Convolve polynoms. -static void conv(float * restrict a, const float * restrict b, unsigned a_size, unsigned b_size) -{ - float output[a_size + b_size - 1]; - - // Lead-in - for (int i = 0; i < b_size - 1; i++) - { - output[i] = 0.0; - for (int j = 0; j < b_size; j++) - { - if ((i - j) >= 0) - output[i] += a[i - j] * b[j]; - } - } - - // Mid-section - for (unsigned i = b_size - 1; i < a_size; i++) - { - output[i] = 0.0; - for (unsigned j = 0; j < b_size; j++) - output[i] += a[i - j] * b[j]; - } - - // Lead-out - for (unsigned i = a_size; i < a_size + b_size - 1; i++) - { - output[i] = 0.0; - for (unsigned j = 0; j < b_size; j++) - { - if ((i - j) < a_size) - output[i] += a[i - j] * b[j]; - } - } - - memcpy(a, output, sizeof(output)); -} - -#ifdef LOG -static void print_coeff(const char *key, const float *coeff, unsigned elem) -{ - for (unsigned i = 0; i < elem; i++) - fprintf(stderr, "%4s [%3u] = %9.5f\n", key, i, coeff[i]); -} - -static void print_filter_coeffs(const float *fir, const float *iir, unsigned elem) -{ - print_coeff("FIR", fir, elem); - print_coeff("IIR", iir, elem); -} -#endif - -// Simulate plain 1-pole RC circuit. -// dVc/dt = (Vin - Vc) / RC => -// y[n] = y[n - 1] * (1 - Ts/RC) + x[n] * Ts/RC -// RCw = 1 => RC = 1/(2*pi*fc) -static void generate_low_pass(float fir_out[1], float iir_out[2], float freq, float input_rate) -{ - float ts = 1.0 / input_rate; - float t = 1.0 / (2 * M_PI * freq); - fir_out[0] = ts / t; - iir_out[0] = 1.0; - iir_out[1] = -(1.0 - ts / t); - -#ifdef LOG - print_coeff("FLOW", fir_out, 1); - print_coeff("ILOW", iir_out, 2); -#endif -} - -// 1-pole RC circuit (Vo taken over R). -static void generate_high_pass(float fir_out[2], float iir_out[2], float freq, float input_rate) -{ - float ts = 1.0 / input_rate; - float t = 1.0 / (2 * M_PI * freq); - fir_out[0] = 1.0; - fir_out[1] = -1.0; - - iir_out[0] = 1.0; - iir_out[1] = -(1.0 - ts / t); - -#ifdef LOG - print_coeff("FHI", fir_out, 2); - print_coeff("IHI", iir_out, 2); -#endif -} - -static void generate_band_pass(float *fir_out, unsigned num_fir, float freq, float input_rate) -{ - float w_norm = 2.0 * M_PI * freq / input_rate; - for (unsigned i = 0; i < num_fir; i++) - fir_out[i] = cosf(i * w_norm); - - complex float amp = 0.0; - for (unsigned i = 0; i < num_fir; i++) - amp += cosf(i * w_norm) * cexp(-I * w_norm * i); - - float abs = cabs(amp); - for (unsigned i = 0; i < num_fir; i++) - fir_out[i] /= abs; -} - -// Generic low-pass. Differential equation. -static void generate_filter_coeffs(float * restrict fir_out, float * restrict iir_out, float input_rate) -{ - fir_out[0] = 1.0; - iir_out[0] = 1.0; - - unsigned fir_length = 1; - unsigned iir_length = 1; - - /* - // Low pass - float lp_fir[1]; - float lp_iir[2]; - generate_low_pass(lp_fir, lp_iir, lp_freq, input_rate); - conv(fir_out, lp_fir, fir_length, 1); - conv(iir_out, lp_iir, iir_length, 2); - fir_length += 0; - iir_length += 1; - - // High pass - float hp_fir[2]; - float hp_iir[2]; - generate_high_pass(hp_fir, hp_iir, hp_freq, input_rate); - conv(fir_out, hp_fir, fir_length, 2); - conv(iir_out, hp_iir, iir_length, 2); - fir_length += 1; - iir_length += 1; -*/ - - // Band pass - float bp_fir[bp_q]; - generate_band_pass(bp_fir, bp_q, bp_freq, input_rate); - conv(fir_out, bp_fir, fir_length, bp_q); - fir_length += bp_q - 1; - iir_length += 0; - - // Adjust from Z rational polynomial model to discrete domain. - // We don't care about first param as it's implicitly 1. - for (unsigned i = 0; i < TAPS; i++) - iir_out[i] = -1.0 * iir_out[i + 1]; -} - - -static void *dsp_init(const ssnes_dsp_info_t *info) -{ - (void)info; - dsp_state *state = calloc(1, sizeof(*state)); - if (!state) - return NULL; - - fprintf(stderr, "Input rate = %.2f\n", (float)info->input_rate); - -#ifdef USE_SSE3 - generate_filter_coeffs(state->fir_coeff.f, state->iir_coeff.f, info->input_rate); -#ifdef LOG - print_filter_coeffs(state->fir_coeff.f, state->iir_coeff.f, 3); -#endif -#else - generate_filter_coeffs(state->fir_coeff, state->iir_coeff, info->input_rate); -#ifdef LOG - print_filter_coeffs(state->fir_coeff, state->iir_coeff, 3); -#endif -#endif - - -#ifdef USE_SSE3 - memcpy(state->fir_coeff.f + TAPS, state->fir_coeff.f, 4 * sizeof(float)); - memcpy(state->iir_coeff.f + TAPS, state->iir_coeff.f, 4 * sizeof(float)); -#endif - - return state; -} - -#ifdef USE_SSE3 -static void calculate_iir(float *out, dsp_state *dsp) -{ - unsigned buf_ptr = (dsp->buf_ptr - 1) & (TAPS - 1); - unsigned iir_ptr = (TAPS - dsp->buf_ptr) & (TAPS - 1); - - const float * restrict samples_left = dsp->buffer[0].f; - const float * restrict samples_right = dsp->buffer[1].f; - const float * restrict iir_left = dsp->iir_buffer[0].f; - const float * restrict iir_right = dsp->iir_buffer[1].f; - - __m128 sum_left = _mm_setzero_ps(); - __m128 sum_right = _mm_setzero_ps(); - - for (unsigned i = 0; i < TAPS; i += 4) - { - __m128 left = _mm_load_ps(samples_left + i); - __m128 right = _mm_load_ps(samples_right + i); - const __m128 ileft = _mm_load_ps(iir_left + i); - const __m128 iright = _mm_load_ps(iir_right + i); - - // Need unaligned reads here. - const __m128 fir = _mm_loadu_ps(dsp->fir_coeff.f + iir_ptr); - const __m128 iir = _mm_loadu_ps(dsp->iir_coeff.f + iir_ptr); - - const __m128 fir_res_left = _mm_mul_ps(fir, left); - const __m128 fir_res_right = _mm_mul_ps(fir, right); - const __m128 iir_res_left = _mm_mul_ps(iir, ileft); - const __m128 iir_res_right = _mm_mul_ps(iir, iright); - - left = _mm_add_ps(fir_res_left, iir_res_left); - right = _mm_add_ps(fir_res_right, iir_res_right); - - sum_left = _mm_add_ps(sum_left, left); - sum_right = _mm_add_ps(sum_right, right); - - iir_ptr = (iir_ptr + 4) & (TAPS - 1); - } - - __m128 res = _mm_hadd_ps(sum_left, sum_right); - res = _mm_hadd_ps(res, res); - - union - { - __m128 vec; - float f[4]; - } u; - u.vec = res; - - out[0] = u.f[0]; - out[1] = u.f[1]; - dsp->iir_buffer[0].f[buf_ptr] = u.f[0]; - dsp->iir_buffer[1].f[buf_ptr] = u.f[1]; -} -#else -static void calculate_iir(float *out, dsp_state *dsp) -{ - const float * restrict samples_left = dsp->buffer[0]; - const float * restrict samples_right = dsp->buffer[1]; - const float * restrict iir_left = dsp->iir_buffer[0]; - const float * restrict iir_right = dsp->iir_buffer[1]; - const float * restrict fir = dsp->fir_coeff; - const float * restrict iir = dsp->iir_coeff; - - unsigned iir_ptr = 0; - unsigned sample_ptr = dsp->buf_ptr; - - float sum[2] = { 0.0f, 0.0f }; - for (unsigned i = 0; i < TAPS; i++) - { - sum[0] += fir[iir_ptr] * samples_left[sample_ptr] + iir[iir_ptr] * iir_left[sample_ptr]; - sum[1] += fir[iir_ptr] * samples_right[sample_ptr] + iir[iir_ptr] * iir_right[sample_ptr]; - iir_ptr++; - sample_ptr = (sample_ptr + 1) & (TAPS - 1); - } - - // Stick our output value in the IIR buffer. - for (unsigned i = 0; i < 2; i++) - { - out[i] = sum[i]; - dsp->iir_buffer[i][(dsp->buf_ptr - 1) & (TAPS - 1)] = sum[i]; - } -} -#endif - -static void dsp_process(void *data, ssnes_dsp_output_t *output, const ssnes_dsp_input_t *input) -{ - dsp_state *dsp = data; - output->should_resample = SSNES_TRUE; - output->frames = input->frames; - output->samples = dsp->out_buffer; - - for (unsigned i = 0; i < input->frames; i++) - { -#ifdef USE_SSE3 - dsp->buffer[0].f[dsp->buf_ptr] = input->samples[(i << 1) + 0]; - dsp->buffer[1].f[dsp->buf_ptr] = input->samples[(i << 1) + 1]; -#else - dsp->buffer[0][dsp->buf_ptr] = input->samples[(i << 1) + 0]; - dsp->buffer[1][dsp->buf_ptr] = input->samples[(i << 1) + 1]; -#endif - - calculate_iir(&dsp->out_buffer[i << 1], dsp); - dsp->buf_ptr = (dsp->buf_ptr - 1) & (TAPS - 1); - } -} - -static void dsp_config(void *data) -{ - (void)data; - // Normally we unhide a GUI window or something, - // but we're just going to print to the log instead. - fprintf(stderr, "DSP_CONFIG\n"); -} - -static void dsp_free(void *data) -{ - free(data); -} - -const ssnes_dsp_plugin_t dsp_plug = { - .init = dsp_init, - .process = dsp_process, - .free = dsp_free, - .config = dsp_config, - .api_version = SSNES_DSP_API_VERSION, - .ident = "IIR filter" -}; - -SSNES_API_EXPORT const ssnes_dsp_plugin_t* - SSNES_API_CALLTYPE ssnes_dsp_plugin_init(void) -{ - return &dsp_plug; -} - -#ifdef TEST -int main(void) -{ - const ssnes_dsp_plugin_t *plug = ssnes_dsp_plugin_init(); - assert(plug); - - ssnes_dsp_info_t info = { - .input_rate = 44100, - .output_rate = 44100 - }; - void *handle = plug->init(&info); // Info isn't used. - assert(handle); - - int16_t buf[64]; - float fbuf[64]; - - for (;;) - { - size_t rd = fread(buf, sizeof(int16_t), 64, stdin); - for (unsigned i = 0; i < rd; i++) - fbuf[i] = (float)buf[i]; - - ssnes_dsp_input_t input = { - .samples = fbuf, - .frames = rd >> 1 - }; - ssnes_dsp_output_t output; - plug->process(handle, &output, &input); - - for (unsigned i = 0; i < output.frames << 1; i++) - { - int32_t sample = (int32_t)output.samples[i]; - buf[i] = (sample > 0x7fff) ? 0x7fff : ((sample < -0x8000) ? -0x8000 : (int16_t)sample); - } - - fwrite(buf, sizeof(int16_t), output.frames << 1, stdout); - - if (rd < 64) - break; - } - - plug->free(handle); -} -#endif