Merge branch 'master' of github.com:Themaister/SSNES

This commit is contained in:
TwinAphex51224 2012-02-26 15:19:40 +01:00
commit ac5fb991a2
8 changed files with 102 additions and 39 deletions

View File

@ -15,7 +15,7 @@ HAVE_DYLIB = 1
HAVE_NETPLAY = 1
HAVE_THREADS = 1
DYNAMIC = 1
HAVE_SINC = 0
HAVE_SINC = 1
ifeq ($(SLIM),)
HAVE_SDL_IMAGE = 1

View File

@ -39,6 +39,7 @@ typedef struct coreaudio
fifo_buffer_t *buffer;
bool nonblock;
size_t buffer_size;
} coreaudio_t;
static void coreaudio_free(void *data)
@ -193,6 +194,7 @@ static void *coreaudio_init(const char *device, unsigned rate, unsigned latency)
fifo_size = (latency * g_settings.audio.out_rate) / 1000;
fifo_size *= 2 * sizeof(float);
dev->buffer_size = fifo_size;
dev->buffer = fifo_new(fifo_size);
if (!dev->buffer)
@ -270,6 +272,21 @@ static bool coreaudio_use_float(void *data)
return true;
}
// Reports the value of fifo_write_avail() for this device's FIFO.
// The query is made while holding the device lock, so it is taken as one
// consistent snapshot with respect to other code that uses the same lock.
// `data` is the opaque driver handle and is cast to coreaudio_t.
static size_t coreaudio_write_avail(void *data)
{
   coreaudio_t *handle = (coreaudio_t*)data;
   size_t writable;

   pthread_mutex_lock(&handle->lock);
   writable = fifo_write_avail(handle->buffer);
   pthread_mutex_unlock(&handle->lock);

   return writable;
}
// Returns the buffer_size field stored in the device handle.
// `data` is the opaque driver handle and is cast to coreaudio_t; nothing is
// modified and no lock is taken.
static size_t coreaudio_buffer_size(void *data)
{
   const coreaudio_t *handle = (const coreaudio_t*)data;
   return handle->buffer_size;
}
const audio_driver_t audio_coreaudio = {
coreaudio_init,
coreaudio_write,
@ -278,6 +295,8 @@ const audio_driver_t audio_coreaudio = {
coreaudio_set_nonblock_state,
coreaudio_free,
coreaudio_use_float,
"coreaudio"
"coreaudio",
coreaudio_write_avail,
coreaudio_buffer_size,
};

View File

@ -22,7 +22,6 @@
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#ifndef RESAMPLER_TEST
@ -35,10 +34,6 @@
#include <xmmintrin.h>
#endif
#if __SSE3__
#include <pmmintrin.h>
#endif
#define PHASE_BITS 8
#define SUBPHASE_BITS 16
@ -126,9 +121,29 @@ static void init_sinc_table(ssnes_resampler_t *resamp)
}
}
// No memalign() for us on Win32 ...
//
// Allocates `size` bytes at an address that is a multiple of `boundary`.
// The block is over-allocated with malloc(); the pointer malloc() returned is
// stashed in the pointer-sized slot directly below the returned address so
// that aligned_free() can recover it.
//
// boundary: required alignment in bytes. Must be a non-zero power of two.
//           Values smaller than a pointer are raised to sizeof(void*) so the
//           stash slot itself is properly aligned (a stricter alignment still
//           satisfies the request).
// size:     number of usable bytes requested.
//
// Returns the aligned pointer, or NULL if `boundary` is invalid, the total
// size would overflow size_t, or malloc() fails. Release with aligned_free()
// only, never with free().
//
// NOTE(review): this name is also declared by C11's <stdlib.h>; building this
// file as C11 or later will likely need a rename here and at the call site.
static void *aligned_alloc(size_t boundary, size_t size)
{
   // The rounding mask below is only meaningful for non-zero powers of two.
   if (boundary == 0 || (boundary & (boundary - 1)) != 0)
      return NULL;

   if (boundary < sizeof(void*))
      boundary = sizeof(void*);

   // Reject requests whose padded size would wrap around and under-allocate.
   const size_t overhead = boundary + sizeof(uintptr_t);
   if (size > SIZE_MAX - overhead)
      return NULL;

   void *ptr = malloc(overhead + size);
   if (!ptr)
      return NULL;

   // Skip past the stash slot, then round down to the boundary. The result is
   // always above ptr + sizeof(uintptr_t) and leaves `size` bytes in bounds.
   uintptr_t addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~((uintptr_t)boundary - 1);
   void **place = (void**)addr;
   place[-1] = ptr;
   return (void*)addr;
}
// Releases a block obtained from aligned_alloc().
// The pointer originally returned by malloc() is read back from the slot
// directly below `ptr` and handed to free().
// Passing NULL is a no-op, mirroring free().
static void aligned_free(void *ptr)
{
   // Without this guard a NULL argument would read from below address zero.
   if (!ptr)
      return;

   void **p = (void**)ptr;
   free(p[-1]);
}
ssnes_resampler_t *resampler_new(void)
{
ssnes_resampler_t *re = (ssnes_resampler_t*)memalign(16, sizeof(*re));
ssnes_resampler_t *re = (ssnes_resampler_t*)aligned_alloc(16, sizeof(*re));
if (!re)
return NULL;
@ -136,9 +151,7 @@ ssnes_resampler_t *resampler_new(void)
init_sinc_table(re);
#if __SSE3__
SSNES_LOG("Sinc resampler [SSE3]\n");
#elif __SSE__
#if __SSE__
SSNES_LOG("Sinc resampler [SSE]\n");
#else
SSNES_LOG("Sinc resampler [C]\n");
@ -177,22 +190,26 @@ static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, sinc));
}
#if __SSE3__
__m128 res = _mm_hadd_ps(_mm_hadd_ps(sum_l, sum_r), _mm_setzero_ps());
_mm_storeu_ps(out_buffer, res); // Overwriting, but this is safe.
#else // Meh, compiler should optimize this to something sane.
union
{
float f[4];
__m128 v;
} u[2] = {
[0] = { .v = sum_l },
[1] = { .v = sum_r },
};
// Them annoying shuffles :V
// sum_l = { l3, l2, l1, l0 }
// sum_r = { r3, r2, r1, r0 }
out_buffer[0] = (u[0].f[0] + u[0].f[1]) + (u[0].f[2] + u[0].f[3]);
out_buffer[1] = (u[1].f[0] + u[1].f[1]) + (u[1].f[2] + u[1].f[3]);
#endif
__m128 sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
// sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
// sum = { R1, R0, L1, L0 }
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
// sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
// sum = { X, R, X, L }
// Store L
_mm_store_ss(out_buffer + 0, sum);
// movehl { X, R, X, L } == { X, R, X, R }
_mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum));
}
#else // Plain ol' C99
static void process_sinc(ssnes_resampler_t *resamp, float *out_buffer)
@ -257,6 +274,6 @@ void resampler_process(ssnes_resampler_t *re, struct resampler_data *data)
void resampler_free(ssnes_resampler_t *re)
{
free(re);
aligned_free(re);
}

View File

@ -22,10 +22,8 @@
#include <math.h>
#include <assert.h>
static void gen_signal(float *out, double freq, double sample_rate, double bias_samples, size_t samples)
static void gen_signal(float *out, double omega, double bias_samples, size_t samples)
{
double omega = 2.0 * M_PI * freq / sample_rate;
for (size_t i = 0; i < samples; i += 2)
{
out[i + 0] = cos(((i >> 1) + bias_samples) * omega);
@ -47,32 +45,61 @@ static double calculate_gain(const float *orig, const float *resamp, size_t samp
return sqrt(resamp_power / orig_power);
}
static double calculate_phase(const float *orig, const float *resamp, double makeup_gain, size_t samples)
{
double max_correlation = 0.0;
for (size_t i = 0; i < samples; i += 2)
max_correlation += orig[i] * orig[i];
double actual_correlation = 0.0;
for (size_t i = 0; i < samples; i += 2)
{
double resampled = makeup_gain * resamp[i];
actual_correlation += resampled * orig[i];
}
double corr = actual_correlation / max_correlation;
if (corr > 1.0)
corr = 1.0;
if (fabs(corr) < 0.0001)
return 0.5 * M_PI;
else
return acos(corr);
}
// Measurements that calculate_snr() reports for one test frequency.
struct snr_result
{
// Signal-to-noise ratio in dB, stored as 10 * log10(signal / noise).
double snr;
// Filter gain in dB, stored as 20 * log10(filter_gain).
double gain;
// Value returned by calculate_phase(); printed by main() as radians.
double phase;
};
static void calculate_snr(struct snr_result *res, const float *orig, const float *resamp, size_t samples)
static void calculate_snr(struct snr_result *res,
double omega,
float *orig, const float *resamp, size_t samples)
{
double noise = 0.0;
double signal = 0.0;
gen_signal(orig, omega, 0, samples);
// Account for gain losses at higher frequencies as it's not really noise.
double filter_gain = calculate_gain(orig, resamp, samples);
double makeup_gain = 1.0 / filter_gain;
for (size_t i = 0; i < samples; i += 2)
signal += orig[i] * orig[i];
double phase = calculate_phase(orig, resamp, makeup_gain, samples);
for (size_t i = 0; i < samples; i += 2)
{
signal += orig[i] * orig[i];
double diff = makeup_gain * resamp[i] - orig[i];
noise += diff * diff;
}
res->snr = 10 * log10(signal / noise);
res->gain = 20.0 * log10(filter_gain);
res->phase = phase;
}
int main(int argc, char *argv[])
@ -149,10 +176,11 @@ int main(int argc, char *argv[])
for (unsigned i = 0; i < sizeof(freq_list) / sizeof(freq_list[0]) && freq_list[i] < 0.5f * in_rate; i++)
{
double omega = 2.0 * M_PI * freq_list[i] / in_rate;
double omega_out = 2.0 * M_PI * freq_list[i] / out_rate;
double sample_offset;
resampler_preinit(re, omega, &sample_offset);
gen_signal(input, freq_list[i], in_rate, sample_offset, samples);
gen_signal(input, omega, sample_offset, samples);
struct resampler_data data = {
.data_in = input,
@ -164,13 +192,12 @@ int main(int argc, char *argv[])
resampler_process(re, &data);
unsigned out_samples = data.output_frames * 2;
gen_signal(output_expected, freq_list[i], out_rate, 0, out_samples);
struct snr_result res;
calculate_snr(&res, output_expected, output, out_samples);
calculate_snr(&res, omega_out, output_expected, output, out_samples);
printf("SNR @ %7.1f Hz: %6.2lf dB, Gain: %6.1f dB\n",
freq_list[i], res.snr, res.gain);
printf("SNR @ %7.1f Hz: %6.2lf dB, Gain: %6.1lf dB, Phase: %6.4f rad\n",
freq_list[i], res.snr, res.gain, res.phase);
//printf("Generated:\n\t");
//for (unsigned i = 0; i < 10; i++)

View File

@ -31,4 +31,4 @@ add_command_line_enable FREETYPE "Enable FreeType support" auto
add_command_line_enable XVIDEO "Enable XVideo support" auto
add_command_line_enable SDL_IMAGE "Enable SDL_image support" auto
add_command_line_enable PYTHON "Enable Python 3 support for shaders" auto
add_command_line_enable SINC "Enable Blackman SINC resampler" no
add_command_line_enable SINC "Disable SINC resampler" yes