diff --git a/Makefile b/Makefile index 36a0b13996..ce08637c98 100644 --- a/Makefile +++ b/Makefile @@ -137,8 +137,9 @@ ifeq ($(SCALER_NO_SIMD), 1) DEFINES += -DSCALER_NO_SIMD endif -ifeq ($(SCALER_PERF), 1) - DEFINES += -DSCALER_PERF +ifeq ($(PERF_TEST), 1) + DEFINES += -DPERF_TEST + OBJ += benchmark.o endif ifeq ($(HAVE_SDL), 1) diff --git a/Makefile.win b/Makefile.win index b60275190b..19d8724300 100644 --- a/Makefile.win +++ b/Makefile.win @@ -68,6 +68,11 @@ ifeq ($(SCALER_NO_SIMD), 1) DEFINES += -DSCALER_NO_SIMD endif +ifeq ($(PERF_TEST), 1) + DEFINES += -DPERF_TEST + OBJ += benchmark.o +endif + ifeq ($(HAVE_SDL), 1) OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o else ifeq ($(HAVE_FFMPEG), 1) diff --git a/benchmark.c b/benchmark.c index 0c3c6f2681..dcf5618eb6 100644 --- a/benchmark.c +++ b/benchmark.c @@ -14,7 +14,7 @@ * If not, see . */ -#include +#include "benchmark.h" #if defined(__CELLOS_LV2__) || defined(GEKKO) #ifndef _PPU_INTRINSICS_H @@ -24,23 +24,31 @@ #include #endif -unsigned long long rarch_get_performance_counter(void) +rarch_perf_tick_t rarch_get_perf_counter(void) { - unsigned long long time = 0; + rarch_perf_tick_t time = 0; #ifdef _XBOX1 + #define rdtsc __asm __emit 0fh __asm __emit 031h LARGE_INTEGER time_tmp; rdtsc; __asm mov time_tmp.LowPart, eax; __asm mov time_tmp.HighPart, edx; time = time_tmp.QuadPart; -#elif defined(__i386__) || defined(__i486__) || defined(__x86_64__) - uint64_t lo, hi; - __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); - time = ((((uint64 t)hi) << 32) | ((uint64 t)lo) ); + +#elif defined(__GNUC__) + +#if defined(__i386__) || defined(__i486__) + asm volatile ("rdtsc" : "=A" (time)); +#elif defined(__x86_64__) + unsigned a, d; + asm volatile ("rdtsc" : "=a" (a), "=d" (d)); + time = (rarch_perf_tick_t)a | ((rarch_perf_tick_t)d << 32); +#endif + #elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360) time = __mftb(); #endif - (void)time; + return time; } diff --git a/benchmark.h b/benchmark.h index 7979966297..7164a52366 100644 --- a/benchmark.h +++ b/benchmark.h @@ -17,22 +17,44 @@ #ifndef _RARCH_BENCHMARK_H #define _RARCH_BENCHMARK_H -typedef struct performance_counter_t +#include "general.h" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +typedef unsigned long long rarch_perf_tick_t; + +typedef struct rarch_perf_counter { - unsigned long long start; - unsigned long long stop; -} performance_counter_t; + rarch_perf_tick_t start; + rarch_perf_tick_t total; + unsigned call_cnt; +} rarch_perf_counter_t; -unsigned long long rarch_get_performance_counter(void); +rarch_perf_tick_t rarch_get_perf_counter(void); -#define RARCH_PERFORMANCE_INIT(X) performance_counter_t (X) -#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_performance_counter()) -#define RARCH_PERFORMANCE_STOP(X) ((X).stop = rarch_get_performance_counter() - (X).start) +#ifdef PERF_TEST + +#define RARCH_PERFORMANCE_INIT(X) static rarch_perf_counter_t X +#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_perf_counter()) +#define RARCH_PERFORMANCE_STOP(X) do { (X).total += rarch_get_perf_counter() - (X).start; (X).call_cnt++; } while(0) #ifdef _WIN32 -#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %I64u.\n", functionname, (X).stop) +#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %I64u ticks.\n", functionname, (X).total / (X).call_cnt) #else -#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %llu.\n", functionname, (X).stop) +#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %llu ticks.\n", functionname, (X).total / (X).call_cnt) +#endif + +#else + +#define RARCH_PERFORMANCE_INIT(X) +#define RARCH_PERFORMANCE_START(X) +#define RARCH_PERFORMANCE_STOP(X) +#define RARCH_PERFORMANCE_LOG(functionname, X) + #endif #endif + diff --git a/gfx/scaler/scaler.c b/gfx/scaler/scaler.c index 52a93118ab..fe5a04e787 100644 --- a/gfx/scaler/scaler.c +++ b/gfx/scaler/scaler.c @@ -21,6 +21,7 @@ #include #include #include +#include "../../benchmark.h" #ifdef SCALER_PERF #include @@ -189,10 +190,8 @@ void scaler_ctx_gen_reset(struct scaler_ctx *ctx) void scaler_ctx_scale(struct scaler_ctx *ctx, void *output, const void *input) { -#ifdef SCALER_PERF - struct timespec start_tv, end_tv; - clock_gettime(CLOCK_MONOTONIC, &start_tv); -#endif + RARCH_PERFORMANCE_INIT(scaler_perf); + RARCH_PERFORMANCE_START(scaler_perf); if (ctx->unscaled) // Just perform straight pixel conversion. { @@ -262,10 +261,7 @@ void scaler_ctx_scale(struct scaler_ctx *ctx, ctx->scaler_vert(ctx, output, ctx->out_stride); } -#ifdef SCALER_PERF - clock_gettime(CLOCK_MONOTONIC, &end_tv); - ctx->elapsed_time_ms += (end_tv.tv_sec - start_tv.tv_sec) * 1000.0 + (end_tv.tv_nsec - start_tv.tv_nsec) / 1000000.0; - ctx->elapsed_frames++; -#endif + RARCH_PERFORMANCE_STOP(scaler_perf); + RARCH_PERFORMANCE_LOG("Scaler", scaler_perf); }