diff --git a/Makefile b/Makefile
index 36a0b13996..ce08637c98 100644
--- a/Makefile
+++ b/Makefile
@@ -137,8 +137,9 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD
endif
-ifeq ($(SCALER_PERF), 1)
- DEFINES += -DSCALER_PERF
+ifeq ($(PERF_TEST), 1) # PERF_TEST=1 adds -DPERF_TEST to DEFINES and benchmark.o to OBJ
+ DEFINES += -DPERF_TEST
+ OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1)
diff --git a/Makefile.win b/Makefile.win
index b60275190b..19d8724300 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -68,6 +68,11 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD
endif
+ifeq ($(PERF_TEST), 1) # PERF_TEST=1 adds -DPERF_TEST to DEFINES and benchmark.o to OBJ
+ DEFINES += -DPERF_TEST
+ OBJ += benchmark.o
+endif
+
ifeq ($(HAVE_SDL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_FFMPEG), 1)
diff --git a/benchmark.c b/benchmark.c
index 0c3c6f2681..dcf5618eb6 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -14,7 +14,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
-#include
+#include "benchmark.h"
#if defined(__CELLOS_LV2__) || defined(GEKKO)
#ifndef _PPU_INTRINSICS_H
@@ -24,23 +24,42 @@
#include
#endif
-unsigned long long rarch_get_performance_counter(void)
+/* Reads the raw hardware tick counter.
+ *
+ * _XBOX1 builds and GCC builds for x86 execute the rdtsc instruction;
+ * __CELLOS_LV2__, GEKKO and _XBOX360 builds call __mftb(). When none of
+ * these branches is compiled in, the counter is never read and 0 is returned.
+ *
+ * Returns: the counter value in platform-specific ticks. */
+rarch_perf_tick_t rarch_get_perf_counter(void)
{
- unsigned long long time = 0;
+ rarch_perf_tick_t time = 0;
#ifdef _XBOX1
+
#define rdtsc __asm __emit 0fh __asm __emit 031h
LARGE_INTEGER time_tmp;
rdtsc;
__asm mov time_tmp.LowPart, eax;
__asm mov time_tmp.HighPart, edx;
time = time_tmp.QuadPart;
-#elif defined(__i386__) || defined(__i486__) || defined(__x86_64__)
- uint64_t lo, hi;
- __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
- time = ((((uint64_t)hi) << 32) | ((uint64_t)lo) );
+
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__i486__) || defined(__x86_64__))
+ /* Restricted to x86: a GCC-based build for one of the PowerPC targets below
+  * also defines __GNUC__ and would otherwise never reach __mftb(). */
+
+#if defined(__i386__) || defined(__i486__)
+ /* "=A" names the edx:eax pair, which holds the 64-bit result on 32-bit x86. */
+ __asm__ __volatile__ ("rdtsc" : "=A" (time));
+#elif defined(__x86_64__)
+ /* Each half is read into its own register and combined by hand. */
+ unsigned a, d;
+ __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
+ time = (rarch_perf_tick_t)a | ((rarch_perf_tick_t)d << 32);
+#endif
+
#elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360)
time = __mftb();
#endif
- (void)time;
+
return time;
}
diff --git a/benchmark.h b/benchmark.h
index 7979966297..7164a52366 100644
--- a/benchmark.h
+++ b/benchmark.h
@@ -17,22 +17,56 @@
#ifndef _RARCH_BENCHMARK_H
#define _RARCH_BENCHMARK_H
-typedef struct performance_counter_t
+#include "general.h"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include
+/* Raw value of the hardware counter returned by rarch_get_perf_counter(). */
+typedef unsigned long long rarch_perf_tick_t;
+
+/* Running totals for one instrumented code region. */
+typedef struct rarch_perf_counter
{
- unsigned long long start;
- unsigned long long stop;
-} performance_counter_t;
+ rarch_perf_tick_t start; /* Counter value stored by RARCH_PERFORMANCE_START. */
+ rarch_perf_tick_t total; /* Sum of the ticks added by every RARCH_PERFORMANCE_STOP. */
+ unsigned call_cnt; /* Number of RARCH_PERFORMANCE_STOP invocations. */
+} rarch_perf_counter_t;
-unsigned long long rarch_get_performance_counter(void);
+/* Returns the current tick counter value. */
+rarch_perf_tick_t rarch_get_perf_counter(void);
-#define RARCH_PERFORMANCE_INIT(X) performance_counter_t (X)
-#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_performance_counter())
-#define RARCH_PERFORMANCE_STOP(X) ((X).stop = rarch_get_performance_counter() - (X).start)
+/* Instrumentation macros. They generate code only when PERF_TEST is defined
+ * and expand to nothing otherwise.
+ *
+ * INIT(X)       declares a counter X with static storage (zero-initialized).
+ * START(X)      stores the current tick count in X.
+ * STOP(X)       adds the ticks elapsed since START to X's total and counts the call.
+ * LOG(name, X)  logs the average ticks per STOP through RARCH_LOG; the average
+ *               is reported as 0 while no STOP has run yet, so the division
+ *               never sees a zero call count. */
+#ifdef PERF_TEST
+
+#define RARCH_PERFORMANCE_INIT(X) static rarch_perf_counter_t X
+#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_perf_counter())
+#define RARCH_PERFORMANCE_STOP(X) do { (X).total += rarch_get_perf_counter() - (X).start; (X).call_cnt++; } while(0)
#ifdef _WIN32
-#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %I64u.\n", functionname, (X).stop)
+#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %I64u ticks.\n", functionname, ((X).call_cnt ? (X).total / (X).call_cnt : (rarch_perf_tick_t)0))
#else
-#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %llu.\n", functionname, (X).stop)
+#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %llu ticks.\n", functionname, ((X).call_cnt ? (X).total / (X).call_cnt : (rarch_perf_tick_t)0))
+#endif
+
+#else
+
+#define RARCH_PERFORMANCE_INIT(X)
+#define RARCH_PERFORMANCE_START(X)
+#define RARCH_PERFORMANCE_STOP(X)
+#define RARCH_PERFORMANCE_LOG(functionname, X)
+
#endif
#endif
+
diff --git a/gfx/scaler/scaler.c b/gfx/scaler/scaler.c
index 52a93118ab..fe5a04e787 100644
--- a/gfx/scaler/scaler.c
+++ b/gfx/scaler/scaler.c
@@ -21,6 +21,7 @@
#include
#include
#include
+#include "../../benchmark.h"
#ifdef SCALER_PERF
#include
@@ -189,10 +190,8 @@ void scaler_ctx_gen_reset(struct scaler_ctx *ctx)
void scaler_ctx_scale(struct scaler_ctx *ctx,
void *output, const void *input)
{
-#ifdef SCALER_PERF
- struct timespec start_tv, end_tv;
- clock_gettime(CLOCK_MONOTONIC, &start_tv);
-#endif
+ RARCH_PERFORMANCE_INIT(scaler_perf);
+ RARCH_PERFORMANCE_START(scaler_perf);
if (ctx->unscaled) // Just perform straight pixel conversion.
{
@@ -262,10 +261,7 @@ void scaler_ctx_scale(struct scaler_ctx *ctx,
ctx->scaler_vert(ctx, output, ctx->out_stride);
}
-#ifdef SCALER_PERF
- clock_gettime(CLOCK_MONOTONIC, &end_tv);
- ctx->elapsed_time_ms += (end_tv.tv_sec - start_tv.tv_sec) * 1000.0 + (end_tv.tv_nsec - start_tv.tv_nsec) / 1000000.0;
- ctx->elapsed_frames++;
-#endif
+ RARCH_PERFORMANCE_STOP(scaler_perf);
+ RARCH_PERFORMANCE_LOG("Scaler", scaler_perf);
}