Update benchmarks for GCC + x86.

Compile with make PERF_TEST=1 to enable performance logging.
This commit is contained in:
Themaister 2012-10-01 23:43:16 +02:00
parent 8ffd954122
commit 1bac5421ff
5 changed files with 61 additions and 29 deletions

View File

@ -137,8 +137,9 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD DEFINES += -DSCALER_NO_SIMD
endif endif
ifeq ($(SCALER_PERF), 1) ifeq ($(PERF_TEST), 1)
DEFINES += -DSCALER_PERF DEFINES += -DPERF_TEST
OBJ += benchmark.o
endif endif
ifeq ($(HAVE_SDL), 1) ifeq ($(HAVE_SDL), 1)

View File

@ -68,6 +68,11 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD DEFINES += -DSCALER_NO_SIMD
endif endif
ifeq ($(PERF_TEST), 1)
DEFINES += -DPERF_TEST
OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1) ifeq ($(HAVE_SDL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_FFMPEG), 1) else ifeq ($(HAVE_FFMPEG), 1)

View File

@ -14,7 +14,7 @@
* If not, see <http://www.gnu.org/licenses/>. * If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <stdio.h> #include "benchmark.h"
#if defined(__CELLOS_LV2__) || defined(GEKKO) #if defined(__CELLOS_LV2__) || defined(GEKKO)
#ifndef _PPU_INTRINSICS_H #ifndef _PPU_INTRINSICS_H
@ -24,23 +24,31 @@
#include <PPCIntrinsics.h> #include <PPCIntrinsics.h>
#endif #endif
unsigned long long rarch_get_performance_counter(void) rarch_perf_tick_t rarch_get_perf_counter(void)
{ {
unsigned long long time = 0; rarch_perf_tick_t time = 0;
#ifdef _XBOX1 #ifdef _XBOX1
#define rdtsc __asm __emit 0fh __asm __emit 031h #define rdtsc __asm __emit 0fh __asm __emit 031h
LARGE_INTEGER time_tmp; LARGE_INTEGER time_tmp;
rdtsc; rdtsc;
__asm mov time_tmp.LowPart, eax; __asm mov time_tmp.LowPart, eax;
__asm mov time_tmp.HighPart, edx; __asm mov time_tmp.HighPart, edx;
time = time_tmp.QuadPart; time = time_tmp.QuadPart;
#elif defined(__i386__) || defined(__i486__) || defined(__x86_64__)
uint64_t lo, hi; #elif defined(__GNUC__)
__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
time = ((((uint64_t)hi) << 32) | ((uint64_t)lo) ); #if defined(__i386__) || defined(__i486__)
asm volatile ("rdtsc" : "=A" (time));
#elif defined(__x86_64__)
unsigned a, d;
asm volatile ("rdtsc" : "=a" (a), "=d" (d));
time = (rarch_perf_tick_t)a | ((rarch_perf_tick_t)d << 32);
#endif
#elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360) #elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360)
time = __mftb(); time = __mftb();
#endif #endif
(void)time;
return time; return time;
} }

View File

@ -17,22 +17,44 @@
#ifndef _RARCH_BENCHMARK_H #ifndef _RARCH_BENCHMARK_H
#define _RARCH_BENCHMARK_H #define _RARCH_BENCHMARK_H
typedef struct performance_counter_t #include "general.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
typedef unsigned long long rarch_perf_tick_t;
typedef struct rarch_perf_counter
{ {
unsigned long long start; rarch_perf_tick_t start;
unsigned long long stop; rarch_perf_tick_t total;
} performance_counter_t; unsigned call_cnt;
} rarch_perf_counter_t;
unsigned long long rarch_get_performance_counter(void); rarch_perf_tick_t rarch_get_perf_counter(void);
#define RARCH_PERFORMANCE_INIT(X) performance_counter_t (X) #ifdef PERF_TEST
#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_performance_counter())
#define RARCH_PERFORMANCE_STOP(X) ((X).stop = rarch_get_performance_counter() - (X).start) #define RARCH_PERFORMANCE_INIT(X) static rarch_perf_counter_t X
#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_perf_counter())
#define RARCH_PERFORMANCE_STOP(X) do { (X).total += rarch_get_perf_counter() - (X).start; (X).call_cnt++; } while(0)
#ifdef _WIN32 #ifdef _WIN32
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %I64u.\n", functionname, (X).stop) #define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %I64u ticks.\n", functionname, (X).total / (X).call_cnt)
#else #else
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %llu.\n", functionname, (X).stop) #define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %llu ticks.\n", functionname, (X).total / (X).call_cnt)
#endif
#else
#define RARCH_PERFORMANCE_INIT(X)
#define RARCH_PERFORMANCE_START(X)
#define RARCH_PERFORMANCE_STOP(X)
#define RARCH_PERFORMANCE_LOG(functionname, X)
#endif #endif
#endif #endif

View File

@ -21,6 +21,7 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "../../benchmark.h"
#ifdef SCALER_PERF #ifdef SCALER_PERF
#include <time.h> #include <time.h>
@ -189,10 +190,8 @@ void scaler_ctx_gen_reset(struct scaler_ctx *ctx)
void scaler_ctx_scale(struct scaler_ctx *ctx, void scaler_ctx_scale(struct scaler_ctx *ctx,
void *output, const void *input) void *output, const void *input)
{ {
#ifdef SCALER_PERF RARCH_PERFORMANCE_INIT(scaler_perf);
struct timespec start_tv, end_tv; RARCH_PERFORMANCE_START(scaler_perf);
clock_gettime(CLOCK_MONOTONIC, &start_tv);
#endif
if (ctx->unscaled) // Just perform straight pixel conversion. if (ctx->unscaled) // Just perform straight pixel conversion.
{ {
@ -262,10 +261,7 @@ void scaler_ctx_scale(struct scaler_ctx *ctx,
ctx->scaler_vert(ctx, output, ctx->out_stride); ctx->scaler_vert(ctx, output, ctx->out_stride);
} }
#ifdef SCALER_PERF RARCH_PERFORMANCE_STOP(scaler_perf);
clock_gettime(CLOCK_MONOTONIC, &end_tv); RARCH_PERFORMANCE_LOG("Scaler", scaler_perf);
ctx->elapsed_time_ms += (end_tv.tv_sec - start_tv.tv_sec) * 1000.0 + (end_tv.tv_nsec - start_tv.tv_nsec) / 1000000.0;
ctx->elapsed_frames++;
#endif
} }