From 1c9895611b4f091331979fd807dcc63c26070a9b Mon Sep 17 00:00:00 2001
From: Themaister <maister@archlinux.us>
Date: Wed, 18 Dec 2013 19:10:57 +0100
Subject: [PATCH 1/2] Rework performance interface.

Avoids super-ugly macros and retroarch-ifdefs.
---
 camera/video4linux2.c |   7 +--
 dynamic.c             |  20 +++---
 general.h             |   1 -
 gfx/gl.c              |  35 +++++------
 gfx/sdl_gfx.c         |   7 +--
 gfx/thread_wrapper.c  |   7 +--
 gfx/vg.c              |  14 ++---
 libretro.h            |  71 ++++++++++++++++-----
 performance.c         | 143 +++++++++++++++++++++---------------------
 performance.h         |  38 +++++++++--
 retroarch.c           |  33 ++++------
 settings.c            |   4 --
 12 files changed, 210 insertions(+), 170 deletions(-)

diff --git a/camera/video4linux2.c b/camera/video4linux2.c
index a3bd6c8440..e2c104ab52 100644
--- a/camera/video4linux2.c
+++ b/camera/video4linux2.c
@@ -63,11 +63,10 @@ typedef struct video4linux
 
 static void process_image(video4linux_t *v4l, const uint8_t *buffer_yuv)
 {
-   static retro_perf_counter_t yuv_convert_direct = { "yuv_convert_direct", 0, 0, 0, false };
-   rarch_perf_init(&yuv_convert_direct, g_settings.perfcounter_enable);
-   rarch_perf_start(&yuv_convert_direct, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(yuv_convert_direct);
+   RARCH_PERFORMANCE_START(yuv_convert_direct);
    scaler_ctx_scale(&v4l->scaler, v4l->buffer_output, buffer_yuv);
-   rarch_perf_stop(&yuv_convert_direct, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(yuv_convert_direct);
 }
 
 static int xioctl(int fd, int request, void *args)
diff --git a/dynamic.c b/dynamic.c
index a70c721c9c..cffeaf44df 100644
--- a/dynamic.c
+++ b/dynamic.c
@@ -18,6 +18,7 @@
 #include "compat/strl.h"
 #include "compat/posix_string.h"
 #include "retroarch_logger.h"
+#include "performance.h"
 #include "file.h"
 #include <string.h>
 #include <ctype.h>
@@ -417,6 +418,9 @@ void uninit_libretro_sym(void)
 
    // No longer valid.
    memset(&g_extern.system, 0, sizeof(g_extern.system));
+
+   // Performance counters no longer valid.
+   retro_perf_clear();
 }
 
 #ifdef NEED_DYNAMIC
@@ -844,15 +848,13 @@ bool rarch_environment_cb(unsigned cmd, void *data)
       {
          RARCH_LOG("Environ GET_PERF_INTERFACE.\n");
          struct retro_perf_callback *cb = (struct retro_perf_callback*)data;
-         cb->get_perf_counter  = rarch_get_perf_counter;
-         cb->get_time_usec     = rarch_get_time_usec;
-         cb->get_cpu_features  = rarch_get_cpu_features;
-         cb->perf_init         = rarch_perf_init;
-         cb->perf_start        = rarch_perf_start;
-         cb->perf_stop         = rarch_perf_stop;
-         cb->perf_log          = rarch_perf_log;
-         cb->perf_logs         = rarch_perf_logs;
-         cb->perf_register     = rarch_perf_register;
+         cb->get_time_usec    = rarch_get_time_usec;
+         cb->get_cpu_features = rarch_get_cpu_features;
+         cb->get_perf_counter = rarch_get_perf_counter;
+         cb->perf_register    = retro_perf_register; // libretro specific path.
+         cb->perf_start       = rarch_perf_start;
+         cb->perf_stop        = rarch_perf_stop;
+         cb->perf_log         = retro_perf_log; // libretro specific path.
          break;
       }
 
diff --git a/general.h b/general.h
index 572cca6726..9e9bcde0fe 100644
--- a/general.h
+++ b/general.h
@@ -292,7 +292,6 @@ struct settings
    bool rgui_show_start_screen;
 #endif
    bool fps_show;
-   bool perfcounter_enable;
 };
 
 enum rarch_game_type
diff --git a/gfx/gl.c b/gfx/gl.c
index 1bb273071d..91dec027b3 100644
--- a/gfx/gl.c
+++ b/gfx/gl.c
@@ -1315,14 +1315,13 @@ static void gl_pbo_async_readback(void *data)
    glPixelStorei(GL_PACK_ALIGNMENT, get_alignment(gl->vp.width * sizeof(uint32_t)));
 
    // Read asynchronously into PBO buffer.
-   static retro_perf_counter_t async_readback = { "async_readback", 0, 0, 0, false };
-   rarch_perf_init(&async_readback, g_settings.perfcounter_enable);
-   rarch_perf_start(&async_readback, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(async_readback);
+   RARCH_PERFORMANCE_START(async_readback);
    glReadBuffer(GL_BACK);
    glReadPixels(gl->vp.x, gl->vp.y,
          gl->vp.width, gl->vp.height,
          GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
-   rarch_perf_stop(&async_readback, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(async_readback);
 
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 }
@@ -1373,9 +1372,8 @@ static inline void gl_draw_texture(void *data)
 
 static bool gl_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg)
 {
-   static retro_perf_counter_t frame_run = { "frame_run", 0, 0, 0, false };
-   rarch_perf_init(&frame_run, g_settings.perfcounter_enable);
-   rarch_perf_start(&frame_run, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(frame_run);
+   RARCH_PERFORMANCE_START(frame_run);
 
    gl_t *gl = (gl_t*)data;
 
@@ -1420,12 +1418,11 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
       if (!gl->hw_render_fbo_init)
 #endif
       {
-         static retro_perf_counter_t copy_frame = { "copy_frame", 0, 0, 0, false };
          gl_update_input_size(gl, width, height, pitch, true);
-         rarch_perf_init(&copy_frame, g_settings.perfcounter_enable);
-         rarch_perf_start(&copy_frame, g_settings.perfcounter_enable);
+         RARCH_PERFORMANCE_INIT(copy_frame);
+         RARCH_PERFORMANCE_START(copy_frame);
          gl_copy_frame(gl, frame, width, height, pitch);
-         rarch_perf_stop(&copy_frame, g_settings.perfcounter_enable);
+         RARCH_PERFORMANCE_STOP(copy_frame);
       }
    }
    else
@@ -1504,7 +1501,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
 
    context_update_window_title_func();
 
-   rarch_perf_stop(&frame_run, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(frame_run);
 
 #ifdef HAVE_FBO
    // Reset state which could easily mess up libretro core.
@@ -1530,9 +1527,8 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
 #ifdef HAVE_GL_SYNC
    if (g_settings.video.hard_sync && gl->have_sync)
    {
-      static retro_perf_counter_t gl_fence = {"gl_fence", 0, 0, 0, false};
-      rarch_perf_init(&gl_fence, g_settings.perfcounter_enable);
-      rarch_perf_start(&gl_fence, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_INIT(gl_fence);
+      RARCH_PERFORMANCE_START(gl_fence);
       glClear(GL_COLOR_BUFFER_BIT);
       gl->fences[gl->fence_count++] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 
@@ -1545,7 +1541,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
          memmove(gl->fences, gl->fences + 1, gl->fence_count * sizeof(GLsync));
       }
 
-      rarch_perf_stop(&gl_fence, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_STOP(gl_fence);
    }
 #endif
 
@@ -2341,13 +2337,12 @@ static void gl_viewport_info(void *data, struct rarch_viewport *vp)
 
 static bool gl_read_viewport(void *data, uint8_t *buffer)
 {
-   static retro_perf_counter_t read_viewport = { "read_viewport", 0, 0, 0, false };
    unsigned i;
    gl_t *gl = (gl_t*)data;
    (void)i;
 
-   rarch_perf_init(&read_viewport, g_settings.perfcounter_enable);
-   rarch_perf_start(&read_viewport, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(read_viewport);
+   RARCH_PERFORMANCE_START(read_viewport);
 
 #ifdef HAVE_FBO
    // Make sure we're reading from backbuffer incase some state has been overridden.
@@ -2403,7 +2398,7 @@ static bool gl_read_viewport(void *data, uint8_t *buffer)
    }
 #endif
 
-   rarch_perf_stop(&read_viewport, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(read_viewport);
    return true;
 }
 #endif
diff --git a/gfx/sdl_gfx.c b/gfx/sdl_gfx.c
index 76354c87eb..b4522b88fc 100644
--- a/gfx/sdl_gfx.c
+++ b/gfx/sdl_gfx.c
@@ -305,11 +305,10 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne
    if (SDL_MUSTLOCK(vid->screen))
       SDL_LockSurface(vid->screen);
 
-   static retro_perf_counter_t sdl_scale = { "sdl_scale", 0, 0, 0, false };
-   rarch_perf_init(&sdl_scale, g_settings.perfcounter_enable);
-   rarch_perf_start(&sdl_scale,  g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(sdl_scale);
+   RARCH_PERFORMANCE_START(sdl_scale);
    scaler_ctx_scale(&vid->scaler, vid->screen->pixels, frame);
-   rarch_perf_stop(&sdl_scale,  g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(sdl_scale);
 
    if (msg)
       sdl_render_msg(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format);
diff --git a/gfx/thread_wrapper.c b/gfx/thread_wrapper.c
index 2a1248ef0a..2de5a0c830 100644
--- a/gfx/thread_wrapper.c
+++ b/gfx/thread_wrapper.c
@@ -394,9 +394,8 @@ static bool thread_focus(void *data)
 static bool thread_frame(void *data, const void *frame_,
       unsigned width, unsigned height, unsigned pitch, const char *msg)
 {
-   static retro_perf_counter_t thread_frame = { "thread_frame", 0, 0, 0, false};
-   rarch_perf_init(&thread_frame, g_settings.perfcounter_enable);
-   rarch_perf_start(&thread_frame, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(thread_frame);
+   RARCH_PERFORMANCE_START(thread_frame);
 
    thread_video_t *thr = (thread_video_t*)data;
    unsigned copy_stride = width * (thr->info.rgb32 ? sizeof(uint32_t) : sizeof(uint16_t));
@@ -462,7 +461,7 @@ static bool thread_frame(void *data, const void *frame_,
 
    slock_unlock(thr->lock);
 
-   rarch_perf_stop(&thread_frame, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(thread_frame);
 
    thr->last_time = rarch_get_time_usec();
    return true;
diff --git a/gfx/vg.c b/gfx/vg.c
index fc0b5a5b68..f6dbaa3697 100644
--- a/gfx/vg.c
+++ b/gfx/vg.c
@@ -353,9 +353,8 @@ static void vg_copy_frame(void *data, const void *frame, unsigned width, unsigne
 
 static bool vg_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg)
 {
-   static retro_perf_counter_t vg_fr = { "vg_fr", 0, 0, 0, false };
-   rarch_perf_init(&vg_fr, g_settings.perfcounter_enable);
-   rarch_perf_start(&vg_fr, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(vg_fr);
+   RARCH_PERFORMANCE_START(vg_fr);
    vg_t *vg = (vg_t*)data;
 
    if (width != vg->mRenderWidth || height != vg->mRenderHeight || vg->should_resize)
@@ -377,11 +376,10 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei
    vgClear(0, 0, vg->mScreenWidth, vg->mScreenHeight);
    vgSeti(VG_SCISSORING, VG_TRUE);
 
-   static retro_perf_counter_t vg_image = { "vg_image", 0, 0, 0, false };
-   rarch_perf_init(&vg_image, g_settings.perfcounter_enable);
-   rarch_perf_start(&vg_image, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(vg_image);
+   RARCH_PERFORMANCE_START(vg_image);
    vg_copy_frame(vg, frame, width, height, pitch);
-   rarch_perf_stop(&vg_image, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(vg_image);
 
    vgDrawImage(vg->mImage);
 
@@ -390,7 +388,7 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei
 
    vg->driver->update_window_title();
 
-   rarch_perf_stop(&vg_fr, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(vg_fr);
    vg->driver->swap_buffers();
 
    return true;
diff --git a/libretro.h b/libretro.h
index abf1750c16..c1c1c6cf17 100755
--- a/libretro.h
+++ b/libretro.h
@@ -587,9 +587,9 @@ struct retro_log_callback
    retro_log_printf_t log;
 };
 
-// Performance functions
+// Performance related functions
 //
-// Id values for SIMD CPU features
+// ID values for SIMD CPU features
 #define RETRO_SIMD_SSE      (1 << 0)
 #define RETRO_SIMD_SSE2     (1 << 1)
 #define RETRO_SIMD_VMX      (1 << 2)
@@ -599,10 +599,10 @@ struct retro_log_callback
 #define RETRO_SIMD_SSE3     (1 << 6)
 #define RETRO_SIMD_SSSE3    (1 << 7)
 
-typedef unsigned long long retro_perf_tick_t;
+typedef uint64_t retro_perf_tick_t;
 typedef int64_t retro_time_t;
 
-typedef struct retro_perf_counter
+struct retro_perf_counter
 {
    const char *ident;
    retro_perf_tick_t start;
@@ -610,30 +610,67 @@ typedef struct retro_perf_counter
    retro_perf_tick_t call_cnt;
 
    bool registered;
-} retro_perf_counter_t;
-
+};
 
+// Returns current time in microsec sec. Tries to use the most accurate timer available.
 typedef retro_time_t (*retro_perf_get_time_usec_t)(void);
+// A simple counter. Usually nanoseconds, but can also be CPU cycles.
+// Can be used directly if desired (when creating a more sophisticated performance counter system).
 typedef retro_perf_tick_t (*retro_perf_get_counter_t)(void);
-typedef void (*retro_get_cpu_features_t)(unsigned*);
-typedef void (*retro_perf_init_t)(void*, bool);
-typedef void (*retro_perf_start_t)(void*, bool);
-typedef void (*retro_perf_stop_t)(void*, bool);
-typedef void (*retro_perf_log_t)(void*, const char*, bool);
-typedef void (*retro_perf_logs_t)(void);
-typedef void (*retro_perf_register_t)(retro_perf_counter_t*);
+// Returns a bit-mask of detected CPU features (RETRO_SIMD_*).
+typedef uint64_t (*retro_get_cpu_features_t)(void);
+// Asks frontend to log and/or display the state of performance counters.
+// Performance counters can always be poked into manually as well.
+typedef void (*retro_perf_log_t)(void);
+// Register a performance counter.
+// ident field must be set with a discrete value and other values in retro_perf_counter must be 0.
+// Registering can be called multiple times. To avoid calling to frontend redundantly, you can check registered field first.
+typedef void (*retro_perf_register_t)(struct retro_perf_counter *counter);
+// Starts and stops a registered counter.
+typedef void (*retro_perf_start_t)(struct retro_perf_counter *counter);
+typedef void (*retro_perf_stop_t)(struct retro_perf_counter *counter);
+
+// For convenience it can be useful to wrap register, start and stop in macros.
+// E.g.:
+// #ifdef LOG_PERFORMANCE
+// #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name))
+// #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name))
+// #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name))
+// #else
+// ... Blank macros ...
+// #endif
+// These can then be used mid-functions around code snippets.
+//
+// extern struct retro_perf_callback perf_cb; // Somewhere in the core.
+//
+// void do_some_heavy_work(void)
+// {
+//    RETRO_PERFORMANCE_INIT(perf_cb, work_1);
+//    RETRO_PERFORMANCE_START(perf_cb, work_1);
+//    heavy_work_1();
+//    RETRO_PERFORMANCE_STOP(perf_cb, work_1);
+//
+//    RETRO_PERFORMANCE_INIT(perf_cb, work_2);
+//    RETRO_PERFORMANCE_START(perf_cb, work_2);
+//    heavy_work_2();
+//    RETRO_PERFORMANCE_STOP(perf_cb, work_2);
+// }
+//
+// void retro_deinit(void)
+// {
+//    perf_cb.perf_log(); // Log all perf counters here for example.
+// }
 
 struct retro_perf_callback
 {
    retro_perf_get_time_usec_t    get_time_usec;
-   retro_perf_get_counter_t      get_perf_counter; 
    retro_get_cpu_features_t      get_cpu_features;
-   retro_perf_init_t             perf_init;
+
+   retro_perf_get_counter_t      get_perf_counter;
+   retro_perf_register_t         perf_register;
    retro_perf_start_t            perf_start;
    retro_perf_stop_t             perf_stop;
    retro_perf_log_t              perf_log;
-   retro_perf_logs_t             perf_logs;
-   retro_perf_register_t         perf_register;
 };
 
 // FIXME: Document the sensor API and work out behavior.
diff --git a/performance.c b/performance.c
index 769bdd1f45..9ce7788256 100644
--- a/performance.c
+++ b/performance.c
@@ -42,11 +42,9 @@
 #include <time.h>
 #endif
 
-#ifdef __QNX__
-#ifndef CLOCK_MONOTONIC
+#if defined(__QNX__) && !defined(CLOCK_MONOTONIC)
 #define CLOCK_MONOTONIC 2
 #endif
-#endif
 
 #if defined(__PSL1GHT__)
 #include <sys/time.h>
@@ -71,25 +69,65 @@
 #include <string.h>
 
 #define MAX_COUNTERS 64
-static struct retro_perf_counter *perf_counters[MAX_COUNTERS];
-static unsigned perf_ptr;
+static const struct retro_perf_counter *perf_counters_rarch[MAX_COUNTERS];
+static const struct retro_perf_counter *perf_counters_libretro[MAX_COUNTERS];
+static unsigned perf_ptr_rarch;
+static unsigned perf_ptr_libretro;
 
 void rarch_perf_register(struct retro_perf_counter *perf)
 {
-   if (!perf && perf_ptr >= MAX_COUNTERS)
+   if (perf->registered || perf_ptr_rarch >= MAX_COUNTERS)
       return;
 
-   perf_counters[perf_ptr++] = perf;
+   perf_counters_rarch[perf_ptr_rarch++] = perf;
    perf->registered = true;
 }
 
+void retro_perf_register(struct retro_perf_counter *perf)
+{
+   if (perf->registered || perf_ptr_libretro >= MAX_COUNTERS)
+      return;
 
-void rarch_perf_logs(void)
+   perf_counters_libretro[perf_ptr_libretro++] = perf;
+   perf->registered = true;
+}
+
+void retro_perf_clear(void)
+{
+   perf_ptr_libretro = 0;
+   memset(perf_counters_libretro, 0, sizeof(perf_counters_libretro));
+}
+
+#ifdef _WIN32
+#define PERF_LOG_FMT "[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n"
+#else
+#define PERF_LOG_FMT "[PERF]: Avg (%s): %llu ticks, %llu runs.\n"
+#endif
+
+// Logs the average ticks per run and the run count for each of the first num counters.
+static void log_counters(const struct retro_perf_counter **counters, unsigned num)
 {
    unsigned i;
-   RARCH_LOG("[PERF]: Performance counters:\n");
-   for (i = 0; i < perf_ptr; i++)
-      rarch_perf_log(perf_counters[i], perf_counters[i]->ident, true);
+   for (i = 0; i < num; i++)
+   {
+      // A counter can be registered without ever being started (call_cnt == 0); report an average of 0 rather than dividing by zero.
+      unsigned long long runs = (unsigned long long)counters[i]->call_cnt;
+      RARCH_LOG(PERF_LOG_FMT, counters[i]->ident, runs ? (unsigned long long)counters[i]->total / runs : 0ULL, runs);
+   }
+}
+
+void rarch_perf_log(void)
+{
+#if defined(PERF_TEST) || !defined(RARCH_INTERNAL)
+   RARCH_LOG("[PERF]: Performance counters (RetroArch):\n");
+   log_counters(perf_counters_rarch, perf_ptr_rarch);
+#endif
+}
+
+void retro_perf_log(void)
+{
+   RARCH_LOG("[PERF]: Performance counters (libretro):\n");
+   log_counters(perf_counters_libretro, perf_ptr_libretro);
 }
 
 retro_perf_tick_t rarch_get_perf_counter(void)
@@ -122,7 +160,7 @@ retro_perf_tick_t rarch_get_perf_counter(void)
 #endif
 
 #elif defined(__ARM_ARCH_6__)
-    asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) );
+   asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) );
 #elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360)
    time = __mftb();
 #endif
@@ -130,49 +168,6 @@ retro_perf_tick_t rarch_get_perf_counter(void)
    return time;
 }
 
-void rarch_perf_init(void *data, bool enable)
-{
-   struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
-   if (!enable || !perf)
-      return;
-
-   if (!perf->registered)
-      rarch_perf_register(perf);
-}
-
-void rarch_perf_start(void *data, bool enable)
-{
-   struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
-   if (!enable || !perf)
-      return;
-
-   perf->call_cnt++;
-   perf->start = rarch_get_perf_counter();
-}
-
-void rarch_perf_stop(void *data, bool enable)
-{
-   struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
-   if (!enable || !perf)
-      return;
-
-   perf->total += rarch_get_perf_counter() - perf->start;
-}
-
-void rarch_perf_log(void *data, const char *funcname, bool enable)
-{
-   struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
-   if (!enable || !perf)
-      return;
-#ifdef _WIN32
-   RARCH_LOG("[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n",
-         funcname, perf->total / perf->call_cnt, perf->call_cnt);
-#else
-   RARCH_LOG("[PERF]: Avg (%s): %llu ticks, %llu runs.\n",
-         funcname, perf->total / perf->call_cnt, perf->call_cnt);
-#endif
-}
-
 retro_time_t rarch_get_time_usec(void)
 {
 #if defined(_WIN32)
@@ -244,9 +239,9 @@ static void x86_cpuid(int func, int flags[4])
 }
 #endif
 
-void rarch_get_cpu_features(unsigned *cpu)
+uint64_t rarch_get_cpu_features(void)
 {
-   *cpu = 0;
+   uint64_t cpu = 0;
 
 #if defined(CPU_X86)
    int flags[4];
@@ -258,46 +253,48 @@ void rarch_get_cpu_features(unsigned *cpu)
    RARCH_LOG("[CPUID]: Vendor: %s\n", vendor);
 
    if (flags[0] < 1) // Does CPUID not support func = 1? (unlikely ...)
-      return;
+      return 0;
 
    x86_cpuid(1, flags);
 
    if (flags[3] & (1 << 25))
-      *cpu |= RETRO_SIMD_SSE;
+      cpu |= RETRO_SIMD_SSE;
 
    if (flags[3] & (1 << 26))
-      *cpu |= RETRO_SIMD_SSE2;
+      cpu |= RETRO_SIMD_SSE2;
 
    if (flags[2] & (1 << 0))
-      *cpu |= RETRO_SIMD_SSE3;
+      cpu |= RETRO_SIMD_SSE3;
 
    if (flags[2] & (1 << 9))
-      *cpu |= RETRO_SIMD_SSSE3;
+      cpu |= RETRO_SIMD_SSSE3;
 
    const int avx_flags = (1 << 27) | (1 << 28);
    if ((flags[2] & avx_flags) == avx_flags)
-      *cpu |= RETRO_SIMD_AVX;
+      cpu |= RETRO_SIMD_AVX;
 
-   RARCH_LOG("[CPUID]: SSE:   %u\n", !!(*cpu & RETRO_SIMD_SSE));
-   RARCH_LOG("[CPUID]: SSE2:  %u\n", !!(*cpu & RETRO_SIMD_SSE2));
-   RARCH_LOG("[CPUID]: SSE3:  %u\n", !!(*cpu & RETRO_SIMD_SSE3));
-   RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(*cpu & RETRO_SIMD_SSSE3));
-   RARCH_LOG("[CPUID]: AVX:   %u\n", !!(*cpu & RETRO_SIMD_AVX));
+   RARCH_LOG("[CPUID]: SSE:   %u\n", !!(cpu & RETRO_SIMD_SSE));
+   RARCH_LOG("[CPUID]: SSE2:  %u\n", !!(cpu & RETRO_SIMD_SSE2));
+   RARCH_LOG("[CPUID]: SSE3:  %u\n", !!(cpu & RETRO_SIMD_SSE3));
+   RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(cpu & RETRO_SIMD_SSSE3));
+   RARCH_LOG("[CPUID]: AVX:   %u\n", !!(cpu & RETRO_SIMD_AVX));
 #elif defined(ANDROID) && defined(ANDROID_ARM)
    uint64_t cpu_flags = android_getCpuFeatures();
 
    if (cpu_flags & ANDROID_CPU_ARM_FEATURE_NEON)
-      *cpu |= RETRO_SIMD_NEON;
+      cpu |= RETRO_SIMD_NEON;
 
-   RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON));
+   RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
 #elif defined(HAVE_NEON)
-   *cpu |= RETRO_SIMD_NEON;
-   RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON));
+   cpu |= RETRO_SIMD_NEON;
+   RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
 #elif defined(__CELLOS_LV2__)
-   *cpu |= RETRO_SIMD_VMX;
-   RARCH_LOG("[CPUID]: VMX: %u\n", !!(*cpu & RETRO_SIMD_VMX));
+   cpu |= RETRO_SIMD_VMX;
+   RARCH_LOG("[CPUID]: VMX: %u\n", !!(cpu & RETRO_SIMD_VMX));
 #elif defined(XBOX360)
-   *cpu |= RETRO_SIMD_VMX128;
-   RARCH_LOG("[CPUID]: VMX128: %u\n", !!(*cpu & RETRO_SIMD_VMX128));
+   cpu |= RETRO_SIMD_VMX128;
+   RARCH_LOG("[CPUID]: VMX128: %u\n", !!(cpu & RETRO_SIMD_VMX128));
 #endif
+
+   return cpu;
 }
diff --git a/performance.h b/performance.h
index 24967c1438..624b6b4b38 100644
--- a/performance.h
+++ b/performance.h
@@ -26,19 +26,45 @@ extern "C" {
 #endif
 
 #include "boolean.h"
+#include "libretro.h"
 #include <stdint.h>
 
 retro_perf_tick_t rarch_get_perf_counter(void);
 retro_time_t rarch_get_time_usec(void);
 void rarch_perf_register(struct retro_perf_counter *perf);
-void rarch_perf_logs(void);
+void retro_perf_register(struct retro_perf_counter *perf); // Same as rarch_perf_register, just for libretro cores.
+void retro_perf_clear(void);
+void rarch_perf_log(void);
+void retro_perf_log(void);
 
-void rarch_get_cpu_features(unsigned *cpu);
+static inline void rarch_perf_start(struct retro_perf_counter *perf)
+{
+   perf->call_cnt++;
+   perf->start = rarch_get_perf_counter();
+}
 
-void rarch_perf_init(void *data, bool enable);
-void rarch_perf_start(void *data, bool enable);
-void rarch_perf_stop(void *data, bool enable);
-void rarch_perf_log(void *data, const char *funcname, bool enable);
+static inline void rarch_perf_stop(struct retro_perf_counter *perf)
+{
+   perf->total += rarch_get_perf_counter() - perf->start;
+}
+
+uint64_t rarch_get_cpu_features(void);
+
+// Used internally by RetroArch.
+#if defined(PERF_TEST) || !defined(RARCH_INTERNAL)
+#define RARCH_PERFORMANCE_INIT(X) \
+   static struct retro_perf_counter X = {#X}; \
+   do { \
+      if (!(X).registered) \
+         rarch_perf_register(&(X)); \
+   } while(0)
+#define RARCH_PERFORMANCE_START(X) rarch_perf_start(&(X))
+#define RARCH_PERFORMANCE_STOP(X) rarch_perf_stop(&(X))
+#else
+#define RARCH_PERFORMANCE_INIT(X)
+#define RARCH_PERFORMANCE_START(X)
+#define RARCH_PERFORMANCE_STOP(X)
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/retroarch.c b/retroarch.c
index caa2f3bd0f..64cba817f8 100644
--- a/retroarch.c
+++ b/retroarch.c
@@ -276,9 +276,8 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
 
    if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555 && data && data != RETRO_HW_FRAME_BUFFER_VALID)
    {
-      static retro_perf_counter_t video_frame_conv = { "video_frame_conv", 0, 0, 0, false };
-      rarch_perf_init(&video_frame_conv, g_settings.perfcounter_enable);
-      rarch_perf_start(&video_frame_conv, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_INIT(video_frame_conv);
+      RARCH_PERFORMANCE_START(video_frame_conv);
       driver.scaler.in_width = width;
       driver.scaler.in_height = height;
       driver.scaler.out_width = width;
@@ -289,7 +288,7 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
       scaler_ctx_scale(&driver.scaler, driver.scaler_out, data);
       data = driver.scaler_out;
       pitch = driver.scaler.out_stride;
-      rarch_perf_stop(&video_frame_conv, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_STOP(video_frame_conv);
    }
 
    // Slightly messy code,
@@ -382,13 +381,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
    unsigned output_frames      = 0;
 
    struct resampler_data src_data = {0};
-   static retro_perf_counter_t audio_convert_s16 = { "audio_convert_s16", 0, 0, 0, false };
-
-   rarch_perf_init(&audio_convert_s16, g_settings.perfcounter_enable);
-   rarch_perf_start(&audio_convert_s16, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(audio_convert_s16);
+   RARCH_PERFORMANCE_START(audio_convert_s16);
    audio_convert_s16_to_float(g_extern.audio_data.data, data, samples,
          g_extern.audio_data.volume_gain);
-   rarch_perf_stop(&audio_convert_s16, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(audio_convert_s16);
 
 #if defined(HAVE_DYLIB)
    rarch_dsp_output_t dsp_output = {0};
@@ -415,13 +412,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
    if (g_extern.is_slowmotion)
       src_data.ratio *= g_settings.slowmotion_ratio;
 
-   static retro_perf_counter_t resampler_proc = { "resampler_proc", 0, 0, 0, false };
-
-   rarch_perf_init(&resampler_proc, g_settings.perfcounter_enable);
-   rarch_perf_start(&resampler_proc, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_INIT(resampler_proc);
+   RARCH_PERFORMANCE_START(resampler_proc);
    rarch_resampler_process(g_extern.audio_data.resampler,
          g_extern.audio_data.resampler_data, &src_data);
-   rarch_perf_stop(&resampler_proc, g_settings.perfcounter_enable);
+   RARCH_PERFORMANCE_STOP(resampler_proc);
 
    output_data   = g_extern.audio_data.outsamples;
    output_frames = src_data.output_frames;
@@ -436,12 +431,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
    }
    else
    {
-      static retro_perf_counter_t audio_convert_float = { "audio_convert_float", 0, 0, 0, false };
-      rarch_perf_init(&audio_convert_float, g_settings.perfcounter_enable);
-      rarch_perf_start(&audio_convert_float, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_INIT(audio_convert_float);
+      RARCH_PERFORMANCE_START(audio_convert_float);
       audio_convert_float_to_s16(g_extern.audio_data.conv_outsamples,
             output_data, output_frames * 2);
-      rarch_perf_stop(&audio_convert_float, g_settings.perfcounter_enable);
+      RARCH_PERFORMANCE_STOP(audio_convert_float);
 
       if (audio_write_func(g_extern.audio_data.conv_outsamples, output_frames * sizeof(int16_t) * 2) < 0)
       {
@@ -2889,8 +2883,7 @@ static void verify_api_version(void)
 // Ideally, code would get swapped out depending on CPU support, but this will do for now.
 static void validate_cpu_features(void)
 {
-   unsigned cpu;
-   rarch_get_cpu_features(&cpu);
+   uint64_t cpu = rarch_get_cpu_features();
 
 #define FAIL_CPU(simd_type) do { \
    RARCH_ERR(simd_type " code is compiled in, but CPU does not support this feature. Cannot continue.\n"); \
diff --git a/settings.c b/settings.c
index cbb82595a9..4054faecb1 100644
--- a/settings.c
+++ b/settings.c
@@ -350,10 +350,6 @@ void config_set_defaults(void)
    *g_settings.rgui_config_directory = '\0';
 #endif
 
-#ifdef PERF_TEST
-   g_settings.perfcounter_enable = true;
-#endif
-
 #ifdef RARCH_CONSOLE
    g_extern.lifecycle_state |= (1ULL << MODE_MENU);
 

From d1f80a38fcb9ad3dbebbe71a538feffe0515356c Mon Sep 17 00:00:00 2001
From: Themaister <maister@archlinux.us>
Date: Wed, 18 Dec 2013 19:34:51 +0100
Subject: [PATCH 2/2] Fix typo.

---
 libretro.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libretro.h b/libretro.h
index c1c1c6cf17..c584786949 100755
--- a/libretro.h
+++ b/libretro.h
@@ -612,7 +612,7 @@ struct retro_perf_counter
    bool registered;
 };
 
-// Returns current time in microsec sec. Tries to use the most accurate timer available.
+// Returns current time in microseconds. Tries to use the most accurate timer available.
 typedef retro_time_t (*retro_perf_get_time_usec_t)(void);
 // A simple counter. Usually nanoseconds, but can also be CPU cycles.
 // Can be used directly if desired (when creating a more sophisticated performance counter system).