From 1405db5b196e2ed9840384373a960d49587fa0d5 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 16 May 2016 10:27:20 +0200 Subject: [PATCH] Move mismatch.c to libretro-common/algorithms --- Makefile.common | 1 + griffin/griffin.c | 6 + libretro-common/algorithms/mismatch.c | 164 ++++++++++++++++++ libretro-common/include/algorithms/mismatch.h | 39 +++++ managers/state_manager.c | 137 +-------------- 5 files changed, 211 insertions(+), 136 deletions(-) create mode 100644 libretro-common/algorithms/mismatch.c create mode 100644 libretro-common/include/algorithms/mismatch.h diff --git a/Makefile.common b/Makefile.common index 2780dedd24..d531781c11 100644 --- a/Makefile.common +++ b/Makefile.common @@ -123,6 +123,7 @@ OBJ += frontend/frontend.o \ intl/msg_hash_pt.o \ intl/msg_hash_us.o \ runloop.o \ + libretro-common/algorithms/mismatch.o \ libretro-common/queues/task_queue.o \ tasks/tasks_internal.o \ tasks/task_content.o \ diff --git a/griffin/griffin.c b/griffin/griffin.c index 1f189425d3..892b143d6c 100644 --- a/griffin/griffin.c +++ b/griffin/griffin.c @@ -45,6 +45,12 @@ CONSOLE EXTENSIONS #endif +/*============================================================ +ALGORITHMS +============================================================ */ + +#include "../libretro-common/algorithms/mismatch.c" + /*============================================================ ARCHIVE FILE ============================================================ */ diff --git a/libretro-common/algorithms/mismatch.c b/libretro-common/algorithms/mismatch.c new file mode 100644 index 0000000000..5af33964e1 --- /dev/null +++ b/libretro-common/algorithms/mismatch.c @@ -0,0 +1,164 @@ +/* Copyright (C) 2010-2016 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (mismatch.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#define __STDC_LIMIT_MACROS /* NOTE(review): every include directive below has lost its header name in this copy of the patch. The code in this file needs the fixed-width integer types, size_t and uintptr_t, the INLINE macro, and (under __SSE2__) the SSE2 intrinsics; restore the exact names from the original commit rather than guessing. */ +#include +#include +#include + +#include + +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i686__) +#define CPU_X86 +#endif + +/* Other arches SIGBUS (usually) on unaligned accesses. */ +#ifndef CPU_X86 +#define NO_UNALIGNED_MEM +#endif + +#if __SSE2__ +#include +#endif + +#if __SSE2__ /* compat_ctz is only compiled for SSE2 builds; its one caller in this file is the SSE2 path of find_change. */ +#if defined(__GNUC__) /* compat_ctz (GNU C variant): returns the number of trailing zero bits of x through the compiler builtin. The builtin leaves the result undefined for x == 0; find_change passes the complement of a 16-bit movemask result stored in a uint32_t, so the argument there is never zero. */ +static INLINE int compat_ctz(unsigned x) +{ + return __builtin_ctz(x); +} +#else + +/* Only checks at nibble granularity, + * because that's what we need. */ + /* compat_ctz (portable variant): returns 0, 4, 8 or 12 for the lowest non-zero nibble among the low 16 bits of x, or 16 when those bits are all clear. */ +static INLINE int compat_ctz(unsigned x) +{ + if (x & 0x000f) + return 0; + if (x & 0x00f0) + return 4; + if (x & 0x0f00) + return 8; + if (x & 0xf000) + return 12; + return 16; +} +#endif +#endif + +/* There's no equivalent in libc, you'd think so ... 
+ * std::mismatch exists, but it's not optimized at all. */ /* find_change: returns the index, counted in uint16_t elements, of the first position at which a and b differ. There is no length argument and none of the loops below has a bound, so the caller must guarantee that a difference exists within both buffers. */ +size_t find_change(const uint16_t *a, const uint16_t *b) +{ +#if __SSE2__ /* SSE2 path: compare 16 bytes (eight elements) per iteration using unaligned loads. */ + const __m128i *a128 = (const __m128i*)a; + const __m128i *b128 = (const __m128i*)b; + + for (;;) + { + __m128i v0 = _mm_loadu_si128(a128); + __m128i v1 = _mm_loadu_si128(b128); + __m128i c = _mm_cmpeq_epi32(v0, v1); /* Each 32-bit lane becomes all ones where the inputs are equal. */ + uint32_t mask = _mm_movemask_epi8(c); /* One bit per byte, so 0xffff means all 16 bytes matched. */ + + if (mask != 0xffff) /* Something has changed, figure out where. */ + { + size_t ret = (((uint8_t*)a128 - (uint8_t*)a) | + (compat_ctz(~mask))) >> 1; /* Byte offset of this 16-byte chunk combined with the byte offset of the first differing 32-bit lane, halved to give an element index. */ + return ret | (a[ret] == b[ret]); /* ret is even here; when the first element of the lane still matches, the difference is in the second one, so set the low bit. */ + } + + a128++; + b128++; + } +#else /* Scalar path: compare one size_t at a time, then narrow down to the element. */ + const uint16_t *a_org = a; +#ifdef NO_UNALIGNED_MEM /* Step one element at a time until a is aligned for size_t reads or a difference shows up. Only the alignment of a is tested; presumably b shares it - TODO confirm against callers. */ + while (((uintptr_t)a & (sizeof(size_t) - 1)) && *a == *b) + { + a++; + b++; + } + if (*a == *b) /* No difference found during the alignment steps, so continue with whole-word compares. */ +#endif + { + const size_t *a_big = (const size_t*)a; + const size_t *b_big = (const size_t*)b; + + while (*a_big == *b_big) /* Skip sizeof(size_t) bytes at a time while whole words match. */ + { + a_big++; + b_big++; + } + a = (const uint16_t*)a_big; + b = (const uint16_t*)b_big; + + while (*a == *b) /* Locate the exact differing element inside the mismatching word. */ + { + a++; + b++; + } + } + return a - a_org; /* Distance from the start, in uint16_t elements. */ +#endif +} + /* find_same: returns an index, counted in uint16_t elements, at which a and b hold equal values again. It scans 32 bits at a time until a whole word matches and then backs up one element if the preceding element matches too. Like find_change it has no length argument and no loop bound, so the caller must guarantee that such a match exists. */ +size_t find_same(const uint16_t *a, const uint16_t *b) +{ + const uint16_t *a_org = a; +#ifdef NO_UNALIGNED_MEM /* If a is not aligned for 32-bit reads and the first elements differ, skip that one element. Only the alignment of a is tested; presumably b shares it - TODO confirm against callers. */ + if (((uintptr_t)a & (sizeof(uint32_t) - 1)) && *a != *b) + { + a++; + b++; + } + if (*a != *b) /* When the current elements already match, the block below is skipped and the current position is returned. */ +#endif + { + /* With this, it's random whether two consecutive identical + * words are caught. + * + * Luckily, compression rate is the same for both cases, and + * three is always caught. + * + * (We prefer to miss two-word blocks, anyways; fewer iterations + * of the outer loop, as well as in the decompressor.) 
+ */ + const uint32_t *a_big = (const uint32_t*)a; + const uint32_t *b_big = (const uint32_t*)b; + + while (*a_big != *b_big) /* Advance two elements at a time until a whole 32-bit word matches. */ + { + a_big++; + b_big++; + } + a = (const uint16_t*)a_big; + b = (const uint16_t*)b_big; + + if (a != a_org && a[-1] == b[-1]) /* The equal run may begin one element before the matching word; the a != a_org test keeps the a[-1] read from going before the start. */ + { + a--; + b--; + } + } + return a - a_org; /* Distance from the start, in uint16_t elements. */ +} diff --git a/libretro-common/include/algorithms/mismatch.h b/libretro-common/include/algorithms/mismatch.h new file mode 100644 index 0000000000..8aa4be7ae1 --- /dev/null +++ b/libretro-common/include/algorithms/mismatch.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2010-2016 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (mismatch.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __LIBRETRO_SDK_ALGORITHMS_MISMATCH_H__ +#define __LIBRETRO_SDK_ALGORITHMS_MISMATCH_H__ /* NOTE(review): the include directives below have lost their header names in this copy of the patch. The declarations need size_t, uint16_t and the RETRO_BEGIN_DECLS and RETRO_END_DECLS macros; restore the exact names from the original commit. */ + +#include +#include + +#include + +RETRO_BEGIN_DECLS + /* find_change: returns the index, counted in uint16_t elements, of the first position at which a and b differ. No length is passed and the implementation in mismatch.c scans without a bound, so the caller must guarantee that a difference exists within both buffers. */ +size_t find_change(const uint16_t *a, const uint16_t *b); + /* find_same: returns an index, counted in uint16_t elements, at which a and b hold equal values. No length is passed and the implementation in mismatch.c scans without a bound, so the caller must guarantee that a match exists. Per the note in the implementation, a run of just two equal elements is not guaranteed to be reported. */ +size_t find_same(const uint16_t *a, const uint16_t *b); + +RETRO_END_DECLS + +#endif diff --git a/managers/state_manager.c b/managers/state_manager.c index 042b36ca15..3b79d8ed12 100644 --- a/managers/state_manager.c +++ b/managers/state_manager.c @@ -24,6 +24,7 @@ #endif #include +#include #include "state_manager.h" #include "../configuration.h" @@ -47,16 +48,6 @@ #define UINT32_MAX 0xffffffffu #endif -#undef CPU_X86 -#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i686__) -#define CPU_X86 -#endif - -/* Other arches SIGBUS (usually) on unaligned accesses. */ -#ifndef CPU_X86 -#define NO_UNALIGNED_MEM -#endif - struct state_manager { uint8_t *data; @@ -154,131 +145,6 @@ static void *state_manager_raw_alloc(size_t len, uint16_t uniq) return ret; } -#if __SSE2__ -#if defined(__GNUC__) -static INLINE int compat_ctz(unsigned x) -{ - return __builtin_ctz(x); -} -#else - -/* Only checks at nibble granularity, - * because that's what we need. */ - -static INLINE int compat_ctz(unsigned x) -{ - if (x & 0x000f) - return 0; - if (x & 0x00f0) - return 4; - if (x & 0x0f00) - return 8; - if (x & 0xf000) - return 12; - return 16; -} -#endif - -/* There's no equivalent in libc, you'd think so ... - * std::mismatch exists, but it's not optimized at all. */ - -static INLINE size_t find_change(const uint16_t *a, const uint16_t *b) -{ - const __m128i *a128 = (const __m128i*)a; - const __m128i *b128 = (const __m128i*)b; - - for (;;) - { - __m128i v0 = _mm_loadu_si128(a128); - __m128i v1 = _mm_loadu_si128(b128); - __m128i c = _mm_cmpeq_epi32(v0, v1); - uint32_t mask = _mm_movemask_epi8(c); - - if (mask != 0xffff) /* Something has changed, figure out where. 
*/ - { - size_t ret = (((uint8_t*)a128 - (uint8_t*)a) | - (compat_ctz(~mask))) >> 1; - return ret | (a[ret] == b[ret]); - } - - a128++; - b128++; - } -} -#else -static INLINE size_t find_change(const uint16_t *a, const uint16_t *b) -{ - const uint16_t *a_org = a; -#ifdef NO_UNALIGNED_MEM - while (((uintptr_t)a & (sizeof(size_t) - 1)) && *a == *b) - { - a++; - b++; - } - if (*a == *b) -#endif - { - const size_t *a_big = (const size_t*)a; - const size_t *b_big = (const size_t*)b; - - while (*a_big == *b_big) - { - a_big++; - b_big++; - } - a = (const uint16_t*)a_big; - b = (const uint16_t*)b_big; - - while (*a == *b) - { - a++; - b++; - } - } - return a - a_org; -} -#endif - -static INLINE size_t find_same(const uint16_t *a, const uint16_t *b) -{ - const uint16_t *a_org = a; -#ifdef NO_UNALIGNED_MEM - if (((uintptr_t)a & (sizeof(uint32_t) - 1)) && *a != *b) - { - a++; - b++; - } - if (*a != *b) -#endif - { - /* With this, it's random whether two consecutive identical - * words are caught. - * - * Luckily, compression rate is the same for both cases, and - * three is always caught. - * - * (We prefer to miss two-word blocks, anyways; fewer iterations - * of the outer loop, as well as in the decompressor.) */ - const uint32_t *a_big = (const uint32_t*)a; - const uint32_t *b_big = (const uint32_t*)b; - - while (*a_big != *b_big) - { - a_big++; - b_big++; - } - a = (const uint16_t*)a_big; - b = (const uint16_t*)b_big; - - if (a != a_org && a[-1] == b[-1]) - { - a--; - b--; - } - } - return a - a_org; -} - /* * Takes two savestates and creates a patch that turns 'src' into 'dst'. * Both 'src' and 'dst' must be returned from state_manager_raw_alloc(), @@ -485,7 +351,6 @@ error: return NULL; } - static bool state_manager_pop(state_manager_t *state, const void **data) { size_t start;