mirror of
https://github.com/libretro/RetroArch
synced 2025-01-30 03:32:46 +00:00
Merge git://github.com/Alcaro/RetroArch into alcaro
Conflicts: retroarch.c rewind.c
This commit is contained in:
commit
883708df0f
3
AUTHORS
3
AUTHORS
@ -56,3 +56,6 @@ Gražvydas Ignotas -
|
||||
|
||||
Saggi Mizrahi -
|
||||
- RetroLaunch utility
|
||||
|
||||
Alfred Agrell - <floating@muncher.se>
|
||||
- Rewritten savestate manager
|
||||
|
@ -519,7 +519,6 @@ struct global
|
||||
|
||||
// Rewind support.
|
||||
state_manager_t *state_manager;
|
||||
void *state_buf;
|
||||
size_t state_size;
|
||||
bool frame_is_reverse;
|
||||
|
||||
|
38
retroarch.c
38
retroarch.c
@ -1539,29 +1539,16 @@ void rarch_init_rewind(void)
|
||||
return;
|
||||
}
|
||||
|
||||
// Make sure we allocate at least 4-byte multiple.
|
||||
size_t aligned_state_size = (g_extern.state_size + 3) & ~3;
|
||||
g_extern.state_buf = calloc(1, aligned_state_size);
|
||||
|
||||
if (!g_extern.state_buf)
|
||||
{
|
||||
RARCH_ERR("Failed to allocate memory for rewind buffer.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!pretro_serialize(g_extern.state_buf, g_extern.state_size))
|
||||
{
|
||||
RARCH_ERR("Failed to perform initial serialization for rewind.\n");
|
||||
free(g_extern.state_buf);
|
||||
g_extern.state_buf = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
RARCH_LOG("Initing rewind buffer with size: %u MB\n", (unsigned)(g_settings.rewind_buffer_size / 1000000));
|
||||
g_extern.state_manager = state_manager_new(aligned_state_size, g_settings.rewind_buffer_size, g_extern.state_buf);
|
||||
g_extern.state_manager = state_manager_new(g_extern.state_size, g_settings.rewind_buffer_size);
|
||||
|
||||
if (!g_extern.state_manager)
|
||||
RARCH_WARN("Failed to init rewind buffer. Rewinding will be disabled.\n");
|
||||
|
||||
void *state;
|
||||
state_manager_push_where(g_extern.state_manager, &state);
|
||||
pretro_serialize(state, g_extern.state_size);
|
||||
state_manager_push_do(g_extern.state_manager);
|
||||
}
|
||||
|
||||
void rarch_deinit_rewind(void)
|
||||
@ -1569,9 +1556,6 @@ void rarch_deinit_rewind(void)
|
||||
if (g_extern.state_manager)
|
||||
state_manager_free(g_extern.state_manager);
|
||||
g_extern.state_manager = NULL;
|
||||
|
||||
free(g_extern.state_buf);
|
||||
g_extern.state_buf = NULL;
|
||||
}
|
||||
|
||||
#ifdef HAVE_BSV_MOVIE
|
||||
@ -2187,7 +2171,7 @@ static void check_rewind(void)
|
||||
if (input_key_pressed_func(RARCH_REWIND))
|
||||
{
|
||||
msg_queue_clear(g_extern.msg_queue);
|
||||
void *buf;
|
||||
const void *buf;
|
||||
if (state_manager_pop(g_extern.state_manager, &buf))
|
||||
{
|
||||
g_extern.frame_is_reverse = true;
|
||||
@ -2214,11 +2198,15 @@ static void check_rewind(void)
|
||||
if (cnt == 0)
|
||||
#endif
|
||||
{
|
||||
void *state;
|
||||
state_manager_push_where(g_extern.state_manager, &state);
|
||||
|
||||
RARCH_PERFORMANCE_INIT(rewind_serialize);
|
||||
RARCH_PERFORMANCE_START(rewind_serialize);
|
||||
pretro_serialize(g_extern.state_buf, g_extern.state_size);
|
||||
pretro_serialize(state, g_extern.state_size);
|
||||
RARCH_PERFORMANCE_STOP(rewind_serialize);
|
||||
state_manager_push(g_extern.state_manager, g_extern.state_buf);
|
||||
|
||||
state_manager_push_do(g_extern.state_manager);
|
||||
}
|
||||
}
|
||||
|
||||
|
506
rewind.c
506
rewind.c
@ -1,6 +1,7 @@
|
||||
/* RetroArch - A frontend for libretro.
|
||||
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
|
||||
*
|
||||
* Copyright (C) 2014 - Alfred Agrell
|
||||
*
|
||||
* RetroArch is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
@ -13,159 +14,223 @@
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#include "rewind.h"
|
||||
#include "performance.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "boolean.h"
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "general.h"
|
||||
|
||||
#ifndef UINT16_MAX
|
||||
#define UINT16_MAX 0xffff
|
||||
#endif
|
||||
#ifndef UINT32_MAX
|
||||
#define UINT32_MAX 0xffffffffu
|
||||
#endif
|
||||
|
||||
#undef CPU_X86
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i686__)
|
||||
#define CPU_X86
|
||||
#endif
|
||||
|
||||
// Other arches SIGBUS (usually) on unaligned accesses.
|
||||
#ifndef CPU_X86
|
||||
#define NO_UNALIGNED_MEM
|
||||
#endif
|
||||
|
||||
// Format per frame:
|
||||
// size nextstart;
|
||||
// repeat {
|
||||
// uint16 numchanged; // everything is counted in units of uint16
|
||||
// if (numchanged) {
|
||||
// uint16 numunchanged; // skip these before handling numchanged
|
||||
// uint16[numchanged] changeddata;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// uint32 numunchanged;
|
||||
// if (!numunchanged) break;
|
||||
// }
|
||||
// }
|
||||
// size thisstart;
|
||||
//
|
||||
// The start offsets point to 'nextstart' of any given compressed frame.
|
||||
// Each uint16 is stored native endian; anything that claims any other endianness refers to the endianness of this specific item.
|
||||
// The uint32 is stored little endian.
|
||||
// Each size value is stored native endian if alignment is not enforced; if it is, they're little endian.
|
||||
// The start of the buffer contains a size pointing to the end of the buffer; the end points to its start.
|
||||
// Wrapping is handled by returning to the start of the buffer if the compressed data could potentially hit the edge;
|
||||
// if the compressed data could potentially overwrite the tail pointer, the tail retreats until it can no longer collide.
|
||||
// This means that on average, ~2*maxcompsize is unused at any given moment.
|
||||
|
||||
// These are called only a small, constant number of times per frame, so keep them as simple as possible.
|
||||
// Stores 'val' at 'ptr' by copying sizeof(val) bytes with memcpy.
// Going through memcpy rather than a size_t* store means 'ptr' does not have to be aligned for size_t.
static inline void write_size_t(void *ptr, size_t val)
|
||||
{
|
||||
memcpy(ptr, &val, sizeof(val));
|
||||
}
|
||||
// Loads a size_t from 'ptr' by copying sizeof(size_t) bytes with memcpy and returns it.
// Going through memcpy rather than a size_t* load means 'ptr' does not have to be aligned for size_t.
static inline size_t read_size_t(const void *ptr)
|
||||
{
|
||||
size_t ret;
|
||||
memcpy(&ret, ptr, sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct state_manager
|
||||
{
|
||||
uint64_t *buffer;
|
||||
size_t buf_size;
|
||||
size_t buf_size_mask;
|
||||
uint32_t *tmp_state;
|
||||
size_t top_ptr;
|
||||
size_t bottom_ptr;
|
||||
size_t state_size;
|
||||
bool first_pop;
|
||||
uint8_t *data;
|
||||
size_t capacity;
|
||||
uint8_t *head; // Reading and writing is done here.
|
||||
uint8_t *tail; // If head comes close to this, discard a frame.
|
||||
|
||||
uint8_t *thisblock;
|
||||
uint8_t *nextblock;
|
||||
|
||||
size_t blocksize; // This one is rounded up from reset::blocksize.
|
||||
size_t maxcompsize; // size_t + (blocksize + 131071) / 131072 * (blocksize + u16 + u16) + u16 + u32 + size_t (yes, the math is a bit ugly).
|
||||
|
||||
unsigned entries;
|
||||
bool thisblock_valid;
|
||||
};
|
||||
|
||||
static inline size_t nearest_pow2_size(size_t v)
|
||||
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
|
||||
{
|
||||
size_t orig = v;
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
#if SIZE_MAX >= UINT16_C(0xffff)
|
||||
v |= v >> 8;
|
||||
#endif
|
||||
#if SIZE_MAX >= UINT32_C(0xffffffff)
|
||||
v |= v >> 16;
|
||||
#endif
|
||||
#if SIZE_MAX >= UINT64_C(0xffffffffffffffff)
|
||||
v |= v >> 32;
|
||||
#endif
|
||||
v++;
|
||||
|
||||
size_t next = v;
|
||||
size_t prev = v >> 1;
|
||||
|
||||
if ((next - orig) < (orig - prev))
|
||||
return next;
|
||||
else
|
||||
return prev;
|
||||
}
|
||||
|
||||
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size, void *init_buffer)
|
||||
{
|
||||
if (buffer_size <= state_size * 4) // Need a sufficient buffer size.
|
||||
return NULL;
|
||||
|
||||
state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));
|
||||
if (!state)
|
||||
return NULL;
|
||||
|
||||
// We need 4-byte aligned state_size to avoid having to enforce this with unneeded memcpy's!
|
||||
rarch_assert(state_size % 4 == 0);
|
||||
state->top_ptr = 1;
|
||||
size_t newblocksize = ((state_size - 1) | (sizeof(uint16_t) - 1)) + 1;
|
||||
state->blocksize = newblocksize;
|
||||
|
||||
state->state_size = state_size / sizeof(uint32_t); // Works in multiple of 4.
|
||||
state->buf_size = nearest_pow2_size(buffer_size) / sizeof(uint64_t); // Works in multiple of 8.
|
||||
state->buf_size_mask = state->buf_size - 1;
|
||||
RARCH_LOG("Readjusted rewind buffer size to %u MiB\n", (unsigned)(sizeof(uint64_t) * (state->buf_size >> 20)));
|
||||
const int maxcblkcover = UINT16_MAX * sizeof(uint16_t);
|
||||
const int maxcblks = (state->blocksize + maxcblkcover - 1) / maxcblkcover;
|
||||
state->maxcompsize = state->blocksize + maxcblks * sizeof(uint16_t) * 2 + sizeof(uint16_t) + sizeof(uint32_t) + sizeof(size_t) * 2;
|
||||
|
||||
if (!(state->buffer = (uint64_t*)calloc(1, state->buf_size * sizeof(uint64_t))))
|
||||
goto error;
|
||||
if (!(state->tmp_state = (uint32_t*)calloc(1, state->state_size * sizeof(uint32_t))))
|
||||
state->data = (uint8_t*)malloc(buffer_size);
|
||||
|
||||
state->thisblock = (uint8_t*)calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
|
||||
state->nextblock = (uint8_t*)calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
|
||||
if (!state->data || !state->thisblock || !state->nextblock)
|
||||
goto error;
|
||||
|
||||
memcpy(state->tmp_state, init_buffer, state_size);
|
||||
// Force in a different byte at the end, so we don't need to check bounds in the innermost loop (it's expensive).
|
||||
// There is also a large amount of data that's the same, to stop the other scan.
|
||||
// There is also some padding at the end. This is so we don't read outside the buffer end if we're reading in large blocks;
|
||||
// it doesn't make any difference to us, but sacrificing 16 bytes to keep Valgrind happy is worth it.
|
||||
*(uint16_t*)(state->thisblock + state->blocksize + sizeof(uint16_t) * 3) = 0xFFFF;
|
||||
*(uint16_t*)(state->nextblock + state->blocksize + sizeof(uint16_t) * 3) = 0x0000;
|
||||
|
||||
state->capacity = buffer_size;
|
||||
|
||||
state->head = state->data + sizeof(size_t);
|
||||
state->tail = state->data + sizeof(size_t);
|
||||
|
||||
return state;
|
||||
|
||||
error:
|
||||
if (state)
|
||||
{
|
||||
free(state->buffer);
|
||||
free(state->tmp_state);
|
||||
free(state);
|
||||
}
|
||||
state_manager_free(state);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void state_manager_free(state_manager_t *state)
|
||||
{
|
||||
free(state->buffer);
|
||||
free(state->tmp_state);
|
||||
free(state->data);
|
||||
free(state->thisblock);
|
||||
free(state->nextblock);
|
||||
free(state);
|
||||
}
|
||||
|
||||
bool state_manager_pop(state_manager_t *state, void **data)
|
||||
{
|
||||
*data = state->tmp_state;
|
||||
if (state->first_pop)
|
||||
bool state_manager_pop(state_manager_t *state, const void **data)
|
||||
{
|
||||
*data = NULL;
|
||||
|
||||
if (state->thisblock_valid)
|
||||
{
|
||||
state->first_pop = false;
|
||||
state->thisblock_valid = false;
|
||||
state->entries--;
|
||||
*data = state->thisblock;
|
||||
return true;
|
||||
}
|
||||
|
||||
state->top_ptr = (state->top_ptr - 1) & state->buf_size_mask;
|
||||
|
||||
if (state->top_ptr == state->bottom_ptr) // Our stack is completely empty... :v
|
||||
{
|
||||
state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;
|
||||
if (state->head == state->tail)
|
||||
return false;
|
||||
}
|
||||
|
||||
while (state->buffer[state->top_ptr])
|
||||
size_t start = read_size_t(state->head - sizeof(size_t));
|
||||
state->head = state->data + start;
|
||||
|
||||
const uint8_t *compressed = state->data + start + sizeof(size_t);
|
||||
uint8_t *out = state->thisblock;
|
||||
|
||||
// Begin decompression code
|
||||
// out is the last pushed (or returned) state
|
||||
const uint16_t *compressed16 = (const uint16_t*)compressed;
|
||||
uint16_t *out16 = (uint16_t*)out;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// Apply the xor patch.
|
||||
uint32_t addr = state->buffer[state->top_ptr] >> 32;
|
||||
uint32_t xor_ = state->buffer[state->top_ptr] & 0xFFFFFFFFU;
|
||||
state->tmp_state[addr] ^= xor_;
|
||||
uint16_t i;
|
||||
uint16_t numchanged = *(compressed16++);
|
||||
if (numchanged)
|
||||
{
|
||||
out16 += *compressed16++;
|
||||
// We could do memcpy, but it seems that memcpy has a constant-per-call overhead that actually shows up.
|
||||
// Our average size in here seems to be 8 or something.
|
||||
// Therefore, we do something with lower overhead.
|
||||
for (i = 0; i < numchanged; i++)
|
||||
out16[i] = compressed16[i];
|
||||
|
||||
state->top_ptr = (state->top_ptr - 1) & state->buf_size_mask;
|
||||
}
|
||||
|
||||
if (state->top_ptr == state->bottom_ptr) // Our stack is completely empty... :v
|
||||
{
|
||||
state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;
|
||||
return true;
|
||||
compressed16 += numchanged;
|
||||
out16 += numchanged;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t numunchanged = compressed16[0] | (compressed16[1] << 16);
|
||||
if (!numunchanged)
|
||||
break;
|
||||
compressed16 += 2;
|
||||
out16 += numunchanged;
|
||||
}
|
||||
}
|
||||
// End decompression code
|
||||
|
||||
state->entries--;
|
||||
*data = state->thisblock;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void reassign_bottom(state_manager_t *state)
|
||||
void state_manager_push_where(state_manager_t *state, void **data)
|
||||
{
|
||||
state->bottom_ptr = (state->top_ptr + 1) & state->buf_size_mask;
|
||||
while (state->buffer[state->bottom_ptr]) // Skip ahead until we find the first 0 (boundary for state delta).
|
||||
state->bottom_ptr = (state->bottom_ptr + 1) & state->buf_size_mask;
|
||||
// We need to ensure we have an uncompressed copy of the last pushed state, or we could
|
||||
// end up applying a 'patch' to the wrong savestate, and that'd blow up rather quickly.
|
||||
if (!state->thisblock_valid)
|
||||
{
|
||||
const void *ignored;
|
||||
if (state_manager_pop(state, &ignored))
|
||||
{
|
||||
state->thisblock_valid = true;
|
||||
state->entries++;
|
||||
}
|
||||
}
|
||||
|
||||
*data = state->nextblock;
|
||||
}
|
||||
|
||||
#if __SSE2__
|
||||
|
||||
#if defined(__GNUC__)
|
||||
static inline unsigned compat_ctz(uint32_t v)
|
||||
static inline int compat_ctz(unsigned x)
|
||||
{
|
||||
return __builtin_ctz(v);
|
||||
return __builtin_ctz(x);
|
||||
}
|
||||
#else
|
||||
// Only checks at nibble granularity, because that's what we need.
|
||||
static inline unsigned compat_ctz(uint32_t v)
|
||||
static inline int compat_ctz(unsigned x)
|
||||
{
|
||||
if (v & 0x000f)
|
||||
if (x & 0x000f)
|
||||
return 0;
|
||||
if (v & 0x00f0)
|
||||
if (x & 0x00f0)
|
||||
return 4;
|
||||
if (v & 0x0f00)
|
||||
if (x & 0x0f00)
|
||||
return 8;
|
||||
if (v & 0xf000)
|
||||
if (x & 0xf000)
|
||||
return 12;
|
||||
return 16;
|
||||
}
|
||||
@ -173,87 +238,212 @@ static inline unsigned compat_ctz(uint32_t v)
|
||||
|
||||
#include <emmintrin.h>
|
||||
// There's no equivalent in libc, you'd think so ... std::mismatch exists, but it's not optimized at all. :(
|
||||
static unsigned find_mismatch(const uint32_t *a, const uint32_t *b, unsigned samples)
|
||||
static inline size_t find_change(const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned sse_samples = samples & ~3;
|
||||
for (i = 0; i < sse_samples; i += 4)
|
||||
{
|
||||
__m128i v0 = _mm_loadu_si128((const __m128i*)(a + i));
|
||||
__m128i v1 = _mm_loadu_si128((const __m128i*)(b + i));
|
||||
__m128i c = _mm_cmpeq_epi32(v0, v1);
|
||||
uint32_t mask = _mm_movemask_epi8(c);
|
||||
if (mask != 0xffff) // Something has changed, figure out where.
|
||||
return i + (compat_ctz(~mask) >> 2);
|
||||
}
|
||||
const __m128i *a128 = (const __m128i*)a;
|
||||
const __m128i *b128 = (const __m128i*)b;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
__m128i v0 = _mm_loadu_si128(a128);
|
||||
__m128i v1 = _mm_loadu_si128(b128);
|
||||
__m128i c = _mm_cmpeq_epi32(v0, v1);
|
||||
|
||||
for (; i < samples; i++)
|
||||
if (a[i] != b[i])
|
||||
return i;
|
||||
uint32_t mask = _mm_movemask_epi8(c);
|
||||
if (mask != 0xffff) // Something has changed, figure out where.
|
||||
{
|
||||
size_t ret = (((uint8_t*)a128 - (uint8_t*)a) | (compat_ctz(~mask))) >> 1;
|
||||
return ret | (a[ret] == b[ret]);
|
||||
}
|
||||
|
||||
return samples;
|
||||
a128++;
|
||||
b128++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static unsigned find_mismatch(const uint32_t *a, const uint32_t *b, unsigned samples)
|
||||
static inline size_t find_change(const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < samples; i++)
|
||||
if (a[i] != b[i])
|
||||
return i;
|
||||
return samples;
|
||||
const uint16_t *a_org = a;
|
||||
#ifdef NO_UNALIGNED_MEM
|
||||
while (((uintptr_t)a & (sizeof(size_t) - 1)) && *a == *b)
|
||||
{
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
if (*a == *b)
|
||||
#endif
|
||||
{
|
||||
const size_t *a_big = (const size_t*)a;
|
||||
const size_t *b_big = (const size_t*)b;
|
||||
|
||||
while (*a_big == *b_big)
|
||||
{
|
||||
a_big++;
|
||||
b_big++;
|
||||
}
|
||||
a = (const uint16_t*)a_big;
|
||||
b = (const uint16_t*)b_big;
|
||||
|
||||
while (*a == *b)
|
||||
{
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
}
|
||||
return a - a_org;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void generate_delta(state_manager_t *state, const void *data)
|
||||
static inline size_t find_same(const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
size_t i;
|
||||
bool crossed = false;
|
||||
uint32_t *old_state = state->tmp_state;
|
||||
const uint32_t *new_state = (const uint32_t*)data;
|
||||
const uint16_t *a_org = a;
|
||||
#ifdef NO_UNALIGNED_MEM
|
||||
if (((uintptr_t)a & (sizeof(uint32_t) - 1)) && *a != *b)
|
||||
{
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
if (*a != *b)
|
||||
#endif
|
||||
{
|
||||
// With this, it's random whether two consecutive identical words are caught.
|
||||
// Luckily, compression rate is the same for both cases, and three is always caught.
|
||||
// (We prefer to miss two-word blocks, anyways; fewer iterations of the outer loop, as well as in the decompressor.)
|
||||
const uint32_t *a_big = (const uint32_t*)a;
|
||||
const uint32_t *b_big = (const uint32_t*)b;
|
||||
|
||||
while (*a_big != *b_big)
|
||||
{
|
||||
a_big++;
|
||||
b_big++;
|
||||
}
|
||||
a = (const uint16_t*)a_big;
|
||||
b = (const uint16_t*)b_big;
|
||||
|
||||
if (a != a_org && a[-1] == b[-1])
|
||||
{
|
||||
a--;
|
||||
b--;
|
||||
}
|
||||
}
|
||||
return a - a_org;
|
||||
}
|
||||
|
||||
state->buffer[state->top_ptr++] = 0; // For each separate delta, we have a 0 value sentinel in between.
|
||||
state->top_ptr &= state->buf_size_mask;
|
||||
|
||||
// Check if top_ptr and bottom_ptr crossed each other, which means we need to delete old cruft.
|
||||
if (state->top_ptr == state->bottom_ptr)
|
||||
crossed = true;
|
||||
|
||||
for (i = 0; i < state->state_size; i++)
|
||||
void state_manager_push_do(state_manager_t *state)
|
||||
{
|
||||
if (state->thisblock_valid)
|
||||
{
|
||||
unsigned avail = state->state_size - i;
|
||||
unsigned pos = find_mismatch(old_state + i, new_state + i, avail);
|
||||
if (pos == avail)
|
||||
break;
|
||||
if (state->capacity < sizeof(size_t) + state->maxcompsize)
|
||||
return;
|
||||
|
||||
i += pos;
|
||||
recheckcapacity:;
|
||||
|
||||
// If the data differs (xor != 0), we push that xor on the stack with index and xor.
|
||||
// This can be reversed by reapplying the xor.
|
||||
// This, if states don't really differ much, we'll save lots of space :)
|
||||
// Hopefully this will work really well with save states.
|
||||
uint32_t xor_ = old_state[i] ^ new_state[i];
|
||||
old_state[i] = new_state[i];
|
||||
size_t headpos = state->head - state->data;
|
||||
size_t tailpos = state->tail - state->data;
|
||||
size_t remaining = (tailpos + state->capacity - sizeof(size_t) - headpos - 1) % state->capacity + 1;
|
||||
if (remaining <= state->maxcompsize)
|
||||
{
|
||||
state->tail = state->data + read_size_t(state->tail);
|
||||
state->entries--;
|
||||
goto recheckcapacity;
|
||||
}
|
||||
|
||||
state->buffer[state->top_ptr] = ((uint64_t)i << 32) | xor_;
|
||||
state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;
|
||||
RARCH_PERFORMANCE_INIT(gen_deltas);
|
||||
RARCH_PERFORMANCE_START(gen_deltas);
|
||||
|
||||
if (state->top_ptr == state->bottom_ptr)
|
||||
crossed = true;
|
||||
const uint8_t *oldb = state->thisblock;
|
||||
const uint8_t *newb = state->nextblock;
|
||||
uint8_t *compressed = state->head + sizeof(size_t);
|
||||
|
||||
// Begin compression code; 'compressed' will point to the end of the compressed data (excluding the prev pointer).
|
||||
const uint16_t *old16 = (const uint16_t*)oldb;
|
||||
const uint16_t *new16 = (const uint16_t*)newb;
|
||||
uint16_t *compressed16 = (uint16_t*)compressed;
|
||||
size_t num16s = state->blocksize / sizeof(uint16_t);
|
||||
|
||||
while (num16s)
|
||||
{
|
||||
size_t i;
|
||||
size_t skip = find_change(old16, new16);
|
||||
|
||||
if (skip >= num16s)
|
||||
break;
|
||||
|
||||
old16 += skip;
|
||||
new16 += skip;
|
||||
num16s -= skip;
|
||||
|
||||
if (skip > UINT16_MAX)
|
||||
{
|
||||
if (skip > UINT32_MAX)
|
||||
{
|
||||
// This will make it scan the entire thing again, but it only hits on 8GB unchanged
|
||||
// data anyways, and if you're doing that, you've got bigger problems.
|
||||
skip = UINT32_MAX;
|
||||
}
|
||||
*compressed16++ = 0;
|
||||
*compressed16++ = skip;
|
||||
*compressed16++ = skip >> 16;
|
||||
skip = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t changed = find_same(old16, new16);
|
||||
if (changed > UINT16_MAX)
|
||||
changed = UINT16_MAX;
|
||||
|
||||
*compressed16++ = changed;
|
||||
*compressed16++ = skip;
|
||||
|
||||
for (i = 0; i < changed; i++)
|
||||
compressed16[i] = old16[i];
|
||||
|
||||
old16 += changed;
|
||||
new16 += changed;
|
||||
num16s -= changed;
|
||||
compressed16 += changed;
|
||||
}
|
||||
|
||||
compressed16[0] = 0;
|
||||
compressed16[1] = 0;
|
||||
compressed16[2] = 0;
|
||||
compressed = (uint8_t*)(compressed16 + 3);
|
||||
// End compression code.
|
||||
|
||||
if (compressed - state->data + state->maxcompsize > state->capacity)
|
||||
{
|
||||
compressed = state->data;
|
||||
if (state->tail == state->data + sizeof(size_t))
|
||||
state->tail = state->data + read_size_t(state->tail);
|
||||
}
|
||||
write_size_t(compressed, state->head-state->data);
|
||||
compressed += sizeof(size_t);
|
||||
write_size_t(state->head, compressed-state->data);
|
||||
state->head = compressed;
|
||||
|
||||
RARCH_PERFORMANCE_STOP(gen_deltas);
|
||||
}
|
||||
else
|
||||
state->thisblock_valid = true;
|
||||
|
||||
if (crossed)
|
||||
reassign_bottom(state);
|
||||
uint8_t *swap = state->thisblock;
|
||||
state->thisblock = state->nextblock;
|
||||
state->nextblock = swap;
|
||||
|
||||
state->entries++;
|
||||
return;
|
||||
}
|
||||
|
||||
bool state_manager_push(state_manager_t *state, const void *data)
|
||||
void state_manager_capacity(state_manager_t *state, unsigned *entries, size_t *bytes, bool *full)
|
||||
{
|
||||
RARCH_PERFORMANCE_INIT(gen_delta);
|
||||
RARCH_PERFORMANCE_START(gen_delta);
|
||||
generate_delta(state, data);
|
||||
RARCH_PERFORMANCE_STOP(gen_delta);
|
||||
size_t headpos = state->head - state->data;
|
||||
size_t tailpos = state->tail - state->data;
|
||||
size_t remaining = (tailpos + state->capacity - sizeof(size_t) - headpos - 1) % state->capacity + 1;
|
||||
|
||||
state->first_pop = true;
|
||||
|
||||
return true;
|
||||
if (entries)
|
||||
*entries = state->entries;
|
||||
if (bytes)
|
||||
*bytes = state->capacity-remaining;
|
||||
if (full)
|
||||
*full = remaining <= state->maxcompsize * 2;
|
||||
}
|
||||
|
||||
|
10
rewind.h
10
rewind.h
@ -21,11 +21,11 @@
|
||||
|
||||
typedef struct state_manager state_manager_t;
|
||||
|
||||
// Always pass in at least 4-byte aligned data and sizes!
|
||||
|
||||
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size, void *init_buffer);
|
||||
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size);
|
||||
void state_manager_free(state_manager_t *state);
|
||||
bool state_manager_pop(state_manager_t *state, void **data);
|
||||
bool state_manager_push(state_manager_t *state, const void *data);
|
||||
bool state_manager_pop(state_manager_t *state, const void **data);
|
||||
void state_manager_push_where(state_manager_t *state, void **data);
|
||||
void state_manager_push_do(state_manager_t *state);
|
||||
void state_manager_capacity(state_manager_t *state, unsigned int *entries, size_t *bytes, bool *full);
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user