Split these functions out. ctult needs them for netplay.

This commit is contained in:
Alcaro 2015-06-27 04:53:37 +02:00
parent 361879bc22
commit e71d8f852a
2 changed files with 287 additions and 249 deletions

486
rewind.c
View File

@ -19,7 +19,6 @@
#include "rewind.h"
#include "performance.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <retro_inline.h>
#include "intl/intl.h"
@ -63,91 +62,19 @@ repeat {
size thisstart;
#endif
/* The start offsets point to 'nextstart' of any given compressed frame.
* Each uint16 is stored native endian; anything that claims any other
* endianness refers to the endianness of this specific item.
* The uint32 is stored little endian.
*
* Each size value is stored native endian if alignment is not enforced;
* if it is, they're little endian.
*
* The start of the buffer contains a size pointing to the end of the
* buffer; the end points to its start.
*
* Wrapping is handled by returning to the start of the buffer if the
* compressed data could potentially hit the edge;
*
* if the compressed data could potentially overwrite the tail pointer,
* the tail retreats until it can no longer collide.
*
* This means that on average, ~2 * maxcompsize is
* unused at any given moment. */
/* These are called very few constant times per frame,
* keep it as simple as possible. */
static INLINE void write_size_t(void *ptr, size_t val)
/*
 * Returns an upper bound, in bytes, on the size of a patch generated for an
 * uncompressed state of 'uncomp' bytes.
 */
size_t state_manager_raw_maxsize(size_t uncomp)
{
   /* Number of state bytes covered by a single compressed block. */
   const size_t maxcblkcover = UINT16_MAX * sizeof(uint16_t);
   /* Uncompressed size rounded up to a whole number of uint16_t.
    * The mask has to be ~(size - 1); ~size would clear the wrong bit
    * and leave odd sizes odd (or round small sizes down). */
   size_t uncomp16 = (uncomp + sizeof(uint16_t) - 1) & ~(sizeof(uint16_t) - 1);
   /* Number of blocks needed to cover the state. */
   size_t maxcblks = (uncomp + maxcblkcover - 1) / maxcblkcover;

   return uncomp16
      + maxcblks * sizeof(uint16_t) * 2 /* two u16 of overhead per block */
      + sizeof(uint16_t) * 3;           /* three u16 to end it */
}
static INLINE size_t read_size_t(const void *ptr)
void *state_manager_raw_alloc(size_t len, uint16_t uniq)
{
size_t ret;
size_t len16 = (len + sizeof(uint16_t) - 1) & ~sizeof(uint16_t);
memcpy(&ret, ptr, sizeof(ret));
return ret;
}
struct state_manager
{
uint8_t *data;
size_t capacity;
/* Reading and writing is done here here. */
uint8_t *head;
/* If head comes close to this, discard a frame. */
uint8_t *tail;
uint8_t *thisblock;
uint8_t *nextblock;
/* This one is rounded up from reset::blocksize. */
size_t blocksize;
/* size_t + (blocksize + 131071) / 131072 *
* (blocksize + u16 + u16) + u16 + u32 + size_t
* (yes, the math is a bit ugly). */
size_t maxcompsize;
unsigned entries;
bool thisblock_valid;
};
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
{
size_t newblocksize;
int maxcblks;
const int maxcblkcover = UINT16_MAX * sizeof(uint16_t);
state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));
if (!state)
return NULL;
newblocksize = ((state_size - 1) | (sizeof(uint16_t) - 1)) + 1;
state->blocksize = newblocksize;
maxcblks = (state->blocksize + maxcblkcover - 1) / maxcblkcover;
state->maxcompsize = state->blocksize + maxcblks * sizeof(uint16_t) * 2 +
sizeof(uint16_t) + sizeof(uint32_t) + sizeof(size_t) * 2;
state->data = (uint8_t*)malloc(buffer_size);
state->thisblock = (uint8_t*)
calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
state->nextblock = (uint8_t*)
calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
if (!state->data || !state->thisblock || !state->nextblock)
goto error;
uint16_t *ret = (uint16_t*)calloc(len16 + sizeof(uint16_t) * 4 + 16, 1);
/* Force in a different byte at the end, so we don't need to check
* bounds in the innermost loop (it's expensive).
@ -160,120 +87,9 @@ state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
*
* It doesn't make any difference to us, but sacrificing 16 bytes to get
* Valgrind happy is worth it. */
*(uint16_t*)(state->thisblock + state->blocksize + sizeof(uint16_t) * 3) =
0xFFFF;
*(uint16_t*)(state->nextblock + state->blocksize + sizeof(uint16_t) * 3) =
0x0000;
ret[len16/sizeof(uint16_t) + 3] = uniq;
state->capacity = buffer_size;
state->head = state->data + sizeof(size_t);
state->tail = state->data + sizeof(size_t);
return state;
error:
state_manager_free(state);
return NULL;
}
void state_manager_free(state_manager_t *state)
{
if (!state)
return;
free(state->data);
free(state->thisblock);
free(state->nextblock);
free(state);
}
bool state_manager_pop(state_manager_t *state, const void **data)
{
size_t start;
uint8_t *out = NULL;
uint16_t *out16 = NULL;
const uint8_t *compressed = NULL;
const uint16_t *compressed16 = NULL;
*data = NULL;
if (state->thisblock_valid)
{
state->thisblock_valid = false;
state->entries--;
*data = state->thisblock;
return true;
}
if (state->head == state->tail)
return false;
start = read_size_t(state->head - sizeof(size_t));
state->head = state->data + start;
compressed = state->data + start + sizeof(size_t);
out = state->thisblock;
/* Begin decompression code
* out is the last pushed (or returned) state */
compressed16 = (const uint16_t*)compressed;
out16 = (uint16_t*)out;
for (;;)
{
uint16_t i;
uint16_t numchanged = *(compressed16++);
if (numchanged)
{
out16 += *compressed16++;
/* We could do memcpy, but it seems that memcpy has a
* constant-per-call overhead that actually shows up.
*
* Our average size in here seems to be 8 or something.
* Therefore, we do something with lower overhead. */
for (i = 0; i < numchanged; i++)
out16[i] = compressed16[i];
compressed16 += numchanged;
out16 += numchanged;
}
else
{
uint32_t numunchanged = compressed16[0] | (compressed16[1] << 16);
if (!numunchanged)
break;
compressed16 += 2;
out16 += numunchanged;
}
}
/* End decompression code */
state->entries--;
*data = state->thisblock;
return true;
}
void state_manager_push_where(state_manager_t *state, void **data)
{
/* We need to ensure we have an uncompressed copy of the last
* pushed state, or we could end up applying a 'patch' to wrong
* savestate, and that'd blow up rather quickly. */
if (!state->thisblock_valid)
{
const void *ignored;
if (state_manager_pop(state, &ignored))
{
state->thisblock_valid = true;
state->entries++;
}
}
*data = state->nextblock;
return ret;
}
#if __SSE2__
@ -402,48 +218,12 @@ static INLINE size_t find_same(const uint16_t *a, const uint16_t *b)
return a - a_org;
}
void state_manager_push_do(state_manager_t *state)
size_t state_manager_raw_compress(const void *src, const void *dst, size_t len, void *patch)
{
uint8_t *swap = NULL;
if (state->thisblock_valid)
{
const uint8_t *oldb, *newb;
const uint16_t *old16, *new16;
uint8_t *compressed;
uint16_t *compressed16;
size_t num16s;
size_t headpos, tailpos, remaining;
if (state->capacity < sizeof(size_t) + state->maxcompsize)
return;
recheckcapacity:;
headpos = state->head - state->data;
tailpos = state->tail - state->data;
remaining = (tailpos + state->capacity -
sizeof(size_t) - headpos - 1) % state->capacity + 1;
if (remaining <= state->maxcompsize)
{
state->tail = state->data + read_size_t(state->tail);
state->entries--;
goto recheckcapacity;
}
RARCH_PERFORMANCE_INIT(gen_deltas);
RARCH_PERFORMANCE_START(gen_deltas);
oldb = state->thisblock;
newb = state->nextblock;
compressed = state->head + sizeof(size_t);
/* Begin compression code; 'compressed' will point to
* the end of the compressed data (excluding the prev pointer). */
old16 = (const uint16_t*)oldb;
new16 = (const uint16_t*)newb;
compressed16 = (uint16_t*)compressed;
num16s = state->blocksize / sizeof(uint16_t);
const uint16_t *old16 = (const uint16_t*)src;
const uint16_t *new16 = (const uint16_t*)dst;
uint16_t *compressed16 = (uint16_t*)patch;
size_t num16s = (len + sizeof(uint16_t) - 1) / sizeof(uint16_t);
while (num16s)
{
@ -492,8 +272,240 @@ recheckcapacity:;
compressed16[0] = 0;
compressed16[1] = 0;
compressed16[2] = 0;
compressed = (uint8_t*)(compressed16 + 3);
/* End compression code. */
return (uint8_t*)(compressed16+3) - (uint8_t*)patch;
}
/*
 * Applies 'patch' to 'data' in place.
 *
 * The patch is a sequence of uint16_t records:
 *  - numchanged != 0: one u16 giving the number of words to skip, followed
 *    by 'numchanged' replacement words;
 *  - numchanged == 0: a 32-bit count of unchanged words stored as two u16
 *    (low half first); a count of zero terminates the patch.
 *
 * 'patchlen' and 'datalen' are currently unused: no bounds checking is done,
 * so the patch must have been generated for a buffer of this layout.
 */
void state_manager_raw_decompress(const void *patch, size_t patchlen, void *data, size_t datalen)
{
   uint16_t *out16         = (uint16_t*)data;
   const uint16_t *patch16 = (const uint16_t*)patch;

   (void)patchlen;
   (void)datalen;

   for (;;)
   {
      uint16_t i;
      uint16_t numchanged = *(patch16++);

      if (numchanged)
      {
         out16 += *patch16++;

         /* We could do memcpy, but it seems that memcpy has a
          * constant-per-call overhead that actually shows up.
          *
          * Our average size in here seems to be 8 or something.
          * Therefore, we do something with lower overhead. */
         for (i = 0; i < numchanged; i++)
            out16[i] = patch16[i];

         patch16 += numchanged;
         out16   += numchanged;
      }
      else
      {
         /* Widen before shifting: a uint16_t promotes to (signed) int, and
          * shifting a value >= 0x8000 left by 16 would overflow it. */
         uint32_t numunchanged = (uint32_t)patch16[0] |
            ((uint32_t)patch16[1] << 16);

         if (!numunchanged)
            break;

         patch16 += 2;
         out16   += numunchanged;
      }
   }
}
/* The start offsets point to 'nextstart' of any given compressed frame.
* Each uint16 is stored native endian; anything that claims any other
* endianness refers to the endianness of this specific item.
* The uint32 is stored little endian.
*
* Each size value is stored native endian if alignment is not enforced;
* if it is, they're little endian.
*
* The start of the buffer contains a size pointing to the end of the
* buffer; the end points to its start.
*
* Wrapping is handled by returning to the start of the buffer if the
* compressed data could potentially hit the edge;
*
* if the compressed data could potentially overwrite the tail pointer,
* the tail retreats until it can no longer collide.
*
* This means that on average, ~2 * maxcompsize is
* unused at any given moment. */
/* These are called only a small, constant number of times per frame,
 * so keep them as simple as possible. */
/* Stores 'val' at 'ptr' in native representation. Going through memcpy
 * means 'ptr' does not have to be aligned for size_t. */
static INLINE void write_size_t(void *ptr, size_t val)
{
   memcpy(ptr, &val, sizeof(size_t));
}
/* Loads a size_t from 'ptr' in native representation. Going through memcpy
 * means 'ptr' does not have to be aligned for size_t. */
static INLINE size_t read_size_t(const void *ptr)
{
   size_t value;

   memcpy(&value, ptr, sizeof(size_t));
   return value;
}
/* Bookkeeping for the rewind buffer: a byte buffer holding compressed
 * patches, plus two uncompressed state blocks. */
struct state_manager
{
/* Backing storage for the compressed patches, and its size in bytes. */
uint8_t *data;
size_t capacity;
/* Reading and writing is done here. */
uint8_t *head;
/* If head comes close to this, discard a frame. */
uint8_t *tail;
/* Uncompressed state that state_manager_pop() decompresses into and returns. */
uint8_t *thisblock;
/* Block handed out by state_manager_push_where() for the caller to fill. */
uint8_t *nextblock;
/* State size given to state_manager_new(), rounded to uint16_t granularity. */
size_t blocksize;
/* Upper bound on the space one stored frame can take:
 * state_manager_raw_maxsize() of the state plus the two size_t values
 * that surround the compressed data. */
size_t maxcompsize;
/* Count of stored states; decremented on pop and when the tail is discarded. */
unsigned entries;
/* When true, state_manager_pop() returns thisblock directly
 * without decompressing anything. */
bool thisblock_valid;
};
/*
 * Creates a state manager for savestates of 'state_size' bytes, backed by a
 * patch buffer of 'buffer_size' bytes.
 *
 * Returns NULL if any allocation fails; everything allocated so far is
 * released through state_manager_free() in that case.
 */
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
{
   state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));

   if (!state)
      return NULL;

   /* Round up to a whole number of uint16_t. The mask has to be
    * ~(size - 1); ~size would clear the wrong bit. */
   state->blocksize   = (state_size + sizeof(uint16_t) - 1) &
      ~(sizeof(uint16_t) - 1);
   /* the compressed data is surrounded by pointers to the other side */
   state->maxcompsize = state_manager_raw_maxsize(state_size) +
      sizeof(size_t) * 2;

   state->data        = (uint8_t*)malloc(buffer_size);

   /* The two blocks get different 'uniq' values (0 and 1). */
   state->thisblock   = (uint8_t*)state_manager_raw_alloc(state_size, 0);
   state->nextblock   = (uint8_t*)state_manager_raw_alloc(state_size, 1);

   if (!state->data || !state->thisblock || !state->nextblock)
      goto error;

   state->capacity    = buffer_size;

   /* Both ends start just past the leading size_t slot: the buffer is empty. */
   state->head        = state->data + sizeof(size_t);
   state->tail        = state->data + sizeof(size_t);

   return state;

error:
   state_manager_free(state);
   return NULL;
}
/* Releases a state manager together with its patch buffer and both state
 * blocks. Passing NULL is allowed and does nothing. */
void state_manager_free(state_manager_t *state)
{
   if (state)
   {
      free(state->data);
      free(state->thisblock);
      free(state->nextblock);
      free(state);
   }
}
/*
 * Retrieves the most recent state.
 *
 * On success, '*data' points at the manager's internal 'thisblock' and true
 * is returned. If nothing is stored, '*data' is set to NULL and false is
 * returned.
 */
bool state_manager_pop(state_manager_t *state, const void **data)
{
   size_t start;
   const uint8_t *compressed = NULL;

   *data = NULL;

   /* thisblock is already valid: hand it out without decompressing. */
   if (state->thisblock_valid)
   {
      state->thisblock_valid = false;
      state->entries--;
      *data = state->thisblock;
      return true;
   }

   /* Buffer is empty. */
   if (state->head == state->tail)
      return false;

   /* The size_t just before 'head' holds the offset the head moves back to;
    * the compressed data begins one size_t after that offset. */
   start       = read_size_t(state->head - sizeof(size_t));
   state->head = state->data + start;
   compressed  = state->data + start + sizeof(size_t);

   /* Apply the patch in place to thisblock. */
   state_manager_raw_decompress(compressed, state->maxcompsize,
         state->thisblock, state->blocksize);

   state->entries--;
   *data = state->thisblock;
   return true;
}
/*
 * Hands out, through '*data', the block the caller should fill with the
 * next state.
 */
void state_manager_push_where(state_manager_t *state, void **data)
{
   const void *unused = NULL;

   /* An uncompressed copy of the last pushed state must be available first;
    * otherwise the next patch would be generated against the wrong
    * savestate, and that would blow up rather quickly. The pop is undone
    * by marking thisblock valid and restoring the entry count. */
   if (!state->thisblock_valid && state_manager_pop(state, &unused))
   {
      state->thisblock_valid = true;
      state->entries++;
   }

   *data = state->nextblock;
}
void state_manager_push_do(state_manager_t *state)
{
uint8_t *swap = NULL;
if (state->thisblock_valid)
{
const uint8_t *oldb, *newb;
const uint16_t *old16, *new16;
uint8_t *compressed;
uint16_t *compressed16;
size_t num16s;
size_t headpos, tailpos, remaining;
if (state->capacity < sizeof(size_t) + state->maxcompsize)
return;
recheckcapacity:;
headpos = state->head - state->data;
tailpos = state->tail - state->data;
remaining = (tailpos + state->capacity -
sizeof(size_t) - headpos - 1) % state->capacity + 1;
if (remaining <= state->maxcompsize)
{
state->tail = state->data + read_size_t(state->tail);
state->entries--;
goto recheckcapacity;
}
RARCH_PERFORMANCE_INIT(gen_deltas);
RARCH_PERFORMANCE_START(gen_deltas);
oldb = state->thisblock;
newb = state->nextblock;
compressed = state->head + sizeof(size_t);
compressed += state_manager_raw_compress(oldb, newb, state->blocksize, compressed);
if (compressed - state->data + state->maxcompsize > state->capacity)
{

View File

@ -24,6 +24,7 @@ extern "C" {
#include <stddef.h>
#include <boolean.h>
#include <stdint.h>
typedef struct state_manager state_manager_t;
@ -42,6 +43,31 @@ void state_manager_capacity(state_manager_t *state,
void init_rewind(void);
/*
 * Returns the maximum size, in bytes, a patch for a savestate of 'uncomp'
 * bytes can have. Real savestates are very likely to compress to far less.
 */
size_t state_manager_raw_maxsize(size_t uncomp);
/*
 * Allocates a zero-filled buffer for a savestate of 'len' bytes, with some
 * padding after the data, and stores 'uniq' in a word past the end of the state.
 * See state_manager_raw_compress for how the buffer is meant to be used.
 * When you're done with it, pass it to free().
 */
void *state_manager_raw_alloc(size_t len, uint16_t uniq);
/*
 * Takes two savestates and creates a patch that turns 'src' into 'dst'.
 * Both 'src' and 'dst' must have been returned by state_manager_raw_alloc(),
 * with the same 'len' and different 'uniq' values.
 * 'patch' must have room for at least 'state_manager_raw_maxsize(len)' bytes.
 * Returns the number of bytes actually written to 'patch'.
 */
size_t state_manager_raw_compress(const void *src, const void *dst, size_t len, void *patch);
/*
 * Takes 'patch' from a previous call to state_manager_raw_compress() and
 * applies it in place to 'data' (the 'src' of that call), turning it into
 * the 'dst' of that call.
 * 'patchlen' and 'datalen' are not checked by the current implementation.
 * If the arguments do not match a previous call to
 * state_manager_raw_compress(), anything at all can happen.
 */
void state_manager_raw_decompress(const void *patch, size_t patchlen, void *data, size_t datalen);
#ifdef __cplusplus
}
#endif