Split these functions out. ctult needs them for netplay.

2025-04-02 07:20:34 +00:00 · 2015-06-27 04:53:37 +02:00 · 2015-06-27 04:53:37 +02:00 · e71d8f852a
commit e71d8f852a
parent 361879bc22
2 changed files with 287 additions and 249 deletions
--- a/rewind.c
+++ b/rewind.c
@ -19,7 +19,6 @@
 #include "rewind.h"
 #include "performance.h"
 #include <stdlib.h>
-#include <stdint.h>
 #include <string.h>
 #include <retro_inline.h>
 #include "intl/intl.h"
@ -63,91 +62,19 @@ repeat {
 size thisstart;
 #endif

-/* The start offsets point to 'nextstart' of any given compressed frame.
- * Each uint16 is stored native endian; anything that claims any other 
- * endianness refers to the endianness of this specific item.
- * The uint32 is stored little endian.
- *
- * Each size value is stored native endian if alignment is not enforced; 
- * if it is, they're little endian.
- *
- * The start of the buffer contains a size pointing to the end of the 
- * buffer; the end points to its start.
- *
- * Wrapping is handled by returning to the start of the buffer if the 
- * compressed data could potentially hit the edge;
- *
- * if the compressed data could potentially overwrite the tail pointer, 
- * the tail retreats until it can no longer collide.
- *
- * This means that on average, ~2 * maxcompsize is 
- * unused at any given moment. */
-
-
-/* These are called very few constant times per frame, 
- * keep it as simple as possible. */
-static INLINE void write_size_t(void *ptr, size_t val)
+size_t state_manager_raw_maxsize(size_t uncomp)
 {
-   memcpy(ptr, &val, sizeof(val));
+   const size_t maxcblkcover = UINT16_MAX * sizeof(uint16_t); /* most bytes a single compressed block can cover */
+   size_t uncomp16 = (uncomp + sizeof(uint16_t) - 1) & ~(sizeof(uint16_t) - 1); /* uncompressed size, rounded up to whole uint16_t units */
+   size_t maxcblks = (uncomp + maxcblkcover - 1) / maxcblkcover; /* worst-case number of blocks (ceiling division) */
+   return uncomp16 + maxcblks * sizeof(uint16_t)*2 /* two u16 overhead per block */ + sizeof(uint16_t)*3; /* three u16 to end it */
 }

-static INLINE size_t read_size_t(const void *ptr)
+void *state_manager_raw_alloc(size_t len, uint16_t uniq)
 {
-   size_t ret;
+   size_t len16 = (len + sizeof(uint16_t) - 1) & ~sizeof(uint16_t);

-   memcpy(&ret, ptr, sizeof(ret));
-   return ret;
-}
-
-struct state_manager
-{
-   uint8_t *data;
-   size_t capacity;
-   /* Reading and writing is done here here. */
-   uint8_t *head;
-   /* If head comes close to this, discard a frame. */
-   uint8_t *tail;
-
-   uint8_t *thisblock;
-   uint8_t *nextblock;
-
-   /* This one is rounded up from reset::blocksize. */
-   size_t blocksize;
-
-   /* size_t + (blocksize + 131071) / 131072 * 
-    * (blocksize + u16 + u16) + u16 + u32 + size_t
-    * (yes, the math is a bit ugly). */
-   size_t maxcompsize;
-
-   unsigned entries;
-   bool thisblock_valid;
-};
-
-state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
-{
-   size_t newblocksize;
-   int maxcblks;
-   const int maxcblkcover = UINT16_MAX * sizeof(uint16_t);
-   state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));
-
-   if (!state)
-      return NULL;
-
-   newblocksize = ((state_size - 1) | (sizeof(uint16_t) - 1)) + 1;
-   state->blocksize = newblocksize;
-
-   maxcblks = (state->blocksize + maxcblkcover - 1) / maxcblkcover;
-   state->maxcompsize = state->blocksize + maxcblks * sizeof(uint16_t) * 2 +
-      sizeof(uint16_t) + sizeof(uint32_t) + sizeof(size_t) * 2;
-
-   state->data = (uint8_t*)malloc(buffer_size);
-
-   state->thisblock = (uint8_t*)
-      calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
-   state->nextblock = (uint8_t*)
-      calloc(state->blocksize + sizeof(uint16_t) * 4 + 16, 1);
-   if (!state->data || !state->thisblock || !state->nextblock)
-      goto error;
+   uint16_t *ret = (uint16_t*)calloc(len16 + sizeof(uint16_t) * 4 + 16, 1);

   /* Force in a different byte at the end, so we don't need to check 
    * bounds in the innermost loop (it's expensive).
@ -160,120 +87,9 @@ state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
    *
    * It doesn't make any difference to us, but sacrificing 16 bytes to get 
    * Valgrind happy is worth it. */
-   *(uint16_t*)(state->thisblock + state->blocksize + sizeof(uint16_t) * 3) =
-      0xFFFF;
-   *(uint16_t*)(state->nextblock + state->blocksize + sizeof(uint16_t) * 3) =
-      0x0000;
+   ret[len16/sizeof(uint16_t) + 3] = uniq;

-   state->capacity = buffer_size;
-
-   state->head = state->data + sizeof(size_t);
-   state->tail = state->data + sizeof(size_t);
-
-   return state;
-
-error:
-   state_manager_free(state);
-   return NULL;
-}
-
-void state_manager_free(state_manager_t *state)
-{
-   if (!state)
-      return;
-
-   free(state->data);
-   free(state->thisblock);
-   free(state->nextblock);
-   free(state);
-}
-
-bool state_manager_pop(state_manager_t *state, const void **data)
-{
-   size_t start;
-   uint8_t *out                 = NULL;
-   uint16_t *out16              = NULL;
-   const uint8_t *compressed    = NULL;
-   const uint16_t *compressed16 = NULL;
-
-   *data = NULL;
-
-   if (state->thisblock_valid)
-   {
-      state->thisblock_valid = false;
-      state->entries--;
-      *data = state->thisblock;
-      return true;
-   }
-
-   if (state->head == state->tail)
-      return false;
-
-   start = read_size_t(state->head - sizeof(size_t));
-   state->head = state->data + start;
-
-   compressed = state->data + start + sizeof(size_t);
-   out = state->thisblock;
-
-   /* Begin decompression code
-    * out is the last pushed (or returned) state */
-   compressed16 = (const uint16_t*)compressed;
-   out16 = (uint16_t*)out;
-
-   for (;;)
-   {
-      uint16_t i;
-      uint16_t numchanged = *(compressed16++);
-
-      if (numchanged)
-      {
-         out16 += *compressed16++;
-
-         /* We could do memcpy, but it seems that memcpy has a 
-          * constant-per-call overhead that actually shows up.
-          *
-          * Our average size in here seems to be 8 or something.
-          * Therefore, we do something with lower overhead. */
-         for (i = 0; i < numchanged; i++)
-            out16[i] = compressed16[i];
-
-         compressed16 += numchanged;
-         out16 += numchanged;
-      }
-      else
-      {
-         uint32_t numunchanged = compressed16[0] | (compressed16[1] << 16);
-
-         if (!numunchanged)
-            break;
-         compressed16 += 2;
-         out16 += numunchanged;
-      }
-   }
-   /* End decompression code */
-
-   state->entries--;
-   *data = state->thisblock;
-   return true;
-}
-
-void state_manager_push_where(state_manager_t *state, void **data)
-{
-   /* We need to ensure we have an uncompressed copy of the last
-    * pushed state, or we could end up applying a 'patch' to wrong 
-    * savestate, and that'd blow up rather quickly. */
-
-   if (!state->thisblock_valid) 
-   {
-      const void *ignored;
-      if (state_manager_pop(state, &ignored))
-      {
-         state->thisblock_valid = true;
-         state->entries++;
-      }
-   }
-   
-   *data = state->nextblock;
+   return ret;
 }

 #if __SSE2__
@ -402,48 +218,12 @@ static INLINE size_t find_same(const uint16_t *a, const uint16_t *b)
   return a - a_org;
 }

-void state_manager_push_do(state_manager_t *state)
+size_t state_manager_raw_compress(const void *src, const void *dst, size_t len, void *patch)
 {
-   uint8_t *swap = NULL;
-
-   if (state->thisblock_valid)
-   {
-      const uint8_t *oldb, *newb;
-      const uint16_t *old16, *new16;
-      uint8_t *compressed;
-      uint16_t *compressed16;
-      size_t num16s;
-      size_t headpos, tailpos, remaining;
-      if (state->capacity < sizeof(size_t) + state->maxcompsize)
-         return;
-
-recheckcapacity:;
-
-      headpos = state->head - state->data;
-      tailpos = state->tail - state->data;
-      remaining = (tailpos + state->capacity -
-            sizeof(size_t) - headpos - 1) % state->capacity + 1;
-
-      if (remaining <= state->maxcompsize)
-      {
-         state->tail = state->data + read_size_t(state->tail);
-         state->entries--;
-         goto recheckcapacity;
-      }
-
-      RARCH_PERFORMANCE_INIT(gen_deltas);
-      RARCH_PERFORMANCE_START(gen_deltas);
-
-      oldb = state->thisblock;
-      newb = state->nextblock;
-      compressed = state->head + sizeof(size_t);
-
-      /* Begin compression code; 'compressed' will point to 
-       * the end of the compressed data (excluding the prev pointer). */
-      old16 = (const uint16_t*)oldb;
-      new16 = (const uint16_t*)newb;
-      compressed16 = (uint16_t*)compressed;
-      num16s = state->blocksize / sizeof(uint16_t);
+   const uint16_t *old16 = (const uint16_t*)src;
+   const uint16_t *new16 = (const uint16_t*)dst;
+   uint16_t *compressed16 = (uint16_t*)patch;
+   size_t num16s = (len + sizeof(uint16_t) - 1) / sizeof(uint16_t);
   
   while (num16s)
   {
@ -492,8 +272,240 @@ recheckcapacity:;
   compressed16[0] = 0;
   compressed16[1] = 0;
   compressed16[2] = 0;
-      compressed = (uint8_t*)(compressed16 + 3);
-      /* End compression code. */
+   
+   return (uint8_t*)(compressed16+3) - (uint8_t*)patch;
+}
+
+void state_manager_raw_decompress(const void *patch, size_t patchlen, void *data, size_t datalen)
+{
+   uint16_t *out16 = (uint16_t*)data;
+   const uint16_t *patch16 = (const uint16_t*)patch;
+   /* Neither length is checked: decoding stops only at the patch's own terminator. */
+   (void)patchlen;
+   (void)datalen;
+   /* Record {n, skip, n words}: skip 'skip' words, then overwrite n. Record {0, lo, hi}: skip lo | hi << 16 words; a zero skip ends the patch. */
+   for (;;)
+   {
+      uint16_t i;
+      uint16_t numchanged = *(patch16++);
+
+      if (numchanged)
+      {
+         out16 += *patch16++;
+
+         /* We could do memcpy, but it seems that memcpy has a
+          * constant-per-call overhead that actually shows up.
+          *
+          * Our average size in here seems to be 8 or something.
+          * Therefore, we do something with lower overhead. */
+         for (i = 0; i < numchanged; i++)
+            out16[i] = patch16[i];
+
+         patch16 += numchanged;
+         out16 += numchanged;
+      }
+      else
+      {
+         uint32_t numunchanged = patch16[0] | ((uint32_t)patch16[1] << 16); /* widen first: shifting the promoted int would overflow for values >= 0x8000 */
+
+         if (!numunchanged)
+            break;
+         patch16 += 2;
+         out16 += numunchanged;
+      }
+   }
+}
+
+/* The start offsets point to 'nextstart' of any given compressed frame.
+ * Each uint16 is stored native endian; anything that claims any other 
+ * endianness refers to the endianness of this specific item.
+ * The uint32 is stored little endian.
+ *
+ * Each size value is stored native endian if alignment is not enforced; 
+ * if it is, they're little endian.
+ *
+ * The start of the buffer contains a size pointing to the end of the 
+ * buffer; the end points to its start.
+ *
+ * Wrapping is handled by returning to the start of the buffer if the 
+ * compressed data could potentially hit the edge;
+ *
+ * if the compressed data could potentially overwrite the tail pointer, 
+ * the tail retreats until it can no longer collide.
+ *
+ * This means that on average, ~2 * maxcompsize is 
+ * unused at any given moment. */
+
+
+/* write_size_t/read_size_t copy one size_t to/from 'ptr' through memcpy. They are called
+ * a small, constant number of times per frame, so keep them as simple as possible. */
+static INLINE void write_size_t(void *ptr, size_t val)
+{
+   memcpy(ptr, &val, sizeof(val));
+}
+
+static INLINE size_t read_size_t(const void *ptr)
+{
+   size_t ret;
+
+   memcpy(&ret, ptr, sizeof(ret));
+   return ret;
+}
+
+struct state_manager
+{
+   uint8_t *data;
+   size_t capacity;
+   /* Reading and writing is done here. */
+   uint8_t *head;
+   /* If head comes close to this, discard a frame. */
+   uint8_t *tail;
+
+   uint8_t *thisblock;
+   uint8_t *nextblock;
+
+   /* Savestate size rounded up to a whole number of uint16_t. */
+   size_t blocksize;
+
+   /* Worst-case size of one stored frame: state_manager_raw_maxsize()
+    * of the state size, plus a size_t on each side of the compressed
+    * data (set in state_manager_new()). */
+   size_t maxcompsize;
+
+   unsigned entries;
+   bool thisblock_valid;
+};
+
+state_manager_t *state_manager_new(size_t state_size, size_t buffer_size)
+{
+   state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));
+   /* Builds a rewind buffer of buffer_size bytes for savestates of state_size bytes; returns NULL if any allocation fails. */
+   if (!state)
+      return NULL;
+
+   state->blocksize = (state_size + sizeof(uint16_t) - 1) & ~(sizeof(uint16_t) - 1); /* round up to whole uint16_t units; the mask must clear bit 0 only */
+   /* the compressed data is surrounded by pointers to the other side */
+   state->maxcompsize = state_manager_raw_maxsize(state_size) + sizeof(size_t) * 2;
+   state->data = (uint8_t*)malloc(buffer_size);
+
+   state->thisblock = (uint8_t*)state_manager_raw_alloc(state_size, 0);
+   state->nextblock = (uint8_t*)state_manager_raw_alloc(state_size, 1);
+   if (!state->data || !state->thisblock || !state->nextblock)
+      goto error;
+
+   state->capacity = buffer_size;
+
+   state->head = state->data + sizeof(size_t);
+   state->tail = state->data + sizeof(size_t);
+
+   return state;
+
+error:
+   state_manager_free(state);
+   return NULL;
+}
+
+void state_manager_free(state_manager_t *state)
+{
+   if (!state)
+      return;
+
+   free(state->data); /* members may still be NULL when called from state_manager_new()'s error path */
+   free(state->thisblock);
+   free(state->nextblock);
+   free(state);
+}
+
+/* Steps back one state: sets *data to state->thisblock and returns true,
+ * or sets *data to NULL and returns false when head has reached tail. */
+bool state_manager_pop(state_manager_t *state, const void **data)
+{
+   size_t start;
+   uint8_t *out              = NULL;
+   const uint8_t *compressed = NULL;
+
+   *data = NULL;
+
+   if (state->thisblock_valid)
+   {
+      state->thisblock_valid = false;
+      state->entries--;
+      *data = state->thisblock;
+      return true;
+   }
+
+   if (state->head == state->tail)
+      return false;
+
+   start = read_size_t(state->head - sizeof(size_t));
+   state->head = state->data + start;
+
+   compressed = state->data + start + sizeof(size_t);
+   out = state->thisblock;
+
+   state_manager_raw_decompress(compressed, state->maxcompsize, out, state->blocksize);
+
+   state->entries--;
+   *data = state->thisblock;
+   return true;
+}
+
+void state_manager_push_where(state_manager_t *state, void **data)
+{
+   /* We need to ensure we have an uncompressed copy of the last
+    * pushed state, or we could end up applying a 'patch' to the wrong
+    * savestate, and that'd blow up rather quickly. */
+
+   if (!state->thisblock_valid) 
+   {
+      const void *ignored;
+      if (state_manager_pop(state, &ignored))
+      {
+         state->thisblock_valid = true;
+         state->entries++;
+      }
+   }
+   
+   *data = state->nextblock; /* the buffer state_manager_push_do() later reads as the new state */
+}
+
+void state_manager_push_do(state_manager_t *state)
+{
+   uint8_t *swap = NULL;
+
+   if (state->thisblock_valid)
+   {
+      const uint8_t *oldb, *newb;
+      const uint16_t *old16, *new16;
+      uint8_t *compressed;
+      uint16_t *compressed16;
+      size_t num16s;
+      size_t headpos, tailpos, remaining;
+      if (state->capacity < sizeof(size_t) + state->maxcompsize)
+         return;
+
+recheckcapacity:;
+
+      headpos = state->head - state->data;
+      tailpos = state->tail - state->data;
+      remaining = (tailpos + state->capacity -
+            sizeof(size_t) - headpos - 1) % state->capacity + 1;
+
+      if (remaining <= state->maxcompsize)
+      {
+         state->tail = state->data + read_size_t(state->tail);
+         state->entries--;
+         goto recheckcapacity;
+      }
+
+      RARCH_PERFORMANCE_INIT(gen_deltas);
+      RARCH_PERFORMANCE_START(gen_deltas);
+
+      oldb = state->thisblock;
+      newb = state->nextblock;
+      compressed = state->head + sizeof(size_t);
+
+      compressed += state_manager_raw_compress(oldb, newb, state->blocksize, compressed);

      if (compressed - state->data + state->maxcompsize > state->capacity)
      {
--- a/rewind.h
+++ b/rewind.h
@ -24,6 +24,7 @@ extern "C" {

 #include <stddef.h>
 #include <boolean.h>
+#include <stdint.h>

 typedef struct state_manager state_manager_t;

@ -42,6 +43,31 @@ void state_manager_capacity(state_manager_t *state,

 void init_rewind(void);

+
+/* Returns the maximum compressed size of a savestate. It is very likely to compress to far less. */
+size_t state_manager_raw_maxsize(size_t uncomp);
+
+/*
+ * Allocates a zero-filled buffer for a 'len'-byte savestate, padded for state_manager_raw_compress, with 'uniq' stored past the end.
+ * When you're done with it, send it to free().
+ */
+void *state_manager_raw_alloc(size_t len, uint16_t uniq);
+
+/*
+ * Takes two savestates and creates a patch that turns 'src' into 'dst'.
+ * Both 'src' and 'dst' must be returned from state_manager_raw_alloc(), with the same 'len', and different 'uniq'.
+ * 'patch' must be size 'state_manager_raw_maxsize(len)' or more.
+ * Returns the number of bytes actually written to 'patch'.
+ */
+size_t state_manager_raw_compress(const void *src, const void *dst, size_t len, void *patch);
+
+/*
+ * Applies 'patch' (from state_manager_raw_compress) to 'data' (that call's 'src'), turning it into that call's 'dst'.
+ * 'patchlen' and 'datalen' are currently ignored; decoding stops at the patch's own terminator.
+ * If the given arguments do not match a previous call to state_manager_raw_compress(), anything at all can happen.
+ */
+void state_manager_raw_decompress(const void *patch, size_t patchlen, void *data, size_t datalen);
+
 #ifdef __cplusplus
 }
 #endif