From 3fdf93a2733d39aae7045491e92f23841ae1ac0c Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sun, 1 Nov 2020 17:22:32 +0100 Subject: [PATCH] Update libretro-common --- .../formats/libchdr/libchdr_lzma.c | 39 +++++++++++++------ .../formats/libchdr/libchdr_zlib.c | 23 ++++++++--- .../include/libchdr/libchdr_zlib.h | 1 + libretro-common/include/libchdr/lzma.h | 1 + 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/libretro-common/formats/libchdr/libchdr_lzma.c b/libretro-common/formats/libchdr/libchdr_lzma.c index 25fabe3cf4..0110a9280d 100644 --- a/libretro-common/formats/libchdr/libchdr_lzma.c +++ b/libretro-common/formats/libchdr/libchdr_lzma.c @@ -66,11 +66,16 @@ *------------------------------------------------- */ +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define LZMA_MIN_ALIGNMENT_BITS 512 +#define LZMA_MIN_ALIGNMENT_BYTES (LZMA_MIN_ALIGNMENT_BITS / 8) + static void *lzma_fast_alloc(void *p, size_t size) { int scan; - uint32_t *addr = NULL; + uint32_t *addr = NULL; lzma_allocator *codec = (lzma_allocator *)(p); + uintptr_t vaddr = 0; /* compute the size, rounding to the nearest 1k */ size = (size + 0x3ff) & ~0x3ff; @@ -83,27 +88,36 @@ static void *lzma_fast_alloc(void *p, size_t size) { /* set the low bit of the size so we don't match next time */ *ptr |= 1; - return ptr + 1; + + /* return aligned address of the block */ + return codec->allocptr2[scan]; } } /* alloc a new one and put it into the list */ - addr = (uint32_t *)malloc(sizeof(uint32_t) * size + sizeof(uintptr_t)); - if (!addr) + addr = (uint32_t *)malloc(size + sizeof(uint32_t) + LZMA_MIN_ALIGNMENT_BYTES); + if (addr==NULL) return NULL; - for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) { if (codec->allocptr[scan] == NULL) { + /* store block address */ codec->allocptr[scan] = addr; + + /* compute aligned address, store it */ + vaddr = (uintptr_t)addr; + vaddr = (vaddr + sizeof(uint32_t) + (LZMA_MIN_ALIGNMENT_BYTES-1)) & (~(LZMA_MIN_ALIGNMENT_BYTES-1)); + codec->allocptr2[scan] = (uint32_t*)vaddr; break; } } /* set the low bit of the size so we don't match next time */ - *addr = (uint32_t)(size | 1); - return addr + (sizeof(uint32_t) == sizeof(uintptr_t) ? 1 : 2); + *addr = size | 1; + + /* return aligned address */ + return (void*)vaddr; } /*------------------------------------------------- @@ -114,21 +128,22 @@ static void *lzma_fast_alloc(void *p, size_t size) static void lzma_fast_free(void *p, void *address) { int scan; - uint32_t *ptr; - lzma_allocator *codec; + uint32_t *ptr = NULL; + lzma_allocator *codec = NULL; + if (address == NULL) return; codec = (lzma_allocator *)(p); /* find the hunk */ - ptr = (uint32_t *)(address) - 1; + ptr = (uint32_t *)address; for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) { - if (ptr == codec->allocptr[scan]) + if (ptr == codec->allocptr2[scan]) { /* clear the low bit of the size to allow matches */ - *ptr &= ~1; + *codec->allocptr[scan] &= ~1; return; } } diff --git a/libretro-common/formats/libchdr/libchdr_zlib.c b/libretro-common/formats/libchdr/libchdr_zlib.c index 7e6fe19ac8..3623baa253 100644 --- a/libretro-common/formats/libchdr/libchdr_zlib.c +++ b/libretro-common/formats/libchdr/libchdr_zlib.c @@ -241,9 +241,14 @@ chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t comple allocates and frees memory frequently -------------------------------------------------*/ +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define ZLIB_MIN_ALIGNMENT_BITS 512 +#define ZLIB_MIN_ALIGNMENT_BYTES (ZLIB_MIN_ALIGNMENT_BITS / 8) + voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) { zlib_allocator *alloc = (zlib_allocator *)opaque; + uintptr_t paddr = 0; UINT32 *ptr; int i; @@ -258,12 +263,14 @@ voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) { /* set the low bit of the size so we don't match next time */ *ptr |= 1; - return ptr + 1; + + /* return aligned block address */ + return (voidpf)(alloc->allocptr2[i]); } } /* alloc a new one */ - ptr = (UINT32 *)malloc(size + sizeof(uintptr_t)); + ptr = (UINT32 *)malloc(size + sizeof(UINT32) + ZLIB_MIN_ALIGNMENT_BYTES); if (!ptr) return NULL; @@ -272,12 +279,16 @@ voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) if (!alloc->allocptr[i]) { alloc->allocptr[i] = ptr; + paddr = (((uintptr_t)ptr) + sizeof(UINT32) + (ZLIB_MIN_ALIGNMENT_BYTES-1)) & (~(ZLIB_MIN_ALIGNMENT_BYTES-1)); + alloc->allocptr2[i] = (uint32_t*)paddr; break; } /* set the low bit of the size so we don't match next time */ *ptr = size | 1; - return ptr + (sizeof(uint32_t) == sizeof(uintptr_t) ? 1 : 2); + + /* return aligned block address */ + return (voidpf)paddr; } /*------------------------------------------------- @@ -288,15 +299,15 @@ voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) void zlib_fast_free(voidpf opaque, voidpf address) { zlib_allocator *alloc = (zlib_allocator *)opaque; - UINT32 *ptr = (UINT32 *)address - (sizeof(uint32_t) == sizeof(uintptr_t) ? 1 : 2); + UINT32 *ptr = (UINT32 *)address; int i; /* find the hunk */ for (i = 0; i < MAX_ZLIB_ALLOCS; i++) - if (ptr == alloc->allocptr[i]) + if (ptr == alloc->allocptr2[i]) { /* clear the low bit of the size to allow matches */ - *ptr &= ~1; + *(alloc->allocptr[i]) &= ~1; return; } } diff --git a/libretro-common/include/libchdr/libchdr_zlib.h b/libretro-common/include/libchdr/libchdr_zlib.h index 324faeeaa2..86544d6d00 100644 --- a/libretro-common/include/libchdr/libchdr_zlib.h +++ b/libretro-common/include/libchdr/libchdr_zlib.h @@ -27,6 +27,7 @@ typedef struct _zlib_allocator zlib_allocator; struct _zlib_allocator { UINT32 * allocptr[MAX_ZLIB_ALLOCS]; + UINT32 * allocptr2[MAX_ZLIB_ALLOCS]; }; typedef struct _zlib_codec_data zlib_codec_data; diff --git a/libretro-common/include/libchdr/lzma.h b/libretro-common/include/libchdr/lzma.h index e3cccbc196..15deb62759 100644 --- a/libretro-common/include/libchdr/lzma.h +++ b/libretro-common/include/libchdr/lzma.h @@ -30,6 +30,7 @@ struct _lzma_allocator void (*Free)(void *p, void *address); /* address can be 0 */ void (*FreeSz)(void *p, void *address, size_t size); /* address can be 0 */ uint32_t* allocptr[MAX_LZMA_ALLOCS]; + uint32_t* allocptr2[MAX_LZMA_ALLOCS]; }; typedef struct _lzma_codec_data lzma_codec_data;