From 00a80adfae80b99fb1b78e48ed8abe1b11eceb1c Mon Sep 17 00:00:00 2001 From: Megamouse Date: Wed, 13 Dec 2023 20:36:41 +0100 Subject: [PATCH] Update 7z to 23.01 --- 3rdparty/7z/7zlib.vcxproj | 19 + 3rdparty/7z/7zlib.vcxproj.filters | 357 ++----- 3rdparty/7z/CMakeLists.txt | 82 +- 3rdparty/7z/src/7z.h | 12 +- 3rdparty/7z/src/7zAlloc.c | 69 +- 3rdparty/7z/src/7zAlloc.h | 6 +- 3rdparty/7z/src/7zArcIn.c | 405 ++++---- 3rdparty/7z/src/7zBuf.h | 6 +- 3rdparty/7z/src/7zCrc.c | 130 +-- 3rdparty/7z/src/7zCrc.h | 14 +- 3rdparty/7z/src/7zCrcOpt.c | 16 +- 3rdparty/7z/src/7zDec.c | 172 ++-- 3rdparty/7z/src/7zFile.c | 33 +- 3rdparty/7z/src/7zFile.h | 9 +- 3rdparty/7z/src/7zStream.c | 67 +- 3rdparty/7z/src/7zTypes.h | 262 +++-- 3rdparty/7z/src/7zVersion.h | 8 +- 3rdparty/7z/src/7zVersion.rc | 110 +- 3rdparty/7z/src/7zWindows.h | 101 ++ 3rdparty/7z/src/Aes.c | 108 +- 3rdparty/7z/src/Aes.h | 36 +- 3rdparty/7z/src/AesOpt.c | 348 ++++--- 3rdparty/7z/src/Alloc.c | 192 ++-- 3rdparty/7z/src/Alloc.h | 19 +- 3rdparty/7z/src/Bcj2.c | 325 +++--- 3rdparty/7z/src/Bcj2.h | 270 ++++- 3rdparty/7z/src/Bcj2Enc.c | 571 +++++++---- 3rdparty/7z/src/Blake2.h | 48 + 3rdparty/7z/src/Blake2s.c | 250 +++++ 3rdparty/7z/src/Bra.c | 528 ++++++---- 3rdparty/7z/src/Bra.h | 117 ++- 3rdparty/7z/src/Bra86.c | 221 ++-- 3rdparty/7z/src/BraIA64.c | 57 +- 3rdparty/7z/src/BwtSort.c | 516 ++++++++++ 3rdparty/7z/src/BwtSort.h | 26 + 3rdparty/7z/src/Compiler.h | 162 ++- 3rdparty/7z/src/CpuArch.c | 847 +++++++++++----- 3rdparty/7z/src/CpuArch.h | 228 +++-- 3rdparty/7z/src/Delta.h | 6 +- 3rdparty/7z/src/DllSecur.c | 127 ++- 3rdparty/7z/src/DllSecur.h | 6 +- 3rdparty/7z/src/HuffEnc.c | 154 +++ 3rdparty/7z/src/HuffEnc.h | 23 + 3rdparty/7z/src/LzFind.c | 519 ++++++---- 3rdparty/7z/src/LzFind.h | 53 +- 3rdparty/7z/src/LzFindMt.c | 70 +- 3rdparty/7z/src/LzFindMt.h | 10 +- 3rdparty/7z/src/LzFindOpt.c | 14 +- 3rdparty/7z/src/LzHash.h | 8 +- 3rdparty/7z/src/Lzma2Dec.c | 12 +- 3rdparty/7z/src/Lzma2Dec.h | 15 +- 3rdparty/7z/src/Lzma2DecMt.c | 155 +-- 3rdparty/7z/src/Lzma2DecMt.h | 20 +- 3rdparty/7z/src/Lzma2Enc.c | 174 ++-- 3rdparty/7z/src/Lzma2Enc.h | 20 +- 3rdparty/7z/src/Lzma86.h | 6 +- 3rdparty/7z/src/Lzma86Dec.c | 7 +- 3rdparty/7z/src/Lzma86Enc.c | 7 +- 3rdparty/7z/src/LzmaDec.c | 190 ++-- 3rdparty/7z/src/LzmaDec.h | 17 +- 3rdparty/7z/src/LzmaEnc.c | 380 ++++--- 3rdparty/7z/src/LzmaEnc.h | 23 +- 3rdparty/7z/src/LzmaLib.c | 8 +- 3rdparty/7z/src/LzmaLib.h | 12 +- 3rdparty/7z/src/MtCoder.c | 104 +- 3rdparty/7z/src/MtCoder.h | 50 +- 3rdparty/7z/src/MtDec.c | 101 +- 3rdparty/7z/src/MtDec.h | 34 +- 3rdparty/7z/src/Ppmd.h | 12 +- 3rdparty/7z/src/Ppmd7.c | 186 ++-- 3rdparty/7z/src/Ppmd7.h | 10 +- 3rdparty/7z/src/Ppmd7Dec.c | 57 +- 3rdparty/7z/src/Ppmd7Enc.c | 87 +- 3rdparty/7z/src/Ppmd7aDec.c | 295 ++++++ 3rdparty/7z/src/Ppmd8.c | 1565 +++++++++++++++++++++++++++++ 3rdparty/7z/src/Ppmd8.h | 181 ++++ 3rdparty/7z/src/Ppmd8Dec.c | 295 ++++++ 3rdparty/7z/src/Ppmd8Enc.c | 338 +++++++ 3rdparty/7z/src/Precomp.h | 6 +- 3rdparty/7z/src/RotateDefs.h | 28 +- 3rdparty/7z/src/Sha1.c | 498 +++++++++ 3rdparty/7z/src/Sha1.h | 76 ++ 3rdparty/7z/src/Sha1Opt.c | 386 +++++++ 3rdparty/7z/src/Sha256.c | 146 +-- 3rdparty/7z/src/Sha256.h | 12 +- 3rdparty/7z/src/Sha256Opt.c | 129 +-- 3rdparty/7z/src/Sort.h | 6 +- 3rdparty/7z/src/SwapBytes.c | 800 +++++++++++++++ 3rdparty/7z/src/SwapBytes.h | 17 + 3rdparty/7z/src/Threads.c | 86 +- 3rdparty/7z/src/Threads.h | 50 +- 3rdparty/7z/src/Xz.c | 4 +- 3rdparty/7z/src/Xz.h | 56 +- 3rdparty/7z/src/XzCrc64.c | 32 +- 3rdparty/7z/src/XzCrc64.h | 12 +- 3rdparty/7z/src/XzCrc64Opt.c | 28 +- 3rdparty/7z/src/XzDec.c | 300 +++--- 3rdparty/7z/src/XzEnc.c | 270 ++--- 3rdparty/7z/src/XzEnc.h | 23 +- 3rdparty/7z/src/XzIn.c | 75 +- 100 files changed, 11232 insertions(+), 3906 deletions(-) create mode 100644 3rdparty/7z/src/7zWindows.h create mode 100644 3rdparty/7z/src/Blake2.h create mode 100644 3rdparty/7z/src/Blake2s.c create mode 100644 3rdparty/7z/src/BwtSort.c create mode 100644 3rdparty/7z/src/BwtSort.h create mode 100644 3rdparty/7z/src/HuffEnc.c create mode 100644 3rdparty/7z/src/HuffEnc.h create mode 100644 3rdparty/7z/src/Ppmd7aDec.c create mode 100644 3rdparty/7z/src/Ppmd8.c create mode 100644 3rdparty/7z/src/Ppmd8.h create mode 100644 3rdparty/7z/src/Ppmd8Dec.c create mode 100644 3rdparty/7z/src/Ppmd8Enc.c create mode 100644 3rdparty/7z/src/Sha1.c create mode 100644 3rdparty/7z/src/Sha1.h create mode 100644 3rdparty/7z/src/Sha1Opt.c create mode 100644 3rdparty/7z/src/SwapBytes.c create mode 100644 3rdparty/7z/src/SwapBytes.h diff --git a/3rdparty/7z/7zlib.vcxproj b/3rdparty/7z/7zlib.vcxproj index db3421ee9c..f39c506b64 100644 --- a/3rdparty/7z/7zlib.vcxproj +++ b/3rdparty/7z/7zlib.vcxproj @@ -26,14 +26,18 @@ + + + + @@ -48,10 +52,13 @@ + + + @@ -72,14 +79,18 @@ + + + + @@ -91,10 +102,18 @@ + + + + + + + + diff --git a/3rdparty/7z/7zlib.vcxproj.filters b/3rdparty/7z/7zlib.vcxproj.filters index 59e30f6cff..9ff9afe990 100644 --- a/3rdparty/7z/7zlib.vcxproj.filters +++ b/3rdparty/7z/7zlib.vcxproj.filters @@ -1,264 +1,107 @@  - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;hm;inl;inc;ipp;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/3rdparty/7z/CMakeLists.txt b/3rdparty/7z/CMakeLists.txt index cd9094f301..fb77d6773c 100644 --- a/3rdparty/7z/CMakeLists.txt +++ b/3rdparty/7z/CMakeLists.txt @@ -1,50 +1,62 @@ # 7z sdk if(WIN32) add_library(3rdparty_7z STATIC EXCLUDE_FROM_ALL - src/Sha256.c - src/Ppmd7Dec.c - src/XzDec.c - src/XzEnc.c - src/Lzma2Dec.c - src/XzCrc64.c - src/DllSecur.c - src/Lzma2DecMt.c - src/BraIA64.c + src/7zAlloc.c + src/7zArcIn.c + src/7zBuf.c + src/7zBuf2.c + src/7zCrc.c + src/7zCrcOpt.c + src/7zDec.c src/7zFile.c src/7zStream.c - src/Lzma86Enc.c - src/Threads.c - src/7zAlloc.c - src/LzmaEnc.c - src/MtCoder.c - src/Lzma86Dec.c - src/Sort.c - src/LzFindMt.c - src/7zDec.c - src/Bcj2.c - src/Ppmd7.c - src/Bra86.c - src/Bcj2Enc.c - src/7zBuf.c src/Aes.c src/AesOpt.c - src/XzCrc64Opt.c - src/7zArcIn.c - src/Lzma2Enc.c + src/Alloc.c + src/Bcj2.c + src/Bcj2Enc.c + src/Blake2s.c src/Bra.c - src/7zCrcOpt.c - src/7zBuf2.c - src/LzFind.c - src/Ppmd7Enc.c + src/Bra86.c + src/BraIA64.c + src/BwtSort.c src/CpuArch.c src/Delta.c - src/XzIn.c - src/Alloc.c - src/Xz.c + src/DllSecur.c + src/HuffEnc.c + src/LzFind.c + src/LzFindMt.c + src/LzFindOpt.c + src/Lzma2Dec.c + src/Lzma2DecMt.c + src/Lzma2Enc.c + src/Lzma86Dec.c + src/Lzma86Enc.c src/LzmaDec.c + src/LzmaEnc.c src/LzmaLib.c - src/7zCrc.c - src/MtDec.c) + src/MtCoder.c + src/MtDec.c + src/Ppmd7.c + src/Ppmd7aDec.c + src/Ppmd7Dec.c + src/Ppmd7Enc.c + src/Ppmd8.c + src/Ppmd8Dec.c + src/Ppmd8Enc.c + src/Sha1.c + src/Sha1Opt.c + src/Sha256.c + src/Sha256Opt.c + src/Sort.c + src/SwapBytes.c + src/Threads.c + src/Xz.c + src/XzCrc64.c + src/XzCrc64Opt.c + src/XzDec.c + src/XzEnc.c + src/XzIn.c) target_include_directories(3rdparty_7z INTERFACE $ $) diff --git a/3rdparty/7z/src/7z.h b/3rdparty/7z/src/7z.h index 969523cd3e..b42405cc22 100644 --- a/3rdparty/7z/src/7z.h +++ b/3rdparty/7z/src/7z.h @@ -1,8 +1,8 @@ /* 7z.h -- 7z interface -2018-07-02 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_H -#define __7Z_H +#ifndef ZIP7_INC_7Z_H +#define ZIP7_INC_7Z_H #include "7zTypes.h" @@ -98,7 +98,7 @@ typedef struct UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, - ILookInStream *stream, UInt64 startPos, + ILookInStreamPtr stream, UInt64 startPos, Byte *outBuffer, size_t outSize, ISzAllocPtr allocMain); @@ -174,7 +174,7 @@ UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 SRes SzArEx_Extract( const CSzArEx *db, - ILookInStream *inStream, + ILookInStreamPtr inStream, UInt32 fileIndex, /* index of file */ UInt32 *blockIndex, /* index of solid block */ Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ @@ -196,7 +196,7 @@ SZ_ERROR_INPUT_EOF SZ_ERROR_FAIL */ -SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream, +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, ISzAllocPtr allocMain, ISzAllocPtr allocTemp); EXTERN_C_END diff --git a/3rdparty/7z/src/7zAlloc.c b/3rdparty/7z/src/7zAlloc.c index ea32809c65..3a8e2cbf44 100644 --- a/3rdparty/7z/src/7zAlloc.c +++ b/3rdparty/7z/src/7zAlloc.c @@ -1,5 +1,5 @@ -/* 7zAlloc.c -- Allocation functions -2017-04-03 : Igor Pavlov : Public domain */ +/* 7zAlloc.c -- Allocation functions for 7z processing +2023-03-04 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,74 +7,83 @@ #include "7zAlloc.h" -/* #define _SZ_ALLOC_DEBUG */ -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ +/* #define SZ_ALLOC_DEBUG */ +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG +#ifdef SZ_ALLOC_DEBUG +/* #ifdef _WIN32 -#include +#include "7zWindows.h" #endif +*/ #include -int g_allocCount = 0; -int g_allocCountTemp = 0; +static int g_allocCount = 0; +static int g_allocCountTemp = 0; +static void Print_Alloc(const char *s, size_t size, int *counter) +{ + const unsigned size2 = (unsigned)size; + fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2); + (*counter)++; +} +static void Print_Free(const char *s, int *counter) +{ + (*counter)--; + fprintf(stderr, "\n%s count = %10d", s, *counter); +} #endif void *SzAlloc(ISzAllocPtr p, size_t size) { - UNUSED_VAR(p); + UNUSED_VAR(p) if (size == 0) return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount); - g_allocCount++; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc", size, &g_allocCount); #endif return malloc(size); } void SzFree(ISzAllocPtr p, void *address) { - UNUSED_VAR(p); - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - { - g_allocCount--; - fprintf(stderr, "\nFree; count = %10d", g_allocCount); - } + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free ", &g_allocCount); #endif free(address); } void *SzAllocTemp(ISzAllocPtr p, size_t size) { - UNUSED_VAR(p); + UNUSED_VAR(p) if (size == 0) return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp); - g_allocCountTemp++; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc_temp", size, &g_allocCountTemp); + /* #ifdef _WIN32 return HeapAlloc(GetProcessHeap(), 0, size); #endif + */ #endif return malloc(size); } void SzFreeTemp(ISzAllocPtr p, void *address) { - UNUSED_VAR(p); - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - { - g_allocCountTemp--; - fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp); - } + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free_temp ", &g_allocCountTemp); + /* #ifdef _WIN32 HeapFree(GetProcessHeap(), 0, address); return; #endif + */ #endif free(address); } diff --git a/3rdparty/7z/src/7zAlloc.h b/3rdparty/7z/src/7zAlloc.h index c0f89d73cb..bedd125c0a 100644 --- a/3rdparty/7z/src/7zAlloc.h +++ b/3rdparty/7z/src/7zAlloc.h @@ -1,8 +1,8 @@ /* 7zAlloc.h -- Allocation functions -2017-04-03 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __7Z_ALLOC_H -#define __7Z_ALLOC_H +#ifndef ZIP7_INC_7Z_ALLOC_H +#define ZIP7_INC_7Z_ALLOC_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/7zArcIn.c b/3rdparty/7z/src/7zArcIn.c index 7ccc721012..951b7df45e 100644 --- a/3rdparty/7z/src/7zArcIn.c +++ b/3rdparty/7z/src/7zArcIn.c @@ -1,5 +1,5 @@ /* 7zArcIn.c -- 7z Input functions -2021-02-09 : Igor Pavlov : Public domain */ +2023-05-11 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -10,10 +10,11 @@ #include "7zCrc.h" #include "CpuArch.h" -#define MY_ALLOC(T, p, size, alloc) { \ - if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; } +#define MY_ALLOC(T, p, size, alloc) \ + { if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; } -#define MY_ALLOC_ZE(T, p, size, alloc) { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) } +#define MY_ALLOC_ZE(T, p, size, alloc) \ + { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) } #define MY_ALLOC_AND_CPY(to, size, from, alloc) \ { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); } @@ -58,7 +59,7 @@ enum EIdEnum const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; -#define SzBitUi32s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; } +#define SzBitUi32s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) { @@ -69,8 +70,8 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) } else { - MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc); - MY_ALLOC(UInt32, p->Vals, num, alloc); + MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc) + MY_ALLOC(UInt32, p->Vals, num, alloc) } return SZ_OK; } @@ -81,7 +82,7 @@ static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; } -#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; } +#define SzBitUi64s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) { @@ -96,7 +97,7 @@ static void SzAr_Init(CSzAr *p) p->NumFolders = 0; p->PackPositions = NULL; - SzBitUi32s_Init(&p->FolderCRCs); + SzBitUi32s_INIT(&p->FolderCRCs) p->FoCodersOffsets = NULL; p->FoStartPackStreamIndex = NULL; @@ -142,11 +143,11 @@ void SzArEx_Init(CSzArEx *p) p->FileNameOffsets = NULL; p->FileNames = NULL; - SzBitUi32s_Init(&p->CRCs); - SzBitUi32s_Init(&p->Attribs); - // SzBitUi32s_Init(&p->Parents); - SzBitUi64s_Init(&p->MTime); - SzBitUi64s_Init(&p->CTime); + SzBitUi32s_INIT(&p->CRCs) + SzBitUi32s_INIT(&p->Attribs) + // SzBitUi32s_INIT(&p->Parents) + SzBitUi64s_INIT(&p->MTime) + SzBitUi64s_INIT(&p->CTime) } void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc) @@ -180,11 +181,20 @@ static int TestSignatureCandidate(const Byte *testBytes) return 1; } -#define SzData_Clear(p) { (p)->Data = NULL; (p)->Size = 0; } +#define SzData_CLEAR(p) { (p)->Data = NULL; (p)->Size = 0; } + +#define SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) \ + (_sd_)->Size--; dest = *(_sd_)->Data++; + +#define SZ_READ_BYTE_SD(_sd_, dest) \ + if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; \ + SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) -#define SZ_READ_BYTE_SD(_sd_, dest) if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; (_sd_)->Size--; dest = *(_sd_)->Data++; #define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest) -#define SZ_READ_BYTE_2(dest) if (sd.Size == 0) return SZ_ERROR_ARCHIVE; sd.Size--; dest = *sd.Data++; + +#define SZ_READ_BYTE_2(dest) \ + if (sd.Size == 0) return SZ_ERROR_ARCHIVE; \ + sd.Size--; dest = *sd.Data++; #define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); } #define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); } @@ -192,25 +202,25 @@ static int TestSignatureCandidate(const Byte *testBytes) #define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \ dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4); -static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) +static Z7_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) { Byte firstByte, mask; unsigned i; UInt32 v; - SZ_READ_BYTE(firstByte); + SZ_READ_BYTE(firstByte) if ((firstByte & 0x80) == 0) { *value = firstByte; return SZ_OK; } - SZ_READ_BYTE(v); + SZ_READ_BYTE(v) if ((firstByte & 0x40) == 0) { *value = (((UInt32)firstByte & 0x3F) << 8) | v; return SZ_OK; } - SZ_READ_BYTE(mask); + SZ_READ_BYTE(mask) *value = v | ((UInt32)mask << 8); mask = 0x20; for (i = 2; i < 8; i++) @@ -218,11 +228,11 @@ static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) Byte b; if ((firstByte & mask) == 0) { - UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); + const UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); *value |= (highPart << (8 * i)); return SZ_OK; } - SZ_READ_BYTE(b); + SZ_READ_BYTE(b) *value |= ((UInt64)b << (8 * i)); mask >>= 1; } @@ -230,7 +240,7 @@ static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) } -static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) +static Z7_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) { Byte firstByte; UInt64 value64; @@ -244,7 +254,7 @@ static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) sd->Size--; return SZ_OK; } - RINOK(ReadNumber(sd, &value64)); + RINOK(ReadNumber(sd, &value64)) if (value64 >= (UInt32)0x80000000 - 1) return SZ_ERROR_UNSUPPORTED; if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4))) @@ -258,10 +268,10 @@ static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) static SRes SkipData(CSzData *sd) { UInt64 size; - RINOK(ReadNumber(sd, &size)); + RINOK(ReadNumber(sd, &size)) if (size > sd->Size) return SZ_ERROR_ARCHIVE; - SKIP_DATA(sd, size); + SKIP_DATA(sd, size) return SZ_OK; } @@ -270,22 +280,22 @@ static SRes WaitId(CSzData *sd, UInt32 id) for (;;) { UInt64 type; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == id) return SZ_OK; if (type == k7zIdEnd) return SZ_ERROR_ARCHIVE; - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } } static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v) { - UInt32 numBytes = (numItems + 7) >> 3; + const UInt32 numBytes = (numItems + 7) >> 3; if (numBytes > sd->Size) return SZ_ERROR_ARCHIVE; *v = sd->Data; - SKIP_DATA(sd, numBytes); + SKIP_DATA(sd, numBytes) return SZ_OK; } @@ -307,48 +317,48 @@ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) return sum; } -static MY_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc) +static Z7_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc) { Byte allAreDefined; Byte *v2; - UInt32 numBytes = (numItems + 7) >> 3; + const UInt32 numBytes = (numItems + 7) >> 3; *v = NULL; - SZ_READ_BYTE(allAreDefined); + SZ_READ_BYTE(allAreDefined) if (numBytes == 0) return SZ_OK; if (allAreDefined == 0) { if (numBytes > sd->Size) return SZ_ERROR_ARCHIVE; - MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc); - SKIP_DATA(sd, numBytes); + MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc) + SKIP_DATA(sd, numBytes) return SZ_OK; } - MY_ALLOC(Byte, *v, numBytes, alloc); + MY_ALLOC(Byte, *v, numBytes, alloc) v2 = *v; memset(v2, 0xFF, (size_t)numBytes); { - unsigned numBits = (unsigned)numItems & 7; + const unsigned numBits = (unsigned)numItems & 7; if (numBits != 0) v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits)); } return SZ_OK; } -static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +static Z7_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) { UInt32 i; CSzData sd; UInt32 *vals; const Byte *defs; - MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc); + MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc) sd = *sd2; defs = crcs->Defs; vals = crcs->Vals; for (i = 0; i < numItems; i++) if (SzBitArray_Check(defs, i)) { - SZ_READ_32(vals[i]); + SZ_READ_32(vals[i]) } else vals[i] = 0; @@ -359,7 +369,7 @@ static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *c static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) { SzBitUi32s_Free(crcs, alloc); - RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc)); + RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc)) return ReadUi32s(sd, numItems, crcs, alloc); } @@ -367,36 +377,36 @@ static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems) { Byte allAreDefined; UInt32 numDefined = numItems; - SZ_READ_BYTE(allAreDefined); + SZ_READ_BYTE(allAreDefined) if (!allAreDefined) { - size_t numBytes = (numItems + 7) >> 3; + const size_t numBytes = (numItems + 7) >> 3; if (numBytes > sd->Size) return SZ_ERROR_ARCHIVE; numDefined = CountDefinedBits(sd->Data, numItems); - SKIP_DATA(sd, numBytes); + SKIP_DATA(sd, numBytes) } if (numDefined > (sd->Size >> 2)) return SZ_ERROR_ARCHIVE; - SKIP_DATA(sd, (size_t)numDefined * 4); + SKIP_DATA(sd, (size_t)numDefined * 4) return SZ_OK; } static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc) { - RINOK(SzReadNumber32(sd, &p->NumPackStreams)); + RINOK(SzReadNumber32(sd, &p->NumPackStreams)) - RINOK(WaitId(sd, k7zIdSize)); - MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc); + RINOK(WaitId(sd, k7zIdSize)) + MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc) { UInt64 sum = 0; UInt32 i; - UInt32 numPackStreams = p->NumPackStreams; + const UInt32 numPackStreams = p->NumPackStreams; for (i = 0; i < numPackStreams; i++) { UInt64 packSize; p->PackPositions[i] = sum; - RINOK(ReadNumber(sd, &packSize)); + RINOK(ReadNumber(sd, &packSize)) sum += packSize; if (sum < packSize) return SZ_ERROR_ARCHIVE; @@ -407,16 +417,16 @@ static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc) for (;;) { UInt64 type; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdEnd) return SZ_OK; if (type == k7zIdCRC) { /* CRC of packed streams is unused now */ - RINOK(SkipBitUi32s(sd, p->NumPackStreams)); + RINOK(SkipBitUi32s(sd, p->NumPackStreams)) continue; } - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } } @@ -442,7 +452,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) f->NumPackStreams = 0; f->UnpackStream = 0; - RINOK(SzReadNumber32(sd, &numCoders)); + RINOK(SzReadNumber32(sd, &numCoders)) if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX) return SZ_ERROR_UNSUPPORTED; @@ -453,7 +463,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) unsigned idSize, j; UInt64 id; - SZ_READ_BYTE(mainByte); + SZ_READ_BYTE(mainByte) if ((mainByte & 0xC0) != 0) return SZ_ERROR_UNSUPPORTED; @@ -481,12 +491,12 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) { UInt32 numStreams; - RINOK(SzReadNumber32(sd, &numStreams)); + RINOK(SzReadNumber32(sd, &numStreams)) if (numStreams > k_NumCodersStreams_in_Folder_MAX) return SZ_ERROR_UNSUPPORTED; coder->NumStreams = (Byte)numStreams; - RINOK(SzReadNumber32(sd, &numStreams)); + RINOK(SzReadNumber32(sd, &numStreams)) if (numStreams != 1) return SZ_ERROR_UNSUPPORTED; } @@ -499,7 +509,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) if ((mainByte & 0x20) != 0) { UInt32 propsSize = 0; - RINOK(SzReadNumber32(sd, &propsSize)); + RINOK(SzReadNumber32(sd, &propsSize)) if (propsSize > sd->Size) return SZ_ERROR_ARCHIVE; if (propsSize >= 0x80) @@ -549,12 +559,12 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) { CSzBond *bp = f->Bonds + i; - RINOK(SzReadNumber32(sd, &bp->InIndex)); + RINOK(SzReadNumber32(sd, &bp->InIndex)) if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex]) return SZ_ERROR_ARCHIVE; streamUsed[bp->InIndex] = True; - RINOK(SzReadNumber32(sd, &bp->OutIndex)); + RINOK(SzReadNumber32(sd, &bp->OutIndex)) if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex]) return SZ_ERROR_ARCHIVE; coderUsed[bp->OutIndex] = True; @@ -584,7 +594,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) for (i = 0; i < numPackStreams; i++) { UInt32 index; - RINOK(SzReadNumber32(sd, &index)); + RINOK(SzReadNumber32(sd, &index)) if (index >= numInStreams || streamUsed[index]) return SZ_ERROR_ARCHIVE; streamUsed[index] = True; @@ -598,7 +608,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) } -static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) +static Z7_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) { CSzData sd; sd = *sd2; @@ -606,7 +616,7 @@ static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) { Byte firstByte, mask; unsigned i; - SZ_READ_BYTE_2(firstByte); + SZ_READ_BYTE_2(firstByte) if ((firstByte & 0x80) == 0) continue; if ((firstByte & 0x40) == 0) @@ -622,7 +632,7 @@ static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) mask >>= 1; if (i > sd.Size) return SZ_ERROR_ARCHIVE; - SKIP_DATA2(sd, i); + SKIP_DATA2(sd, i) } *sd2 = sd; return SZ_OK; @@ -645,30 +655,30 @@ static SRes ReadUnpackInfo(CSzAr *p, const Byte *startBufPtr; Byte external; - RINOK(WaitId(sd2, k7zIdFolder)); + RINOK(WaitId(sd2, k7zIdFolder)) - RINOK(SzReadNumber32(sd2, &numFolders)); + RINOK(SzReadNumber32(sd2, &numFolders)) if (numFolders > numFoldersMax) return SZ_ERROR_UNSUPPORTED; p->NumFolders = numFolders; - SZ_READ_BYTE_SD(sd2, external); + SZ_READ_BYTE_SD(sd2, external) if (external == 0) sd = *sd2; else { UInt32 index; - RINOK(SzReadNumber32(sd2, &index)); + RINOK(SzReadNumber32(sd2, &index)) if (index >= numTempBufs) return SZ_ERROR_ARCHIVE; sd.Data = tempBufs[index].data; sd.Size = tempBufs[index].size; } - MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc); - MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc); - MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc); - MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc); + MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc) + MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc) startBufPtr = sd.Data; @@ -681,7 +691,7 @@ static SRes ReadUnpackInfo(CSzAr *p, p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); - RINOK(SzReadNumber32(&sd, &numCoders)); + RINOK(SzReadNumber32(&sd, &numCoders)) if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) return SZ_ERROR_UNSUPPORTED; @@ -691,7 +701,7 @@ static SRes ReadUnpackInfo(CSzAr *p, unsigned idSize; UInt32 coderInStreams; - SZ_READ_BYTE_2(mainByte); + SZ_READ_BYTE_2(mainByte) if ((mainByte & 0xC0) != 0) return SZ_ERROR_UNSUPPORTED; idSize = (mainByte & 0xF); @@ -699,15 +709,15 @@ static SRes ReadUnpackInfo(CSzAr *p, return SZ_ERROR_UNSUPPORTED; if (idSize > sd.Size) return SZ_ERROR_ARCHIVE; - SKIP_DATA2(sd, idSize); + SKIP_DATA2(sd, idSize) coderInStreams = 1; if ((mainByte & 0x10) != 0) { UInt32 coderOutStreams; - RINOK(SzReadNumber32(&sd, &coderInStreams)); - RINOK(SzReadNumber32(&sd, &coderOutStreams)); + RINOK(SzReadNumber32(&sd, &coderInStreams)) + RINOK(SzReadNumber32(&sd, &coderOutStreams)) if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1) return SZ_ERROR_UNSUPPORTED; } @@ -717,10 +727,10 @@ static SRes ReadUnpackInfo(CSzAr *p, if ((mainByte & 0x20) != 0) { UInt32 propsSize; - RINOK(SzReadNumber32(&sd, &propsSize)); + RINOK(SzReadNumber32(&sd, &propsSize)) if (propsSize > sd.Size) return SZ_ERROR_ARCHIVE; - SKIP_DATA2(sd, propsSize); + SKIP_DATA2(sd, propsSize) } } @@ -734,7 +744,7 @@ static SRes ReadUnpackInfo(CSzAr *p, Byte coderUsed[k_Scan_NumCoders_MAX]; UInt32 i; - UInt32 numBonds = numCoders - 1; + const UInt32 numBonds = numCoders - 1; if (numInStreams < numBonds) return SZ_ERROR_ARCHIVE; @@ -750,12 +760,12 @@ static SRes ReadUnpackInfo(CSzAr *p, { UInt32 index; - RINOK(SzReadNumber32(&sd, &index)); + RINOK(SzReadNumber32(&sd, &index)) if (index >= numInStreams || streamUsed[index]) return SZ_ERROR_ARCHIVE; streamUsed[index] = True; - RINOK(SzReadNumber32(&sd, &index)); + RINOK(SzReadNumber32(&sd, &index)) if (index >= numCoders || coderUsed[index]) return SZ_ERROR_ARCHIVE; coderUsed[index] = True; @@ -767,7 +777,7 @@ static SRes ReadUnpackInfo(CSzAr *p, for (i = 0; i < numPackStreams; i++) { UInt32 index; - RINOK(SzReadNumber32(&sd, &index)); + RINOK(SzReadNumber32(&sd, &index)) if (index >= numInStreams || streamUsed[index]) return SZ_ERROR_ARCHIVE; streamUsed[index] = True; @@ -802,7 +812,7 @@ static SRes ReadUnpackInfo(CSzAr *p, const size_t dataSize = (size_t)(sd.Data - startBufPtr); p->FoStartPackStreamIndex[fo] = packStreamIndex; p->FoCodersOffsets[fo] = dataSize; - MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc); + MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc) } if (external != 0) @@ -812,21 +822,21 @@ static SRes ReadUnpackInfo(CSzAr *p, sd = *sd2; } - RINOK(WaitId(&sd, k7zIdCodersUnpackSize)); + RINOK(WaitId(&sd, k7zIdCodersUnpackSize)) - MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc); + MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc) { UInt32 i; for (i = 0; i < numCodersOutStreams; i++) { - RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i)); + RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i)) } } for (;;) { UInt64 type; - RINOK(ReadID(&sd, &type)); + RINOK(ReadID(&sd, &type)) if (type == k7zIdEnd) { *sd2 = sd; @@ -834,10 +844,10 @@ static SRes ReadUnpackInfo(CSzAr *p, } if (type == k7zIdCRC) { - RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc)); + RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc)) continue; } - RINOK(SkipData(&sd)); + RINOK(SkipData(&sd)) } } @@ -862,13 +872,13 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { UInt64 type = 0; UInt32 numSubDigests = 0; - UInt32 numFolders = p->NumFolders; + const UInt32 numFolders = p->NumFolders; UInt32 numUnpackStreams = numFolders; UInt32 numUnpackSizesInData = 0; for (;;) { - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdNumUnpackStream) { UInt32 i; @@ -878,7 +888,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) for (i = 0; i < numFolders; i++) { UInt32 numStreams; - RINOK(SzReadNumber32(sd, &numStreams)); + RINOK(SzReadNumber32(sd, &numStreams)) if (numUnpackStreams > numUnpackStreams + numStreams) return SZ_ERROR_UNSUPPORTED; numUnpackStreams += numStreams; @@ -892,7 +902,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) } if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) break; - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } if (!ssi->sdNumSubStreams.Data) @@ -908,9 +918,9 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) if (type == k7zIdSize) { ssi->sdSizes.Data = sd->Data; - RINOK(SkipNumbers(sd, numUnpackSizesInData)); + RINOK(SkipNumbers(sd, numUnpackSizesInData)) ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } for (;;) @@ -920,14 +930,14 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) if (type == k7zIdCRC) { ssi->sdCRCs.Data = sd->Data; - RINOK(SkipBitUi32s(sd, numSubDigests)); + RINOK(SkipBitUi32s(sd, numSubDigests)) ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); } else { - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } } @@ -940,31 +950,31 @@ static SRes SzReadStreamsInfo(CSzAr *p, { UInt64 type; - SzData_Clear(&ssi->sdSizes); - SzData_Clear(&ssi->sdCRCs); - SzData_Clear(&ssi->sdNumSubStreams); + SzData_CLEAR(&ssi->sdSizes) + SzData_CLEAR(&ssi->sdCRCs) + SzData_CLEAR(&ssi->sdNumSubStreams) *dataOffset = 0; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdPackInfo) { - RINOK(ReadNumber(sd, dataOffset)); + RINOK(ReadNumber(sd, dataOffset)) if (*dataOffset > p->RangeLimit) return SZ_ERROR_ARCHIVE; - RINOK(ReadPackInfo(p, sd, alloc)); + RINOK(ReadPackInfo(p, sd, alloc)) if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) return SZ_ERROR_ARCHIVE; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } if (type == k7zIdUnpackInfo) { - RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc)); - RINOK(ReadID(sd, &type)); + RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc)) + RINOK(ReadID(sd, &type)) } if (type == k7zIdSubStreamsInfo) { - RINOK(ReadSubStreamsInfo(p, sd, ssi)); - RINOK(ReadID(sd, &type)); + RINOK(ReadSubStreamsInfo(p, sd, ssi)) + RINOK(ReadID(sd, &type)) } else { @@ -976,7 +986,7 @@ static SRes SzReadStreamsInfo(CSzAr *p, } static SRes SzReadAndDecodePackedStreams( - ILookInStream *inStream, + ILookInStreamPtr inStream, CSzData *sd, CBuf *tempBufs, UInt32 numFoldersMax, @@ -988,7 +998,7 @@ static SRes SzReadAndDecodePackedStreams( UInt32 fo; CSubStreamInfo ssi; - RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp)); + RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp)) dataStartPos += baseOffset; if (p->NumFolders == 0) @@ -1000,7 +1010,7 @@ static SRes SzReadAndDecodePackedStreams( for (fo = 0; fo < p->NumFolders; fo++) { CBuf *tempBuf = tempBufs + fo; - UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo); + const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo); if ((size_t)unpackSize != unpackSize) return SZ_ERROR_MEM; if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp)) @@ -1010,8 +1020,8 @@ static SRes SzReadAndDecodePackedStreams( for (fo = 0; fo < p->NumFolders; fo++) { const CBuf *tempBuf = tempBufs + fo; - RINOK(LookInStream_SeekTo(inStream, dataStartPos)); - RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp)); + RINOK(LookInStream_SeekTo(inStream, dataStartPos)) + RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp)) } return SZ_OK; @@ -1046,7 +1056,7 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; } -static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, +static Z7_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, CSzData *sd2, const CBuf *tempBufs, UInt32 numTempBufs, ISzAllocPtr alloc) @@ -1057,22 +1067,22 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, Byte *defs; Byte external; - RINOK(ReadBitVector(sd2, num, &p->Defs, alloc)); + RINOK(ReadBitVector(sd2, num, &p->Defs, alloc)) - SZ_READ_BYTE_SD(sd2, external); + SZ_READ_BYTE_SD(sd2, external) if (external == 0) sd = *sd2; else { UInt32 index; - RINOK(SzReadNumber32(sd2, &index)); + RINOK(SzReadNumber32(sd2, &index)) if (index >= numTempBufs) return SZ_ERROR_ARCHIVE; sd.Data = tempBufs[index].data; sd.Size = tempBufs[index].size; } - MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc); + MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc) vals = p->Vals; defs = p->Defs; for (i = 0; i < num; i++) @@ -1082,7 +1092,7 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, return SZ_ERROR_ARCHIVE; vals[i].Low = GetUi32(sd.Data); vals[i].High = GetUi32(sd.Data + 4); - SKIP_DATA2(sd, 8); + SKIP_DATA2(sd, 8) } else vals[i].High = vals[i].Low = 0; @@ -1100,7 +1110,7 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, static SRes SzReadHeader2( CSzArEx *p, /* allocMain */ CSzData *sd, - ILookInStream *inStream, + ILookInStreamPtr inStream, CBuf *tempBufs, UInt32 *numTempBufs, ISzAllocPtr allocMain, ISzAllocPtr allocTemp @@ -1111,26 +1121,26 @@ static SRes SzReadHeader2( { UInt64 type; - SzData_Clear(&ssi.sdSizes); - SzData_Clear(&ssi.sdCRCs); - SzData_Clear(&ssi.sdNumSubStreams); + SzData_CLEAR(&ssi.sdSizes) + SzData_CLEAR(&ssi.sdCRCs) + SzData_CLEAR(&ssi.sdNumSubStreams) ssi.NumSubDigests = 0; ssi.NumTotalSubStreams = 0; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdArchiveProperties) { for (;;) { UInt64 type2; - RINOK(ReadID(sd, &type2)); + RINOK(ReadID(sd, &type2)) if (type2 == k7zIdEnd) break; - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } if (type == k7zIdAdditionalStreamsInfo) @@ -1148,15 +1158,15 @@ static SRes SzReadHeader2( if (res != SZ_OK) return res; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } if (type == k7zIdMainStreamsInfo) { RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs, - &p->dataPos, &ssi, allocMain)); + &p->dataPos, &ssi, allocMain)) p->dataPos += p->startPosAfterHeader; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) } if (type == k7zIdEnd) @@ -1174,23 +1184,23 @@ static SRes SzReadHeader2( const Byte *emptyStreams = NULL; const Byte *emptyFiles = NULL; - RINOK(SzReadNumber32(sd, &numFiles)); + RINOK(SzReadNumber32(sd, &numFiles)) p->NumFiles = numFiles; for (;;) { UInt64 type; UInt64 size; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdEnd) break; - RINOK(ReadNumber(sd, &size)); + RINOK(ReadNumber(sd, &size)) if (size > sd->Size) return SZ_ERROR_ARCHIVE; if (type >= ((UInt32)1 << 8)) { - SKIP_DATA(sd, size); + SKIP_DATA(sd, size) } else switch ((unsigned)type) { @@ -1200,7 +1210,7 @@ static SRes SzReadHeader2( const Byte *namesData; Byte external; - SZ_READ_BYTE(external); + SZ_READ_BYTE(external) if (external == 0) { namesSize = (size_t)size - 1; @@ -1209,7 +1219,7 @@ static SRes SzReadHeader2( else { UInt32 index; - RINOK(SzReadNumber32(sd, &index)); + RINOK(SzReadNumber32(sd, &index)) if (index >= *numTempBufs) return SZ_ERROR_ARCHIVE; namesData = (tempBufs)[index].data; @@ -1218,25 +1228,25 @@ static SRes SzReadHeader2( if ((namesSize & 1) != 0) return SZ_ERROR_ARCHIVE; - MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain); - MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain); + MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain) + MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain) RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets)) if (external == 0) { - SKIP_DATA(sd, namesSize); + SKIP_DATA(sd, namesSize) } break; } case k7zIdEmptyStream: { - RINOK(RememberBitVector(sd, numFiles, &emptyStreams)); + RINOK(RememberBitVector(sd, numFiles, &emptyStreams)) numEmptyStreams = CountDefinedBits(emptyStreams, numFiles); emptyFiles = NULL; break; } case k7zIdEmptyFile: { - RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles)); + RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles)) break; } case k7zIdWinAttrib: @@ -1245,22 +1255,22 @@ static SRes SzReadHeader2( CSzData sdSwitch; CSzData *sdPtr; SzBitUi32s_Free(&p->Attribs, allocMain); - RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain)); + RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain)) - SZ_READ_BYTE(external); + SZ_READ_BYTE(external) if (external == 0) sdPtr = sd; else { UInt32 index; - RINOK(SzReadNumber32(sd, &index)); + RINOK(SzReadNumber32(sd, &index)) if (index >= *numTempBufs) return SZ_ERROR_ARCHIVE; sdSwitch.Data = (tempBufs)[index].data; sdSwitch.Size = (tempBufs)[index].size; sdPtr = &sdSwitch; } - RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain)); + RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain)) break; } /* @@ -1273,11 +1283,11 @@ static SRes SzReadHeader2( break; } */ - case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break; - case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break; + case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; default: { - SKIP_DATA(sd, size); + SKIP_DATA(sd, size) } } } @@ -1288,10 +1298,10 @@ static SRes SzReadHeader2( for (;;) { UInt64 type; - RINOK(ReadID(sd, &type)); + RINOK(ReadID(sd, &type)) if (type == k7zIdEnd) break; - RINOK(SkipData(sd)); + RINOK(SkipData(sd)) } { @@ -1303,40 +1313,37 @@ static SRes SzReadHeader2( UInt64 unpackPos = 0; const Byte *digestsDefs = NULL; const Byte *digestsVals = NULL; - UInt32 digestsValsIndex = 0; - UInt32 digestIndex; - Byte allDigestsDefined = 0; + UInt32 digestIndex = 0; Byte isDirMask = 0; Byte crcMask = 0; Byte mask = 0x80; - MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain); - MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain); - MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain); - MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain); + MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain) + MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain) + MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain) + MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain) - RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain)); + RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain)) if (ssi.sdCRCs.Size != 0) { - SZ_READ_BYTE_SD(&ssi.sdCRCs, allDigestsDefined); + Byte allDigestsDefined = 0; + SZ_READ_BYTE_SD_NOCHECK(&ssi.sdCRCs, allDigestsDefined) if (allDigestsDefined) digestsVals = ssi.sdCRCs.Data; else { - size_t numBytes = (ssi.NumSubDigests + 7) >> 3; + const size_t numBytes = (ssi.NumSubDigests + 7) >> 3; digestsDefs = ssi.sdCRCs.Data; digestsVals = digestsDefs + numBytes; } } - digestIndex = 0; - for (i = 0; i < numFiles; i++, mask >>= 1) { if (mask == 0) { - UInt32 byteIndex = (i - 1) >> 3; + const UInt32 byteIndex = (i - 1) >> 3; p->IsDirs[byteIndex] = isDirMask; p->CRCs.Defs[byteIndex] = crcMask; isDirMask = 0; @@ -1374,18 +1381,17 @@ static SRes SzReadHeader2( numSubStreams = 1; if (ssi.sdNumSubStreams.Data) { - RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)); + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) } remSubStreams = numSubStreams; if (numSubStreams != 0) break; { - UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); unpackPos += folderUnpackSize; if (unpackPos < folderUnpackSize) return SZ_ERROR_ARCHIVE; } - folderIndex++; } } @@ -1397,47 +1403,44 @@ static SRes SzReadHeader2( if (--remSubStreams == 0) { - UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); - UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]]; + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]]; if (folderUnpackSize < unpackPos - startFolderUnpackPos) return SZ_ERROR_ARCHIVE; unpackPos = startFolderUnpackPos + folderUnpackSize; if (unpackPos < folderUnpackSize) return SZ_ERROR_ARCHIVE; - if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, i)) + if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex)) { p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex]; crcMask |= mask; } - else if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex))) - { - p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4); - digestsValsIndex++; - crcMask |= mask; - } - folderIndex++; } else { UInt64 v; - RINOK(ReadNumber(&ssi.sdSizes, &v)); + RINOK(ReadNumber(&ssi.sdSizes, &v)) unpackPos += v; if (unpackPos < v) return SZ_ERROR_ARCHIVE; - if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex))) + } + if ((crcMask & mask) == 0 && digestsVals) + { + if (!digestsDefs || SzBitArray_Check(digestsDefs, digestIndex)) { - p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4); - digestsValsIndex++; + p->CRCs.Vals[i] = GetUi32(digestsVals); + digestsVals += 4; crcMask |= mask; } + digestIndex++; } } if (mask != 0x80) { - UInt32 byteIndex = (i - 1) >> 3; + const UInt32 byteIndex = (i - 1) >> 3; p->IsDirs[byteIndex] = isDirMask; p->CRCs.Defs[byteIndex] = crcMask; } @@ -1454,7 +1457,7 @@ static SRes SzReadHeader2( break; if (!ssi.sdNumSubStreams.Data) return SZ_ERROR_ARCHIVE; - RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)); + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) if (numSubStreams != 0) return SZ_ERROR_ARCHIVE; /* @@ -1479,7 +1482,7 @@ static SRes SzReadHeader2( static SRes SzReadHeader( CSzArEx *p, CSzData *sd, - ILookInStream *inStream, + ILookInStreamPtr inStream, ISzAllocPtr allocMain, ISzAllocPtr allocTemp) { @@ -1498,7 +1501,7 @@ static SRes SzReadHeader( for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) Buf_Free(tempBufs + i, allocTemp); - RINOK(res); + RINOK(res) if (sd->Size != 0) return SZ_ERROR_FAIL; @@ -1508,7 +1511,7 @@ static SRes SzReadHeader( static SRes SzArEx_Open2( CSzArEx *p, - ILookInStream *inStream, + ILookInStreamPtr inStream, ISzAllocPtr allocMain, ISzAllocPtr allocTemp) { @@ -1521,9 +1524,9 @@ static SRes SzArEx_Open2( SRes res; startArcPos = 0; - RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR)); + RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR)) - RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)); + RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)) if (!TestSignatureCandidate(header)) return SZ_ERROR_NO_ARCHIVE; @@ -1552,14 +1555,14 @@ static SRes SzArEx_Open2( { Int64 pos = 0; - RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)); + RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)) if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) return SZ_ERROR_INPUT_EOF; } - RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)); + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)) if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) return SZ_ERROR_MEM; @@ -1634,10 +1637,10 @@ static SRes SzArEx_Open2( } -SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream, +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, ISzAllocPtr allocMain, ISzAllocPtr allocTemp) { - SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); + const SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); if (res != SZ_OK) SzArEx_Free(p, allocMain); return res; @@ -1646,7 +1649,7 @@ SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream, SRes SzArEx_Extract( const CSzArEx *p, - ILookInStream *inStream, + ILookInStreamPtr inStream, UInt32 fileIndex, UInt32 *blockIndex, Byte **tempBuf, @@ -1656,7 +1659,7 @@ SRes SzArEx_Extract( ISzAllocPtr allocMain, ISzAllocPtr allocTemp) { - UInt32 folderIndex = p->FileToFolder[fileIndex]; + const UInt32 folderIndex = p->FileToFolder[fileIndex]; SRes res = SZ_OK; *offset = 0; @@ -1673,13 +1676,13 @@ SRes SzArEx_Extract( if (*tempBuf == NULL || *blockIndex != folderIndex) { - UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex); /* UInt64 unpackSizeSpec = p->UnpackPositions[p->FolderToFile[(size_t)folderIndex + 1]] - p->UnpackPositions[p->FolderToFile[folderIndex]]; */ - size_t unpackSize = (size_t)unpackSizeSpec; + const size_t unpackSize = (size_t)unpackSizeSpec; if (unpackSize != unpackSizeSpec) return SZ_ERROR_MEM; @@ -1707,7 +1710,7 @@ SRes SzArEx_Extract( if (res == SZ_OK) { - UInt64 unpackPos = p->UnpackPositions[fileIndex]; + const UInt64 unpackPos = p->UnpackPositions[fileIndex]; *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]); *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos); if (*offset + *outSizeProcessed > *outBufferSize) @@ -1723,8 +1726,8 @@ SRes SzArEx_Extract( size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest) { - size_t offs = p->FileNameOffsets[fileIndex]; - size_t len = p->FileNameOffsets[fileIndex + 1] - offs; + const size_t offs = p->FileNameOffsets[fileIndex]; + const size_t len = p->FileNameOffsets[fileIndex + 1] - offs; if (dest != 0) { size_t i; diff --git a/3rdparty/7z/src/7zBuf.h b/3rdparty/7z/src/7zBuf.h index 5942d6e623..ca34c19097 100644 --- a/3rdparty/7z/src/7zBuf.h +++ b/3rdparty/7z/src/7zBuf.h @@ -1,8 +1,8 @@ /* 7zBuf.h -- Byte Buffer -2017-04-03 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __7Z_BUF_H -#define __7Z_BUF_H +#ifndef ZIP7_INC_7Z_BUF_H +#define ZIP7_INC_7Z_BUF_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/7zCrc.c b/3rdparty/7z/src/7zCrc.c index c7ec353d60..087189902c 100644 --- a/3rdparty/7z/src/7zCrc.c +++ b/3rdparty/7z/src/7zCrc.c @@ -1,5 +1,5 @@ -/* 7zCrc.c -- CRC32 init -2021-04-01 : Igor Pavlov : Public domain */ +/* 7zCrc.c -- CRC32 calculation and init +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -13,22 +13,20 @@ #else #define CRC_NUM_TABLES 9 - #define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) - - UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table); - UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table); + UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table); + UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table); #endif #ifndef MY_CPU_BE - UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); - UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); + UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); + UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); #endif -typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); - +/* extern CRC_FUNC g_CrcUpdateT4; CRC_FUNC g_CrcUpdateT4; +*/ extern CRC_FUNC g_CrcUpdateT8; CRC_FUNC g_CrcUpdateT8; @@ -44,20 +42,22 @@ CRC_FUNC g_CrcUpdate; UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; -UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size) +UInt32 Z7_FASTCALL CrcUpdate(UInt32 v, const void *data, size_t size) { return g_CrcUpdate(v, data, size, g_CrcTable); } -UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) { return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL; } +#if CRC_NUM_TABLES < 4 \ + || (CRC_NUM_TABLES == 4 && defined(MY_CPU_BE)) \ + || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) - -UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); +UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; const Byte *pEnd = p + size; @@ -65,7 +65,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U v = CRC_UPDATE_BYTE_2(v, *p); return v; } - +#endif /* ---------- hardware CRC ---------- */ @@ -78,16 +78,29 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U #if defined(_MSC_VER) #if defined(MY_CPU_ARM64) #if (_MSC_VER >= 1910) + #ifndef __clang__ #define USE_ARM64_CRC + #include + #endif #endif #endif #elif (defined(__clang__) && (__clang_major__ >= 3)) \ || (defined(__GNUC__) && (__GNUC__ > 4)) #if !defined(__ARM_FEATURE_CRC32) #define __ARM_FEATURE_CRC32 1 - #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers + #if defined(__clang__) + #if defined(MY_CPU_ARM64) + #define ATTRIB_CRC __attribute__((__target__("crc"))) + #else + #define ATTRIB_CRC __attribute__((__target__("armv8-a,crc"))) + #endif + #else + #if defined(MY_CPU_ARM64) + #define ATTRIB_CRC __attribute__((__target__("+crc"))) + #else #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) #endif + #endif #endif #if defined(__ARM_FEATURE_CRC32) #define USE_ARM64_CRC @@ -105,7 +118,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U #pragma message("ARM64 CRC emulation") -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 __crc32b(UInt32 v, UInt32 data) { const UInt32 *table = g_CrcTable; @@ -113,7 +126,7 @@ UInt32 __crc32b(UInt32 v, UInt32 data) return v; } -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 __crc32w(UInt32 v, UInt32 data) { const UInt32 *table = g_CrcTable; @@ -124,7 +137,7 @@ UInt32 __crc32w(UInt32 v, UInt32 data) return v; } -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 __crc32d(UInt32 v, UInt64 data) { const UInt32 *table = g_CrcTable; @@ -156,9 +169,9 @@ UInt32 __crc32d(UInt32 v, UInt64 data) // #pragma message("USE ARM HW CRC") ATTRIB_CRC -UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); +UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); ATTRIB_CRC -UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; UNUSED_VAR(table); @@ -188,9 +201,9 @@ UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, cons } ATTRIB_CRC -UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); +UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); ATTRIB_CRC -UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; UNUSED_VAR(table); @@ -219,6 +232,9 @@ UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, cons return v; } +#undef T0_32_UNROLL_BYTES +#undef T0_64_UNROLL_BYTES + #endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) #endif // MY_CPU_LE @@ -226,7 +242,7 @@ UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, cons -void MY_FAST_CALL CrcGenerateTable() +void Z7_FASTCALL CrcGenerateTable(void) { UInt32 i; for (i = 0; i < 256; i++) @@ -239,64 +255,62 @@ void MY_FAST_CALL CrcGenerateTable() } for (i = 256; i < 256 * CRC_NUM_TABLES; i++) { - UInt32 r = g_CrcTable[(size_t)i - 256]; + const UInt32 r = g_CrcTable[(size_t)i - 256]; g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); } #if CRC_NUM_TABLES < 4 - - g_CrcUpdate = CrcUpdateT1; - - #else - - #ifdef MY_CPU_LE - - g_CrcUpdateT4 = CrcUpdateT4; - g_CrcUpdate = CrcUpdateT4; - - #if CRC_NUM_TABLES >= 8 + g_CrcUpdate = CrcUpdateT1; + #elif defined(MY_CPU_LE) + // g_CrcUpdateT4 = CrcUpdateT4; + #if CRC_NUM_TABLES < 8 + g_CrcUpdate = CrcUpdateT4; + #else // CRC_NUM_TABLES >= 8 g_CrcUpdateT8 = CrcUpdateT8; - + /* #ifdef MY_CPU_X86_OR_AMD64 if (!CPU_Is_InOrder()) #endif - g_CrcUpdate = CrcUpdateT8; + */ + g_CrcUpdate = CrcUpdateT8; #endif - #else { - #ifndef MY_CPU_BE + #ifndef MY_CPU_BE UInt32 k = 0x01020304; const Byte *p = (const Byte *)&k; if (p[0] == 4 && p[1] == 3) { - g_CrcUpdateT4 = CrcUpdateT4; - g_CrcUpdate = CrcUpdateT4; - #if CRC_NUM_TABLES >= 8 - g_CrcUpdateT8 = CrcUpdateT8; - g_CrcUpdate = CrcUpdateT8; + #if CRC_NUM_TABLES < 8 + // g_CrcUpdateT4 = CrcUpdateT4; + g_CrcUpdate = CrcUpdateT4; + #else // CRC_NUM_TABLES >= 8 + g_CrcUpdateT8 = CrcUpdateT8; + g_CrcUpdate = CrcUpdateT8; #endif } else if (p[0] != 1 || p[1] != 2) g_CrcUpdate = CrcUpdateT1; else - #endif + #endif // MY_CPU_BE { for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) { - UInt32 x = g_CrcTable[(size_t)i - 256]; - g_CrcTable[i] = CRC_UINT32_SWAP(x); + const UInt32 x = g_CrcTable[(size_t)i - 256]; + g_CrcTable[i] = Z7_BSWAP32(x); } - g_CrcUpdateT4 = CrcUpdateT1_BeT4; - g_CrcUpdate = CrcUpdateT1_BeT4; - #if CRC_NUM_TABLES >= 8 - g_CrcUpdateT8 = CrcUpdateT1_BeT8; - g_CrcUpdate = CrcUpdateT1_BeT8; + #if CRC_NUM_TABLES <= 4 + g_CrcUpdate = CrcUpdateT1; + #elif CRC_NUM_TABLES <= 8 + // g_CrcUpdateT4 = CrcUpdateT1_BeT4; + g_CrcUpdate = CrcUpdateT1_BeT4; + #else // CRC_NUM_TABLES > 8 + g_CrcUpdateT8 = CrcUpdateT1_BeT8; + g_CrcUpdate = CrcUpdateT1_BeT8; #endif } } - #endif - #endif + #endif // CRC_NUM_TABLES < 4 #ifdef MY_CPU_LE #ifdef USE_ARM64_CRC @@ -320,3 +334,7 @@ void MY_FAST_CALL CrcGenerateTable() #endif #endif } + +#undef kCrcPoly +#undef CRC64_NUM_TABLES +#undef CRC_UPDATE_BYTE_2 diff --git a/3rdparty/7z/src/7zCrc.h b/3rdparty/7z/src/7zCrc.h index 3b0459402b..ef1c1afab5 100644 --- a/3rdparty/7z/src/7zCrc.h +++ b/3rdparty/7z/src/7zCrc.h @@ -1,8 +1,8 @@ /* 7zCrc.h -- CRC32 calculation -2013-01-18 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_CRC_H -#define __7Z_CRC_H +#ifndef ZIP7_INC_7Z_CRC_H +#define ZIP7_INC_7Z_CRC_H #include "7zTypes.h" @@ -11,14 +11,16 @@ EXTERN_C_BEGIN extern UInt32 g_CrcTable[]; /* Call CrcGenerateTable one time before other CRC functions */ -void MY_FAST_CALL CrcGenerateTable(void); +void Z7_FASTCALL CrcGenerateTable(void); #define CRC_INIT_VAL 0xFFFFFFFF #define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) #define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size); -UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size); +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); + +typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); EXTERN_C_END diff --git a/3rdparty/7z/src/7zCrcOpt.c b/3rdparty/7z/src/7zCrcOpt.c index efaa7ab9dc..f34913635e 100644 --- a/3rdparty/7z/src/7zCrcOpt.c +++ b/3rdparty/7z/src/7zCrcOpt.c @@ -1,5 +1,5 @@ /* 7zCrcOpt.c -- CRC32 calculation -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,8 +9,8 @@ #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); +UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) @@ -29,8 +29,8 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U return v; } -UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); +UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) @@ -61,11 +61,11 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U #ifndef MY_CPU_LE -#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) +#define CRC_UINT32_SWAP(v) Z7_BSWAP32(v) #define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8)) -UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; table += 0x100; @@ -86,7 +86,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co return CRC_UINT32_SWAP(v); } -UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) +UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; table += 0x100; diff --git a/3rdparty/7z/src/7zDec.c b/3rdparty/7z/src/7zDec.c index 83e37d166b..19cf81ebc5 100644 --- a/3rdparty/7z/src/7zDec.c +++ b/3rdparty/7z/src/7zDec.c @@ -1,11 +1,11 @@ /* 7zDec.c -- Decoding from 7z folder -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" #include -/* #define _7ZIP_PPMD_SUPPPORT */ +/* #define Z7_PPMD_SUPPORT */ #include "7z.h" #include "7zCrc.h" @@ -16,27 +16,49 @@ #include "Delta.h" #include "LzmaDec.h" #include "Lzma2Dec.h" -#ifdef _7ZIP_PPMD_SUPPPORT +#ifdef Z7_PPMD_SUPPORT #include "Ppmd7.h" #endif #define k_Copy 0 -#ifndef _7Z_NO_METHOD_LZMA2 +#ifndef Z7_NO_METHOD_LZMA2 #define k_LZMA2 0x21 #endif #define k_LZMA 0x30101 #define k_BCJ2 0x303011B -#ifndef _7Z_NO_METHODS_FILTERS + +#if !defined(Z7_NO_METHODS_FILTERS) +#define Z7_USE_BRANCH_FILTER +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64) +#define Z7_USE_FILTER_ARM64 +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARM64 0xa +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT) +#define Z7_USE_FILTER_ARMT +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARMT 0x3030701 +#endif + +#ifndef Z7_NO_METHODS_FILTERS #define k_Delta 3 #define k_BCJ 0x3030103 #define k_PPC 0x3030205 #define k_IA64 0x3030401 #define k_ARM 0x3030501 -#define k_ARMT 0x3030701 #define k_SPARC 0x3030805 #endif -#ifdef _7ZIP_PPMD_SUPPPORT +#ifdef Z7_PPMD_SUPPORT #define k_PPMD 0x30401 @@ -49,12 +71,12 @@ typedef struct UInt64 processed; BoolInt extra; SRes res; - const ILookInStream *inStream; + ILookInStreamPtr inStream; } CByteInToLook; -static Byte ReadByte(const IByteIn *pp) +static Byte ReadByte(IByteInPtr pp) { - CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook) if (p->cur != p->end) return *p->cur++; if (p->res == SZ_OK) @@ -67,13 +89,13 @@ static Byte ReadByte(const IByteIn *pp) p->cur = p->begin; p->end = p->begin + size; if (size != 0) - return *p->cur++;; + return *p->cur++; } p->extra = True; return 0; } -static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream, +static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) { CPpmd7 ppmd; @@ -138,14 +160,14 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c #endif -static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream, +static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) { CLzmaDec state; SRes res = SZ_OK; - LzmaDec_Construct(&state); - RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)); + LzmaDec_CONSTRUCT(&state) + RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)) state.dic = outBuffer; state.dicBufSize = outSize; LzmaDec_Init(&state); @@ -196,18 +218,18 @@ static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, I } -#ifndef _7Z_NO_METHOD_LZMA2 +#ifndef Z7_NO_METHOD_LZMA2 -static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream, +static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) { CLzma2Dec state; SRes res = SZ_OK; - Lzma2Dec_Construct(&state); + Lzma2Dec_CONSTRUCT(&state) if (propsSize != 1) return SZ_ERROR_DATA; - RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain)); + RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain)) state.decoder.dic = outBuffer; state.decoder.dicBufSize = outSize; Lzma2Dec_Init(&state); @@ -257,7 +279,7 @@ static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, #endif -static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer) +static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer) { while (inSize > 0) { @@ -265,13 +287,13 @@ static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer size_t curSize = (1 << 18); if (curSize > inSize) curSize = (size_t)inSize; - RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)); + RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)) if (curSize == 0) return SZ_ERROR_INPUT_EOF; memcpy(outBuffer, inBuf, curSize); outBuffer += curSize; inSize -= curSize; - RINOK(ILookInStream_Skip(inStream, curSize)); + RINOK(ILookInStream_Skip(inStream, curSize)) } return SZ_OK; } @@ -282,12 +304,12 @@ static BoolInt IS_MAIN_METHOD(UInt32 m) { case k_Copy: case k_LZMA: - #ifndef _7Z_NO_METHOD_LZMA2 + #ifndef Z7_NO_METHOD_LZMA2 case k_LZMA2: - #endif - #ifdef _7ZIP_PPMD_SUPPPORT + #endif + #ifdef Z7_PPMD_SUPPORT case k_PPMD: - #endif + #endif return True; } return False; @@ -317,7 +339,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f) } - #ifndef _7Z_NO_METHODS_FILTERS + #if defined(Z7_USE_BRANCH_FILTER) if (f->NumCoders == 2) { @@ -333,13 +355,20 @@ static SRes CheckSupportedFolder(const CSzFolder *f) return SZ_ERROR_UNSUPPORTED; switch ((UInt32)c->MethodID) { + #if !defined(Z7_NO_METHODS_FILTERS) case k_Delta: case k_BCJ: case k_PPC: case k_IA64: case k_SPARC: case k_ARM: + #endif + #ifdef Z7_USE_FILTER_ARM64 + case k_ARM64: + #endif + #ifdef Z7_USE_FILTER_ARMT case k_ARMT: + #endif break; default: return SZ_ERROR_UNSUPPORTED; @@ -372,15 +401,16 @@ static SRes CheckSupportedFolder(const CSzFolder *f) return SZ_ERROR_UNSUPPORTED; } -#ifndef _7Z_NO_METHODS_FILTERS -#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break; -#endif + + + + static SRes SzFolder_Decode2(const CSzFolder *folder, const Byte *propsData, const UInt64 *unpackSizes, const UInt64 *packPositions, - ILookInStream *inStream, UInt64 startPos, + ILookInStreamPtr inStream, UInt64 startPos, Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain, Byte *tempBuf[]) { @@ -389,7 +419,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, SizeT tempSize3 = 0; Byte *tempBuf3 = 0; - RINOK(CheckSupportedFolder(folder)); + RINOK(CheckSupportedFolder(folder)) for (ci = 0; ci < folder->NumCoders; ci++) { @@ -404,8 +434,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, SizeT outSizeCur = outSize; if (folder->NumCoders == 4) { - UInt32 indices[] = { 3, 2, 0 }; - UInt64 unpackSize = unpackSizes[ci]; + const UInt32 indices[] = { 3, 2, 0 }; + const UInt64 unpackSize = unpackSizes[ci]; si = indices[ci]; if (ci < 2) { @@ -431,37 +461,37 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, } offset = packPositions[si]; inSize = packPositions[(size_t)si + 1] - offset; - RINOK(LookInStream_SeekTo(inStream, startPos + offset)); + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) if (coder->MethodID == k_Copy) { if (inSize != outSizeCur) /* check it */ return SZ_ERROR_DATA; - RINOK(SzDecodeCopy(inSize, inStream, outBufCur)); + RINOK(SzDecodeCopy(inSize, inStream, outBufCur)) } else if (coder->MethodID == k_LZMA) { - RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); + RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) } - #ifndef _7Z_NO_METHOD_LZMA2 + #ifndef Z7_NO_METHOD_LZMA2 else if (coder->MethodID == k_LZMA2) { - RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); + RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) } - #endif - #ifdef _7ZIP_PPMD_SUPPPORT + #endif + #ifdef Z7_PPMD_SUPPORT else if (coder->MethodID == k_PPMD) { - RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); + RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) } - #endif + #endif else return SZ_ERROR_UNSUPPORTED; } else if (coder->MethodID == k_BCJ2) { - UInt64 offset = packPositions[1]; - UInt64 s3Size = packPositions[2] - offset; + const UInt64 offset = packPositions[1]; + const UInt64 s3Size = packPositions[2] - offset; if (ci != 3) return SZ_ERROR_UNSUPPORTED; @@ -473,8 +503,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, if (!tempBuf[2] && tempSizes[2] != 0) return SZ_ERROR_MEM; - RINOK(LookInStream_SeekTo(inStream, startPos + offset)); - RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])); + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])) if ((tempSizes[0] & 3) != 0 || (tempSizes[1] & 3) != 0 || @@ -493,26 +523,22 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, p.destLim = outBuffer + outSize; Bcj2Dec_Init(&p); - RINOK(Bcj2Dec_Decode(&p)); + RINOK(Bcj2Dec_Decode(&p)) { unsigned i; for (i = 0; i < 4; i++) if (p.bufs[i] != p.lims[i]) return SZ_ERROR_DATA; - - if (!Bcj2Dec_IsFinished(&p)) - return SZ_ERROR_DATA; - - if (p.dest != p.destLim - || p.state != BCJ2_STREAM_MAIN) + if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p)) return SZ_ERROR_DATA; } } } - #ifndef _7Z_NO_METHODS_FILTERS + #if defined(Z7_USE_BRANCH_FILTER) else if (ci == 1) { + #if !defined(Z7_NO_METHODS_FILTERS) if (coder->MethodID == k_Delta) { if (coder->PropsSize != 1) @@ -522,31 +548,53 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, Delta_Init(state); Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); } + continue; } - else + #endif + + #ifdef Z7_USE_FILTER_ARM64 + if (coder->MethodID == k_ARM64) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + pc = GetUi32(propsData + coder->PropsOffset); + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); + continue; + } + #endif + + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) { if (coder->PropsSize != 0) return SZ_ERROR_UNSUPPORTED; + #define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0; switch (coder->MethodID) { + #if !defined(Z7_NO_METHODS_FILTERS) case k_BCJ: { - UInt32 state; - x86_Convert_Init(state); - x86_Convert(outBuffer, outSize, 0, &state, 0); + UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 break; } CASE_BRA_CONV(PPC) CASE_BRA_CONV(IA64) CASE_BRA_CONV(SPARC) CASE_BRA_CONV(ARM) + #endif + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) CASE_BRA_CONV(ARMT) + #endif default: return SZ_ERROR_UNSUPPORTED; } + continue; } - } - #endif + #endif + } // (c == 1) + #endif else return SZ_ERROR_UNSUPPORTED; } @@ -556,7 +604,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, - ILookInStream *inStream, UInt64 startPos, + ILookInStreamPtr inStream, UInt64 startPos, Byte *outBuffer, size_t outSize, ISzAllocPtr allocMain) { diff --git a/3rdparty/7z/src/7zFile.c b/3rdparty/7z/src/7zFile.c index 900125d52c..96d4edd204 100644 --- a/3rdparty/7z/src/7zFile.c +++ b/3rdparty/7z/src/7zFile.c @@ -1,5 +1,5 @@ /* 7zFile.c -- File IO -2021-04-29 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -268,7 +268,7 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size) return errno; if (processed == 0) break; - data = (void *)((Byte *)data + (size_t)processed); + data = (const void *)((const Byte *)data + (size_t)processed); originalSize -= (size_t)processed; *size += (size_t)processed; } @@ -287,7 +287,8 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) DWORD moveMethod; UInt32 low = (UInt32)*pos; LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ - switch (origin) + // (int) to eliminate clang warning + switch ((int)origin) { case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; @@ -308,7 +309,7 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) int moveMethod; // = origin; - switch (origin) + switch ((int)origin) { case SZ_SEEK_SET: moveMethod = SEEK_SET; break; case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; @@ -387,10 +388,10 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) /* ---------- FileSeqInStream ---------- */ -static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size) +static SRes FileSeqInStream_Read(ISeqInStreamPtr pp, void *buf, size_t *size) { - CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt); - WRes wres = File_Read(&p->file, buf, size); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileSeqInStream) + const WRes wres = File_Read(&p->file, buf, size); p->wres = wres; return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } @@ -403,18 +404,18 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p) /* ---------- FileInStream ---------- */ -static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size) +static SRes FileInStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size) { - CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - WRes wres = File_Read(&p->file, buf, size); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream) + const WRes wres = File_Read(&p->file, buf, size); p->wres = wres; return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } -static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin) +static SRes FileInStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin) { - CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - WRes wres = File_Seek(&p->file, pos, origin); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream) + const WRes wres = File_Seek(&p->file, pos, origin); p->wres = wres; return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } @@ -428,10 +429,10 @@ void FileInStream_CreateVTable(CFileInStream *p) /* ---------- FileOutStream ---------- */ -static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) +static size_t FileOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size) { - CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt); - WRes wres = File_Write(&p->file, data, &size); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileOutStream) + const WRes wres = File_Write(&p->file, data, &size); p->wres = wres; return size; } diff --git a/3rdparty/7z/src/7zFile.h b/3rdparty/7z/src/7zFile.h index c7a30fc2b1..d7c871a734 100644 --- a/3rdparty/7z/src/7zFile.h +++ b/3rdparty/7z/src/7zFile.h @@ -1,8 +1,8 @@ /* 7zFile.h -- File IO -2021-02-15 : Igor Pavlov : Public domain */ +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __7Z_FILE_H -#define __7Z_FILE_H +#ifndef ZIP7_INC_FILE_H +#define ZIP7_INC_FILE_H #ifdef _WIN32 #define USE_WINDOWS_FILE @@ -10,7 +10,8 @@ #endif #ifdef USE_WINDOWS_FILE -#include +#include "7zWindows.h" + #else // note: USE_FOPEN mode is limited to 32-bit file size // #define USE_FOPEN diff --git a/3rdparty/7z/src/7zStream.c b/3rdparty/7z/src/7zStream.c index 4b472a41d5..92f821e2cd 100644 --- a/3rdparty/7z/src/7zStream.c +++ b/3rdparty/7z/src/7zStream.c @@ -1,5 +1,5 @@ /* 7zStream.c -- 7z Stream functions -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,12 +7,33 @@ #include "7zTypes.h" -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType) + +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize) +{ + size_t size = *processedSize; + *processedSize = 0; + while (size != 0) + { + size_t cur = size; + const SRes res = ISeqInStream_Read(stream, buf, &cur); + *processedSize += cur; + buf = (void *)((Byte *)buf + cur); + size -= cur; + if (res != SZ_OK) + return res; + if (cur == 0) + return SZ_OK; + } + return SZ_OK; +} + +/* +SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; - RINOK(ISeqInStream_Read(stream, buf, &processed)); + RINOK(ISeqInStream_Read(stream, buf, &processed)) if (processed == 0) return errorType; buf = (void *)((Byte *)buf + processed); @@ -21,42 +42,44 @@ SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes return SZ_OK; } -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size) +SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size) { return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } +*/ -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) + +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf) { size_t processed = 1; - RINOK(ISeqInStream_Read(stream, buf, &processed)); + RINOK(ISeqInStream_Read(stream, buf, &processed)) return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; } -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset) { Int64 t = (Int64)offset; return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); } -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size) +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size) { const void *lookBuf; if (*size == 0) return SZ_OK; - RINOK(ILookInStream_Look(stream, &lookBuf, size)); + RINOK(ILookInStream_Look(stream, &lookBuf, size)) memcpy(buf, lookBuf, *size); return ILookInStream_Skip(stream, *size); } -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType) +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; - RINOK(ILookInStream_Read(stream, buf, &processed)); + RINOK(ILookInStream_Read(stream, buf, &processed)) if (processed == 0) return errorType; buf = (void *)((Byte *)buf + processed); @@ -65,16 +88,16 @@ SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRe return SZ_OK; } -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size) +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size) { return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } -#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt); +#define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2) -static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size) +static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size) { SRes res = SZ_OK; GET_LookToRead2 @@ -93,7 +116,7 @@ static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf return res; } -static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size) +static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size) { SRes res = SZ_OK; GET_LookToRead2 @@ -113,14 +136,14 @@ static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, si return res; } -static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset) +static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset) { GET_LookToRead2 p->pos += offset; return SZ_OK; } -static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) +static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size) { GET_LookToRead2 size_t rem = p->size - p->pos; @@ -134,7 +157,7 @@ static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) return SZ_OK; } -static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin) +static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin) { GET_LookToRead2 p->pos = p->size = 0; @@ -153,9 +176,9 @@ void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) -static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size) +static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size) { - CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook) return LookInStream_LookRead(p->realStream, buf, size); } @@ -164,9 +187,9 @@ void SecToLook_CreateVTable(CSecToLook *p) p->vt.Read = SecToLook_Read; } -static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size) +static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size) { - CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead) return ILookInStream_Read(p->realStream, buf, size); } diff --git a/3rdparty/7z/src/7zTypes.h b/3rdparty/7z/src/7zTypes.h index 763d3a9b6e..0eb9513cae 100644 --- a/3rdparty/7z/src/7zTypes.h +++ b/3rdparty/7z/src/7zTypes.h @@ -1,8 +1,8 @@ /* 7zTypes.h -- Basic types -2022-04-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H #ifdef _WIN32 /* #include */ @@ -52,6 +52,11 @@ typedef int SRes; #define MY_ALIGN(n) #endif #else + /* + // C11/C++11: + #include + #define MY_ALIGN(n) alignas(n) + */ #define MY_ALIGN(n) __attribute__ ((aligned(n))) #endif @@ -62,7 +67,7 @@ typedef int SRes; typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) #else // _WIN32 @@ -70,13 +75,13 @@ typedef unsigned WRes; typedef int WRes; // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT -#define MY__FACILITY_ERRNO 0x800 -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_ERRNO +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ ( (HRESULT)(x) & 0x0000FFFF) \ - | (MY__FACILITY__WRes << 16) \ + | (MY_FACILITY_WRes << 16) \ | (HRESULT)0x80000000 )) #define MY_SRes_HRESULT_FROM_WRes(x) \ @@ -120,17 +125,17 @@ typedef int WRes; #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) -// if (MY__FACILITY__WRes != FACILITY_WIN32), +// if (MY_FACILITY_WRes != FACILITY_WIN32), // we use FACILITY_WIN32 for COM errors: #define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_INVALIDARG ((HRESULT)0x80070057L) -#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) /* // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) -#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) */ #define TEXT(quote) quote @@ -156,18 +161,18 @@ typedef int WRes; #ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } #endif #ifndef RINOK_WRes -#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } #endif typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; -#ifdef _LZMA_UINT32_IS_ULONG +#ifdef Z7_DECL_Int32_AS_long typedef long Int32; typedef unsigned long UInt32; #else @@ -206,37 +211,51 @@ typedef size_t SIZE_T; #endif // _WIN32 -#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ +#ifdef Z7_DECL_Int64_AS_long typedef long Int64; typedef unsigned long UInt64; #else -#if defined(_MSC_VER) || defined(__BORLANDC__) +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) typedef __int64 Int64; typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n +#else +#if defined(__clang__) || defined(__GNUC__) +#include +typedef int64_t Int64; +typedef uint64_t UInt64; #else typedef long long int Int64; typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL +// #define UINT64_CONST(n) n ## ULL +#endif #endif #endif -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; #else typedef size_t SizeT; #endif +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include +typedef uintptr_t MY_uintptr_t; +#endif +*/ + typedef int BoolInt; /* typedef BoolInt Bool; */ #define True 1 @@ -244,23 +263,23 @@ typedef int BoolInt; #ifdef _WIN32 -#define MY_STD_CALL __stdcall +#define Z7_STDCALL __stdcall #else -#define MY_STD_CALL +#define Z7_STDCALL #endif #ifdef _MSC_VER #if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) +#define Z7_NO_INLINE __declspec(noinline) #else -#define MY_NO_INLINE +#define Z7_NO_INLINE #endif -#define MY_FORCE_INLINE __forceinline +#define Z7_FORCE_INLINE __forceinline -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall #else // _MSC_VER @@ -268,27 +287,25 @@ typedef int BoolInt; || (defined(__clang__) && (__clang_major__ >= 4)) \ || defined(__INTEL_COMPILER) \ || defined(__xlC__) -#define MY_NO_INLINE __attribute__((noinline)) -// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline #else -#define MY_NO_INLINE +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE #endif -#define MY_FORCE_INLINE - - -#define MY_CDECL +#define Z7_CDECL #if defined(_M_IX86) \ || defined(__i386__) -// #define MY_FAST_CALL __attribute__((fastcall)) -// #define MY_FAST_CALL __attribute__((cdecl)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL #elif defined(MY_CPU_AMD64) -// #define MY_FAST_CALL __attribute__((ms_abi)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL #else -#define MY_FAST_CALL +#define Z7_FASTCALL #endif #endif // _MSC_VER @@ -296,41 +313,49 @@ typedef int BoolInt; /* The following interfaces use first parameter as pointer to structure */ -typedef struct IByteIn IByteIn; -struct IByteIn +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) { - Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ }; #define IByteIn_Read(p) (p)->Read(p) -typedef struct IByteOut IByteOut; -struct IByteOut +Z7_C_IFACE_DECL (IByteOut) { - void (*Write)(const IByteOut *p, Byte b); + void (*Write)(IByteOutPtr p, Byte b); }; #define IByteOut_Write(p, b) (p)->Write(p, b) -typedef struct ISeqInStream ISeqInStream; -struct ISeqInStream +Z7_C_IFACE_DECL (ISeqInStream) { - SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) < input(*size)) is allowed */ }; #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); /* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); -typedef struct ISeqOutStream ISeqOutStream; -struct ISeqOutStream +Z7_C_IFACE_DECL (ISeqOutStream) { - size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ }; @@ -344,29 +369,26 @@ typedef enum } ESzSeek; -typedef struct ISeekInStream ISeekInStream; -struct ISeekInStream +Z7_C_IFACE_DECL (ISeekInStream) { - SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -typedef struct ILookInStream ILookInStream; -struct ILookInStream +Z7_C_IFACE_DECL (ILookInStream) { - SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(const ILookInStream *p, size_t offset); + SRes (*Skip)(ILookInStreamPtr p, size_t offset); /* offset must be <= output(*size) of Look */ - - SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) @@ -375,19 +397,18 @@ struct ILookInStream #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); /* reads via ILookInStream::Read */ -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); - +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); typedef struct { ILookInStream vt; - const ISeekInStream *realStream; + ISeekInStreamPtr realStream; size_t pos; size_t size; /* it's data size */ @@ -399,13 +420,13 @@ typedef struct void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); -#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook *p); @@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p); typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead *p); -typedef struct ICompressProgress ICompressProgress; - -struct ICompressProgress +Z7_C_IFACE_DECL (ICompressProgress) { - SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. */ }; + #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) @@ -466,13 +486,13 @@ struct ISzAlloc -#ifndef MY_container_of +#ifndef Z7_container_of /* -#define MY_container_of(ptr, type, m) container_of(ptr, type, m) -#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) -#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) -#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) */ /* @@ -481,24 +501,64 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +/* +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) +*/ #endif -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) /* -#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ -#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#endif -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -/* -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) -*/ +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) -#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + #ifdef _WIN32 @@ -527,3 +587,11 @@ struct ISzAlloc EXTERN_C_END #endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/3rdparty/7z/src/7zVersion.h b/3rdparty/7z/src/7zVersion.h index fa9e6fc539..bf21a25f5b 100644 --- a/3rdparty/7z/src/7zVersion.h +++ b/3rdparty/7z/src/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 22 +#define MY_VER_MAJOR 23 #define MY_VER_MINOR 01 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "22.01" +#define MY_VERSION_NUMBERS "23.01" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2022-07-15" +#define MY_DATE "2023-06-20" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2023 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/3rdparty/7z/src/7zVersion.rc b/3rdparty/7z/src/7zVersion.rc index 6ed26de744..e520995ddc 100644 --- a/3rdparty/7z/src/7zVersion.rc +++ b/3rdparty/7z/src/7zVersion.rc @@ -1,55 +1,55 @@ -#define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL -#define MY_VOS_NT_WINDOWS32 0x00040004L -#define MY_VOS_CE_WINDOWS32 0x00050004L - -#define MY_VFT_APP 0x00000001L -#define MY_VFT_DLL 0x00000002L - -// #include - -#ifndef MY_VERSION -#include "7zVersion.h" -#endif - -#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0 - -#ifdef DEBUG -#define DBG_FL VS_FF_DEBUG -#else -#define DBG_FL 0 -#endif - -#define MY_VERSION_INFO(fileType, descr, intName, origName) \ -LANGUAGE 9, 1 \ -1 VERSIONINFO \ - FILEVERSION MY_VER \ - PRODUCTVERSION MY_VER \ - FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \ - FILEFLAGS DBG_FL \ - FILEOS MY_VOS_NT_WINDOWS32 \ - FILETYPE fileType \ - FILESUBTYPE 0x0L \ -BEGIN \ - BLOCK "StringFileInfo" \ - BEGIN \ - BLOCK "040904b0" \ - BEGIN \ - VALUE "CompanyName", "Igor Pavlov" \ - VALUE "FileDescription", descr \ - VALUE "FileVersion", MY_VERSION \ - VALUE "InternalName", intName \ - VALUE "LegalCopyright", MY_COPYRIGHT \ - VALUE "OriginalFilename", origName \ - VALUE "ProductName", "7-Zip" \ - VALUE "ProductVersion", MY_VERSION \ - END \ - END \ - BLOCK "VarFileInfo" \ - BEGIN \ - VALUE "Translation", 0x409, 1200 \ - END \ -END - -#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe") - -#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll") +#define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL +#define MY_VOS_NT_WINDOWS32 0x00040004L +#define MY_VOS_CE_WINDOWS32 0x00050004L + +#define MY_VFT_APP 0x00000001L +#define MY_VFT_DLL 0x00000002L + +// #include + +#ifndef MY_VERSION +#include "7zVersion.h" +#endif + +#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0 + +#ifdef DEBUG +#define DBG_FL VS_FF_DEBUG +#else +#define DBG_FL 0 +#endif + +#define MY_VERSION_INFO(fileType, descr, intName, origName) \ +LANGUAGE 9, 1 \ +1 VERSIONINFO \ + FILEVERSION MY_VER \ + PRODUCTVERSION MY_VER \ + FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \ + FILEFLAGS DBG_FL \ + FILEOS MY_VOS_NT_WINDOWS32 \ + FILETYPE fileType \ + FILESUBTYPE 0x0L \ +BEGIN \ + BLOCK "StringFileInfo" \ + BEGIN \ + BLOCK "040904b0" \ + BEGIN \ + VALUE "CompanyName", "Igor Pavlov" \ + VALUE "FileDescription", descr \ + VALUE "FileVersion", MY_VERSION \ + VALUE "InternalName", intName \ + VALUE "LegalCopyright", MY_COPYRIGHT \ + VALUE "OriginalFilename", origName \ + VALUE "ProductName", "7-Zip" \ + VALUE "ProductVersion", MY_VERSION \ + END \ + END \ + BLOCK "VarFileInfo" \ + BEGIN \ + VALUE "Translation", 0x409, 1200 \ + END \ +END + +#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe") + +#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll") diff --git a/3rdparty/7z/src/7zWindows.h b/3rdparty/7z/src/7zWindows.h new file mode 100644 index 0000000000..42c6db8bfc --- /dev/null +++ b/3rdparty/7z/src/7zWindows.h @@ -0,0 +1,101 @@ +/* 7zWindows.h -- StdAfx +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_WINDOWS_H +#define ZIP7_INC_7Z_WINDOWS_H + +#ifdef _WIN32 + +#if defined(__clang__) +# pragma clang diagnostic push +#endif + +#if defined(_MSC_VER) + +#pragma warning(push) +#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' + +#if _MSC_VER == 1900 +// for old kit10 versions +// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext': +#endif +// win10 Windows Kit: +#endif // _MSC_VER + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +// for msvc6 without sdk2003 +#define RPC_NO_WINDOWS_H +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +// #if defined(__GNUC__) && !defined(__clang__) +#include +#else +#include +#endif +// #include +// #include + +// but if precompiled with clang-cl then we need +// #include +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(__clang__) +# pragma clang diagnostic pop +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +#ifndef _W64 + +typedef long LONG_PTR, *PLONG_PTR; +typedef unsigned long ULONG_PTR, *PULONG_PTR; +typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; + +#define Z7_OLD_WIN_SDK +#endif // _W64 +#endif // _MSC_VER == 1200 + +#ifdef Z7_OLD_WIN_SDK + +#ifndef INVALID_FILE_ATTRIBUTES +#define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif +#ifndef FILE_SPECIAL_ACCESS +#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS) +#endif + +// ShlObj.h: +// #define BIF_NEWDIALOGSTYLE 0x0040 + +#pragma warning(disable : 4201) +// #pragma warning(disable : 4115) + +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + +#endif // Z7_OLD_WIN_SDK + +#ifdef UNDER_CE +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER >= 1400 && _MSC_VER <= 1600 + // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed + // string.h + // #pragma warning(disable : 4514) +#endif +#endif + + +/* #include "7zTypes.h" */ + +#endif diff --git a/3rdparty/7z/src/Aes.c b/3rdparty/7z/src/Aes.c index 9ad66c5c8d..d6732a6653 100644 --- a/3rdparty/7z/src/Aes.c +++ b/3rdparty/7z/src/Aes.c @@ -1,5 +1,5 @@ /* Aes.c -- AES encryption / decryption -2021-05-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,7 +7,7 @@ #include "Aes.h" AES_CODE_FUNC g_AesCbc_Decode; -#ifndef _SFX +#ifndef Z7_SFX AES_CODE_FUNC g_AesCbc_Encode; AES_CODE_FUNC g_AesCtr_Code; UInt32 g_Aes_SupportedFunctions_Flags; @@ -51,7 +51,7 @@ static Byte InvS[256]; #define DD(x) (D + (x << 8)) -// #define _SHOW_AES_STATUS +// #define Z7_SHOW_AES_STATUS #ifdef MY_CPU_X86_OR_AMD64 #define USE_HW_AES @@ -72,11 +72,11 @@ static Byte InvS[256]; #endif #ifdef USE_HW_AES -#ifdef _SHOW_AES_STATUS +#ifdef Z7_SHOW_AES_STATUS #include -#define _PRF(x) x +#define PRF(x) x #else -#define _PRF(x) +#define PRF(x) #endif #endif @@ -90,23 +90,23 @@ void AesGenTables(void) for (i = 0; i < 256; i++) { { - UInt32 a1 = Sbox[i]; - UInt32 a2 = xtime(a1); - UInt32 a3 = a2 ^ a1; + const UInt32 a1 = Sbox[i]; + const UInt32 a2 = xtime(a1); + const UInt32 a3 = a2 ^ a1; TT(0)[i] = Ui32(a2, a1, a1, a3); TT(1)[i] = Ui32(a3, a2, a1, a1); TT(2)[i] = Ui32(a1, a3, a2, a1); TT(3)[i] = Ui32(a1, a1, a3, a2); } { - UInt32 a1 = InvS[i]; - UInt32 a2 = xtime(a1); - UInt32 a4 = xtime(a2); - UInt32 a8 = xtime(a4); - UInt32 a9 = a8 ^ a1; - UInt32 aB = a8 ^ a2 ^ a1; - UInt32 aD = a8 ^ a4 ^ a1; - UInt32 aE = a8 ^ a4 ^ a2; + const UInt32 a1 = InvS[i]; + const UInt32 a2 = xtime(a1); + const UInt32 a4 = xtime(a2); + const UInt32 a8 = xtime(a4); + const UInt32 a9 = a8 ^ a1; + const UInt32 aB = a8 ^ a2 ^ a1; + const UInt32 aD = a8 ^ a4 ^ a1; + const UInt32 aE = a8 ^ a4 ^ a2; DD(0)[i] = Ui32(aE, a9, aD, aB); DD(1)[i] = Ui32(aB, aE, a9, aD); DD(2)[i] = Ui32(aD, aB, aE, a9); @@ -116,7 +116,7 @@ void AesGenTables(void) { AES_CODE_FUNC d = AesCbc_Decode; - #ifndef _SFX + #ifndef Z7_SFX AES_CODE_FUNC e = AesCbc_Encode; AES_CODE_FUNC c = AesCtr_Code; UInt32 flags = 0; @@ -126,10 +126,10 @@ void AesGenTables(void) if (CPU_IsSupported_AES()) { // #pragma message ("AES HW") - _PRF(printf("\n===AES HW\n")); + PRF(printf("\n===AES HW\n")); d = AesCbc_Decode_HW; - #ifndef _SFX + #ifndef Z7_SFX e = AesCbc_Encode_HW; c = AesCtr_Code_HW; flags = k_Aes_SupportedFunctions_HW; @@ -138,9 +138,9 @@ void AesGenTables(void) #ifdef MY_CPU_X86_OR_AMD64 if (CPU_IsSupported_VAES_AVX2()) { - _PRF(printf("\n===vaes avx2\n")); + PRF(printf("\n===vaes avx2\n")); d = AesCbc_Decode_HW_256; - #ifndef _SFX + #ifndef Z7_SFX c = AesCtr_Code_HW_256; flags |= k_Aes_SupportedFunctions_HW_256; #endif @@ -150,7 +150,7 @@ void AesGenTables(void) #endif g_AesCbc_Decode = d; - #ifndef _SFX + #ifndef Z7_SFX g_AesCbc_Encode = e; g_AesCtr_Code = c; g_Aes_SupportedFunctions_Flags = flags; @@ -194,7 +194,7 @@ void AesGenTables(void) #define FD(i, x) InvS[gb(x, m[(i - x) & 3])] #define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i]; -void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) +void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) { unsigned i, m; const UInt32 *wLim; @@ -230,7 +230,7 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) while (++w != wLim); } -void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) +void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) { unsigned i, num; Aes_SetKey_Enc(w, key, keySize); @@ -251,7 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) src and dest are pointers to 4 UInt32 words. src and dest can point to same block */ -// MY_FORCE_INLINE +// Z7_FORCE_INLINE static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -265,17 +265,20 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) w += 4; for (;;) { - HT16(m, s, 0); + HT16(m, s, 0) if (--numRounds2 == 0) break; - HT16(s, m, 4); + HT16(s, m, 4) w += 8; } w += 4; - FT4(0); FT4(1); FT4(2); FT4(3); + FT4(0) + FT4(1) + FT4(2) + FT4(3) } -MY_FORCE_INLINE +Z7_FORCE_INLINE static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -289,12 +292,15 @@ static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) for (;;) { w -= 8; - HD16(m, s, 4); + HD16(m, s, 4) if (--numRounds2 == 0) break; - HD16(s, m, 0); + HD16(s, m, 0) } - FD4(0); FD4(1); FD4(2); FD4(3); + FD4(0) + FD4(1) + FD4(2) + FD4(3) } void AesCbc_Init(UInt32 *p, const Byte *iv) @@ -304,7 +310,7 @@ void AesCbc_Init(UInt32 *p, const Byte *iv) p[i] = GetUi32(iv + i * 4); } -void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks) +void Z7_FASTCALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks) { for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) { @@ -315,14 +321,14 @@ void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks) Aes_Encode(p + 4, p, p); - SetUi32(data, p[0]); - SetUi32(data + 4, p[1]); - SetUi32(data + 8, p[2]); - SetUi32(data + 12, p[3]); + SetUi32(data, p[0]) + SetUi32(data + 4, p[1]) + SetUi32(data + 8, p[2]) + SetUi32(data + 12, p[3]) } } -void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) +void Z7_FASTCALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) { UInt32 in[4], out[4]; for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) @@ -334,10 +340,10 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) Aes_Decode(p + 4, out, in); - SetUi32(data, p[0] ^ out[0]); - SetUi32(data + 4, p[1] ^ out[1]); - SetUi32(data + 8, p[2] ^ out[2]); - SetUi32(data + 12, p[3] ^ out[3]); + SetUi32(data, p[0] ^ out[0]) + SetUi32(data + 4, p[1] ^ out[1]) + SetUi32(data + 8, p[2] ^ out[2]) + SetUi32(data + 12, p[3] ^ out[3]) p[0] = in[0]; p[1] = in[1]; @@ -346,7 +352,7 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) } } -void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) +void Z7_FASTCALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) { for (; numBlocks != 0; numBlocks--) { @@ -360,7 +366,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) for (i = 0; i < 4; i++, data += 4) { - UInt32 t = temp[i]; + const UInt32 t = temp[i]; #ifdef MY_CPU_LE_UNALIGN *((UInt32 *)(void *)data) ^= t; @@ -373,3 +379,15 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) } } } + +#undef xtime +#undef Ui32 +#undef gb0 +#undef gb1 +#undef gb2 +#undef gb3 +#undef gb +#undef TT +#undef DD +#undef USE_HW_AES +#undef PRF diff --git a/3rdparty/7z/src/Aes.h b/3rdparty/7z/src/Aes.h index 602e25ea24..2373afb970 100644 --- a/3rdparty/7z/src/Aes.h +++ b/3rdparty/7z/src/Aes.h @@ -1,8 +1,8 @@ /* Aes.h -- AES encryption / decryption -2018-04-28 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __AES_H -#define __AES_H +#ifndef ZIP7_INC_AES_H +#define ZIP7_INC_AES_H #include "7zTypes.h" @@ -20,19 +20,19 @@ void AesGenTables(void); /* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */ /* keySize = 16 or 24 or 32 (bytes) */ -typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize); -void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize); -void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize); +typedef void (Z7_FASTCALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize); +void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize); +void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize); /* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */ void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */ /* data - 16-byte aligned pointer to data */ /* numBlocks - the number of 16-byte blocks in data array */ -typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); +typedef void (Z7_FASTCALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); extern AES_CODE_FUNC g_AesCbc_Decode; -#ifndef _SFX +#ifndef Z7_SFX extern AES_CODE_FUNC g_AesCbc_Encode; extern AES_CODE_FUNC g_AesCtr_Code; #define k_Aes_SupportedFunctions_HW (1 << 2) @@ -41,19 +41,19 @@ extern UInt32 g_Aes_SupportedFunctions_Flags; #endif -#define DECLARE__AES_CODE_FUNC(funcName) \ - void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); +#define Z7_DECLARE_AES_CODE_FUNC(funcName) \ + void Z7_FASTCALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); -DECLARE__AES_CODE_FUNC (AesCbc_Encode) -DECLARE__AES_CODE_FUNC (AesCbc_Decode) -DECLARE__AES_CODE_FUNC (AesCtr_Code) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code) -DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW) -DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW) -DECLARE__AES_CODE_FUNC (AesCtr_Code_HW) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode_HW) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW) -DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256) -DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW_256) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW_256) EXTERN_C_END diff --git a/3rdparty/7z/src/AesOpt.c b/3rdparty/7z/src/AesOpt.c index 1bdc9a882f..af911e22f9 100644 --- a/3rdparty/7z/src/AesOpt.c +++ b/3rdparty/7z/src/AesOpt.c @@ -1,39 +1,33 @@ /* AesOpt.c -- AES optimized code for x86 AES hardware instructions -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" +#include "Aes.h" #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) - #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8) - #define USE_INTEL_AES - #define ATTRIB_AES __attribute__((__target__("aes"))) - #if (__clang_major__ >= 8) - #define USE_INTEL_VAES - #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) - #endif - #endif - #elif defined(__GNUC__) - #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) - #define USE_INTEL_AES - #ifndef __AES__ - #define ATTRIB_AES __attribute__((__target__("aes"))) - #endif - #if (__GNUC__ >= 8) - #define USE_INTEL_VAES - #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) - #endif - #endif - #elif defined(__INTEL_COMPILER) + #if defined(__INTEL_COMPILER) #if (__INTEL_COMPILER >= 1110) #define USE_INTEL_AES #if (__INTEL_COMPILER >= 1900) #define USE_INTEL_VAES #endif #endif + #elif defined(__clang__) && (__clang_major__ > 3 || __clang_major__ == 3 && __clang_minor__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4) + #define USE_INTEL_AES + #if !defined(__AES__) + #define ATTRIB_AES __attribute__((__target__("aes"))) + #endif + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_INTEL_VAES + #if !defined(__AES__) || !defined(__VAES__) || !defined(__AVX__) || !defined(__AVX2__) + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx,avx2"))) + #endif + #endif #elif defined(_MSC_VER) #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) #define USE_INTEL_AES @@ -56,12 +50,15 @@ #include #ifndef USE_INTEL_VAES -#define AES_TYPE_keys __m128i -#define AES_TYPE_data __m128i +#define AES_TYPE_keys UInt32 +#define AES_TYPE_data Byte +// #define AES_TYPE_keys __m128i +// #define AES_TYPE_data __m128i #endif #define AES_FUNC_START(name) \ - void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks) + void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) + // void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks) #define AES_FUNC_START2(name) \ AES_FUNC_START (name); \ @@ -69,14 +66,16 @@ ATTRIB_AES \ AES_FUNC_START (name) #define MM_OP(op, dest, src) dest = op(dest, src); -#define MM_OP_m(op, src) MM_OP(op, m, src); +#define MM_OP_m(op, src) MM_OP(op, m, src) -#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src); -#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src); +#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src) +#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src) AES_FUNC_START2 (AesCbc_Encode_HW) { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; __m128i m = *p; const __m128i k0 = p[2]; const __m128i k1 = p[3]; @@ -86,17 +85,17 @@ AES_FUNC_START2 (AesCbc_Encode_HW) UInt32 r = numRounds2; const __m128i *w = p + 4; __m128i temp = *data; - MM_XOR (temp, k0); - MM_XOR (m, temp); - MM_OP_m (_mm_aesenc_si128, k1); + MM_XOR (temp, k0) + MM_XOR (m, temp) + MM_OP_m (_mm_aesenc_si128, k1) do { - MM_OP_m (_mm_aesenc_si128, w[0]); - MM_OP_m (_mm_aesenc_si128, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) w += 2; } while (--r); - MM_OP_m (_mm_aesenclast_si128, w[0]); + MM_OP_m (_mm_aesenclast_si128, w[0]) *data = m; } *p = m; @@ -104,14 +103,14 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define WOP_1(op) -#define WOP_2(op) WOP_1 (op) op (m1, 1); -#define WOP_3(op) WOP_2 (op) op (m2, 2); -#define WOP_4(op) WOP_3 (op) op (m3, 3); +#define WOP_2(op) WOP_1 (op) op (m1, 1) +#define WOP_3(op) WOP_2 (op) op (m2, 2) +#define WOP_4(op) WOP_3 (op) op (m3, 3) #ifdef MY_CPU_AMD64 -#define WOP_5(op) WOP_4 (op) op (m4, 4); -#define WOP_6(op) WOP_5 (op) op (m5, 5); -#define WOP_7(op) WOP_6 (op) op (m6, 6); -#define WOP_8(op) WOP_7 (op) op (m7, 7); +#define WOP_5(op) WOP_4 (op) op (m4, 4) +#define WOP_6(op) WOP_5 (op) op (m5, 5) +#define WOP_7(op) WOP_6 (op) op (m6, 6) +#define WOP_8(op) WOP_7 (op) op (m7, 7) #endif /* #define WOP_9(op) WOP_8 (op) op (m8, 8); @@ -130,20 +129,20 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define WOP_M1 WOP_4 #endif -#define WOP(op) op (m0, 0); WOP_M1(op) +#define WOP(op) op (m0, 0) WOP_M1(op) -#define DECLARE_VAR(reg, ii) __m128i reg +#define DECLARE_VAR(reg, ii) __m128i reg; #define LOAD_data( reg, ii) reg = data[ii]; #define STORE_data( reg, ii) data[ii] = reg; #if (NUM_WAYS > 1) -#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]) #endif -#define AVX__DECLARE_VAR(reg, ii) __m256i reg -#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; -#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; -#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])); +#define AVX_DECLARE_VAR(reg, ii) __m256i reg; +#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; +#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; +#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])) #define MM_OP_key(op, reg) MM_OP(op, reg, key); @@ -154,23 +153,23 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) -#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) -#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) -#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) -#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) -#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) +#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) +#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) +#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) +#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) +#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) -#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr; -#define CTR_END( reg, ii) MM_XOR (data[ii], reg); +#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr; +#define CTR_END( reg, ii) MM_XOR (data[ii], reg) -#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key); -#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg); +#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key); +#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg) #define WOP_KEY(op, n) { \ const __m128i key = w[n]; \ WOP(op); } -#define AVX__WOP_KEY(op, n) { \ +#define AVX_WOP_KEY(op, n) { \ const __m256i key = w[n]; \ WOP(op); } @@ -218,6 +217,8 @@ AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Decode_HW) { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; __m128i iv = *p; const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1; const __m128i *dataEnd; @@ -228,7 +229,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW) const __m128i *w = wStart; WOP (DECLARE_VAR) - WOP (LOAD_data); + WOP (LOAD_data) WOP_KEY (AES_XOR, 1) do @@ -239,10 +240,10 @@ AES_FUNC_START2 (AesCbc_Decode_HW) while (w != p); WOP_KEY (AES_DEC_LAST, 0) - MM_XOR (m0, iv); + MM_XOR (m0, iv) WOP_M1 (XOR_data_M1) iv = data[NUM_WAYS - 1]; - WOP (STORE_data); + WOP (STORE_data) } WIDE_LOOP_END @@ -252,15 +253,15 @@ AES_FUNC_START2 (AesCbc_Decode_HW) __m128i m = _mm_xor_si128 (w[2], *data); do { - MM_OP_m (_mm_aesdec_si128, w[1]); - MM_OP_m (_mm_aesdec_si128, w[0]); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdec_si128, w[0]) w -= 2; } while (w != p); - MM_OP_m (_mm_aesdec_si128, w[1]); - MM_OP_m (_mm_aesdeclast_si128, w[0]); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdeclast_si128, w[0]) - MM_XOR (m, iv); + MM_XOR (m, iv) iv = *data; *data = m; } @@ -271,6 +272,8 @@ AES_FUNC_START2 (AesCbc_Decode_HW) AES_FUNC_START2 (AesCtr_Code_HW) { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; __m128i ctr = *p; UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1; const __m128i *dataEnd; @@ -283,7 +286,7 @@ AES_FUNC_START2 (AesCtr_Code_HW) const __m128i *w = p; UInt32 r = numRoundsMinus2; WOP (DECLARE_VAR) - WOP (CTR_START); + WOP (CTR_START) WOP_KEY (AES_XOR, 0) w += 1; do @@ -294,7 +297,7 @@ AES_FUNC_START2 (AesCtr_Code_HW) while (--r); WOP_KEY (AES_ENC_LAST, 0) - WOP (CTR_END); + WOP (CTR_END) } WIDE_LOOP_END @@ -303,19 +306,19 @@ AES_FUNC_START2 (AesCtr_Code_HW) UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; const __m128i *w = p; __m128i m; - MM_OP (_mm_add_epi64, ctr, one); + MM_OP (_mm_add_epi64, ctr, one) m = _mm_xor_si128 (ctr, p[0]); w += 1; do { - MM_OP_m (_mm_aesenc_si128, w[0]); - MM_OP_m (_mm_aesenc_si128, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) w += 2; } while (--numRounds2); - MM_OP_m (_mm_aesenc_si128, w[0]); - MM_OP_m (_mm_aesenclast_si128, w[1]); - MM_XOR (*data, m); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenclast_si128, w[1]) + MM_XOR (*data, m) } p[-2] = ctr; @@ -325,17 +328,58 @@ AES_FUNC_START2 (AesCtr_Code_HW) #ifdef USE_INTEL_VAES +/* +GCC before 2013-Jun: + : + #ifdef __AVX__ + #include + #endif +GCC after 2013-Jun: + : + #include +CLANG 3.8+: +{ + : + #if !defined(_MSC_VER) || defined(__AVX__) + #include + #endif + + if (the compiler is clang for Windows and if global arch is not set for __AVX__) + [ if (defined(_MSC_VER) && !defined(__AVX__)) ] + { + doesn't include + and we have 2 ways to fix it: + 1) we can define required __AVX__ before + or + 2) we can include after + } +} + +If we include manually for GCC/CLANG, it's +required that must be included before . +*/ + +/* #if defined(__clang__) && defined(_MSC_VER) -#define __SSE4_2__ -#define __AES__ #define __AVX__ #define __AVX2__ #define __VAES__ -#define __AVX512F__ -#define __AVX512VL__ #endif +*/ #include +#if defined(__clang__) && defined(_MSC_VER) + #if !defined(__AVX__) + #include + #endif + #if !defined(__AVX2__) + #include + #endif + #if !defined(__VAES__) + #include + #endif +#endif // __clang__ && _MSC_VER + #define VAES_FUNC_START2(name) \ AES_FUNC_START (name); \ @@ -344,6 +388,8 @@ AES_FUNC_START (name) VAES_FUNC_START2 (AesCbc_Decode_HW_256) { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; __m128i iv = *p; const __m128i *dataEnd; UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; @@ -353,22 +399,22 @@ VAES_FUNC_START2 (AesCbc_Decode_HW_256) { const __m256i *w = keys + numRounds - 2; - WOP (AVX__DECLARE_VAR) - WOP (AVX__LOAD_data); - AVX__WOP_KEY (AVX__AES_XOR, 1) + WOP (AVX_DECLARE_VAR) + WOP (AVX_LOAD_data) + AVX_WOP_KEY (AVX_AES_XOR, 1) do { - AVX__WOP_KEY (AVX__AES_DEC, 0) + AVX_WOP_KEY (AVX_AES_DEC, 0) w--; } while (w != keys); - AVX__WOP_KEY (AVX__AES_DEC_LAST, 0) + AVX_WOP_KEY (AVX_AES_DEC_LAST, 0) - AVX_XOR (m0, _mm256_setr_m128i(iv, data[0])); - WOP_M1 (AVX__XOR_data_M1) + AVX_XOR (m0, _mm256_setr_m128i(iv, data[0])) + WOP_M1 (AVX_XOR_data_M1) iv = data[NUM_WAYS * 2 - 1]; - WOP (AVX__STORE_data); + WOP (AVX_STORE_data) } WIDE_LOOP_END_AVX(;) @@ -378,15 +424,15 @@ VAES_FUNC_START2 (AesCbc_Decode_HW_256) __m128i m = _mm_xor_si128 (w[2], *data); do { - MM_OP_m (_mm_aesdec_si128, w[1]); - MM_OP_m (_mm_aesdec_si128, w[0]); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdec_si128, w[0]) w -= 2; } while (w != p); - MM_OP_m (_mm_aesdec_si128, w[1]); - MM_OP_m (_mm_aesdeclast_si128, w[0]); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdeclast_si128, w[0]) - MM_XOR (m, iv); + MM_XOR (m, iv) iv = *data; *data = m; } @@ -403,18 +449,20 @@ AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm _mm256_broadcastsi128_si256 : vbroadcasti128 */ -#define AVX__CTR_LOOP_START \ +#define AVX_CTR_LOOP_START \ ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \ two = _mm256_setr_m128i(one, one); \ two = _mm256_add_epi64(two, two); \ // two = _mm256_setr_epi64x(2, 0, 2, 0); -#define AVX__CTR_LOOP_ENC \ +#define AVX_CTR_LOOP_ENC \ ctr = _mm256_extracti128_si256 (ctr2, 1); \ VAES_FUNC_START2 (AesCtr_Code_HW_256) { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; __m128i ctr = *p; UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; const __m128i *dataEnd; @@ -422,44 +470,44 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256) __m256i ctr2, two; p += 2; - WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START) + WIDE_LOOP_START_AVX (AVX_CTR_LOOP_START) { const __m256i *w = keys; UInt32 r = numRounds - 2; - WOP (AVX__DECLARE_VAR) - AVX__WOP_KEY (AVX__CTR_START, 0); + WOP (AVX_DECLARE_VAR) + AVX_WOP_KEY (AVX_CTR_START, 0) w += 1; do { - AVX__WOP_KEY (AVX__AES_ENC, 0) + AVX_WOP_KEY (AVX_AES_ENC, 0) w += 1; } while (--r); - AVX__WOP_KEY (AVX__AES_ENC_LAST, 0) + AVX_WOP_KEY (AVX_AES_ENC_LAST, 0) - WOP (AVX__CTR_END); + WOP (AVX_CTR_END) } - WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC) + WIDE_LOOP_END_AVX (AVX_CTR_LOOP_ENC) SINGLE_LOOP { UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; const __m128i *w = p; __m128i m; - MM_OP (_mm_add_epi64, ctr, one); + MM_OP (_mm_add_epi64, ctr, one) m = _mm_xor_si128 (ctr, p[0]); w += 1; do { - MM_OP_m (_mm_aesenc_si128, w[0]); - MM_OP_m (_mm_aesenc_si128, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) w += 2; } while (--numRounds2); - MM_OP_m (_mm_aesenc_si128, w[0]); - MM_OP_m (_mm_aesenclast_si128, w[1]); - MM_XOR (*data, m); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenclast_si128, w[1]) + MM_XOR (*data, m) } p[-2] = ctr; @@ -477,7 +525,7 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256) #define AES_TYPE_data Byte #define AES_FUNC_START(name) \ - void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \ + void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \ #define AES_COMPAT_STUB(name) \ AES_FUNC_START(name); \ @@ -496,8 +544,8 @@ AES_COMPAT_STUB (AesCtr_Code) #pragma message("VAES HW_SW stub was used") #define VAES_COMPAT_STUB(name) \ - void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ - void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ + void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ + void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); } VAES_COMPAT_STUB (AesCbc_Decode_HW) @@ -551,7 +599,8 @@ VAES_COMPAT_STUB (AesCtr_Code_HW) typedef uint8x16_t v128; #define AES_FUNC_START(name) \ - void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks) + void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) + // void Z7_FASTCALL name(v128 *p, v128 *data, size_t numBlocks) #define AES_FUNC_START2(name) \ AES_FUNC_START (name); \ @@ -559,18 +608,20 @@ ATTRIB_AES \ AES_FUNC_START (name) #define MM_OP(op, dest, src) dest = op(dest, src); -#define MM_OP_m(op, src) MM_OP(op, m, src); +#define MM_OP_m(op, src) MM_OP(op, m, src) #define MM_OP1_m(op) m = op(m); -#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src); -#define MM_XOR_m( src) MM_XOR(m, src); +#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src) +#define MM_XOR_m( src) MM_XOR(m, src) -#define AES_E_m(k) MM_OP_m (vaeseq_u8, k); -#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8); +#define AES_E_m(k) MM_OP_m (vaeseq_u8, k) +#define AES_E_MC_m(k) AES_E_m (k) MM_OP1_m(vaesmcq_u8) AES_FUNC_START2 (AesCbc_Encode_HW) { + v128 *p = (v128*)(void*)ivAes; + v128 *data = (v128*)(void*)data8; v128 m = *p; const v128 k0 = p[2]; const v128 k1 = p[3]; @@ -608,7 +659,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW) AES_E_MC_m (p[14]) } } - AES_E_m (k_z1); + AES_E_m (k_z1) MM_XOR_m (k_z0); *data = m; } @@ -617,44 +668,44 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define WOP_1(op) -#define WOP_2(op) WOP_1 (op) op (m1, 1); -#define WOP_3(op) WOP_2 (op) op (m2, 2); -#define WOP_4(op) WOP_3 (op) op (m3, 3); -#define WOP_5(op) WOP_4 (op) op (m4, 4); -#define WOP_6(op) WOP_5 (op) op (m5, 5); -#define WOP_7(op) WOP_6 (op) op (m6, 6); -#define WOP_8(op) WOP_7 (op) op (m7, 7); +#define WOP_2(op) WOP_1 (op) op (m1, 1) +#define WOP_3(op) WOP_2 (op) op (m2, 2) +#define WOP_4(op) WOP_3 (op) op (m3, 3) +#define WOP_5(op) WOP_4 (op) op (m4, 4) +#define WOP_6(op) WOP_5 (op) op (m5, 5) +#define WOP_7(op) WOP_6 (op) op (m6, 6) +#define WOP_8(op) WOP_7 (op) op (m7, 7) #define NUM_WAYS 8 #define WOP_M1 WOP_8 -#define WOP(op) op (m0, 0); WOP_M1(op) +#define WOP(op) op (m0, 0) WOP_M1(op) -#define DECLARE_VAR(reg, ii) v128 reg +#define DECLARE_VAR(reg, ii) v128 reg; #define LOAD_data( reg, ii) reg = data[ii]; #define STORE_data( reg, ii) data[ii] = reg; #if (NUM_WAYS > 1) -#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]) #endif -#define MM_OP_key(op, reg) MM_OP (op, reg, key); +#define MM_OP_key(op, reg) MM_OP (op, reg, key) -#define AES_D_m(k) MM_OP_m (vaesdq_u8, k); -#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8); +#define AES_D_m(k) MM_OP_m (vaesdq_u8, k) +#define AES_D_IMC_m(k) AES_D_m (k) MM_OP1_m (vaesimcq_u8) #define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg) #define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg) #define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg) -#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg) -#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg) +#define AES_D_IMC( reg, ii) AES_D (reg, ii) reg = vaesimcq_u8(reg); +#define AES_E_MC( reg, ii) AES_E (reg, ii) reg = vaesmcq_u8(reg); -#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr); -#define CTR_END( reg, ii) MM_XOR (data[ii], reg); +#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one) reg = vreinterpretq_u8_u64(ctr); +#define CTR_END( reg, ii) MM_XOR (data[ii], reg) #define WOP_KEY(op, n) { \ const v128 key = w[n]; \ - WOP(op); } + WOP(op) } #define WIDE_LOOP_START \ dataEnd = data + numBlocks; \ @@ -672,6 +723,8 @@ AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Decode_HW) { + v128 *p = (v128*)(void*)ivAes; + v128 *data = (v128*)(void*)data8; v128 iv = *p; const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *dataEnd; @@ -681,7 +734,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW) { const v128 *w = wStart; WOP (DECLARE_VAR) - WOP (LOAD_data); + WOP (LOAD_data) WOP_KEY (AES_D_IMC, 2) do { @@ -695,7 +748,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW) MM_XOR (m0, iv); WOP_M1 (XOR_data_M1) iv = data[NUM_WAYS - 1]; - WOP (STORE_data); + WOP (STORE_data) } WIDE_LOOP_END @@ -724,6 +777,8 @@ AES_FUNC_START2 (AesCbc_Decode_HW) AES_FUNC_START2 (AesCtr_Code_HW) { + v128 *p = (v128*)(void*)ivAes; + v128 *data = (v128*)(void*)data8; uint64x2_t ctr = vreinterpretq_u64_u8(*p); const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *dataEnd; @@ -735,7 +790,7 @@ AES_FUNC_START2 (AesCtr_Code_HW) { const v128 *w = p; WOP (DECLARE_VAR) - WOP (CTR_START); + WOP (CTR_START) do { WOP_KEY (AES_E_MC, 0) @@ -746,7 +801,7 @@ AES_FUNC_START2 (AesCtr_Code_HW) WOP_KEY (AES_E_MC, 0) WOP_KEY (AES_E, 1) WOP_KEY (AES_XOR, 2) - WOP (CTR_END); + WOP (CTR_END) } WIDE_LOOP_END @@ -762,10 +817,10 @@ AES_FUNC_START2 (AesCtr_Code_HW) w += 2; } while (w != wEnd); - AES_E_MC_m (w[0]); - AES_E_m (w[1]); - MM_XOR_m (w[2]); - CTR_END (m, 0); + AES_E_MC_m (w[0]) + AES_E_m (w[1]) + MM_XOR_m (w[2]) + CTR_END (m, 0) } p[-2] = vreinterpretq_u8_u64(ctr); @@ -774,3 +829,12 @@ AES_FUNC_START2 (AesCtr_Code_HW) #endif // USE_HW_AES #endif // MY_CPU_ARM_OR_ARM64 + +#undef NUM_WAYS +#undef WOP_M1 +#undef WOP +#undef DECLARE_VAR +#undef LOAD_data +#undef STORE_data +#undef USE_INTEL_AES +#undef USE_HW_AES diff --git a/3rdparty/7z/src/Alloc.c b/3rdparty/7z/src/Alloc.c index 142a1ea221..0b12133dea 100644 --- a/3rdparty/7z/src/Alloc.c +++ b/3rdparty/7z/src/Alloc.c @@ -1,38 +1,54 @@ /* Alloc.c -- Memory allocation functions -2021-07-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include - #ifdef _WIN32 -#include +#include "7zWindows.h" #endif #include #include "Alloc.h" -/* #define _SZ_ALLOC_DEBUG */ +#ifdef _WIN32 +#ifdef Z7_LARGE_PAGES +#if defined(__clang__) || defined(__GNUC__) +typedef void (*Z7_voidFunction)(void); +#define MY_CAST_FUNC (Z7_voidFunction) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define MY_CAST_FUNC (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define MY_CAST_FUNC +#endif +#endif // Z7_LARGE_PAGES +#endif // _WIN32 -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG +// #define SZ_ALLOC_DEBUG +/* #define SZ_ALLOC_DEBUG */ +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef SZ_ALLOC_DEBUG + +#include #include -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; +static int g_allocCount = 0; +#ifdef _WIN32 +static int g_allocCountMid = 0; +static int g_allocCountBig = 0; +#endif #define CONVERT_INT_TO_STR(charType, tempSize) \ - unsigned char temp[tempSize]; unsigned i = 0; \ - while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ + char temp[tempSize]; unsigned i = 0; \ + while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \ *s++ = (charType)('0' + (unsigned)val); \ while (i != 0) { i--; *s++ = temp[i]; } \ *s = 0; static void ConvertUInt64ToString(UInt64 val, char *s) { - CONVERT_INT_TO_STR(char, 24); + CONVERT_INT_TO_STR(char, 24) } #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) @@ -77,7 +93,7 @@ static void PrintAligned(const char *s, size_t align) Print(s); } -static void PrintLn() +static void PrintLn(void) { Print("\n"); } @@ -89,10 +105,10 @@ static void PrintHex(UInt64 v, size_t align) PrintAligned(s, align); } -static void PrintDec(UInt64 v, size_t align) +static void PrintDec(int v, size_t align) { char s[32]; - ConvertUInt64ToString(v, s); + ConvertUInt64ToString((unsigned)v, s); PrintAligned(s, align); } @@ -102,12 +118,19 @@ static void PrintAddr(void *p) } -#define PRINT_ALLOC(name, cnt, size, ptr) \ +#define PRINT_REALLOC(name, cnt, size, ptr) { \ + Print(name " "); \ + if (!ptr) PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#define PRINT_ALLOC(name, cnt, size, ptr) { \ Print(name " "); \ PrintDec(cnt++, 10); \ PrintHex(size, 10); \ PrintAddr(ptr); \ - PrintLn(); + PrintLn(); } #define PRINT_FREE(name, cnt, ptr) if (ptr) { \ Print(name " "); \ @@ -117,7 +140,9 @@ static void PrintAddr(void *p) #else +#ifdef _WIN32 #define PRINT_ALLOC(name, cnt, size, ptr) +#endif #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() @@ -127,16 +152,31 @@ static void PrintAddr(void *p) #endif +/* +by specification: + malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free() + realloc(NULL, size) : the call is equivalent to malloc(size) + realloc(non_NULL, 0) : the call is equivalent to free(ptr) + +in main compilers: + malloc(0) : returns non_NULL + realloc(NULL, 0) : returns non_NULL + realloc(non_NULL, 0) : returns NULL +*/ + void *MyAlloc(size_t size) { if (size == 0) return NULL; - PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); - #ifdef _SZ_ALLOC_DEBUG + // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL) + #ifdef SZ_ALLOC_DEBUG { void *p = malloc(size); - // PRINT_ALLOC("Alloc ", g_allocCount, size, p); + if (p) + { + PRINT_ALLOC("Alloc ", g_allocCount, size, p) + } return p; } #else @@ -146,33 +186,64 @@ void *MyAlloc(size_t size) void MyFree(void *address) { - PRINT_FREE("Free ", g_allocCount, address); + PRINT_FREE("Free ", g_allocCount, address) free(address); } +void *MyRealloc(void *address, size_t size) +{ + if (size == 0) + { + MyFree(address); + return NULL; + } + // PRINT_REALLOC("Realloc ", g_allocCount, size, address) + #ifdef SZ_ALLOC_DEBUG + { + void *p = realloc(address, size); + if (p) + { + PRINT_REALLOC("Realloc ", g_allocCount, size, address) + } + return p; + } + #else + return realloc(address, size); + #endif +} + + #ifdef _WIN32 void *MidAlloc(size_t size) { if (size == 0) return NULL; - - PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); - + #ifdef SZ_ALLOC_DEBUG + { + void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p) + } + return p; + } + #else return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + #endif } void MidFree(void *address) { - PRINT_FREE("Free-Mid", g_allocCountMid, address); + PRINT_FREE("Free-Mid", g_allocCountMid, address) if (!address) return; VirtualFree(address, 0, MEM_RELEASE); } -#ifdef _7ZIP_LARGE_PAGES +#ifdef Z7_LARGE_PAGES #ifdef MEM_LARGE_PAGES #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES @@ -183,34 +254,35 @@ void MidFree(void *address) extern SIZE_T g_LargePageSize; SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); +typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); -#endif // _7ZIP_LARGE_PAGES - -void SetLargePageSize() +void SetLargePageSize(void) { - #ifdef _7ZIP_LARGE_PAGES + #ifdef Z7_LARGE_PAGES SIZE_T size; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (!largePageMinimum) + const + Func_GetLargePageMinimum fn = + (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetLargePageMinimum"); + if (!fn) return; - size = largePageMinimum(); + size = fn(); if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; #endif } +#endif // Z7_LARGE_PAGES void *BigAlloc(size_t size) { if (size == 0) return NULL; - PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); - - #ifdef _7ZIP_LARGE_PAGES + PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL) + + #ifdef Z7_LARGE_PAGES { SIZE_T ps = g_LargePageSize; if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) @@ -220,38 +292,38 @@ void *BigAlloc(size_t size) size2 = (size + ps) & ~ps; if (size2 >= size) { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); - if (res) - return res; + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) + return p; + } } } } #endif - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + return MidAlloc(size); } void BigFree(void *address) { - PRINT_FREE("Free-Big", g_allocCountBig, address); - - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); + PRINT_FREE("Free-Big", g_allocCountBig, address) + MidFree(address); } -#endif +#endif // _WIN32 -static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } -static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); } const ISzAlloc g_Alloc = { SzAlloc, SzFree }; #ifdef _WIN32 -static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } -static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } -static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } -static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); } +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); } const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif @@ -334,7 +406,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) void *p; void *pAligned; size_t newSize; - UNUSED_VAR(pp); + UNUSED_VAR(pp) /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned block to prevent cache line sharing with another allocated blocks */ @@ -362,7 +434,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) #else void *p; - UNUSED_VAR(pp); + UNUSED_VAR(pp) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) return NULL; @@ -377,7 +449,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) static void SzAlignedFree(ISzAllocPtr pp, void *address) { - UNUSED_VAR(pp); + UNUSED_VAR(pp) #ifndef USE_posix_memalign if (address) MyFree(((void **)address)[-1]); @@ -401,7 +473,7 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) { - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); void *adr; void *pAligned; size_t newSize; @@ -447,7 +519,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) { if (address) { - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); PrintLn(); Print("- Aligned Free: "); PrintLn(); diff --git a/3rdparty/7z/src/Alloc.h b/3rdparty/7z/src/Alloc.h index 59de107601..a88125d69a 100644 --- a/3rdparty/7z/src/Alloc.h +++ b/3rdparty/7z/src/Alloc.h @@ -1,19 +1,32 @@ /* Alloc.h -- Memory allocation functions -2021-07-13 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H +#ifndef ZIP7_INC_ALLOC_H +#define ZIP7_INC_ALLOC_H #include "7zTypes.h" EXTERN_C_BEGIN +/* + MyFree(NULL) : is allowed, as free(NULL) + MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL + MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL +MyRealloc() is similar to realloc() for the following cases: + MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr) + MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed + MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed +*/ + void *MyAlloc(size_t size); void MyFree(void *address); +void *MyRealloc(void *address, size_t size); #ifdef _WIN32 +#ifdef Z7_LARGE_PAGES void SetLargePageSize(void); +#endif void *MidAlloc(size_t size); void MidFree(void *address); diff --git a/3rdparty/7z/src/Bcj2.c b/3rdparty/7z/src/Bcj2.c index c1772f2344..c562e47ce8 100644 --- a/3rdparty/7z/src/Bcj2.c +++ b/3rdparty/7z/src/Bcj2.c @@ -1,29 +1,24 @@ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) -2021-02-09 : Igor Pavlov : Public domain */ +2023-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Bcj2.h" #include "CpuArch.h" -#define CProb UInt16 - #define kTopValue ((UInt32)1 << 24) -#define kNumModelBits 11 -#define kBitModelTotal (1 << kNumModelBits) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 -#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound) -#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits)); +// UInt32 bcj2_stats[256 + 2][2]; void Bcj2Dec_Init(CBcj2Dec *p) { unsigned i; - - p->state = BCJ2_DEC_STATE_OK; + p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; p->ip = 0; - p->temp[3] = 0; + p->temp = 0; p->range = 0; p->code = 0; for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) @@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p) SRes Bcj2Dec_Decode(CBcj2Dec *p) { + UInt32 v = p->temp; + // const Byte *src; if (p->range <= 5) { - p->state = BCJ2_DEC_STATE_OK; + UInt32 code = p->code; + p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ for (; p->range != 5; p->range++) { - if (p->range == 1 && p->code != 0) + if (p->range == 1 && code != 0) return SZ_ERROR_DATA; - if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) { p->state = BCJ2_STREAM_RC; return SZ_OK; } - - p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + p->code = code; } - - if (p->code == 0xFFFFFFFF) + if (code == 0xffffffff) return SZ_ERROR_DATA; - - p->range = 0xFFFFFFFF; + p->range = 0xffffffff; } - else if (p->state >= BCJ2_DEC_STATE_ORIG_0) + // else { - while (p->state <= BCJ2_DEC_STATE_ORIG_3) + unsigned state = p->state; + // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop + if (BCJ2_IS_32BIT_STREAM(state)) + { + const Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return SZ_OK; + p->bufs[state] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v = GetBe32a(cur) - ip; + p->ip = ip; + } + state = BCJ2_DEC_STATE_ORIG_0; + } + if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) { Byte *dest = p->dest; - if (dest == p->destLim) - return SZ_OK; - *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0]; - p->state++; - p->dest = dest + 1; - } - } - - /* - if (BCJ2_IS_32BIT_STREAM(p->state)) - { - const Byte *cur = p->bufs[p->state]; - if (cur == p->lims[p->state]) - return SZ_OK; - p->bufs[p->state] = cur + 4; - - { - UInt32 val; - Byte *dest; - SizeT rem; - - p->ip += 4; - val = GetBe32(cur) - p->ip; - dest = p->dest; - rem = p->destLim - dest; - if (rem < 4) + for (;;) { - SizeT i; - SetUi32(p->temp, val); - for (i = 0; i < rem; i++) - dest[i] = p->temp[i]; - p->dest = dest + rem; - p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; - return SZ_OK; + if (dest == p->destLim) + { + p->state = state; + p->temp = v; + return SZ_OK; + } + *dest++ = (Byte)v; + p->dest = dest; + if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) + break; + v >>= 8; } - SetUi32(dest, val); - p->temp[3] = (Byte)(val >> 24); - p->dest = dest + 4; - p->state = BCJ2_DEC_STATE_OK; } } - */ + // src = p->bufs[BCJ2_STREAM_MAIN]; for (;;) { + /* if (BCJ2_IS_32BIT_STREAM(p->state)) p->state = BCJ2_DEC_STATE_OK; else + */ { if (p->range < kTopValue) { if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) { p->state = BCJ2_STREAM_RC; + p->temp = v; return SZ_OK; } p->range <<= 8; p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; } - { const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; const Byte *srcLim; - Byte *dest; - SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); - - if (num == 0) + Byte *dest = p->dest; { - p->state = BCJ2_STREAM_MAIN; - return SZ_OK; + const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); + SizeT num = (SizeT)(p->destLim - dest); + if (num >= rem) + num = rem; + #define NUM_ITERS 4 + #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 + num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) + #else + num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) + #endif + srcLim = src + num; } - - dest = p->dest; - if (num > (SizeT)(p->destLim - dest)) + + #define NUM_SHIFT_BITS 24 + #define ONE_ITER(indx) { \ + const unsigned b = src[indx]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + /* ++dest */; /* v = b; */ } + + if (src != srcLim) + for (;;) { - num = (SizeT)(p->destLim - dest); - if (num == 0) + /* The dependency chain of 2-cycle for (v) calculation is not big problem here. + But we can remove dependency chain with v = b in the end of loop. */ + ONE_ITER(0) + #if (NUM_ITERS > 1) + ONE_ITER(1) + #if (NUM_ITERS > 2) + ONE_ITER(2) + #if (NUM_ITERS > 3) + ONE_ITER(3) + #if (NUM_ITERS > 4) + ONE_ITER(4) + #if (NUM_ITERS > 5) + ONE_ITER(5) + #if (NUM_ITERS > 6) + ONE_ITER(6) + #if (NUM_ITERS > 7) + ONE_ITER(7) + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + src += NUM_ITERS; + if (src == srcLim) + break; + } + + if (src == srcLim) + #if (NUM_ITERS > 1) + for (;;) + #endif + { + #if (NUM_ITERS > 1) + if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) + #endif { - p->state = BCJ2_DEC_STATE_ORIG; + const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = src; + p->dest = dest; + p->ip += (UInt32)num; + /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + p->state = + src == p->lims[BCJ2_STREAM_MAIN] ? + (unsigned)BCJ2_STREAM_MAIN : + (unsigned)BCJ2_DEC_STATE_ORIG; + p->temp = v; return SZ_OK; } + #if (NUM_ITERS > 1) + ONE_ITER(0) + src++; + #endif } - - srcLim = src + num; - if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80) - *dest = src[0]; - else for (;;) { - Byte b = *src; - *dest = b; - if (b != 0x0F) - { - if ((b & 0xFE) == 0xE8) - break; - dest++; - if (++src != srcLim) - continue; - break; - } - dest++; - if (++src == srcLim) - break; - if ((*src & 0xF0) != 0x80) - continue; - *dest = *src; - break; - } - - num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); - - if (src == srcLim) - { - p->temp[3] = src[-1]; - p->bufs[BCJ2_STREAM_MAIN] = src; + const SizeT num = (SizeT)(dest - p->dest); + p->dest = dest; // p->dest += num; + p->bufs[BCJ2_STREAM_MAIN] += num; // = src; p->ip += (UInt32)num; - p->dest += num; - p->state = - p->bufs[BCJ2_STREAM_MAIN] == - p->lims[BCJ2_STREAM_MAIN] ? - (unsigned)BCJ2_STREAM_MAIN : - (unsigned)BCJ2_DEC_STATE_ORIG; - return SZ_OK; } - { UInt32 bound, ttt; - CProb *prob; - Byte b = src[0]; - Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]); - - p->temp[3] = b; - p->bufs[BCJ2_STREAM_MAIN] = src + 1; - num++; - p->ip += (UInt32)num; - p->dest += num; - - prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0)); - - _IF_BIT_0 + CBcj2Prob *prob; // unsigned index; + /* + prob = p->probs + (unsigned)((Byte)v == 0xe8 ? + 2 + (Byte)(v >> 8) : + ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); + */ { - _UPDATE_0 + const unsigned c = ((v + 0x17) >> 6) & 1; + prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + // (Byte) + // 8x->0 : e9->1 : xxe8->xx+2 + // 8x->0x100 : e9->0x101 : xxe8->xx + // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); + // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); + } + ttt = *prob; + bound = (p->range >> kNumBitModelTotalBits) * ttt; + if (p->code < bound) + { + // bcj2_stats[prob - p->probs][0]++; + p->range = bound; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); continue; } - _UPDATE_1 - + { + // bcj2_stats[prob - p->probs][1]++; + p->range -= bound; + p->code -= bound; + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } } } } - { - UInt32 val; - unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; + /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ + // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; const Byte *cur = p->bufs[cj]; Byte *dest; SizeT rem; - if (cur == p->lims[cj]) { p->state = cj; break; } - - val = GetBe32(cur); + v = GetBe32a(cur); p->bufs[cj] = cur + 4; - - p->ip += 4; - val -= p->ip; + { + const UInt32 ip = p->ip + 4; + v -= ip; + p->ip = ip; + } dest = p->dest; rem = (SizeT)(p->destLim - dest); - if (rem < 4) { - p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8; - p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8; - p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8; - p->temp[3] = (Byte)val; + if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; + if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; + if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} + p->temp = v; p->dest = dest + rem; p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; break; } - - SetUi32(dest, val); - p->temp[3] = (Byte)(val >> 24); + SetUi32(dest, v) + v >>= 24; p->dest = dest + 4; } } @@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) p->range <<= 8; p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; } - return SZ_OK; } + +#undef NUM_ITERS +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git a/3rdparty/7z/src/Bcj2.h b/3rdparty/7z/src/Bcj2.h index 68893d2d12..2eda0c820f 100644 --- a/3rdparty/7z/src/Bcj2.h +++ b/3rdparty/7z/src/Bcj2.h @@ -1,8 +1,8 @@ -/* Bcj2.h -- BCJ2 Converter for x86 code -2014-11-10 : Igor Pavlov : Public domain */ +/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2) +2023-03-02 : Igor Pavlov : Public domain */ -#ifndef __BCJ2_H -#define __BCJ2_H +#ifndef ZIP7_INC_BCJ2_H +#define ZIP7_INC_BCJ2_H #include "7zTypes.h" @@ -26,37 +26,68 @@ enum BCJ2_DEC_STATE_ORIG_3, BCJ2_DEC_STATE_ORIG, - BCJ2_DEC_STATE_OK + BCJ2_DEC_STATE_ERROR /* after detected data error */ }; enum { BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, - BCJ2_ENC_STATE_OK + BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */ }; -#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) +/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */ +#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2) /* CBcj2Dec / CBcj2Enc bufs sizes: BUF_SIZE(n) = lims[n] - bufs[n] -bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4: +bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4: (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 */ +// typedef UInt32 CBcj2Prob; +typedef UInt16 CBcj2Prob; + +/* +BCJ2 encoder / decoder internal requirements: + - If last bytes of stream contain marker (e8/e8/0f8x), then + there is also encoded symbol (0 : no conversion) in RC stream. + - One case of overlapped instructions is supported, + if last byte of converted instruction is (0f) and next byte is (8x): + marker [xx xx xx 0f] 8x + then the pair (0f 8x) is treated as marker. +*/ + +/* ---------- BCJ2 Decoder ---------- */ + /* CBcj2Dec: -dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: +(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: bufs[BCJ2_STREAM_MAIN] >= dest && - bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv + + bufs[BCJ2_STREAM_MAIN] - dest >= BUF_SIZE(BCJ2_STREAM_CALL) + BUF_SIZE(BCJ2_STREAM_JUMP) - tempReserv = 0 : for first call of Bcj2Dec_Decode - tempReserv = 4 : for any other calls of Bcj2Dec_Decode - overlap with offset = 1 is not allowed + reserve = bufs[BCJ2_STREAM_MAIN] - dest - + ( BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) ) + and additional conditions: + if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve != 1) : if (ver < v23.00) + } + else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve >= 6) : if (ver < v23.00) + (reserve >= 4) : if (ver >= v23.00) + We need that (reserve) because after first call of Bcj2Dec_Decode(), + CBcj2Dec::temp can contain up to 4 bytes for writing to (dest). + } + (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode(). + (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers, + and (reserve) is calculated from full (final) sizes of all streams before first call. */ typedef struct @@ -68,22 +99,66 @@ typedef struct unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ - UInt32 ip; - Byte temp[4]; + UInt32 ip; /* property of starting base for decoding */ + UInt32 temp; /* Byte temp[4]; */ UInt32 range; UInt32 code; - UInt16 probs[2 + 256]; + CBcj2Prob probs[2 + 256]; } CBcj2Dec; + +/* Note: + Bcj2Dec_Init() sets (CBcj2Dec::ip = 0) + if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init() +*/ void Bcj2Dec_Init(CBcj2Dec *p); -/* Returns: SZ_OK or SZ_ERROR_DATA */ + +/* Bcj2Dec_Decode(): + returns: + SZ_OK + SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct +*/ SRes Bcj2Dec_Decode(CBcj2Dec *p); -#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0) +/* To check that decoding was finished you can compare + sizes of processed streams with sizes known from another sources. + You must do at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. + additional optional checks: + - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC) + - the checks Bcj2Dec_IsMaybeFinished*() + also before actual decoding you can check that the + following condition is met for stream sizes: + ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) ) +*/ + +/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for + additional input data in BCJ2_STREAM_MAIN stream. + Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN) + +/* if the stream decoding was finished correctly, then range decoder + part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0). + Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0) + +/* use Bcj2Dec_IsMaybeFinished() only as additional check + after at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. +*/ +#define Bcj2Dec_IsMaybeFinished(_p_) ( \ + Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \ + Bcj2Dec_IsMaybeFinished_code(_p_)) +/* ---------- BCJ2 Encoder ---------- */ + typedef enum { BCJ2_ENC_FINISH_MODE_CONTINUE, @@ -91,6 +166,91 @@ typedef enum BCJ2_ENC_FINISH_MODE_END_STREAM } EBcj2Enc_FinishMode; +/* + BCJ2_ENC_FINISH_MODE_CONTINUE: + process non finished encoding. + It notifies the encoder that additional further calls + can provide more input data (src) than provided by current call. + In that case the CBcj2Enc encoder still can move (src) pointer + up to (srcLim), but CBcj2Enc encoder can store some of the last + processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer. + at return: + (CBcj2Enc::src will point to position that includes + processed data and data copied to (temp[]) buffer) + That data from (temp[]) buffer will be used in further calls. + + BCJ2_ENC_FINISH_MODE_END_BLOCK: + finish encoding of current block (ended at srcLim) without RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) && + CBcj2Enc::src == CBcj2Enc::srcLim) + : it shows that block encoding was finished. And the encoder is + ready for new (src) data or for stream finish operation. + finished block means + { + CBcj2Enc has completed block encoding up to (srcLim). + (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP cortages will + not cross block boundary at (srcLim). + temporary CBcj2Enc buffer for (ORIG) src data is empty. + 3 output uncompressed streams (MAIN, CALL, JUMP) were flushed. + RC stream was not flushed. And RC stream will cross block boundary. + } + Note: some possible implementation of BCJ2 encoder could + write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(), + and it could calculate symbol for RC in another call of Bcj2Enc_Encode(). + BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol. + And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls. + So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK + to ensure that RC symbol is calculated and written in proper block. + + BCJ2_ENC_FINISH_MODE_END_STREAM + finish encoding of stream (ended at srcLim) fully including RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED) + : it shows that stream encoding was finished fully, + and all output streams were flushed fully. + also Bcj2Enc_IsFinished() can be called. +*/ + + +/* + 32-bit relative offset in JUMP/CALL commands is + - (mod 4 GiB) for 32-bit x86 code + - signed Int32 for 64-bit x86-64 code + BCJ2 encoder also does internal relative to absolute address conversions. + And there are 2 possible ways to do it: + before v23: we used 32-bit variables and (mod 4 GiB) conversion + since v23: we use 64-bit variables and (signed Int32 offset) conversion. + The absolute address condition for conversion in v23: + ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64) + note that if (fileSize64 > 2 GiB). there is difference between + old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23). + And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases. +*/ + +/* +// for old (v22) way for conversion: +typedef UInt32 CBcj2Enc_ip_unsigned; +typedef Int32 CBcj2Enc_ip_signed; +#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31) +*/ +typedef UInt64 CBcj2Enc_ip_unsigned; +typedef Int64 CBcj2Enc_ip_signed; + +/* maximum size of file that can be used for conversion condition */ +#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2) + +/* default value of fileSize64_minus1 variable that means + that absolute address limitation will not be used */ +#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1) + +/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */ +#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \ + ((CBcj2Enc_ip_unsigned)(fileSize) - 1) + +/* set CBcj2Enc::fileSize64_minus1 variable from size of file */ +#define Bcj2Enc_SET_FileSize(p, fileSize) \ + (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize); + + typedef struct { Byte *bufs[BCJ2_NUM_STREAMS]; @@ -101,45 +261,71 @@ typedef struct unsigned state; EBcj2Enc_FinishMode finishMode; - Byte prevByte; + Byte context; + Byte flushRem; + Byte isFlushState; Byte cache; UInt32 range; UInt64 low; UInt64 cacheSize; + + // UInt32 context; // for marker version, it can include marker flag. - UInt32 ip; - - /* 32-bit ralative offset in JUMP/CALL commands is - - (mod 4 GB) in 32-bit mode - - signed Int32 in 64-bit mode - We use (mod 4 GB) check for fileSize. - Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */ - UInt32 fileIp; - UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */ - UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */ + /* (ip64) and (fileIp64) correspond to virtual source stream position + that doesn't include data in temp[] */ + CBcj2Enc_ip_unsigned ip64; /* current (ip) position */ + CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */ + CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */ + UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */ + // UInt32 relatExcludeBits; UInt32 tempTarget; - unsigned tempPos; - Byte temp[4 * 2]; - - unsigned flushPos; - - UInt16 probs[2 + 256]; + unsigned tempPos; /* the number of bytes that were copied to temp[] buffer + (tempPos <= 4) outside of Bcj2Enc_Encode() */ + // Byte temp[4]; // for marker version + Byte temp[8]; + CBcj2Prob probs[2 + 256]; } CBcj2Enc; void Bcj2Enc_Init(CBcj2Enc *p); + + +/* +Bcj2Enc_Encode(): at exit: + p->State < BCJ2_NUM_STREAMS : we need more buffer space for output stream + (bufs[p->State] == lims[p->State]) + p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream + (src == srcLim) + p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream +*/ void Bcj2Enc_Encode(CBcj2Enc *p); -#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos) -#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5) +/* Bcj2Enc encoder can look ahead for up 4 bytes of source stream. + CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer. + (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after + fully processed data and after data copied to temp buffer. + So if the caller needs to get real number of fully processed input + bytes (without look ahead data in temp buffer), + the caller must subtruct (CBcj2Enc::tempPos) value from processed size + value that is calculated based on current (CBcj2Enc::src): + cur_processed_pos = Calc_Big_Processed_Pos(enc.src)) - + Bcj2Enc_Get_AvailInputSize_in_Temp(&enc); +*/ +/* get the size of input data that was stored in temp[] buffer: */ +#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos) +#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0) -#define BCJ2_RELAT_LIMIT_NUM_BITS 26 -#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS) - -/* limit for CBcj2Enc::fileSize variable */ -#define BCJ2_FileSize_MAX ((UInt32)1 << 31) +/* Note : the decoder supports overlapping of marker (0f 80). + But we can eliminate such overlapping cases by setting + the limit for relative offset conversion as + CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB) +*/ +/* default value for CBcj2Enc::relatLimit */ +#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24) +#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31) +// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5 EXTERN_C_END diff --git a/3rdparty/7z/src/Bcj2Enc.c b/3rdparty/7z/src/Bcj2Enc.c index 71ac5091d5..53da590351 100644 --- a/3rdparty/7z/src/Bcj2Enc.c +++ b/3rdparty/7z/src/Bcj2Enc.c @@ -1,60 +1,62 @@ -/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) -2021-02-09 : Igor Pavlov : Public domain */ +/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2) +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" /* #define SHOW_STAT */ - #ifdef SHOW_STAT #include -#define PRF(x) x +#define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src)); #else -#define PRF(x) +#define PRF2(s) #endif -#include - #include "Bcj2.h" #include "CpuArch.h" -#define CProb UInt16 - #define kTopValue ((UInt32)1 << 24) -#define kNumModelBits 11 -#define kBitModelTotal (1 << kNumModelBits) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 void Bcj2Enc_Init(CBcj2Enc *p) { unsigned i; - - p->state = BCJ2_ENC_STATE_OK; + p->state = BCJ2_ENC_STATE_ORIG; p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; - - p->prevByte = 0; - + p->context = 0; + p->flushRem = 5; + p->isFlushState = 0; p->cache = 0; - p->range = 0xFFFFFFFF; + p->range = 0xffffffff; p->low = 0; p->cacheSize = 1; - - p->ip = 0; - - p->fileIp = 0; - p->fileSize = 0; - p->relatLimit = BCJ2_RELAT_LIMIT; - + p->ip64 = 0; + p->fileIp64 = 0; + p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED; + p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT; + // p->relatExcludeBits = 0; p->tempPos = 0; - - p->flushPos = 0; - for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) p->probs[i] = kBitModelTotal >> 1; } -static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) +// Z7_NO_INLINE +Z7_FORCE_INLINE +static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p) { - if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0) + const UInt32 low = (UInt32)p->low; + const unsigned high = (unsigned) + #if defined(Z7_MSC_VER_ORIGINAL) \ + && defined(MY_CPU_X86) \ + && defined(MY_CPU_LE) \ + && !defined(MY_CPU_64BIT) + // we try to rid of __aullshr() call in MSVS-x86 + (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only + #else + (p->low >> 32); + #endif + if (low < (UInt32)0xff000000 || high != 0) { Byte *buf = p->bufs[BCJ2_STREAM_RC]; do @@ -65,247 +67,440 @@ static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) p->bufs[BCJ2_STREAM_RC] = buf; return True; } - *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32)); - p->cache = 0xFF; + *buf++ = (Byte)(p->cache + high); + p->cache = 0xff; } while (--p->cacheSize); p->bufs[BCJ2_STREAM_RC] = buf; - p->cache = (Byte)((UInt32)p->low >> 24); + p->cache = (Byte)(low >> 24); } p->cacheSize++; - p->low = (UInt32)p->low << 8; + p->low = low << 8; return False; } + +/* +We can use 2 alternative versions of code: +1) non-marker version: + Byte CBcj2Enc::context + Byte temp[8]; + Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer. + Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction + with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call. + +2) marker version: + UInt32 CBcj2Enc::context + Byte CBcj2Enc::temp[4]; + MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains finded marker. + it's allowed that + one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest, + and another call of Bcj2Enc_Encode_2() does offset conversion. + So different values of (fileIp) and (fileSize) are possible + in these different Bcj2Enc_Encode_2() calls. + +Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop. +So we use non-marker version. +*/ + +/* + Corner cases with overlap in multi-block. + before v23: there was one corner case, where converted instruction + could start in one sub-stream and finish in next sub-stream. + If multi-block (solid) encoding is used, + and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream. + and (0f) is last byte of previous sub-stream + and (8x) is first byte of current sub-stream + then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder. + BCJ2 encoder can converts 32-bit offset for that (0f 8x) cortage, + if that offset meets limit requirements. + If encoder allows 32-bit offset conversion for such overlap case, + then the data in 3 uncompressed BCJ2 streams for some sub-stream + can depend from data of previous sub-stream. + That corner case is not big problem, and it's rare case. + Since v23.00 we do additional check to prevent conversions in such overlap cases. +*/ + +/* + Bcj2Enc_Encode_2() output variables at exit: + { + if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG)) + { + it means that encoder needs more input data. + if (p->srcLim == p->src) at exit, then + { + (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) + all input data were read and processed, and we are ready for + new input data. + } + else + { + (p->srcLim != p->src) + (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) + The encoder have found e8/e9/0f_8x marker, + and p->src points to last byte of that marker, + Bcj2Enc_Encode_2() needs more input data to get totally + 5 bytes (last byte of marker and 32-bit branch offset) + as continuous array starting from p->src. + (p->srcLim - p->src < 5) requirement is met after exit. + So non-processed resedue from p->src to p->srcLim is always less than 5 bytes. + } + } + } +*/ + +Z7_NO_INLINE static void Bcj2Enc_Encode_2(CBcj2Enc *p) { - if (BCJ2_IS_32BIT_STREAM(p->state)) + if (!p->isFlushState) { - Byte *cur = p->bufs[p->state]; - if (cur == p->lims[p->state]) - return; - SetBe32(cur, p->tempTarget); - p->bufs[p->state] = cur + 4; - } - - p->state = BCJ2_ENC_STATE_ORIG; - - for (;;) - { - if (p->range < kTopValue) - { - if (RangeEnc_ShiftLow(p)) - return; - p->range <<= 8; - } - + const Byte *src; + UInt32 v; { + const unsigned state = p->state; + if (BCJ2_IS_32BIT_STREAM(state)) + { + Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return; + SetBe32a(cur, p->tempTarget) + p->bufs[state] = cur + 4; + } + } + p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit + src = p->src; + v = p->context; + + // #define WRITE_CONTEXT p->context = v; // for marker version + #define WRITE_CONTEXT p->context = (Byte)v; + #define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT + + for (;;) + { + // const Byte *src; + // UInt32 v; + CBcj2Enc_ip_unsigned ip; + if (p->range < kTopValue) + { + // to reduce register pressure and code size: we save and restore local variables. + WRITE_CONTEXT_AND_SRC + if (Bcj2_RangeEnc_ShiftLow(p)) + return; + p->range <<= 8; + src = p->src; + v = p->context; + } + // src = p->src; + // #define MARKER_FLAG ((UInt32)1 << 17) + // if ((v & MARKER_FLAG) == 0) // for marker version { - const Byte *src = p->src; const Byte *srcLim; - Byte *dest; - SizeT num = (SizeT)(p->srcLim - src); - - if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) + Byte *dest = p->bufs[BCJ2_STREAM_MAIN]; { - if (num <= 4) - return; - num -= 4; + const SizeT remSrc = (SizeT)(p->srcLim - src); + SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); + if (rem >= remSrc) + rem = remSrc; + srcLim = src + rem; } - else if (num == 0) - break; + /* p->context contains context of previous byte: + bits [0 : 7] : src[-1], if (src) was changed in this call + bits [8 : 31] : are undefined for non-marker version + */ + // v = p->context; + #define NUM_SHIFT_BITS 24 + #define CONV_FLAG ((UInt32)1 << 16) + #define ONE_ITER { \ + b = src[0]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + src++; if (src == srcLim) { break; } } - dest = p->bufs[BCJ2_STREAM_MAIN]; - if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) + if (src != srcLim) + for (;;) { - num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); - if (num == 0) + /* clang can generate ineffective code with setne instead of two jcc instructions. + we can use 2 iterations and external (unsigned b) to avoid that ineffective code genaration. */ + unsigned b; + ONE_ITER + ONE_ITER + } + + ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = dest; + p->ip64 = ip; + + if (src == srcLim) + { + WRITE_CONTEXT_AND_SRC + if (src != p->srcLim) { p->state = BCJ2_STREAM_MAIN; return; } - } - - srcLim = src + num; - - if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80) - *dest = src[0]; - else for (;;) - { - Byte b = *src; - *dest = b; - if (b != 0x0F) - { - if ((b & 0xFE) == 0xE8) - break; - dest++; - if (++src != srcLim) - continue; - break; - } - dest++; - if (++src == srcLim) - break; - if ((*src & 0xF0) != 0x80) - continue; - *dest = *src; + /* (p->src == p->srcLim) + (p->state == BCJ2_ENC_STATE_ORIG) */ + if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) + return; + /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */ + // (p->flushRem == 5); + p->isFlushState = 1; break; } - - num = (SizeT)(src - p->src); - - if (src == srcLim) + src++; + // p->src = src; + } + // ip = p->ip; // for marker version + /* marker was found */ + /* (v) contains marker that was found: + bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7] + : value of src[-2] : xx/xx/0f + bits [0 : 7] : value of src[-1] : e8/e9/8x + */ + { { - p->prevByte = src[-1]; - p->bufs[BCJ2_STREAM_MAIN] = dest; - p->src = src; - p->ip += (UInt32)num; - continue; - } - - { - Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]); - BoolInt needConvert; - - p->bufs[BCJ2_STREAM_MAIN] = dest + 1; - p->ip += (UInt32)num + 1; - src++; - - needConvert = False; - + #if NUM_SHIFT_BITS != 24 + v &= ~(UInt32)CONV_FLAG; + #endif + // UInt32 relat = 0; if ((SizeT)(p->srcLim - src) >= 4) { - UInt32 relatVal = GetUi32(src); - if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize) - && ((relatVal + p->relatLimit) >> 1) < p->relatLimit) - needConvert = True; - } - - { - UInt32 bound; - unsigned ttt; - Byte b = src[-1]; - CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0)); - - ttt = *prob; - bound = (p->range >> kNumModelBits) * ttt; - - if (!needConvert) + /* + if (relat != 0 || (Byte)v != 0xe8) + BoolInt isBigOffset = True; + */ + const UInt32 relat = GetUi32(src); + /* + #define EXCLUDE_FLAG ((UInt32)1 << 4) + #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0) + if (p->relatExcludeBits != 0) { + const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1); + isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0); + } + // isBigOffset = False; // for debug + */ + ip -= p->fileIp64; + // Use the following if check, if (ip) is 64-bit: + if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9) + if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1) + if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit) + v |= CONV_FLAG; + } + else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) + { + // (p->srcLim - src < 4) + // /* + // for non-marker version + p->ip64--; // p->ip = ip - 1; + p->bufs[BCJ2_STREAM_MAIN]--; + src--; + v >>= NUM_SHIFT_BITS; + // (0 < p->srcLim - p->src <= 4) + // */ + // v |= MARKER_FLAG; // for marker version + /* (p->state == BCJ2_ENC_STATE_ORIG) */ + WRITE_CONTEXT_AND_SRC + return; + } + { + const unsigned c = ((v + 0x17) >> 6) & 1; + CBcj2Prob *prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + /* + ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) : + ((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1)); + */ + const unsigned ttt = *prob; + const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt; + if ((v & CONV_FLAG) == 0) + { + // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy); + // v = (Byte)v; // for marker version p->range = bound; - *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); - p->src = src; - p->prevByte = b; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + // WRITE_CONTEXT_AND_SRC continue; } - p->low += bound; p->range -= bound; - *prob = (CProb)(ttt - (ttt >> kNumMoveBits)); - + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } + // p->context = src[3]; + { + // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP); + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; + ip = p->ip64; + v = GetUi32(src); // relat + ip += 4; + p->ip64 = ip; + src += 4; + // p->src = src; { - UInt32 relatVal = GetUi32(src); - UInt32 absVal; - p->ip += 4; - absVal = p->ip + relatVal; - p->prevByte = src[3]; - src += 4; - p->src = src; + const UInt32 absol = (UInt32)ip + v; + Byte *cur = p->bufs[cj]; + v >>= 24; + // WRITE_CONTEXT + if (cur == p->lims[cj]) { - unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; - Byte *cur = p->bufs[cj]; - if (cur == p->lims[cj]) - { - p->state = cj; - p->tempTarget = absVal; - return; - } - SetBe32(cur, absVal); - p->bufs[cj] = cur + 4; + p->state = cj; + p->tempTarget = absol; + WRITE_CONTEXT_AND_SRC + return; } + SetBe32a(cur, absol) + p->bufs[cj] = cur + 4; } } } } - } + } // end of loop } - if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) - return; - - for (; p->flushPos < 5; p->flushPos++) - if (RangeEnc_ShiftLow(p)) + for (; p->flushRem != 0; p->flushRem--) + if (Bcj2_RangeEnc_ShiftLow(p)) return; - p->state = BCJ2_ENC_STATE_OK; + p->state = BCJ2_ENC_STATE_FINISHED; } +/* +BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer. +So base function Bcj2Enc_Encode_2() + in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with + (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim) +Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer. + so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(), + then (p->src == p->srcLim). + And the caller's code is simpler with Bcj2Enc_Encode(). +*/ + +Z7_NO_INLINE void Bcj2Enc_Encode(CBcj2Enc *p) { - PRF(printf("\n")); - PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); - + PRF2("\n----") if (p->tempPos != 0) { + /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */ unsigned extra = 0; - + /* We will touch only minimal required number of bytes in input (src) stream. + So we will add input bytes from (src) stream to temp[] with step of 1 byte. + We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call + in first loop iteration because + - previous call of Bcj2Enc_Encode() could use another (finishMode), + - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG). + the case with full temp[] buffer (p->tempPos == 4) is possible here. + */ for (;;) { + // (0 < p->tempPos <= 5) // in non-marker version + /* p->src : the current src data position including extra bytes + that were copied to temp[] buffer in this call */ const Byte *src = p->src; const Byte *srcLim = p->srcLim; - EBcj2Enc_FinishMode finishMode = p->finishMode; - + const EBcj2Enc_FinishMode finishMode = p->finishMode; + if (src != srcLim) + { + /* if there are some src data after the data copied to temp[], + then we use MODE_CONTINUE for temp data */ + p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; + } p->src = p->temp; p->srcLim = p->temp + p->tempPos; - if (src != srcLim) - p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; - - PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); - + PRF2(" ") Bcj2Enc_Encode_2(p); - { - unsigned num = (unsigned)(p->src - p->temp); - unsigned tempPos = p->tempPos - num; + const unsigned num = (unsigned)(p->src - p->temp); + const unsigned tempPos = p->tempPos - num; unsigned i; p->tempPos = tempPos; for (i = 0; i < tempPos; i++) - p->temp[i] = p->temp[(size_t)i + num]; - + p->temp[i] = p->temp[(SizeT)i + num]; + // tempPos : number of bytes in temp buffer p->src = src; p->srcLim = srcLim; p->finishMode = finishMode; - - if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim) + if (p->state != BCJ2_ENC_STATE_ORIG) + { + // (p->tempPos <= 4) // in non-marker version + /* if (the reason of exit from Bcj2Enc_Encode_2() + is not BCJ2_ENC_STATE_ORIG), + then we exit from Bcj2Enc_Encode() with same reason */ + // optional code begin : we rollback (src) and tempPos, if it's possible: + if (extra >= tempPos) + extra = tempPos; + p->src = src - extra; + p->tempPos = tempPos - extra; + // optional code end : rollback of (src) and tempPos return; - + } + /* (p->tempPos <= 4) + (p->state == BCJ2_ENC_STATE_ORIG) + so encoder needs more data than in temp[] */ + if (src == srcLim) + return; // src buffer has no more input data. + /* (src != srcLim) + so we can provide more input data from src for Bcj2Enc_Encode_2() */ if (extra >= tempPos) { - p->src = src - tempPos; + /* (extra >= tempPos) means that temp buffer contains + only data from src buffer of this call. + So now we can encode without temp buffer */ + p->src = src - tempPos; // rollback (src) p->tempPos = 0; break; } - - p->temp[tempPos] = src[0]; + // we append one additional extra byte from (src) to temp[] buffer: + p->temp[tempPos] = *src; p->tempPos = tempPos + 1; + // (0 < p->tempPos <= 5) // in non-marker version p->src = src + 1; extra++; } } } - PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); - + PRF2("++++") + // (p->tempPos == 0) Bcj2Enc_Encode_2(p); + PRF2("====") if (p->state == BCJ2_ENC_STATE_ORIG) { const Byte *src = p->src; - unsigned rem = (unsigned)(p->srcLim - src); - unsigned i; - for (i = 0; i < rem; i++) - p->temp[i] = src[i]; - p->tempPos = rem; - p->src = src + rem; + const Byte *srcLim = p->srcLim; + const unsigned rem = (unsigned)(srcLim - src); + /* (rem <= 4) here. + if (p->src != p->srcLim), then + - we copy non-processed bytes from (p->src) to temp[] buffer, + - we set p->src equal to p->srcLim. + */ + if (rem) + { + unsigned i = 0; + p->src = srcLim; + p->tempPos = rem; + // (0 < p->tempPos <= 4) + do + p->temp[i] = src[i]; + while (++i != rem); + } + // (p->tempPos <= 4) + // (p->src == p->srcLim) } } + +#undef PRF2 +#undef CONV_FLAG +#undef MARKER_FLAG +#undef WRITE_CONTEXT +#undef WRITE_CONTEXT_AND_SRC +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git a/3rdparty/7z/src/Blake2.h b/3rdparty/7z/src/Blake2.h new file mode 100644 index 0000000000..723523568a --- /dev/null +++ b/3rdparty/7z/src/Blake2.h @@ -0,0 +1,48 @@ +/* Blake2.h -- BLAKE2 Hash +2023-03-04 : Igor Pavlov : Public domain +2015 : Samuel Neves : Public domain */ + +#ifndef ZIP7_INC_BLAKE2_H +#define ZIP7_INC_BLAKE2_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define BLAKE2S_BLOCK_SIZE 64 +#define BLAKE2S_DIGEST_SIZE 32 +#define BLAKE2SP_PARALLEL_DEGREE 8 + +typedef struct +{ + UInt32 h[8]; + UInt32 t[2]; + UInt32 f[2]; + Byte buf[BLAKE2S_BLOCK_SIZE]; + UInt32 bufPos; + UInt32 lastNode_f1; + UInt32 dummy[2]; /* for sizeof(CBlake2s) alignment */ +} CBlake2s; + +/* You need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */ +/* +void Blake2s_Init0(CBlake2s *p); +void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size); +void Blake2s_Final(CBlake2s *p, Byte *digest); +*/ + + +typedef struct +{ + CBlake2s S[BLAKE2SP_PARALLEL_DEGREE]; + unsigned bufPos; +} CBlake2sp; + + +void Blake2sp_Init(CBlake2sp *p); +void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size); +void Blake2sp_Final(CBlake2sp *p, Byte *digest); + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/Blake2s.c b/3rdparty/7z/src/Blake2s.c new file mode 100644 index 0000000000..2a84b57a42 --- /dev/null +++ b/3rdparty/7z/src/Blake2s.c @@ -0,0 +1,250 @@ +/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash +2023-03-04 : Igor Pavlov : Public domain +2015 : Samuel Neves : Public domain */ + +#include "Precomp.h" + +#include + +#include "Blake2.h" +#include "CpuArch.h" +#include "RotateDefs.h" + +#define rotr32 rotrFixed + +#define BLAKE2S_NUM_ROUNDS 10 +#define BLAKE2S_FINAL_FLAG (~(UInt32)0) + +static const UInt32 k_Blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + + +static void Blake2s_Init0(CBlake2s *p) +{ + unsigned i; + for (i = 0; i < 8; i++) + p->h[i] = k_Blake2s_IV[i]; + p->t[0] = 0; + p->t[1] = 0; + p->f[0] = 0; + p->f[1] = 0; + p->bufPos = 0; + p->lastNode_f1 = 0; +} + + +static void Blake2s_Compress(CBlake2s *p) +{ + UInt32 m[16]; + UInt32 v[16]; + + { + unsigned i; + + for (i = 0; i < 16; i++) + m[i] = GetUi32(p->buf + i * sizeof(m[i])); + + for (i = 0; i < 8; i++) + v[i] = p->h[i]; + } + + v[ 8] = k_Blake2s_IV[0]; + v[ 9] = k_Blake2s_IV[1]; + v[10] = k_Blake2s_IV[2]; + v[11] = k_Blake2s_IV[3]; + + v[12] = p->t[0] ^ k_Blake2s_IV[4]; + v[13] = p->t[1] ^ k_Blake2s_IV[5]; + v[14] = p->f[0] ^ k_Blake2s_IV[6]; + v[15] = p->f[1] ^ k_Blake2s_IV[7]; + + #define G(r,i,a,b,c,d) \ + a += b + m[sigma[2*i+0]]; d ^= a; d = rotr32(d, 16); c += d; b ^= c; b = rotr32(b, 12); \ + a += b + m[sigma[2*i+1]]; d ^= a; d = rotr32(d, 8); c += d; b ^= c; b = rotr32(b, 7); \ + + #define R(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]) \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]) \ + G(r,2,v[ 2],v[ 6],v[10],v[14]) \ + G(r,3,v[ 3],v[ 7],v[11],v[15]) \ + G(r,4,v[ 0],v[ 5],v[10],v[15]) \ + G(r,5,v[ 1],v[ 6],v[11],v[12]) \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]) \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]) \ + + { + unsigned r; + for (r = 0; r < BLAKE2S_NUM_ROUNDS; r++) + { + const Byte *sigma = k_Blake2s_Sigma[r]; + R(r) + } + /* R(0); R(1); R(2); R(3); R(4); R(5); R(6); R(7); R(8); R(9); */ + } + + #undef G + #undef R + + { + unsigned i; + for (i = 0; i < 8; i++) + p->h[i] ^= v[i] ^ v[i + 8]; + } +} + + +#define Blake2s_Increment_Counter(S, inc) \ + { p->t[0] += (inc); p->t[1] += (p->t[0] < (inc)); } + +#define Blake2s_Set_LastBlock(p) \ + { p->f[0] = BLAKE2S_FINAL_FLAG; p->f[1] = p->lastNode_f1; } + + +static void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size) +{ + while (size != 0) + { + unsigned pos = (unsigned)p->bufPos; + unsigned rem = BLAKE2S_BLOCK_SIZE - pos; + + if (size <= rem) + { + memcpy(p->buf + pos, data, size); + p->bufPos += (UInt32)size; + return; + } + + memcpy(p->buf + pos, data, rem); + Blake2s_Increment_Counter(S, BLAKE2S_BLOCK_SIZE) + Blake2s_Compress(p); + p->bufPos = 0; + data += rem; + size -= rem; + } +} + + +static void Blake2s_Final(CBlake2s *p, Byte *digest) +{ + unsigned i; + + Blake2s_Increment_Counter(S, (UInt32)p->bufPos) + Blake2s_Set_LastBlock(p) + memset(p->buf + p->bufPos, 0, BLAKE2S_BLOCK_SIZE - p->bufPos); + Blake2s_Compress(p); + + for (i = 0; i < 8; i++) + { + SetUi32(digest + sizeof(p->h[i]) * i, p->h[i]) + } +} + + +/* ---------- BLAKE2s ---------- */ + +/* we need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */ +/* +typedef struct +{ + Byte digest_length; + Byte key_length; + Byte fanout; + Byte depth; + UInt32 leaf_length; + Byte node_offset[6]; + Byte node_depth; + Byte inner_length; + Byte salt[BLAKE2S_SALTBYTES]; + Byte personal[BLAKE2S_PERSONALBYTES]; +} CBlake2sParam; +*/ + + +static void Blake2sp_Init_Spec(CBlake2s *p, unsigned node_offset, unsigned node_depth) +{ + Blake2s_Init0(p); + + p->h[0] ^= (BLAKE2S_DIGEST_SIZE | ((UInt32)BLAKE2SP_PARALLEL_DEGREE << 16) | ((UInt32)2 << 24)); + p->h[2] ^= ((UInt32)node_offset); + p->h[3] ^= ((UInt32)node_depth << 16) | ((UInt32)BLAKE2S_DIGEST_SIZE << 24); + /* + P->digest_length = BLAKE2S_DIGEST_SIZE; + P->key_length = 0; + P->fanout = BLAKE2SP_PARALLEL_DEGREE; + P->depth = 2; + P->leaf_length = 0; + store48(P->node_offset, node_offset); + P->node_depth = node_depth; + P->inner_length = BLAKE2S_DIGEST_SIZE; + */ +} + + +void Blake2sp_Init(CBlake2sp *p) +{ + unsigned i; + + p->bufPos = 0; + + for (i = 0; i < BLAKE2SP_PARALLEL_DEGREE; i++) + Blake2sp_Init_Spec(&p->S[i], i, 0); + + p->S[BLAKE2SP_PARALLEL_DEGREE - 1].lastNode_f1 = BLAKE2S_FINAL_FLAG; +} + + +void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size) +{ + unsigned pos = p->bufPos; + while (size != 0) + { + unsigned index = pos / BLAKE2S_BLOCK_SIZE; + unsigned rem = BLAKE2S_BLOCK_SIZE - (pos & (BLAKE2S_BLOCK_SIZE - 1)); + if (rem > size) + rem = (unsigned)size; + Blake2s_Update(&p->S[index], data, rem); + size -= rem; + data += rem; + pos += rem; + pos &= (BLAKE2S_BLOCK_SIZE * BLAKE2SP_PARALLEL_DEGREE - 1); + } + p->bufPos = pos; +} + + +void Blake2sp_Final(CBlake2sp *p, Byte *digest) +{ + CBlake2s R; + unsigned i; + + Blake2sp_Init_Spec(&R, 0, 1); + R.lastNode_f1 = BLAKE2S_FINAL_FLAG; + + for (i = 0; i < BLAKE2SP_PARALLEL_DEGREE; i++) + { + Byte hash[BLAKE2S_DIGEST_SIZE]; + Blake2s_Final(&p->S[i], hash); + Blake2s_Update(&R, hash, BLAKE2S_DIGEST_SIZE); + } + + Blake2s_Final(&R, digest); +} + +#undef rotr32 diff --git a/3rdparty/7z/src/Bra.c b/3rdparty/7z/src/Bra.c index cdefa4d2e9..411f25232b 100644 --- a/3rdparty/7z/src/Bra.c +++ b/3rdparty/7z/src/Bra.c @@ -1,230 +1,420 @@ -/* Bra.c -- Converters for RISC code -2021-02-09 : Igor Pavlov : Public domain */ +/* Bra.c -- Branch converters for RISC code +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "CpuArch.h" #include "Bra.h" +#include "CpuArch.h" +#include "RotateDefs.h" -SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name + +#define Z7_BRANCH_FUNC_MAIN(name) \ +static \ +Z7_FORCE_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) + +#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ + { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ + +#ifdef Z7_EXTRACT_ONLY +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) +#else +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) +#endif + +#if defined(__clang__) +#define BR_EXTERNAL_FOR +#define BR_NEXT_ITERATION continue; +#else +#define BR_EXTERNAL_FOR for (;;) +#define BR_NEXT_ITERATION break; +#endif + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + // GCC is not good for __builtin_expect() here + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // #define Z7_unlikely [[unlikely]] + // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_likely [[likely]] +#else + // #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +Z7_BRANCH_FUNC_MAIN(ARM64) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip += 4; - p = data; - lim = data + size; - - if (encoding) - - for (;;) + const UInt32 flag = (UInt32)1 << (24 - 4); + const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); + size &= ~(SizeT)3; + // if (size == 0) return p; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + BR_EXTERNAL_FOR { + // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE for (;;) { - if (p >= lim) - return (SizeT)(p - data); + UInt32 v; + if Z7_UNLIKELY(p == lim) + return p; + v = GetUi32a(p); p += 4; - if (p[-1] == 0xEB) - break; - } - { - UInt32 v = GetUi32(p - 4); - v <<= 2; - v += ip + (UInt32)(p - data); - v >>= 2; - v &= 0x00FFFFFF; - v |= 0xEB000000; - SetUi32(p - 4, v); - } - } - - for (;;) - { - for (;;) - { - if (p >= lim) - return (SizeT)(p - data); - p += 4; - if (p[-1] == 0xEB) - break; - } - { - UInt32 v = GetUi32(p - 4); - v <<= 2; - v -= ip + (UInt32)(p - data); - v >>= 2; - v &= 0x00FFFFFF; - v |= 0xEB000000; - SetUi32(p - 4, v); + if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) + { + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x03ffffff; + v |= 0x94000000; + SetUi32a(p - 4, v) + BR_NEXT_ITERATION + } + // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) + v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) + { + UInt32 z, c; + // v = rotrFixed(v, 8); + v += flag; if Z7_UNLIKELY(v & mask) continue; + z = (v & 0xffffffe0) | (v >> 26); + c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; + BR_CONVERT_VAL(z, c) + v &= 0x1f; + v |= 0x90000000; + v |= z << 26; + v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); + SetUi32a(p - 4, v) + } } } } +Z7_BRANCH_FUNCS_IMP(ARM64) -SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +Z7_BRANCH_FUNC_MAIN(ARM) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)1; - p = data; - lim = data + size - 4; - - if (encoding) + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to next instruction */ + pc += 8 - 4; for (;;) { - UInt32 b1; for (;;) { - UInt32 b3; - if (p > lim) - return (SizeT)(p - data); - b1 = p[1]; - b3 = p[3]; - p += 2; - b1 ^= 8; - if ((b3 & b1) >= 0xF8) - break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; } { - UInt32 v = - ((UInt32)b1 << 19) - + (((UInt32)p[1] & 0x7) << 8) - + (((UInt32)p[-2] << 11)) - + (p[0]); - - p += 2; - { - UInt32 cur = (ip + (UInt32)(p - data)) >> 1; - v += cur; - } - - p[-4] = (Byte)(v >> 11); - p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); - p[-2] = (Byte)v; - p[-1] = (Byte)(0xF8 | (v >> 8)); - } - } - - for (;;) - { - UInt32 b1; - for (;;) - { - UInt32 b3; - if (p > lim) - return (SizeT)(p - data); - b1 = p[1]; - b3 = p[3]; - p += 2; - b1 ^= 8; - if ((b3 & b1) >= 0xF8) - break; - } - { - UInt32 v = - ((UInt32)b1 << 19) - + (((UInt32)p[1] & 0x7) << 8) - + (((UInt32)p[-2] << 11)) - + (p[0]); - - p += 2; - { - UInt32 cur = (ip + (UInt32)(p - data)) >> 1; - v -= cur; - } - - /* - SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000)); - SetUi16(p - 2, (UInt16)(v | 0xF800)); - */ - - p[-4] = (Byte)(v >> 11); - p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); - p[-2] = (Byte)v; - p[-1] = (Byte)(0xF8 | (v >> 8)); + UInt32 v = GetUi32a(p - 4); + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x00ffffff; + v |= 0xeb000000; + SetUi32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(ARM) -SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +Z7_BRANCH_FUNC_MAIN(PPC) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip -= 4; - p = data; - lim = data + size; - + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + for (;;) { + UInt32 v; for (;;) { - if (p >= lim) - return (SizeT)(p - data); + if Z7_UNLIKELY(p == lim) + return p; + // v = GetBe32a(p); + v = *(UInt32 *)(void *)p; p += 4; - /* if ((v & 0xFC000003) == 0x48000001) */ - if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) - break; + // if ((v & 0xfc000003) == 0x48000001) break; + // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; + if Z7_UNLIKELY( + ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) + & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; } { - UInt32 v = GetBe32(p - 4); - if (encoding) - v += ip + (UInt32)(p - data); - else - v -= ip + (UInt32)(p - data); - v &= 0x03FFFFFF; + v = Z7_CONV_NATIVE_TO_BE_32(v); + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= 0x03ffffff; v |= 0x48000000; - SetBe32(p - 4, v); + SetBe32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(PPC) -SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED +#define BR_SPARC_USE_ROTATE +#endif + +Z7_BRANCH_FUNC_MAIN(SPARC) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip -= 4; - p = data; - lim = data + size; - + const UInt32 flag = (UInt32)1 << 22; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction for (;;) { + UInt32 v; for (;;) { - if (p >= lim) - return (SizeT)(p - data); - /* - v = GetBe32(p); - p += 4; - m = v + ((UInt32)5 << 29); - m ^= (UInt32)7 << 29; - m += (UInt32)1 << 22; - if ((m & ((UInt32)0x1FF << 23)) == 0) - break; + if Z7_UNLIKELY(p == lim) + return p; + /* // the code without GetBe32a(): + { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } */ + v = GetBe32a(p); p += 4; - if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || - (p[-4] == 0x7F && (p[-3] >= 0xC0))) + #ifdef BR_SPARC_USE_ROTATE + v = rotlFixed(v, 2); + v += (flag << 2) - 1; + if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) + #else + v += (UInt32)5 << 29; + v ^= (UInt32)7 << 29; + v += flag; + if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) + #endif break; } { - UInt32 v = GetBe32(p - 4); + // UInt32 v = GetBe32a(p - 4); + #ifndef BR_SPARC_USE_ROTATE v <<= 2; - if (encoding) - v += ip + (UInt32)(p - data); - else - v -= ip + (UInt32)(p - data); - - v &= 0x01FFFFFF; - v -= (UInt32)1 << 24; - v ^= 0xFF000000; + #endif + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= (flag << 3) - 1; + #ifdef BR_SPARC_USE_ROTATE + v -= (flag << 2) - 1; + v = rotrFixed(v, 2); + #else + v -= (flag << 2); v >>= 2; - v |= 0x40000000; - SetBe32(p - 4, v); + v |= (UInt32)1 << 30; + #endif + SetBe32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(SPARC) + + +Z7_BRANCH_FUNC_MAIN(ARMT) +{ + // Byte *p = data; + Byte *lim; + size &= ~(SizeT)1; + // if (size == 0) return p; + if (size <= 2) return p; + size -= 2; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to the +2 instructions from current instruction */ + // pc += 4 - 4; + // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; + // #define ARMT_TAIL_PROC { goto armt_tail; } + #define ARMT_TAIL_PROC { return p; } + + do + { + /* in MSVC 32-bit x86 compilers: + UInt32 version : it loads value from memory with movzx + Byte version : it loads value to 8-bit register (AL/CL) + movzx version is slightly faster in some cpus + */ + unsigned b1; + // Byte / unsigned + b1 = p[1]; + // optimized version to reduce one (p >= lim) check: + // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) + for (;;) + { + unsigned b3; // Byte / UInt32 + /* (Byte)(b3) normalization can use low byte computations in MSVC. + It gives smaller code, and no loss of speed in some compilers/cpus. + But new MSVC 32-bit x86 compilers use more slow load + from memory to low byte register in that case. + So we try to use full 32-bit computations for faster code. + */ + // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; + } + { + /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. + But gcc/clang for arm64 can use bfi instruction for full code here */ + UInt32 v = + ((UInt32)GetUi16a(p - 2) << 11) | + ((UInt32)GetUi16a(p) & 0x7FF); + /* + UInt32 v = + ((UInt32)p[1 - 2] << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + */ + p += 2; + { + UInt32 c = BR_PC_GET >> 1; + BR_CONVERT_VAL(v, c) + } + SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) + SetUi16a(p - 2, (UInt16)(v | 0xf800)) + /* + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xf8 | (v >> 8)); + */ + } + } + while (p < lim); + return p; + // armt_tail: + // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; + // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); + // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); + // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); +} +Z7_BRANCH_FUNCS_IMP(ARMT) + + +// #define BR_IA64_NO_INLINE + +Z7_BRANCH_FUNC_MAIN(IA64) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)15; + lim = p + size; + pc -= 1 << 4; + pc >>= 4 - 1; + // pc -= 1 << 1; + + for (;;) + { + unsigned m; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); + p += 16; + pc += 1 << 1; + if (m &= 3) + break; + } + { + p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. + do + { + const UInt32 t = + #if defined(MY_CPU_X86_OR_AMD64) + // we use 32-bit load here to reduce code size on x86: + GetUi32(p); + #else + GetUi16(p); + #endif + UInt32 z = GetUi32(p + 1) >> m; + p += 5; + if (((t >> m) & (0x70 << 1)) == 0 + && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) + { + UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; + z ^= v; + #ifdef BR_IA64_NO_INLINE + v |= (v & ((UInt32)1 << (23 + 1))) >> 3; + { + UInt32 c = pc; + BR_CONVERT_VAL(v, c) + } + v &= (0x1fffff << 1) | 1; + #else + { + if (encoding) + { + // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits + pc &= (0x1fffff << 1) | 1; + v += pc; + } + else + { + // pc |= 0xc00000 << 1; // we need to set at least 2 bits + pc |= ~(UInt32)((0x1fffff << 1) | 1); + v -= pc; + } + } + v &= ~(UInt32)(0x600000 << 1); + #endif + v += (0x700000 << 1); + v &= (0x8fffff << 1) | 1; + z |= v; + z <<= m; + SetUi32(p + 1 - 5, z) + } + m++; + } + while (m &= 3); // while (m < 4); + } + } +} +Z7_BRANCH_FUNCS_IMP(IA64) diff --git a/3rdparty/7z/src/Bra.h b/3rdparty/7z/src/Bra.h index aba8dce14f..aeedaba305 100644 --- a/3rdparty/7z/src/Bra.h +++ b/3rdparty/7z/src/Bra.h @@ -1,64 +1,99 @@ /* Bra.h -- Branch converters for executables -2013-01-18 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __BRA_H -#define __BRA_H +#ifndef ZIP7_INC_BRA_H +#define ZIP7_INC_BRA_H #include "7zTypes.h" EXTERN_C_BEGIN -/* -These functions convert relative addresses to absolute addresses -in CALL instructions to increase the compression ratio. - - In: - data - data buffer - size - size of data - ip - current virtual Instruction Pinter (IP) value - state - state variable for x86 converter - encoding - 0 (for decoding), 1 (for encoding) - - Out: - state - state variable for x86 converter +#define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc +#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec +#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc - Returns: - The number of processed bytes. If you call these functions with multiple calls, - you must start next call with first byte after block of processed bytes. +#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc) +#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state) + +typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); +typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); + +#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 +Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86)); +Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86)); + +#define Z7_BRANCH_FUNCS_DECL(name) \ +Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \ +Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name)); + +Z7_BRANCH_FUNCS_DECL(ARM64) +Z7_BRANCH_FUNCS_DECL(ARM) +Z7_BRANCH_FUNCS_DECL(ARMT) +Z7_BRANCH_FUNCS_DECL(PPC) +Z7_BRANCH_FUNCS_DECL(SPARC) +Z7_BRANCH_FUNCS_DECL(IA64) + +/* +These functions convert data that contain CPU instructions. +Each such function converts relative addresses to absolute addresses in some +branch instructions: CALL (in all converters) and JUMP (X86 converter only). +Such conversion allows to increase compression ratio, if we compress that data. + +There are 2 types of converters: + Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc); + Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state); +Each Converter supports 2 versions: one for encoding +and one for decoding (_Enc/_Dec postfixes in function name). + +In params: + data : data buffer + size : size of data + pc : current virtual Program Counter (Instruction Pinter) value +In/Out param: + state : pointer to state variable (for X86 converter only) + +Return: + The pointer to position in (data) buffer after last byte that was processed. + If the caller calls converter again, it must call it starting with that position. + But the caller is allowed to move data in buffer. so pointer to + current processed position also will be changed for next call. + Also the caller must increase internal (pc) value for next call. +Each converter has some characteristics: Endian, Alignment, LookAhead. Type Endian Alignment LookAhead - x86 little 1 4 + X86 little 1 4 ARMT little 2 2 ARM little 4 0 + ARM64 little 4 0 PPC big 4 0 SPARC big 4 0 IA64 little 16 0 - size must be >= Alignment + LookAhead, if it's not last block. - If (size < Alignment + LookAhead), converter returns 0. + (data) must be aligned for (Alignment). + processed size can be calculated as: + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0) + it means that converter needs more data for processing. + If (size < Alignment + LookAhead) + then (processed == 0) is allowed. - Example: - - UInt32 ip = 0; - for () - { - ; size must be >= Alignment + LookAhead, if it's not last block - SizeT processed = Convert(data, size, ip, 1); - data += processed; - size -= processed; - ip += processed; - } +Example code for conversion in loop: + UInt32 pc = 0; + size = 0; + for (;;) + { + size += Load_more_input_data(data + size); + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0 && no_more_input_data_after_size) + break; // we stop convert loop + data += processed; + size -= processed; + pc += processed; + } */ -#define x86_Convert_Init(state) { state = 0; } -SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding); -SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); - EXTERN_C_END #endif diff --git a/3rdparty/7z/src/Bra86.c b/3rdparty/7z/src/Bra86.c index d857dac677..9c468ba742 100644 --- a/3rdparty/7z/src/Bra86.c +++ b/3rdparty/7z/src/Bra86.c @@ -1,82 +1,187 @@ -/* Bra86.c -- Converter for x86 code (BCJ) -2021-02-09 : Igor Pavlov : Public domain */ +/* Bra86.c -- Branch converter for X86 code (BCJ) +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Bra.h" +#include "CpuArch.h" -#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0) -SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name + +#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) + +#ifdef MY_CPU_LE_UNALIGN + #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; + #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) +#else + #define BR86_PREPARE_BCJ_SCAN + // bad for MSVC X86 (partial write to byte reg): + #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) + // bad for old MSVC (partial write to byte reg): + // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) +#endif + +static +Z7_FORCE_INLINE +Z7_ATTRIB_NO_VECTOR +Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) { - SizeT pos = 0; - UInt32 mask = *state & 7; if (size < 5) - return 0; - size -= 4; - ip += 5; + return p; + { + // Byte *p = data; + const Byte *lim = p + size - 4; + unsigned mask = (unsigned)*state; // & 7; +#ifdef BR_CONV_USE_OPT_PC_PTR + /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), + because call/jump offset is relative to the next instruction. + if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), + because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. + */ + pc += 4; +#endif + BR_PC_INIT + goto start; - for (;;) + for (;; mask |= 4) { - Byte *p = data + pos; - const Byte *limit = data + size; - for (; p < limit; p++) - if ((*p & 0xFE) == 0xE8) - break; - + // cont: mask |= 4; + start: + if (p >= lim) + goto fin; { - SizeT d = (SizeT)(p - data) - pos; - pos = (SizeT)(p - data); - if (p >= limit) - { - *state = (d > 2 ? 0 : mask >> (unsigned)d); - return pos; - } - if (d > 2) - mask = 0; - else - { - mask >>= (unsigned)d; - if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) - { - mask = (mask >> 1) | 4; - pos++; - continue; - } - } + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } } + goto main_loop; - if (Test86MSByte(p[4])) + m0: p--; + m1: p--; + m2: p--; + if (mask == 0) + goto a3; + if (p > lim) + goto fin_p; + + // if (((0x17u >> mask) & 1) == 0) + if (mask > 4 || mask == 3) { - UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); - UInt32 cur = ip + (UInt32)pos; - pos += 5; - if (encoding) - v += cur; - else - v -= cur; - if (mask != 0) + mask >>= 1; + continue; // goto cont; + } + mask >>= 1; + if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) + continue; // goto cont; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) { - unsigned sh = (mask & 6) << 2; - if (Test86MSByte((Byte)(v >> sh))) + mask <<= 3; + if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) { - v ^= (((UInt32)0x100 << sh) - 1); - if (encoding) - v += cur; - else - v -= cur; + v ^= (((UInt32)0x100 << mask) - 1); + #ifdef MY_CPU_X86 + // for X86 : we can recalculate (c) to reduce register pressure + c = BR_PC_GET; + #endif + BR_CONVERT_VAL(v, c) } mask = 0; } - p[1] = (Byte)v; - p[2] = (Byte)(v >> 8); - p[3] = (Byte)(v >> 16); - p[4] = (Byte)(0 - ((v >> 24) & 1)); + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; } - else + + main_loop: + if (p >= lim) + goto fin; + for (;;) { - mask = (mask >> 1) | 4; - pos++; + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto a0; } + if (BR86_IS_BCJ_BYTE(1)) { goto a1; } + if (BR86_IS_BCJ_BYTE(2)) { goto a2; } + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + if (p >= lim) + goto fin; + } + + a0: p--; + a1: p--; + a2: p--; + a3: + if (p > lim) + goto fin_p; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; } } + +fin_p: + p--; +fin: + // the following processing for tail is optional and can be commented + /* + lim += 4; + for (; p < lim; p++, mask >>= 1) + if ((*p & 0xfe) == 0xe8) + break; + */ + *state = (UInt32)mask; + return p; + } } + + +#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ + { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } + +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) +#ifndef Z7_EXTRACT_ONLY +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) +#endif diff --git a/3rdparty/7z/src/BraIA64.c b/3rdparty/7z/src/BraIA64.c index 2656907a0b..1b61927ccd 100644 --- a/3rdparty/7z/src/BraIA64.c +++ b/3rdparty/7z/src/BraIA64.c @@ -1,53 +1,14 @@ /* BraIA64.c -- Converter for IA-64 code -2017-01-26 : Igor Pavlov : Public domain */ +2023-02-20 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "CpuArch.h" -#include "Bra.h" +// the code was moved to Bra.c -SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) -{ - SizeT i; - if (size < 16) - return 0; - size -= 16; - i = 0; - do - { - unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3; - if (m) - { - m++; - do - { - Byte *p = data + (i + (size_t)m * 5 - 8); - if (((p[3] >> m) & 15) == 5 - && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0) - { - unsigned raw = GetUi32(p); - unsigned v = raw >> m; - v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3); - - v <<= 4; - if (encoding) - v += ip + (UInt32)i; - else - v -= ip + (UInt32)i; - v >>= 4; - - v &= 0x1FFFFF; - v += 0x700000; - v &= 0x8FFFFF; - raw &= ~((UInt32)0x8FFFFF << m); - raw |= (v << m); - SetUi32(p, raw); - } - } - while (++m <= 4); - } - i += 16; - } - while (i <= size); - return i; -} +#ifdef _MSC_VER +#pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#endif + +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wempty-translation-unit" +#endif diff --git a/3rdparty/7z/src/BwtSort.c b/3rdparty/7z/src/BwtSort.c new file mode 100644 index 0000000000..05ad6de8b9 --- /dev/null +++ b/3rdparty/7z/src/BwtSort.c @@ -0,0 +1,516 @@ +/* BwtSort.c -- BWT block sorting +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "BwtSort.h" +#include "Sort.h" + +/* #define BLOCK_SORT_USE_HEAP_SORT */ + +/* Don't change it !!! */ +#define kNumHashBytes 2 +#define kNumHashValues (1 << (kNumHashBytes * 8)) + +/* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ +#define kNumRefBitsMax 12 + +#define BS_TEMP_SIZE kNumHashValues + +#ifdef BLOCK_SORT_EXTERNAL_FLAGS + +/* 32 Flags in UInt32 word */ +#define kNumFlagsBits 5 +#define kNumFlagsInWord (1 << kNumFlagsBits) +#define kFlagsMask (kNumFlagsInWord - 1) +#define kAllFlags 0xFFFFFFFF + +#else + +#define kNumBitsMax 20 +#define kIndexMask ((1 << kNumBitsMax) - 1) +#define kNumExtraBits (32 - kNumBitsMax) +#define kNumExtra0Bits (kNumExtraBits - 2) +#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) + +#define SetFinishedGroupSize(p, size) \ + { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ + if ((size) > (1 << kNumExtra0Bits)) { \ + *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ + +static void SetGroupSize(UInt32 *p, UInt32 size) +{ + if (--size == 0) + return; + *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); + if (size >= (1 << kNumExtra0Bits)) + { + *p |= 0x40000000; + p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); + } +} + +#endif + +/* +SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks + "range" is not real range. It's only for optimization. +returns: 1 - if there are groups, 0 - no more groups +*/ + +static +UInt32 +Z7_FASTCALL +SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices + #ifndef BLOCK_SORT_USE_HEAP_SORT + , UInt32 left, UInt32 range + #endif + ) +{ + UInt32 *ind2 = Indices + groupOffset; + UInt32 *Groups; + if (groupSize <= 1) + { + /* + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetFinishedGroupSize(ind2, 1) + #endif + */ + return 0; + } + Groups = Indices + BlockSize + BS_TEMP_SIZE; + if (groupSize <= ((UInt32)1 << NumRefBits) + #ifndef BLOCK_SORT_USE_HEAP_SORT + && groupSize <= range + #endif + ) + { + UInt32 *temp = Indices + BlockSize; + UInt32 j; + UInt32 mask, thereAreGroups, group, cg; + { + UInt32 gPrev; + UInt32 gRes = 0; + { + UInt32 sp = ind2[0] + NumSortedBytes; + if (sp >= BlockSize) sp -= BlockSize; + gPrev = Groups[sp]; + temp[0] = (gPrev << NumRefBits); + } + + for (j = 1; j < groupSize; j++) + { + UInt32 sp = ind2[j] + NumSortedBytes; + UInt32 g; + if (sp >= BlockSize) sp -= BlockSize; + g = Groups[sp]; + temp[j] = (g << NumRefBits) | j; + gRes |= (gPrev ^ g); + } + if (gRes == 0) + { + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetGroupSize(ind2, groupSize); + #endif + return 1; + } + } + + HeapSort(temp, groupSize); + mask = (((UInt32)1 << NumRefBits) - 1); + thereAreGroups = 0; + + group = groupOffset; + cg = (temp[0] >> NumRefBits); + temp[0] = ind2[temp[0] & mask]; + + { + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 *Flags = Groups + BlockSize; + #else + UInt32 prevGroupStart = 0; + #endif + + for (j = 1; j < groupSize; j++) + { + UInt32 val = temp[j]; + UInt32 cgCur = (val >> NumRefBits); + + if (cgCur != cg) + { + cg = cgCur; + group = groupOffset + j; + + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + { + UInt32 t = group - 1; + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + } + #else + SetGroupSize(temp + prevGroupStart, j - prevGroupStart); + prevGroupStart = j; + #endif + } + else + thereAreGroups = 1; + { + UInt32 ind = ind2[val & mask]; + temp[j] = ind; + Groups[ind] = group; + } + } + + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetGroupSize(temp + prevGroupStart, j - prevGroupStart); + #endif + } + + for (j = 0; j < groupSize; j++) + ind2[j] = temp[j]; + return thereAreGroups; + } + + /* Check that all strings are in one group (cannot sort) */ + { + UInt32 group, j; + UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + group = Groups[sp]; + for (j = 1; j < groupSize; j++) + { + sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + if (Groups[sp] != group) + break; + } + if (j == groupSize) + { + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetGroupSize(ind2, groupSize); + #endif + return 1; + } + } + + #ifndef BLOCK_SORT_USE_HEAP_SORT + { + /* ---------- Range Sort ---------- */ + UInt32 i; + UInt32 mid; + for (;;) + { + UInt32 j; + if (range <= 1) + { + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetGroupSize(ind2, groupSize); + #endif + return 1; + } + mid = left + ((range + 1) >> 1); + j = groupSize; + i = 0; + do + { + UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + if (Groups[sp] >= mid) + { + for (j--; j > i; j--) + { + sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + if (Groups[sp] < mid) + { + UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; + break; + } + } + if (i >= j) + break; + } + } + while (++i < j); + if (i == 0) + { + range = range - (mid - left); + left = mid; + } + else if (i == groupSize) + range = (mid - left); + else + break; + } + + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + { + UInt32 t = (groupOffset + i - 1); + UInt32 *Flags = Groups + BlockSize; + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + } + #endif + + { + UInt32 j; + for (j = i; j < groupSize; j++) + Groups[ind2[j]] = groupOffset + i; + } + + { + UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); + return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); + } + + } + + #else + + /* ---------- Heap Sort ---------- */ + + { + UInt32 j; + for (j = 0; j < groupSize; j++) + { + UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + ind2[j] = sp; + } + + HeapSortRef(ind2, Groups, groupSize); + + /* Write Flags */ + { + UInt32 sp = ind2[0]; + UInt32 group = Groups[sp]; + + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 *Flags = Groups + BlockSize; + #else + UInt32 prevGroupStart = 0; + #endif + + for (j = 1; j < groupSize; j++) + { + sp = ind2[j]; + if (Groups[sp] != group) + { + group = Groups[sp]; + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + { + UInt32 t = groupOffset + j - 1; + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + } + #else + SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); + prevGroupStart = j; + #endif + } + } + + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); + #endif + } + { + /* Write new Groups values and Check that there are groups */ + UInt32 thereAreGroups = 0; + for (j = 0; j < groupSize; j++) + { + UInt32 group = groupOffset + j; + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); + if ((ind2[j] & 0x40000000) != 0) + subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); + subGroupSize++; + for (;;) + { + UInt32 original = ind2[j]; + UInt32 sp = original & kIndexMask; + if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; + ind2[j] = sp | (original & ~kIndexMask); + Groups[sp] = group; + if (--subGroupSize == 0) + break; + j++; + thereAreGroups = 1; + } + #else + UInt32 *Flags = Groups + BlockSize; + for (;;) + { + UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; + ind2[j] = sp; + Groups[sp] = group; + if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) + break; + j++; + thereAreGroups = 1; + } + #endif + } + return thereAreGroups; + } + } + #endif +} + +/* conditions: blockSize > 0 */ +UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) +{ + UInt32 *counters = Indices + blockSize; + UInt32 i; + UInt32 *Groups; + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 *Flags; + #endif + + /* Radix-Sort for 2 bytes */ + for (i = 0; i < kNumHashValues; i++) + counters[i] = 0; + for (i = 0; i < blockSize - 1; i++) + counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; + counters[((UInt32)data[i] << 8) | data[0]]++; + + Groups = counters + BS_TEMP_SIZE; + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + Flags = Groups + blockSize; + { + UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; + for (i = 0; i < numWords; i++) + Flags[i] = kAllFlags; + } + #endif + + { + UInt32 sum = 0; + for (i = 0; i < kNumHashValues; i++) + { + UInt32 groupSize = counters[i]; + if (groupSize > 0) + { + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 t = sum + groupSize - 1; + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + #endif + sum += groupSize; + } + counters[i] = sum - groupSize; + } + + for (i = 0; i < blockSize - 1; i++) + Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; + Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; + + for (i = 0; i < blockSize - 1; i++) + Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; + Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; + + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + { + UInt32 prev = 0; + for (i = 0; i < kNumHashValues; i++) + { + UInt32 prevGroupSize = counters[i] - prev; + if (prevGroupSize == 0) + continue; + SetGroupSize(Indices + prev, prevGroupSize); + prev = counters[i]; + } + } + #endif + } + + { + int NumRefBits; + UInt32 NumSortedBytes; + for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); + NumRefBits = 32 - NumRefBits; + if (NumRefBits > kNumRefBitsMax) + NumRefBits = kNumRefBitsMax; + + for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) + { + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + UInt32 finishedGroupSize = 0; + #endif + UInt32 newLimit = 0; + for (i = 0; i < blockSize;) + { + UInt32 groupSize; + #ifdef BLOCK_SORT_EXTERNAL_FLAGS + + if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) + { + i++; + continue; + } + for (groupSize = 1; + (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; + groupSize++); + + groupSize++; + + #else + + groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); + { + BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); + if ((Indices[i] & 0x40000000) != 0) + { + groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); + Indices[(size_t)i + 1] &= kIndexMask; + } + Indices[i] &= kIndexMask; + groupSize++; + if (finishedGroup || groupSize == 1) + { + Indices[i - finishedGroupSize] &= kIndexMask; + if (finishedGroupSize > 1) + Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; + { + UInt32 newGroupSize = groupSize + finishedGroupSize; + SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) + finishedGroupSize = newGroupSize; + } + i += groupSize; + continue; + } + finishedGroupSize = 0; + } + + #endif + + if (NumSortedBytes >= blockSize) + { + UInt32 j; + for (j = 0; j < groupSize; j++) + { + UInt32 t = (i + j); + /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ + Groups[Indices[t]] = t; + } + } + else + if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices + #ifndef BLOCK_SORT_USE_HEAP_SORT + , 0, blockSize + #endif + ) != 0) + newLimit = i + groupSize; + i += groupSize; + } + if (newLimit == 0) + break; + } + } + #ifndef BLOCK_SORT_EXTERNAL_FLAGS + for (i = 0; i < blockSize;) + { + UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); + if ((Indices[i] & 0x40000000) != 0) + { + groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); + Indices[(size_t)i + 1] &= kIndexMask; + } + Indices[i] &= kIndexMask; + groupSize++; + i += groupSize; + } + #endif + return Groups[0]; +} diff --git a/3rdparty/7z/src/BwtSort.h b/3rdparty/7z/src/BwtSort.h new file mode 100644 index 0000000000..a34b243a33 --- /dev/null +++ b/3rdparty/7z/src/BwtSort.h @@ -0,0 +1,26 @@ +/* BwtSort.h -- BWT block sorting +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BWT_SORT_H +#define ZIP7_INC_BWT_SORT_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */ +/* #define BLOCK_SORT_EXTERNAL_FLAGS */ + +#ifdef BLOCK_SORT_EXTERNAL_FLAGS +#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5)) +#else +#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0 +#endif + +#define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16)) + +UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize); + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/Compiler.h b/3rdparty/7z/src/Compiler.h index eba374298d..ca4618b394 100644 --- a/3rdparty/7z/src/Compiler.h +++ b/3rdparty/7z/src/Compiler.h @@ -1,12 +1,37 @@ -/* Compiler.h -2021-01-05 : Igor Pavlov : Public domain */ +/* Compiler.h : Compiler specific defines and pragmas +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_COMPILER_H -#define __7Z_COMPILER_H +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H + +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +#define Z7_MINGW +#endif + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" +#endif - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wunused-private-field" - #endif #ifdef _MSC_VER @@ -17,24 +42,115 @@ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #endif - #if _MSC_VER >= 1300 - #pragma warning(disable : 4996) // This function or variable may be unsafe - #else - #pragma warning(disable : 4511) // copy constructor could not be generated - #pragma warning(disable : 4512) // assignment operator could not be generated - #pragma warning(disable : 4514) // unreferenced inline function has been removed - #pragma warning(disable : 4702) // unreachable code - #pragma warning(disable : 4710) // not inlined - #pragma warning(disable : 4714) // function marked as __forceinline not inlined - #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information - #endif +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' +#endif - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wdeprecated-declarations" - #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" - // #pragma clang diagnostic ignored "-Wreserved-id-macro" - #endif +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && (_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; diff --git a/3rdparty/7z/src/CpuArch.c b/3rdparty/7z/src/CpuArch.c index a0e93e8b08..5092d380d9 100644 --- a/3rdparty/7z/src/CpuArch.c +++ b/3rdparty/7z/src/CpuArch.c @@ -1,187 +1,318 @@ /* CpuArch.c -- CPU specific code -2021-07-13 : Igor Pavlov : Public domain */ +2023-05-18 : Igor Pavlov : Public domain */ #include "Precomp.h" +// #include + #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 -#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) -#define USE_ASM +#undef NEED_CHECK_FOR_CPUID +#if !defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID #endif -#if !defined(USE_ASM) && _MSC_VER >= 1500 -#include +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + But we always use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(0) means (subFunction = 0), + 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + +#else + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + #endif -#if defined(USE_ASM) && !defined(MY_CPU_AMD64) -static UInt32 CheckFlag(UInt32 flag) + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - #ifdef _MSC_VER - __asm pushfd; - __asm pop EAX; - __asm mov EDX, EAX; - __asm xor EAX, flag; - __asm push EAX; - __asm popfd; - __asm pushfd; - __asm pop EAX; - __asm xor EAX, EDX; - __asm push EDX; - __asm popfd; - __asm and flag, EAX; - #else - __asm__ __volatile__ ( - "pushf\n\t" - "pop %%EAX\n\t" - "movl %%EAX,%%EDX\n\t" - "xorl %0,%%EAX\n\t" - "push %%EAX\n\t" - "popf\n\t" - "pushf\n\t" - "pop %%EAX\n\t" - "xorl %%EDX,%%EAX\n\t" - "push %%EDX\n\t" - "popf\n\t" - "andl %%EAX, %0\n\t": - "=c" (flag) : "c" (flag) : - "%eax", "%edx"); - #endif - return flag; + x86_cpuid_MACRO(p, func) } -#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; + __asm__ __volatile__ ( + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd + __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: + #endif + __asm ret 0 +} + +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +#else // MY_CPU_AMD64 + + #if _MSC_VER >= 1600 + #include + #define MY_cpuidex __cpuidex + #else +/* + __cpuid (func == (0 or 7)) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FASTCALL / NO_INLINE func, + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! +*/ +static +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) +{ + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); +} + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE +#endif +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + MY_cpuidex((int *)p, (int)func, 0); +} + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + int a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} + +#endif // MY_CPU_AMD64 +#endif // _MSC_VER + +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } #else #define CHECK_CPUID_IS_SUPPORTED #endif +#undef NEED_CHECK_FOR_CPUID -#ifndef USE_ASM - #ifdef _MSC_VER - #if _MSC_VER >= 1600 - #define MY__cpuidex __cpuidex - #else - -/* - __cpuid (function == 4) requires subfunction number in ECX. - MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. - __cpuid() in new MSVC clears ECX. - __cpuid() in old MSVC (14.00) doesn't clear ECX - We still can use __cpuid for low (function) values that don't require ECX, - but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). - So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, - where ECX value is first parameter for FAST_CALL / NO_INLINE function, - So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and - old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. - - DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! -*/ static -MY_NO_INLINE -void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) -{ - UNUSED_VAR(subFunction); - __cpuid(CPUInfo, function); -} - - #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) - #pragma message("======== MY__cpuidex_HACK WAS USED ========") - #endif - #else - #define MY__cpuidex(info, func, func2) __cpuid(info, func) - #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") - #endif -#endif - - - - -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) -{ - #ifdef USE_ASM - - #ifdef _MSC_VER - - UInt32 a2, b2, c2, d2; - __asm xor EBX, EBX; - __asm xor ECX, ECX; - __asm xor EDX, EDX; - __asm mov EAX, function; - __asm cpuid; - __asm mov a2, EAX; - __asm mov b2, EBX; - __asm mov c2, ECX; - __asm mov d2, EDX; - - *a = a2; - *b = b2; - *c = c2; - *d = d2; - - #else - - __asm__ __volatile__ ( - #if defined(MY_CPU_AMD64) && defined(__PIC__) - "mov %%rbx, %%rdi;" - "cpuid;" - "xchg %%rbx, %%rdi;" - : "=a" (*a) , - "=D" (*b) , - #elif defined(MY_CPU_X86) && defined(__PIC__) - "mov %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=a" (*a) , - "=D" (*b) , - #else - "cpuid" - : "=a" (*a) , - "=b" (*b) , - #endif - "=c" (*c) , - "=d" (*d) - : "0" (function), "c"(0) ) ; - - #endif - - #else - - int CPUInfo[4]; - - MY__cpuidex(CPUInfo, (int)function, 0); - - *a = (UInt32)CPUInfo[0]; - *b = (UInt32)CPUInfo[1]; - *c = (UInt32)CPUInfo[2]; - *d = (UInt32)CPUInfo[3]; - - #endif -} - -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +BoolInt x86cpuid_Func_1(UInt32 *p) { CHECK_CPUID_IS_SUPPORTED - MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); - MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + z7_x86_cpuid(p, 1); return True; } -static const UInt32 kVendors[][3] = +/* +static const UInt32 kVendors[][1] = { - { 0x756E6547, 0x49656E69, 0x6C65746E}, - { 0x68747541, 0x69746E65, 0x444D4163}, - { 0x746E6543, 0x48727561, 0x736C7561} + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } }; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) int x86cpuid_GetFirm(const Cx86cpuid *p) { unsigned i; - for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) { const UInt32 *v = kVendors[i]; - if (v[0] == p->vendor[0] && - v[1] == p->vendor[1] && - v[2] == p->vendor[2]) + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + // && v[2] == p->vendor[2] + ) return (int)i; } return -1; @@ -190,41 +321,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p) BoolInt CPU_Is_InOrder() { Cx86cpuid p; - int firm; UInt32 family, model; if (!x86cpuid_CheckAndRead(&p)) return True; - family = x86cpuid_GetFamily(p.ver); - model = x86cpuid_GetModel(p.ver); - - firm = x86cpuid_GetFirm(&p); + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); - switch (firm) + switch (x86cpuid_GetFirm(&p)) { case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( - /* In-Order Atom CPU */ - model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ - || model == 0x26 /* 45 nm, Z6xx */ - || model == 0x27 /* 32 nm, Z2460 */ - || model == 0x35 /* 32 nm, Z2760 */ - || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx ))); case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); } - return True; + return False; // v23 : unknown processors are not In-Order } +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif #if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include -static BoolInt CPU_Sys_Is_SSE_Supported() + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. +*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) { - OSVERSIONINFO vi; - vi.dwOSVersionInfoSize = sizeof(vi); - if (!GetVersionEx(&vi)) - return False; - return (vi.dwMajorVersion >= 5); +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif } #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; #else @@ -232,94 +377,300 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #endif -static UInt32 X86_CPUID_ECX_Get_Flags() +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) return 0; - return p.c; + return (a[3] >> 15) & 1; } -BoolInt CPU_IsSupported_AES() +BoolInt CPU_IsSupported_SSE(void) { - return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; -} - -BoolInt CPU_IsSupported_SSSE3() -{ - return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; -} - -BoolInt CPU_IsSupported_SSE41() -{ - return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; -} - -BoolInt CPU_IsSupported_SHA() -{ - Cx86cpuid p; + UInt32 a[4]; CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) - return False; + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 25) & 1; +} - if (p.maxFunc < 7) +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 26) & 1; +} + +#endif + + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return a[2]; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (x86cpuid_Func_1_ECX() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (x86cpuid_Func_1_ECX() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (x86cpuid_Func_1_ECX() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) +{ + CHECK_SYS_SSE_SUPPORT + + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); return (d[1] >> 29) & 1; } } -// #include +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. + So _xgetbv() is better -#ifdef _WIN32 -#include +ICC: _xgetbv() intrinsic is available (in what version of ICC?) + ICC defines (__GNUC___) and it supports gnu assembler + also ICC supports MASM style code with -use-msasm switch. + but ICC doesn't support __attribute__((__target__)) + +GCC/CLANG 9: + _xgetbv() is macro that works via __builtin_ia32_xgetbv() + and we need __attribute__((__target__("xsave")). + But with __target__("xsave") the function will be not + inlined to function that has no __target__("xsave") attribute. + If we want _xgetbv() call inlining, then we should use asm version + instead of calling _xgetbv(). + Note:intrinsic is broke before GCC 8.2: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684 +*/ + +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \ + || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \ + || defined(__GNUC__) && (__GNUC__ >= 9) \ + || defined(__clang__) && (__clang_major__ >= 9) +// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler +#if defined(__INTEL_COMPILER) +#define ATTRIB_XGETBV +#elif defined(__GNUC__) || defined(__clang__) +// we don't define ATTRIB_XGETBV here, because asm version is better for inlining. +// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif #endif -BoolInt CPU_IsSupported_AVX2() -{ - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT +#if defined(ATTRIB_XGETBV) +#include +#endif + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) +{ +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} + +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. But we still can use new extensions, + even if Windows doesn't report about supporting them, + But we can use new extensions, only if Windows knows about new ISA extension + that changes the number or size of registers: SSE, AVX/XSAVE, AVX512 + So it's enough to check + MY_PF_AVX_INSTRUCTIONS_AVAILABLE + instead of + MY_PF_AVX2_INSTRUCTIONS_AVAILABLE +*/ +#define MY_PF_XSAVE_ENABLED 17 +// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39 +// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#endif + +BoolInt CPU_IsSupported_AVX(void) +{ #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED)) return False; + /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from + some latest Win10 revisions. But we need AVX in older Windows also. + So we don't use the following check: */ + /* + if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE)) + return False; + */ #endif - if (!x86cpuid_CheckAndRead(&p)) + /* + OS must use new special XSAVE/XRSTOR instructions to save + AVX registers when it required for context switching. + At OS statring: + OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions. + Also OS sets bitmask in XCR0 register that defines what + registers will be processed by XSAVE instruction: + XCR0.SSE[bit 0] - x87 registers and state + XCR0.SSE[bit 1] - SSE registers and state + XCR0.AVX[bit 2] - AVX registers and state + CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27]. + So we can read that bit in user-space. + XCR0 is available for reading in user-space by new XGETBV instruction. + */ + { + const UInt32 c = x86cpuid_Func_1_ECX(); + if (0 == (1 + & (c >> 28) // AVX instructions are supported by hardware + & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS. + return False; + } + + /* also we can check + CPUID.1:ECX.XSAVE [bit 26] : that shows that + XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware. + But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */ + + /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1), + in most cases we expect that OS also will support storing/restoring + for AVX and SSE states at least. + But to be ensure for that we call user-space instruction + XGETBV(0) to get XCR0 value that contains bitmask that defines + what exact states(registers) OS have enabled for storing/restoring. + */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=%d\n", bm); + return 1 + & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 & (d[1] >> 5); // avx2 } } -BoolInt CPU_IsSupported_VAES_AVX2() +BoolInt CPU_IsSupported_VAES_AVX2(void) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT - - #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + if (!CPU_IsSupported_AVX()) return False; - #endif - - if (!x86cpuid_CheckAndRead(&p)) - return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 & (d[1] >> 5) // avx2 @@ -328,20 +679,15 @@ BoolInt CPU_IsSupported_VAES_AVX2() } } -BoolInt CPU_IsSupported_PageGB() +BoolInt CPU_IsSupported_PageGB(void) { - Cx86cpuid cpuid; - if (!x86cpuid_CheckAndRead(&cpuid)) - return False; + CHECK_CPUID_IS_SUPPORTED { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); if (d[0] < 0x80000001) return False; - } - { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + z7_x86_cpuid(d, 0x80000001); return (d[3] >> 26) & 1; } } @@ -351,11 +697,11 @@ BoolInt CPU_IsSupported_PageGB() #ifdef _WIN32 -#include +#include "7zWindows.h" -BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } #else @@ -378,28 +724,27 @@ static void Print_sysctlbyname(const char *name) } } */ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ -static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) { UInt32 val = 0; - if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) return 1; return 0; } - /* - Print_sysctlbyname("hw.pagesize"); - Print_sysctlbyname("machdep.cpu.brand_string"); - */ - BoolInt CPU_IsSupported_CRC32(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); } BoolInt CPU_IsSupported_NEON(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); } #ifdef MY_CPU_ARM64 @@ -461,15 +806,15 @@ MY_HWCAP_CHECK_FUNC (AES) #include -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) { return sysctlbyname(name, buf, bufSize, NULL, 0); } -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) { size_t bufSize = sizeof(*val); - int res = My_sysctlbyname_Get(name, val, &bufSize); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); if (res == 0 && bufSize != sizeof(*val)) return EFAULT; return res; diff --git a/3rdparty/7z/src/CpuArch.h b/3rdparty/7z/src/CpuArch.h index ba8782714d..6c1cd2ab22 100644 --- a/3rdparty/7z/src/CpuArch.h +++ b/3rdparty/7z/src/CpuArch.h @@ -1,8 +1,8 @@ /* CpuArch.h -- CPU specific code -2022-07-15 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __CPU_ARCH_H -#define __CPU_ARCH_H +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H #include "7zTypes.h" @@ -51,7 +51,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #define MY_CPU_NAME "arm64" + #ifdef __ILP32__ + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "arm64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif #define MY_CPU_64BIT #endif @@ -68,8 +74,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_ARM #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT #define MY_CPU_NAME "armt" #else + #define MY_CPU_ARM32 #define MY_CPU_NAME "arm" #endif /* #define MY_CPU_32BIT */ @@ -103,6 +111,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(__PPC__) \ || defined(_POWER) +#define MY_CPU_PPC_OR_PPC64 + #if defined(__ppc64__) \ || defined(__powerpc64__) \ || defined(_LP64) \ @@ -197,6 +207,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #error Stop_Compiling_Bad_Endian #endif +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #error Stop_Compiling_Bad_32_64_BIT @@ -253,6 +266,67 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM64) @@ -272,13 +346,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #ifdef MY_CPU_LE_UNALIGN_64 #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } #endif #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } -#ifdef MY_CPU_LE_UNALIGN_64 -#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } -#endif #else @@ -305,51 +377,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif -#ifndef MY_CPU_LE_UNALIGN_64 - +#ifndef GetUi64 #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif +#ifndef SetUi64 #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ - SetUi32(_ppp2_ , (UInt32)_vvv2_); \ - SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } - + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } #endif +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } -#ifdef __has_builtin - #define MY__has_builtin(x) __has_builtin(x) -#else - #define MY__has_builtin(x) 0 +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) - -/* Note: we use bswap instruction, that is unsupported in 386 cpu */ - -#include - -#pragma intrinsic(_byteswap_ushort) -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) - -/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) - -#elif defined(MY_CPU_LE_UNALIGN) && ( \ - (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) - -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) - #else #define GetBe32(p) ( \ @@ -358,8 +405,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. ((UInt32)((const Byte *)(p))[2] << 8) | \ ((const Byte *)(p))[3] ) -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) - #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \ @@ -368,50 +413,83 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif #ifndef GetBe16 - #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) - #endif +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV +#endif + + +#if defined(MY_CPU_BE) + +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) + +#elif defined(MY_CPU_LE) + +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) + +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + #ifdef MY_CPU_X86_OR_AMD64 -typedef struct -{ - UInt32 maxFunc; - UInt32 vendor[3]; - UInt32 ver; - UInt32 b; - UInt32 c; - UInt32 d; -} Cx86cpuid; - -enum -{ - CPU_FIRM_INTEL, - CPU_FIRM_AMD, - CPU_FIRM_VIA -}; - -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); - -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); -int x86cpuid_GetFirm(const Cx86cpuid *p); - -#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) -#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) -#define x86cpuid_GetStepping(ver) (ver & 0xF) - -BoolInt CPU_Is_InOrder(void); +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SHA(void); @@ -436,8 +514,8 @@ BoolInt CPU_IsSupported_AES(void); #endif #if defined(__APPLE__) -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/3rdparty/7z/src/Delta.h b/3rdparty/7z/src/Delta.h index e59d5a252b..be77c6c6c1 100644 --- a/3rdparty/7z/src/Delta.h +++ b/3rdparty/7z/src/Delta.h @@ -1,8 +1,8 @@ /* Delta.h -- Delta converter -2013-01-18 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ -#ifndef __DELTA_H -#define __DELTA_H +#ifndef ZIP7_INC_DELTA_H +#define ZIP7_INC_DELTA_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/DllSecur.c b/3rdparty/7z/src/DllSecur.c index a37c1b3e2c..f6c23de6f0 100644 --- a/3rdparty/7z/src/DllSecur.c +++ b/3rdparty/7z/src/DllSecur.c @@ -1,18 +1,28 @@ /* DllSecur.c -- DLL loading security -2022-07-15 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 -#include +#include "7zWindows.h" #include "DllSecur.h" #ifndef UNDER_CE -#if defined(__GNUC__) && (__GNUC__ >= 8) - #pragma GCC diagnostic ignored "-Wcast-function-type" +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif + +#if defined(__clang__) || defined(__GNUC__) +typedef void (*Z7_voidFunction)(void); +#define MY_CAST_FUNC (Z7_voidFunction) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define MY_CAST_FUNC (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define MY_CAST_FUNC #endif typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); @@ -20,95 +30,82 @@ typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); #define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400 #define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800 +#define DELIM "\0" + static const char * const g_Dlls = + "userenv" + DELIM "setupapi" + DELIM "apphelp" + DELIM "propsys" + DELIM "dwmapi" + DELIM "cryptbase" + DELIM "oleacc" + DELIM "clbcatq" + DELIM "version" #ifndef _CONSOLE - "UXTHEME\0" + DELIM "uxtheme" #endif - "USERENV\0" - "SETUPAPI\0" - "APPHELP\0" - "PROPSYS\0" - "DWMAPI\0" - "CRYPTBASE\0" - "OLEACC\0" - "CLBCATQ\0" - "VERSION\0" - ; + DELIM; #endif -// #define MY_CAST_FUNC (void(*)()) -#define MY_CAST_FUNC +#ifdef __clang__ + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +#if defined (_MSC_VER) && _MSC_VER >= 1900 +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif -void My_SetDefaultDllDirectories() +#define IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN \ + if ((UInt16)GetVersion() != 6) { \ + const \ + Func_SetDefaultDllDirectories setDllDirs = \ + (Func_SetDefaultDllDirectories) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \ + "SetDefaultDllDirectories"); \ + if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; } + +void My_SetDefaultDllDirectories(void) { #ifndef UNDER_CE - - OSVERSIONINFO vi; - vi.dwOSVersionInfoSize = sizeof(vi); - if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0) - { - Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories) - MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories"); - if (setDllDirs) - if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) - return; - } - + IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN #endif } -void LoadSecurityDlls() +void LoadSecurityDlls(void) { #ifndef UNDER_CE - - wchar_t buf[MAX_PATH + 100]; - - { - // at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ??? - OSVERSIONINFO vi; - vi.dwOSVersionInfoSize = sizeof(vi); - if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0) - { - Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories) - MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories"); - if (setDllDirs) - if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) - return; - } - } - - { - unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2); - if (len == 0 || len > MAX_PATH) - return; - } + // at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ??? + IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN { + wchar_t buf[MAX_PATH + 100]; const char *dll; - unsigned pos = (unsigned)lstrlenW(buf); - + unsigned pos = GetSystemDirectoryW(buf, MAX_PATH + 2); + if (pos == 0 || pos > MAX_PATH) + return; if (buf[pos - 1] != '\\') buf[pos++] = '\\'; - - for (dll = g_Dlls; dll[0] != 0;) + for (dll = g_Dlls; *dll != 0;) { - unsigned k = 0; + wchar_t *dest = &buf[pos]; for (;;) { - char c = *dll++; - buf[pos + k] = (Byte)c; - k++; + const char c = *dll++; if (c == 0) break; + *dest++ = (Byte)c; } - - lstrcatW(buf, L".dll"); + dest[0] = '.'; + dest[1] = 'd'; + dest[2] = 'l'; + dest[3] = 'l'; + dest[4] = 0; + // lstrcatW(buf, L".dll"); LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); } } - #endif } -#endif +#endif // _WIN32 diff --git a/3rdparty/7z/src/DllSecur.h b/3rdparty/7z/src/DllSecur.h index 0fd8070e54..87bacc637c 100644 --- a/3rdparty/7z/src/DllSecur.h +++ b/3rdparty/7z/src/DllSecur.h @@ -1,8 +1,8 @@ /* DllSecur.h -- DLL loading for security -2018-02-19 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ -#ifndef __DLL_SECUR_H -#define __DLL_SECUR_H +#ifndef ZIP7_INC_DLL_SECUR_H +#define ZIP7_INC_DLL_SECUR_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/HuffEnc.c b/3rdparty/7z/src/HuffEnc.c new file mode 100644 index 0000000000..3dc1e392a6 --- /dev/null +++ b/3rdparty/7z/src/HuffEnc.c @@ -0,0 +1,154 @@ +/* HuffEnc.c -- functions for Huffman encoding +2023-03-04 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "HuffEnc.h" +#include "Sort.h" + +#define kMaxLen 16 +#define NUM_BITS 10 +#define MASK (((unsigned)1 << NUM_BITS) - 1) + +#define NUM_COUNTERS 64 + +#define HUFFMAN_SPEED_OPT + +void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen) +{ + UInt32 num = 0; + /* if (maxLen > 10) maxLen = 10; */ + { + UInt32 i; + + #ifdef HUFFMAN_SPEED_OPT + + UInt32 counters[NUM_COUNTERS]; + for (i = 0; i < NUM_COUNTERS; i++) + counters[i] = 0; + for (i = 0; i < numSymbols; i++) + { + UInt32 freq = freqs[i]; + counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++; + } + + for (i = 1; i < NUM_COUNTERS; i++) + { + UInt32 temp = counters[i]; + counters[i] = num; + num += temp; + } + + for (i = 0; i < numSymbols; i++) + { + UInt32 freq = freqs[i]; + if (freq == 0) + lens[i] = 0; + else + p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS); + } + counters[0] = 0; + HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); + + #else + + for (i = 0; i < numSymbols; i++) + { + UInt32 freq = freqs[i]; + if (freq == 0) + lens[i] = 0; + else + p[num++] = i | (freq << NUM_BITS); + } + HeapSort(p, num); + + #endif + } + + if (num < 2) + { + unsigned minCode = 0; + unsigned maxCode = 1; + if (num == 1) + { + maxCode = (unsigned)p[0] & MASK; + if (maxCode == 0) + maxCode++; + } + p[minCode] = 0; + p[maxCode] = 1; + lens[minCode] = lens[maxCode] = 1; + return; + } + + { + UInt32 b, e, i; + + i = b = e = 0; + do + { + UInt32 n, m, freq; + n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; + freq = (p[n] & ~MASK); + p[n] = (p[n] & MASK) | (e << NUM_BITS); + m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; + freq += (p[m] & ~MASK); + p[m] = (p[m] & MASK) | (e << NUM_BITS); + p[e] = (p[e] & MASK) | freq; + e++; + } + while (num - e > 1); + + { + UInt32 lenCounters[kMaxLen + 1]; + for (i = 0; i <= kMaxLen; i++) + lenCounters[i] = 0; + + p[--e] &= MASK; + lenCounters[1] = 2; + while (e != 0) + { + UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; + p[e] = (p[e] & MASK) | (len << NUM_BITS); + if (len >= maxLen) + for (len = maxLen - 1; lenCounters[len] == 0; len--); + lenCounters[len]--; + lenCounters[(size_t)len + 1] += 2; + } + + { + UInt32 len; + i = 0; + for (len = maxLen; len != 0; len--) + { + UInt32 k; + for (k = lenCounters[len]; k != 0; k--) + lens[p[i++] & MASK] = (Byte)len; + } + } + + { + UInt32 nextCodes[kMaxLen + 1]; + { + UInt32 code = 0; + UInt32 len; + for (len = 1; len <= kMaxLen; len++) + nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1; + } + /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ + + { + UInt32 k; + for (k = 0; k < numSymbols; k++) + p[k] = nextCodes[lens[k]]++; + } + } + } + } +} + +#undef kMaxLen +#undef NUM_BITS +#undef MASK +#undef NUM_COUNTERS +#undef HUFFMAN_SPEED_OPT diff --git a/3rdparty/7z/src/HuffEnc.h b/3rdparty/7z/src/HuffEnc.h new file mode 100644 index 0000000000..cbc5d11f94 --- /dev/null +++ b/3rdparty/7z/src/HuffEnc.h @@ -0,0 +1,23 @@ +/* HuffEnc.h -- Huffman encoding +2023-03-05 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_HUFF_ENC_H +#define ZIP7_INC_HUFF_ENC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +Conditions: + num <= 1024 = 2 ^ NUM_BITS + Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS) + maxLen <= 16 = kMaxLen + Num_Items(p) >= HUFFMAN_TEMP_SIZE(num) +*/ + +void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen); + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/LzFind.c b/3rdparty/7z/src/LzFind.c index a17c06bc31..7589a4c322 100644 --- a/3rdparty/7z/src/LzFind.c +++ b/3rdparty/7z/src/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2021-11-29 : Igor Pavlov : Public domain */ +2023-03-14 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -17,7 +17,7 @@ #define kEmptyHashValue 0 #define kMaxValForNormalize ((UInt32)0) -// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses @@ -67,10 +67,10 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { - if (!p->directInput) + // if (!p->directInput) { - ISzAlloc_Free(alloc, p->bufferBase); - p->bufferBase = NULL; + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; } } @@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all { if (blockSize == 0) return 0; - if (!p->bufferBase || p->blockSize != blockSize) + if (!p->bufBase || p->blockSize != blockSize) { // size_t blockSizeT; LzInWindow_Free(p, alloc); @@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all #endif */ - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); - // printf("\nbufferBase = %p\n", p->bufferBase); + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufBase); // return 0; // for debug } - return (p->bufferBase != NULL); + return (p->bufBase != NULL); } static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } @@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) @@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; p->streamPos += curSize; + p->directInputRem -= curSize; if (p->directInputRem == 0) p->streamEndWasReached = 1; return; @@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { - Byte *dest = p->buffer + GET_AVAIL_BYTES(p); - size_t size = (size_t)(p->bufferBase + p->blockSize - dest); + const Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufBase + p->blockSize - dest); if (size == 0) { /* we call ReadBlock() after NeedMove() and MoveBlock(). @@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) // #define kRead 3 // if (size > kRead) size = kRead; // for debug - p->result = ISeqInStream_Read(p->stream, dest, &size); + /* + // we need cast (Byte *)dest. + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + */ + p->result = ISeqInStream_Read(p->stream, + p->bufBase + (dest - p->bufBase), &size); if (p->result != SZ_OK) return; if (size == 0) @@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { - const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore; const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; - p->buffer = p->bufferBase + keepBefore; - memmove(p->bufferBase, - p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + p->buffer = p->bufBase + keepBefore; + memmove(p->bufBase, + p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)), keepBefore + (size_t)GET_AVAIL_BYTES(p)); } @@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p) return 0; if (p->streamEndWasReached || p->result != SZ_OK) return 0; - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); + return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) @@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) p->cutValue = 32; p->btMode = 1; p->numHashBytes = 4; + p->numHashBytes_Min = 2; + p->numHashOutBits = 0; p->bigHash = 0; } @@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p) { unsigned i; - p->bufferBase = NULL; + p->buffer = NULL; + p->bufBase = NULL; p->directInput = 0; + p->stream = NULL; p->hash = NULL; p->expectedDataSize = (UInt64)(Int64)-1; MatchFinder_SetDefaultSettings(p); @@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p) } } +#undef kCrcPoly + static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->hash); @@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) { - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + const size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) return NULL; return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); @@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) } +// input is historySize +static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + +// input is historySize +static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + hs >>= 1; + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) @@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, p->blockSize = 0; if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - const UInt32 newCyclicBufferSize = historySize + 1; // do not change it - UInt32 hs; - p->matchMaxLen = matchMaxLen; + size_t hashSizeSum; { - // UInt32 hs4; - p->fixedHashSize = 0; - hs = (1 << 16) - 1; - if (p->numHashBytes != 2) + UInt32 hs; + UInt32 hsCur; + + if (p->numHashOutBits != 0) { - hs = historySize; - if (hs > p->expectedDataSize) - hs = (UInt32)p->expectedDataSize; - if (hs != 0) - hs--; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - // we propagated 16 bits in (hs). Low 16 bits must be set later - hs >>= 1; - if (hs >= (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ - } - - // hs = ((UInt32)1 << 25) - 1; // for test - + unsigned numBits = p->numHashOutBits; + const unsigned nbMax = + (p->numHashBytes == 2 ? 16 : + (p->numHashBytes == 3 ? 24 : 32)); + if (numBits > nbMax) + numBits = nbMax; + if (numBits >= 32) + hs = (UInt32)0 - 1; + else + hs = ((UInt32)1 << numBits) - 1; // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) hs |= (1 << 16) - 1; /* don't change it! */ - - // bt5: we adjust the size with recommended minimum size if (p->numHashBytes >= 5) hs |= (256 << kLzHash_CrcShift_2) - 1; + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); + if (hs > hs2) + hs = hs2; + } + hsCur = hs; + if (p->expectedDataSize < historySize) + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); + if (hsCur > hs2) + hsCur = hs2; + } + } + else + { + hs = MatchFinder_GetHashMask(p, historySize); + hsCur = hs; + if (p->expectedDataSize < historySize) + { + hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); + if (hsCur > hs) // is it possible? + hsCur = hs; + } } - p->hashMask = hs; - hs++; - /* - hs4 = (1 << 20); - if (hs4 > hs) - hs4 = hs; - // hs4 = (1 << 16); // for test - p->hash4Mask = hs4 - 1; - */ + p->hashMask = hsCur; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; - hs += p->fixedHashSize; + hashSizeSum = hs; + hashSizeSum++; + if (hashSizeSum < hs) + return 0; + { + UInt32 fixedHashSize = 0; + if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size; + if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; + hashSizeSum += fixedHashSize; + p->fixedHashSize = fixedHashSize; + } } + p->matchMaxLen = matchMaxLen; + { size_t newSize; size_t numSons; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it p->historySize = historySize; - p->hashSizeSum = hs; p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) numSons = newCyclicBufferSize; if (p->btMode) numSons <<= 1; - newSize = hs + numSons; + newSize = hashSizeSum + numSons; + + if (numSons < newCyclicBufferSize || newSize < numSons) + return 0; // aligned size is not required here, but it can be better for some loops #define NUM_REFS_ALIGN_MASK 0xF newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; - if (p->hash && p->numRefs == newSize) + // 22.02: we don't reallocate buffer, if old size is enough + if (p->hash && p->numRefs >= newSize) return 1; MatchFinder_FreeThisClassMemory(p, alloc); @@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->hash) { - p->son = p->hash + p->hashSizeSum; + p->son = p->hash + hashSizeSum; return 1; } } @@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p) void MatchFinder_Init_4(CMatchFinder *p) { - p->buffer = p->bufferBase; + if (!p->directInput) + p->buffer = p->bufBase; { /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. the code in CMatchFinderMt expects (pos = 1) */ @@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p) #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) \ - || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) - #define USE_SATUR_SUB_128 - #define USE_AVX2 - #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) - #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + + #define USE_LZFIND_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_256 + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2"))) #elif defined(_MSC_VER) #if (_MSC_VER >= 1600) - #define USE_SATUR_SUB_128 - #if (_MSC_VER >= 1900) - #define USE_AVX2 - #include // avx - #endif + #define USE_LZFIND_SATUR_SUB_128 + #endif + #if (_MSC_VER >= 1900) + #define USE_LZFIND_SATUR_SUB_256 #endif #endif @@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p) #if defined(__clang__) && (__clang_major__ >= 8) \ || defined(__GNUC__) && (__GNUC__ >= 8) - #define USE_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 - // #define ATTRIB_SSE41 __attribute__((__target__(""))) + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #endif #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) - #define USE_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_128 #endif #endif @@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p) #endif -/* -#ifndef ATTRIB_SSE41 - #define ATTRIB_SSE41 -#endif -#ifndef ATTRIB_AVX2 - #define ATTRIB_AVX2 -#endif -*/ -#ifdef USE_SATUR_SUB_128 +#ifdef USE_LZFIND_SATUR_SUB_128 -// #define _SHOW_HW_STATUS +// #define Z7_SHOW_HW_STATUS -#ifdef _SHOW_HW_STATUS +#ifdef Z7_SHOW_HW_STATUS #include -#define _PRF(x) x -_PRF(;) +#define PRF(x) x +PRF(;) #else -#define _PRF(x) +#define PRF(x) #endif + #ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM64 -// #define FORCE_SATUR_SUB_128 +// #define FORCE_LZFIND_SATUR_SUB_128 #endif +typedef uint32x4_t LzFind_v128; +#define SASUB_128_V(v, s) \ + vsubq_u32(vmaxq_u32(v, s), s) -typedef uint32x4_t v128; -#define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); - -#else +#else // MY_CPU_ARM_OR_ARM64 #include // sse4.1 -typedef __m128i v128; +typedef __m128i LzFind_v128; +// SSE 4.1 +#define SASUB_128_V(v, s) \ + _mm_sub_epi32(_mm_max_epu32(v, s), s) + +#endif // MY_CPU_ARM_OR_ARM64 + + #define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 - -#endif + *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \ + *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2); - -MY_NO_INLINE +Z7_NO_INLINE static -#ifdef ATTRIB_SSE41 -ATTRIB_SSE41 +#ifdef LZFIND_ATTRIB_SSE41 +LZFIND_ATTRIB_SSE41 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - v128 sub2 = + const LzFind_v128 sub2 = #ifdef MY_CPU_ARM_OR_ARM64 vdupq_n_u32(subValue); #else _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); #endif + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_128(0) - SASUB_128(1) - SASUB_128(2) - SASUB_128(3) - items += 4 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; } while (items != lim); } -#ifdef USE_AVX2 +#ifdef USE_LZFIND_SATUR_SUB_256 #include // avx +/* +clang :immintrin.h uses +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX2__) +#include +#endif +so we need for clang-cl */ -#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 +#if defined(__clang__) +#include +#include +#endif -MY_NO_INLINE +// AVX2: +#define SASUB_256(i) \ + *( __m256i *)( void *)(items + (i) * 8) = \ + _mm256_sub_epi32(_mm256_max_epu32( \ + *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); + +Z7_NO_INLINE static -#ifdef ATTRIB_AVX2 -ATTRIB_AVX2 +#ifdef LZFIND_ATTRIB_AVX2 +LZFIND_ATTRIB_AVX2 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - __m256i sub2 = _mm256_set_epi32( + const __m256i sub2 = _mm256_set_epi32( (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_256(0) - SASUB_256(1) - items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; } while (items != lim); } -#endif // USE_AVX2 +#endif // USE_LZFIND_SATUR_SUB_256 -#ifndef FORCE_SATUR_SUB_128 -typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( +#ifndef FORCE_LZFIND_SATUR_SUB_128 +typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)( UInt32 subValue, CLzRef *items, const CLzRef *lim); static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; -#endif // FORCE_SATUR_SUB_128 +#endif // FORCE_LZFIND_SATUR_SUB_128 -#endif // USE_SATUR_SUB_128 +#endif // USE_LZFIND_SATUR_SUB_128 // kEmptyHashValue must be zero -// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; -#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; +// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; } +#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; } -#ifdef FORCE_SATUR_SUB_128 +#ifdef FORCE_LZFIND_SATUR_SUB_128 #define DEFAULT_SaturSub LzFind_SaturSub_128 @@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; #define DEFAULT_SaturSub LzFind_SaturSub_32 -MY_NO_INLINE +Z7_NO_INLINE static void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) { + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - UInt32 v; - SASUB_32(0) - SASUB_32(1) - SASUB_32(2) - SASUB_32(3) - SASUB_32(4) - SASUB_32(5) - SASUB_32(6) - SASUB_32(7) - items += 8; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; } while (items != lim); } @@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) #endif -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) - - CLzRef *lim; - - for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7) + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } - { - #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) - lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); - numItems &= K_NORM_ALIGN_MASK; + const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1); + CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask); + numItems &= k_Align_Mask; if (items != lim) { - #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128) if (g_LzFind_SaturSub) g_LzFind_SaturSub(subValue, items, lim); else @@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) } items = lim; } - - + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE for (; numItems != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } } @@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) // call MatchFinder_CheckLimits() only after (p->pos++) update -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { if (// !p->streamEndWasReached && p->result == SZ_OK && @@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; // const UInt32 subValue = (1 << 15); // for debug // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); - size_t numSonRefs = p->cyclicBufferSize; - if (p->btMode) - numSonRefs <<= 1; - Inline_MatchFinder_ReduceOffsets(p, subValue); - MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + MatchFinder_REDUCE_OFFSETS(p, subValue) + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize); + { + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + MatchFinder_Normalize3(subValue, p->son, numSonRefs); + } } if (p->cyclicBufferPos == p->cyclicBufferSize) @@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) /* (lenLimit > maxLen) */ -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, unsigned maxLen) @@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, } -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, UInt32 maxLen) @@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const #define MOVE_POS_RET MOVE_POS return distances; -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_MovePos(CMatchFinder *p) { /* we go here at the end of stream data, when (avail < num_hash_bytes) @@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) if (p->btMode) p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue */ - MOVE_POS; + MOVE_POS } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ + unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; @@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET; + distances, (UInt32)_maxLen_); MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p) static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(2) - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(1) @@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(2) @@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(3) - HASH3_CALC; + HASH3_CALC hash = p->hash; pos = p->pos; @@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } } @@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } break; } @@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } @@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_HC(2) @@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { SKIP_HEADER(2) { - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } @@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { SKIP_HEADER(3) { - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } @@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2; UInt32 *hash; - HASH3_CALC; + HASH3_CALC hash = p->hash; curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = @@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2, h3; UInt32 *hash; - HASH4_CALC; + HASH4_CALC hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = @@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2, h3; UInt32 *hash; - HASH5_CALC; + HASH5_CALC hash = p->hash; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = @@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) #define HC_SKIP_HEADER(minLen) \ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ - Byte *cur; \ + const Byte *cur; \ UInt32 *hash; \ UInt32 *son; \ UInt32 pos = p->pos; \ @@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) HC_SKIP_HEADER(4) UInt32 h2, h3; - HASH4_CALC; + HASH4_CALC curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = @@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { HC_SKIP_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = hash[hv]; hash[hv] = pos; @@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) -void LzFindPrepare() +void LzFindPrepare(void) { - #ifndef FORCE_SATUR_SUB_128 - #ifdef USE_SATUR_SUB_128 + #ifndef FORCE_LZFIND_SATUR_SUB_128 + #ifdef USE_LZFIND_SATUR_SUB_128 LZFIND_SATUR_SUB_CODE_FUNC f = NULL; #ifdef MY_CPU_ARM_OR_ARM64 { if (CPU_IsSupported_NEON()) { // #pragma message ("=== LzFind NEON") - _PRF(printf("\n=== LzFind NEON\n")); + PRF(printf("\n=== LzFind NEON\n")); f = LzFind_SaturSub_128; } // f = 0; // for debug @@ -1609,20 +1693,25 @@ void LzFindPrepare() if (CPU_IsSupported_SSE41()) { // #pragma message ("=== LzFind SSE41") - _PRF(printf("\n=== LzFind SSE41\n")); + PRF(printf("\n=== LzFind SSE41\n")); f = LzFind_SaturSub_128; - #ifdef USE_AVX2 + #ifdef USE_LZFIND_SATUR_SUB_256 if (CPU_IsSupported_AVX2()) { // #pragma message ("=== LzFind AVX2") - _PRF(printf("\n=== LzFind AVX2\n")); + PRF(printf("\n=== LzFind AVX2\n")); f = LzFind_SaturSub_256; } #endif } #endif // MY_CPU_ARM_OR_ARM64 g_LzFind_SaturSub = f; - #endif // USE_SATUR_SUB_128 - #endif // FORCE_SATUR_SUB_128 + #endif // USE_LZFIND_SATUR_SUB_128 + #endif // FORCE_LZFIND_SATUR_SUB_128 } + + +#undef MOVE_POS +#undef MOVE_POS_RET +#undef PRF diff --git a/3rdparty/7z/src/LzFind.h b/3rdparty/7z/src/LzFind.h index 8f9fade230..5acdb98647 100644 --- a/3rdparty/7z/src/LzFind.h +++ b/3rdparty/7z/src/LzFind.h @@ -1,8 +1,8 @@ /* LzFind.h -- Match finder for LZ algorithms -2021-07-13 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __LZ_FIND_H -#define __LZ_FIND_H +#ifndef ZIP7_INC_LZ_FIND_H +#define ZIP7_INC_LZ_FIND_H #include "7zTypes.h" @@ -10,9 +10,9 @@ EXTERN_C_BEGIN typedef UInt32 CLzRef; -typedef struct _CMatchFinder +typedef struct { - Byte *buffer; + const Byte *buffer; UInt32 pos; UInt32 posLimit; UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ @@ -32,8 +32,8 @@ typedef struct _CMatchFinder UInt32 hashMask; UInt32 cutValue; - Byte *bufferBase; - ISeqInStream *stream; + Byte *bufBase; + ISeqInStreamPtr stream; UInt32 blockSize; UInt32 keepSizeBefore; @@ -43,7 +43,9 @@ typedef struct _CMatchFinder size_t directInputRem; UInt32 historySize; UInt32 fixedHashSize; - UInt32 hashSizeSum; + Byte numHashBytes_Min; + Byte numHashOutBits; + Byte _pad2_[2]; SRes result; UInt32 crc[256]; size_t numRefs; @@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p); -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +/* (directInput = 0) is default value. + It's required to provide correct (directInput) value + before calling MatchFinder_Create(). + You can set (directInput) by any of the following calls: + - MatchFinder_SET_DIRECT_INPUT_BUF() + - MatchFinder_SET_STREAM() + - MatchFinder_SET_STREAM_MODE() */ + +#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \ + (p)->stream = NULL; \ + (p)->directInput = 1; \ + (p)->buffer = (_src_); \ + (p)->directInputRem = (_srcLen_); } + +/* +#define MatchFinder_SET_STREAM_MODE(p) { \ + (p)->directInput = 0; } +*/ + +#define MatchFinder_SET_STREAM(p, _stream_) { \ + (p)->stream = _stream_; \ + (p)->directInput = 0; } + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); /* -#define Inline_MatchFinder_InitPos(p, val) \ +#define MatchFinder_INIT_POS(p, val) \ (p)->pos = (val); \ (p)->streamPos = (val); */ -#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +#define MatchFinder_REDUCE_OFFSETS(p, subValue) \ (p)->pos -= (subValue); \ (p)->streamPos -= (subValue); @@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); -typedef struct _IMatchFinder +typedef struct { Mf_Init_Func Init; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; diff --git a/3rdparty/7z/src/LzFindMt.c b/3rdparty/7z/src/LzFindMt.c index 3865263b7b..32b9ecb417 100644 --- a/3rdparty/7z/src/LzFindMt.c +++ b/3rdparty/7z/src/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -69,7 +69,7 @@ extern UInt64 g_NumIters_Bytes; UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } -#define __MT_HASH4_CALC { \ +#define MT_HASH4_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ h2 = temp & (kHash2Size - 1); \ temp ^= ((UInt32)cur[2] << 8); \ @@ -79,14 +79,14 @@ extern UInt64 g_NumIters_Bytes; */ -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_Construct(CMtSync *p) { p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; - Thread_Construct(&p->thread); + Thread_CONSTRUCT(&p->thread) Event_Construct(&p->canStart); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); @@ -116,7 +116,7 @@ static void MtSync_Construct(CMtSync *p) (p)->csWasEntered = False; } -MY_NO_INLINE +Z7_NO_INLINE static UInt32 MtSync_GetNextBlock(CMtSync *p) { UInt32 numBlocks = 0; @@ -140,14 +140,14 @@ static UInt32 MtSync_GetNextBlock(CMtSync *p) // buffer is UNLOCKED here Semaphore_Wait(&p->filledSemaphore); - LOCK_BUFFER(p); + LOCK_BUFFER(p) return numBlocks; } /* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_StopWriting(CMtSync *p) { if (!Thread_WasCreated(&p->thread) || p->needStart) @@ -185,7 +185,7 @@ static void MtSync_StopWriting(CMtSync *p) } -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_Destruct(CMtSync *p) { PRF(printf("\nMtSync_Destruct %p\n", p)); @@ -220,11 +220,11 @@ static void MtSync_Destruct(CMtSync *p) // #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } // we want to get real system error codes here instead of SZ_ERROR_THREAD -#define RINOK_THREAD(x) RINOK(x) +#define RINOK_THREAD(x) RINOK_WRes(x) // call it before each new file (when new starting is required): -MY_NO_INLINE +Z7_NO_INLINE static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) { WRes wres; @@ -245,12 +245,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * if (p->wasCreated) return SZ_OK; - RINOK_THREAD(CriticalSection_Init(&p->cs)); + RINOK_THREAD(CriticalSection_Init(&p->cs)) p->csWasInitialized = True; p->csWasEntered = False; - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)) + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)) p->needStart = True; p->exit = True; /* p->exit is unused before (canStart) Event. @@ -264,13 +264,13 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * else wres = Thread_Create(&p->thread, startAddress, obj); - RINOK_THREAD(wres); + RINOK_THREAD(wres) p->wasCreated = True; return SZ_OK; } -MY_NO_INLINE +Z7_NO_INLINE static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) { const WRes wres = MtSync_Create_WRes(p, startAddress, obj); @@ -519,7 +519,7 @@ static void HashThreadFunc(CMatchFinderMt *mt) if (mf->pos > (UInt32)kMtMaxValForNormalize - num) { const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); - Inline_MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_REDUCE_OFFSETS(mf, subValue) MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); } @@ -560,7 +560,7 @@ static void HashThreadFunc(CMatchFinderMt *mt) */ -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes); @@ -749,7 +749,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) } -MY_NO_INLINE +Z7_NO_INLINE static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; @@ -864,15 +864,15 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)) + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)) return SZ_OK; } SRes MatchFinderMt_InitMt(CMatchFinderMt *p) { - RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)) return MtSync_Init(&p->btSync, kMtBtNumBlocks); } @@ -941,7 +941,7 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) } -MY_NO_INLINE +Z7_NO_INLINE static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { if (p->failure_LZ_BT) @@ -1163,7 +1163,7 @@ UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) */ -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; UInt32 *hash = p->hash; @@ -1179,9 +1179,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) (hash + kFix3HashSize)[h3] = m; // (hash + kFix4HashSize)[h4] = m; - #define _USE_H2 - - #ifdef _USE_H2 + // #define BT5_USE_H2 + // #ifdef BT5_USE_H2 if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { d[1] = m - c2 - 1; @@ -1197,8 +1196,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } d[0] = 3; d += 2; - - #ifdef _USE_H4 + + #ifdef BT5_USE_H4 if (c4 >= matchMinPos) if ( cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && @@ -1214,7 +1213,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) d[0] = 2; d += 2; } - #endif + // #endif if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) { @@ -1228,7 +1227,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) d += 2; } - #ifdef _USE_H4 + #ifdef BT5_USE_H4 if (c4 >= matchMinPos) if ( cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && @@ -1244,7 +1243,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } -static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) { const UInt32 *bt = p->btBufPos; const UInt32 len = *bt++; @@ -1268,7 +1267,7 @@ static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) -static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) { const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; @@ -1398,3 +1397,10 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) break; } } + +#undef RINOK_THREAD +#undef PRF +#undef MF +#undef GetUi24hi_from32 +#undef LOCK_BUFFER +#undef UNLOCK_BUFFER diff --git a/3rdparty/7z/src/LzFindMt.h b/3rdparty/7z/src/LzFindMt.h index ee9a1b6fe9..5002d0c642 100644 --- a/3rdparty/7z/src/LzFindMt.h +++ b/3rdparty/7z/src/LzFindMt.h @@ -1,15 +1,15 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2021-07-12 : Igor Pavlov : Public domain */ +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __LZ_FIND_MT_H -#define __LZ_FIND_MT_H +#ifndef ZIP7_INC_LZ_FIND_MT_H +#define ZIP7_INC_LZ_FIND_MT_H #include "LzFind.h" #include "Threads.h" EXTERN_C_BEGIN -typedef struct _CMtSync +typedef struct { UInt32 numProcessedBlocks; CThread thread; @@ -39,7 +39,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); -typedef struct _CMatchFinderMt +typedef struct { /* LZ */ const Byte *pointerToCurPos; diff --git a/3rdparty/7z/src/LzFindOpt.c b/3rdparty/7z/src/LzFindOpt.c index dbb6ad9fc9..e590aac1ee 100644 --- a/3rdparty/7z/src/LzFindOpt.c +++ b/3rdparty/7z/src/LzFindOpt.c @@ -1,5 +1,5 @@ /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms -2021-07-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -41,8 +41,8 @@ UInt64 g_NumIters_Bytes; // #define CYC_TO_POS_OFFSET 1 // for debug /* -MY_NO_INLINE -UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) { do @@ -214,13 +214,13 @@ else to eliminate "movsx" BUG in old MSVC x64 compiler. */ -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes); -MY_NO_INLINE -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) @@ -404,7 +404,7 @@ else /* typedef UInt32 uint32plus; // size_t -UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) diff --git a/3rdparty/7z/src/LzHash.h b/3rdparty/7z/src/LzHash.h index a682f83bec..3b172ebc67 100644 --- a/3rdparty/7z/src/LzHash.h +++ b/3rdparty/7z/src/LzHash.h @@ -1,8 +1,8 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2019-10-30 : Igor Pavlov : Public domain */ +/* LzHash.h -- HASH constants for LZ algorithms +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __LZ_HASH_H -#define __LZ_HASH_H +#ifndef ZIP7_INC_LZ_HASH_H +#define ZIP7_INC_LZ_HASH_H /* (kHash2Size >= (1 << 8)) : Required diff --git a/3rdparty/7z/src/Lzma2Dec.c b/3rdparty/7z/src/Lzma2Dec.c index f9f98095df..54c87a6a26 100644 --- a/3rdparty/7z/src/Lzma2Dec.c +++ b/3rdparty/7z/src/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2021-02-09 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -71,14 +71,14 @@ static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props) SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) { Byte props[LZMA_PROPS_SIZE]; - RINOK(Lzma2Dec_GetOldProps(prop, props)); + RINOK(Lzma2Dec_GetOldProps(prop, props)) return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); } SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) { Byte props[LZMA_PROPS_SIZE]; - RINOK(Lzma2Dec_GetOldProps(prop, props)); + RINOK(Lzma2Dec_GetOldProps(prop, props)) return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); } @@ -474,8 +474,8 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, SizeT outSize = *destLen, inSize = *srcLen; *destLen = *srcLen = 0; *status = LZMA_STATUS_NOT_SPECIFIED; - Lzma2Dec_Construct(&p); - RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); + Lzma2Dec_CONSTRUCT(&p) + RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)) p.decoder.dic = dest; p.decoder.dicBufSize = outSize; Lzma2Dec_Init(&p); @@ -487,3 +487,5 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, Lzma2Dec_FreeProbs(&p, alloc); return res; } + +#undef PRF diff --git a/3rdparty/7z/src/Lzma2Dec.h b/3rdparty/7z/src/Lzma2Dec.h index da50387250..9b15ab04ad 100644 --- a/3rdparty/7z/src/Lzma2Dec.h +++ b/3rdparty/7z/src/Lzma2Dec.h @@ -1,8 +1,8 @@ /* Lzma2Dec.h -- LZMA2 Decoder -2018-02-19 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ -#ifndef __LZMA2_DEC_H -#define __LZMA2_DEC_H +#ifndef ZIP7_INC_LZMA2_DEC_H +#define ZIP7_INC_LZMA2_DEC_H #include "LzmaDec.h" @@ -22,9 +22,10 @@ typedef struct CLzmaDec decoder; } CLzma2Dec; -#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) -#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) -#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) +#define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder) +#define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p) +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); @@ -90,7 +91,7 @@ Lzma2Dec_GetUnpackExtra() returns the value that shows at current input positon. */ -#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0); +#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0) /* ---------- One Call Interface ---------- */ diff --git a/3rdparty/7z/src/Lzma2DecMt.c b/3rdparty/7z/src/Lzma2DecMt.c index 252b5be49f..e1038a1257 100644 --- a/3rdparty/7z/src/Lzma2DecMt.c +++ b/3rdparty/7z/src/Lzma2DecMt.c @@ -1,44 +1,44 @@ /* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ #include "Precomp.h" // #define SHOW_DEBUG_INFO - -// #define _7ZIP_ST +// #define Z7_ST #ifdef SHOW_DEBUG_INFO #include #endif -#ifndef _7ZIP_ST -#ifdef SHOW_DEBUG_INFO -#define PRF(x) x -#else -#define PRF(x) -#endif -#define PRF_STR(s) PRF(printf("\n" s "\n")) -#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2)) -#endif - #include "Alloc.h" #include "Lzma2Dec.h" #include "Lzma2DecMt.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "MtDec.h" #define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28) #endif +#ifndef Z7_ST +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif +#define PRF_STR(s) PRF(printf("\n" s "\n");) +#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2);) +#endif + + void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) { p->inBufSize_ST = 1 << 20; p->outStep_ST = 1 << 20; - #ifndef _7ZIP_ST + #ifndef Z7_ST p->numThreads = 1; p->inBufSize_MT = 1 << 18; p->outBlockMax = LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT; @@ -48,7 +48,7 @@ void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) -#ifndef _7ZIP_ST +#ifndef Z7_ST /* ---------- CLzma2DecMtThread ---------- */ @@ -81,7 +81,7 @@ typedef struct /* ---------- CLzma2DecMt ---------- */ -typedef struct +struct CLzma2DecMt { // ISzAllocPtr alloc; ISzAllocPtr allocMid; @@ -90,9 +90,9 @@ typedef struct CLzma2DecMtProps props; Byte prop; - ISeqInStream *inStream; - ISeqOutStream *outStream; - ICompressProgress *progress; + ISeqInStreamPtr inStream; + ISeqOutStreamPtr outStream; + ICompressProgressPtr progress; BoolInt finishMode; BoolInt outSize_Defined; @@ -111,14 +111,13 @@ typedef struct size_t inPos; size_t inLim; - #ifndef _7ZIP_ST + #ifndef Z7_ST UInt64 outProcessed_Parse; BoolInt mtc_WasConstructed; CMtDec mtc; - CLzma2DecMtThread coders[MTDEC__THREADS_MAX]; + CLzma2DecMtThread coders[MTDEC_THREADS_MAX]; #endif - -} CLzma2DecMt; +}; @@ -142,11 +141,11 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) // Lzma2DecMtProps_Init(&p->props); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtc_WasConstructed = False; { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CLzma2DecMtThread *t = &p->coders[i]; t->dec_created = False; @@ -156,16 +155,16 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) } #endif - return p; + return (CLzma2DecMtHandle)(void *)p; } -#ifndef _7ZIP_ST +#ifndef Z7_ST static void Lzma2DecMt_FreeOutBufs(CLzma2DecMt *p) { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CLzma2DecMtThread *t = &p->coders[i]; if (t->outBuf) @@ -196,13 +195,15 @@ static void Lzma2DecMt_FreeSt(CLzma2DecMt *p) } -void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp) +// #define GET_CLzma2DecMt_p CLzma2DecMt *p = (CLzma2DecMt *)(void *)pp; + +void Lzma2DecMt_Destroy(CLzma2DecMtHandle p) { - CLzma2DecMt *p = (CLzma2DecMt *)pp; + // GET_CLzma2DecMt_p Lzma2DecMt_FreeSt(p); - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtc_WasConstructed) { @@ -211,7 +212,7 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp) } { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CLzma2DecMtThread *t = &p->coders[i]; if (t->dec_created) @@ -226,19 +227,19 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp) #endif - ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp); + ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p); } -#ifndef _7ZIP_ST +#ifndef Z7_ST static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) { CLzma2DecMt *me = (CLzma2DecMt *)obj; CLzma2DecMtThread *t = &me->coders[coderIndex]; - PRF_STR_INT_2("Parse", coderIndex, cc->srcSize); + PRF_STR_INT_2("Parse", coderIndex, cc->srcSize) cc->state = MTDEC_PARSE_CONTINUE; @@ -246,7 +247,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa { if (!t->dec_created) { - Lzma2Dec_Construct(&t->dec); + Lzma2Dec_CONSTRUCT(&t->dec) t->dec_created = True; AlignOffsetAlloc_CreateVTable(&t->alloc); { @@ -297,7 +298,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa // that must be finished at position <= outBlockMax. { - const SizeT srcOrig = cc->srcSize; + const size_t srcOrig = cc->srcSize; SizeT srcSize_Point = 0; SizeT dicPos_Point = 0; @@ -306,10 +307,10 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa for (;;) { - SizeT srcCur = srcOrig - cc->srcSize; + SizeT srcCur = (SizeT)(srcOrig - cc->srcSize); status = Lzma2Dec_Parse(&t->dec, - limit - t->dec.decoder.dicPos, + (SizeT)limit - t->dec.decoder.dicPos, cc->src + cc->srcSize, &srcCur, checkFinishBlock); @@ -333,7 +334,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa if (t->dec.decoder.dicPos >= (1 << 14)) break; dicPos_Point = t->dec.decoder.dicPos; - srcSize_Point = cc->srcSize; + srcSize_Point = (SizeT)cc->srcSize; continue; } @@ -391,7 +392,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa if (unpackRem != 0) { /* we also reserve space for max possible number of output bytes of current LZMA chunk */ - SizeT rem = limit - dicPos; + size_t rem = limit - dicPos; if (rem > unpackRem) rem = unpackRem; dicPos += rem; @@ -444,7 +445,7 @@ static SRes Lzma2DecMt_MtCallback_PreCode(void *pp, unsigned coderIndex) } t->dec.decoder.dic = dest; - t->dec.decoder.dicBufSize = t->outPreSize; + t->dec.decoder.dicBufSize = (SizeT)t->outPreSize; t->needInit = True; @@ -462,7 +463,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex, UNUSED_VAR(srcFinished) - PRF_STR_INT_2("Code", coderIndex, srcSize); + PRF_STR_INT_2("Code", coderIndex, srcSize) *inCodePos = t->inCodeSize; *outCodePos = 0; @@ -476,13 +477,13 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex, { ELzmaStatus status; - size_t srcProcessed = srcSize; + SizeT srcProcessed = (SizeT)srcSize; BoolInt blockWasFinished = ((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK || t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK); SRes res = Lzma2Dec_DecodeToDic(&t->dec, - t->outPreSize, + (SizeT)t->outPreSize, src, &srcProcessed, blockWasFinished ? LZMA_FINISH_END : LZMA_FINISH_ANY, &status); @@ -540,7 +541,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, UNUSED_VAR(srcSize) UNUSED_VAR(isCross) - PRF_STR_INT_2("Write", coderIndex, srcSize); + PRF_STR_INT_2("Write", coderIndex, srcSize) *needContinue = False; *canRecode = True; @@ -588,7 +589,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, *needContinue = needContinue2; return SZ_OK; } - RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0)); + RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0)) } } @@ -611,11 +612,11 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p) { if (!p->dec_created) { - Lzma2Dec_Construct(&p->dec); + Lzma2Dec_CONSTRUCT(&p->dec) p->dec_created = True; } - RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt)); + RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt)) if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST) { @@ -634,7 +635,7 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p) static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p - #ifndef _7ZIP_ST + #ifndef Z7_ST , BoolInt tMode #endif ) @@ -646,7 +647,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p CLzma2Dec *dec; - #ifndef _7ZIP_ST + #ifndef Z7_ST if (tMode) { Lzma2DecMt_FreeOutBufs(p); @@ -654,7 +655,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p } #endif - RINOK(Lzma2Dec_Prepare_ST(p)); + RINOK(Lzma2Dec_Prepare_ST(p)) dec = &p->dec; @@ -681,7 +682,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p if (inPos == inLim) { - #ifndef _7ZIP_ST + #ifndef Z7_ST if (tMode) { inData = MtDec_Read(&p->mtc, &inLim); @@ -710,7 +711,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p { SizeT next = dec->decoder.dicBufSize; if (next - wrPos > p->props.outStep_ST) - next = wrPos + p->props.outStep_ST; + next = wrPos + (SizeT)p->props.outStep_ST; size = next - dicPos; } @@ -726,7 +727,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p } } - inProcessed = inLim - inPos; + inProcessed = (SizeT)(inLim - inPos); res = Lzma2Dec_DecodeToDic(dec, dicPos + size, inData + inPos, &inProcessed, finishMode, &status); @@ -755,7 +756,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p dec->decoder.dicPos = 0; wrPos = dec->decoder.dicPos; - RINOK(res2); + RINOK(res2) if (needStop) { @@ -788,7 +789,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p UInt64 outDelta = p->outProcessed - outPrev; if (inDelta >= (1 << 22) || outDelta >= (1 << 22)) { - RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed)); + RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed)) inPrev = p->inProcessed; outPrev = p->outProcessed; } @@ -798,20 +799,20 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p -SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, +SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, Byte prop, const CLzma2DecMtProps *props, - ISeqOutStream *outStream, const UInt64 *outDataSize, int finishMode, + ISeqOutStreamPtr outStream, const UInt64 *outDataSize, int finishMode, // Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, // const Byte *inData, size_t inDataSize, UInt64 *inProcessed, // UInt64 *outProcessed, int *isMT, - ICompressProgress *progress) + ICompressProgressPtr progress) { - CLzma2DecMt *p = (CLzma2DecMt *)pp; - #ifndef _7ZIP_ST + // GET_CLzma2DecMt_p + #ifndef Z7_ST BoolInt tMode; #endif @@ -845,7 +846,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, *isMT = False; - #ifndef _7ZIP_ST + #ifndef Z7_ST tMode = False; @@ -939,7 +940,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, p->readWasFinished = p->mtc.readWasFinished; p->inProcessed = p->mtc.inProcessed; - PRF_STR("----- decoding ST -----"); + PRF_STR("----- decoding ST -----") } } @@ -950,7 +951,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, { SRes res = Lzma2Dec_Decode_ST(p - #ifndef _7ZIP_ST + #ifndef Z7_ST , tMode #endif ); @@ -967,7 +968,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, res = p->readRes; /* - #ifndef _7ZIP_ST + #ifndef Z7_ST if (res == SZ_OK && tMode && p->mtc.parseRes != SZ_OK) res = p->mtc.parseRes; #endif @@ -980,13 +981,13 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, /* ---------- Read from CLzma2DecMtHandle Interface ---------- */ -SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp, +SRes Lzma2DecMt_Init(CLzma2DecMtHandle p, Byte prop, const CLzma2DecMtProps *props, const UInt64 *outDataSize, int finishMode, - ISeqInStream *inStream) + ISeqInStreamPtr inStream) { - CLzma2DecMt *p = (CLzma2DecMt *)pp; + // GET_CLzma2DecMt_p if (prop > 40) return SZ_ERROR_UNSUPPORTED; @@ -1015,11 +1016,11 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp, } -SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, +SRes Lzma2DecMt_Read(CLzma2DecMtHandle p, Byte *data, size_t *outSize, UInt64 *inStreamProcessed) { - CLzma2DecMt *p = (CLzma2DecMt *)pp; + // GET_CLzma2DecMt_p ELzmaFinishMode finishMode; SRes readRes; size_t size = *outSize; @@ -1055,8 +1056,8 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, readRes = ISeqInStream_Read(p->inStream, p->inBuf, &p->inLim); } - inCur = p->inLim - p->inPos; - outCur = size; + inCur = (SizeT)(p->inLim - p->inPos); + outCur = (SizeT)size; res = Lzma2Dec_DecodeToBuf(&p->dec, data, &outCur, p->inBuf + p->inPos, &inCur, finishMode, &status); @@ -1088,3 +1089,7 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, return readRes; } } + +#undef PRF +#undef PRF_STR +#undef PRF_STR_INT_2 diff --git a/3rdparty/7z/src/Lzma2DecMt.h b/3rdparty/7z/src/Lzma2DecMt.h index 96f89a3ca2..cb7c8f1e53 100644 --- a/3rdparty/7z/src/Lzma2DecMt.h +++ b/3rdparty/7z/src/Lzma2DecMt.h @@ -1,8 +1,8 @@ /* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread -2018-02-17 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __LZMA2_DEC_MT_H -#define __LZMA2_DEC_MT_H +#ifndef ZIP7_INC_LZMA2_DEC_MT_H +#define ZIP7_INC_LZMA2_DEC_MT_H #include "7zTypes.h" @@ -13,7 +13,7 @@ typedef struct size_t inBufSize_ST; size_t outStep_ST; - #ifndef _7ZIP_ST + #ifndef Z7_ST unsigned numThreads; size_t inBufSize_MT; size_t outBlockMax; @@ -38,7 +38,9 @@ SRes: SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ -typedef void * CLzma2DecMtHandle; +typedef struct CLzma2DecMt CLzma2DecMt; +typedef CLzma2DecMt * CLzma2DecMtHandle; +// Z7_DECLARE_HANDLE(CLzma2DecMtHandle) CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); void Lzma2DecMt_Destroy(CLzma2DecMtHandle p); @@ -46,11 +48,11 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle p); SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, Byte prop, const CLzma2DecMtProps *props, - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, const UInt64 *outDataSize, // NULL means undefined int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished // Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, // const Byte *inData, size_t inDataSize, // out variables: @@ -58,7 +60,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, int *isMT, /* out: (*isMT == 0), if single thread decoding was used */ // UInt64 *outProcessed, - ICompressProgress *progress); + ICompressProgressPtr progress); /* ---------- Read from CLzma2DecMtHandle Interface ---------- */ @@ -67,7 +69,7 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp, Byte prop, const CLzma2DecMtProps *props, const UInt64 *outDataSize, int finishMode, - ISeqInStream *inStream); + ISeqInStreamPtr inStream); SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, Byte *data, size_t *outSize, diff --git a/3rdparty/7z/src/Lzma2Enc.c b/3rdparty/7z/src/Lzma2Enc.c index c8b114cb48..85aa80d77e 100644 --- a/3rdparty/7z/src/Lzma2Enc.c +++ b/3rdparty/7z/src/Lzma2Enc.c @@ -1,18 +1,18 @@ /* Lzma2Enc.c -- LZMA2 Encoder -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ #include "Precomp.h" #include -/* #define _7ZIP_ST */ +/* #define Z7_ST */ #include "Lzma2Enc.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "MtCoder.h" #else -#define MTCODER__THREADS_MAX 1 +#define MTCODER_THREADS_MAX 1 #endif #define LZMA2_CONTROL_LZMA (1 << 7) @@ -40,7 +40,7 @@ typedef struct { ISeqInStream vt; - ISeqInStream *realStream; + ISeqInStreamPtr realStream; UInt64 limit; UInt64 processed; int finished; @@ -53,15 +53,15 @@ static void LimitedSeqInStream_Init(CLimitedSeqInStream *p) p->finished = 0; } -static SRes LimitedSeqInStream_Read(const ISeqInStream *pp, void *data, size_t *size) +static SRes LimitedSeqInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) { - CLimitedSeqInStream *p = CONTAINER_FROM_VTBL(pp, CLimitedSeqInStream, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLimitedSeqInStream) size_t size2 = *size; SRes res = SZ_OK; if (p->limit != (UInt64)(Int64)-1) { - UInt64 rem = p->limit - p->processed; + const UInt64 rem = p->limit - p->processed; if (size2 > rem) size2 = (size_t)rem; } @@ -95,8 +95,8 @@ static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props) { SizeT propsSize = LZMA_PROPS_SIZE; Byte propsEncoded[LZMA_PROPS_SIZE]; - RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps)); - RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize)); + RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps)) + RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize)) p->propsByte = propsEncoded[0]; p->propsAreSet = True; } @@ -111,23 +111,23 @@ static void Lzma2EncInt_InitBlock(CLzma2EncInt *p) } -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); -void LzmaEnc_Finish(CLzmaEncHandle pp); -void LzmaEnc_SaveState(CLzmaEncHandle pp); -void LzmaEnc_RestoreState(CLzmaEncHandle pp); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); /* -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp); +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p); */ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, - size_t *packSizeRes, ISeqOutStream *outStream) + size_t *packSizeRes, ISeqOutStreamPtr outStream) { size_t packSizeLimit = *packSizeRes; size_t packSize = packSizeLimit; @@ -167,7 +167,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, while (unpackSize > 0) { - UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE; + const UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE; if (packSizeLimit - destPos < u + 3) return SZ_ERROR_OUTPUT_EOF; outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET); @@ -196,9 +196,9 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, { size_t destPos = 0; - UInt32 u = unpackSize - 1; - UInt32 pm = (UInt32)(packSize - 1); - unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0); + const UInt32 u = unpackSize - 1; + const UInt32 pm = (UInt32)(packSize - 1); + const unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0); PRF(printf(" ")); @@ -231,7 +231,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, void Lzma2EncProps_Init(CLzma2EncProps *p) { LzmaEncProps_Init(&p->lzmaProps); - p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO; + p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO; p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; p->numTotalThreads = -1; @@ -251,8 +251,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) t2 = p->numBlockThreads_Max; t3 = p->numTotalThreads; - if (t2 > MTCODER__THREADS_MAX) - t2 = MTCODER__THREADS_MAX; + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; if (t3 <= 0) { @@ -268,8 +268,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) t1 = 1; t2 = t3; } - if (t2 > MTCODER__THREADS_MAX) - t2 = MTCODER__THREADS_MAX; + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; } else if (t1 <= 0) { @@ -286,8 +286,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) fileSize = p->lzmaProps.reduceSize; - if ( p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID - && p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO + if ( p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + && p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO && (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1)) p->lzmaProps.reduceSize = p->blockSize; @@ -297,19 +297,19 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) t1 = p->lzmaProps.numThreads; - if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) { t2r = t2 = 1; t3 = t1; } - else if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && t2 <= 1) + else if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO && t2 <= 1) { /* if there is no block multi-threading, we use SOLID block */ - p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID; + p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID; } else { - if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) { const UInt32 kMinSize = (UInt32)1 << 20; const UInt32 kMaxSize = (UInt32)1 << 28; @@ -344,7 +344,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) } -static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize) +static SRes Progress(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize) { return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK; } @@ -352,7 +352,7 @@ static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize) /* ---------- Lzma2 ---------- */ -typedef struct +struct CLzma2Enc { Byte propEncoded; CLzma2EncProps props; @@ -363,23 +363,22 @@ typedef struct ISzAllocPtr alloc; ISzAllocPtr allocBig; - CLzma2EncInt coders[MTCODER__THREADS_MAX]; + CLzma2EncInt coders[MTCODER_THREADS_MAX]; - #ifndef _7ZIP_ST + #ifndef Z7_ST - ISeqOutStream *outStream; + ISeqOutStreamPtr outStream; Byte *outBuf; size_t outBuf_Rem; /* remainder in outBuf */ size_t outBufSize; /* size of allocated outBufs[i] */ - size_t outBufsDataSizes[MTCODER__BLOCKS_MAX]; + size_t outBufsDataSizes[MTCODER_BLOCKS_MAX]; BoolInt mtCoder_WasConstructed; CMtCoder mtCoder; - Byte *outBufs[MTCODER__BLOCKS_MAX]; + Byte *outBufs[MTCODER_BLOCKS_MAX]; #endif - -} CLzma2Enc; +}; @@ -396,30 +395,30 @@ CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig) p->allocBig = allocBig; { unsigned i; - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) p->coders[i].enc = NULL; } - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtCoder_WasConstructed = False; { unsigned i; - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) p->outBufs[i] = NULL; p->outBufSize = 0; } #endif - return p; + return (CLzma2EncHandle)p; } -#ifndef _7ZIP_ST +#ifndef Z7_ST static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p) { unsigned i; - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) if (p->outBufs[i]) { ISzAlloc_Free(p->alloc, p->outBufs[i]); @@ -430,12 +429,13 @@ static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p) #endif +// #define GET_CLzma2Enc_p CLzma2Enc *p = (CLzma2Enc *)(void *)p; -void Lzma2Enc_Destroy(CLzma2EncHandle pp) +void Lzma2Enc_Destroy(CLzma2EncHandle p) { - CLzma2Enc *p = (CLzma2Enc *)pp; + // GET_CLzma2Enc_p unsigned i; - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) { CLzma2EncInt *t = &p->coders[i]; if (t->enc) @@ -446,7 +446,7 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp) } - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtCoder_WasConstructed) { MtCoder_Destruct(&p->mtCoder); @@ -458,13 +458,13 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp) ISzAlloc_Free(p->alloc, p->tempBufLzma); p->tempBufLzma = NULL; - ISzAlloc_Free(p->alloc, pp); + ISzAlloc_Free(p->alloc, p); } -SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props) +SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props) { - CLzma2Enc *p = (CLzma2Enc *)pp; + // GET_CLzma2Enc_p CLzmaEncProps lzmaProps = props->lzmaProps; LzmaEncProps_Normalize(&lzmaProps); if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX) @@ -475,16 +475,16 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props) } -void Lzma2Enc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize) { - CLzma2Enc *p = (CLzma2Enc *)pp; + // GET_CLzma2Enc_p p->expectedDataSize = expectedDataSiize; } -Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp) +Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p) { - CLzma2Enc *p = (CLzma2Enc *)pp; + // GET_CLzma2Enc_p unsigned i; UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps); for (i = 0; i < 40; i++) @@ -497,12 +497,12 @@ Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp) static SRes Lzma2Enc_EncodeMt1( CLzma2Enc *me, CLzma2EncInt *p, - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, const Byte *inData, size_t inDataSize, int finished, - ICompressProgress *progress) + ICompressProgressPtr progress) { UInt64 unpackTotal = 0; UInt64 packTotal = 0; @@ -540,12 +540,12 @@ static SRes Lzma2Enc_EncodeMt1( } } - RINOK(Lzma2EncInt_InitStream(p, &me->props)); + RINOK(Lzma2EncInt_InitStream(p, &me->props)) for (;;) { SRes res = SZ_OK; - size_t inSizeCur = 0; + SizeT inSizeCur = 0; Lzma2EncInt_InitBlock(p); @@ -559,7 +559,7 @@ static SRes Lzma2Enc_EncodeMt1( if (me->expectedDataSize != (UInt64)(Int64)-1 && me->expectedDataSize >= unpackTotal) expected = me->expectedDataSize - unpackTotal; - if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID + if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID && expected > me->props.blockSize) expected = (size_t)me->props.blockSize; @@ -569,14 +569,14 @@ static SRes Lzma2Enc_EncodeMt1( &limitedInStream.vt, LZMA2_KEEP_WINDOW_SIZE, me->alloc, - me->allocBig)); + me->allocBig)) } else { - inSizeCur = inDataSize - (size_t)unpackTotal; - if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID + inSizeCur = (SizeT)(inDataSize - (size_t)unpackTotal); + if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID && inSizeCur > me->props.blockSize) - inSizeCur = (size_t)me->props.blockSize; + inSizeCur = (SizeT)(size_t)me->props.blockSize; // LzmaEnc_SetDataSize(p->enc, inSizeCur); @@ -584,7 +584,7 @@ static SRes Lzma2Enc_EncodeMt1( inData + (size_t)unpackTotal, inSizeCur, LZMA2_KEEP_WINDOW_SIZE, me->alloc, - me->allocBig)); + me->allocBig)) } for (;;) @@ -621,7 +621,7 @@ static SRes Lzma2Enc_EncodeMt1( unpackTotal += p->srcPos; - RINOK(res); + RINOK(res) if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur)) return SZ_ERROR_FAIL; @@ -652,12 +652,12 @@ static SRes Lzma2Enc_EncodeMt1( -#ifndef _7ZIP_ST +#ifndef Z7_ST -static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex, +static SRes Lzma2Enc_MtCallback_Code(void *p, unsigned coderIndex, unsigned outBufIndex, const Byte *src, size_t srcSize, int finished) { - CLzma2Enc *me = (CLzma2Enc *)pp; + CLzma2Enc *me = (CLzma2Enc *)p; size_t destSize = me->outBufSize; SRes res; CMtProgressThunk progressThunk; @@ -692,9 +692,9 @@ static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned out } -static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex) +static SRes Lzma2Enc_MtCallback_Write(void *p, unsigned outBufIndex) { - CLzma2Enc *me = (CLzma2Enc *)pp; + CLzma2Enc *me = (CLzma2Enc *)p; size_t size = me->outBufsDataSizes[outBufIndex]; const Byte *data = me->outBufs[outBufIndex]; @@ -713,14 +713,14 @@ static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex) -SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, - ISeqOutStream *outStream, +SRes Lzma2Enc_Encode2(CLzma2EncHandle p, + ISeqOutStreamPtr outStream, Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, const Byte *inData, size_t inDataSize, - ICompressProgress *progress) + ICompressProgressPtr progress) { - CLzma2Enc *p = (CLzma2Enc *)pp; + // GET_CLzma2Enc_p if (inStream && inData) return SZ_ERROR_PARAM; @@ -730,11 +730,11 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, { unsigned i; - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) p->coders[i].propsAreSet = False; } - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->props.numBlockThreads_Reduced > 1) { @@ -772,7 +772,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */ { - size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16; + const size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16; if (destBlockSize < p->mtCoder.blockSize) return SZ_ERROR_PARAM; if (p->outBufSize != destBlockSize) @@ -784,7 +784,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, p->mtCoder.expectedDataSize = p->expectedDataSize; { - SRes res = MtCoder_Code(&p->mtCoder); + const SRes res = MtCoder_Code(&p->mtCoder); if (!outStream) *outBufSize = (size_t)(p->outBuf - outBuf); return res; @@ -801,3 +801,5 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, True, /* finished */ progress); } + +#undef PRF diff --git a/3rdparty/7z/src/Lzma2Enc.h b/3rdparty/7z/src/Lzma2Enc.h index 65f2dd145d..bead0fc52f 100644 --- a/3rdparty/7z/src/Lzma2Enc.h +++ b/3rdparty/7z/src/Lzma2Enc.h @@ -1,15 +1,15 @@ /* Lzma2Enc.h -- LZMA2 Encoder -2017-07-27 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __LZMA2_ENC_H -#define __LZMA2_ENC_H +#ifndef ZIP7_INC_LZMA2_ENC_H +#define ZIP7_INC_LZMA2_ENC_H #include "LzmaEnc.h" EXTERN_C_BEGIN -#define LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO 0 -#define LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID ((UInt64)(Int64)-1) +#define LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO 0 +#define LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID ((UInt64)(Int64)-1) typedef struct { @@ -36,7 +36,9 @@ SRes: SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ -typedef void * CLzma2EncHandle; +typedef struct CLzma2Enc CLzma2Enc; +typedef CLzma2Enc * CLzma2EncHandle; +// Z7_DECLARE_HANDLE(CLzma2EncHandle) CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); void Lzma2Enc_Destroy(CLzma2EncHandle p); @@ -44,11 +46,11 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props); void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize); Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p); SRes Lzma2Enc_Encode2(CLzma2EncHandle p, - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, const Byte *inData, size_t inDataSize, - ICompressProgress *progress); + ICompressProgressPtr progress); EXTERN_C_END diff --git a/3rdparty/7z/src/Lzma86.h b/3rdparty/7z/src/Lzma86.h index 83057e5983..9127d73b7d 100644 --- a/3rdparty/7z/src/Lzma86.h +++ b/3rdparty/7z/src/Lzma86.h @@ -1,8 +1,8 @@ /* Lzma86.h -- LZMA + x86 (BCJ) Filter -2013-01-18 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ -#ifndef __LZMA86_H -#define __LZMA86_H +#ifndef ZIP7_INC_LZMA86_H +#define ZIP7_INC_LZMA86_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/Lzma86Dec.c b/3rdparty/7z/src/Lzma86Dec.c index 20ac5e7a97..9b15f3630b 100644 --- a/3rdparty/7z/src/Lzma86Dec.c +++ b/3rdparty/7z/src/Lzma86Dec.c @@ -1,5 +1,5 @@ /* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder -2016-05-16 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -46,9 +46,8 @@ SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen) return res; if (useFilter == 1) { - UInt32 x86State; - x86_Convert_Init(x86State); - x86_Convert(dest, *destLen, 0, &x86State, 0); + UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(dest, *destLen, 0, &x86State); } return SZ_OK; } diff --git a/3rdparty/7z/src/Lzma86Enc.c b/3rdparty/7z/src/Lzma86Enc.c index 99397bc5e3..1c034a74cc 100644 --- a/3rdparty/7z/src/Lzma86Enc.c +++ b/3rdparty/7z/src/Lzma86Enc.c @@ -1,5 +1,5 @@ /* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder -2018-07-04 : Igor Pavlov : Public domain */ +2023-03-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -46,9 +46,8 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, memcpy(filteredStream, src, srcLen); } { - UInt32 x86State; - x86_Convert_Init(x86State); - x86_Convert(filteredStream, srcLen, 0, &x86State, 1); + UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Enc(filteredStream, srcLen, 0, &x86State); } } diff --git a/3rdparty/7z/src/LzmaDec.c b/3rdparty/7z/src/LzmaDec.c index 80b70a9eee..b6bcd5099b 100644 --- a/3rdparty/7z/src/LzmaDec.c +++ b/3rdparty/7z/src/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -8,15 +8,15 @@ /* #include "CpuArch.h" */ #include "LzmaDec.h" -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define RC_INIT_SIZE 5 -#ifndef _LZMA_DEC_OPT +#ifndef Z7_LZMA_DEC_OPT #define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } @@ -25,14 +25,14 @@ #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ - { UPDATE_0(p + i); A0; } else \ - { UPDATE_1(p + i); A1; } + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) @@ -40,19 +40,19 @@ #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } -/* #define _LZMA_SIZE_OPT */ +/* #define Z7_LZMA_SIZE_OPT */ -#ifdef _LZMA_SIZE_OPT +#ifdef Z7_LZMA_SIZE_OPT #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #else #define TREE_6_DECODE(probs, i) \ { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ i -= 0x40; } #endif @@ -64,25 +64,25 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) -#endif // _LZMA_DEC_OPT +#endif // Z7_LZMA_DEC_OPT #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } -#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK range -= bound; code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define TREE_DECODE_CHECK(probs, limit, i) \ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ - { UPDATE_0_CHECK; i += m; m += m; } else \ - { UPDATE_1_CHECK; m += m; i += m; } + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } #define kNumPosBitsMax 4 @@ -224,14 +224,14 @@ Out: */ -#ifdef _LZMA_DEC_OPT +#ifdef Z7_LZMA_DEC_OPT -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); #else static -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; @@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { unsigned symbol; - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + Literal; if (processedPos != 0 || checkDicSize != 0) prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); @@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit { state -= (state < 4) ? state : 3; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { NORMAL_LITER_DEC } while (symbol < 0x100); #else NORMAL_LITER_DEC @@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit unsigned offs = 0x100; state -= (state < 10) ? 3 : 6; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { unsigned bit; @@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRep + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) state += kNumStates; prob = probs + LenCoder; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG0 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) // that case was checked before with kBadRepCode // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } @@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = state < kNumLitStates ? 9 : 11; continue; } - UPDATE_1(prob); + UPDATE_1(prob) } else { UInt32 distance; - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG1 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep1; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG2 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep2; } else { - UPDATE_1(prob); + UPDATE_1(prob) distance = rep3; rep3 = rep2; } @@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit prob = probs + RepLenCoder; } - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT { unsigned lim, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; offset = 0; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; lim = (1 << kLenNumHighBits); } } - TREE_DECODE(probLen, lim, len); + TREE_DECODE(probLen, lim, len) len += offset; } #else @@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) len -= 8; } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; - TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) len += kLenNumLowSymbols * 2; } } @@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit UInt32 distance; prob = probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); + TREE_6_DECODE(prob, distance) if (distance >= kStartPosModelIndex) { unsigned posSlot = (unsigned)distance; @@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance++; do { - REV_BIT_VAR(prob, distance, m); + REV_BIT_VAR(prob, distance, m) } while (--numDirectBits); distance -= m; @@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance <<= kNumAlignBits; { unsigned i = 1; - REV_BIT_CONST(prob, i, 1); - REV_BIT_CONST(prob, i, 2); - REV_BIT_CONST(prob, i, 4); - REV_BIT_LAST (prob, i, 8); + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) distance |= i; } if (distance == (UInt32)0xFFFFFFFF) @@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } while (dicPos < limit && buf < bufLimit); - NORMALIZE; + NORMALIZE p->buf = buf; p->range = range; @@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { unsigned len = (unsigned)p->remainLen; if (len == 0 /* || len >= kMatchSpecLenStart */) @@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize): (p->checkDicSize == p->prop.dicSize) */ -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { if (p->checkDicSize == 0) { @@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt else { unsigned len; - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRep + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK state = 0; prob = probs + LenCoder; res = DUMMY_MATCH; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK res = DUMMY_REP; prob = probs + IsRepG0 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK break; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG1 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG2 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } } @@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt const CLzmaProb *probLen = prob + LenChoice; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenChoice2; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } - TREE_DECODE_CHECK(probLen, limit, len); + TREE_DECODE_CHECK(probLen, limit, len) len += offset; } @@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt prob = probs + PosSlot + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) if (posSlot >= kStartPosModelIndex) { unsigned numDirectBits = ((posSlot >> 1) - 1); @@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt unsigned m = 1; do { - REV_BIT_CHECK(prob, i, m); + REV_BIT_CHECK(prob, i, m) } while (--numDirectBits); } @@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt } break; } - NORMALIZE_CHECK; + NORMALIZE_CHECK *bufOut = buf; return res; @@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have */ -#define RETURN__NOT_FINISHED__FOR_FINISH \ +#define RETURN_NOT_FINISHED_FOR_FINISH \ *status = LZMA_STATUS_NOT_FINISHED; \ return SZ_ERROR_DATA; // for strict mode // return SZ_OK; // for relaxed mode @@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } checkEndMarkNow = 1; } @@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr for (i = 0; i < (unsigned)dummyProcessed; i++) p->tempBuf[i] = src[i]; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } bufLimit = src; @@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; p->tempBufSize = (unsigned)dummyProcessed; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } } @@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) p->prop = propNew; return SZ_OK; } @@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll { CLzmaProps propNew; SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) { UInt32 dictSize = propNew.dicSize; SizeT mask = ((UInt32)1 << 12) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; - else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; dicBufSize = ((SizeT)dictSize + mask) & ~mask; if (dicBufSize < dictSize) dicBufSize = dictSize; @@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, *status = LZMA_STATUS_NOT_SPECIFIED; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) p.dic = dest; p.dicBufSize = outSize; LzmaDec_Init(&p); diff --git a/3rdparty/7z/src/LzmaDec.h b/3rdparty/7z/src/LzmaDec.h index 6194b7d127..6432f60852 100644 --- a/3rdparty/7z/src/LzmaDec.h +++ b/3rdparty/7z/src/LzmaDec.h @@ -1,19 +1,19 @@ /* LzmaDec.h -- LZMA Decoder -2020-03-19 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H #include "7zTypes.h" EXTERN_C_BEGIN -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -25,7 +25,7 @@ typedef #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaProps +typedef struct { Byte lc; Byte lp; @@ -73,7 +73,8 @@ typedef struct Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) void LzmaDec_Init(CLzmaDec *p); diff --git a/3rdparty/7z/src/LzmaEnc.c b/3rdparty/7z/src/LzmaEnc.c index ca9154aefb..64bb1aaba4 100644 --- a/3rdparty/7z/src/LzmaEnc.c +++ b/3rdparty/7z/src/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2022-07-15: Igor Pavlov : Public domain */ +2023-04-13: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -16,22 +16,22 @@ #include "LzmaEnc.h" #include "LzFind.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "LzFindMt.h" #endif /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); -void LzmaEnc_Finish(CLzmaEncHandle pp); -void LzmaEnc_SaveState(CLzmaEncHandle pp); -void LzmaEnc_RestoreState(CLzmaEncHandle pp); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; @@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0; /* for good normalization speed we still reserve 256 MB before 4 GB range */ #define kLzmaMaxHistorySize ((UInt32)15 << 28) -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) @@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->dictSize = p->mc = 0; p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->numHashOutBits = 0; p->writeEndMark = 0; p->affinity = 0; } @@ -99,7 +100,7 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->numThreads < 0) p->numThreads = - #ifndef _7ZIP_ST + #ifndef Z7_ST ((p->btMode && p->algo) ? 2 : 1); #else 1; @@ -293,7 +294,7 @@ typedef struct #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -350,7 +351,7 @@ typedef struct Byte *buf; Byte *bufLim; Byte *bufBase; - ISeqOutStream *outStream; + ISeqOutStreamPtr outStream; UInt64 processed; SRes res; } CRangeEnc; @@ -383,7 +384,7 @@ typedef struct typedef UInt32 CProbPrice; -typedef struct +struct CLzmaEnc { void *matchFinderObj; IMatchFinder2 matchFinder; @@ -426,7 +427,7 @@ typedef struct UInt32 dictSize; SRes result; - #ifndef _7ZIP_ST + #ifndef Z7_ST BoolInt mtMode; // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; @@ -439,7 +440,7 @@ typedef struct // we suppose that we have 8-bytes alignment after CMatchFinder - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad[128]; #endif @@ -479,77 +480,59 @@ typedef struct CSaveState saveState; // BoolInt mf_Failure; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad2[128]; #endif -} CLzmaEnc; +}; #define MFB (p->matchFinderBase) /* -#ifndef _7ZIP_ST +#ifndef Z7_ST #define MFB (p->matchFinderMt.MatchFinder) #endif */ -#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); +// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p; +// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p; -void LzmaEnc_SaveState(CLzmaEncHandle pp) +#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr)); + +#define COPY_LZMA_ENC_STATE(d, s, p) \ + (d)->state = (s)->state; \ + COPY_ARR(d, s, reps) \ + COPY_ARR(d, s, posAlignEncoder) \ + COPY_ARR(d, s, isRep) \ + COPY_ARR(d, s, isRepG0) \ + COPY_ARR(d, s, isRepG1) \ + COPY_ARR(d, s, isRepG2) \ + COPY_ARR(d, s, isMatch) \ + COPY_ARR(d, s, isRep0Long) \ + COPY_ARR(d, s, posSlotEncoder) \ + COPY_ARR(d, s, posEncoders) \ + (d)->lenProbs = (s)->lenProbs; \ + (d)->repLenProbs = (s)->repLenProbs; \ + memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); + +void LzmaEnc_SaveState(CLzmaEncHandle p) { - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; + // GET_CLzmaEnc_p + CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(v, p, p) +} - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); +void LzmaEnc_RestoreState(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + const CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(p, v, p) } -void LzmaEnc_RestoreState(CLzmaEncHandle pp) +Z7_NO_INLINE +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) { - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - - - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); @@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->fastMode = (props.algo == 0); // p->_maxMode = True; MFB.btMode = (Byte)(props.btMode ? 1 : 0); + // MFB.btMode = (Byte)(props.btMode); { unsigned numHashBytes = 4; if (props.btMode) @@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) if (props.numHashBytes >= 5) numHashBytes = 5; MFB.numHashBytes = numHashBytes; + // MFB.numHashBytes_Min = 2; + MFB.numHashOutBits = (Byte)props.numHashOutBits; } MFB.cutValue = props.mc; p->writeEndMark = (BoolInt)props.writeEndMark; - #ifndef _7ZIP_ST + #ifndef Z7_ST /* if (newMultiThread != _multiThread) { @@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } -void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p MFB.expectedDataSize = expectedDataSiize; } @@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { const size_t num = (size_t)(p->buf - p->bufBase); if (p->res == SZ_OK) @@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) p->buf = p->bufBase; } -MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p) { UInt32 low = (UInt32)p->low; unsigned high = (unsigned)(p->low >> 32); @@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p) ttt = *(prob); \ newBound = (range >> kNumBitModelTotalBits) * ttt; -// #define _LZMA_ENC_USE_BRANCH +// #define Z7_LZMA_ENC_USE_BRANCH -#ifdef _LZMA_ENC_USE_BRANCH +#ifdef Z7_LZMA_ENC_USE_BRANCH #define RC_BIT(p, prob, bit) { \ RC_BIT_PRE(p, prob) \ @@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) CLzmaProb *prob = probs + (sym >> 8); UInt32 bit = (sym >> 7) & 1; sym <<= 1; - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn bit = (sym >> 7) & 1; sym <<= 1; offs &= ~(matchByte ^ sym); - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) #define GET_PRICE(prob, bit) \ - p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICEa(prob, bit) \ - ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] @@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi unsigned bit = sym & 1; // RangeEnc_EncodeBit(rc, probs + m, bit); sym >>= 1; - RC_BIT(rc, probs + m, bit); + RC_BIT(rc, probs + m, bit) m = (m << 1) | bit; } while (--numBits); @@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS UInt32 range, ttt, newBound; CLzmaProb *probs = p->low; range = rc->range; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) probs += kLenNumLowSymbols; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols * 2) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) rc->range = range; // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); @@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS { unsigned m; unsigned bit; - RC_BIT_0(rc, probs); + RC_BIT_0(rc, probs) probs += (posState << (1 + kLenNumLowBits)); - bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; - bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; - bit = sym & 1; RC_BIT(rc, probs + m, bit); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit) rc->range = range; } } @@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price } -MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( +Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( CLenPriceEnc *p, unsigned numPosStates, const CLenEnc *enc, @@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) + GET_PRICE_1(p->isRep[state]) \ + GET_PRICE_0(p->isRepG0[state]) -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { UInt32 price; @@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } - MakeAs_Lit(&p->opt[1]); + MakeAs_Lit(&p->opt[1]) matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); @@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (shortRepPrice < p->opt[1].price) { p->opt[1].price = shortRepPrice; - MakeAs_ShortRep(&p->opt[1]); + MakeAs_ShortRep(&p->opt[1]) } if (last < 2) { @@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) else { unsigned slot; - GetPosSlot2(dist, slot); + GetPosSlot2(dist, slot) price += p->alignPrices[dist & kAlignMask]; price += p->posSlotPrices[lenToPosState][slot]; } @@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) unsigned delta = best - cur; if (delta != 0) { - MOVE_POS(p, delta); + MOVE_POS(p, delta) } } cur = best; @@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = litPrice; nextOpt->len = 1; - MakeAs_Lit(nextOpt); + MakeAs_Lit(nextOpt) nextIsLit = True; } } @@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = shortRepPrice; nextOpt->len = 1; - MakeAs_ShortRep(nextOpt); + MakeAs_ShortRep(nextOpt) nextIsLit = False; } } @@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) for (len = /*2*/ startLen; ; len++) { @@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) break; dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) } } } @@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < (1 << kNumPosSlotBits)); @@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < kAlignTableSize); @@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p) if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - #ifndef _7ZIP_ST + #ifndef Z7_ST if ( // p->mf_Failure || (p->mtMode && @@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p) p->matchFinderMt.failure_LZ_BT)) ) { - p->result = MY_HRES_ERROR__INTERNAL_ERROR; + p->result = MY_HRES_ERROR_INTERNAL_ERROR; // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); } #endif @@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p) } -MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) } -MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) { unsigned i; const CProbPrice *ProbPrices = p->ProbPrices; @@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) } -MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) { // int y; for (y = 0; y < 100; y++) { @@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) RangeEnc_Construct(&p->rc); MatchFinder_Construct(&MFB); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); #endif @@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) { CLzmaEncProps props; LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); + LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props); } #ifndef LZMA_LOG_BSR @@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - #ifndef _7ZIP_ST + #ifndef Z7_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif @@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + // GET_CLzmaEnc_p + LzmaEnc_Destruct(p, alloc, allocBig); ISzAlloc_Free(alloc, p); } -MY_NO_INLINE +Z7_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) { - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { - RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)) } #endif p->matchFinder.Init(p->matchFinderObj); @@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa if (p->finished) return p->result; - RINOK(CheckErrors(p)); + RINOK(CheckErrors(p)) nowPos32 = (UInt32)p->nowPos64; startPos32 = nowPos32; @@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa const Byte *data; unsigned state; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; probs = LIT_PROBS(nowPos32, *(data - 1)); @@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRep[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist < LZMA_NUM_REPS) { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG0[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 0) { - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) probs = &p->isRep0Long[p->state][posState]; RC_BIT_PRE(&p->rc, probs) if (len != 1) { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) } else { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) p->state = kShortRepNextStates[p->state]; } } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG1[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 1) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[1]; } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG2[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 2) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[2]; } else { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) dist = p->reps[3]; p->reps[3] = p->reps[2]; } @@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa else { unsigned posSlot; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; p->state = kMatchNextStates[p->state]; @@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa p->reps[0] = dist + 1; p->matchPriceCount++; - GetPosSlot(dist, posSlot); + GetPosSlot(dist, posSlot) // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); { UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); @@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; sym <<= 1; - RC_BIT(&p->rc, prob, bit); + RC_BIT(&p->rc, prob, bit) } while (sym < (1 << kNumPosSlotBits * 2)); p->rc.range = range; @@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa { unsigned m = 1; unsigned bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) p->rc.range = range; // p->alignPriceCount++; } @@ -2704,7 +2691,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif @@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, (numFastBytes + LZMA_MATCH_LEN_MAX + 1) */ - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ - , allocBig)); + , allocBig)) p->matchFinderObj = &p->matchFinderMt; - MFB.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); + MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else @@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr p->finished = False; p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + p->nowPos64 = 0; + p->needInit = 1; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); - p->nowPos64 = 0; return SZ_OK; } -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, +static SRes LzmaEnc_Prepare(CLzmaEncHandle p, + ISeqOutStreamPtr outStream, + ISeqInStreamPtr inStream, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); } -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, + ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, + const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) { - MFB.directInput = 1; - MFB.bufferBase = (Byte *)src; - MFB.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - LzmaEnc_SetDataSize(pp, srcLen); + // GET_CLzmaEnc_p + MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen) + LzmaEnc_SetDataSize(p, srcLen); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -void LzmaEnc_Finish(CLzmaEncHandle pp) +void LzmaEnc_Finish(CLzmaEncHandle p) { - #ifndef _7ZIP_ST - CLzmaEnc *p = (CLzmaEnc *)pp; + #ifndef Z7_ST + // GET_CLzmaEnc_p if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); #else - UNUSED_VAR(pp); + UNUSED_VAR(p) #endif } @@ -2933,13 +2913,13 @@ typedef struct { ISeqOutStream vt; Byte *data; - SizeT rem; + size_t rem; BoolInt overflow; } CLzmaEnc_SeqOutStreamBuf; -static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size) { - CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf) if (p->rem < size) { size = p->rem; @@ -2956,25 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s /* -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } */ -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_const_CLzmaEnc_p return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } // (desiredPackSize == 0) is not allowed -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p UInt64 nowPos64; SRes res; CLzmaEnc_SeqOutStreamBuf outStream; @@ -3006,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } -MY_NO_INLINE -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) +Z7_NO_INLINE +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress) { SRes res = SZ_OK; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte allocaDummy[0x300]; allocaDummy[0] = 0; allocaDummy[1] = allocaDummy[0]; @@ -3033,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } } - LzmaEnc_Finish(p); + LzmaEnc_Finish((CLzmaEncHandle)(void *)p); /* if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) @@ -3045,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); + // GET_CLzmaEnc_p + RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig)) + return LzmaEnc_Encode2(p, progress); } -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size) { if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; { - const CLzmaEnc *p = (const CLzmaEnc *)pp; + // GET_CLzmaEnc_p const UInt32 dictSize = p->dictSize; UInt32 v; props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); @@ -3083,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) while (v < dictSize); } - SetUi32(props + 1, v); + SetUi32(props + 1, v) return SZ_OK; } } -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p) { - return (unsigned)((CLzmaEnc *)pp)->writeEndMark; + // GET_CLzmaEnc_p + return (unsigned)p->writeEndMark; } -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { SRes res; - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEnc_SeqOutStreamBuf outStream; @@ -3111,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte p->writeEndMark = writeEndMark; p->rc.outStream = &outStream.vt; - res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig); if (res == SZ_OK) { @@ -3120,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte res = SZ_ERROR_FAIL; } - *destLen -= outStream.rem; + *destLen -= (SizeT)outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; @@ -3129,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + CLzmaEncHandle p = LzmaEnc_Create(alloc); SRes res; if (!p) return SZ_ERROR_MEM; @@ -3151,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, /* -#ifndef _7ZIP_ST -void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +#ifndef Z7_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2]) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p lz_threads[0] = p->matchFinderMt.hashSync.thread; lz_threads[1] = p->matchFinderMt.btSync.thread; } diff --git a/3rdparty/7z/src/LzmaEnc.h b/3rdparty/7z/src/LzmaEnc.h index 26757ba6b0..08711cbac4 100644 --- a/3rdparty/7z/src/LzmaEnc.h +++ b/3rdparty/7z/src/LzmaEnc.h @@ -1,8 +1,8 @@ /* LzmaEnc.h -- LZMA Encoder -2019-10-30 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H +#ifndef ZIP7_INC_LZMA_ENC_H +#define ZIP7_INC_LZMA_ENC_H #include "7zTypes.h" @@ -10,7 +10,7 @@ EXTERN_C_BEGIN #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaEncProps +typedef struct { int level; /* 0 <= level <= 9 */ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version @@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps int fb; /* 5 <= fb <= 273, default = 32 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int numHashBytes; /* 2, 3 or 4, default = 4 */ + unsigned numHashOutBits; /* default = ? */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ int numThreads; /* 1 or 2, default = 2 */ + // int _pad; + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ @@ -51,7 +54,9 @@ SRes: SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ -typedef void * CLzmaEncHandle; +typedef struct CLzmaEnc CLzmaEnc; +typedef CLzmaEnc * CLzmaEncHandle; +// Z7_DECLARE_HANDLE(CLzmaEncHandle) CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); @@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); /* ---------- One Call Interface ---------- */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); EXTERN_C_END diff --git a/3rdparty/7z/src/LzmaLib.c b/3rdparty/7z/src/LzmaLib.c index c10cf1a0f2..350c1953de 100644 --- a/3rdparty/7z/src/LzmaLib.c +++ b/3rdparty/7z/src/LzmaLib.c @@ -1,12 +1,14 @@ /* LzmaLib.c -- LZMA library wrapper -2015-06-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" #include "Alloc.h" #include "LzmaDec.h" #include "LzmaEnc.h" #include "LzmaLib.h" -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, unsigned char *outProps, size_t *outPropsSize, int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ @@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char } -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, const unsigned char *props, size_t propsSize) { ELzmaStatus status; diff --git a/3rdparty/7z/src/LzmaLib.h b/3rdparty/7z/src/LzmaLib.h index 4103e224ac..79612a42e3 100644 --- a/3rdparty/7z/src/LzmaLib.h +++ b/3rdparty/7z/src/LzmaLib.h @@ -1,14 +1,14 @@ /* LzmaLib.h -- LZMA library interface -2021-04-03 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMA_LIB_H -#define __LZMA_LIB_H +#ifndef ZIP7_INC_LZMA_LIB_H +#define ZIP7_INC_LZMA_LIB_H #include "7zTypes.h" EXTERN_C_BEGIN -#define MY_STDAPI int MY_STD_CALL +#define Z7_STDAPI int Z7_STDCALL #define LZMA_PROPS_SIZE 5 @@ -100,7 +100,7 @@ Returns: SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* default = (1 << 24) */ @@ -130,7 +130,7 @@ Returns: SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) */ -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, const unsigned char *props, size_t propsSize); EXTERN_C_END diff --git a/3rdparty/7z/src/MtCoder.c b/3rdparty/7z/src/MtCoder.c index e39d9cb191..f264584ed8 100644 --- a/3rdparty/7z/src/MtCoder.c +++ b/3rdparty/7z/src/MtCoder.c @@ -1,28 +1,28 @@ /* MtCoder.c -- Multi-thread Coder -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "MtCoder.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST -static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) +static SRes MtProgressThunk_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize) { - CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMtProgressThunk) UInt64 inSize2 = 0; UInt64 outSize2 = 0; if (inSize != (UInt64)(Int64)-1) { - inSize2 = inSize - thunk->inSize; - thunk->inSize = inSize; + inSize2 = inSize - p->inSize; + p->inSize = inSize; } if (outSize != (UInt64)(Int64)-1) { - outSize2 = outSize - thunk->outSize; - thunk->outSize = outSize; + outSize2 = outSize - p->outSize; + p->outSize = outSize; } - return MtProgress_ProgressAdd(thunk->mtProgress, inSize2, outSize2); + return MtProgress_ProgressAdd(p->mtProgress, inSize2, outSize2); } @@ -36,20 +36,12 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p) #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } -static WRes ArEvent_OptCreate_And_Reset(CEvent *p) -{ - if (Event_IsCreated(p)) - return Event_Reset(p); - return AutoResetEvent_CreateNotSignaled(p); -} - - static THREAD_FUNC_DECL ThreadFunc(void *pp); static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) { - WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent); + WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent); if (wres == 0) { t->stop = False; @@ -84,24 +76,6 @@ static void MtCoderThread_Destruct(CMtCoderThread *t) -static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize) -{ - size_t size = *processedSize; - *processedSize = 0; - while (size != 0) - { - size_t cur = size; - SRes res = ISeqInStream_Read(stream, data, &cur); - *processedSize += cur; - data += cur; - size -= cur; - RINOK(res); - if (cur == 0) - return SZ_OK; - } - return SZ_OK; -} - /* ThreadFunc2() returns: @@ -152,7 +126,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) } if (res == SZ_OK) { - res = FullRead(mtc->inStream, t->inBuf, &size); + res = SeqInStream_ReadMax(mtc->inStream, t->inBuf, &size); readProcessed = mtc->readProcessed + size; mtc->readProcessed = readProcessed; } @@ -253,7 +227,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) block->finished = finished; } - #ifdef MTCODER__USE_WRITE_THREAD + #ifdef MTCODER_USE_WRITE_THREAD RINOK_THREAD(Event_Set(&mtc->writeEvents[bi])) #else { @@ -352,7 +326,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp) MtProgress_SetError(&mtc->mtProgress, res); } - #ifndef MTCODER__USE_WRITE_THREAD + #ifndef MTCODER_USE_WRITE_THREAD { unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); if (numFinished == mtc->numStartedThreads) @@ -389,7 +363,7 @@ void MtCoder_Construct(CMtCoder *p) Event_Construct(&p->readEvent); Semaphore_Construct(&p->blocksSemaphore); - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) { CMtCoderThread *t = &p->threads[i]; t->mtCoder = p; @@ -397,11 +371,11 @@ void MtCoder_Construct(CMtCoder *p) t->inBuf = NULL; t->stop = False; Event_Construct(&t->startEvent); - Thread_Construct(&t->thread); + Thread_CONSTRUCT(&t->thread) } - #ifdef MTCODER__USE_WRITE_THREAD - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + #ifdef MTCODER_USE_WRITE_THREAD + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) Event_Construct(&p->writeEvents[i]); #else Event_Construct(&p->finishedEvent); @@ -424,14 +398,14 @@ static void MtCoder_Free(CMtCoder *p) Event_Set(&p->readEvent); */ - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) MtCoderThread_Destruct(&p->threads[i]); Event_Close(&p->readEvent); Semaphore_Close(&p->blocksSemaphore); - #ifdef MTCODER__USE_WRITE_THREAD - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + #ifdef MTCODER_USE_WRITE_THREAD + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) Event_Close(&p->writeEvents[i]); #else Event_Close(&p->finishedEvent); @@ -455,20 +429,20 @@ SRes MtCoder_Code(CMtCoder *p) unsigned i; SRes res = SZ_OK; - if (numThreads > MTCODER__THREADS_MAX) - numThreads = MTCODER__THREADS_MAX; - numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads); + if (numThreads > MTCODER_THREADS_MAX) + numThreads = MTCODER_THREADS_MAX; + numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads); if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++; if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++; if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++; - if (numBlocksMax > MTCODER__BLOCKS_MAX) - numBlocksMax = MTCODER__BLOCKS_MAX; + if (numBlocksMax > MTCODER_BLOCKS_MAX) + numBlocksMax = MTCODER_BLOCKS_MAX; if (p->blockSize != p->allocatedBufsSize) { - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) { CMtCoderThread *t = &p->threads[i]; if (t->inBuf) @@ -484,23 +458,23 @@ SRes MtCoder_Code(CMtCoder *p) MtProgress_Init(&p->mtProgress, p->progress); - #ifdef MTCODER__USE_WRITE_THREAD + #ifdef MTCODER_USE_WRITE_THREAD for (i = 0; i < numBlocksMax; i++) { - RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i])); + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->writeEvents[i])) } #else - RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent)); + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent)) #endif { - RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent)); - RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)); + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent)) + RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)) } - for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++) p->freeBlockList[i] = i + 1; - p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1; + p->freeBlockList[MTCODER_BLOCKS_MAX - 1] = (unsigned)(int)-1; p->freeBlockHead = 0; p->readProcessed = 0; @@ -508,10 +482,10 @@ SRes MtCoder_Code(CMtCoder *p) p->numBlocksMax = numBlocksMax; p->stopReading = False; - #ifndef MTCODER__USE_WRITE_THREAD + #ifndef MTCODER_USE_WRITE_THREAD p->writeIndex = 0; p->writeRes = SZ_OK; - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) p->ReadyBlocks[i] = False; p->numFinishedThreads = 0; #endif @@ -522,12 +496,12 @@ SRes MtCoder_Code(CMtCoder *p) // for (i = 0; i < numThreads; i++) { CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++]; - RINOK(MtCoderThread_CreateAndStart(nextThread)); + RINOK(MtCoderThread_CreateAndStart(nextThread)) } RINOK_THREAD(Event_Set(&p->readEvent)) - #ifdef MTCODER__USE_WRITE_THREAD + #ifdef MTCODER_USE_WRITE_THREAD { unsigned bi = 0; @@ -582,7 +556,7 @@ SRes MtCoder_Code(CMtCoder *p) if (res == SZ_OK) res = p->mtProgress.res; - #ifndef MTCODER__USE_WRITE_THREAD + #ifndef MTCODER_USE_WRITE_THREAD if (res == SZ_OK) res = p->writeRes; #endif @@ -593,3 +567,5 @@ SRes MtCoder_Code(CMtCoder *p) } #endif + +#undef RINOK_THREAD diff --git a/3rdparty/7z/src/MtCoder.h b/3rdparty/7z/src/MtCoder.h index 603329d367..c031fe0f73 100644 --- a/3rdparty/7z/src/MtCoder.h +++ b/3rdparty/7z/src/MtCoder.h @@ -1,30 +1,30 @@ /* MtCoder.h -- Multi-thread Coder -2018-07-04 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __MT_CODER_H -#define __MT_CODER_H +#ifndef ZIP7_INC_MT_CODER_H +#define ZIP7_INC_MT_CODER_H #include "MtDec.h" EXTERN_C_BEGIN /* - if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream - if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream + if ( defined MTCODER_USE_WRITE_THREAD) : main thread writes all data blocks to output stream + if (not defined MTCODER_USE_WRITE_THREAD) : any coder thread can write data blocks to output stream */ -/* #define MTCODER__USE_WRITE_THREAD */ +/* #define MTCODER_USE_WRITE_THREAD */ -#ifndef _7ZIP_ST - #define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) - #define MTCODER__THREADS_MAX 64 - #define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3) +#ifndef Z7_ST + #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) + #define MTCODER_THREADS_MAX 64 + #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3) #else - #define MTCODER__THREADS_MAX 1 - #define MTCODER__BLOCKS_MAX 1 + #define MTCODER_THREADS_MAX 1 + #define MTCODER_BLOCKS_MAX 1 #endif -#ifndef _7ZIP_ST +#ifndef Z7_ST typedef struct @@ -37,15 +37,15 @@ typedef struct void MtProgressThunk_CreateVTable(CMtProgressThunk *p); -#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; } +#define MtProgressThunk_INIT(p) { (p)->inSize = 0; (p)->outSize = 0; } -struct _CMtCoder; +struct CMtCoder_; typedef struct { - struct _CMtCoder *mtCoder; + struct CMtCoder_ *mtCoder; unsigned index; int stop; Byte *inBuf; @@ -71,7 +71,7 @@ typedef struct } CMtCoderBlock; -typedef struct _CMtCoder +typedef struct CMtCoder_ { /* input variables */ @@ -79,11 +79,11 @@ typedef struct _CMtCoder unsigned numThreadsMax; UInt64 expectedDataSize; - ISeqInStream *inStream; + ISeqInStreamPtr inStream; const Byte *inData; size_t inDataSize; - ICompressProgress *progress; + ICompressProgressPtr progress; ISzAllocPtr allocBig; IMtCoderCallback2 *mtCallback; @@ -100,13 +100,13 @@ typedef struct _CMtCoder BoolInt stopReading; SRes readRes; - #ifdef MTCODER__USE_WRITE_THREAD - CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX]; + #ifdef MTCODER_USE_WRITE_THREAD + CAutoResetEvent writeEvents[MTCODER_BLOCKS_MAX]; #else CAutoResetEvent finishedEvent; SRes writeRes; unsigned writeIndex; - Byte ReadyBlocks[MTCODER__BLOCKS_MAX]; + Byte ReadyBlocks[MTCODER_BLOCKS_MAX]; LONG numFinishedThreads; #endif @@ -120,11 +120,11 @@ typedef struct _CMtCoder CCriticalSection cs; unsigned freeBlockHead; - unsigned freeBlockList[MTCODER__BLOCKS_MAX]; + unsigned freeBlockList[MTCODER_BLOCKS_MAX]; CMtProgress mtProgress; - CMtCoderBlock blocks[MTCODER__BLOCKS_MAX]; - CMtCoderThread threads[MTCODER__THREADS_MAX]; + CMtCoderBlock blocks[MTCODER_BLOCKS_MAX]; + CMtCoderThread threads[MTCODER_THREADS_MAX]; } CMtCoder; diff --git a/3rdparty/7z/src/MtDec.c b/3rdparty/7z/src/MtDec.c index 854087b929..5aef9447d8 100644 --- a/3rdparty/7z/src/MtDec.c +++ b/3rdparty/7z/src/MtDec.c @@ -1,5 +1,5 @@ /* MtDec.c -- Multi-thread Decoder -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -14,7 +14,7 @@ #include "MtDec.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #ifdef SHOW_DEBUG_INFO #define PRF(x) x @@ -24,7 +24,7 @@ #define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) -void MtProgress_Init(CMtProgress *p, ICompressProgress *progress) +void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress) { p->progress = progress; p->res = SZ_OK; @@ -81,36 +81,28 @@ void MtProgress_SetError(CMtProgress *p, SRes res) #define RINOK_THREAD(x) RINOK_WRes(x) -static WRes ArEvent_OptCreate_And_Reset(CEvent *p) +struct CMtDecBufLink_ { - if (Event_IsCreated(p)) - return Event_Reset(p); - return AutoResetEvent_CreateNotSignaled(p); -} - - -struct __CMtDecBufLink -{ - struct __CMtDecBufLink *next; + struct CMtDecBufLink_ *next; void *pad[3]; }; -typedef struct __CMtDecBufLink CMtDecBufLink; +typedef struct CMtDecBufLink_ CMtDecBufLink; #define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink) #define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET) -static THREAD_FUNC_DECL ThreadFunc(void *pp); +static THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp); static WRes MtDecThread_CreateEvents(CMtDecThread *t) { - WRes wres = ArEvent_OptCreate_And_Reset(&t->canWrite); + WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->canWrite); if (wres == 0) { - wres = ArEvent_OptCreate_And_Reset(&t->canRead); + wres = AutoResetEvent_OptCreate_And_Reset(&t->canRead); if (wres == 0) return SZ_OK; } @@ -126,7 +118,7 @@ static SRes MtDecThread_CreateAndStart(CMtDecThread *t) { if (Thread_WasCreated(&t->thread)) return SZ_OK; - wres = Thread_Create(&t->thread, ThreadFunc, t); + wres = Thread_Create(&t->thread, MtDec_ThreadFunc, t); if (wres == 0) return SZ_OK; } @@ -167,7 +159,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t) static void MtDec_CloseThreads(CMtDec *p) { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) MtDecThread_CloseThread(&p->threads[i]); } @@ -179,25 +171,6 @@ static void MtDecThread_Destruct(CMtDecThread *t) -static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize) -{ - size_t size = *processedSize; - *processedSize = 0; - while (size != 0) - { - size_t cur = size; - SRes res = ISeqInStream_Read(stream, data, &cur); - *processedSize += cur; - data += cur; - size -= cur; - RINOK(res); - if (cur == 0) - return SZ_OK; - } - return SZ_OK; -} - - static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted) { SRes res; @@ -253,7 +226,7 @@ Byte *MtDec_GetCrossBuff(CMtDec *p) /* - ThreadFunc2() returns: + MtDec_ThreadFunc2() returns: 0 - in all normal cases (even for stream error or memory allocation error) (!= 0) - WRes error return by system threading function */ @@ -261,11 +234,11 @@ Byte *MtDec_GetCrossBuff(CMtDec *p) // #define MTDEC_ProgessStep (1 << 22) #define MTDEC_ProgessStep (1 << 0) -static WRes ThreadFunc2(CMtDecThread *t) +static WRes MtDec_ThreadFunc2(CMtDecThread *t) { CMtDec *p = t->mtDec; - PRF_STR_INT("ThreadFunc2", t->index); + PRF_STR_INT("MtDec_ThreadFunc2", t->index) // SetThreadAffinityMask(GetCurrentThread(), 1 << t->index); @@ -295,13 +268,13 @@ static WRes ThreadFunc2(CMtDecThread *t) // CMtDecCallbackInfo parse; CMtDecThread *nextThread; - PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index); + PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index) - RINOK_THREAD(Event_Wait(&t->canRead)); + RINOK_THREAD(Event_Wait(&t->canRead)) if (p->exitThread) return 0; - PRF_STR_INT("after Event_Wait(&t->canRead)", t->index); + PRF_STR_INT("after Event_Wait(&t->canRead)", t->index) // if (t->index == 3) return 19; // for test @@ -373,7 +346,7 @@ static WRes ThreadFunc2(CMtDecThread *t) { size = p->inBufSize; - res = FullRead(p->inStream, data, &size); + res = SeqInStream_ReadMax(p->inStream, data, &size); // size = 10; // test @@ -615,7 +588,7 @@ static WRes ThreadFunc2(CMtDecThread *t) // if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case // if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block): // - if (needContinue) after Write(&needContinue), we restore decoding with new iteration - // - otherwise we stop decoding and exit from ThreadFunc2() + // - otherwise we stop decoding and exit from MtDec_ThreadFunc2() // Don't change (finish) variable in the further code @@ -688,7 +661,7 @@ static WRes ThreadFunc2(CMtDecThread *t) // ---------- WRITE ---------- - RINOK_THREAD(Event_Wait(&t->canWrite)); + RINOK_THREAD(Event_Wait(&t->canWrite)) { BoolInt isErrorMode = False; @@ -801,14 +774,14 @@ static WRes ThreadFunc2(CMtDecThread *t) if (!finish) { - RINOK_THREAD(Event_Set(&nextThread->canWrite)); + RINOK_THREAD(Event_Set(&nextThread->canWrite)) } else { if (needContinue) { // we restore decoding with new iteration - RINOK_THREAD(Event_Set(&p->threads[0].canWrite)); + RINOK_THREAD(Event_Set(&p->threads[0].canWrite)) } else { @@ -817,7 +790,7 @@ static WRes ThreadFunc2(CMtDecThread *t) return SZ_OK; p->exitThread = True; } - RINOK_THREAD(Event_Set(&p->threads[0].canRead)); + RINOK_THREAD(Event_Set(&p->threads[0].canRead)) } } } @@ -836,7 +809,7 @@ static WRes ThreadFunc2(CMtDecThread *t) #endif -static THREAD_FUNC_DECL ThreadFunc1(void *pp) +static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) { WRes res; @@ -845,7 +818,7 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp) // fprintf(stdout, "\n%d = %p\n", t->index, &t); - res = ThreadFunc2(t); + res = MtDec_ThreadFunc2(t); p = t->mtDec; if (res == 0) return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; @@ -862,14 +835,14 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp) return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; } -static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp) +static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp) { #ifdef USE_ALLOCA CMtDecThread *t = (CMtDecThread *)pp; // fprintf(stderr, "\n%d = %p - before", t->index, &t); t->allocaPtr = alloca(t->index * 128); #endif - return ThreadFunc1(pp); + return MtDec_ThreadFunc1(pp); } @@ -883,7 +856,7 @@ int MtDec_PrepareRead(CMtDec *p) { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) if (i > p->numStartedThreads || p->numFilledThreads <= (i >= p->filledThreadStart ? @@ -987,7 +960,7 @@ void MtDec_Construct(CMtDec *p) p->allocatedBufsSize = 0; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CMtDecThread *t = &p->threads[i]; t->mtDec = p; @@ -995,7 +968,7 @@ void MtDec_Construct(CMtDec *p) t->inBuf = NULL; Event_Construct(&t->canRead); Event_Construct(&t->canWrite); - Thread_Construct(&t->thread); + Thread_CONSTRUCT(&t->thread) } // Event_Construct(&p->finishedEvent); @@ -1010,7 +983,7 @@ static void MtDec_Free(CMtDec *p) p->exitThread = True; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) MtDecThread_Destruct(&p->threads[i]); // Event_Close(&p->finishedEvent); @@ -1061,15 +1034,15 @@ SRes MtDec_Code(CMtDec *p) { unsigned numThreads = p->numThreadsMax; - if (numThreads > MTDEC__THREADS_MAX) - numThreads = MTDEC__THREADS_MAX; + if (numThreads > MTDEC_THREADS_MAX) + numThreads = MTDEC_THREADS_MAX; p->numStartedThreads_Limit = numThreads; p->numStartedThreads = 0; } if (p->inBufSize != p->allocatedBufsSize) { - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CMtDecThread *t = &p->threads[i]; if (t->inBuf) @@ -1086,7 +1059,7 @@ SRes MtDec_Code(CMtDec *p) MtProgress_Init(&p->mtProgress, p->progress); - // RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent)); + // RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent)) p->exitThread = False; p->exitThreadWRes = 0; @@ -1098,7 +1071,7 @@ SRes MtDec_Code(CMtDec *p) wres = MtDecThread_CreateEvents(nextThread); if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canRead); - if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread); + if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread); wres = (WRes)(UINT_PTR)res; if (wres != 0) { @@ -1137,3 +1110,5 @@ SRes MtDec_Code(CMtDec *p) } #endif + +#undef PRF diff --git a/3rdparty/7z/src/MtDec.h b/3rdparty/7z/src/MtDec.h index 7a30b6a9e5..f214b3ab43 100644 --- a/3rdparty/7z/src/MtDec.h +++ b/3rdparty/7z/src/MtDec.h @@ -1,46 +1,46 @@ /* MtDec.h -- Multi-thread Decoder -2020-03-05 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __MT_DEC_H -#define __MT_DEC_H +#ifndef ZIP7_INC_MT_DEC_H +#define ZIP7_INC_MT_DEC_H #include "7zTypes.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "Threads.h" #endif EXTERN_C_BEGIN -#ifndef _7ZIP_ST +#ifndef Z7_ST -#ifndef _7ZIP_ST - #define MTDEC__THREADS_MAX 32 +#ifndef Z7_ST + #define MTDEC_THREADS_MAX 32 #else - #define MTDEC__THREADS_MAX 1 + #define MTDEC_THREADS_MAX 1 #endif typedef struct { - ICompressProgress *progress; + ICompressProgressPtr progress; SRes res; UInt64 totalInSize; UInt64 totalOutSize; CCriticalSection cs; } CMtProgress; -void MtProgress_Init(CMtProgress *p, ICompressProgress *progress); +void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress); SRes MtProgress_Progress_ST(CMtProgress *p); SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize); SRes MtProgress_GetError(CMtProgress *p); void MtProgress_SetError(CMtProgress *p, SRes res); -struct _CMtDec; +struct CMtDec; typedef struct { - struct _CMtDec *mtDec; + struct CMtDec_ *mtDec; unsigned index; void *inBuf; @@ -117,7 +117,7 @@ typedef struct -typedef struct _CMtDec +typedef struct CMtDec_ { /* input variables */ @@ -126,11 +126,11 @@ typedef struct _CMtDec // size_t inBlockMax; unsigned numThreadsMax_2; - ISeqInStream *inStream; + ISeqInStreamPtr inStream; // const Byte *inData; // size_t inDataSize; - ICompressProgress *progress; + ICompressProgressPtr progress; ISzAllocPtr alloc; IMtDecCallback2 *mtCallback; @@ -171,11 +171,11 @@ typedef struct _CMtDec unsigned filledThreadStart; unsigned numFilledThreads; - #ifndef _7ZIP_ST + #ifndef Z7_ST BoolInt needInterrupt; UInt64 interruptIndex; CMtProgress mtProgress; - CMtDecThread threads[MTDEC__THREADS_MAX]; + CMtDecThread threads[MTDEC_THREADS_MAX]; #endif } CMtDec; diff --git a/3rdparty/7z/src/Ppmd.h b/3rdparty/7z/src/Ppmd.h index ee93ecece4..da1ed375ed 100644 --- a/3rdparty/7z/src/Ppmd.h +++ b/3rdparty/7z/src/Ppmd.h @@ -1,9 +1,9 @@ /* Ppmd.h -- PPMD codec common code -2021-04-13 : Igor Pavlov : Public domain +2023-03-05 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ -#ifndef __PPMD_H -#define __PPMD_H +#ifndef ZIP7_INC_PPMD_H +#define ZIP7_INC_PPMD_H #include "CpuArch.h" @@ -48,8 +48,10 @@ typedef struct Byte Count; /* Count to next change of Shift */ } CPpmd_See; -#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ - { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); } +#define Ppmd_See_UPDATE(p) \ + { if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ + { (p)->Summ = (UInt16)((p)->Summ << 1); \ + (p)->Count = (Byte)(3 << (p)->Shift++); }} typedef struct diff --git a/3rdparty/7z/src/Ppmd7.c b/3rdparty/7z/src/Ppmd7.c index b6ecf1430d..d84e6fe9af 100644 --- a/3rdparty/7z/src/Ppmd7.c +++ b/3rdparty/7z/src/Ppmd7.c @@ -1,5 +1,5 @@ /* Ppmd7.c -- PPMdH codec -2021-04-13 : Igor Pavlov : Public domain +2023-04-02 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Precomp.h" @@ -14,7 +14,7 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ MY_ALIGN(16) static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; MY_ALIGN(16) -static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; +static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; #define MAX_FREQ 124 #define UNIT_SIZE 12 @@ -33,7 +33,7 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x #define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) #define SUFFIX(ctx) CTX((ctx)->Suffix) -typedef CPpmd7_Context * CTX_PTR; +typedef CPpmd7_Context * PPMD7_CTX_PTR; struct CPpmd7_Node_; @@ -107,14 +107,14 @@ BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) // ---------- Internal Memory Allocator ---------- /* We can use CPpmd7_Node in list of free units (as in Ppmd8) - But we still need one additional list walk pass in GlueFreeBlocks(). - So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode() + But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks(). + So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode() */ #define EMPTY_NODE 0 -static void InsertNode(CPpmd7 *p, void *node, unsigned indx) +static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx) { *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; @@ -124,7 +124,7 @@ static void InsertNode(CPpmd7 *p, void *node, unsigned indx) } -static void *RemoveNode(CPpmd7 *p, unsigned indx) +static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx) { CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); p->FreeList[indx] = *node; @@ -134,32 +134,32 @@ static void *RemoveNode(CPpmd7 *p, unsigned indx) } -static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) { unsigned i, nu = I2U(oldIndx) - I2U(newIndx); ptr = (Byte *)ptr + U2B(I2U(newIndx)); if (I2U(i = U2I(nu)) != nu) { unsigned k = I2U(--i); - InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); } - InsertNode(p, ptr, i); + Ppmd7_InsertNode(p, ptr, i); } /* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ -typedef union _CPpmd7_Node_Union +typedef union { CPpmd7_Node Node; CPpmd7_Node_Ref NextRef; } CPpmd7_Node_Union; -/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks() +/* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks() we use single linked list similar to Ppmd8 code */ -static void GlueFreeBlocks(CPpmd7 *p) +static void Ppmd7_GlueFreeBlocks(CPpmd7 *p) { /* we use first UInt16 field of 12-bytes UNITs as record type stamp @@ -239,27 +239,27 @@ static void GlueFreeBlocks(CPpmd7 *p) if (nu == 0) continue; for (; nu > 128; nu -= 128, node += 128) - InsertNode(p, node, PPMD_NUM_INDEXES - 1); + Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1); if (I2U(i = U2I(nu)) != nu) { unsigned k = I2U(--i); - InsertNode(p, node + k, (unsigned)nu - k - 1); + Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1); } - InsertNode(p, node, i); + Ppmd7_InsertNode(p, node, i); } } -MY_NO_INLINE -static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) +Z7_NO_INLINE +static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx) { unsigned i; if (p->GlueCount == 0) { - GlueFreeBlocks(p); + Ppmd7_GlueFreeBlocks(p); if (p->FreeList[indx] != 0) - return RemoveNode(p, indx); + return Ppmd7_RemoveNode(p, indx); } i = indx; @@ -277,17 +277,17 @@ static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) while (p->FreeList[i] == 0); { - void *block = RemoveNode(p, i); - SplitBlock(p, block, i, indx); + void *block = Ppmd7_RemoveNode(p, i); + Ppmd7_SplitBlock(p, block, i, indx); return block; } } -static void *AllocUnits(CPpmd7 *p, unsigned indx) +static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx) { if (p->FreeList[indx] != 0) - return RemoveNode(p, indx); + return Ppmd7_RemoveNode(p, indx); { UInt32 numBytes = U2B(I2U(indx)); Byte *lo = p->LoUnit; @@ -297,11 +297,11 @@ static void *AllocUnits(CPpmd7 *p, unsigned indx) return lo; } } - return AllocUnitsRare(p, indx); + return Ppmd7_AllocUnitsRare(p, indx); } -#define MyMem12Cpy(dest, src, num) \ +#define MEM_12_CPY(dest, src, num) \ { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } @@ -315,12 +315,12 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU return oldPtr; if (p->FreeList[i1] != 0) { - void *ptr = RemoveNode(p, i1); - MyMem12Cpy(ptr, oldPtr, newNU); - InsertNode(p, oldPtr, i0); + void *ptr = Ppmd7_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd7_InsertNode(p, oldPtr, i0); return ptr; } - SplitBlock(p, oldPtr, i0, i1); + Ppmd7_SplitBlock(p, oldPtr, i0, i1); return oldPtr; } */ @@ -329,14 +329,14 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) { - Ppmd_SET_SUCCESSOR(p, v); + Ppmd_SET_SUCCESSOR(p, v) } -MY_NO_INLINE +Z7_NO_INLINE static -void RestartModel(CPpmd7 *p) +void Ppmd7_RestartModel(CPpmd7 *p) { unsigned i, k; @@ -352,8 +352,8 @@ void RestartModel(CPpmd7 *p) p->PrevSuccess = 0; { - CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ - CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ p->LoUnit += U2B(256 / 2); p->MaxContext = p->MinContext = mc; @@ -391,7 +391,7 @@ void RestartModel(CPpmd7 *p) { unsigned m; UInt16 *dest = p->BinSumm[i] + k; - UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2)); for (m = 0; m < 64; m += 8) dest[m] = val; } @@ -423,13 +423,13 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) { p->MaxOrder = maxOrder; - RestartModel(p); + Ppmd7_RestartModel(p); } /* - CreateSuccessors() + Ppmd7_CreateSuccessors() It's called when (FoundState->Successor) is RAW-Successor, that is the link to position in Raw text. So we create Context records and write the links to @@ -445,10 +445,10 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) also it can return pointer to real context of same order, */ -MY_NO_INLINE -static CTX_PTR CreateSuccessors(CPpmd7 *p) +Z7_NO_INLINE +static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p) { - CTX_PTR c = p->MinContext; + PPMD7_CTX_PTR c = p->MinContext; CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); Byte newSym, newFreq; unsigned numPs = 0; @@ -522,15 +522,15 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p) do { - CTX_PTR c1; + PPMD7_CTX_PTR c1; /* = AllocContext(p); */ if (p->HiUnit != p->LoUnit) - c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); else if (p->FreeList[0] != 0) - c1 = (CTX_PTR)RemoveNode(p, 0); + c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0); else { - c1 = (CTX_PTR)AllocUnitsRare(p, 0); + c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0); if (!c1) return NULL; } @@ -550,16 +550,16 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p) -#define SwapStates(s) \ +#define SWAP_STATES(s) \ { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } void Ppmd7_UpdateModel(CPpmd7 *p); -MY_NO_INLINE +Z7_NO_INLINE void Ppmd7_UpdateModel(CPpmd7 *p) { CPpmd_Void_Ref maxSuccessor, minSuccessor; - CTX_PTR c, mc; + PPMD7_CTX_PTR c, mc; unsigned s0, ns; @@ -592,7 +592,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p) if (s[0].Freq >= s[-1].Freq) { - SwapStates(s); + SWAP_STATES(s) s--; } } @@ -610,10 +610,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p) { /* MAX ORDER context */ /* (FoundState->Successor) is RAW-Successor. */ - p->MaxContext = p->MinContext = CreateSuccessors(p); + p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p); if (!p->MinContext) { - RestartModel(p); + Ppmd7_RestartModel(p); return; } SetSuccessor(p->FoundState, REF(p->MinContext)); @@ -629,7 +629,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p) p->Text = text; if (text >= p->UnitsStart) { - RestartModel(p); + Ppmd7_RestartModel(p); return; } maxSuccessor = REF(text); @@ -645,10 +645,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p) if (minSuccessor <= maxSuccessor) { // minSuccessor is RAW-Successor. So we will create real contexts records: - CTX_PTR cs = CreateSuccessors(p); + PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p); if (!cs) { - RestartModel(p); + Ppmd7_RestartModel(p); return; } minSuccessor = REF(cs); @@ -715,16 +715,16 @@ void Ppmd7_UpdateModel(CPpmd7 *p) unsigned i = U2I(oldNU); if (i != U2I((size_t)oldNU + 1)) { - void *ptr = AllocUnits(p, i + 1); + void *ptr = Ppmd7_AllocUnits(p, i + 1); void *oldPtr; if (!ptr) { - RestartModel(p); + Ppmd7_RestartModel(p); return; } oldPtr = STATS(c); - MyMem12Cpy(ptr, oldPtr, oldNU); - InsertNode(p, oldPtr, i); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd7_InsertNode(p, oldPtr, i); c->Union4.Stats = STATS_REF(ptr); } } @@ -739,10 +739,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p) else { // instead of One-symbol context we create 2-symbol context - CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); + CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0); if (!s) { - RestartModel(p); + Ppmd7_RestartModel(p); return; } { @@ -795,8 +795,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p) -MY_NO_INLINE -static void Rescale(CPpmd7 *p) +Z7_NO_INLINE +static void Ppmd7_Rescale(CPpmd7 *p) { unsigned i, adder, sumFreq, escFreq; CPpmd_State *stats = STATS(p->MinContext); @@ -885,7 +885,7 @@ static void Rescale(CPpmd7 *p) *s = *stats; s->Freq = (Byte)freq; // (freq <= 260 / 4) p->FoundState = s; - InsertNode(p, stats, U2I(n0)); + Ppmd7_InsertNode(p, stats, U2I(n0)); return; } @@ -899,13 +899,13 @@ static void Rescale(CPpmd7 *p) { if (p->FreeList[i1] != 0) { - void *ptr = RemoveNode(p, i1); + void *ptr = Ppmd7_RemoveNode(p, i1); p->MinContext->Union4.Stats = STATS_REF(ptr); - MyMem12Cpy(ptr, (const void *)stats, n1); - InsertNode(p, stats, i0); + MEM_12_CPY(ptr, (const void *)stats, n1) + Ppmd7_InsertNode(p, stats, i0); } else - SplitBlock(p, stats, i0, i1); + Ppmd7_SplitBlock(p, stats, i0, i1); } } } @@ -948,9 +948,9 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) } -static void NextContext(CPpmd7 *p) +static void Ppmd7_NextContext(CPpmd7 *p) { - CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); if (p->OrderFall == 0 && (const Byte *)c > p->Text) p->MaxContext = p->MinContext = c; else @@ -967,12 +967,12 @@ void Ppmd7_Update1(CPpmd7 *p) s->Freq = (Byte)freq; if (freq > s[-1].Freq) { - SwapStates(s); + SWAP_STATES(s) p->FoundState = --s; if (freq > MAX_FREQ) - Rescale(p); + Ppmd7_Rescale(p); } - NextContext(p); + Ppmd7_NextContext(p); } @@ -988,8 +988,8 @@ void Ppmd7_Update1_0(CPpmd7 *p) freq += 4; s->Freq = (Byte)freq; if (freq > MAX_FREQ) - Rescale(p); - NextContext(p); + Ppmd7_Rescale(p); + Ppmd7_NextContext(p); } @@ -1000,7 +1000,7 @@ void Ppmd7_UpdateBin(CPpmd7 *p) p->FoundState->Freq = (Byte)(freq + (freq < 128)); p->PrevSuccess = 1; p->RunLength++; - NextContext(p); + Ppmd7_NextContext(p); } */ @@ -1013,7 +1013,7 @@ void Ppmd7_Update2(CPpmd7 *p) p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); s->Freq = (Byte)freq; if (freq > MAX_FREQ) - Rescale(p); + Ppmd7_Rescale(p); Ppmd7_UpdateModel(p); } @@ -1042,8 +1042,8 @@ Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. The code doesn't free UNITs allocated for CPpmd7_Context records. -The code calls RestartModel(), when there is no free memory for allocation. -And RestartModel() changes the state to orignal start state, with full free block. +The code calls Ppmd7_RestartModel(), when there is no free memory for allocation. +And Ppmd7_RestartModel() changes the state to orignal start state, with full free block. The code allocates UNITs with the following order: @@ -1051,14 +1051,14 @@ The code allocates UNITs with the following order: Allocation of 1 UNIT for Context record - from free space (HiUnit) down to (LoUnit) - from FreeList[0] - - AllocUnitsRare() + - Ppmd7_AllocUnitsRare() -AllocUnits() for CPpmd_State vectors: +Ppmd7_AllocUnits() for CPpmd_State vectors: - from FreeList[i] - from free space (LoUnit) up to (HiUnit) - - AllocUnitsRare() + - Ppmd7_AllocUnitsRare() -AllocUnitsRare() +Ppmd7_AllocUnitsRare() - if (GlueCount == 0) { Glue lists, GlueCount = 255, allocate from FreeList[i]] } - loop for all higher sized FreeList[...] lists @@ -1093,8 +1093,8 @@ The PPMd code tries to fulfill the condition: We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. -SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions. -Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for +SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions. +Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for max-order context. When the PPMd code still break (Total <= RC::Range) condition in range coder, @@ -1102,3 +1102,21 @@ we have two ways to resolve that problem: 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. */ + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U +#undef I2U_UInt16 +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/3rdparty/7z/src/Ppmd7.h b/3rdparty/7z/src/Ppmd7.h index 297e35fe9f..65c22ae9a1 100644 --- a/3rdparty/7z/src/Ppmd7.h +++ b/3rdparty/7z/src/Ppmd7.h @@ -1,11 +1,11 @@ /* Ppmd7.h -- Ppmd7 (PPMdH) compression codec -2021-04-13 : Igor Pavlov : Public domain +2023-04-02 : Igor Pavlov : Public domain This code is based on: PPMd var.H (2001): Dmitry Shkarin : Public domain */ -#ifndef __PPMD7_H -#define __PPMD7_H +#ifndef ZIP7_INC_PPMD7_H +#define ZIP7_INC_PPMD7_H #include "Ppmd.h" @@ -55,7 +55,7 @@ typedef struct UInt32 Range; UInt32 Code; UInt32 Low; - IByteIn *Stream; + IByteInPtr Stream; } CPpmd7_RangeDec; @@ -66,7 +66,7 @@ typedef struct // Byte _dummy_[3]; UInt64 Low; UInt64 CacheSize; - IByteOut *Stream; + IByteOutPtr Stream; } CPpmd7z_RangeEnc; diff --git a/3rdparty/7z/src/Ppmd7Dec.c b/3rdparty/7z/src/Ppmd7Dec.c index a18f0b8733..d45e24a948 100644 --- a/3rdparty/7z/src/Ppmd7Dec.c +++ b/3rdparty/7z/src/Ppmd7Dec.c @@ -1,5 +1,5 @@ /* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder -2021-04-13 : Igor Pavlov : Public domain +2023-04-02 : Igor Pavlov : Public domain This code is based on: PPMd var.H (2001): Dmitry Shkarin : Public domain */ @@ -8,7 +8,7 @@ This code is based on: #include "Ppmd7.h" -#define kTopValue (1 << 24) +#define kTopValue ((UInt32)1 << 24) #define READ_BYTE(p) IByteIn_Read((p)->Stream) @@ -37,9 +37,9 @@ BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) #define R (&p->rc.dec) -MY_FORCE_INLINE -// MY_NO_INLINE -static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) { @@ -48,18 +48,18 @@ static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) RC_NORM_LOCAL(R) } -#define RC_Decode(start, size) RangeDec_Decode(p, start, size); -#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) -#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) +#define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) -typedef CPpmd7_Context * CTX_PTR; +// typedef CPpmd7_Context * CTX_PTR; #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) void Ppmd7_UpdateModel(CPpmd7 *p); #define MASK(sym) ((unsigned char *)charMask)[sym] -// MY_FORCE_INLINE +// Z7_FORCE_INLINE // static int Ppmd7z_DecodeSymbol(CPpmd7 *p) { @@ -70,7 +70,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); unsigned i; UInt32 count, hiCnt; - UInt32 summFreq = p->MinContext->Union2.SummFreq; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; @@ -81,7 +81,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) if ((Int32)(count -= s->Freq) < 0) { Byte sym; - RC_DecodeFinal(0, s->Freq); + RC_DecodeFinal(0, s->Freq) p->FoundState = s; sym = s->Symbol; Ppmd7_Update1_0(p); @@ -96,7 +96,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) if ((Int32)(count -= (++s)->Freq) < 0) { Byte sym; - RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) p->FoundState = s; sym = s->Symbol; Ppmd7_Update1(p); @@ -109,10 +109,10 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) return PPMD7_SYM_ERROR; hiCnt -= count; - RC_Decode(hiCnt, summFreq - hiCnt); + RC_Decode(hiCnt, summFreq - hiCnt) p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); - PPMD_SetAllBitsIn256Bytes(charMask); + PPMD_SetAllBitsIn256Bytes(charMask) // i = p->MinContext->NumStats - 1; // do { MASK((--s)->Symbol) = 0; } while (--i); { @@ -152,7 +152,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) // Ppmd7_UpdateBin(p); { unsigned freq = s->Freq; - CTX_PTR c = CTX(SUCCESSOR(s)); + CPpmd7_Context *c = CTX(SUCCESSOR(s)); sym = s->Symbol; p->FoundState = s; p->PrevSuccess = 1; @@ -176,7 +176,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) R->Range -= size0; RC_NORM_LOCAL(R) - PPMD_SetAllBitsIn256Bytes(charMask); + PPMD_SetAllBitsIn256Bytes(charMask) MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; p->PrevSuccess = 0; } @@ -245,13 +245,13 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) { count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; - }; + } } s--; - RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) // new (see->Summ) value can overflow over 16-bits in some rare cases - Ppmd_See_Update(see); + Ppmd_See_UPDATE(see) p->FoundState = s; sym = s->Symbol; Ppmd7_Update2(p); @@ -261,7 +261,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) if (count >= freqSum) return PPMD7_SYM_ERROR; - RC_Decode(hiCnt, freqSum - hiCnt); + RC_Decode(hiCnt, freqSum - hiCnt) // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. // new (see->Summ) value can overflow over 16-bits in some rare cases @@ -295,3 +295,18 @@ Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) return buf; } */ + +#undef kTopValue +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/3rdparty/7z/src/Ppmd7Enc.c b/3rdparty/7z/src/Ppmd7Enc.c index 6af1ec15ec..1d2d370aca 100644 --- a/3rdparty/7z/src/Ppmd7Enc.c +++ b/3rdparty/7z/src/Ppmd7Enc.c @@ -1,5 +1,5 @@ /* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder -2021-04-13 : Igor Pavlov : Public domain +2023-04-02 : Igor Pavlov : Public domain This code is based on: PPMd var.H (2001): Dmitry Shkarin : Public domain */ @@ -8,7 +8,7 @@ This code is based on: #include "Ppmd7.h" -#define kTopValue (1 << 24) +#define kTopValue ((UInt32)1 << 24) #define R (&p->rc.enc) @@ -20,8 +20,8 @@ void Ppmd7z_Init_RangeEnc(CPpmd7 *p) R->CacheSize = 1; } -MY_NO_INLINE -static void RangeEnc_ShiftLow(CPpmd7 *p) +Z7_NO_INLINE +static void Ppmd7z_RangeEnc_ShiftLow(CPpmd7 *p) { if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0) { @@ -38,53 +38,53 @@ static void RangeEnc_ShiftLow(CPpmd7 *p) R->Low = (UInt32)((UInt32)R->Low << 8); } -#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p); -#define RC_NORM_1(p) RC_NORM_BASE(p) } -#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} +#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; Ppmd7z_RangeEnc_ShiftLow(p); +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} // we must use only one type of Normalization from two: LOCAL or REMOTE #define RC_NORM_LOCAL(p) // RC_NORM(p) #define RC_NORM_REMOTE(p) RC_NORM(p) /* -#define RangeEnc_Encode(p, start, _size_) \ +#define Ppmd7z_RangeEnc_Encode(p, start, _size_) \ { UInt32 size = _size_; \ R->Low += start * R->Range; \ R->Range *= size; \ RC_NORM_LOCAL(p); } */ -MY_FORCE_INLINE -// MY_NO_INLINE -static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) { R->Low += start * R->Range; R->Range *= size; - RC_NORM_LOCAL(p); + RC_NORM_LOCAL(p) } void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) { unsigned i; for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); + Ppmd7z_RangeEnc_ShiftLow(p); } -#define RC_Encode(start, size) RangeEnc_Encode(p, start, size); -#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p); +#define RC_Encode(start, size) Ppmd7z_RangeEnc_Encode(p, start, size); +#define RC_EncodeFinal(start, size) RC_Encode(start, size) RC_NORM_REMOTE(p) #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) #define SUFFIX(ctx) CTX((ctx)->Suffix) -typedef CPpmd7_Context * CTX_PTR; +// typedef CPpmd7_Context * CTX_PTR; #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) void Ppmd7_UpdateModel(CPpmd7 *p); #define MASK(sym) ((unsigned char *)charMask)[sym] -MY_FORCE_INLINE +Z7_FORCE_INLINE static void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) { @@ -104,7 +104,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) if (s->Symbol == symbol) { // R->Range /= p->MinContext->Union2.SummFreq; - RC_EncodeFinal(0, s->Freq); + RC_EncodeFinal(0, s->Freq) p->FoundState = s; Ppmd7_Update1_0(p); return; @@ -117,7 +117,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) if ((++s)->Symbol == symbol) { // R->Range /= p->MinContext->Union2.SummFreq; - RC_EncodeFinal(sum, s->Freq); + RC_EncodeFinal(sum, s->Freq) p->FoundState = s; Ppmd7_Update1(p); return; @@ -127,10 +127,10 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) while (--i); // R->Range /= p->MinContext->Union2.SummFreq; - RC_Encode(sum, p->MinContext->Union2.SummFreq - sum); + RC_Encode(sum, p->MinContext->Union2.SummFreq - sum) p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); - PPMD_SetAllBitsIn256Bytes(charMask); + PPMD_SetAllBitsIn256Bytes(charMask) // MASK(s->Symbol) = 0; // i = p->MinContext->NumStats - 1; // do { MASK((--s)->Symbol) = 0; } while (--i); @@ -153,20 +153,20 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) UInt16 *prob = Ppmd7_GetBinSumm(p); CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); UInt32 pr = *prob; - UInt32 bound = (R->Range >> 14) * pr; + const UInt32 bound = (R->Range >> 14) * pr; pr = PPMD_UPDATE_PROB_1(pr); if (s->Symbol == symbol) { *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); // RangeEnc_EncodeBit_0(p, bound); R->Range = bound; - RC_NORM_1(p); + RC_NORM_1(p) // p->FoundState = s; // Ppmd7_UpdateBin(p); { - unsigned freq = s->Freq; - CTX_PTR c = CTX(SUCCESSOR(s)); + const unsigned freq = s->Freq; + CPpmd7_Context *c = CTX(SUCCESSOR(s)); p->FoundState = s; p->PrevSuccess = 1; p->RunLength++; @@ -187,7 +187,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) R->Range -= bound; RC_NORM_LOCAL(p) - PPMD_SetAllBitsIn256Bytes(charMask); + PPMD_SetAllBitsIn256Bytes(charMask) MASK(s->Symbol) = 0; p->PrevSuccess = 0; } @@ -248,14 +248,14 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) do { - unsigned cur = s->Symbol; + const unsigned cur = s->Symbol; if ((int)cur == symbol) { - UInt32 low = sum; - UInt32 freq = s->Freq; + const UInt32 low = sum; + const UInt32 freq = s->Freq; unsigned num2; - Ppmd_See_Update(see); + Ppmd_See_UPDATE(see) p->FoundState = s; sum += escFreq; @@ -279,7 +279,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) R->Range /= sum; - RC_EncodeFinal(low, freq); + RC_EncodeFinal(low, freq) Ppmd7_Update2(p); return; } @@ -289,21 +289,21 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) while (--i); { - UInt32 total = sum + escFreq; + const UInt32 total = sum + escFreq; see->Summ = (UInt16)(see->Summ + total); R->Range /= total; - RC_Encode(sum, escFreq); + RC_Encode(sum, escFreq) } { - CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + const CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); s--; MASK(s->Symbol) = 0; do { - unsigned sym0 = s2[0].Symbol; - unsigned sym1 = s2[1].Symbol; + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; s2 += 2; MASK(sym0) = 0; MASK(sym1) = 0; @@ -321,3 +321,18 @@ void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim) Ppmd7z_EncodeSymbol(p, *buf); } } + +#undef kTopValue +#undef WRITE_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Encode +#undef RC_EncodeFinal +#undef SUFFIX +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/3rdparty/7z/src/Ppmd7aDec.c b/3rdparty/7z/src/Ppmd7aDec.c new file mode 100644 index 0000000000..55e164e19f --- /dev/null +++ b/3rdparty/7z/src/Ppmd7aDec.c @@ -0,0 +1,295 @@ +/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7a_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7a_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + + +int Ppmd7a_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + if (summFreq > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + if (freqSum > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/3rdparty/7z/src/Ppmd8.c b/3rdparty/7z/src/Ppmd8.c new file mode 100644 index 0000000000..28abf27973 --- /dev/null +++ b/3rdparty/7z/src/Ppmd8.c @@ -0,0 +1,1565 @@ +/* Ppmd8.c -- PPMdI codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd8.h" + + + + +MY_ALIGN(16) +static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static const UInt16 PPMD8_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) + + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +#define STATS(ctx) Ppmd8_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd8Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd8_Context * PPMD8_CTX_PTR; + +struct CPpmd8_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Node_) CPpmd8_Node_Ref; + +typedef struct CPpmd8_Node_ +{ + UInt32 Stamp; + + CPpmd8_Node_Ref Next; + UInt32 NU; +} CPpmd8_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd8_Node) + +void Ppmd8_Construct(CPpmd8 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 5; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 260; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 4; + } + + memcpy(p->ExpEscape, PPMD8_kExpEscape, 16); +} + + +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd8_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + + + + + + +#define EMPTY_NODE 0xFFFFFFFF + + +static void Ppmd8_InsertNode(CPpmd8 *p, void *node, unsigned indx) +{ + ((CPpmd8_Node *)node)->Stamp = EMPTY_NODE; + ((CPpmd8_Node *)node)->Next = (CPpmd8_Node_Ref)p->FreeList[indx]; + ((CPpmd8_Node *)node)->NU = I2U(indx); + p->FreeList[indx] = REF(node); + p->Stamps[indx]++; +} + + +static void *Ppmd8_RemoveNode(CPpmd8 *p, unsigned indx) +{ + CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]); + p->FreeList[indx] = node->Next; + p->Stamps[indx]--; + + return node; +} + + +static void Ppmd8_SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd8_InsertNode(p, ptr, i); +} + + + + + + + + + + + + + + +static void Ppmd8_GlueFreeBlocks(CPpmd8 *p) +{ + /* + we use first UInt32 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0xFF + CPpmd8_Context { Byte NumStats; Byte Flags; UInt16 SummFreq; : Flags != 0xFF ??? + CPpmd8_Node { UInt32 Stamp : Stamp == 0xFFFFFFFF for free record + : Stamp == 0 for guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd8_Context record + */ + CPpmd8_Node_Ref n; + + p->GlueCount = 1 << 13; + memset(p->Stamps, 0, sizeof(p->Stamps)); + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + /* Glue free blocks */ + CPpmd8_Node_Ref *prev = &n; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + + CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + CPpmd8_Node *node = NODE(next); + UInt32 nu = node->NU; + *prev = next; + next = node->Next; + if (nu != 0) + { + CPpmd8_Node *node2; + prev = &(node->Next); + while ((node2 = node + nu)->Stamp == EMPTY_NODE) + { + nu += node2->NU; + node2->NU = 0; + node->NU = nu; + } + } + } + } + + *prev = 0; + } + + + + + + + + + + + + + + + + + + + + + /* Fill lists of free blocks */ + while (n != 0) + { + CPpmd8_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd8_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd8_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd8_AllocUnitsRare(CPpmd8 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd8_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : (NULL); + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd8_RemoveNode(p, i); + Ppmd8_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd8_AllocUnits(CPpmd8 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd8_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + + + +static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd8_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd8_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd8_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} + + +static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu) +{ + Ppmd8_InsertNode(p, ptr, U2I(nu)); +} + + +static void SpecialFreeUnit(CPpmd8 *p, void *ptr) +{ + if ((Byte *)ptr != p->UnitsStart) + Ppmd8_InsertNode(p, ptr, 0); + else + { + #ifdef PPMD8_FREEZE_SUPPORT + *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts() */ + #endif + p->UnitsStart += UNIT_SIZE; + } +} + + +/* +static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu) +{ + unsigned indx = U2I(nu); + void *ptr; + if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx]) + return oldPtr; + ptr = Ppmd8_RemoveNode(p, indx); + MEM_12_CPY(ptr, oldPtr, nu) + if ((Byte *)oldPtr != p->UnitsStart) + Ppmd8_InsertNode(p, oldPtr, indx); + else + p->UnitsStart += U2B(I2U(indx)); + return ptr; +} +*/ + +static void ExpandTextArea(CPpmd8 *p) +{ + UInt32 count[PPMD_NUM_INDEXES]; + unsigned i; + + memset(count, 0, sizeof(count)); + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + CPpmd8_Node *node = (CPpmd8_Node *)(void *)p->UnitsStart; + while (node->Stamp == EMPTY_NODE) + { + UInt32 nu = node->NU; + node->Stamp = 0; + count[U2I(nu)]++; + node += nu; + } + p->UnitsStart = (Byte *)node; + } + + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + UInt32 cnt = count[i]; + if (cnt == 0) + continue; + { + CPpmd8_Node_Ref *prev = (CPpmd8_Node_Ref *)&p->FreeList[i]; + CPpmd8_Node_Ref n = *prev; + p->Stamps[i] -= cnt; + for (;;) + { + CPpmd8_Node *node = NODE(n); + n = node->Next; + if (node->Stamp != 0) + { + prev = &node->Next; + continue; + } + *prev = n; + if (--cnt == 0) + break; + } + } + } +} + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void Ppmd8State_SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + +#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); } + +Z7_NO_INLINE +static +void Ppmd8_RestartModel(CPpmd8 *p) +{ + unsigned i, k, m; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + memset(p->Stamps, 0, sizeof(p->Stamps)); + RESET_TEXT(0) + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd8_Context *mc = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd8_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + mc->Flags = 0; + mc->NumStats = 256 - 1; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + Ppmd8State_SetSuccessor(s, 0); + } + } + + + + + + + + + + + + + for (i = m = 0; m < 25; m++) + { + while (p->NS2Indx[i] == m) + i++; + for (k = 0; k < 8; k++) + { + unsigned r; + UInt16 *dest = p->BinSumm[m] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD8_kInitBinEsc[k] / (i + 1)); + for (r = 0; r < 64; r += 8) + dest[r] = val; + } + } + + for (i = m = 0; m < 24; m++) + { + unsigned summ; + CPpmd_See *s; + while (p->NS2Indx[(size_t)i + 3] == m + 3) + i++; + s = p->See[m]; + summ = ((2 * i + 5) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 32; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 7; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod) +{ + p->MaxOrder = maxOrder; + p->RestoreMethod = restoreMethod; + Ppmd8_RestartModel(p); +} + + +#define FLAG_RESCALED (1 << 2) +// #define FLAG_SYM_HIGH (1 << 3) +#define FLAG_PREV_HIGH (1 << 4) + +#define HiBits_Prepare(sym) ((unsigned)(sym) + 0xC0) + +#define HiBits_Convert_3(flags) (((flags) >> (8 - 3)) & (1 << 3)) +#define HiBits_Convert_4(flags) (((flags) >> (8 - 4)) & (1 << 4)) + +#define PPMD8_HiBitsFlag_3(sym) HiBits_Convert_3(HiBits_Prepare(sym)) +#define PPMD8_HiBitsFlag_4(sym) HiBits_Convert_4(HiBits_Prepare(sym)) + +// #define PPMD8_HiBitsFlag_3(sym) (0x08 * ((sym) >= 0x40)) +// #define PPMD8_HiBitsFlag_4(sym) (0x10 * ((sym) >= 0x40)) + +/* +Refresh() is called when we remove some symbols (successors) in context. +It increases Escape_Freq for sum of all removed symbols. +*/ + +static void Refresh(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned oldNU, unsigned scale) +{ + unsigned i = ctx->NumStats, escFreq, sumFreq, flags; + CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1); + ctx->Union4.Stats = REF(s); + + // #ifdef PPMD8_FREEZE_SUPPORT + /* + (ctx->Union2.SummFreq >= ((UInt32)1 << 15)) can be in FREEZE mode for some files. + It's not good for range coder. So new versions of support fix: + - original PPMdI code rev.1 + + original PPMdI code rev.2 + - 7-Zip default ((PPMD8_FREEZE_SUPPORT is not defined) + + 7-Zip (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + if we use that fixed line, we can lose compatibility with some files created before fix + if we don't use that fixed line, the program can work incorrectly in FREEZE mode in rare case. + */ + // if (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + { + scale |= (ctx->Union2.SummFreq >= ((UInt32)1 << 15)); + } + // #endif + + + + flags = HiBits_Prepare(s->Symbol); + { + unsigned freq = s->Freq; + escFreq = ctx->Union2.SummFreq - freq; + freq = (freq + scale) >> scale; + sumFreq = freq; + s->Freq = (Byte)freq; + } + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + scale) >> scale; + sumFreq += freq; + s->Freq = (Byte)freq; + flags |= HiBits_Prepare(s->Symbol); + } + while (--i); + + ctx->Union2.SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale)); + ctx->Flags = (Byte)((ctx->Flags & (FLAG_PREV_HIGH + FLAG_RESCALED * scale)) + HiBits_Convert_3(flags)); +} + + +static void SWAP_STATES(CPpmd_State *t1, CPpmd_State *t2) +{ + CPpmd_State tmp = *t1; + *t1 = *t2; + *t2 = tmp; +} + + +/* +CutOff() reduces contexts: + It conversts Successors at MaxOrder to another Contexts to NULL-Successors + It removes RAW-Successors and NULL-Successors that are not Order-0 + and it removes contexts when it has no Successors. + if the (Union4.Stats) is close to (UnitsStart), it moves it up. +*/ + +static CPpmd_Void_Ref CutOff(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + int ns = ctx->NumStats; + unsigned nu; + CPpmd_State *stats; + + if (ns == 0) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart) + { + if (order < p->MaxOrder) + successor = CutOff(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + if (successor || order <= 9) /* O_BOUND */ + return REF(ctx); + } + SpecialFreeUnit(p, ctx); + return 0; + } + + nu = ((unsigned)ns + 2) >> 1; + // ctx->Union4.Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), nu)); + { + unsigned indx = U2I(nu); + stats = STATS(ctx); + + if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14) + && (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx]) + { + void *ptr = Ppmd8_RemoveNode(p, indx); + ctx->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, nu) + if ((Byte *)stats != p->UnitsStart) + Ppmd8_InsertNode(p, stats, indx); + else + p->UnitsStart += U2B(I2U(indx)); + stats = ptr; + } + } + + { + CPpmd_State *s = stats + (unsigned)ns; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) < p->UnitsStart) + { + CPpmd_State *s2 = stats + (unsigned)(ns--); + if (order) + { + if (s != s2) + *s = *s2; + } + else + { + SWAP_STATES(s, s2); + Ppmd8State_SetSuccessor(s2, 0); + } + } + else + { + if (order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, CutOff(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + } + while (--s >= stats); + } + + if (ns != ctx->NumStats && order) + { + if (ns < 0) + { + FreeUnits(p, stats, nu); + SpecialFreeUnit(p, ctx); + return 0; + } + ctx->NumStats = (Byte)ns; + if (ns == 0) + { + const Byte sym = stats->Symbol; + ctx->Flags = (Byte)((ctx->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(sym)); + // *ONE_STATE(ctx) = *stats; + ctx->Union2.State2.Symbol = sym; + ctx->Union2.State2.Freq = (Byte)(((unsigned)stats->Freq + 11) >> 3); + ctx->Union4.State4.Successor_0 = stats->Successor_0; + ctx->Union4.State4.Successor_1 = stats->Successor_1; + FreeUnits(p, stats, nu); + } + else + { + Refresh(p, ctx, nu, ctx->Union2.SummFreq > 16 * (unsigned)ns); + } + } + + return REF(ctx); +} + + + +#ifdef PPMD8_FREEZE_SUPPORT + +/* +RemoveBinContexts() + It conversts Successors at MaxOrder to another Contexts to NULL-Successors + It changes RAW-Successors to NULL-Successors + removes Bin Context without Successor, if suffix of that context is also binary. +*/ + +static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + if (!ctx->NumStats) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + successor = RemoveBinContexts(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + /* Suffix context can be removed already, since different (high-order) + Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */ + if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF)) + { + FreeUnits(p, ctx, 1); + return 0; + } + } + else + { + CPpmd_State *s = STATS(ctx) + ctx->NumStats; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + while (--s >= STATS(ctx)); + } + + return REF(ctx); +} + +#endif + + + +static UInt32 GetUsedMemory(const CPpmd8 *p) +{ + UInt32 v = 0; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + v += p->Stamps[i] * I2U(i); + return p->Size - (UInt32)(p->HiUnit - p->LoUnit) - (UInt32)(p->UnitsStart - p->Text) - U2B(v); +} + +#ifdef PPMD8_FREEZE_SUPPORT + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1, fSuccessor) +#else + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1) +#endif + + +static void RestoreModel(CPpmd8 *p, PPMD8_CTX_PTR ctxError + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_CTX_PTR fSuccessor + #endif + ) +{ + PPMD8_CTX_PTR c; + CPpmd_State *s; + RESET_TEXT(0) + + // we go here in cases of error of allocation for context (c1) + // Order(MinContext) < Order(ctxError) <= Order(MaxContext) + + // We remove last symbol from each of contexts [p->MaxContext ... ctxError) contexts + // So we rollback all created (symbols) before error. + for (c = p->MaxContext; c != ctxError; c = SUFFIX(c)) + if (--(c->NumStats) == 0) + { + s = STATS(c); + c->Flags = (Byte)((c->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(s->Symbol)); + // *ONE_STATE(c) = *s; + c->Union2.State2.Symbol = s->Symbol; + c->Union2.State2.Freq = (Byte)(((unsigned)s->Freq + 11) >> 3); + c->Union4.State4.Successor_0 = s->Successor_0; + c->Union4.State4.Successor_1 = s->Successor_1; + + SpecialFreeUnit(p, s); + } + else + { + /* Refresh() can increase Escape_Freq on value of Freq of last symbol, that was added before error. + so the largest possible increase for Escape_Freq is (8) from value before ModelUpoadet() */ + Refresh(p, c, ((unsigned)c->NumStats + 3) >> 1, 0); + } + + // increase Escape Freq for context [ctxError ... p->MinContext) + for (; c != p->MinContext; c = SUFFIX(c)) + if (c->NumStats == 0) + { + // ONE_STATE(c) + c->Union2.State2.Freq = (Byte)(((unsigned)c->Union2.State2.Freq + 1) >> 1); + } + else if ((c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 4)) > 128 + 4 * c->NumStats) + Refresh(p, c, ((unsigned)c->NumStats + 2) >> 1, 1); + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + p->MaxContext = fSuccessor; + p->GlueCount += !(p->Stamps[1] & 1); // why? + } + else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE) + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + RemoveBinContexts(p, p->MaxContext, 0); + // we change the current mode to (PPMD8_RESTORE_METHOD_FREEZE + 1) + p->RestoreMethod = PPMD8_RESTORE_METHOD_FREEZE + 1; + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + else + #endif + if (p->RestoreMethod == PPMD8_RESTORE_METHOD_RESTART || GetUsedMemory(p) < (p->Size >> 1)) + Ppmd8_RestartModel(p); + else + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + do + { + CutOff(p, p->MaxContext, 0); + ExpandTextArea(p); + } + while (GetUsedMemory(p) > 3 * (p->Size >> 2)); + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + p->MinContext = p->MaxContext; +} + + + +Z7_NO_INLINE +static PPMD8_CTX_PTR Ppmd8_CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq, flags; + unsigned numPs = 0; + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; /* fixed over Shkarin's code. Maybe it could work without + 1 too. */ + + if (!skip) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + if (s1) { s = s1; s1 = NULL; } + else if (c->NumStats != 0) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + if (s->Freq < MAX_FREQ - 9) { s->Freq++; c->Union2.SummFreq++; } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (!SUFFIX(c)->NumStats & (s->Freq < 24))); + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + + c = CTX(successor); + if (numPs == 0) + { + + + return c; + } + break; + } + ps[numPs++] = s; + } + + + + + + newSym = *(const Byte *)Ppmd8_GetPtr(p, upBranch); + upBranch++; + flags = (Byte)(PPMD8_HiBitsFlag_4(p->FoundState->Symbol) + PPMD8_HiBitsFlag_3(newSym)); + + if (c->NumStats == 0) + newFreq = c->Union2.State2.Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + + + max(newFreq)= (s->Freq - 1), when (s0 == 1) + + + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0))); + } + + + + do + { + PPMD8_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD8_CTX_PTR)Ppmd8_RemoveNode(p, 0); + else + { + c1 = (PPMD8_CTX_PTR)Ppmd8_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + c1->Flags = flags; + c1->NumStats = 0; + c1->Union2.State2.Symbol = newSym; + c1->Union2.State2.Freq = newFreq; + Ppmd8State_SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + Ppmd8State_SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + +static PPMD8_CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + CPpmd_State *s = NULL; + PPMD8_CTX_PTR c1 = c; + CPpmd_Void_Ref upBranch = REF(p->Text); + + #ifdef PPMD8_FREEZE_SUPPORT + /* The BUG in Shkarin's code was fixed: ps could overflow in CUT_OFF mode. */ + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; + unsigned numPs = 0; + ps[numPs++] = p->FoundState; + #endif + + Ppmd8State_SetSuccessor(p->FoundState, upBranch); + p->OrderFall++; + + for (;;) + { + if (s1) + { + c = SUFFIX(c); + s = s1; + s1 = NULL; + } + else + { + if (!c->Suffix) + { + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + } + #endif + return c; + } + c = SUFFIX(c); + if (c->NumStats) + { + if ((s = STATS(c))->Symbol != p->FoundState->Symbol) + do { s++; } while (s->Symbol != p->FoundState->Symbol); + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (s->Freq < 32)); + } + } + if (SUCCESSOR(s)) + break; + #ifdef PPMD8_FREEZE_SUPPORT + ps[numPs++] = s; + #endif + Ppmd8State_SetSuccessor(s, upBranch); + p->OrderFall++; + } + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + c = CTX(SUCCESSOR(s)); + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + return c; + } + else + #endif + if (SUCCESSOR(s) <= upBranch) + { + PPMD8_CTX_PTR successor; + CPpmd_State *s2 = p->FoundState; + p->FoundState = s; + + successor = Ppmd8_CreateSuccessors(p, False, NULL, c); + if (!successor) + Ppmd8State_SetSuccessor(s, 0); + else + Ppmd8State_SetSuccessor(s, REF(successor)); + p->FoundState = s2; + } + + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if (p->OrderFall == 1 && c1 == p->MaxContext) + { + Ppmd8State_SetSuccessor(p->FoundState, successor); + p->Text--; + } + if (successor == 0) + return NULL; + return CTX(successor); + } +} + + + +void Ppmd8_UpdateModel(CPpmd8 *p); +Z7_NO_INLINE +void Ppmd8_UpdateModel(CPpmd8 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState); + PPMD8_CTX_PTR c; + unsigned s0, ns, fFreq = p->FoundState->Freq; + Byte flag, fSymbol = p->FoundState->Symbol; + { + CPpmd_State *s = NULL; + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 0) + { + s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + Byte sym = p->FoundState->Symbol; + s = STATS(c); + + if (s->Symbol != sym) + { + do + { + + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(&s[0], &s[-1]); + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + c = p->MaxContext; + if (p->OrderFall == 0 && minSuccessor) + { + PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, True, s, p->MinContext); + if (!cs) + { + Ppmd8State_SetSuccessor(p->FoundState, 0); + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + Ppmd8State_SetSuccessor(p->FoundState, REF(cs)); + p->MinContext = p->MaxContext = cs; + return; + } + + + + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RESTORE_MODEL(c, CTX(minSuccessor)); /* check it */ + return; + } + maxSuccessor = REF(text); + } + + if (!minSuccessor) + { + PPMD8_CTX_PTR cs = ReduceOrder(p, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart) + { + PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, False, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + + if (--p->OrderFall == 0) + { + maxSuccessor = minSuccessor; + p->Text -= (p->MaxContext != p->MinContext); + } + #ifdef PPMD8_FREEZE_SUPPORT + else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + maxSuccessor = minSuccessor; + RESET_TEXT(0) + p->OrderFall = 0; + } + #endif + } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flag = (Byte)(PPMD8_HiBitsFlag_3(fSymbol)); + s0 = p->MinContext->Union2.SummFreq - (ns = p->MinContext->NumStats) - fFreq; + + for (; c != p->MinContext; c = SUFFIX(c)) + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 0) + { + if ((ns1 & 1) != 0) + { + /* Expand for one UNIT */ + unsigned oldNU = (ns1 + 1) >> 1; + unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd8_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd8_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 1 here. + an average increase is 1/3 per symbol */ + sum += (3 * ns1 + 1 < ns); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < ???). Do we need to truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + + CPpmd_State *s = (CPpmd_State*)Ppmd8_AllocUnits(p, 0); + if (!s) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // Ppmd8State_SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + + s->Freq = (Byte)freq; + + sum = freq + p->InitEsc + (ns > 2); // Ppmd8 (> 2) + } + } + + { + CPpmd_State *s = STATS(c) + ns1 + 1; + UInt32 cf = 2 * (sum + 6) * (UInt32)fFreq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = fSymbol; + c->NumStats = (Byte)(ns1 + 1); + Ppmd8State_SetSuccessor(s, maxSuccessor); + c->Flags |= flag; + if (cf < 6 * sf) + { + cf = (unsigned)1 + (cf > sf) + (cf >= 4 * sf); + sum += 4; + /* It can add (1, 2, 3) to Escape_Freq */ + } + else + { + cf = (unsigned)4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + + } + p->MaxContext = p->MinContext = CTX(minSuccessor); +} + + + +Z7_NO_INLINE +static void Ppmd8_Rescale(CPpmd8 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + + + + + + adder = (p->OrderFall != 0); + + #ifdef PPMD8_FREEZE_SUPPORT + adder |= (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE); + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = p->MinContext->NumStats; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd8_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + + + + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (Byte)(numStatsNew); + n0 = (numStats + 2) >> 1; + + if (numStatsNew == 0) + { + + unsigned freq = (2 * (unsigned)stats->Freq + escFreq - 1) / escFreq; + if (freq > MAX_FREQ / 3) + freq = MAX_FREQ / 3; + mc->Flags = (Byte)((mc->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(stats->Symbol)); + + + + + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; + p->FoundState = s; + Ppmd8_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 2) >> 1; + if (n0 != n1) + mc->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // here we are for max order only. So Ppmd8_MakeEscFreq() doesn't use mc->Flags + // but we still need current (Flags & FLAG_PREV_HIGH), if we will convert context to 1-symbol context later. + /* + unsigned flags = HiBits_Prepare((s = STATS(mc))->Symbol); + i = mc->NumStats; + do { flags |= HiBits_Prepare((++s)->Symbol); } while (--i); + mc->Flags = (Byte)((mc->Flags & ~FLAG_SYM_HIGH) + HiBits_Convert_3(flags)); + */ + } + } + + + + + + + { + CPpmd8_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + mc->Flags |= FLAG_RESCALED; + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd8_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 0xFF) + { + // (3 <= numStats + 2 <= 256) (3 <= NS2Indx[3] and NS2Indx[256] === 26) + see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)numStats + 2] - 3] + + (mc->Union2.SummFreq > 11 * (numStats + 1)) + + 2 * (unsigned)(2 * numStats < ((unsigned)SUFFIX(mc)->NumStats + numMasked1)) + + mc->Flags; + + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + *escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd8_NextContext(CPpmd8 *p) +{ + PPMD8_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); +} + + +void Ppmd8_Update1(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s, &s[-1]); + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + } + Ppmd8_NextContext(p); +} + + +void Ppmd8_Update1_0(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd8_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=) + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_NextContext(p); +} + + +/* +void Ppmd8_UpdateBin(CPpmd8 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196) + p->PrevSuccess = 1; + p->RunLength++; + Ppmd8_NextContext(p); +} +*/ + +void Ppmd8_Update2(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_UpdateModel(p); +} + +/* H->I changes: + NS2Indx + GlueCount, and Glue method + BinSum + See / EscFreq + Ppmd8_CreateSuccessors updates more suffix contexts + Ppmd8_UpdateModel consts. + PrevSuccess Update + +Flags: + (1 << 2) - the Context was Rescaled + (1 << 3) - there is symbol in Stats with (sym >= 0x40) in + (1 << 4) - main symbol of context is (sym >= 0x40) +*/ + +#undef RESET_TEXT +#undef FLAG_RESCALED +#undef FLAG_PREV_HIGH +#undef HiBits_Prepare +#undef HiBits_Convert_3 +#undef HiBits_Convert_4 +#undef PPMD8_HiBitsFlag_3 +#undef PPMD8_HiBitsFlag_4 +#undef RESTORE_MODEL + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U + +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/3rdparty/7z/src/Ppmd8.h b/3rdparty/7z/src/Ppmd8.h new file mode 100644 index 0000000000..d5bb57e159 --- /dev/null +++ b/3rdparty/7z/src/Ppmd8.h @@ -0,0 +1,181 @@ +/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#ifndef ZIP7_INC_PPMD8_H +#define ZIP7_INC_PPMD8_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD8_MIN_ORDER 2 +#define PPMD8_MAX_ORDER 16 + + + + +struct CPpmd8_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd8_Context_ +{ + Byte NumStats; + Byte Flags; + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd8_Context_Ref Suffix; +} CPpmd8_Context; + +// MY_CPU_pragma_pop + +#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + +/* PPMdI code rev.2 contains the fix over PPMdI code rev.1. + But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed + in FREEZE mode. So we disable FREEZE mode support. */ + +// #define PPMD8_FREEZE_SUPPORT + +enum +{ + PPMD8_RESTORE_METHOD_RESTART, + PPMD8_RESTORE_METHOD_CUT_OFF + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_RESTORE_METHOD_FREEZE + #endif + , PPMD8_RESTORE_METHOD_UNSUPPPORTED +}; + + + + + + + + +typedef struct +{ + CPpmd8_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + UInt32 Range; + UInt32 Code; + UInt32 Low; + union + { + IByteInPtr In; + IByteOutPtr Out; + } Stream; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + UInt32 Stamps[PPMD_NUM_INDEXES]; + Byte NS2BSIndx[256], NS2Indx[260]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[24][32]; + UInt16 BinSumm[25][64]; + +} CPpmd8; + + +void Ppmd8_Construct(CPpmd8 *p); +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc); +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod); +#define Ppmd8_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context) +#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd8_Update1(CPpmd8 *p); +void Ppmd8_Update1_0(CPpmd8 *p); +void Ppmd8_Update2(CPpmd8 *p); + + + + + + +#define Ppmd8_GetBinSumm(p) \ + &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \ + + p->MinContext->Flags ] + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale); + + +/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases, + where the original PPMdI code can give "Divide by Zero" operation. + We use the following fix to allow correct working of encoder and decoder in any cases. + We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */ +#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range; + + +/* ---------- Decode ---------- */ + +#define PPMD8_SYM_END (-1) +#define PPMD8_SYM_ERROR (-2) + +/* +You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init() + +Ppmd8_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD8_SYM_END : End of payload marker + -2 : PPMD8_SYM_ERROR : Data error +*/ + + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p); +#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd8_DecodeSymbol(CPpmd8 *p); + + + + + + + + +/* ---------- Encode ---------- */ + +#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; } +void Ppmd8_Flush_RangeEnc(CPpmd8 *p); +void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); + + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/Ppmd8Dec.c b/3rdparty/7z/src/Ppmd8Dec.c new file mode 100644 index 0000000000..72d3626ea1 --- /dev/null +++ b/3rdparty/7z/src/Ppmd8Dec.c @@ -0,0 +1,295 @@ +/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd8.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream.In) + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R p + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd8_RD_Decode(CPpmd8 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd8_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +// typedef CPpmd8_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd8_UpdateModel(CPpmd8 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + + +int Ppmd8_DecodeSymbol(CPpmd8 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 0) + { + CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + PPMD8_CORRECT_SUM_RANGE(p, summFreq) + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = p->MinContext->NumStats; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD8_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd8Context_OneState(p->MinContext); + UInt16 *prob = Ppmd8_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol; + // Ppmd8_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd8_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 196)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(rc2, size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + UInt32 freqSum2; + CPpmd_See *see; + CPpmd8_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD8_SYM_END; + mc = Ppmd8_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd8_GetStats(p, mc); + + { + unsigned num = (unsigned)mc->NumStats + 1; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + freqSum2 = freqSum; + PPMD8_CORRECT_SUM_RANGE(R, freqSum2) + + + count = RC_GetThreshold(freqSum2); + + if (count < hiCnt) + { + Byte sym; + // Ppmd_See_UPDATE(see) // new (see->Summ) value can overflow over 16-bits in some rare cases + s = Ppmd8_GetStats(p, p->MinContext); + hiCnt = count; + + + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update2(p); + return sym; + } + + if (count >= freqSum2) + return PPMD8_SYM_ERROR; + + RC_Decode(hiCnt, freqSum2 - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd8_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats + 1; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/3rdparty/7z/src/Ppmd8Enc.c b/3rdparty/7z/src/Ppmd8Enc.c new file mode 100644 index 0000000000..9e29ef710e --- /dev/null +++ b/3rdparty/7z/src/Ppmd8Enc.c @@ -0,0 +1,338 @@ +/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd8.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24)) + +void Ppmd8_Flush_RangeEnc(CPpmd8 *p) +{ + unsigned i; + for (i = 0; i < 4; i++, p->Low <<= 8 ) + WRITE_BYTE(p); +} + + + + + + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \ + { WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; } + + + + + + + + + + + + + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +// #define RC_PRE(total) p->Range /= total; +// #define RC_PRE(total) + +#define R p + + + + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd8_RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total) +{ + R->Low += start * (R->Range /= total); + R->Range *= size; + RC_NORM_LOCAL(R) +} + + + + + + + + + + +#define RC_Encode(start, size, total) Ppmd8_RangeEnc_Encode(p, start, size, total); +#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total) RC_NORM_REMOTE(p) + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) + +// typedef CPpmd8_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) + +void Ppmd8_UpdateModel(CPpmd8 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + +// Z7_FORCE_INLINE +// static +void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 0) + { + CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext); + UInt32 sum; + unsigned i; + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + PPMD8_CORRECT_SUM_RANGE(p, summFreq) + + // RC_PRE(summFreq); + + if (s->Symbol == symbol) + { + + RC_EncodeFinal(0, s->Freq, summFreq) + p->FoundState = s; + Ppmd8_Update1_0(p); + return; + } + p->PrevSuccess = 0; + sum = s->Freq; + i = p->MinContext->NumStats; + do + { + if ((++s)->Symbol == symbol) + { + + RC_EncodeFinal(sum, s->Freq, summFreq) + p->FoundState = s; + Ppmd8_Update1(p); + return; + } + sum += s->Freq; + } + while (--i); + + + RC_Encode(sum, summFreq - sum, summFreq) + + + PPMD_SetAllBitsIn256Bytes(charMask) + // MASK(s->Symbol) = 0; + // i = p->MinContext->NumStats; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + UInt16 *prob = Ppmd8_GetBinSumm(p); + CPpmd_State *s = Ppmd8Context_OneState(p->MinContext); + UInt32 pr = *prob; + const UInt32 bound = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + if (s->Symbol == symbol) + { + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + // RangeEnc_EncodeBit_0(p, bound); + R->Range = bound; + RC_NORM(R) + + // p->FoundState = s; + // Ppmd8_UpdateBin(p); + { + const unsigned freq = s->Freq; + CPpmd8_Context *c = CTX(SUCCESSOR(s)); + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196) + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); + } + return; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + // RangeEnc_EncodeBit_1(p, bound); + R->Low += bound; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(s->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_See *see; + CPpmd_State *s; + UInt32 sum, escFreq; + CPpmd8_Context *mc; + unsigned i, numMasked; + + RC_NORM_REMOTE(p) + + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return; /* EndMarker (symbol = -1) */ + mc = Ppmd8_GetContext(p, mc->Suffix); + + } + while (mc->NumStats == numMasked); + + p->MinContext = mc; + + see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq); + + + + + + + + + + + + + + + + + + + + + + + + + s = Ppmd8_GetStats(p, p->MinContext); + sum = 0; + i = (unsigned)p->MinContext->NumStats + 1; + + do + { + const unsigned cur = s->Symbol; + if ((int)cur == symbol) + { + const UInt32 low = sum; + const UInt32 freq = s->Freq; + unsigned num2; + + Ppmd_See_UPDATE(see) + p->FoundState = s; + sum += escFreq; + + num2 = i / 2; + i &= 1; + sum += freq & (0 - (UInt32)i); + if (num2 != 0) + { + s += i; + for (;;) + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + sum += (s[-2].Freq & (unsigned)(MASK(sym0))); + sum += (s[-1].Freq & (unsigned)(MASK(sym1))); + if (--num2 == 0) + break; + } + } + + PPMD8_CORRECT_SUM_RANGE(p, sum) + + RC_EncodeFinal(low, freq, sum) + Ppmd8_Update2(p); + return; + } + sum += (s->Freq & (unsigned)(MASK(cur))); + s++; + } + while (--i); + + { + UInt32 total = sum + escFreq; + see->Summ = (UInt16)(see->Summ + total); + PPMD8_CORRECT_SUM_RANGE(p, total) + + RC_Encode(sum, total - sum, total) + } + + { + const CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext); + s--; + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } +} + + + + + + + + + +#undef kTop +#undef kBot +#undef WRITE_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Encode +#undef RC_EncodeFinal + +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/3rdparty/7z/src/Precomp.h b/3rdparty/7z/src/Precomp.h index edb5814439..dc476aa69e 100644 --- a/3rdparty/7z/src/Precomp.h +++ b/3rdparty/7z/src/Precomp.h @@ -1,8 +1,8 @@ /* Precomp.h -- StdAfx -2013-11-12 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_PRECOMP_H -#define __7Z_PRECOMP_H +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H #include "Compiler.h" /* #include "7zTypes.h" */ diff --git a/3rdparty/7z/src/RotateDefs.h b/3rdparty/7z/src/RotateDefs.h index 6c790e791e..8026cd1066 100644 --- a/3rdparty/7z/src/RotateDefs.h +++ b/3rdparty/7z/src/RotateDefs.h @@ -1,14 +1,14 @@ /* RotateDefs.h -- Rotate functions -2015-03-25 : Igor Pavlov : Public domain */ +2023-06-18 : Igor Pavlov : Public domain */ -#ifndef __ROTATE_DEFS_H -#define __ROTATE_DEFS_H +#ifndef ZIP7_INC_ROTATE_DEFS_H +#define ZIP7_INC_ROTATE_DEFS_H #ifdef _MSC_VER #include -/* don't use _rotl with MINGW. It can insert slow call to function. */ +/* don't use _rotl with old MINGW. It can insert slow call to function. */ /* #if (_MSC_VER >= 1200) */ #pragma intrinsic(_rotl) @@ -18,12 +18,32 @@ #define rotlFixed(x, n) _rotl((x), (n)) #define rotrFixed(x, n) _rotr((x), (n)) +#if (_MSC_VER >= 1300) +#define Z7_ROTL64(x, n) _rotl64((x), (n)) +#define Z7_ROTR64(x, n) _rotr64((x), (n)) +#else +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + #else /* new compilers can translate these macros to fast commands. */ +#if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5) +/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63))) +#else +/* for old GCC / clang: */ #define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif #endif diff --git a/3rdparty/7z/src/Sha1.c b/3rdparty/7z/src/Sha1.c new file mode 100644 index 0000000000..fd6c018c95 --- /dev/null +++ b/3rdparty/7z/src/Sha1.c @@ -0,0 +1,498 @@ +/* Sha1.c -- SHA-1 Hash +2023-04-02 : Igor Pavlov : Public domain +This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */ + +#include "Precomp.h" + +#include + +#include "CpuArch.h" +#include "RotateDefs.h" +#include "Sha1.h" + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +// #define USE_MY_MM +#endif + +#ifdef MY_CPU_X86_OR_AMD64 + #ifdef _MSC_VER + #if _MSC_VER >= 1200 + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 8) // fix that check + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1800) // fix that check + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #endif +#elif defined(MY_CPU_ARM_OR_ARM64) + #ifdef _MSC_VER + #if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037 + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define Z7_COMPILER_SHA1_SUPPORTED + #endif + #endif +#endif + +void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); + +#ifdef Z7_COMPILER_SHA1_SUPPORTED + void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); + + static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks; + static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS_HW; + + #define SHA1_UPDATE_BLOCKS(p) p->func_UpdateBlocks +#else + #define SHA1_UPDATE_BLOCKS(p) Sha1_UpdateBlocks +#endif + + +BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo) +{ + SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks; + + #ifdef Z7_COMPILER_SHA1_SUPPORTED + if (algo != SHA1_ALGO_SW) + { + if (algo == SHA1_ALGO_DEFAULT) + func = g_SHA1_FUNC_UPDATE_BLOCKS; + else + { + if (algo != SHA1_ALGO_HW) + return False; + func = g_SHA1_FUNC_UPDATE_BLOCKS_HW; + if (!func) + return False; + } + } + #else + if (algo > 1) + return False; + #endif + + p->func_UpdateBlocks = func; + return True; +} + + +/* define it for speed optimization */ +// #define Z7_SHA1_UNROLL + +// allowed unroll steps: (1, 2, 4, 5, 20) + +#undef Z7_SHA1_BIG_W +#ifdef Z7_SHA1_UNROLL + #define STEP_PRE 20 + #define STEP_MAIN 20 +#else + #define Z7_SHA1_BIG_W + #define STEP_PRE 5 + #define STEP_MAIN 5 +#endif + + +#ifdef Z7_SHA1_BIG_W + #define kNumW 80 + #define w(i) W[i] +#else + #define kNumW 16 + #define w(i) W[(i)&15] +#endif + +#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4)) +#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1)) + +#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) ) +#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) ) +#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) ) +#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) ) + +/* +#define T1(fx, ww) \ + tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \ + e = d; \ + d = c; \ + c = rotlFixed(b, 30); \ + b = a; \ + a = tmp; \ +*/ + +#define T5(a,b,c,d,e, fx, ww) \ + e += fx(b,c,d) + ww + rotlFixed(a, 5); \ + b = rotlFixed(b, 30); \ + + +/* +#define R1(i, fx, wx) \ + T1 ( fx, wx(i)); \ + +#define R2(i, fx, wx) \ + R1 ( (i) , fx, wx); \ + R1 ( (i) + 1, fx, wx); \ + +#define R4(i, fx, wx) \ + R2 ( (i) , fx, wx); \ + R2 ( (i) + 2, fx, wx); \ +*/ + +#define M5(i, fx, wx0, wx1) \ + T5 ( a,b,c,d,e, fx, wx0((i) ) ) \ + T5 ( e,a,b,c,d, fx, wx1((i)+1) ) \ + T5 ( d,e,a,b,c, fx, wx1((i)+2) ) \ + T5 ( c,d,e,a,b, fx, wx1((i)+3) ) \ + T5 ( b,c,d,e,a, fx, wx1((i)+4) ) \ + +#define R5(i, fx, wx) \ + M5 ( i, fx, wx, wx) \ + + +#if STEP_PRE > 5 + + #define R20_START \ + R5 ( 0, f0, w0) \ + R5 ( 5, f0, w0) \ + R5 ( 10, f0, w0) \ + M5 ( 15, f0, w0, w1) \ + + #elif STEP_PRE == 5 + + #define R20_START \ + { size_t i; for (i = 0; i < 15; i += STEP_PRE) \ + { R5(i, f0, w0) } } \ + M5 ( 15, f0, w0, w1) \ + +#else + + #if STEP_PRE == 1 + #define R_PRE R1 + #elif STEP_PRE == 2 + #define R_PRE R2 + #elif STEP_PRE == 4 + #define R_PRE R4 + #endif + + #define R20_START \ + { size_t i; for (i = 0; i < 16; i += STEP_PRE) \ + { R_PRE(i, f0, w0) } } \ + R4 ( 16, f0, w1) \ + +#endif + + + +#if STEP_MAIN > 5 + + #define R20(ii, fx) \ + R5 ( (ii) , fx, w1) \ + R5 ( (ii) + 5 , fx, w1) \ + R5 ( (ii) + 10, fx, w1) \ + R5 ( (ii) + 15, fx, w1) \ + +#else + + #if STEP_MAIN == 1 + #define R_MAIN R1 + #elif STEP_MAIN == 2 + #define R_MAIN R2 + #elif STEP_MAIN == 4 + #define R_MAIN R4 + #elif STEP_MAIN == 5 + #define R_MAIN R5 + #endif + + #define R20(ii, fx) \ + { size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \ + { R_MAIN(i, fx, w1) } } \ + +#endif + + + +void Sha1_InitState(CSha1 *p) +{ + p->count = 0; + p->state[0] = 0x67452301; + p->state[1] = 0xEFCDAB89; + p->state[2] = 0x98BADCFE; + p->state[3] = 0x10325476; + p->state[4] = 0xC3D2E1F0; +} + +void Sha1_Init(CSha1 *p) +{ + p->func_UpdateBlocks = + #ifdef Z7_COMPILER_SHA1_SUPPORTED + g_SHA1_FUNC_UPDATE_BLOCKS; + #else + NULL; + #endif + Sha1_InitState(p); +} + + +Z7_NO_INLINE +void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks) +{ + UInt32 a, b, c, d, e; + UInt32 W[kNumW]; + // if (numBlocks != 0x1264378347) return; + if (numBlocks == 0) + return; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + do + { + #if STEP_PRE < 5 || STEP_MAIN < 5 + UInt32 tmp; + #endif + + R20_START + R20(20, f1) + R20(40, f2) + R20(60, f3) + + a += state[0]; + b += state[1]; + c += state[2]; + d += state[3]; + e += state[4]; + + state[0] = a; + state[1] = b; + state[2] = c; + state[3] = d; + state[4] = e; + + data += 64; + } + while (--numBlocks); +} + + +#define Sha1_UpdateBlock(p) SHA1_UPDATE_BLOCKS(p)(p->state, p->buffer, 1) + +void Sha1_Update(CSha1 *p, const Byte *data, size_t size) +{ + if (size == 0) + return; + + { + unsigned pos = (unsigned)p->count & 0x3F; + unsigned num; + + p->count += size; + + num = 64 - pos; + if (num > size) + { + memcpy(p->buffer + pos, data, size); + return; + } + + if (pos != 0) + { + size -= num; + memcpy(p->buffer + pos, data, num); + data += num; + Sha1_UpdateBlock(p); + } + } + { + size_t numBlocks = size >> 6; + SHA1_UPDATE_BLOCKS(p)(p->state, data, numBlocks); + size &= 0x3F; + if (size == 0) + return; + data += (numBlocks << 6); + memcpy(p->buffer, data, size); + } +} + + +void Sha1_Final(CSha1 *p, Byte *digest) +{ + unsigned pos = (unsigned)p->count & 0x3F; + + + p->buffer[pos++] = 0x80; + + if (pos > (64 - 8)) + { + while (pos != 64) { p->buffer[pos++] = 0; } + // memset(&p->buf.buffer[pos], 0, 64 - pos); + Sha1_UpdateBlock(p); + pos = 0; + } + + /* + if (pos & 3) + { + p->buffer[pos] = 0; + p->buffer[pos + 1] = 0; + p->buffer[pos + 2] = 0; + pos += 3; + pos &= ~3; + } + { + for (; pos < 64 - 8; pos += 4) + *(UInt32 *)(&p->buffer[pos]) = 0; + } + */ + + memset(&p->buffer[pos], 0, (64 - 8) - pos); + + { + const UInt64 numBits = (p->count << 3); + SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32)) + SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)) + } + + Sha1_UpdateBlock(p); + + SetBe32(digest, p->state[0]) + SetBe32(digest + 4, p->state[1]) + SetBe32(digest + 8, p->state[2]) + SetBe32(digest + 12, p->state[3]) + SetBe32(digest + 16, p->state[4]) + + + + + Sha1_InitState(p); +} + + +void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size) +{ + const UInt64 numBits = (p->count + size) << 3; + SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32)) + SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits)) + // SetBe32((UInt32 *)(block + size), 0x80000000); + SetUi32((UInt32 *)(void *)(block + size), 0x80) + size += 4; + while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4) + { + *((UInt32 *)(void *)(block + size)) = 0; + size += 4; + } +} + +void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest) +{ + MY_ALIGN (16) + UInt32 st[SHA1_NUM_DIGEST_WORDS]; + + st[0] = p->state[0]; + st[1] = p->state[1]; + st[2] = p->state[2]; + st[3] = p->state[3]; + st[4] = p->state[4]; + + SHA1_UPDATE_BLOCKS(p)(st, data, 1); + + SetBe32(destDigest + 0 , st[0]) + SetBe32(destDigest + 1 * 4, st[1]) + SetBe32(destDigest + 2 * 4, st[2]) + SetBe32(destDigest + 3 * 4, st[3]) + SetBe32(destDigest + 4 * 4, st[4]) +} + + +void Sha1Prepare(void) +{ + #ifdef Z7_COMPILER_SHA1_SUPPORTED + SHA1_FUNC_UPDATE_BLOCKS f, f_hw; + f = Sha1_UpdateBlocks; + f_hw = NULL; + #ifdef MY_CPU_X86_OR_AMD64 + #ifndef USE_MY_MM + if (CPU_IsSupported_SHA() + && CPU_IsSupported_SSSE3() + // && CPU_IsSupported_SSE41() + ) + #endif + #else + if (CPU_IsSupported_SHA1()) + #endif + { + // printf("\n========== HW SHA1 ======== \n"); + #if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER) + /* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037). + It generated incorrect SHA-1 code. + 21.03 : we test sha1-hardware code at runtime initialization */ + + #pragma message("== SHA1 code: MSC compiler : failure-check code was inserted") + + UInt32 state[5] = { 0, 1, 2, 3, 4 } ; + Byte data[64]; + unsigned i; + for (i = 0; i < sizeof(data); i += 2) + { + data[i ] = (Byte)(i); + data[i + 1] = (Byte)(i + 1); + } + + Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64); + + if ( state[0] != 0x9acd7297 + || state[1] != 0x4624d898 + || state[2] != 0x0bf079f0 + || state[3] != 0x031e61b3 + || state[4] != 0x8323fe20) + { + // printf("\n========== SHA-1 hardware version failure ======== \n"); + } + else + #endif + { + f = f_hw = Sha1_UpdateBlocks_HW; + } + } + g_SHA1_FUNC_UPDATE_BLOCKS = f; + g_SHA1_FUNC_UPDATE_BLOCKS_HW = f_hw; + #endif +} + +#undef kNumW +#undef w +#undef w0 +#undef w1 +#undef f0 +#undef f1 +#undef f2 +#undef f3 +#undef T1 +#undef T5 +#undef M5 +#undef R1 +#undef R2 +#undef R4 +#undef R5 +#undef R20_START +#undef R_PRE +#undef R_MAIN +#undef STEP_PRE +#undef STEP_MAIN +#undef Z7_SHA1_BIG_W +#undef Z7_SHA1_UNROLL +#undef Z7_COMPILER_SHA1_SUPPORTED diff --git a/3rdparty/7z/src/Sha1.h b/3rdparty/7z/src/Sha1.h new file mode 100644 index 0000000000..fecd9d3107 --- /dev/null +++ b/3rdparty/7z/src/Sha1.h @@ -0,0 +1,76 @@ +/* Sha1.h -- SHA-1 Hash +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_SHA1_H +#define ZIP7_INC_SHA1_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define SHA1_NUM_BLOCK_WORDS 16 +#define SHA1_NUM_DIGEST_WORDS 5 + +#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4) +#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4) + +typedef void (Z7_FASTCALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks); + +/* + if (the system supports different SHA1 code implementations) + { + (CSha1::func_UpdateBlocks) will be used + (CSha1::func_UpdateBlocks) can be set by + Sha1_Init() - to default (fastest) + Sha1_SetFunction() - to any algo + } + else + { + (CSha1::func_UpdateBlocks) is ignored. + } +*/ + +typedef struct +{ + SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks; + UInt64 count; + UInt64 _pad_2[2]; + UInt32 state[SHA1_NUM_DIGEST_WORDS]; + UInt32 _pad_3[3]; + Byte buffer[SHA1_BLOCK_SIZE]; +} CSha1; + + +#define SHA1_ALGO_DEFAULT 0 +#define SHA1_ALGO_SW 1 +#define SHA1_ALGO_HW 2 + +/* +Sha1_SetFunction() +return: + 0 - (algo) value is not supported, and func_UpdateBlocks was not changed + 1 - func_UpdateBlocks was set according (algo) value. +*/ + +BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo); + +void Sha1_InitState(CSha1 *p); +void Sha1_Init(CSha1 *p); +void Sha1_Update(CSha1 *p, const Byte *data, size_t size); +void Sha1_Final(CSha1 *p, Byte *digest); + +void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size); +void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest); + +// void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); + +/* +call Sha1Prepare() once at program start. +It prepares all supported implementations, and detects the fastest implementation. +*/ + +void Sha1Prepare(void); + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/Sha1Opt.c b/3rdparty/7z/src/Sha1Opt.c new file mode 100644 index 0000000000..27796aa47e --- /dev/null +++ b/3rdparty/7z/src/Sha1Opt.c @@ -0,0 +1,386 @@ +/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" +#include "Compiler.h" +#include "CpuArch.h" + +#if defined(_MSC_VER) +#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) +// #define USE_MY_MM +#endif +#endif + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check + #define USE_HW_SHA + #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) + #define USE_HW_SHA + #if !defined(_INTEL_COMPILER) + // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) + #if !defined(__SHA__) || !defined(__SSSE3__) + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + #endif + #endif + #elif defined(_MSC_VER) + #ifdef USE_MY_MM + #define USE_VER_MIN 1300 + #else + #define USE_VER_MIN 1900 + #endif + #if (_MSC_VER >= USE_VER_MIN) + #define USE_HW_SHA + #endif + #endif +// #endif // MY_CPU_X86_OR_AMD64 + +#ifdef USE_HW_SHA + +// #pragma message("Sha1 HW") + +// sse/sse2/ssse3: +#include +// sha*: +#include + +#if defined (__clang__) && defined(_MSC_VER) + // #if !defined(__SSSE3__) + // #endif + #if !defined(__SHA__) + #include + #endif +#else + +#ifdef USE_MY_MM +#include "My_mm.h" +#endif + +#endif + +/* +SHA1 uses: +SSE2: + _mm_loadu_si128 + _mm_storeu_si128 + _mm_set_epi32 + _mm_add_epi32 + _mm_shuffle_epi32 / pshufd + _mm_xor_si128 + _mm_cvtsi128_si32 + _mm_cvtsi32_si128 +SSSE3: + _mm_shuffle_epi8 / pshufb + +SHA: + _mm_sha1* +*/ + + +#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src); +#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask); +#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask); +#ifdef __clang__ +#define SHA1_RNDS4_RET_TYPE_CAST (__m128i) +#else +#define SHA1_RNDS4_RET_TYPE_CAST +#endif +#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f); +#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); +#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); + + +#define LOAD_SHUFFLE(m, k) \ + m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ + SHUFFLE_EPI8(m, mask) \ + +#define SM1(m0, m1, m2, m3) \ + SHA1_MSG1(m0, m1) \ + +#define SM2(m0, m1, m2, m3) \ + XOR_SI128(m3, m1) \ + SHA1_MSG2(m3, m2) \ + +#define SM3(m0, m1, m2, m3) \ + XOR_SI128(m3, m1) \ + SM1(m0, m1, m2, m3) \ + SHA1_MSG2(m3, m2) \ + +#define NNN(m0, m1, m2, m3) + + + + + + + + + + + + + + + + + +#define R4(k, e0, e1, m0, m1, m2, m3, OP) \ + e1 = abcd; \ + SHA1_RND4(abcd, e0, (k) / 5) \ + SHA1_NEXTE(e1, m1) \ + OP(m0, m1, m2, m3) \ + +#define R16(k, mx, OP0, OP1, OP2, OP3) \ + R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \ + R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \ + R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \ + R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \ + +#define PREPARE_STATE \ + SHUFFLE_EPI32 (abcd, 0x1B) \ + SHUFFLE_EPI32 (e0, 0x1B) \ + + + + + +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) +{ + const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); + + __m128i abcd, e0; + + if (numBlocks == 0) + return; + + abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca + e0 = _mm_cvtsi32_si128((int)state[4]); // 000e + + PREPARE_STATE + + do + { + __m128i abcd_save, e2; + __m128i m0, m1, m2, m3; + __m128i e1; + + + abcd_save = abcd; + e2 = e0; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + ADD_EPI32(e0, m0) + + R16 ( 0, m0, SM1, SM3, SM3, SM3 ) + R16 ( 1, m0, SM3, SM3, SM3, SM3 ) + R16 ( 2, m0, SM3, SM3, SM3, SM3 ) + R16 ( 3, m0, SM3, SM3, SM3, SM3 ) + R16 ( 4, e2, SM2, NNN, NNN, NNN ) + + ADD_EPI32(abcd, abcd_save) + + data += 64; + } + while (--numBlocks); + + PREPARE_STATE + + _mm_storeu_si128((__m128i *) (void *) state, abcd); + *(state+4) = (UInt32)_mm_cvtsi128_si32(e0); +} + +#endif // USE_HW_SHA + +#elif defined(MY_CPU_ARM_OR_ARM64) + + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_SHA + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_SHA + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_SHA + #endif + #endif + +#ifdef USE_HW_SHA + +// #pragma message("=== Sha1 HW === ") + +#if defined(__clang__) || defined(__GNUC__) + #ifdef MY_CPU_ARM64 + #define ATTRIB_SHA __attribute__((__target__("+crypto"))) + #else + #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#if defined(_MSC_VER) && defined(MY_CPU_ARM64) +#include +#else +#include +#endif + +typedef uint32x4_t v128; +// typedef __n128 v128; // MSVC + +#ifdef MY_CPU_BE + #define MY_rev32_for_LE(x) +#else + #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x))) +#endif + +#define LOAD_128(_p) (*(const v128 *)(const void *)(_p)) +#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v) + +#define LOAD_SHUFFLE(m, k) \ + m = LOAD_128((data + (k) * 16)); \ + MY_rev32_for_LE(m); \ + +#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3); +#define SU1(dest, src) dest = vsha1su1q_u32(dest, src); +#define C(e) abcd = vsha1cq_u32(abcd, e, t); +#define P(e) abcd = vsha1pq_u32(abcd, e, t); +#define M(e) abcd = vsha1mq_u32(abcd, e, t); +#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) +#define T(m, c) t = vaddq_u32(m, c) + +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + v128 abcd; + v128 c0, c1, c2, c3; + uint32_t e0; + + if (numBlocks == 0) + return; + + c0 = vdupq_n_u32(0x5a827999); + c1 = vdupq_n_u32(0x6ed9eba1); + c2 = vdupq_n_u32(0x8f1bbcdc); + c3 = vdupq_n_u32(0xca62c1d6); + + abcd = LOAD_128(&state[0]); + e0 = state[4]; + + do + { + v128 abcd_save; + v128 m0, m1, m2, m3; + v128 t; + uint32_t e0_save, e1; + + abcd_save = abcd; + e0_save = e0; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + T(m0, c0); H(e1); C(e0); + T(m1, c0); SU0(m0, m1, m2); H(e0); C(e1); + T(m2, c0); SU0(m1, m2, m3); SU1(m0, m3); H(e1); C(e0); + T(m3, c0); SU0(m2, m3, m0); SU1(m1, m0); H(e0); C(e1); + T(m0, c0); SU0(m3, m0, m1); SU1(m2, m1); H(e1); C(e0); + T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1); + T(m2, c1); SU0(m1, m2, m3); SU1(m0, m3); H(e1); P(e0); + T(m3, c1); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1); + T(m0, c1); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0); + T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1); + T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0); + T(m3, c2); SU0(m2, m3, m0); SU1(m1, m0); H(e0); M(e1); + T(m0, c2); SU0(m3, m0, m1); SU1(m2, m1); H(e1); M(e0); + T(m1, c2); SU0(m0, m1, m2); SU1(m3, m2); H(e0); M(e1); + T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0); + T(m3, c3); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1); + T(m0, c3); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0); + T(m1, c3); SU1(m3, m2); H(e0); P(e1); + T(m2, c3); H(e1); P(e0); + T(m3, c3); H(e0); P(e1); + + abcd = vaddq_u32(abcd, abcd_save); + e0 += e0_save; + + data += 64; + } + while (--numBlocks); + + STORE_128(&state[0], abcd); + state[4] = e0; +} + +#endif // USE_HW_SHA + +#endif // MY_CPU_ARM_OR_ARM64 + + +#ifndef USE_HW_SHA + +// #error Stop_Compiling_UNSUPPORTED_SHA +// #include + +// #include "Sha1.h" +void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); + +#pragma message("Sha1 HW-SW stub was used") + +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) +{ + Sha1_UpdateBlocks(state, data, numBlocks); + /* + UNUSED_VAR(state); + UNUSED_VAR(data); + UNUSED_VAR(numBlocks); + exit(1); + return; + */ +} + +#endif + +#undef SU0 +#undef SU1 +#undef C +#undef P +#undef M +#undef H +#undef T +#undef MY_rev32_for_LE +#undef NNN +#undef LOAD_128 +#undef STORE_128 +#undef LOAD_SHUFFLE +#undef SM1 +#undef SM2 +#undef SM3 +#undef NNN +#undef R4 +#undef R16 +#undef PREPARE_STATE +#undef USE_HW_SHA +#undef ATTRIB_SHA +#undef USE_VER_MIN diff --git a/3rdparty/7z/src/Sha256.c b/3rdparty/7z/src/Sha256.c index c03b75afe6..538ccaa996 100644 --- a/3rdparty/7z/src/Sha256.c +++ b/3rdparty/7z/src/Sha256.c @@ -1,5 +1,5 @@ /* Sha256.c -- SHA-256 Hash -2021-04-01 : Igor Pavlov : Public domain +2023-04-02 : Igor Pavlov : Public domain This code is based on public domain code from Wei Dai's Crypto++ library. */ #include "Precomp.h" @@ -17,48 +17,48 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */ #ifdef MY_CPU_X86_OR_AMD64 #ifdef _MSC_VER #if _MSC_VER >= 1200 - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #elif defined(__clang__) #if (__clang_major__ >= 8) // fix that check - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #elif defined(__GNUC__) #if (__GNUC__ >= 8) // fix that check - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #elif defined(__INTEL_COMPILER) #if (__INTEL_COMPILER >= 1800) // fix that check - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #endif #elif defined(MY_CPU_ARM_OR_ARM64) #ifdef _MSC_VER #if _MSC_VER >= 1910 - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #elif defined(__clang__) #if (__clang_major__ >= 8) // fix that check - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #elif defined(__GNUC__) #if (__GNUC__ >= 6) // fix that check - #define _SHA_SUPPORTED + #define Z7_COMPILER_SHA256_SUPPORTED #endif #endif #endif -void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); -#ifdef _SHA_SUPPORTED - void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef Z7_COMPILER_SHA256_SUPPORTED + void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); - static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; - static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW; + static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; + static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW; - #define UPDATE_BLOCKS(p) p->func_UpdateBlocks + #define SHA256_UPDATE_BLOCKS(p) p->func_UpdateBlocks #else - #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks + #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks #endif @@ -66,16 +66,16 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo) { SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; - #ifdef _SHA_SUPPORTED + #ifdef Z7_COMPILER_SHA256_SUPPORTED if (algo != SHA256_ALGO_SW) { if (algo == SHA256_ALGO_DEFAULT) - func = g_FUNC_UPDATE_BLOCKS; + func = g_SHA256_FUNC_UPDATE_BLOCKS; else { if (algo != SHA256_ALGO_HW) return False; - func = g_FUNC_UPDATE_BLOCKS_HW; + func = g_SHA256_FUNC_UPDATE_BLOCKS_HW; if (!func) return False; } @@ -92,17 +92,18 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo) /* define it for speed optimization */ -#ifdef _SFX +#ifdef Z7_SFX #define STEP_PRE 1 #define STEP_MAIN 1 #else #define STEP_PRE 2 #define STEP_MAIN 4 - // #define _SHA256_UNROLL + // #define Z7_SHA256_UNROLL #endif +#undef Z7_SHA256_BIG_W #if STEP_MAIN != 16 - #define _SHA256_BIG_W + #define Z7_SHA256_BIG_W #endif @@ -124,8 +125,8 @@ void Sha256_InitState(CSha256 *p) void Sha256_Init(CSha256 *p) { p->func_UpdateBlocks = - #ifdef _SHA_SUPPORTED - g_FUNC_UPDATE_BLOCKS; + #ifdef Z7_COMPILER_SHA256_SUPPORTED + g_SHA256_FUNC_UPDATE_BLOCKS; #else NULL; #endif @@ -145,7 +146,7 @@ void Sha256_Init(CSha256 *p) #define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)) -#ifdef _SHA256_BIG_W +#ifdef Z7_SHA256_BIG_W // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned. #define w(j, i) W[(size_t)(j) + i] #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i)) @@ -176,7 +177,7 @@ void Sha256_Init(CSha256 *p) #define R1_PRE(i) T1( W_PRE, i) #define R1_MAIN(i) T1( W_MAIN, i) -#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) +#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) #define R2_MAIN(i) \ R1_MAIN(i) \ R1_MAIN(i + 1) \ @@ -185,7 +186,7 @@ void Sha256_Init(CSha256 *p) -#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 +#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8 #define T4( a,b,c,d,e,f,g,h, wx, i) \ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ @@ -223,7 +224,7 @@ void Sha256_Init(CSha256 *p) #endif -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); // static extern MY_ALIGN(64) @@ -252,11 +253,11 @@ const UInt32 SHA256_K_ARRAY[64] = { #define K SHA256_K_ARRAY -MY_NO_INLINE -void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) +Z7_NO_INLINE +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) { UInt32 W - #ifdef _SHA256_BIG_W + #ifdef Z7_SHA256_BIG_W [64]; #else [16]; @@ -266,7 +267,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t UInt32 a,b,c,d,e,f,g,h; - #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) + #if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) UInt32 tmp; #endif @@ -297,12 +298,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t #else - R1_PRE(0); + R1_PRE(0) #if STEP_PRE >= 2 - R1_PRE(1); + R1_PRE(1) #if STEP_PRE >= 4 - R1_PRE(2); - R1_PRE(3); + R1_PRE(2) + R1_PRE(3) #endif #endif @@ -311,32 +312,32 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t for (j = 16; j < 64; j += STEP_MAIN) { - #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 + #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8 #if STEP_MAIN < 8 - R4_MAIN(0); + R4_MAIN(0) #else - R8_MAIN(0); + R8_MAIN(0) #if STEP_MAIN == 16 - R8_MAIN(8); + R8_MAIN(8) #endif #endif #else - R1_MAIN(0); + R1_MAIN(0) #if STEP_MAIN >= 2 - R1_MAIN(1); + R1_MAIN(1) #if STEP_MAIN >= 4 - R2_MAIN(2); + R2_MAIN(2) #if STEP_MAIN >= 8 - R2_MAIN(4); - R2_MAIN(6); + R2_MAIN(4) + R2_MAIN(6) #if STEP_MAIN >= 16 - R2_MAIN(8); - R2_MAIN(10); - R2_MAIN(12); - R2_MAIN(14); + R2_MAIN(8) + R2_MAIN(10) + R2_MAIN(12) + R2_MAIN(14) #endif #endif #endif @@ -367,7 +368,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t #undef s1 #undef K -#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1) +#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1) void Sha256_Update(CSha256 *p, const Byte *data, size_t size) { @@ -397,7 +398,7 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size) } { size_t numBlocks = size >> 6; - UPDATE_BLOCKS(p)(p->state, data, numBlocks); + SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks); size &= 0x3F; if (size == 0) return; @@ -441,8 +442,8 @@ void Sha256_Final(CSha256 *p, Byte *digest) { UInt64 numBits = (p->count << 3); - SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32)); - SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)); + SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32)) + SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)) } Sha256_UpdateBlock(p); @@ -451,8 +452,8 @@ void Sha256_Final(CSha256 *p, Byte *digest) { UInt32 v0 = p->state[i]; UInt32 v1 = p->state[(size_t)i + 1]; - SetBe32(digest , v0); - SetBe32(digest + 4, v1); + SetBe32(digest , v0) + SetBe32(digest + 4, v1) digest += 8; } @@ -460,9 +461,9 @@ void Sha256_Final(CSha256 *p, Byte *digest) } -void Sha256Prepare() +void Sha256Prepare(void) { - #ifdef _SHA_SUPPORTED + #ifdef Z7_COMPILER_SHA256_SUPPORTED SHA256_FUNC_UPDATE_BLOCKS f, f_hw; f = Sha256_UpdateBlocks; f_hw = NULL; @@ -480,7 +481,36 @@ void Sha256Prepare() // printf("\n========== HW SHA256 ======== \n"); f = f_hw = Sha256_UpdateBlocks_HW; } - g_FUNC_UPDATE_BLOCKS = f; - g_FUNC_UPDATE_BLOCKS_HW = f_hw; + g_SHA256_FUNC_UPDATE_BLOCKS = f; + g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw; #endif } + +#undef S0 +#undef S1 +#undef s0 +#undef s1 +#undef Ch +#undef Maj +#undef W_MAIN +#undef W_PRE +#undef w +#undef blk2_main +#undef blk2 +#undef T1 +#undef T4 +#undef T8 +#undef R1_PRE +#undef R1_MAIN +#undef R2_MAIN +#undef R4 +#undef R4_PRE +#undef R4_MAIN +#undef R8 +#undef R8_PRE +#undef R8_MAIN +#undef STEP_PRE +#undef STEP_MAIN +#undef Z7_SHA256_BIG_W +#undef Z7_SHA256_UNROLL +#undef Z7_COMPILER_SHA256_SUPPORTED diff --git a/3rdparty/7z/src/Sha256.h b/3rdparty/7z/src/Sha256.h index f529339865..af8c9bc8a8 100644 --- a/3rdparty/7z/src/Sha256.h +++ b/3rdparty/7z/src/Sha256.h @@ -1,8 +1,8 @@ /* Sha256.h -- SHA-256 Hash -2021-01-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_SHA256_H -#define __7Z_SHA256_H +#ifndef ZIP7_INC_SHA256_H +#define ZIP7_INC_SHA256_H #include "7zTypes.h" @@ -14,7 +14,7 @@ EXTERN_C_BEGIN #define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4) #define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4) -typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); +typedef void (Z7_FASTCALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); /* if (the system supports different SHA256 code implementations) @@ -34,7 +34,7 @@ typedef struct { SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; UInt64 count; - UInt64 __pad_2[2]; + UInt64 _pad_2[2]; UInt32 state[SHA256_NUM_DIGEST_WORDS]; Byte buffer[SHA256_BLOCK_SIZE]; @@ -62,7 +62,7 @@ void Sha256_Final(CSha256 *p, Byte *digest); -// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); +// void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); /* call Sha256Prepare() once at program start. diff --git a/3rdparty/7z/src/Sha256Opt.c b/3rdparty/7z/src/Sha256Opt.c index cc8c53e1b6..4fccb336f1 100644 --- a/3rdparty/7z/src/Sha256Opt.c +++ b/3rdparty/7z/src/Sha256Opt.c @@ -1,7 +1,9 @@ /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" +#include "Compiler.h" +#include "CpuArch.h" #if defined(_MSC_VER) #if (_MSC_VER < 1900) && (_MSC_VER >= 1200) @@ -9,41 +11,26 @@ #endif #endif -#include "CpuArch.h" - #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check + #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #define USE_HW_SHA - #ifndef __SHA__ + #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) + #define USE_HW_SHA + #if !defined(_INTEL_COMPILER) + // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) + #if !defined(__SHA__) || !defined(__SSSE3__) #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - #if defined(_MSC_VER) - // SSSE3: for clang-cl: - #include - #define __SHA__ - #endif #endif - - #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 8) // fix that check - #define USE_HW_SHA - #ifndef __SHA__ - #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - // #pragma GCC target("sha,ssse3") #endif - #endif - #elif defined(__INTEL_COMPILER) - #if (__INTEL_COMPILER >= 1800) // fix that check - #define USE_HW_SHA - #endif #elif defined(_MSC_VER) #ifdef USE_MY_MM #define USE_VER_MIN 1300 #else - #define USE_VER_MIN 1910 + #define USE_VER_MIN 1900 #endif - #if _MSC_VER >= USE_VER_MIN + #if (_MSC_VER >= USE_VER_MIN) #define USE_HW_SHA #endif #endif @@ -52,16 +39,19 @@ #ifdef USE_HW_SHA // #pragma message("Sha256 HW") -// #include -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +// sse/sse2/ssse3: +#include +// sha*: #include -#else -#include -#if defined(_MSC_VER) && (_MSC_VER >= 1600) -// #include -#endif +#if defined (__clang__) && defined(_MSC_VER) + // #if !defined(__SSSE3__) + // #endif + #if !defined(__SHA__) + #include + #endif +#else #ifdef USE_MY_MM #include "My_mm.h" @@ -98,9 +88,9 @@ const UInt32 SHA256_K_ARRAY[64]; #define K SHA256_K_ARRAY -#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); -#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); -#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); +#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); #define LOAD_SHUFFLE(m, k) \ @@ -112,7 +102,7 @@ const UInt32 SHA256_K_ARRAY[64]; #define SM2(g0, g1, g2, g3) \ tmp = _mm_alignr_epi8(g1, g0, 4); \ - ADD_EPI32(g2, tmp); \ + ADD_EPI32(g2, tmp) \ SHA25G_MSG2(g2, g1); \ // #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) @@ -138,16 +128,16 @@ const UInt32 SHA256_K_ARRAY[64]; // We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 #define R4(k, g0, g1, g2, g3, OP0, OP1) \ - RND2_0(g0, k); \ - OP0(g0, g1, g2, g3); \ - RND2_1; \ - OP1(g0, g1, g2, g3); \ + RND2_0(g0, k) \ + OP0(g0, g1, g2, g3) \ + RND2_1 \ + OP1(g0, g1, g2, g3) \ #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ - R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ - R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ - R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ - R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \ #define PREPARE_STATE \ tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ @@ -157,11 +147,11 @@ const UInt32 SHA256_K_ARRAY[64]; state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); __m128i tmp; @@ -192,13 +182,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size - R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); - R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) - ADD_EPI32(state0, state0_save); - ADD_EPI32(state1, state1_save); + ADD_EPI32(state0, state0_save) + ADD_EPI32(state1, state1_save) data += 64; } @@ -298,11 +288,11 @@ const UInt32 SHA256_K_ARRAY[64]; R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { v128 state0, state1; @@ -353,12 +343,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size // #include // #include "Sha256.h" -void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); #pragma message("Sha256 HW-SW stub was used") -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { Sha256_UpdateBlocks(state, data, numBlocks); /* @@ -371,3 +361,26 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size } #endif + + + +#undef K +#undef RND2 +#undef RND2_0 +#undef RND2_1 + +#undef MY_rev32_for_LE +#undef NNN +#undef LOAD_128 +#undef STORE_128 +#undef LOAD_SHUFFLE +#undef SM1 +#undef SM2 + +#undef NNN +#undef R4 +#undef R16 +#undef PREPARE_STATE +#undef USE_HW_SHA +#undef ATTRIB_SHA +#undef USE_VER_MIN diff --git a/3rdparty/7z/src/Sort.h b/3rdparty/7z/src/Sort.h index 7209d7824d..1bb2b1e7e6 100644 --- a/3rdparty/7z/src/Sort.h +++ b/3rdparty/7z/src/Sort.h @@ -1,8 +1,8 @@ /* Sort.h -- Sort functions -2014-04-05 : Igor Pavlov : Public domain */ +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __7Z_SORT_H -#define __7Z_SORT_H +#ifndef ZIP7_INC_SORT_H +#define ZIP7_INC_SORT_H #include "7zTypes.h" diff --git a/3rdparty/7z/src/SwapBytes.c b/3rdparty/7z/src/SwapBytes.c new file mode 100644 index 0000000000..7901bbaa87 --- /dev/null +++ b/3rdparty/7z/src/SwapBytes.c @@ -0,0 +1,800 @@ +/* SwapBytes.c -- Byte Swap conversion filter +2023-04-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Compiler.h" +#include "CpuArch.h" +#include "RotateDefs.h" +#include "SwapBytes.h" + +typedef UInt16 CSwapUInt16; +typedef UInt32 CSwapUInt32; + +// #define k_SwapBytes_Mode_BASE 0 + +#ifdef MY_CPU_X86_OR_AMD64 + +#define k_SwapBytes_Mode_SSE2 1 +#define k_SwapBytes_Mode_SSSE3 2 +#define k_SwapBytes_Mode_AVX2 3 + + // #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2 + #define SWAP_ATTRIB_SSE2 __attribute__((__target__("sse2"))) + #define SWAP_ATTRIB_SSSE3 __attribute__((__target__("ssse3"))) + #define SWAP_ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER == 1900) + #pragma warning(disable : 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX + #endif + #if (_MSC_VER >= 1900) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2 + #elif (_MSC_VER >= 1500) // (VS2008) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSSE3 + #elif (_MSC_VER >= 1310) // (VS2003) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSE2 + #endif + #endif // _MSC_VER + +/* +// for debug +#ifdef k_SwapBytes_Mode_MAX +#undef k_SwapBytes_Mode_MAX +#endif +*/ + +#ifndef k_SwapBytes_Mode_MAX +#define k_SwapBytes_Mode_MAX 0 +#endif + +#if (k_SwapBytes_Mode_MAX != 0) && defined(MY_CPU_AMD64) + #define k_SwapBytes_Mode_MIN k_SwapBytes_Mode_SSE2 +#else + #define k_SwapBytes_Mode_MIN 0 +#endif + +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_AVX2) + #define USE_SWAP_AVX2 +#endif +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSSE3) + #define USE_SWAP_SSSE3 +#endif +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSE2) + #define USE_SWAP_128 +#endif + +#if k_SwapBytes_Mode_MAX <= k_SwapBytes_Mode_MIN || !defined(USE_SWAP_128) +#define FORCE_SWAP_MODE +#endif + + +#ifdef USE_SWAP_128 +/* + MMX + SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2 + SSE4A + AES + AVX, AVX2, FMA +*/ + +#include // sse2 +// typedef __m128i v128; + +#define SWAP2_128(i) { \ + const __m128i v = *(const __m128i *)(const void *)(items + (i) * 8); \ + *( __m128i *)( void *)(items + (i) * 8) = \ + _mm_or_si128( \ + _mm_slli_epi16(v, 8), \ + _mm_srli_epi16(v, 8)); } +// _mm_or_si128() has more ports to execute than _mm_add_epi16(). + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + } + while (items != lim); +} + +/* +// sse2 +#define SWAP4_128_pack(i) { \ + __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \ + __m128i v0 = _mm_unpacklo_epi8(v, mask); \ + __m128i v1 = _mm_unpackhi_epi8(v, mask); \ + v0 = _mm_shufflelo_epi16(v0, 0x1b); \ + v1 = _mm_shufflelo_epi16(v1, 0x1b); \ + v0 = _mm_shufflehi_epi16(v0, 0x1b); \ + v1 = _mm_shufflehi_epi16(v1, 0x1b); \ + *(__m128i *)(void *)(items + (i) * 4) = _mm_packus_epi16(v0, v1); } + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes4_128_pack(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + const __m128i mask = _mm_setzero_si128(); + // const __m128i mask = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_128_pack(0); items += 1 * 4; + // SWAP4_128_pack(0); SWAP4_128_pack(1); items += 2 * 4; + } + while (items != lim); +} + +// sse2 +#define SWAP4_128_shift(i) { \ + __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \ + __m128i v2; \ + v2 = _mm_or_si128( \ + _mm_slli_si128(_mm_and_si128(v, mask), 1), \ + _mm_and_si128(_mm_srli_si128(v, 1), mask)); \ + v = _mm_or_si128( \ + _mm_slli_epi32(v, 24), \ + _mm_srli_epi32(v, 24)); \ + *(__m128i *)(void *)(items + (i) * 4) = _mm_or_si128(v2, v); } + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes4_128_shift(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + #define M1 0xff00 + const __m128i mask = _mm_set_epi32(M1, M1, M1, M1); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4; + // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4; + SWAP4_128_shift(0); items += 1 * 4; + } + while (items != lim); +} +*/ + + +#if defined(USE_SWAP_SSSE3) || defined(USE_SWAP_AVX2) + +#define SWAP_SHUF_REV_SEQ_2_VALS(v) (v)+1, (v) +#define SWAP_SHUF_REV_SEQ_4_VALS(v) (v)+3, (v)+2, (v)+1, (v) + +#define SWAP2_SHUF_MASK_16_BYTES \ + SWAP_SHUF_REV_SEQ_2_VALS (0 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (1 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (2 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (3 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (4 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (5 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (6 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (7 * 2) + +#define SWAP4_SHUF_MASK_16_BYTES \ + SWAP_SHUF_REV_SEQ_4_VALS (0 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (1 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (2 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (3 * 4) + +#if defined(USE_SWAP_AVX2) +/* if we use 256_BIT_INIT_MASK, each static array mask will be larger for 16 bytes */ +// #define SWAP_USE_256_BIT_INIT_MASK +#endif + +#if defined(SWAP_USE_256_BIT_INIT_MASK) && defined(USE_SWAP_AVX2) +#define SWAP_MASK_INIT_SIZE 32 +#else +#define SWAP_MASK_INIT_SIZE 16 +#endif + +MY_ALIGN(SWAP_MASK_INIT_SIZE) +static const Byte k_ShufMask_Swap2[] = +{ + SWAP2_SHUF_MASK_16_BYTES + #if SWAP_MASK_INIT_SIZE > 16 + , SWAP2_SHUF_MASK_16_BYTES + #endif +}; + +MY_ALIGN(SWAP_MASK_INIT_SIZE) +static const Byte k_ShufMask_Swap4[] = +{ + SWAP4_SHUF_MASK_16_BYTES + #if SWAP_MASK_INIT_SIZE > 16 + , SWAP4_SHUF_MASK_16_BYTES + #endif +}; + + +#ifdef USE_SWAP_SSSE3 + +#include // ssse3 + +#define SHUF_128(i) *(items + (i)) = \ + _mm_shuffle_epi8(*(items + (i)), mask); // SSSE3 + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_SSSE3 +SWAP_ATTRIB_SSSE3 +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +ShufBytes_128(void *items8, const void *lim8, const void *mask128_ptr) +{ + __m128i *items = (__m128i *)items8; + const __m128i *lim = (const __m128i *)lim8; + // const __m128i mask = _mm_set_epi8(SHUF_SWAP2_MASK_16_VALS); + // const __m128i mask = _mm_set_epi8(SHUF_SWAP4_MASK_16_VALS); + // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + // const __m128i mask = *(const __m128i *)(const void *)&(k_ShufMask_Swap4[0]); + const __m128i mask = *(const __m128i *)mask128_ptr; + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SHUF_128(0) SHUF_128(1) items += 2; + SHUF_128(0) SHUF_128(1) items += 2; + } + while (items != lim); +} + +#endif // USE_SWAP_SSSE3 + + + +#ifdef USE_SWAP_AVX2 + +#include // avx, avx2 +#if defined(__clang__) +#include +#include +#endif + +#define SHUF_256(i) *(items + (i)) = \ + _mm256_shuffle_epi8(*(items + (i)), mask); // AVX2 + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_AVX2 +SWAP_ATTRIB_AVX2 +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) +{ + __m256i *items = (__m256i *)items8; + const __m256i *lim = (const __m256i *)lim8; + /* + UNUSED_VAR(mask128_ptr) + __m256i mask = + for Swap4: _mm256_setr_epi8(SWAP4_SHUF_MASK_16_BYTES, SWAP4_SHUF_MASK_16_BYTES); + for Swap2: _mm256_setr_epi8(SWAP2_SHUF_MASK_16_BYTES, SWAP2_SHUF_MASK_16_BYTES); + */ + const __m256i mask = + #if SWAP_MASK_INIT_SIZE > 16 + *(const __m256i *)(const void *)mask128_ptr; + #else + /* msvc: broadcastsi128() version reserves the stack for no reason + msvc 19.29-: _mm256_insertf128_si256() / _mm256_set_m128i)) versions use non-avx movdqu xmm0,XMMWORD PTR [r8] + msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want + */ + // _mm256_broadcastsi128_si256(*mask128_ptr); + /* + #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) + MY_mm256_set_m128i + */ + _mm256_set_m128i( + *(const __m128i *)mask128_ptr, + *(const __m128i *)mask128_ptr); + #endif + + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SHUF_256(0) SHUF_256(1) items += 2; + SHUF_256(0) SHUF_256(1) items += 2; + } + while (items != lim); +} + +#endif // USE_SWAP_AVX2 +#endif // USE_SWAP_SSSE3 || USE_SWAP_AVX2 +#endif // USE_SWAP_128 + + + +// compile message "NEON intrinsics not available with the soft-float ABI" +#elif defined(MY_CPU_ARM_OR_ARM64) || \ + (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) +// #elif defined(MY_CPU_ARM64) + + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \ + || defined(MY_CPU_ARM64) + #define USE_SWAP_128 + #endif + #ifdef MY_CPU_ARM64 + // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) + #else + // #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_SWAP_128 + #endif + #endif + + #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #include + #else + #include + #endif + +#ifndef USE_SWAP_128 + #define FORCE_SWAP_MODE +#else + +#ifdef MY_CPU_ARM64 + // for debug : comment it + #define FORCE_SWAP_MODE +#else + #define k_SwapBytes_Mode_NEON 1 +#endif +// typedef uint8x16_t v128; +#define SWAP2_128(i) *(uint8x16_t *) (void *)(items + (i) * 8) = \ + vrev16q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 8)); +#define SWAP4_128(i) *(uint8x16_t *) (void *)(items + (i) * 4) = \ + vrev32q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 4)); + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_NEON +SWAP_ATTRIB_NEON +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + } + while (items != lim); +} + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_NEON +SWAP_ATTRIB_NEON +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +SwapBytes4_128(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_128(0) SWAP4_128(1) items += 2 * 4; + SWAP4_128(0) SWAP4_128(1) items += 2 * 4; + } + while (items != lim); +} + +#endif // USE_SWAP_128 + +#else // MY_CPU_ARM_OR_ARM64 +#define FORCE_SWAP_MODE +#endif // MY_CPU_ARM_OR_ARM64 + + + + + + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_X86) + /* _byteswap_ushort() in MSVC x86 32-bit works via slow { mov dh, al; mov dl, ah } + So we use own versions of byteswap function */ + #if (_MSC_VER < 1400 ) // old MSVC-X86 without _rotr16() support + #define SWAP2_16(i) { UInt32 v = items[i]; v += (v << 16); v >>= 8; items[i] = (CSwapUInt16)v; } + #else // is new MSVC-X86 with fast _rotr16() + #include + #define SWAP2_16(i) { items[i] = _rotr16(items[i], 8); } + #endif +#else // is not MSVC-X86 + #define SWAP2_16(i) { CSwapUInt16 v = items[i]; items[i] = Z7_BSWAP16(v); } +#endif // MSVC-X86 + +#if defined(Z7_CPU_FAST_BSWAP_SUPPORTED) + #define SWAP4_32(i) { CSwapUInt32 v = items[i]; items[i] = Z7_BSWAP32(v); } +#else + #define SWAP4_32(i) \ + { UInt32 v = items[i]; \ + v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); \ + v = rotlFixed(v, 16); \ + items[i] = v; } +#endif + + + + +#if defined(FORCE_SWAP_MODE) && defined(USE_SWAP_128) + #define DEFAULT_Swap2 SwapBytes2_128 + #if !defined(MY_CPU_X86_OR_AMD64) + #define DEFAULT_Swap4 SwapBytes4_128 + #endif +#endif + +#if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4) + +#define SWAP_BASE_FUNCS_PREFIXES \ +Z7_FORCE_INLINE \ +static \ +Z7_ATTRIB_NO_VECTOR \ +void Z7_FASTCALL + + +#ifdef MY_CPU_64BIT + +#if defined(MY_CPU_ARM64) \ + && defined(__ARM_ARCH) && (__ARM_ARCH >= 8) \ + && ( (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4))) + + #define SWAP2_64_VAR(v) asm ("rev16 %x0,%x0" : "+r" (v)); + #define SWAP4_64_VAR(v) asm ("rev32 %x0,%x0" : "+r" (v)); + +#else // is not ARM64-GNU + +#if !defined(MY_CPU_X86_OR_AMD64) || (k_SwapBytes_Mode_MIN == 0) || !defined(USE_SWAP_128) + #define SWAP2_64_VAR(v) \ + v = ( 0x00ff00ff00ff00ff & (v >> 8)) \ + + ((0x00ff00ff00ff00ff & v) << 8); + /* plus gives faster code in MSVC */ +#endif + +#ifdef Z7_CPU_FAST_BSWAP_SUPPORTED + #define SWAP4_64_VAR(v) \ + v = Z7_BSWAP64(v); \ + v = Z7_ROTL64(v, 32); +#else + #define SWAP4_64_VAR(v) \ + v = ( 0x000000ff000000ff & (v >> 24)) \ + + ((0x000000ff000000ff & v) << 24 ) \ + + ( 0x0000ff000000ff00 & (v >> 8)) \ + + ((0x0000ff000000ff00 & v) << 8 ) \ + ; +#endif + +#endif // ARM64-GNU + + +#ifdef SWAP2_64_VAR + +#define SWAP2_64(i) { \ + UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 4); \ + SWAP2_64_VAR(v) \ + *(UInt64 *)(void *)(items + (i) * 4) = v; } + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes2_64(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_64(0) SWAP2_64(1) items += 2 * 4; + SWAP2_64(0) SWAP2_64(1) items += 2 * 4; + } + while (items != lim); +} + + #define DEFAULT_Swap2 SwapBytes2_64 + #if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 0 + #endif +#else // !defined(SWAP2_64_VAR) + #define DEFAULT_Swap2 SwapBytes2_128 + #if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 1 + #endif +#endif // SWAP2_64_VAR + + +#define SWAP4_64(i) { \ + UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 2); \ + SWAP4_64_VAR(v) \ + *(UInt64 *)(void *)(items + (i) * 2) = v; } + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes4_64(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_64(0) SWAP4_64(1) items += 2 * 2; + SWAP4_64(0) SWAP4_64(1) items += 2 * 2; + } + while (items != lim); +} + +#define DEFAULT_Swap4 SwapBytes4_64 + +#else // is not 64BIT + + +#if defined(MY_CPU_ARM_OR_ARM64) \ + && defined(__ARM_ARCH) && (__ARM_ARCH >= 6) \ + && ( (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4))) + +#ifdef MY_CPU_64BIT + #define SWAP2_32_VAR(v) asm ("rev16 %w0,%w0" : "+r" (v)); +#else + #define SWAP2_32_VAR(v) asm ("rev16 %0,%0" : "+r" (v)); // for clang/gcc + // asm ("rev16 %r0,%r0" : "+r" (a)); // for gcc +#endif + +#elif defined(_MSC_VER) && (_MSC_VER < 1300) && defined(MY_CPU_X86) \ + || !defined(Z7_CPU_FAST_BSWAP_SUPPORTED) \ + || !defined(Z7_CPU_FAST_ROTATE_SUPPORTED) + // old msvc doesn't support _byteswap_ulong() + #define SWAP2_32_VAR(v) \ + v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); + +#else // is not ARM and is not old-MSVC-X86 and fast BSWAP/ROTATE are supported + #define SWAP2_32_VAR(v) \ + v = Z7_BSWAP32(v); \ + v = rotlFixed(v, 16); + +#endif // GNU-ARM* + +#define SWAP2_32(i) { \ + UInt32 v = *(const UInt32 *)(const void *)(items + (i) * 2); \ + SWAP2_32_VAR(v); \ + *(UInt32 *)(void *)(items + (i) * 2) = v; } + + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes2_32(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_32(0) SWAP2_32(1) items += 2 * 2; + SWAP2_32(0) SWAP2_32(1) items += 2 * 2; + } + while (items != lim); +} + + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes4_32(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_32(0) SWAP4_32(1) items += 2; + SWAP4_32(0) SWAP4_32(1) items += 2; + } + while (items != lim); +} + +#define DEFAULT_Swap2 SwapBytes2_32 +#define DEFAULT_Swap4 SwapBytes4_32 +#if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 0 +#endif + +#endif // MY_CPU_64BIT +#endif // if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4) + + + +#if !defined(FORCE_SWAP_MODE) +static unsigned g_SwapBytes_Mode; +#endif + +/* size of largest unrolled loop iteration: 128 bytes = 4 * 32 bytes (AVX). */ +#define SWAP_ITERATION_BLOCK_SIZE_MAX (1 << 7) + +// 32 bytes for (AVX) or 2 * 16-bytes for NEON. +#define SWAP_VECTOR_ALIGN_SIZE (1 << 5) + +Z7_NO_INLINE +void z7_SwapBytes2(CSwapUInt16 *items, size_t numItems) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--) + { + SWAP2_16(0) + items++; + } + { + const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt16) - 1; + size_t numItems2 = numItems; + CSwapUInt16 *lim; + numItems &= k_Align_Mask; + numItems2 &= ~(size_t)k_Align_Mask; + lim = items + numItems2; + if (numItems2 != 0) + { + #if !defined(FORCE_SWAP_MODE) + #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_SWAP_AVX2 + if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3) + ShufBytes_256((__m256i *)(void *)items, + (const __m256i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap2[0])); + else + #endif + #ifdef USE_SWAP_SSSE3 + if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3) + ShufBytes_128((__m128i *)(void *)items, + (const __m128i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap2[0])); + else + #endif + #endif // MY_CPU_X86_OR_AMD64 + #if SWAP2_DEFAULT_MODE == 0 + if (g_SwapBytes_Mode != 0) + SwapBytes2_128(items, lim); + else + #endif + #endif // FORCE_SWAP_MODE + DEFAULT_Swap2(items, lim); + } + items = lim; + } + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0; numItems--) + { + SWAP2_16(0) + items++; + } +} + + +Z7_NO_INLINE +void z7_SwapBytes4(CSwapUInt32 *items, size_t numItems) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--) + { + SWAP4_32(0) + items++; + } + { + const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt32) - 1; + size_t numItems2 = numItems; + CSwapUInt32 *lim; + numItems &= k_Align_Mask; + numItems2 &= ~(size_t)k_Align_Mask; + lim = items + numItems2; + if (numItems2 != 0) + { + #if !defined(FORCE_SWAP_MODE) + #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_SWAP_AVX2 + if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3) + ShufBytes_256((__m256i *)(void *)items, + (const __m256i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + else + #endif + #ifdef USE_SWAP_SSSE3 + if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3) + ShufBytes_128((__m128i *)(void *)items, + (const __m128i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + else + #endif + #else // MY_CPU_X86_OR_AMD64 + + if (g_SwapBytes_Mode != 0) + SwapBytes4_128(items, lim); + else + #endif // MY_CPU_X86_OR_AMD64 + #endif // FORCE_SWAP_MODE + DEFAULT_Swap4(items, lim); + } + items = lim; + } + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0; numItems--) + { + SWAP4_32(0) + items++; + } +} + + +// #define SHOW_HW_STATUS + +#ifdef SHOW_HW_STATUS +#include +#define PRF(x) x +#else +#define PRF(x) +#endif + +void z7_SwapBytesPrepare(void) +{ +#ifndef FORCE_SWAP_MODE + unsigned mode = 0; // k_SwapBytes_Mode_BASE; + +#ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== SwapBytes NEON") + PRF(printf("\n=== SwapBytes NEON\n");) + mode = k_SwapBytes_Mode_NEON; + } + } +#else // MY_CPU_ARM_OR_ARM64 + { + #ifdef USE_SWAP_AVX2 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== SwapBytes AVX2") + PRF(printf("\n=== SwapBytes AVX2\n");) + mode = k_SwapBytes_Mode_AVX2; + } + else + #endif + #ifdef USE_SWAP_SSSE3 + if (CPU_IsSupported_SSSE3()) + { + // #pragma message ("=== SwapBytes SSSE3") + PRF(printf("\n=== SwapBytes SSSE3\n");) + mode = k_SwapBytes_Mode_SSSE3; + } + else + #endif + #if !defined(MY_CPU_AMD64) + if (CPU_IsSupported_SSE2()) + #endif + { + // #pragma message ("=== SwapBytes SSE2") + PRF(printf("\n=== SwapBytes SSE2\n");) + mode = k_SwapBytes_Mode_SSE2; + } + } +#endif // MY_CPU_ARM_OR_ARM64 + g_SwapBytes_Mode = mode; + // g_SwapBytes_Mode = 0; // for debug +#endif // FORCE_SWAP_MODE + PRF(printf("\n=== SwapBytesPrepare\n");) +} + +#undef PRF diff --git a/3rdparty/7z/src/SwapBytes.h b/3rdparty/7z/src/SwapBytes.h new file mode 100644 index 0000000000..d442467386 --- /dev/null +++ b/3rdparty/7z/src/SwapBytes.h @@ -0,0 +1,17 @@ +/* SwapBytes.h -- Byte Swap conversion filter +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_SWAP_BYTES_H +#define ZIP7_INC_SWAP_BYTES_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void z7_SwapBytes2(UInt16 *data, size_t numItems); +void z7_SwapBytes4(UInt32 *data, size_t numItems); +void z7_SwapBytesPrepare(void); + +EXTERN_C_END + +#endif diff --git a/3rdparty/7z/src/Threads.c b/3rdparty/7z/src/Threads.c index 6eb45b08a8..d8aaa83277 100644 --- a/3rdparty/7z/src/Threads.c +++ b/3rdparty/7z/src/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2021-12-21 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -11,9 +11,9 @@ #include "Threads.h" -static WRes GetError() +static WRes GetError(void) { - DWORD res = GetLastError(); + const DWORD res = GetLastError(); return res ? (WRes)res : 1; } @@ -173,6 +173,9 @@ WRes CriticalSection_Init(CCriticalSection *p) Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception Windows Vista+ : no exceptions */ #ifdef _MSC_VER + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wlanguage-extension-token" + #endif __try #endif { @@ -193,18 +196,26 @@ WRes CriticalSection_Init(CCriticalSection *p) // ---------- POSIX ---------- #ifndef __APPLE__ -#ifndef _7ZIP_AFFINITY_DISABLE +#ifndef Z7_AFFINITY_DISABLE // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +// clang < 3.6 : unknown warning group '-Wreserved-id-macro' +// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" +// clang >= 13 : do not give warning +#if !defined(_GNU_SOURCE) + #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) + #pragma GCC diagnostic ignored "-Wreserved-id-macro" + #endif #define _GNU_SOURCE -#endif -#endif +#endif // !defined(_GNU_SOURCE) +#endif // Z7_AFFINITY_DISABLE +#endif // __APPLE__ #include "Threads.h" #include #include #include -#ifdef _7ZIP_AFFINITY_SUPPORTED +#ifdef Z7_AFFINITY_SUPPORTED // #include #endif @@ -212,15 +223,12 @@ WRes CriticalSection_Init(CCriticalSection *p) // #include // #define PRF(p) p #define PRF(p) - -#define Print(s) PRF(printf("\n%s\n", s)) - -// #include +#define Print(s) PRF(printf("\n%s\n", s);) WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) { // new thread in Posix probably inherits affinity from parrent thread - Print("Thread_Create_With_CpuSet"); + Print("Thread_Create_With_CpuSet") pthread_attr_t attr; int ret; @@ -228,7 +236,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, p->_created = 0; - RINOK(pthread_attr_init(&attr)); + RINOK(pthread_attr_init(&attr)) ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); @@ -236,7 +244,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, { if (cpuSet) { - #ifdef _7ZIP_AFFINITY_SUPPORTED + #ifdef Z7_AFFINITY_SUPPORTED /* printf("\n affinity :"); @@ -292,7 +300,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { - Print("Thread_Create_WithAffinity"); + Print("Thread_Create_WithAffinity") CCpuSet cs; unsigned i; CpuSet_Zero(&cs); @@ -312,7 +320,7 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param WRes Thread_Close(CThread *p) { - // Print("Thread_Close"); + // Print("Thread_Close") int ret; if (!p->_created) return 0; @@ -326,7 +334,7 @@ WRes Thread_Close(CThread *p) WRes Thread_Wait_Close(CThread *p) { - // Print("Thread_Wait_Close"); + // Print("Thread_Wait_Close") void *thread_return; int ret; if (!p->_created) @@ -343,8 +351,8 @@ WRes Thread_Wait_Close(CThread *p) static WRes Event_Create(CEvent *p, int manualReset, int signaled) { - RINOK(pthread_mutex_init(&p->_mutex, NULL)); - RINOK(pthread_cond_init(&p->_cond, NULL)); + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) p->_manual_reset = manualReset; p->_state = (signaled ? True : False); p->_created = 1; @@ -363,7 +371,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) WRes Event_Set(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = True; int res1 = pthread_cond_broadcast(&p->_cond); int res2 = pthread_mutex_unlock(&p->_mutex); @@ -372,14 +380,14 @@ WRes Event_Set(CEvent *p) WRes Event_Reset(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = False; return pthread_mutex_unlock(&p->_mutex); } WRes Event_Wait(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) while (p->_state == False) { // ETIMEDOUT @@ -411,8 +419,8 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) { if (initCount > maxCount || maxCount < 1) return EINVAL; - RINOK(pthread_mutex_init(&p->_mutex, NULL)); - RINOK(pthread_cond_init(&p->_cond, NULL)); + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) p->_count = initCount; p->_maxCount = maxCount; p->_created = 1; @@ -448,7 +456,7 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) if (releaseCount < 1) return EINVAL; - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) newCount = p->_count + releaseCount; if (newCount > p->_maxCount) @@ -458,13 +466,13 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) p->_count = newCount; ret = pthread_cond_broadcast(&p->_cond); } - RINOK(pthread_mutex_unlock(&p->_mutex)); + RINOK(pthread_mutex_unlock(&p->_mutex)) return ret; } WRes Semaphore_Wait(CSemaphore *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) while (p->_count < 1) { pthread_cond_wait(&p->_cond, &p->_mutex); @@ -489,7 +497,7 @@ WRes Semaphore_Close(CSemaphore *p) WRes CriticalSection_Init(CCriticalSection *p) { - // Print("CriticalSection_Init"); + // Print("CriticalSection_Init") if (!p) return EINTR; return pthread_mutex_init(&p->_mutex, NULL); @@ -497,7 +505,7 @@ WRes CriticalSection_Init(CCriticalSection *p) void CriticalSection_Enter(CCriticalSection *p) { - // Print("CriticalSection_Enter"); + // Print("CriticalSection_Enter") if (p) { // int ret = @@ -507,7 +515,7 @@ void CriticalSection_Enter(CCriticalSection *p) void CriticalSection_Leave(CCriticalSection *p) { - // Print("CriticalSection_Leave"); + // Print("CriticalSection_Leave") if (p) { // int ret = @@ -517,7 +525,7 @@ void CriticalSection_Leave(CCriticalSection *p) void CriticalSection_Delete(CCriticalSection *p) { - // Print("CriticalSection_Delete"); + // Print("CriticalSection_Delete") if (p) { // int ret = @@ -527,14 +535,28 @@ void CriticalSection_Delete(CCriticalSection *p) LONG InterlockedIncrement(LONG volatile *addend) { - // Print("InterlockedIncrement"); + // Print("InterlockedIncrement") #ifdef USE_HACK_UNSAFE_ATOMIC LONG val = *addend + 1; *addend = val; return val; #else + + #if defined(__clang__) && (__clang_major__ >= 8) + #pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst" + #endif return __sync_add_and_fetch(addend, 1); #endif } #endif // _WIN32 + +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) +{ + if (Event_IsCreated(p)) + return Event_Reset(p); + return AutoResetEvent_CreateNotSignaled(p); +} + +#undef PRF +#undef Print diff --git a/3rdparty/7z/src/Threads.h b/3rdparty/7z/src/Threads.h index e9493afff6..c6aa6a8f18 100644 --- a/3rdparty/7z/src/Threads.h +++ b/3rdparty/7z/src/Threads.h @@ -1,18 +1,19 @@ /* Threads.h -- multithreading library -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_THREADS_H -#define __7Z_THREADS_H +#ifndef ZIP7_INC_THREADS_H +#define ZIP7_INC_THREADS_H #ifdef _WIN32 -#include +#include "7zWindows.h" + #else #if defined(__linux__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) -#ifndef _7ZIP_AFFINITY_DISABLE -#define _7ZIP_AFFINITY_SUPPORTED -// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") +#ifndef Z7_AFFINITY_DISABLE +#define Z7_AFFINITY_SUPPORTED +// #pragma message(" ==== Z7_AFFINITY_SUPPORTED") // #define _GNU_SOURCE #endif #endif @@ -33,7 +34,7 @@ WRes Handle_WaitObject(HANDLE h); typedef HANDLE CThread; -#define Thread_Construct(p) { *(p) = NULL; } +#define Thread_CONSTRUCT(p) { *(p) = NULL; } #define Thread_WasCreated(p) (*(p) != NULL) #define Thread_Close(p) HandlePtr_Close(p) // #define Thread_Wait(p) Handle_WaitObject(*(p)) @@ -52,42 +53,46 @@ typedef #endif THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_RET_ZERO 0 + typedef DWORD_PTR CAffinityMask; typedef DWORD_PTR CCpuSet; -#define CpuSet_Zero(p) { *(p) = 0; } -#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu)) #else // _WIN32 -typedef struct _CThread +typedef struct { pthread_t _tid; int _created; } CThread; -#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } -#define Thread_WasCreated(p) ((p)->_created != 0) +#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) WRes Thread_Close(CThread *p); // #define Thread_Wait Thread_Wait_Close typedef void * THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_RET_ZERO NULL + typedef UInt64 CAffinityMask; -#ifdef _7ZIP_AFFINITY_SUPPORTED +#ifdef Z7_AFFINITY_SUPPORTED typedef cpu_set_t CCpuSet; -#define CpuSet_Zero(p) CPU_ZERO(p) -#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) -#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) #else typedef UInt64 CCpuSet; -#define CpuSet_Zero(p) { *(p) = 0; } -#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } -#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu)) +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) #endif @@ -95,7 +100,7 @@ typedef UInt64 CCpuSet; #endif // _WIN32 -#define THREAD_FUNC_CALL_TYPE MY_STD_CALL +#define THREAD_FUNC_CALL_TYPE Z7_STDCALL #if defined(_WIN32) && defined(__GNUC__) /* GCC compiler for x86 32-bit uses the rule: @@ -187,6 +192,7 @@ WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); + WRes Event_Set(CEvent *p); WRes Event_Reset(CEvent *p); WRes Event_Wait(CEvent *p); @@ -227,6 +233,8 @@ LONG InterlockedIncrement(LONG volatile *addend); #endif // _WIN32 +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p); + EXTERN_C_END #endif diff --git a/3rdparty/7z/src/Xz.c b/3rdparty/7z/src/Xz.c index d6e2596a90..87eaf80a93 100644 --- a/3rdparty/7z/src/Xz.c +++ b/3rdparty/7z/src/Xz.c @@ -1,5 +1,5 @@ /* Xz.c - Xz -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -70,7 +70,7 @@ int XzCheck_Final(CXzCheck *p, Byte *digest) switch (p->mode) { case XZ_CHECK_CRC32: - SetUi32(digest, CRC_GET_DIGEST(p->crc)); + SetUi32(digest, CRC_GET_DIGEST(p->crc)) break; case XZ_CHECK_CRC64: { diff --git a/3rdparty/7z/src/Xz.h b/3rdparty/7z/src/Xz.h index cf9458e39a..27472db917 100644 --- a/3rdparty/7z/src/Xz.h +++ b/3rdparty/7z/src/Xz.h @@ -1,21 +1,23 @@ /* Xz.h - Xz interface -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __XZ_H -#define __XZ_H +#ifndef ZIP7_INC_XZ_H +#define ZIP7_INC_XZ_H #include "Sha256.h" +#include "Delta.h" EXTERN_C_BEGIN #define XZ_ID_Subblock 1 #define XZ_ID_Delta 3 -#define XZ_ID_X86 4 -#define XZ_ID_PPC 5 -#define XZ_ID_IA64 6 -#define XZ_ID_ARM 7 -#define XZ_ID_ARMT 8 +#define XZ_ID_X86 4 +#define XZ_ID_PPC 5 +#define XZ_ID_IA64 6 +#define XZ_ID_ARM 7 +#define XZ_ID_ARMT 8 #define XZ_ID_SPARC 9 +#define XZ_ID_ARM64 0xa #define XZ_ID_LZMA2 0x21 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); @@ -53,7 +55,7 @@ typedef struct #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) SRes XzBlock_Parse(CXzBlock *p, const Byte *header); -SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes); +SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes); /* ---------- xz stream ---------- */ @@ -101,7 +103,7 @@ typedef UInt16 CXzStreamFlags; unsigned XzFlags_GetCheckSize(CXzStreamFlags f); SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); -SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream); +SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream); typedef struct { @@ -112,6 +114,7 @@ typedef struct typedef struct { CXzStreamFlags flags; + // Byte _pad[6]; size_t numBlocks; CXzBlockSizes *blocks; UInt64 startOffset; @@ -134,7 +137,7 @@ typedef struct void Xzs_Construct(CXzs *p); void Xzs_Free(CXzs *p, ISzAllocPtr alloc); -SRes Xzs_ReadBackward(CXzs *p, ILookInStream *inStream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc); +SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); UInt64 Xzs_GetNumBlocks(const CXzs *p); UInt64 Xzs_GetUnpackSize(const CXzs *p); @@ -160,9 +163,9 @@ typedef enum } ECoderFinishMode; -typedef struct _IStateCoder +typedef struct { - void *p; + void *p; // state object; void (*Free)(void *p, ISzAllocPtr alloc); SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); void (*Init)(void *p); @@ -174,6 +177,20 @@ typedef struct _IStateCoder } IStateCoder; +typedef struct +{ + UInt32 methodId; + UInt32 delta; + UInt32 ip; + UInt32 X86_State; + Byte delta_State[DELTA_STATE_SIZE]; +} CXzBcFilterStateBase; + +typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size); + +SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, + Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc); + #define MIXCODER_NUM_FILTERS_MAX 4 @@ -422,7 +439,7 @@ typedef struct size_t outStep_ST; // size of output buffer for Single-Thread decoding BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. - #ifndef _7ZIP_ST + #ifndef Z7_ST unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. @@ -432,8 +449,9 @@ typedef struct void XzDecMtProps_Init(CXzDecMtProps *p); - -typedef void * CXzDecMtHandle; +typedef struct CXzDecMt CXzDecMt; +typedef CXzDecMt * CXzDecMtHandle; +// Z7_DECLARE_HANDLE(CXzDecMtHandle) /* alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). @@ -503,14 +521,14 @@ SRes XzDecMt_Decode(CXzDecMtHandle p, const CXzDecMtProps *props, const UInt64 *outDataSize, // NULL means undefined int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, // Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, // const Byte *inData, size_t inDataSize, CXzStatInfo *stat, // out: decoding results and statistics int *isMT, // out: 0 means that ST (Single-Thread) version was used // 1 means that MT (Multi-Thread) version was used - ICompressProgress *progress); + ICompressProgressPtr progress); EXTERN_C_END diff --git a/3rdparty/7z/src/XzCrc64.c b/3rdparty/7z/src/XzCrc64.c index e9ca9ec265..b2592af3ee 100644 --- a/3rdparty/7z/src/XzCrc64.c +++ b/3rdparty/7z/src/XzCrc64.c @@ -1,5 +1,5 @@ /* XzCrc64.c -- CRC64 calculation -2017-05-23 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,39 +12,30 @@ #define CRC64_NUM_TABLES 4 #else #define CRC64_NUM_TABLES 5 - #define CRC_UINT64_SWAP(v) \ - ((v >> 56) \ - | ((v >> 40) & ((UInt64)0xFF << 8)) \ - | ((v >> 24) & ((UInt64)0xFF << 16)) \ - | ((v >> 8) & ((UInt64)0xFF << 24)) \ - | ((v << 8) & ((UInt64)0xFF << 32)) \ - | ((v << 24) & ((UInt64)0xFF << 40)) \ - | ((v << 40) & ((UInt64)0xFF << 48)) \ - | ((v << 56))) - UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); + UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); #endif #ifndef MY_CPU_BE - UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); + UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); #endif -typedef UInt64 (MY_FAST_CALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table); +typedef UInt64 (Z7_FASTCALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table); static CRC64_FUNC g_Crc64Update; UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES]; -UInt64 MY_FAST_CALL Crc64Update(UInt64 v, const void *data, size_t size) +UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size) { return g_Crc64Update(v, data, size, g_Crc64Table); } -UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size) +UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size) { return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL; } -void MY_FAST_CALL Crc64GenerateTable() +void Z7_FASTCALL Crc64GenerateTable(void) { UInt32 i; for (i = 0; i < 256; i++) @@ -57,7 +48,7 @@ void MY_FAST_CALL Crc64GenerateTable() } for (i = 256; i < 256 * CRC64_NUM_TABLES; i++) { - UInt64 r = g_Crc64Table[(size_t)i - 256]; + const UInt64 r = g_Crc64Table[(size_t)i - 256]; g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8); } @@ -76,11 +67,14 @@ void MY_FAST_CALL Crc64GenerateTable() { for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--) { - UInt64 x = g_Crc64Table[(size_t)i - 256]; - g_Crc64Table[i] = CRC_UINT64_SWAP(x); + const UInt64 x = g_Crc64Table[(size_t)i - 256]; + g_Crc64Table[i] = Z7_BSWAP64(x); } g_Crc64Update = XzCrc64UpdateT1_BeT4; } } #endif } + +#undef kCrc64Poly +#undef CRC64_NUM_TABLES diff --git a/3rdparty/7z/src/XzCrc64.h b/3rdparty/7z/src/XzCrc64.h index 71b10d57e5..1d2793fbc6 100644 --- a/3rdparty/7z/src/XzCrc64.h +++ b/3rdparty/7z/src/XzCrc64.h @@ -1,8 +1,8 @@ /* XzCrc64.h -- CRC64 calculation -2013-01-18 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __XZ_CRC64_H -#define __XZ_CRC64_H +#ifndef ZIP7_INC_XZ_CRC64_H +#define ZIP7_INC_XZ_CRC64_H #include @@ -12,14 +12,14 @@ EXTERN_C_BEGIN extern UInt64 g_Crc64Table[]; -void MY_FAST_CALL Crc64GenerateTable(void); +void Z7_FASTCALL Crc64GenerateTable(void); #define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF) #define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL) #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt64 MY_FAST_CALL Crc64Update(UInt64 crc, const void *data, size_t size); -UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size); +UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size); +UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size); EXTERN_C_END diff --git a/3rdparty/7z/src/XzCrc64Opt.c b/3rdparty/7z/src/XzCrc64Opt.c index a0637dd222..524d8a1239 100644 --- a/3rdparty/7z/src/XzCrc64Opt.c +++ b/3rdparty/7z/src/XzCrc64Opt.c @@ -1,5 +1,5 @@ /* XzCrc64Opt.c -- CRC64 calculation -2021-02-09 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,15 +9,15 @@ #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); -UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) +UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); +UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; + const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; v = (v >> 32) ^ (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -34,29 +34,19 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con #ifndef MY_CPU_LE -#define CRC_UINT64_SWAP(v) \ - ((v >> 56) \ - | ((v >> 40) & ((UInt64)0xFF << 8)) \ - | ((v >> 24) & ((UInt64)0xFF << 16)) \ - | ((v >> 8) & ((UInt64)0xFF << 24)) \ - | ((v << 8) & ((UInt64)0xFF << 32)) \ - | ((v << 24) & ((UInt64)0xFF << 40)) \ - | ((v << 40) & ((UInt64)0xFF << 48)) \ - | ((v << 56))) - #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) -UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); -UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) +UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); +UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; table += 0x100; - v = CRC_UINT64_SWAP(v); + v = Z7_BSWAP64(v); for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; + const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; v = (v << 32) ^ (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] @@ -65,7 +55,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size } for (; size > 0; size--, p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); - return CRC_UINT64_SWAP(v); + return Z7_BSWAP64(v); } #endif diff --git a/3rdparty/7z/src/XzDec.c b/3rdparty/7z/src/XzDec.c index 49329f16a6..2d113a6643 100644 --- a/3rdparty/7z/src/XzDec.c +++ b/3rdparty/7z/src/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2021-09-04 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -67,7 +67,8 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value) return 0; } -/* ---------- BraState ---------- */ + +/* ---------- XzBcFilterState ---------- */ #define BRA_BUF_SIZE (1 << 14) @@ -76,27 +77,29 @@ typedef struct size_t bufPos; size_t bufConv; size_t bufTotal; + Byte *buf; // must be aligned for 4 bytes + Xz_Func_BcFilterStateBase_Filter filter_func; + // int encodeMode; + CXzBcFilterStateBase base; + // Byte buf[BRA_BUF_SIZE]; +} CXzBcFilterState; - int encodeMode; - UInt32 methodId; - UInt32 delta; - UInt32 ip; - UInt32 x86State; - Byte deltaState[DELTA_STATE_SIZE]; - - Byte buf[BRA_BUF_SIZE]; -} CBraState; - -static void BraState_Free(void *pp, ISzAllocPtr alloc) +static void XzBcFilterState_Free(void *pp, ISzAllocPtr alloc) { - ISzAlloc_Free(alloc, pp); + if (pp) + { + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + ISzAlloc_Free(alloc, p->buf); + ISzAlloc_Free(alloc, pp); + } } -static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) + +static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) { - CBraState *p = ((CBraState *)pp); - UNUSED_VAR(alloc); + CXzBcFilterStateBase *p = &((CXzBcFilterState *)pp)->base; + UNUSED_VAR(alloc) p->ip = 0; if (p->methodId == XZ_ID_Delta) { @@ -114,6 +117,7 @@ static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzA case XZ_ID_PPC: case XZ_ID_ARM: case XZ_ID_SPARC: + case XZ_ID_ARM64: if ((v & 3) != 0) return SZ_ERROR_UNSUPPORTED; break; @@ -134,73 +138,90 @@ static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzA return SZ_OK; } -static void BraState_Init(void *pp) + +static void XzBcFilterState_Init(void *pp) { - CBraState *p = ((CBraState *)pp); + CXzBcFilterState *p = ((CXzBcFilterState *)pp); p->bufPos = p->bufConv = p->bufTotal = 0; - x86_Convert_Init(p->x86State); - if (p->methodId == XZ_ID_Delta) - Delta_Init(p->deltaState); + p->base.X86_State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + if (p->base.methodId == XZ_ID_Delta) + Delta_Init(p->base.delta_State); } -#define CASE_BRA_CONV(isa) case XZ_ID_ ## isa: size = isa ## _Convert(data, size, p->ip, p->encodeMode); break; - -static SizeT BraState_Filter(void *pp, Byte *data, SizeT size) +static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] = +{ + Z7_BRANCH_CONV_DEC(PPC), + Z7_BRANCH_CONV_DEC(IA64), + Z7_BRANCH_CONV_DEC(ARM), + Z7_BRANCH_CONV_DEC(ARMT), + Z7_BRANCH_CONV_DEC(SPARC), + Z7_BRANCH_CONV_DEC(ARM64) +}; + +static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size) { - CBraState *p = ((CBraState *)pp); switch (p->methodId) { case XZ_ID_Delta: - if (p->encodeMode) - Delta_Encode(p->deltaState, p->delta, data, size); - else - Delta_Decode(p->deltaState, p->delta, data, size); + Delta_Decode(p->delta_State, p->delta, data, size); break; case XZ_ID_X86: - size = x86_Convert(data, size, p->ip, &p->x86State, p->encodeMode); + size = (SizeT)(z7_BranchConvSt_X86_Dec(data, size, p->ip, &p->X86_State) - data); + break; + default: + if (p->methodId >= XZ_ID_PPC) + { + const UInt32 i = p->methodId - XZ_ID_PPC; + if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Dec)) + size = (SizeT)(g_Funcs_BranchConv_RISC_Dec[i](data, size, p->ip) - data); + } break; - CASE_BRA_CONV(PPC) - CASE_BRA_CONV(IA64) - CASE_BRA_CONV(ARM) - CASE_BRA_CONV(ARMT) - CASE_BRA_CONV(SPARC) } p->ip += (UInt32)size; return size; } -static SRes BraState_Code2(void *pp, +static SizeT XzBcFilterState_Filter(void *pp, Byte *data, SizeT size) +{ + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + return p->filter_func(&p->base, data, size); +} + + +static SRes XzBcFilterState_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, int srcWasFinished, ECoderFinishMode finishMode, // int *wasFinished ECoderStatus *status) { - CBraState *p = ((CBraState *)pp); + CXzBcFilterState *p = ((CXzBcFilterState *)pp); SizeT destRem = *destLen; SizeT srcRem = *srcLen; - UNUSED_VAR(finishMode); + UNUSED_VAR(finishMode) *destLen = 0; *srcLen = 0; // *wasFinished = False; *status = CODER_STATUS_NOT_FINISHED; - while (destRem > 0) + while (destRem != 0) { - if (p->bufPos != p->bufConv) { size_t size = p->bufConv - p->bufPos; - if (size > destRem) - size = destRem; - memcpy(dest, p->buf + p->bufPos, size); - p->bufPos += size; - *destLen += size; - dest += size; - destRem -= size; - continue; + if (size) + { + if (size > destRem) + size = destRem; + memcpy(dest, p->buf + p->bufPos, size); + p->bufPos += size; + *destLen += size; + dest += size; + destRem -= size; + continue; + } } p->bufTotal -= p->bufPos; @@ -220,7 +241,7 @@ static SRes BraState_Code2(void *pp, if (p->bufTotal == 0) break; - p->bufConv = BraState_Filter(pp, p->buf, p->bufTotal); + p->bufConv = p->filter_func(&p->base, p->buf, p->bufTotal); if (p->bufConv == 0) { @@ -240,27 +261,37 @@ static SRes BraState_Code2(void *pp, } -SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); -SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc) +#define XZ_IS_SUPPORTED_FILTER_ID(id) \ + ((id) >= XZ_ID_Delta && (id) <= XZ_ID_ARM64) + +SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, + Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc) { - CBraState *decoder; - if (id < XZ_ID_Delta || id > XZ_ID_SPARC) + CXzBcFilterState *decoder; + if (!XZ_IS_SUPPORTED_FILTER_ID(id)) return SZ_ERROR_UNSUPPORTED; - decoder = (CBraState *)p->p; + decoder = (CXzBcFilterState *)p->p; if (!decoder) { - decoder = (CBraState *)ISzAlloc_Alloc(alloc, sizeof(CBraState)); + decoder = (CXzBcFilterState *)ISzAlloc_Alloc(alloc, sizeof(CXzBcFilterState)); if (!decoder) return SZ_ERROR_MEM; + decoder->buf = ISzAlloc_Alloc(alloc, BRA_BUF_SIZE); + if (!decoder->buf) + { + ISzAlloc_Free(alloc, decoder); + return SZ_ERROR_MEM; + } p->p = decoder; - p->Free = BraState_Free; - p->SetProps = BraState_SetProps; - p->Init = BraState_Init; - p->Code2 = BraState_Code2; - p->Filter = BraState_Filter; + p->Free = XzBcFilterState_Free; + p->SetProps = XzBcFilterState_SetProps; + p->Init = XzBcFilterState_Init; + p->Code2 = XzBcFilterState_Code2; + p->Filter = XzBcFilterState_Filter; + decoder->filter_func = func; } - decoder->methodId = (UInt32)id; - decoder->encodeMode = encodeMode; + decoder->base.methodId = (UInt32)id; + // decoder->encodeMode = encodeMode; return SZ_OK; } @@ -279,9 +310,9 @@ static void SbState_Free(void *pp, ISzAllocPtr alloc) static SRes SbState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) { - UNUSED_VAR(pp); - UNUSED_VAR(props); - UNUSED_VAR(alloc); + UNUSED_VAR(pp) + UNUSED_VAR(props) + UNUSED_VAR(alloc) return (propSize == 0) ? SZ_OK : SZ_ERROR_UNSUPPORTED; } @@ -297,7 +328,7 @@ static SRes SbState_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src, { CSbDec *p = (CSbDec *)pp; SRes res; - UNUSED_VAR(srcWasFinished); + UNUSED_VAR(srcWasFinished) p->dest = dest; p->destLen = *destLen; p->src = src; @@ -389,7 +420,7 @@ static SRes Lzma2State_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *s ELzmaStatus status2; /* ELzmaFinishMode fm = (finishMode == LZMA_FINISH_ANY) ? LZMA_FINISH_ANY : LZMA_FINISH_END; */ SRes res; - UNUSED_VAR(srcWasFinished); + UNUSED_VAR(srcWasFinished) if (spec->outBufMode) { SizeT dicPos = spec->decoder.decoder.dicPos; @@ -420,7 +451,7 @@ static SRes Lzma2State_SetFromMethod(IStateCoder *p, Byte *outBuf, size_t outBuf p->Init = Lzma2State_Init; p->Code2 = Lzma2State_Code2; p->Filter = NULL; - Lzma2Dec_Construct(&spec->decoder); + Lzma2Dec_CONSTRUCT(&spec->decoder) } spec->outBufMode = False; if (outBuf) @@ -519,7 +550,8 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met } if (coderIndex == 0) return SZ_ERROR_UNSUPPORTED; - return BraState_SetFromMethod(sc, methodId, 0, p->alloc); + return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId, + XzBcFilterStateBase_Filter_Dec, p->alloc); } @@ -568,7 +600,7 @@ static SRes MixCoder_Code(CMixCoder *p, SizeT destLen2, srcLen2; int wasFinished; - PRF_STR("------- MixCoder Single ----------"); + PRF_STR("------- MixCoder Single ----------") srcLen2 = srcLenOrig; destLen2 = destLenOrig; @@ -615,14 +647,14 @@ static SRes MixCoder_Code(CMixCoder *p, processed = coder->Filter(coder->p, p->outBuf, processed); if (wasFinished || (destFinish && p->outWritten == destLenOrig)) processed = p->outWritten; - PRF_STR_INT("filter", i); + PRF_STR_INT("filter", i) } *destLen = processed; } return res; } - PRF_STR("standard mix"); + PRF_STR("standard mix") if (p->numCoders != 1) { @@ -779,7 +811,7 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p) { - unsigned numFilters = XzBlock_GetNumFilters(p) - 1; + const unsigned numFilters = XzBlock_GetNumFilters(p) - 1; unsigned i; { const CXzFilter *f = &p->filters[numFilters]; @@ -795,8 +827,7 @@ static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p) if (f->propsSize != 1) return False; } - else if (f->id < XZ_ID_Delta - || f->id > XZ_ID_SPARC + else if (!XZ_IS_SUPPORTED_FILTER_ID(f->id) || (f->propsSize != 0 && f->propsSize != 4)) return False; } @@ -821,22 +852,24 @@ SRes XzBlock_Parse(CXzBlock *p, const Byte *header) p->packSize = (UInt64)(Int64)-1; if (XzBlock_HasPackSize(p)) { - READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize); + READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize) if (p->packSize == 0 || p->packSize + headerSize >= (UInt64)1 << 63) return SZ_ERROR_ARCHIVE; } p->unpackSize = (UInt64)(Int64)-1; if (XzBlock_HasUnpackSize(p)) - READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize); + { + READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize) + } numFilters = XzBlock_GetNumFilters(p); for (i = 0; i < numFilters; i++) { CXzFilter *filter = p->filters + i; UInt64 size; - READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id); - READ_VARINT_AND_CHECK(header, pos, headerSize, &size); + READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id) + READ_VARINT_AND_CHECK(header, pos, headerSize, &size) if (size > headerSize - pos || size > XZ_FILTER_PROPS_SIZE_MAX) return SZ_ERROR_ARCHIVE; filter->propsSize = (UInt32)size; @@ -894,20 +927,20 @@ static SRes XzDecMix_Init(CMixCoder *p, const CXzBlock *block, Byte *outBuf, siz MixCoder_Free(p); for (i = 0; i < numFilters; i++) { - RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize)); + RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize)) } p->numCoders = numFilters; } else { - RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize)); + RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize)) } for (i = 0; i < numFilters; i++) { const CXzFilter *f = &block->filters[numFilters - 1 - i]; IStateCoder *sc = &p->coders[i]; - RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc)); + RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc)) } MixCoder_Init(p); @@ -1054,14 +1087,14 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, (*destLen) += destLen2; p->unpackSize += destLen2; - RINOK(res); + RINOK(res) if (*status != CODER_STATUS_FINISHED_WITH_MARK) { if (p->block.packSize == p->packSize && *status == CODER_STATUS_NEEDS_MORE_INPUT) { - PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT"); + PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT") *status = CODER_STATUS_NOT_SPECIFIED; return SZ_ERROR_DATA; } @@ -1078,7 +1111,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, if ((p->block.packSize != (UInt64)(Int64)-1 && p->block.packSize != p->packSize) || (p->block.unpackSize != (UInt64)(Int64)-1 && p->block.unpackSize != p->unpackSize)) { - PRF_STR("ERROR: block.size mismatch"); + PRF_STR("ERROR: block.size mismatch") return SZ_ERROR_DATA; } } @@ -1109,7 +1142,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { - RINOK(Xz_ParseHeader(&p->streamFlags, p->buf)); + RINOK(Xz_ParseHeader(&p->streamFlags, p->buf)) p->numStartedStreams++; p->indexSize = 0; p->numBlocks = 0; @@ -1155,7 +1188,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { - RINOK(XzBlock_Parse(&p->block, p->buf)); + RINOK(XzBlock_Parse(&p->block, p->buf)) if (!XzBlock_AreSupportedFilters(&p->block)) return SZ_ERROR_UNSUPPORTED; p->numTotalBlocks++; @@ -1168,7 +1201,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, p->headerParsedOk = True; return SZ_OK; } - RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize)); + RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize)) } break; } @@ -1389,7 +1422,7 @@ UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p) -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "MtDec.h" #endif @@ -1400,7 +1433,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p) p->outStep_ST = 1 << 20; p->ignoreErrors = False; - #ifndef _7ZIP_ST + #ifndef Z7_ST p->numThreads = 1; p->inBufSize_MT = 1 << 18; p->memUseMax = sizeof(size_t) << 28; @@ -1409,7 +1442,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p) -#ifndef _7ZIP_ST +#ifndef Z7_ST /* ---------- CXzDecMtThread ---------- */ @@ -1448,7 +1481,7 @@ typedef struct /* ---------- CXzDecMt ---------- */ -typedef struct +struct CXzDecMt { CAlignOffsetAlloc alignOffsetAlloc; ISzAllocPtr allocMid; @@ -1456,9 +1489,9 @@ typedef struct CXzDecMtProps props; size_t unpackBlockMaxSize; - ISeqInStream *inStream; - ISeqOutStream *outStream; - ICompressProgress *progress; + ISeqInStreamPtr inStream; + ISeqOutStreamPtr outStream; + ICompressProgressPtr progress; BoolInt finishMode; BoolInt outSize_Defined; @@ -1481,7 +1514,7 @@ typedef struct ECoderStatus status; SRes codeRes; - #ifndef _7ZIP_ST + #ifndef Z7_ST BoolInt mainDecoderWasCalled; // int statErrorDefined; int finishedDecoderIndex; @@ -1504,10 +1537,9 @@ typedef struct BoolInt mtc_WasConstructed; CMtDec mtc; - CXzDecMtThread coders[MTDEC__THREADS_MAX]; + CXzDecMtThread coders[MTDEC_THREADS_MAX]; #endif - -} CXzDecMt; +}; @@ -1535,11 +1567,11 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) XzDecMtProps_Init(&p->props); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtc_WasConstructed = False; { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CXzDecMtThread *coder = &p->coders[i]; coder->dec_created = False; @@ -1549,16 +1581,16 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) } #endif - return p; + return (CXzDecMtHandle)p; } -#ifndef _7ZIP_ST +#ifndef Z7_ST static void XzDecMt_FreeOutBufs(CXzDecMt *p) { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CXzDecMtThread *coder = &p->coders[i]; if (coder->outBuf) @@ -1595,13 +1627,15 @@ static void XzDecMt_FreeSt(CXzDecMt *p) } -void XzDecMt_Destroy(CXzDecMtHandle pp) +// #define GET_CXzDecMt_p CXzDecMt *p = pp; + +void XzDecMt_Destroy(CXzDecMtHandle p) { - CXzDecMt *p = (CXzDecMt *)pp; + // GET_CXzDecMt_p XzDecMt_FreeSt(p); - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtc_WasConstructed) { @@ -1610,7 +1644,7 @@ void XzDecMt_Destroy(CXzDecMtHandle pp) } { unsigned i; - for (i = 0; i < MTDEC__THREADS_MAX; i++) + for (i = 0; i < MTDEC_THREADS_MAX; i++) { CXzDecMtThread *t = &p->coders[i]; if (t->dec_created) @@ -1625,12 +1659,12 @@ void XzDecMt_Destroy(CXzDecMtHandle pp) #endif - ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp); + ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p); } -#ifndef _7ZIP_ST +#ifndef Z7_ST static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) { @@ -1696,7 +1730,7 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac coder->dec.parseMode = True; coder->dec.headerParsedOk = False; - PRF_STR_INT("Parse", srcSize2); + PRF_STR_INT("Parse", srcSize2) res = XzUnpacker_Code(&coder->dec, NULL, &destSize, @@ -2071,7 +2105,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, } data += cur; size -= cur; - // PRF_STR_INT("Written size =", size); + // PRF_STR_INT("Written size =", size) if (size == 0) break; res = MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0); @@ -2087,7 +2121,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, return res; } - RINOK(res); + RINOK(res) if (coder->inPreSize != coder->inCodeSize || coder->blockPackTotal != coder->inCodeSize) @@ -2106,13 +2140,13 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, // (coder->state == MTDEC_PARSE_END) means that there are no other working threads // so we can use mtc variables without lock - PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed); + PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed) me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; { CXzUnpacker *dec = &me->dec; - PRF_STR_INT("PostSingle", srcSize); + PRF_STR_INT("PostSingle", srcSize) { size_t srcProcessed = srcSize; @@ -2186,7 +2220,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, me->mtc.crossEnd = srcSize; } - PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd); + PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd) return SZ_OK; } @@ -2277,7 +2311,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, UInt64 inDelta = me->mtc.inProcessed - inProgressPrev; if (inDelta >= (1 << 22)) { - RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress)); + RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress)) inProgressPrev = me->mtc.inProcessed; } } @@ -2331,7 +2365,7 @@ void XzStatInfo_Clear(CXzStatInfo *p) */ static SRes XzDecMt_Decode_ST(CXzDecMt *p - #ifndef _7ZIP_ST + #ifndef Z7_ST , BoolInt tMode #endif , CXzStatInfo *stat) @@ -2343,7 +2377,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p CXzUnpacker *dec; - #ifndef _7ZIP_ST + #ifndef Z7_ST if (tMode) { XzDecMt_FreeOutBufs(p); @@ -2400,7 +2434,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (inPos == inLim) { - #ifndef _7ZIP_ST + #ifndef Z7_ST if (tMode) { inData = MtDec_Read(&p->mtc, &inLim); @@ -2577,19 +2611,19 @@ static void XzStatInfo_SetStat(const CXzUnpacker *dec, -SRes XzDecMt_Decode(CXzDecMtHandle pp, +SRes XzDecMt_Decode(CXzDecMtHandle p, const CXzDecMtProps *props, const UInt64 *outDataSize, int finishMode, - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, // Byte *outBuf, size_t *outBufSize, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, // const Byte *inData, size_t inDataSize, CXzStatInfo *stat, int *isMT, - ICompressProgress *progress) + ICompressProgressPtr progress) { - CXzDecMt *p = (CXzDecMt *)pp; - #ifndef _7ZIP_ST + // GET_CXzDecMt_p + #ifndef Z7_ST BoolInt tMode; #endif @@ -2640,7 +2674,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, */ - #ifndef _7ZIP_ST + #ifndef Z7_ST p->isBlockHeaderState_Parse = False; p->isBlockHeaderState_Write = False; @@ -2782,7 +2816,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, return res; } - PRF_STR("----- decoding ST -----"); + PRF_STR("----- decoding ST -----") } #endif @@ -2792,13 +2826,13 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, { SRes res = XzDecMt_Decode_ST(p - #ifndef _7ZIP_ST + #ifndef Z7_ST , tMode #endif , stat ); - #ifndef _7ZIP_ST + #ifndef Z7_ST // we must set error code from MT decoding at first if (p->mainErrorCode != SZ_OK) stat->DecodeRes = p->mainErrorCode; @@ -2835,3 +2869,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, return res; } } + +#undef PRF +#undef PRF_STR +#undef PRF_STR_INT_2 diff --git a/3rdparty/7z/src/XzEnc.c b/3rdparty/7z/src/XzEnc.c index 759ba670e7..d0c36ea65c 100644 --- a/3rdparty/7z/src/XzEnc.c +++ b/3rdparty/7z/src/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -18,13 +18,13 @@ #include "XzEnc.h" -// #define _7ZIP_ST +// #define Z7_ST -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "MtCoder.h" #else -#define MTCODER__THREADS_MAX 1 -#define MTCODER__BLOCKS_MAX 1 +#define MTCODER_THREADS_MAX 1 +#define MTCODER_BLOCKS_MAX 1 #endif #define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) @@ -35,25 +35,25 @@ #define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize)) -#define XzBlock_ClearFlags(p) (p)->flags = 0; -#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1)); +// #define XzBlock_ClearFlags(p) (p)->flags = 0; +#define XzBlock_ClearFlags_SetNumFilters(p, n) (p)->flags = (Byte)((n) - 1); #define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE; #define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE; -static SRes WriteBytes(ISeqOutStream *s, const void *buf, size_t size) +static SRes WriteBytes(ISeqOutStreamPtr s, const void *buf, size_t size) { return (ISeqOutStream_Write(s, buf, size) == size) ? SZ_OK : SZ_ERROR_WRITE; } -static SRes WriteBytesUpdateCrc(ISeqOutStream *s, const void *buf, size_t size, UInt32 *crc) +static SRes WriteBytes_UpdateCrc(ISeqOutStreamPtr s, const void *buf, size_t size, UInt32 *crc) { *crc = CrcUpdate(*crc, buf, size); return WriteBytes(s, buf, size); } -static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStream *s) +static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStreamPtr s) { UInt32 crc; Byte header[XZ_STREAM_HEADER_SIZE]; @@ -61,12 +61,12 @@ static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStream *s) header[XZ_SIG_SIZE] = (Byte)(f >> 8); header[XZ_SIG_SIZE + 1] = (Byte)(f & 0xFF); crc = CrcCalc(header + XZ_SIG_SIZE, XZ_STREAM_FLAGS_SIZE); - SetUi32(header + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE, crc); + SetUi32(header + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE, crc) return WriteBytes(s, header, XZ_STREAM_HEADER_SIZE); } -static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStream *s) +static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStreamPtr s) { Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; @@ -91,7 +91,7 @@ static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStream *s) header[pos++] = 0; header[0] = (Byte)(pos >> 2); - SetUi32(header + pos, CrcCalc(header, pos)); + SetUi32(header + pos, CrcCalc(header, pos)) return WriteBytes(s, header, pos + 4); } @@ -182,7 +182,7 @@ static SRes XzEncIndex_AddIndexRecord(CXzEncIndex *p, UInt64 unpackSize, UInt64 size_t newSize = p->allocated * 2 + 16 * 2; if (newSize < p->size + pos) return SZ_ERROR_MEM; - RINOK(XzEncIndex_ReAlloc(p, newSize, alloc)); + RINOK(XzEncIndex_ReAlloc(p, newSize, alloc)) } memcpy(p->blocks + p->size, buf, pos); p->size += pos; @@ -191,7 +191,7 @@ static SRes XzEncIndex_AddIndexRecord(CXzEncIndex *p, UInt64 unpackSize, UInt64 } -static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, ISeqOutStream *s) +static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, ISeqOutStreamPtr s) { Byte buf[32]; UInt64 globalPos; @@ -200,8 +200,8 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I globalPos = pos; buf[0] = 0; - RINOK(WriteBytesUpdateCrc(s, buf, pos, &crc)); - RINOK(WriteBytesUpdateCrc(s, p->blocks, p->size, &crc)); + RINOK(WriteBytes_UpdateCrc(s, buf, pos, &crc)) + RINOK(WriteBytes_UpdateCrc(s, p->blocks, p->size, &crc)) globalPos += p->size; pos = XZ_GET_PAD_SIZE(globalPos); @@ -211,12 +211,12 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I globalPos += pos; crc = CrcUpdate(crc, buf + 4 - pos, pos); - SetUi32(buf + 4, CRC_GET_DIGEST(crc)); + SetUi32(buf + 4, CRC_GET_DIGEST(crc)) - SetUi32(buf + 8 + 4, (UInt32)(globalPos >> 2)); + SetUi32(buf + 8 + 4, (UInt32)(globalPos >> 2)) buf[8 + 8] = (Byte)(flags >> 8); buf[8 + 9] = (Byte)(flags & 0xFF); - SetUi32(buf + 8, CrcCalc(buf + 8 + 4, 6)); + SetUi32(buf + 8, CrcCalc(buf + 8 + 4, 6)) buf[8 + 10] = XZ_FOOTER_SIG_0; buf[8 + 11] = XZ_FOOTER_SIG_1; @@ -230,7 +230,7 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I typedef struct { ISeqInStream vt; - ISeqInStream *realStream; + ISeqInStreamPtr realStream; const Byte *data; UInt64 limit; UInt64 processed; @@ -251,9 +251,9 @@ static void SeqCheckInStream_GetDigest(CSeqCheckInStream *p, Byte *digest) XzCheck_Final(&p->check, digest); } -static SRes SeqCheckInStream_Read(const ISeqInStream *pp, void *data, size_t *size) +static SRes SeqCheckInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) { - CSeqCheckInStream *p = CONTAINER_FROM_VTBL(pp, CSeqCheckInStream, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqCheckInStream) size_t size2 = *size; SRes res = SZ_OK; @@ -285,15 +285,15 @@ static SRes SeqCheckInStream_Read(const ISeqInStream *pp, void *data, size_t *si typedef struct { ISeqOutStream vt; - ISeqOutStream *realStream; + ISeqOutStreamPtr realStream; Byte *outBuf; size_t outBufLimit; UInt64 processed; } CSeqSizeOutStream; -static size_t SeqSizeOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) +static size_t SeqSizeOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size) { - CSeqSizeOutStream *p = CONTAINER_FROM_VTBL(pp, CSeqSizeOutStream, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqSizeOutStream) if (p->realStream) size = ISeqOutStream_Write(p->realStream, data, size); else @@ -313,8 +313,8 @@ static size_t SeqSizeOutStream_Write(const ISeqOutStream *pp, const void *data, typedef struct { - ISeqInStream p; - ISeqInStream *realStream; + ISeqInStream vt; + ISeqInStreamPtr realStream; IStateCoder StateCoder; Byte *buf; size_t curPos; @@ -323,7 +323,39 @@ typedef struct } CSeqInFilter; -SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); +static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] = +{ + Z7_BRANCH_CONV_ENC(PPC), + Z7_BRANCH_CONV_ENC(IA64), + Z7_BRANCH_CONV_ENC(ARM), + Z7_BRANCH_CONV_ENC(ARMT), + Z7_BRANCH_CONV_ENC(SPARC), + Z7_BRANCH_CONV_ENC(ARM64) +}; + +static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size) +{ + switch (p->methodId) + { + case XZ_ID_Delta: + Delta_Encode(p->delta_State, p->delta, data, size); + break; + case XZ_ID_X86: + size = (SizeT)(z7_BranchConvSt_X86_Enc(data, size, p->ip, &p->X86_State) - data); + break; + default: + if (p->methodId >= XZ_ID_PPC) + { + const UInt32 i = p->methodId - XZ_ID_PPC; + if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Enc)) + size = (SizeT)(g_Funcs_BranchConv_RISC_Enc[i](data, size, p->ip) - data); + } + break; + } + p->ip += (UInt32)size; + return size; +} + static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPtr alloc) { @@ -335,17 +367,17 @@ static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPt } p->curPos = p->endPos = 0; p->srcWasFinished = 0; - RINOK(BraState_SetFromMethod(&p->StateCoder, props->id, 1, alloc)); - RINOK(p->StateCoder.SetProps(p->StateCoder.p, props->props, props->propsSize, alloc)); + RINOK(Xz_StateCoder_Bc_SetFromMethod_Func(&p->StateCoder, props->id, XzBcFilterStateBase_Filter_Enc, alloc)) + RINOK(p->StateCoder.SetProps(p->StateCoder.p, props->props, props->propsSize, alloc)) p->StateCoder.Init(p->StateCoder.p); return SZ_OK; } -static SRes SeqInFilter_Read(const ISeqInStream *pp, void *data, size_t *size) +static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size) { - CSeqInFilter *p = CONTAINER_FROM_VTBL(pp, CSeqInFilter, p); - size_t sizeOriginal = *size; + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqInFilter) + const size_t sizeOriginal = *size; if (sizeOriginal == 0) return SZ_OK; *size = 0; @@ -356,7 +388,7 @@ static SRes SeqInFilter_Read(const ISeqInStream *pp, void *data, size_t *size) { p->curPos = 0; p->endPos = FILTER_BUF_SIZE; - RINOK(ISeqInStream_Read(p->realStream, p->buf, &p->endPos)); + RINOK(ISeqInStream_Read(p->realStream, p->buf, &p->endPos)) if (p->endPos == 0) p->srcWasFinished = 1; } @@ -381,7 +413,7 @@ static void SeqInFilter_Construct(CSeqInFilter *p) { p->buf = NULL; p->StateCoder.p = NULL; - p->p.Read = SeqInFilter_Read; + p->vt.Read = SeqInFilter_Read; } static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) @@ -406,13 +438,13 @@ static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) typedef struct { ISeqInStream vt; - ISeqInStream *inStream; + ISeqInStreamPtr inStream; CSbEnc enc; } CSbEncInStream; -static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size) +static SRes SbEncInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) { - CSbEncInStream *p = CONTAINER_FROM_VTBL(pp, CSbEncInStream, vt); + CSbEncInStream *p = Z7_CONTAINER_FROM_VTBL(pp, CSbEncInStream, vt); size_t sizeOriginal = *size; if (sizeOriginal == 0) return SZ_OK; @@ -422,7 +454,7 @@ static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size) if (p->enc.needRead && !p->enc.readWasFinished) { size_t processed = p->enc.needReadSizeMax; - RINOK(p->inStream->Read(p->inStream, p->enc.buf + p->enc.readPos, &processed)); + RINOK(p->inStream->Read(p->inStream, p->enc.buf + p->enc.readPos, &processed)) p->enc.readPos += processed; if (processed == 0) { @@ -433,7 +465,7 @@ static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size) } *size = sizeOriginal; - RINOK(SbEnc_Read(&p->enc, data, size)); + RINOK(SbEnc_Read(&p->enc, data, size)) if (*size != 0 || !p->enc.needRead) return SZ_OK; } @@ -473,7 +505,7 @@ void XzFilterProps_Init(CXzFilterProps *p) void XzProps_Init(CXzProps *p) { p->checkId = XZ_CHECK_CRC32; - p->blockSize = XZ_PROPS__BLOCK_SIZE__AUTO; + p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO; p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; p->numTotalThreads = -1; @@ -502,8 +534,8 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p) t2 = p->numBlockThreads_Max; t3 = p->numTotalThreads; - if (t2 > MTCODER__THREADS_MAX) - t2 = MTCODER__THREADS_MAX; + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; if (t3 <= 0) { @@ -519,8 +551,8 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p) t1 = 1; t2 = t3; } - if (t2 > MTCODER__THREADS_MAX) - t2 = MTCODER__THREADS_MAX; + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; } else if (t1 <= 0) { @@ -571,7 +603,7 @@ static void XzProps_Normalize(CXzProps *p) /* we normalize xzProps properties, but we normalize only some of CXzProps::lzma2Props properties. Lzma2Enc_SetProps() will normalize lzma2Props later. */ - if (p->blockSize == XZ_PROPS__BLOCK_SIZE__SOLID) + if (p->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID) { p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; p->numBlockThreads_Reduced = 1; @@ -583,15 +615,15 @@ static void XzProps_Normalize(CXzProps *p) else { CLzma2EncProps *lzma2 = &p->lzma2Props; - if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) { // xz-auto p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; - if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) { // if (xz-auto && lzma2-solid) - we use solid for both - p->blockSize = XZ_PROPS__BLOCK_SIZE__SOLID; + p->blockSize = XZ_PROPS_BLOCK_SIZE_SOLID; p->numBlockThreads_Reduced = 1; p->numBlockThreads_Max = 1; if (p->lzma2Props.numTotalThreads <= 0) @@ -610,9 +642,9 @@ static void XzProps_Normalize(CXzProps *p) p->blockSize = tp.blockSize; // fixed or solid p->numBlockThreads_Reduced = tp.numBlockThreads_Reduced; p->numBlockThreads_Max = tp.numBlockThreads_Max; - if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) - lzma2->blockSize = tp.blockSize; // fixed or solid, LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID - if (lzma2->lzmaProps.reduceSize > tp.blockSize && tp.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + lzma2->blockSize = tp.blockSize; // fixed or solid, LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + if (lzma2->lzmaProps.reduceSize > tp.blockSize && tp.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) lzma2->lzmaProps.reduceSize = tp.blockSize; lzma2->numBlockThreads_Reduced = 1; lzma2->numBlockThreads_Max = 1; @@ -631,9 +663,9 @@ static void XzProps_Normalize(CXzProps *p) r = p->blockSize; lzma2->lzmaProps.reduceSize = r; } - if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) - lzma2->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID; - else if (lzma2->blockSize > p->blockSize && lzma2->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + lzma2->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID; + else if (lzma2->blockSize > p->blockSize && lzma2->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) lzma2->blockSize = p->blockSize; XzEncProps_Normalize_Fixed(p); @@ -704,17 +736,17 @@ typedef struct static SRes Xz_CompressBlock( CLzma2WithFilters *lzmaf, - ISeqOutStream *outStream, + ISeqOutStreamPtr outStream, Byte *outBufHeader, Byte *outBufData, size_t outBufDataLimit, - ISeqInStream *inStream, + ISeqInStreamPtr inStream, // UInt64 expectedSize, const Byte *inBuf, // used if (!inStream) size_t inBufSize, // used if (!inStream), it's block size, props->blockSize is ignored const CXzProps *props, - ICompressProgress *progress, + ICompressProgressPtr progress, int *inStreamFinished, /* only for inStream version */ CXzEncBlockInfo *blockSizes, ISzAllocPtr alloc, @@ -731,12 +763,12 @@ static SRes Xz_CompressBlock( *inStreamFinished = False; - RINOK(Lzma2WithFilters_Create(lzmaf, alloc, allocBig)); + RINOK(Lzma2WithFilters_Create(lzmaf, alloc, allocBig)) - RINOK(Lzma2Enc_SetProps(lzmaf->lzma2, &props->lzma2Props)); + RINOK(Lzma2Enc_SetProps(lzmaf->lzma2, &props->lzma2Props)) - XzBlock_ClearFlags(&block); - XzBlock_SetNumFilters(&block, 1 + (fp ? 1 : 0)); + // XzBlock_ClearFlags(&block) + XzBlock_ClearFlags_SetNumFilters(&block, 1 + (fp ? 1 : 0)) if (fp) { @@ -752,7 +784,7 @@ static SRes Xz_CompressBlock( else if (fp->ipDefined) { Byte *ptr = filter->props; - SetUi32(ptr, fp->ip); + SetUi32(ptr, fp->ip) filter->propsSize = 4; } } @@ -777,13 +809,13 @@ static SRes Xz_CompressBlock( if (props->blockSize != (UInt64)(Int64)-1) if (expectedSize > props->blockSize) block.unpackSize = props->blockSize; - XzBlock_SetHasUnpackSize(&block); + XzBlock_SetHasUnpackSize(&block) } */ if (outStream) { - RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)); + RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)) } checkInStream.vt.Read = SeqCheckInStream_Read; @@ -801,13 +833,13 @@ static SRes Xz_CompressBlock( if (fp->id == XZ_ID_Subblock) { lzmaf->sb.inStream = &checkInStream.vt; - RINOK(SbEncInStream_Init(&lzmaf->sb)); + RINOK(SbEncInStream_Init(&lzmaf->sb)) } else #endif { lzmaf->filter.realStream = &checkInStream.vt; - RINOK(SeqInFilter_Init(&lzmaf->filter, filter, alloc)); + RINOK(SeqInFilter_Init(&lzmaf->filter, filter, alloc)) } } @@ -841,7 +873,7 @@ static SRes Xz_CompressBlock( #ifdef USE_SUBBLOCK (fp->id == XZ_ID_Subblock) ? &lzmaf->sb.vt: #endif - &lzmaf->filter.p) : + &lzmaf->filter.vt) : &checkInStream.vt) : NULL, useStream ? NULL : inBuf, @@ -852,7 +884,7 @@ static SRes Xz_CompressBlock( if (outBuf) seqSizeOutStream.processed += outSize; - RINOK(res); + RINOK(res) blockSizes->unpackSize = checkInStream.processed; } { @@ -866,7 +898,7 @@ static SRes Xz_CompressBlock( buf[3] = 0; SeqCheckInStream_GetDigest(&checkInStream, buf + 4); - RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))); + RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))) blockSizes->totalSize = seqSizeOutStream.processed - padSize; @@ -877,12 +909,12 @@ static SRes Xz_CompressBlock( seqSizeOutStream.processed = 0; block.unpackSize = blockSizes->unpackSize; - XzBlock_SetHasUnpackSize(&block); + XzBlock_SetHasUnpackSize(&block) block.packSize = packSize; - XzBlock_SetHasPackSize(&block); + XzBlock_SetHasPackSize(&block) - RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)); + RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)) blockSizes->headerSize = (size_t)seqSizeOutStream.processed; blockSizes->totalSize += seqSizeOutStream.processed; @@ -906,15 +938,15 @@ static SRes Xz_CompressBlock( typedef struct { ICompressProgress vt; - ICompressProgress *progress; + ICompressProgressPtr progress; UInt64 inOffset; UInt64 outOffset; } CCompressProgress_XzEncOffset; -static SRes CompressProgress_XzEncOffset_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) +static SRes CompressProgress_XzEncOffset_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize) { - const CCompressProgress_XzEncOffset *p = CONTAINER_FROM_VTBL(pp, CCompressProgress_XzEncOffset, vt); + const CCompressProgress_XzEncOffset *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CCompressProgress_XzEncOffset, vt); inSize += p->inOffset; outSize += p->outOffset; return ICompressProgress_Progress(p->progress, inSize, outSize); @@ -923,7 +955,7 @@ static SRes CompressProgress_XzEncOffset_Progress(const ICompressProgress *pp, U -typedef struct +struct CXzEnc { ISzAllocPtr alloc; ISzAllocPtr allocBig; @@ -933,20 +965,19 @@ typedef struct CXzEncIndex xzIndex; - CLzma2WithFilters lzmaf_Items[MTCODER__THREADS_MAX]; + CLzma2WithFilters lzmaf_Items[MTCODER_THREADS_MAX]; size_t outBufSize; /* size of allocated outBufs[i] */ - Byte *outBufs[MTCODER__BLOCKS_MAX]; + Byte *outBufs[MTCODER_BLOCKS_MAX]; - #ifndef _7ZIP_ST + #ifndef Z7_ST unsigned checkType; - ISeqOutStream *outStream; + ISeqOutStreamPtr outStream; BoolInt mtCoder_WasConstructed; CMtCoder mtCoder; - CXzEncBlockInfo EncBlocks[MTCODER__BLOCKS_MAX]; + CXzEncBlockInfo EncBlocks[MTCODER_BLOCKS_MAX]; #endif - -} CXzEnc; +}; static void XzEnc_Construct(CXzEnc *p) @@ -955,13 +986,13 @@ static void XzEnc_Construct(CXzEnc *p) XzEncIndex_Construct(&p->xzIndex); - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) Lzma2WithFilters_Construct(&p->lzmaf_Items[i]); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtCoder_WasConstructed = False; { - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) p->outBufs[i] = NULL; p->outBufSize = 0; } @@ -972,7 +1003,7 @@ static void XzEnc_Construct(CXzEnc *p) static void XzEnc_FreeOutBufs(CXzEnc *p) { unsigned i; - for (i = 0; i < MTCODER__BLOCKS_MAX; i++) + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) if (p->outBufs[i]) { ISzAlloc_Free(p->alloc, p->outBufs[i]); @@ -988,10 +1019,10 @@ static void XzEnc_Free(CXzEnc *p, ISzAllocPtr alloc) XzEncIndex_Free(&p->xzIndex, alloc); - for (i = 0; i < MTCODER__THREADS_MAX; i++) + for (i = 0; i < MTCODER_THREADS_MAX; i++) Lzma2WithFilters_Free(&p->lzmaf_Items[i], alloc); - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtCoder_WasConstructed) { MtCoder_Destruct(&p->mtCoder); @@ -1013,37 +1044,38 @@ CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig) p->expectedDataSize = (UInt64)(Int64)-1; p->alloc = alloc; p->allocBig = allocBig; - return p; + return (CXzEncHandle)p; } +// #define GET_CXzEnc_p CXzEnc *p = (CXzEnc *)(void *)pp; -void XzEnc_Destroy(CXzEncHandle pp) +void XzEnc_Destroy(CXzEncHandle p) { - CXzEnc *p = (CXzEnc *)pp; + // GET_CXzEnc_p XzEnc_Free(p, p->alloc); ISzAlloc_Free(p->alloc, p); } -SRes XzEnc_SetProps(CXzEncHandle pp, const CXzProps *props) +SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props) { - CXzEnc *p = (CXzEnc *)pp; + // GET_CXzEnc_p p->xzProps = *props; XzProps_Normalize(&p->xzProps); return SZ_OK; } -void XzEnc_SetDataSize(CXzEncHandle pp, UInt64 expectedDataSiize) +void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize) { - CXzEnc *p = (CXzEnc *)pp; + // GET_CXzEnc_p p->expectedDataSize = expectedDataSiize; } -#ifndef _7ZIP_ST +#ifndef Z7_ST static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex, const Byte *src, size_t srcSize, int finished) @@ -1073,7 +1105,7 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf MtProgressThunk_CreateVTable(&progressThunk); progressThunk.mtProgress = &me->mtCoder.mtProgress; - MtProgressThunk_Init(&progressThunk); + MtProgressThunk_INIT(&progressThunk) { CXzEncBlockInfo blockSizes; @@ -1112,11 +1144,11 @@ static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex) const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; const Byte *data = me->outBufs[outBufIndex]; - RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)); + RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)) { UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); - RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)); + RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)) } return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); @@ -1126,9 +1158,9 @@ static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex) -SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress) +SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress) { - CXzEnc *p = (CXzEnc *)pp; + // GET_CXzEnc_p const CXzProps *props = &p->xzProps; @@ -1137,7 +1169,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr UInt64 numBlocks = 1; UInt64 blockSize = props->blockSize; - if (blockSize != XZ_PROPS__BLOCK_SIZE__SOLID + if (blockSize != XZ_PROPS_BLOCK_SIZE_SOLID && props->reduceSize != (UInt64)(Int64)-1) { numBlocks = props->reduceSize / blockSize; @@ -1147,13 +1179,13 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr else blockSize = (UInt64)1 << 62; - RINOK(XzEncIndex_PreAlloc(&p->xzIndex, numBlocks, blockSize, XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(blockSize), p->alloc)); + RINOK(XzEncIndex_PreAlloc(&p->xzIndex, numBlocks, blockSize, XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(blockSize), p->alloc)) } - RINOK(Xz_WriteHeader((CXzStreamFlags)props->checkId, outStream)); + RINOK(Xz_WriteHeader((CXzStreamFlags)props->checkId, outStream)) - #ifndef _7ZIP_ST + #ifndef Z7_ST if (props->numBlockThreads_Reduced > 1) { IMtCoderCallback2 vt; @@ -1180,8 +1212,8 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr p->mtCoder.mtCallback = &vt; p->mtCoder.mtCallbackObject = p; - if ( props->blockSize == XZ_PROPS__BLOCK_SIZE__SOLID - || props->blockSize == XZ_PROPS__BLOCK_SIZE__AUTO) + if ( props->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID + || props->blockSize == XZ_PROPS_BLOCK_SIZE_AUTO) return SZ_ERROR_FAIL; p->mtCoder.blockSize = (size_t)props->blockSize; @@ -1200,7 +1232,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; p->mtCoder.expectedDataSize = p->expectedDataSize; - RINOK(MtCoder_Code(&p->mtCoder)); + RINOK(MtCoder_Code(&p->mtCoder)) } else #endif @@ -1217,7 +1249,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr writeStartSizes = 0; - if (props->blockSize != XZ_PROPS__BLOCK_SIZE__SOLID) + if (props->blockSize != XZ_PROPS_BLOCK_SIZE_SOLID) { writeStartSizes = (props->forceWriteSizesInHeader > 0); @@ -1274,18 +1306,18 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr &inStreamFinished, &blockSizes, p->alloc, - p->allocBig)); + p->allocBig)) { UInt64 totalPackFull = blockSizes.totalSize + XZ_GET_PAD_SIZE(blockSizes.totalSize); if (writeStartSizes) { - RINOK(WriteBytes(outStream, p->outBufs[0], blockSizes.headerSize)); - RINOK(WriteBytes(outStream, bufData, (size_t)totalPackFull - blockSizes.headerSize)); + RINOK(WriteBytes(outStream, p->outBufs[0], blockSizes.headerSize)) + RINOK(WriteBytes(outStream, bufData, (size_t)totalPackFull - blockSizes.headerSize)) } - RINOK(XzEncIndex_AddIndexRecord(&p->xzIndex, blockSizes.unpackSize, blockSizes.totalSize, p->alloc)); + RINOK(XzEncIndex_AddIndexRecord(&p->xzIndex, blockSizes.unpackSize, blockSizes.totalSize, p->alloc)) progress2.inOffset += blockSizes.unpackSize; progress2.outOffset += totalPackFull; @@ -1302,8 +1334,8 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr #include "Alloc.h" -SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream, - const CXzProps *props, ICompressProgress *progress) +SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + const CXzProps *props, ICompressProgressPtr progress) { SRes res; CXzEncHandle xz = XzEnc_Create(&g_Alloc, &g_BigAlloc); @@ -1317,7 +1349,7 @@ SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream, } -SRes Xz_EncodeEmpty(ISeqOutStream *outStream) +SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream) { SRes res; CXzEncIndex xzIndex; diff --git a/3rdparty/7z/src/XzEnc.h b/3rdparty/7z/src/XzEnc.h index 529ac3fd8c..31026f7e09 100644 --- a/3rdparty/7z/src/XzEnc.h +++ b/3rdparty/7z/src/XzEnc.h @@ -1,8 +1,8 @@ /* XzEnc.h -- Xz Encode -2017-06-27 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __XZ_ENC_H -#define __XZ_ENC_H +#ifndef ZIP7_INC_XZ_ENC_H +#define ZIP7_INC_XZ_ENC_H #include "Lzma2Enc.h" @@ -11,8 +11,8 @@ EXTERN_C_BEGIN -#define XZ_PROPS__BLOCK_SIZE__AUTO LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO -#define XZ_PROPS__BLOCK_SIZE__SOLID LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID +#define XZ_PROPS_BLOCK_SIZE_AUTO LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO +#define XZ_PROPS_BLOCK_SIZE_SOLID LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID typedef struct @@ -41,19 +41,20 @@ typedef struct void XzProps_Init(CXzProps *p); - -typedef void * CXzEncHandle; +typedef struct CXzEnc CXzEnc; +typedef CXzEnc * CXzEncHandle; +// Z7_DECLARE_HANDLE(CXzEncHandle) CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); void XzEnc_Destroy(CXzEncHandle p); SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props); void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize); -SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress); +SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress); -SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream, - const CXzProps *props, ICompressProgress *progress); +SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + const CXzProps *props, ICompressProgressPtr progress); -SRes Xz_EncodeEmpty(ISeqOutStream *outStream); +SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream); EXTERN_C_END diff --git a/3rdparty/7z/src/XzIn.c b/3rdparty/7z/src/XzIn.c index 07201b8425..afe9904332 100644 --- a/3rdparty/7z/src/XzIn.c +++ b/3rdparty/7z/src/XzIn.c @@ -1,5 +1,5 @@ /* XzIn.c - Xz input -2021-09-04 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -15,11 +15,13 @@ #define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1) -SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream) +SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream) { Byte sig[XZ_STREAM_HEADER_SIZE]; - RINOK(SeqInStream_Read2(inStream, sig, XZ_STREAM_HEADER_SIZE, SZ_ERROR_NO_ARCHIVE)); - if (memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) + size_t processedSize = XZ_STREAM_HEADER_SIZE; + RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize)) + if (processedSize != XZ_STREAM_HEADER_SIZE + || memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) return SZ_ERROR_NO_ARCHIVE; return Xz_ParseHeader(p, sig); } @@ -29,12 +31,12 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream) if (s == 0) return SZ_ERROR_ARCHIVE; \ pos += s; } -SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes) +SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes) { Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; unsigned headerSize; *headerSizeRes = 0; - RINOK(SeqInStream_ReadByte(inStream, &header[0])); + RINOK(SeqInStream_ReadByte(inStream, &header[0])) headerSize = (unsigned)header[0]; if (headerSize == 0) { @@ -46,7 +48,12 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, U *isIndex = False; headerSize = (headerSize << 2) + 4; *headerSizeRes = headerSize; - RINOK(SeqInStream_Read(inStream, header + 1, headerSize - 1)); + { + size_t processedSize = headerSize - 1; + RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize)) + if (processedSize != headerSize - 1) + return SZ_ERROR_INPUT_EOF; + } return XzBlock_Parse(p, header); } @@ -58,7 +65,9 @@ UInt64 Xz_GetUnpackSize(const CXzStream *p) UInt64 size = 0; size_t i; for (i = 0; i < p->numBlocks; i++) - ADD_SIZE_CHECK(size, p->blocks[i].unpackSize); + { + ADD_SIZE_CHECK(size, p->blocks[i].unpackSize) + } return size; } @@ -67,12 +76,14 @@ UInt64 Xz_GetPackSize(const CXzStream *p) UInt64 size = 0; size_t i; for (i = 0; i < p->numBlocks; i++) - ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3); + { + ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3) + } return size; } /* -SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStream *inStream) +SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream) { return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f)); } @@ -93,7 +104,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt { UInt64 numBlocks64; - READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64); + READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64) numBlocks = (size_t)numBlocks64; if (numBlocks != numBlocks64 || numBlocks * 2 > size) return SZ_ERROR_ARCHIVE; @@ -110,8 +121,8 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt for (i = 0; i < numBlocks; i++) { CXzBlockSizes *block = &p->blocks[i]; - READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize); - READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize); + READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize) + READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize) if (block->totalSize == 0) return SZ_ERROR_ARCHIVE; } @@ -122,7 +133,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; } -static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize, ISzAllocPtr alloc) +static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc) { SRes res; size_t size; @@ -142,14 +153,14 @@ static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize, return res; } -static SRes LookInStream_SeekRead_ForArc(ILookInStream *stream, UInt64 offset, void *buf, size_t size) +static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size) { - RINOK(LookInStream_SeekTo(stream, offset)); + RINOK(LookInStream_SeekTo(stream, offset)) return LookInStream_Read(stream, buf, size); /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */ } -static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOffset, ISzAllocPtr alloc) +static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc) { UInt64 indexSize; Byte buf[XZ_STREAM_FOOTER_SIZE]; @@ -159,7 +170,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff return SZ_ERROR_NO_ARCHIVE; pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)); + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) if (!XZ_FOOTER_SIG_CHECK(buf + 10)) { @@ -174,7 +185,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos; pos -= i; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)); + RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)) total += (UInt32)i; for (; i != 0; i--) if (temp[i - 1] != 0) @@ -193,7 +204,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff if (pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)); + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) if (!XZ_FOOTER_SIG_CHECK(buf + 10)) return SZ_ERROR_NO_ARCHIVE; } @@ -217,8 +228,8 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff return SZ_ERROR_ARCHIVE; pos -= indexSize; - RINOK(LookInStream_SeekTo(stream, pos)); - RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)); + RINOK(LookInStream_SeekTo(stream, pos)) + RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) { UInt64 totalSize = Xz_GetPackSize(p); @@ -227,7 +238,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff || pos < totalSize + XZ_STREAM_HEADER_SIZE) return SZ_ERROR_ARCHIVE; pos -= (totalSize + XZ_STREAM_HEADER_SIZE); - RINOK(LookInStream_SeekTo(stream, pos)); + RINOK(LookInStream_SeekTo(stream, pos)) *startOffset = (Int64)pos; } { @@ -236,7 +247,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff SecToRead_CreateVTable(&secToRead); secToRead.realStream = stream; - RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)); + RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)) return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE; } } @@ -274,7 +285,9 @@ UInt64 Xzs_GetUnpackSize(const CXzs *p) UInt64 size = 0; size_t i; for (i = 0; i < p->num; i++) - ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i])); + { + ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i])) + } return size; } @@ -284,15 +297,17 @@ UInt64 Xzs_GetPackSize(const CXzs *p) UInt64 size = 0; size_t i; for (i = 0; i < p->num; i++) - ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i])); + { + ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i])) + } return size; } */ -SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc) +SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc) { Int64 endOffset = 0; - RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)); + RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)) *startOffset = endOffset; for (;;) { @@ -301,7 +316,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr Xz_Construct(&st); res = Xz_ReadBackward(&st, stream, startOffset, alloc); st.startOffset = (UInt64)*startOffset; - RINOK(res); + RINOK(res) if (p->num == p->numAllocated) { const size_t newNum = p->num + p->num / 4 + 1; @@ -317,7 +332,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr p->streams[p->num++] = st; if (*startOffset == 0) break; - RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)); + RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) return SZ_ERROR_PROGRESS; }