| /* Xz.h - Xz interface |
| 2023-04-13 : Igor Pavlov : Public domain */ |
| |
| #ifndef ZIP7_INC_XZ_H |
| #define ZIP7_INC_XZ_H |
| |
| #include "Sha256.h" |
| #include "Delta.h" |
| |
| EXTERN_C_BEGIN |
| |
| #define XZ_ID_Subblock 1 |
| #define XZ_ID_Delta 3 |
| #define XZ_ID_X86 4 |
| #define XZ_ID_PPC 5 |
| #define XZ_ID_IA64 6 |
| #define XZ_ID_ARM 7 |
| #define XZ_ID_ARMT 8 |
| #define XZ_ID_SPARC 9 |
| #define XZ_ID_ARM64 0xa |
| #define XZ_ID_LZMA2 0x21 |
| |
| unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); |
| unsigned Xz_WriteVarInt(Byte *buf, UInt64 v); |
| |
| /* ---------- xz block ---------- */ |
| |
| #define XZ_BLOCK_HEADER_SIZE_MAX 1024 |
| |
| #define XZ_NUM_FILTERS_MAX 4 |
| #define XZ_BF_NUM_FILTERS_MASK 3 |
| #define XZ_BF_PACK_SIZE (1 << 6) |
| #define XZ_BF_UNPACK_SIZE (1 << 7) |
| |
| #define XZ_FILTER_PROPS_SIZE_MAX 20 |
| |
| typedef struct |
| { |
| UInt64 id; |
| UInt32 propsSize; |
| Byte props[XZ_FILTER_PROPS_SIZE_MAX]; |
| } CXzFilter; |
| |
| typedef struct |
| { |
| UInt64 packSize; |
| UInt64 unpackSize; |
| Byte flags; |
| CXzFilter filters[XZ_NUM_FILTERS_MAX]; |
| } CXzBlock; |
| |
| #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) |
| #define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) |
| #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) |
| #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) |
| |
| SRes XzBlock_Parse(CXzBlock *p, const Byte *header); |
| SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes); |
| |
| /* ---------- xz stream ---------- */ |
| |
| #define XZ_SIG_SIZE 6 |
| #define XZ_FOOTER_SIG_SIZE 2 |
| |
| extern const Byte XZ_SIG[XZ_SIG_SIZE]; |
| |
| /* |
| extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE]; |
| */ |
| |
| #define XZ_FOOTER_SIG_0 'Y' |
| #define XZ_FOOTER_SIG_1 'Z' |
| |
| #define XZ_STREAM_FLAGS_SIZE 2 |
| #define XZ_STREAM_CRC_SIZE 4 |
| |
| #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE) |
| #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4) |
| |
| #define XZ_CHECK_MASK 0xF |
| #define XZ_CHECK_NO 0 |
| #define XZ_CHECK_CRC32 1 |
| #define XZ_CHECK_CRC64 4 |
| #define XZ_CHECK_SHA256 10 |
| |
| typedef struct |
| { |
| unsigned mode; |
| UInt32 crc; |
| UInt64 crc64; |
| CSha256 sha; |
| } CXzCheck; |
| |
| void XzCheck_Init(CXzCheck *p, unsigned mode); |
| void XzCheck_Update(CXzCheck *p, const void *data, size_t size); |
| int XzCheck_Final(CXzCheck *p, Byte *digest); |
| |
| typedef UInt16 CXzStreamFlags; |
| |
| #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK) |
| #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK) |
| #define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32) |
| unsigned XzFlags_GetCheckSize(CXzStreamFlags f); |
| |
| SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); |
| SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream); |
| |
| typedef struct |
| { |
| UInt64 unpackSize; |
| UInt64 totalSize; |
| } CXzBlockSizes; |
| |
| typedef struct |
| { |
| CXzStreamFlags flags; |
| // Byte _pad[6]; |
| size_t numBlocks; |
| CXzBlockSizes *blocks; |
| UInt64 startOffset; |
| } CXzStream; |
| |
| void Xz_Construct(CXzStream *p); |
| void Xz_Free(CXzStream *p, ISzAllocPtr alloc); |
| |
| #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1) |
| |
| UInt64 Xz_GetUnpackSize(const CXzStream *p); |
| UInt64 Xz_GetPackSize(const CXzStream *p); |
| |
| typedef struct |
| { |
| size_t num; |
| size_t numAllocated; |
| CXzStream *streams; |
| } CXzs; |
| |
| void Xzs_Construct(CXzs *p); |
| void Xzs_Free(CXzs *p, ISzAllocPtr alloc); |
| SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); |
| |
| UInt64 Xzs_GetNumBlocks(const CXzs *p); |
| UInt64 Xzs_GetUnpackSize(const CXzs *p); |
| |
| |
| // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder |
| |
| typedef enum |
| { |
| CODER_STATUS_NOT_SPECIFIED, /* use main error code instead */ |
| CODER_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ |
| CODER_STATUS_NOT_FINISHED, /* stream was not finished */ |
| CODER_STATUS_NEEDS_MORE_INPUT /* you must provide more input bytes */ |
| } ECoderStatus; |
| |
| |
| // ECoderFinishMode values are identical to ELzmaFinishMode |
| |
| typedef enum |
| { |
| CODER_FINISH_ANY, /* finish at any point */ |
| CODER_FINISH_END /* block must be finished at the end */ |
| } ECoderFinishMode; |
| |
| |
| typedef struct |
| { |
| void *p; // state object; |
| void (*Free)(void *p, ISzAllocPtr alloc); |
| SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); |
| void (*Init)(void *p); |
| SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, |
| int srcWasFinished, ECoderFinishMode finishMode, |
| // int *wasFinished, |
| ECoderStatus *status); |
| SizeT (*Filter)(void *p, Byte *data, SizeT size); |
| } IStateCoder; |
| |
| |
| typedef struct |
| { |
| UInt32 methodId; |
| UInt32 delta; |
| UInt32 ip; |
| UInt32 X86_State; |
| Byte delta_State[DELTA_STATE_SIZE]; |
| } CXzBcFilterStateBase; |
| |
| typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size); |
| |
| SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, |
| Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc); |
| |
| |
| #define MIXCODER_NUM_FILTERS_MAX 4 |
| |
| typedef struct |
| { |
| ISzAllocPtr alloc; |
| Byte *buf; |
| unsigned numCoders; |
| |
| Byte *outBuf; |
| size_t outBufSize; |
| size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode) |
| BoolInt wasFinished; |
| SRes res; |
| ECoderStatus status; |
| // BoolInt SingleBufMode; |
| |
| int finished[MIXCODER_NUM_FILTERS_MAX - 1]; |
| size_t pos[MIXCODER_NUM_FILTERS_MAX - 1]; |
| size_t size[MIXCODER_NUM_FILTERS_MAX - 1]; |
| UInt64 ids[MIXCODER_NUM_FILTERS_MAX]; |
| SRes results[MIXCODER_NUM_FILTERS_MAX]; |
| IStateCoder coders[MIXCODER_NUM_FILTERS_MAX]; |
| } CMixCoder; |
| |
| |
| typedef enum |
| { |
| XZ_STATE_STREAM_HEADER, |
| XZ_STATE_STREAM_INDEX, |
| XZ_STATE_STREAM_INDEX_CRC, |
| XZ_STATE_STREAM_FOOTER, |
| XZ_STATE_STREAM_PADDING, |
| XZ_STATE_BLOCK_HEADER, |
| XZ_STATE_BLOCK, |
| XZ_STATE_BLOCK_FOOTER |
| } EXzState; |
| |
| |
| typedef struct |
| { |
| EXzState state; |
| UInt32 pos; |
| unsigned alignPos; |
| unsigned indexPreSize; |
| |
| CXzStreamFlags streamFlags; |
| |
| UInt32 blockHeaderSize; |
| UInt64 packSize; |
| UInt64 unpackSize; |
| |
| UInt64 numBlocks; // number of finished blocks in current stream |
| UInt64 indexSize; |
| UInt64 indexPos; |
| UInt64 padSize; |
| |
| UInt64 numStartedStreams; |
| UInt64 numFinishedStreams; |
| UInt64 numTotalBlocks; |
| |
| UInt32 crc; |
| CMixCoder decoder; |
| CXzBlock block; |
| CXzCheck check; |
| CSha256 sha; |
| |
| BoolInt parseMode; |
| BoolInt headerParsedOk; |
| BoolInt decodeToStreamSignature; |
| unsigned decodeOnlyOneBlock; |
| |
| Byte *outBuf; |
| size_t outBufSize; |
| size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked |
| |
| Byte shaDigest[SHA256_DIGEST_SIZE]; |
| Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; |
| } CXzUnpacker; |
| |
| /* alloc : aligned for cache line allocation is better */ |
| void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc); |
| void XzUnpacker_Init(CXzUnpacker *p); |
| void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize); |
| void XzUnpacker_Free(CXzUnpacker *p); |
| |
| /* |
| XzUnpacker |
| The sequence for decoding functions: |
| { |
| XzUnpacker_Construct() |
| [Decoding_Calls] |
| XzUnpacker_Free() |
| } |
| |
| [Decoding_Calls] |
| |
| There are 3 types of interfaces for [Decoding_Calls] calls: |
| |
| Interface-1 : Partial output buffers: |
| { |
| XzUnpacker_Init() |
| for() |
| { |
| XzUnpacker_Code(); |
| } |
| XzUnpacker_IsStreamWasFinished() |
| } |
| |
| Interface-2 : Direct output buffer: |
| Use it, if you know exact size of decoded data, and you need |
| whole xz unpacked data in one output buffer. |
| xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode. |
| { |
| XzUnpacker_Init() |
| XzUnpacker_SetOutBufMode(); // to set output buffer and size |
| for() |
| { |
| XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() |
| } |
| XzUnpacker_IsStreamWasFinished() |
| } |
| |
| Interface-3 : Direct output buffer : One call full decoding |
| It unpacks whole input buffer to output buffer in one call. |
| It uses Interface-2 internally. |
| { |
| XzUnpacker_CodeFull() |
| XzUnpacker_IsStreamWasFinished() |
| } |
| */ |
| |
| /* |
| finishMode: |
| It has meaning only if the decoding reaches output limit (*destLen). |
| CODER_FINISH_ANY - use smallest number of input bytes |
| CODER_FINISH_END - read EndOfStream marker after decoding |
| |
| Returns: |
| SZ_OK |
| status: |
| CODER_STATUS_NOT_FINISHED, |
| CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: |
| 1) it needs more input data to finish current xz stream |
| 2) xz stream was finished successfully. But the decoder supports multiple |
| concatented xz streams. So it expects more input data for new xz streams. |
| Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. |
| |
| SZ_ERROR_MEM - Memory allocation error |
| SZ_ERROR_DATA - Data error |
| SZ_ERROR_UNSUPPORTED - Unsupported method or method properties |
| SZ_ERROR_CRC - CRC error |
| // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). |
| |
| SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons: |
| - xz Stream Signature failure |
| - CRC32 of xz Stream Header is failed |
| - The size of Stream padding is not multiple of four bytes. |
| It's possible to get that error, if xz stream was finished and the stream |
| contains some another data. In that case you can call XzUnpacker_GetExtraSize() |
| function to get real size of xz stream. |
| */ |
| |
| |
| SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, |
| const Byte *src, SizeT *srcLen, int srcFinished, |
| ECoderFinishMode finishMode, ECoderStatus *status); |
| |
| SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, |
| const Byte *src, SizeT *srcLen, |
| ECoderFinishMode finishMode, ECoderStatus *status); |
| |
| /* |
| If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() |
| after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). |
| */ |
| |
| BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); |
| |
| /* |
| XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes, |
| if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. |
| These bytes can be some data after xz archive, or |
| it can be start of new xz stream. |
| |
| Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of |
| xz stream in two cases, if XzUnpacker_Code() returns: |
| res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT |
| res == SZ_ERROR_NO_ARCHIVE |
| */ |
| |
| UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p); |
| |
| |
| /* |
| for random block decoding: |
| XzUnpacker_Init(); |
| set CXzUnpacker::streamFlags |
| XzUnpacker_PrepareToRandomBlockDecoding() |
| loop |
| { |
| XzUnpacker_Code() |
| XzUnpacker_IsBlockFinished() |
| } |
| */ |
| |
| void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p); |
| BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); |
| |
| #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags)) |
| |
| |
| |
| |
| |
| |
| /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ |
| |
| /* |
| if (CXzDecMtProps::numThreads > 1), the decoder can try to use |
| Multi-Threading. The decoder analyses xz block header, and if |
| there are pack size and unpack size values stored in xz block header, |
| the decoder reads compressed data of block to internal buffers, |
| and then it can start parallel decoding, if there are another blocks. |
| The decoder can switch back to Single-Thread decoding after some conditions. |
| |
| The sequence of calls for xz decoding with in/out Streams: |
| { |
| XzDecMt_Create() |
| XzDecMtProps_Init(XzDecMtProps) to set default values of properties |
| // then you can change some XzDecMtProps parameters with required values |
| // here you can set the number of threads and (memUseMax) - the maximum |
| Memory usage for multithreading decoding. |
| for() |
| { |
| XzDecMt_Decode() // one call per one file |
| } |
| XzDecMt_Destroy() |
| } |
| */ |
| |
| |
| typedef struct |
| { |
| size_t inBufSize_ST; // size of input buffer for Single-Thread decoding |
| size_t outStep_ST; // size of output buffer for Single-Thread decoding |
| BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. |
| |
| #ifndef Z7_ST |
| unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding |
| size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created |
| size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. |
| // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer. |
| #endif |
| } CXzDecMtProps; |
| |
| void XzDecMtProps_Init(CXzDecMtProps *p); |
| |
| typedef struct CXzDecMt CXzDecMt; |
| typedef CXzDecMt * CXzDecMtHandle; |
| // Z7_DECLARE_HANDLE(CXzDecMtHandle) |
| |
| /* |
| alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). |
| allocMid : for big allocations, aligned allocation is better |
| */ |
| |
| CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); |
| void XzDecMt_Destroy(CXzDecMtHandle p); |
| |
| |
| typedef struct |
| { |
| Byte UnpackSize_Defined; |
| Byte NumStreams_Defined; |
| Byte NumBlocks_Defined; |
| |
| Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. |
| Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data |
| |
| UInt64 InSize; // pack size processed. That value doesn't include the data after |
| // end of xz stream, if that data was not correct |
| UInt64 OutSize; |
| |
| UInt64 NumStreams; |
| UInt64 NumBlocks; |
| |
| SRes DecodeRes; // the error code of xz streams data decoding |
| SRes ReadRes; // error code from ISeqInStream:Read() |
| SRes ProgressRes; // error code from ICompressProgress:Progress() |
| |
| SRes CombinedRes; // Combined result error code that shows main rusult |
| // = S_OK, if there is no error. |
| // but check also (DataAfterEnd) that can show additional minor errors. |
| |
| SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream |
| // = SZ_ERROR_PROGRESS, if error from ICompressProgress |
| // = SZ_ERROR_WRITE, if error from ISeqOutStream |
| // = SZ_ERROR_* codes for decoding |
| } CXzStatInfo; |
| |
| void XzStatInfo_Clear(CXzStatInfo *p); |
| |
| /* |
| |
| XzDecMt_Decode() |
| SRes: it's combined decoding result. It also is equal to stat->CombinedRes. |
| |
| SZ_OK - no error |
| check also output value in (stat->DataAfterEnd) |
| that can show additional possible error |
| |
| SZ_ERROR_MEM - Memory allocation error |
| SZ_ERROR_NO_ARCHIVE - is not xz archive |
| SZ_ERROR_ARCHIVE - Headers error |
| SZ_ERROR_DATA - Data Error |
| SZ_ERROR_UNSUPPORTED - Unsupported method or method properties |
| SZ_ERROR_CRC - CRC Error |
| SZ_ERROR_INPUT_EOF - it needs more input data |
| SZ_ERROR_WRITE - ISeqOutStream error |
| (SZ_ERROR_READ) - ISeqInStream errors |
| (SZ_ERROR_PROGRESS) - ICompressProgress errors |
| // SZ_ERROR_THREAD - error in multi-threading functions |
| MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function |
| */ |
| |
| SRes XzDecMt_Decode(CXzDecMtHandle p, |
| const CXzDecMtProps *props, |
| const UInt64 *outDataSize, // NULL means undefined |
| int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished |
| ISeqOutStreamPtr outStream, |
| // Byte *outBuf, size_t *outBufSize, |
| ISeqInStreamPtr inStream, |
| // const Byte *inData, size_t inDataSize, |
| CXzStatInfo *stat, // out: decoding results and statistics |
| int *isMT, // out: 0 means that ST (Single-Thread) version was used |
| // 1 means that MT (Multi-Thread) version was used |
| ICompressProgressPtr progress); |
| |
| EXTERN_C_END |
| |
| #endif |