Update to latest LZ4.

Update a couple of comments.
This commit is contained in:
Moinak Ghosh 2012-12-11 11:38:42 +05:30
parent 224fb529e9
commit 03840b31c5
5 changed files with 564 additions and 565 deletions

869
lz4/lz4.c

File diff suppressed because it is too large. Load diff

120
lz4/lz4.h
View file

@ -31,28 +31,6 @@
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/ - LZ4 source repository : http://code.google.com/p/lz4/
*/ */
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/
#pragma once #pragma once
#if defined (__cplusplus) #if defined (__cplusplus)
@ -60,6 +38,14 @@ extern "C" {
#endif #endif
//**************************************
// Compiler Options
//**************************************
#ifdef _MSC_VER // Visual Studio
# define inline __inline // Visual is not C99, but supports some kind of inline
#endif
//**************************** //****************************
// Simple Functions // Simple Functions
//**************************** //****************************
@ -69,19 +55,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);
/* /*
LZ4_compress() : LZ4_compress() :
isize : is the input size. Max supported value is ~1.9GB Compresses 'isize' bytes from 'source' into 'dest'.
return : the number of bytes written in buffer dest Destination buffer must be already allocated,
or 0 if the compression fails (if LZ4_COMPRESSMIN is set) and must be sized to handle worst-case situations (input data not compressible)
note : destination buffer must be already allocated. Worst case size evaluation is provided by function LZ4_compressBound()
destination buffer must be sized to handle worst cases situations (input data not compressible)
worst case size evaluation is provided by function LZ4_compressBound() isize : is the input size. Max supported value is ~1.9GB
return : the number of bytes written in buffer dest
LZ4_uncompress() : LZ4_uncompress() :
osize : is the output size, therefore the original size osize : is the output size, therefore the original size
return : the number of bytes read in the source buffer return : the number of bytes read in the source buffer
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This function never writes beyond dest + osize, and is therefore protected against malicious data packets This function never writes outside of provided buffers, and never modifies input buffer.
note : destination buffer must be already allocated note : destination buffer must be already allocated.
its size must be a minimum of 'osize' bytes.
*/ */
@ -89,16 +78,34 @@ LZ4_uncompress() :
// Advanced Functions // Advanced Functions
//**************************** //****************************
int LZ4_compressBound(int isize); static inline int LZ4_compressBound(int isize) { return ((isize) + ((isize)/255) + 16); }
#define LZ4_COMPRESSBOUND( isize) ((isize) + ((isize)/255) + 16)
/* /*
LZ4_compressBound() : LZ4_compressBound() :
Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
primarily useful for memory allocation of output buffer. primarily useful for memory allocation of output buffer.
The inline function is recommended for the general case,
but the macro is also provided for when results need to be evaluated at compile time (such as table size allocation).
isize : is the input size. Max supported value is ~1.9GB isize : is the input size. Max supported value is ~1.9GB
return : maximum output size in a "worst case" scenario return : maximum output size in a "worst case" scenario
note : this function is limited by "int" range (2^31-1) note : this function is limited by "int" range (2^31-1)
*/
int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize);
/*
LZ4_compress_limitedOutput() :
Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
If the compressed data cannot fit within 'maxOutputSize', compression will stop, and the result of the function will be zero.
This function never writes outside of provided output buffer.
isize : is the input size. Max supported value is ~1.9GB
maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes written in buffer 'dest'
or 0 if the compression fails
*/ */
@ -106,34 +113,13 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize,
/* /*
LZ4_uncompress_unknownOutputSize() : LZ4_uncompress_unknownOutputSize() :
isize : is the input size, therefore the compressed size isize : is the input size, therefore the compressed size
maxOutputSize : is the size of the destination buffer (which must be already allocated) maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This function never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets This function never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note : Destination buffer must be already allocated. note : Destination buffer must be already allocated.
This version is slightly slower than LZ4_uncompress() This version is slightly slower than LZ4_uncompress()
*/
int LZ4_compressCtx(void** ctx, const char* source, char* dest, int isize);
int LZ4_compress64kCtx(void** ctx, const char* source, char* dest, int isize);
/*
LZ4_compressCtx() :
This function explicitly handles the CTX memory structure.
It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
LZ4_compress64kCtx() :
Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
isize *Must* be <64KB, otherwise the output will be corrupted.
On first call : provide a *ctx=NULL; It will be automatically allocated.
On next calls : reuse the same ctx pointer.
Use different pointers for different threads when doing multi-threading.
*/ */

View file

@ -31,26 +31,6 @@
- LZ4 source repository : http://code.google.com/p/lz4/ - LZ4 source repository : http://code.google.com/p/lz4/
*/ */
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/
//************************************** //**************************************
// CPU Feature Detection // CPU Feature Detection
@ -88,12 +68,20 @@
#ifdef _MSC_VER #ifdef _MSC_VER
#define inline __forceinline // Visual is not C99, but supports some kind of inline #define inline __forceinline // Visual is not C99, but supports some kind of inline
#include <intrin.h> // For Visual 2005
# if LZ4_ARCH64 // 64-bit
# pragma intrinsic(_BitScanForward64) // For Visual 2005
# pragma intrinsic(_BitScanReverse64) // For Visual 2005
# else
# pragma intrinsic(_BitScanForward) // For Visual 2005
# pragma intrinsic(_BitScanReverse) // For Visual 2005
# endif
#endif #endif
#ifdef _MSC_VER // Visual Studio #ifdef _MSC_VER // Visual Studio
#define bswap16(x) _byteswap_ushort(x) #define lz4_bswap16(x) _byteswap_ushort(x)
#else #else
#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif #endif
@ -197,8 +185,8 @@ typedef struct _U64_S { U64 v; } U64_S;
#endif #endif
#if defined(LZ4_BIG_ENDIAN) #if defined(LZ4_BIG_ENDIAN)
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; } #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; } #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else // Little Endian #else // Little Endian
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
@ -352,7 +340,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
// HC4 match finder // HC4 match finder
LZ4HC_Insert(hc4, ip); LZ4HC_Insert(hc4, ip);
ref = HASH_POINTER(ip); ref = HASH_POINTER(ip);
while ((ref > (ip-MAX_DISTANCE)) && (nbAttempts)) while ((ref >= (ip-MAX_DISTANCE)) && (nbAttempts))
{ {
nbAttempts--; nbAttempts--;
if (*(ref+ml) == *(ip+ml)) if (*(ref+ml) == *(ip+ml))
@ -362,15 +350,15 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* ipt = ip+MINMATCH; const BYTE* ipt = ip+MINMATCH;
#ifdef __USE_SSE_INTRIN__ #ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) { while (ipt<matchlimit-15) {
int mask; int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft)); __m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt)); __m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; } if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask); ipt += __builtin_ctz(mask);
goto _endCount; goto _endCount;
} }
#endif #endif
while (ipt<matchlimit-(STEPSIZE-1)) while (ipt<matchlimit-(STEPSIZE-1))
{ {
@ -384,7 +372,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
if ((ipt<matchlimit) && (*reft == *ipt)) ipt++; if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
_endCount: _endCount:
if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; } if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
} }
ref = GETNEXT(ref); ref = GETNEXT(ref);
} }
@ -400,13 +388,13 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
INITBASE(base,hc4->base); INITBASE(base,hc4->base);
const BYTE* ref; const BYTE* ref;
int nbAttempts = MAX_NB_ATTEMPTS; int nbAttempts = MAX_NB_ATTEMPTS;
int delta = ip-startLimit; int delta = (int)(ip-startLimit);
// First Match // First Match
LZ4HC_Insert(hc4, ip); LZ4HC_Insert(hc4, ip);
ref = HASH_POINTER(ip); ref = HASH_POINTER(ip);
while ((ref > ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) while ((ref >= ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts))
{ {
nbAttempts--; nbAttempts--;
if (*(startLimit + longest) == *(ref - delta + longest)) if (*(startLimit + longest) == *(ref - delta + longest))
@ -417,15 +405,15 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* startt = ip; const BYTE* startt = ip;
#ifdef __USE_SSE_INTRIN__ #ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) { while (ipt<matchlimit-15) {
int mask; int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft)); __m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt)); __m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; } if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask); ipt += __builtin_ctz(mask);
goto _endCount; goto _endCount;
} }
#endif #endif
while (ipt<matchlimit-(STEPSIZE-1)) while (ipt<matchlimit-(STEPSIZE-1))
{ {
@ -444,7 +432,7 @@ _endCount:
if ((ipt-startt) > longest) if ((ipt-startt) > longest)
{ {
longest = ipt-startt; longest = (int)(ipt-startt);
*matchpos = reft; *matchpos = reft;
*startpos = startt; *startpos = startt;
} }
@ -462,7 +450,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
BYTE* token; BYTE* token;
// Encode Literal length // Encode Literal length
length = *ip - *anchor; length = (int)(*ip - *anchor);
token = (*op)++; token = (*op)++;
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
else *token = (length<<ML_BITS); else *token = (length<<ML_BITS);
@ -471,7 +459,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
LZ4_BLINDCOPY(*anchor, *op, length); LZ4_BLINDCOPY(*anchor, *op, length);
// Encode Offset // Encode Offset
LZ4_WRITE_LITTLEENDIAN_16(*op,*ip-ref); LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
// Encode MatchLength // Encode MatchLength
len = (int)(ml-MINMATCH); len = (int)(ml-MINMATCH);
@ -564,8 +552,8 @@ _Search3:
int correction; int correction;
int new_ml = ml; int new_ml = ml;
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH; if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = new_ml - (start2 - ip); correction = new_ml - (int)(start2 - ip);
if (correction > 0) if (correction > 0)
{ {
start2 += correction; start2 += correction;
@ -588,8 +576,8 @@ _Search3:
{ {
int correction; int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH; if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = ml - (start2 - ip); correction = ml - (int)(start2 - ip);
if (correction > 0) if (correction > 0)
{ {
start2 += correction; start2 += correction;
@ -599,7 +587,7 @@ _Search3:
} }
else else
{ {
ml = start2 - ip; ml = (int)(start2 - ip);
} }
} }
// Now, encode 2 sequences // Now, encode 2 sequences
@ -615,7 +603,7 @@ _Search3:
{ {
if (start2 < ip+ml) if (start2 < ip+ml)
{ {
int correction = (ip+ml) - start2; int correction = (int)(ip+ml - start2);
start2 += correction; start2 += correction;
ref2 += correction; ref2 += correction;
ml2 -= correction; ml2 -= correction;
@ -652,8 +640,8 @@ _Search3:
{ {
int correction; int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH; if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = ml - (start2 - ip); correction = ml - (int)(start2 - ip);
if (correction > 0) if (correction > 0)
{ {
start2 += correction; start2 += correction;
@ -663,7 +651,7 @@ _Search3:
} }
else else
{ {
ml = start2 - ip; ml = (int)(start2 - ip);
} }
} }
LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
@ -682,7 +670,7 @@ _Search3:
// Encode Last Literals // Encode Last Literals
{ {
int lastRun = iend - anchor; int lastRun = (int)(iend - anchor);
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (lastRun<<ML_BITS); else *op++ = (lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor); memcpy(op, anchor, iend - anchor);

View file

@ -31,28 +31,6 @@
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/ - LZ4 source repository : http://code.google.com/p/lz4/
*/ */
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/
#pragma once #pragma once

12
main.c
View file

@ -208,6 +208,7 @@ preproc_compress(compress_func_ptr cmp_func, void *src, uint64_t srclen, void *d
} else { } else {
/* /*
* Execution won't come here but just in case ... * Execution won't come here but just in case ...
* Even Delta2 encoding below enables LZP.
*/ */
fprintf(stderr, "Invalid preprocessing mode\n"); fprintf(stderr, "Invalid preprocessing mode\n");
return (-1); return (-1);
@ -541,11 +542,12 @@ cont:
* *
* Chunk Flags, 8 bits: * Chunk Flags, 8 bits:
* I I I I I I I I * I I I I I I I I
* | | | | | * | | | | | |
* | '-----' | `- 0 - Uncompressed * | '-----' | | `- 0 - Uncompressed
* | | | 1 - Compressed * | | | | 1 - Compressed
* | | | * | | | |
* | | `---- 1 - Chunk was Deduped * | | | `---- 1 - Chunk was Deduped
* | | `------- 1 - Chunk was pre-compressed
* | | * | |
* | | 1 - Bzip2 (Adaptive Mode) * | | 1 - Bzip2 (Adaptive Mode)
* | `---------------- 2 - Lzma (Adaptive Mode) * | `---------------- 2 - Lzma (Adaptive Mode)