Fix buffer size calculation when decompressing LZ4, Zlib and Bzip2 compressed chunks.

Slight SSE optimization in LZ4HC.
This commit is contained in:
Moinak Ghosh 2012-08-03 23:19:38 +05:30
parent 636ab4a3d8
commit f9215b53fb
5 changed files with 30 additions and 7 deletions

View file

@ -206,7 +206,7 @@ bzip2_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
} }
/* normal termination */ /* normal termination */
*dstlen = _dstlen; *dstlen = *dstlen - _dstlen;
BZ2_bzDecompressEnd(&bzs); BZ2_bzDecompressEnd(&bzs);
return (0); return (0);
} }

View file

@ -102,6 +102,9 @@
//************************************** //**************************************
#include <stdlib.h> // calloc, free #include <stdlib.h> // calloc, free
#include <string.h> // memset, memcpy #include <string.h> // memset, memcpy
#ifdef __USE_SSE_INTRIN__
#include <emmintrin.h>
#endif
#include "lz4hc.h" #include "lz4hc.h"
#define ALLOCATOR(s) calloc(1,s) #define ALLOCATOR(s) calloc(1,s)
@ -358,6 +361,17 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* reft = ref+MINMATCH; const BYTE* reft = ref+MINMATCH;
const BYTE* ipt = ip+MINMATCH; const BYTE* ipt = ip+MINMATCH;
#ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) {
int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask);
goto _endCount;
}
#endif
while (ipt<matchlimit-(STEPSIZE-1)) while (ipt<matchlimit-(STEPSIZE-1))
{ {
UARCH diff = AARCH(reft) ^ AARCH(ipt); UARCH diff = AARCH(reft) ^ AARCH(ipt);
@ -402,6 +416,17 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* ipt = ip+MINMATCH; const BYTE* ipt = ip+MINMATCH;
const BYTE* startt = ip; const BYTE* startt = ip;
#ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) {
int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask);
goto _endCount;
}
#endif
while (ipt<matchlimit-(STEPSIZE-1)) while (ipt<matchlimit-(STEPSIZE-1))
{ {
UARCH diff = AARCH(reft) ^ AARCH(ipt); UARCH diff = AARCH(reft) ^ AARCH(ipt);

View file

@ -85,7 +85,6 @@ lz4_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
if (lzdat->level == 1) { if (lzdat->level == 1) {
rv = LZ4_compress(src, dst, _srclen); rv = LZ4_compress(src, dst, _srclen);
} else if (lzdat->level == 2) { } else if (lzdat->level == 2) {
rv = LZ4_compress(src, dst, _srclen); rv = LZ4_compress(src, dst, _srclen);
if (rv == 0) { if (rv == 0) {
@ -127,15 +126,14 @@ lz4_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
sz1 = ntohl(*((int *)src)); sz1 = ntohl(*((int *)src));
rv = LZ4_uncompress(src + sizeof (int), dst, sz1); rv = LZ4_uncompress(src + sizeof (int), dst, sz1);
if (rv == 0) { if (rv != sz1) {
return (-1); return (-1);
} }
memcpy(src, dst, sz1); memcpy(src, dst, sz1);
rv = LZ4_uncompress(src, dst, _dstlen); rv = LZ4_uncompress(src, dst, _dstlen);
} }
if (rv == 0) { if (rv != srclen) {
return (-1); return (-1);
} }
*dstlen = rv;
return (0); return (0);
} }

2
main.c
View file

@ -149,7 +149,7 @@ show_compression_stats(uint64_t chunksize)
/* /*
* This routine is called in multiple threads. Calls the decompression handler * This routine is called in multiple threads. Calls the decompression handler
* as encoded in the file header. For adaptive mode the handler adapt_decompress() * as encoded in the file header. For adaptive mode the handler adapt_decompress()
* in turns looks at the chunk header and call the actualy decompression * in turns looks at the chunk header and calls the actual decompression
* routine. * routine.
*/ */
static void * static void *

View file

@ -225,7 +225,7 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
} }
} }
*dstlen = _dstlen; *dstlen = *dstlen - _dstlen;
inflateEnd(&zs); inflateEnd(&zs);
return (0); return (0);
} }