Fix buffer size calculation when decompressing LZ4, Zlib and Bzip2 compressed chunks.

Slight SSE optimization in LZ4HC.
This commit is contained in:
Moinak Ghosh 2012-08-03 23:19:38 +05:30
parent 636ab4a3d8
commit f9215b53fb
5 changed files with 30 additions and 7 deletions

View file

@ -206,7 +206,7 @@ bzip2_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
}
/* normal termination */
*dstlen = _dstlen;
*dstlen = *dstlen - _dstlen;
BZ2_bzDecompressEnd(&bzs);
return (0);
}

View file

@ -102,6 +102,9 @@
//**************************************
#include <stdlib.h> // calloc, free
#include <string.h> // memset, memcpy
#ifdef __USE_SSE_INTRIN__
#include <emmintrin.h>
#endif
#include "lz4hc.h"
#define ALLOCATOR(s) calloc(1,s)
@ -358,6 +361,17 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* reft = ref+MINMATCH;
const BYTE* ipt = ip+MINMATCH;
#ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) {
int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask);
goto _endCount;
}
#endif
while (ipt<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(reft) ^ AARCH(ipt);
@ -402,6 +416,17 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
const BYTE* ipt = ip+MINMATCH;
const BYTE* startt = ip;
#ifdef __USE_SSE_INTRIN__
while (ipt<matchlimit-15) {
int mask;
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (!mask) { ipt+=16; reft+=16; continue; }
ipt += __builtin_ctz(mask);
goto _endCount;
}
#endif
while (ipt<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(reft) ^ AARCH(ipt);

View file

@ -85,7 +85,6 @@ lz4_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
if (lzdat->level == 1) {
rv = LZ4_compress(src, dst, _srclen);
} else if (lzdat->level == 2) {
rv = LZ4_compress(src, dst, _srclen);
if (rv == 0) {
@ -127,15 +126,14 @@ lz4_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
sz1 = ntohl(*((int *)src));
rv = LZ4_uncompress(src + sizeof (int), dst, sz1);
if (rv == 0) {
if (rv != sz1) {
return (-1);
}
memcpy(src, dst, sz1);
rv = LZ4_uncompress(src, dst, _dstlen);
}
if (rv == 0) {
if (rv != srclen) {
return (-1);
}
*dstlen = rv;
return (0);
}

2
main.c
View file

@ -149,7 +149,7 @@ show_compression_stats(uint64_t chunksize)
/*
* This routine is called in multiple threads. Calls the decompression handler
* as encoded in the file header. For adaptive mode the handler adapt_decompress()
* in turns looks at the chunk header and call the actualy decompression
* in turn looks at the chunk header and calls the actual decompression
* routine.
*/
static void *

View file

@ -225,7 +225,7 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
}
}
*dstlen = _dstlen;
*dstlen = *dstlen - _dstlen;
inflateEnd(&zs);
return (0);
}