Fix buffer size calculation when decompressing LZ4, Zlib and Bzip2 compressed chunks.
Slight SSE optimization in LZ4HC.
This commit is contained in:
parent
636ab4a3d8
commit
f9215b53fb
5 changed files with 30 additions and 7 deletions
|
@ -206,7 +206,7 @@ bzip2_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
}
|
||||
|
||||
/* normal termination */
|
||||
*dstlen = _dstlen;
|
||||
*dstlen = *dstlen - _dstlen;
|
||||
BZ2_bzDecompressEnd(&bzs);
|
||||
return (0);
|
||||
}
|
||||
|
|
25
lz4/lz4hc.c
25
lz4/lz4hc.c
|
@ -102,6 +102,9 @@
|
|||
//**************************************
|
||||
#include <stdlib.h> // calloc, free
|
||||
#include <string.h> // memset, memcpy
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include "lz4hc.h"
|
||||
|
||||
#define ALLOCATOR(s) calloc(1,s)
|
||||
|
@ -358,6 +361,17 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
|
|||
const BYTE* reft = ref+MINMATCH;
|
||||
const BYTE* ipt = ip+MINMATCH;
|
||||
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
while (ipt<matchlimit-15) {
|
||||
int mask;
|
||||
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
|
||||
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
|
||||
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
|
||||
if (!mask) { ipt+=16; reft+=16; continue; }
|
||||
ipt += __builtin_ctz(mask);
|
||||
goto _endCount;
|
||||
}
|
||||
#endif
|
||||
while (ipt<matchlimit-(STEPSIZE-1))
|
||||
{
|
||||
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
||||
|
@ -402,6 +416,17 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
|
|||
const BYTE* ipt = ip+MINMATCH;
|
||||
const BYTE* startt = ip;
|
||||
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
while (ipt<matchlimit-15) {
|
||||
int mask;
|
||||
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
|
||||
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
|
||||
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
|
||||
if (!mask) { ipt+=16; reft+=16; continue; }
|
||||
ipt += __builtin_ctz(mask);
|
||||
goto _endCount;
|
||||
}
|
||||
#endif
|
||||
while (ipt<matchlimit-(STEPSIZE-1))
|
||||
{
|
||||
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
||||
|
|
|
@ -85,7 +85,6 @@ lz4_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
|
||||
if (lzdat->level == 1) {
|
||||
rv = LZ4_compress(src, dst, _srclen);
|
||||
|
||||
} else if (lzdat->level == 2) {
|
||||
rv = LZ4_compress(src, dst, _srclen);
|
||||
if (rv == 0) {
|
||||
|
@ -127,15 +126,14 @@ lz4_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
|
||||
sz1 = ntohl(*((int *)src));
|
||||
rv = LZ4_uncompress(src + sizeof (int), dst, sz1);
|
||||
if (rv == 0) {
|
||||
if (rv != sz1) {
|
||||
return (-1);
|
||||
}
|
||||
memcpy(src, dst, sz1);
|
||||
rv = LZ4_uncompress(src, dst, _dstlen);
|
||||
}
|
||||
if (rv == 0) {
|
||||
if (rv != srclen) {
|
||||
return (-1);
|
||||
}
|
||||
*dstlen = rv;
|
||||
return (0);
|
||||
}
|
||||
|
|
2
main.c
2
main.c
|
@ -149,7 +149,7 @@ show_compression_stats(uint64_t chunksize)
|
|||
/*
|
||||
* This routine is called in multiple threads. Calls the decompression handler
|
||||
* as encoded in the file header. For adaptive mode the handler adapt_decompress()
|
||||
* in turns looks at the chunk header and call the actualy decompression
|
||||
* in turn looks at the chunk header and calls the actual decompression
|
||||
* routine.
|
||||
*/
|
||||
static void *
|
||||
|
|
|
@ -225,7 +225,7 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
}
|
||||
}
|
||||
|
||||
*dstlen = _dstlen;
|
||||
*dstlen = *dstlen - _dstlen;
|
||||
inflateEnd(&zs);
|
||||
return (0);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue