Fix buffer size calculation when decompressing LZ4, Zlib and Bzip2 compressed chunks.
Slight SSE optimization in LZ4HC.
This commit is contained in:
parent
636ab4a3d8
commit
f9215b53fb
5 changed files with 30 additions and 7 deletions
|
@ -206,7 +206,7 @@ bzip2_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* normal termination */
|
/* normal termination */
|
||||||
*dstlen = _dstlen;
|
*dstlen = *dstlen - _dstlen;
|
||||||
BZ2_bzDecompressEnd(&bzs);
|
BZ2_bzDecompressEnd(&bzs);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
25
lz4/lz4hc.c
25
lz4/lz4hc.c
|
@ -102,6 +102,9 @@
|
||||||
//**************************************
|
//**************************************
|
||||||
#include <stdlib.h> // calloc, free
|
#include <stdlib.h> // calloc, free
|
||||||
#include <string.h> // memset, memcpy
|
#include <string.h> // memset, memcpy
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
#include "lz4hc.h"
|
#include "lz4hc.h"
|
||||||
|
|
||||||
#define ALLOCATOR(s) calloc(1,s)
|
#define ALLOCATOR(s) calloc(1,s)
|
||||||
|
@ -358,6 +361,17 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
|
||||||
const BYTE* reft = ref+MINMATCH;
|
const BYTE* reft = ref+MINMATCH;
|
||||||
const BYTE* ipt = ip+MINMATCH;
|
const BYTE* ipt = ip+MINMATCH;
|
||||||
|
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
|
while (ipt<matchlimit-15) {
|
||||||
|
int mask;
|
||||||
|
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
|
||||||
|
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
|
||||||
|
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
|
||||||
|
if (!mask) { ipt+=16; reft+=16; continue; }
|
||||||
|
ipt += __builtin_ctz(mask);
|
||||||
|
goto _endCount;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
while (ipt<matchlimit-(STEPSIZE-1))
|
while (ipt<matchlimit-(STEPSIZE-1))
|
||||||
{
|
{
|
||||||
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
||||||
|
@ -402,6 +416,17 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
|
||||||
const BYTE* ipt = ip+MINMATCH;
|
const BYTE* ipt = ip+MINMATCH;
|
||||||
const BYTE* startt = ip;
|
const BYTE* startt = ip;
|
||||||
|
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
|
while (ipt<matchlimit-15) {
|
||||||
|
int mask;
|
||||||
|
__m128i span1 = _mm_loadu_si128((__m128i *)(reft));
|
||||||
|
__m128i span2 = _mm_loadu_si128((__m128i *)(ipt));
|
||||||
|
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
|
||||||
|
if (!mask) { ipt+=16; reft+=16; continue; }
|
||||||
|
ipt += __builtin_ctz(mask);
|
||||||
|
goto _endCount;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
while (ipt<matchlimit-(STEPSIZE-1))
|
while (ipt<matchlimit-(STEPSIZE-1))
|
||||||
{
|
{
|
||||||
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
UARCH diff = AARCH(reft) ^ AARCH(ipt);
|
||||||
|
|
|
@ -85,7 +85,6 @@ lz4_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
|
|
||||||
if (lzdat->level == 1) {
|
if (lzdat->level == 1) {
|
||||||
rv = LZ4_compress(src, dst, _srclen);
|
rv = LZ4_compress(src, dst, _srclen);
|
||||||
|
|
||||||
} else if (lzdat->level == 2) {
|
} else if (lzdat->level == 2) {
|
||||||
rv = LZ4_compress(src, dst, _srclen);
|
rv = LZ4_compress(src, dst, _srclen);
|
||||||
if (rv == 0) {
|
if (rv == 0) {
|
||||||
|
@ -127,15 +126,14 @@ lz4_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
|
|
||||||
sz1 = ntohl(*((int *)src));
|
sz1 = ntohl(*((int *)src));
|
||||||
rv = LZ4_uncompress(src + sizeof (int), dst, sz1);
|
rv = LZ4_uncompress(src + sizeof (int), dst, sz1);
|
||||||
if (rv == 0) {
|
if (rv != sz1) {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
memcpy(src, dst, sz1);
|
memcpy(src, dst, sz1);
|
||||||
rv = LZ4_uncompress(src, dst, _dstlen);
|
rv = LZ4_uncompress(src, dst, _dstlen);
|
||||||
}
|
}
|
||||||
if (rv == 0) {
|
if (rv != srclen) {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
*dstlen = rv;
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
2
main.c
2
main.c
|
@ -149,7 +149,7 @@ show_compression_stats(uint64_t chunksize)
|
||||||
/*
|
/*
|
||||||
* This routine is called in multiple threads. Calls the decompression handler
|
* This routine is called in multiple threads. Calls the decompression handler
|
||||||
* as encoded in the file header. For adaptive mode the handler adapt_decompress()
|
* as encoded in the file header. For adaptive mode the handler adapt_decompress()
|
||||||
* in turns looks at the chunk header and call the actualy decompression
|
* in turn looks at the chunk header and calls the actual decompression
|
||||||
* routine.
|
* routine.
|
||||||
*/
|
*/
|
||||||
static void *
|
static void *
|
||||||
|
|
|
@ -225,7 +225,7 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*dstlen = _dstlen;
|
*dstlen = *dstlen - _dstlen;
|
||||||
inflateEnd(&zs);
|
inflateEnd(&zs);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue