Make Delta2 encoding independent of LZP.
Tweak Delta2 parameters. Update README and test cases.
This commit is contained in:
parent
b01d255f6c
commit
ef0191729e
8 changed files with 30 additions and 18 deletions
10
README.md
10
README.md
|
@ -102,10 +102,12 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
|
|||
delta encoding in conjunction with this may not always be beneficial.
|
||||
However Adaptive Delta Encoding is beneficial along with this.
|
||||
|
||||
'-P' - Enable Adaptive Delta Encoding. This implies '-L' as well. It improves
|
||||
compression ratio further at the cost of more CPU overhead. Delta
|
||||
Encoding is combined with Run-Length encoding and Matrix transpose
|
||||
of certain kinds of data to improve subsequent compression results.
|
||||
'-P' - Enable Adaptive Delta Encoding. It can improve compression ratio further
|
||||
for data containing tables of numerical values especially if those are
|
||||
in an arithmetic series. In this implementation basic Delta Encoding is
|
||||
combined with Run-Length encoding and Matrix transpose.
|
||||
NOTE - If data has mixed textual and numeric table components then both -L and
|
||||
-P can be used together.
|
||||
|
||||
'-S' <cksum>
|
||||
- Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and
|
||||
|
|
|
@ -282,8 +282,10 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
|||
|
||||
last = pos + srclen;
|
||||
olen = ntohll(*((uint64_t *)pos));
|
||||
if (*dstlen < (olen + 8))
|
||||
if (*dstlen < olen) {
|
||||
fprintf(stderr, "DELTA2 Decode: Destination buffer too small.\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
out = 0;
|
||||
pos += MAIN_HDR;
|
||||
|
@ -297,6 +299,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
|||
rcnt = ntohll(*((uint64_t *)pos));
|
||||
pos += sizeof (rcnt);
|
||||
if (out + rcnt > *dstlen) {
|
||||
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
|
||||
return (-1);
|
||||
}
|
||||
memcpy(pos1, pos, rcnt);
|
||||
|
@ -314,6 +317,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
|||
rcnt = ntohll(*((uint64_t *)pos));
|
||||
pos += sizeof (rcnt);
|
||||
if (out + rcnt > *dstlen) {
|
||||
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
|
||||
return (-1);
|
||||
}
|
||||
transpose(pos, pos1, rcnt, stride, COL);
|
||||
|
@ -330,6 +334,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
|||
delta = ntohll(*((uint64_t *)pos));
|
||||
pos += sizeof (delta);
|
||||
if (out + rcnt > *dstlen) {
|
||||
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ lz4_props(algo_props_t *data, int level, int64_t chunksize) {
|
|||
data->compress_mt_capable = 0;
|
||||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = lz4_buf_extra(chunksize);
|
||||
data->delta2_span = 50;
|
||||
data->delta2_span = 100;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
21
main.c
21
main.c
|
@ -150,8 +150,11 @@ usage(void)
|
|||
"7) Other flags:\n"
|
||||
" '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
|
||||
" algorithms with some extra CPU and very low RAM overhead.\n"
|
||||
" '-P' - Enable Adaptive Delta Encoding. This implies '-L' as well. It improves\n"
|
||||
" compresion ratio further at the cost of more CPU overhead.\n"
|
||||
" '-P' - Enable Adaptive Delta Encoding. It can improve compresion ratio for\n"
|
||||
" data containing tables of numerical values especially if those are in\n"
|
||||
" an arithmetic series.\n"
|
||||
" NOTE - If data has mixed textual and numeric table components then both -L and\n"
|
||||
" -P can be used together.\n"
|
||||
" '-S' <cksum>\n"
|
||||
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n"
|
||||
" SHA512. Default one is SKEIN256.\n"
|
||||
|
@ -207,7 +210,8 @@ preproc_compress(compress_func_ptr cmp_func, void *src, uint64_t srclen, void *d
|
|||
if (result < 0 || result == srclen) return (-1);
|
||||
srclen = result;
|
||||
memcpy(src, dst, srclen);
|
||||
} else {
|
||||
|
||||
} else if (!enable_delta2_encode) {
|
||||
/*
|
||||
* Execution won't come here but just in case ...
|
||||
* Even Delta2 encoding below enables LZP.
|
||||
|
@ -282,7 +286,9 @@ preproc_decompress(compress_func_ptr dec_func, void *src, uint64_t srclen, void
|
|||
return (-1);
|
||||
}
|
||||
*dstlen = result;
|
||||
} else {
|
||||
}
|
||||
|
||||
if (!(type & (PREPROC_COMPRESSED | PREPROC_TYPE_DELTA2 | PREPROC_TYPE_LZP))) {
|
||||
fprintf(stderr, "Invalid preprocessing flags: %d\n", type);
|
||||
return (-1);
|
||||
}
|
||||
|
@ -1196,7 +1202,7 @@ plain_index:
|
|||
dedupe_index_sz += RABIN_HDR_SIZE;
|
||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
||||
/* Compress data chunk. */
|
||||
if (lzp_preprocess) {
|
||||
if (lzp_preprocess || enable_delta2_encode) {
|
||||
rv = preproc_compress(tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
|
||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||
tdat->level, 0, tdat->data, tdat->props);
|
||||
|
@ -1215,7 +1221,7 @@ plain_index:
|
|||
} else {
|
||||
plain_compress:
|
||||
_chunksize = tdat->rbytes;
|
||||
if (lzp_preprocess) {
|
||||
if (lzp_preprocess || enable_delta2_encode) {
|
||||
rv = preproc_compress(tdat->compress,
|
||||
tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data,
|
||||
|
@ -1270,7 +1276,7 @@ plain_compress:
|
|||
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
|
||||
type |= CHUNK_FLAG_DEDUP;
|
||||
}
|
||||
if (lzp_preprocess) {
|
||||
if (lzp_preprocess || enable_delta2_encode) {
|
||||
type |= CHUNK_FLAG_PREPROC;
|
||||
}
|
||||
|
||||
|
@ -2165,7 +2171,6 @@ main(int argc, char *argv[])
|
|||
break;
|
||||
|
||||
case 'P':
|
||||
lzp_preprocess = 1;
|
||||
enable_delta2_encode = 1;
|
||||
break;
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ for algo in lzfx lz4 adapt
|
|||
do
|
||||
for tf in combined.dat comb_d.dat
|
||||
do
|
||||
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -E -P"
|
||||
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P"
|
||||
do
|
||||
for seg in 2m 100m
|
||||
do
|
||||
|
|
|
@ -12,7 +12,7 @@ for algo in lzfx adapt2
|
|||
do
|
||||
for tf in comb_d.dat
|
||||
do
|
||||
for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64" "-e -P" "-e -P -S KECCAK256" "-D -e -L -S KECCAK512"
|
||||
for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64" "-e -P" "-e -L -P -S KECCAK256" "-D -e -L -S KECCAK512"
|
||||
do
|
||||
for seg in 2m 100m
|
||||
do
|
||||
|
|
|
@ -12,7 +12,7 @@ for algo in lzfx lz4 adapt adapt2
|
|||
do
|
||||
for tf in combined.dat comb_d.dat
|
||||
do
|
||||
for feat in "-F" "-F -B3 -L" "-F -B4" "-F -B5 -L"
|
||||
for feat in "-F" "-F -B3 -L" "-F -B4" "-F -B5 -L" "-F -P" "-F -L -P"
|
||||
do
|
||||
for seg in 2m 100m
|
||||
do
|
||||
|
|
|
@ -91,7 +91,7 @@ zlib_stats(int show)
|
|||
|
||||
void
|
||||
zlib_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||
data->delta2_span = 50;
|
||||
data->delta2_span = 100;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
Loading…
Reference in a new issue