Fix issue #11.
Increase the default chunk size from 5MB to 8MB. When no compression level is given, default to level 1 (fast mode) for LZ4; all other algorithms keep the default level of 6.
This commit is contained in:
parent
3db5188445
commit
fc65111bae
2 changed files with 22 additions and 10 deletions
28
pcompress.c
28
pcompress.c
|
@ -56,9 +56,9 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use 5MB chunks by default.
|
* We use 8MB chunks by default.
|
||||||
*/
|
*/
|
||||||
#define DEFAULT_CHUNKSIZE (5 * 1024 * 1024)
|
#define DEFAULT_CHUNKSIZE (8 * 1024 * 1024)
|
||||||
#define EIGHTY_PCT(x) ((x) - ((x)/5))
|
#define EIGHTY_PCT(x) ((x) - ((x)/5))
|
||||||
|
|
||||||
struct wdata {
|
struct wdata {
|
||||||
|
@ -516,6 +516,7 @@ redo:
|
||||||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan || pctx->enable_rabin_global) &&
|
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan || pctx->enable_rabin_global) &&
|
||||||
(HDR & CHUNK_FLAG_DEDUP)) {
|
(HDR & CHUNK_FLAG_DEDUP)) {
|
||||||
uchar_t *cmpbuf, *ubuf;
|
uchar_t *cmpbuf, *ubuf;
|
||||||
|
|
||||||
/* Extract various sizes from dedupe header. */
|
/* Extract various sizes from dedupe header. */
|
||||||
parse_dedupe_hdr(cseg, &blknum, &dedupe_index_sz, &dedupe_data_sz,
|
parse_dedupe_hdr(cseg, &blknum, &dedupe_index_sz, &dedupe_data_sz,
|
||||||
&dedupe_index_sz_cmp, &dedupe_data_sz_cmp, &_chunksize);
|
&dedupe_index_sz_cmp, &dedupe_data_sz_cmp, &_chunksize);
|
||||||
|
@ -1395,6 +1396,7 @@ redo:
|
||||||
compressed_chunk = tdat->compressed_chunk + CHUNK_FLAG_SZ;
|
compressed_chunk = tdat->compressed_chunk + CHUNK_FLAG_SZ;
|
||||||
rbytes = tdat->rbytes;
|
rbytes = tdat->rbytes;
|
||||||
dedupe_index_sz = 0;
|
dedupe_index_sz = 0;
|
||||||
|
type = COMPRESSED;
|
||||||
|
|
||||||
/* Perform Dedup if enabled. */
|
/* Perform Dedup if enabled. */
|
||||||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
||||||
|
@ -1434,9 +1436,9 @@ redo:
|
||||||
* reducing compression effectiveness of the data chunk. So we separate them.
|
* reducing compression effectiveness of the data chunk. So we separate them.
|
||||||
*/
|
*/
|
||||||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan) && tdat->rctx->valid) {
|
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan) && tdat->rctx->valid) {
|
||||||
|
uint64_t o_chunksize;
|
||||||
_chunksize = tdat->rbytes - dedupe_index_sz - RABIN_HDR_SIZE;
|
_chunksize = tdat->rbytes - dedupe_index_sz - RABIN_HDR_SIZE;
|
||||||
index_size_cmp = dedupe_index_sz;
|
index_size_cmp = dedupe_index_sz;
|
||||||
|
|
||||||
rv = 0;
|
rv = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1473,6 +1475,8 @@ plain_index:
|
||||||
index_size_cmp += RABIN_HDR_SIZE;
|
index_size_cmp += RABIN_HDR_SIZE;
|
||||||
dedupe_index_sz += RABIN_HDR_SIZE;
|
dedupe_index_sz += RABIN_HDR_SIZE;
|
||||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
||||||
|
o_chunksize = _chunksize;
|
||||||
|
|
||||||
/* Compress data chunk. */
|
/* Compress data chunk. */
|
||||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
||||||
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
|
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
|
||||||
|
@ -1490,9 +1494,12 @@ plain_index:
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can't compress data just retain as-is. */
|
/* Can't compress data just retain as-is. */
|
||||||
if (rv < 0)
|
if (rv < 0 || _chunksize >= o_chunksize) {
|
||||||
|
_chunksize = o_chunksize;
|
||||||
|
type = UNCOMPRESSED;
|
||||||
memcpy(compressed_chunk + index_size_cmp,
|
memcpy(compressed_chunk + index_size_cmp,
|
||||||
tdat->uncompressed_chunk + dedupe_index_sz, _chunksize);
|
tdat->uncompressed_chunk + dedupe_index_sz, _chunksize);
|
||||||
|
}
|
||||||
/* Now update rabin header with the compressed sizes. */
|
/* Now update rabin header with the compressed sizes. */
|
||||||
update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, _chunksize);
|
update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, _chunksize);
|
||||||
_chunksize += index_size_cmp;
|
_chunksize += index_size_cmp;
|
||||||
|
@ -1522,14 +1529,12 @@ plain_index:
|
||||||
* chunk will be left uncompressed.
|
* chunk will be left uncompressed.
|
||||||
*/
|
*/
|
||||||
tdat->len_cmp = _chunksize;
|
tdat->len_cmp = _chunksize;
|
||||||
if (_chunksize >= rbytes || rv < 0) {
|
if (_chunksize >= tdat->rbytes || rv < 0) {
|
||||||
if (!(pctx->enable_rabin_scan || pctx->enable_fixed_scan) || !tdat->rctx->valid)
|
if (!(pctx->enable_rabin_scan || pctx->enable_fixed_scan) || !tdat->rctx->valid)
|
||||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
||||||
type = UNCOMPRESSED;
|
type = UNCOMPRESSED;
|
||||||
tdat->len_cmp = tdat->rbytes;
|
tdat->len_cmp = tdat->rbytes;
|
||||||
if (rv < 0) rv = COMPRESS_NONE;
|
if (rv < 0) rv = COMPRESS_NONE;
|
||||||
} else {
|
|
||||||
type = COMPRESSED;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2582,7 +2587,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
int opt, num_rem, err, my_optind;
|
int opt, num_rem, err, my_optind;
|
||||||
char *pos;
|
char *pos;
|
||||||
|
|
||||||
pctx->level = 6;
|
pctx->level = -1;
|
||||||
err = 0;
|
err = 0;
|
||||||
pctx->keylen = DEFAULT_KEYLEN;
|
pctx->keylen = DEFAULT_KEYLEN;
|
||||||
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
||||||
|
@ -2743,6 +2748,13 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
return (2);
|
return (2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pctx->level == -1) {
|
||||||
|
if (memcmp(pctx->algo, "lz4", 3) == 0) {
|
||||||
|
pctx->level = 1;
|
||||||
|
} else {
|
||||||
|
pctx->level = 6;
|
||||||
|
}
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Remaining mandatory arguments are the filenames.
|
* Remaining mandatory arguments are the filenames.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -824,7 +824,7 @@ process_blocks:
|
||||||
* Block match in index not found.
|
* Block match in index not found.
|
||||||
* Block was added to index. Merge this block.
|
* Block was added to index. Merge this block.
|
||||||
*/
|
*/
|
||||||
if (length + ctx->g_blocks[i].length > RABIN_MAX_BLOCK_SIZE) {
|
if (length + ctx->g_blocks[i].length >= RABIN_MAX_BLOCK_SIZE) {
|
||||||
*((uint32_t *)g_dedupe_idx) = LE32(length);
|
*((uint32_t *)g_dedupe_idx) = LE32(length);
|
||||||
g_dedupe_idx += RABIN_ENTRY_SIZE;
|
g_dedupe_idx += RABIN_ENTRY_SIZE;
|
||||||
length = 0;
|
length = 0;
|
||||||
|
@ -1602,7 +1602,7 @@ dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size)
|
||||||
* However this approach precludes pipe-mode streamed decompression since
|
* However this approach precludes pipe-mode streamed decompression since
|
||||||
* it requires random access to the output file.
|
* it requires random access to the output file.
|
||||||
*/
|
*/
|
||||||
if (pos1 > offset) {
|
if (pos1 >= offset) {
|
||||||
src2 = ctx->cbuf + (pos1 - offset);
|
src2 = ctx->cbuf + (pos1 - offset);
|
||||||
memcpy(pos2, src2, len);
|
memcpy(pos2, src2, len);
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in a new issue