diff --git a/main.c b/main.c index e640877..34ca19a 100644 --- a/main.c +++ b/main.c @@ -88,6 +88,7 @@ static int do_compress = 0; static int do_uncompress = 0; static int cksum_bytes; static int cksum = 0; +static int rab_blk_size = 0; static rabin_context_t *rctx; static void @@ -138,13 +139,15 @@ usage(void) " %s -E ... - This also implies '-D'.\n" "6) Number of threads can optionally be specified: -t <1 - 256 count>\n" "7) Other flags:\n" - " '-L' - Enable LZP pre-compression. This improves compression ratio of all\n" - " algorithms with some extra CPU and very low RAM overhead.\n" - " '-S' \n" - " - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n" - " Default one is SKEIN256.\n" - " '-M' - Display memory allocator statistics\n" - " '-C' - Display compression statistics\n\n", + " '-L' - Enable LZP pre-compression. This improves compression ratio of all\n" + " algorithms with some extra CPU and very low RAM overhead.\n" + " '-S' \n" + " - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n" + " Default one is SKEIN256.\n" + " '-B' <1..5>\n" + " - Specify a minimum Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n" + " '-M' - Display memory allocator statistics\n" + " '-C' - Display compression statistics\n\n", UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name); } @@ -578,7 +581,7 @@ start_decompress(const char *filename, const char *to_filename) } } if (enable_rabin_scan) { - tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, 0, + tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size, algo, enable_delta_encode); if (tdat->rctx == NULL) { UNCOMP_BAIL; @@ -1130,7 +1133,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) } } if (enable_rabin_scan) { - tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, 0, + tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size, algo, enable_delta_encode); if (tdat->rctx == NULL) { COMP_BAIL; @@ -1521,7 +1524,7 @@ main(int argc, char *argv[]) level = 6; slab_init(); - while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:")) != -1) { + while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:")) != -1) { int ovr; switch (opt) { @@ -1555,6 +1558,12 @@ main(int argc, char *argv[]) err_exit(0, "Compression level should be in range 0 - 14\n"); break; + case 'B': + rab_blk_size = atoi(optarg); + if (rab_blk_size < 1 || rab_blk_size > 5) + err_exit(0, "Minimum Dedupe block size must be in range 1 (4k) - 5 (64k)\n"); + break; + case 'p': pipe_mode = 1; break; diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 67b9821..4da4cbd 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -88,6 +88,9 @@ rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_ uint32_t min_blk; min_blk = 1 << (rab_blk_sz + RAB_BLK_MIN_BITS); + if (rab_blk_sz > 1) + return (min_blk); + if (((memcmp(algo, "lzma", 4) == 0 || memcmp(algo, "adapt", 5) == 0) && chunksize <= LZMA_WINDOW_MAX) || delta_flag) { if (memcmp(algo, "lzfx", 4) == 0 || memcmp(algo, "lz4", 3) == 0 || @@ -220,6 +223,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz * x * polynomial_pow can we written as x << RAB_POLYNOMIAL_WIN_SIZE */ + slab_cache_add(sizeof (rabin_blockentry_t)); ctx->current_window_data = current_window_data; ctx->real_chunksize = real_chunksize; reset_rabin_context(ctx); @@ -408,7 +412,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s * if (fplist[fpos[1]] > fplist[fpos[0]]) fpos[0] = fpos[1]; */ fpos[0] = fpos[(fplist[fpos[1]] > fplist[fpos[0]])]; - if (len1 == SKETCH_BASIC_BLOCK_SZ) { + if (len1 == SKETCH_BASIC_BLOCK_SZ && ctx->delta_flag) { uint32_t p1, p2, p3; /* * Compute the super sketch value by summing all the representative @@ -460,9 +464,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s ctx->blocks[blknum]->crc = XXH_strong32(buf1+last_offset, length, 0); // Accumulate the 2 sketch values into a combined similarity checksum - ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2; - ctx->blocks[blknum]->mean_n_length = cur_sketch / j; - memset(fplist, 0, fplist_sz); + if (ctx->delta_flag) { + ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2; + ctx->blocks[blknum]->mean_n_length = cur_sketch / j; + memset(fplist, 0, fplist_sz); + } else { + ctx->blocks[blknum]->cksum_n_offset = 0; + ctx->blocks[blknum]->mean_n_length = 0; + } fpos[0] = 0; len1 = 0; cur_sketch = 0; @@ -498,9 +507,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s ctx->blocks[blknum]->ref = 0; ctx->blocks[blknum]->similar = 0; - j = (j > 0 ? j:1); - ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2; - ctx->blocks[blknum]->mean_n_length = cur_sketch / j; + if (ctx->delta_flag) { + j = (j > 0 ? j:1); + ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2; + ctx->blocks[blknum]->mean_n_length = cur_sketch / j; + } else { + ctx->blocks[blknum]->cksum_n_offset = 0; + ctx->blocks[blknum]->mean_n_length = 0; + } ctx->blocks[blknum]->crc = XXH_strong32(buf1+last_offset, ctx->blocks[blknum]->length, 0); blknum++; last_offset = *size; @@ -771,6 +785,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size) sz = 0; ctx->valid = 1; + slab_cache_add(sizeof (rabin_blockentry_t)); for (blk = 0; blk < blknum; blk++) { if (ctx->blocks[blk] == 0) ctx->blocks[blk] = (rabin_blockentry_t *)slab_alloc(NULL, sizeof (rabin_blockentry_t));