From f34cfb1aa6c18deb2c2512a558dc526e6eb86ccb Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 21 Jul 2013 09:31:59 +0530 Subject: [PATCH] Make data partitioning between threads more effective. Remove unnecessary computation to make Fixed block chunking faster. --- pcompress.c | 5 +++-- rabin/rabin_dedup.c | 2 -- rabin/rabin_dedup.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pcompress.c b/pcompress.c index 1c3f81f..c4ced3a 100644 --- a/pcompress.c +++ b/pcompress.c @@ -2180,8 +2180,9 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev */ file_offset = 0; if (pctx->enable_rabin_split) { - rctx = create_dedupe_context(chunksize, 0, 0, pctx->algo, &props, pctx->enable_delta_encode, - pctx->enable_fixed_scan, VERSION, COMPRESS, 0, NULL, pctx->pipe_mode, nprocs); + rctx = create_dedupe_context(chunksize, 0, pctx->rab_blk_size, pctx->algo, &props, + pctx->enable_delta_encode, pctx->enable_fixed_scan, VERSION, COMPRESS, 0, NULL, + pctx->pipe_mode, nprocs); rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx); } else { rbytes = Read(uncompfd, cread_buf, chunksize); diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 83414c6..51fd44e 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -512,8 +512,6 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of ctx->blocks[i]->index = i; // Need to store for sorting ctx->blocks[i]->length = length; ctx->blocks[i]->similar = 0; - ctx->blocks[i]->hash = XXH32(buf1+last_offset, length, 0); - ctx->blocks[i]->similarity_hash = ctx->blocks[i]->hash; last_offset += length; } goto process_blocks; diff --git a/rabin/rabin_dedup.h b/rabin/rabin_dedup.h index d6cc1d9..323f9db 100644 --- a/rabin/rabin_dedup.h +++ b/rabin/rabin_dedup.h @@ -80,7 +80,7 @@ #define RAB_POLYNOMIAL_WIN_SIZE 16 #define RAB_POLYNOMIAL_MIN_WIN_SIZE 8 #define RAB_POLYNOMIAL_MAX_WIN_SIZE 64 -#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024) +#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (64 * 1024) #define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1) #define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS))