From 935717373b2fcf61eae02d4ba250bbd2bb9116bc Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 30 Mar 2014 17:35:21 +0530 Subject: [PATCH] Capability to list offset and length of each block when deduplicating, for external use. --- README.md | 3 +++ pcompress.c | 6 +++++- pcompress.h | 1 + rabin/rabin_dedup.c | 7 +++++++ rabin/rabin_dedup.h | 1 + 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c08699c..7e0f028 100644 --- a/README.md +++ b/README.md @@ -361,6 +361,9 @@ Advanced usage -M Display memory allocator statistics. -C Display compression statistics. + -CC Display compression statistics and print the offset and length of each + variable length dedupe block if variable block deduplication is being + used. This has no effect for fixed block deduplication. Environment Variables ===================== diff --git a/pcompress.c b/pcompress.c index 379cc6c..260bd5a 100644 --- a/pcompress.c +++ b/pcompress.c @@ -2243,6 +2243,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev COMP_BAIL; } + tdat->rctx->show_chunks = pctx->show_chunks; tdat->rctx->index_sem = &(tdat->index_sem); tdat->rctx->id = i; } @@ -2906,7 +2907,10 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) break; case 'C': - pctx->hide_cmp_stats = 0; + if (pctx->hide_cmp_stats) + pctx->hide_cmp_stats = 0; + else + pctx->show_chunks = 1; break; case 'D': diff --git a/pcompress.h b/pcompress.h index 17b9c45..27e5688 100644 --- a/pcompress.h +++ b/pcompress.h @@ -198,6 +198,7 @@ typedef struct pc_ctx { int nthreads; int hide_mem_stats; int hide_cmp_stats; + int show_chunks; int enable_rabin_scan; int enable_rabin_global; int enable_delta_encode; diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 923eed1..fe8377b 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -293,6 +293,7 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s ctx->deltac_min_distance = 
props->deltac_min_distance; ctx->pagesize = sysconf(_SC_PAGE_SIZE); ctx->similarity_cksums = NULL; + ctx->show_chunks = 0; if (arc) { arc->pagesize = ctx->pagesize; if (rab_blk_sz < 3) @@ -679,6 +680,9 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of ctx->g_blocks[blknum].offset = last_offset; } DEBUG_STAT_EN(if (length >= ctx->rabin_poly_max_block_size) ++max_count); + if (ctx->show_chunks) { + fprintf(stderr, "Block offset: %" PRIu64 ", length: %u\n", last_offset, length); + } /* * Reset the heap structure and find the K min values if Delta Compression @@ -726,6 +730,9 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of ctx->g_blocks[blknum].length = length; ctx->g_blocks[blknum].offset = last_offset; } + if (ctx->show_chunks) { + fprintf(stderr, "Block offset: %" PRIu64 ", length: %u\n", last_offset, length); + } if (ctx->delta_flag) { uint64_t cur_sketch; diff --git a/rabin/rabin_dedup.h b/rabin/rabin_dedup.h index 9fbb0a4..ce4f05f 100644 --- a/rabin/rabin_dedup.h +++ b/rabin/rabin_dedup.h @@ -188,6 +188,7 @@ typedef struct { uint32_t pagesize; int out_fd; int id; + int show_chunks; // Debug display of chunks (offset, length) } dedupe_context_t; extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize,