From 962a2cae8a00ae64af72a469a0353202fc72f95a Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 22 Jul 2012 00:00:41 +0530 Subject: [PATCH] Compress Dedup index only if it is at least 90 bytes to avoid expansion. Some minor cleanup. --- main.c | 24 ++++++++++++++++++------ rabin/rabin_polynomial.c | 1 - rabin/rabin_polynomial.h | 6 +++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/main.c b/main.c index 3471ba3..fa74662 100644 --- a/main.c +++ b/main.c @@ -216,10 +216,16 @@ redo: goto cont; } + rv = 0; cmpbuf = cseg + RABIN_HDR_SIZE; ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE; - rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf, &rabin_index_sz, - tdat->rctx->level, 0, tdat->rctx->lzma_data); + if (rabin_index_sz >= 90) { + /* Index should be at least 90 bytes to have been compressed. */ + rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf, + &rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data); + } else { + memcpy(ubuf, cmpbuf, rabin_index_sz); + } } else { rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data); @@ -630,10 +636,16 @@ redo: index_size_cmp = rabin_index_sz; memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE); - /* Compress index. */ - rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE, - rabin_index_sz, compressed_chunk + RABIN_HDR_SIZE, &index_size_cmp, - tdat->rctx->level, 0, tdat->rctx->lzma_data); + rv = 0; + if (rabin_index_sz >= 90) { + /* Compress index if it is at least 90 bytes. */ + rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE, + rabin_index_sz, compressed_chunk + RABIN_HDR_SIZE, + &index_size_cmp, tdat->rctx->level, 0, tdat->rctx->lzma_data); + } else { + memcpy(compressed_chunk + RABIN_HDR_SIZE, + tdat->uncompressed_chunk + RABIN_HDR_SIZE, rabin_index_sz); + } index_size_cmp += RABIN_HDR_SIZE; rabin_index_sz += RABIN_HDR_SIZE; diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c index 858fa32..2bf1ef7 100755 --- a/rabin/rabin_polynomial.c +++ b/rabin/rabin_polynomial.c @@ -424,7 +424,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s if (blk > 0 && ctx->blocks[blk].refcount == 0 && ctx->blocks[blk].cksum_n_offset == prev_cksum) { - ssize_t sz1, sz2; ctx->blocks[blk].index = prev_index; ctx->blocks[blk].similar = SIMILAR_PARTIAL; (ctx->blocks[prev_blk].refcount)++; diff --git a/rabin/rabin_polynomial.h b/rabin/rabin_polynomial.h index fe67122..91eb54e 100644 --- a/rabin/rabin_polynomial.h +++ b/rabin/rabin_polynomial.h @@ -124,9 +124,9 @@ // Mask to extract value from a rabin index entry #define RABIN_INDEX_VALUE (0x3FFFFFFFUL) -// Tolerance for partial similarity check. We expect 80% similarity for -// delta compression. See: http://www.armedia.com/wp/SimilarityIndex.pdf -#define SIMILARITY_TOLERANCE (0.2f) +/* + * Types of block similarity. + */ #define SIMILAR_EXACT 1 #define SIMILAR_PARTIAL 2