Compress Dedup index only if it is at least 90 bytes to avoid expansion.

Some minor cleanup.
This commit is contained in:
Moinak Ghosh 2012-07-22 00:00:41 +05:30
parent b69dcf4d55
commit 962a2cae8a
3 changed files with 21 additions and 10 deletions

22
main.c
View file

@@ -216,10 +216,16 @@ redo:
         goto cont;
     }
+    rv = 0;
     cmpbuf = cseg + RABIN_HDR_SIZE;
     ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE;
-    rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf, &rabin_index_sz,
-        tdat->rctx->level, 0, tdat->rctx->lzma_data);
+    if (rabin_index_sz >= 90) {
+        /* Index should be at least 90 bytes to have been compressed. */
+        rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf,
+            &rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
+    } else {
+        memcpy(ubuf, cmpbuf, rabin_index_sz);
+    }
 } else {
     rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
         &_chunksize, tdat->level, HDR, tdat->data);
@@ -630,10 +636,16 @@ redo:
 index_size_cmp = rabin_index_sz;
 memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
-/* Compress index. */
-rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE,
-    rabin_index_sz, compressed_chunk + RABIN_HDR_SIZE, &index_size_cmp,
-    tdat->rctx->level, 0, tdat->rctx->lzma_data);
+rv = 0;
+if (rabin_index_sz >= 90) {
+    /* Compress index if it is at least 90 bytes. */
+    rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE,
+        rabin_index_sz, compressed_chunk + RABIN_HDR_SIZE,
+        &index_size_cmp, tdat->rctx->level, 0, tdat->rctx->lzma_data);
+} else {
+    memcpy(compressed_chunk + RABIN_HDR_SIZE,
+        tdat->uncompressed_chunk + RABIN_HDR_SIZE, rabin_index_sz);
+}
 index_size_cmp += RABIN_HDR_SIZE;
 rabin_index_sz += RABIN_HDR_SIZE;

View file

@@ -424,7 +424,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 if (blk > 0 && ctx->blocks[blk].refcount == 0 &&
     ctx->blocks[blk].cksum_n_offset == prev_cksum) {
-    ssize_t sz1, sz2;
     ctx->blocks[blk].index = prev_index;
     ctx->blocks[blk].similar = SIMILAR_PARTIAL;
     (ctx->blocks[prev_blk].refcount)++;

View file

@@ -124,9 +124,9 @@
 // Mask to extract value from a rabin index entry
 #define RABIN_INDEX_VALUE (0x3FFFFFFFUL)
-// Tolerance for partial similarity check. We expect 80% similarity for
-// delta compression. See: http://www.armedia.com/wp/SimilarityIndex.pdf
-#define SIMILARITY_TOLERANCE (0.2f)
+/*
+ * Types of block similarity.
+ */
 #define SIMILAR_EXACT 1
 #define SIMILAR_PARTIAL 2