From f5ce45b16ee7b782d85e70d5f99b1f18a928286e Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Fri, 6 Jul 2012 00:16:02 +0530 Subject: [PATCH] Techniques to better reduce Rabin Metadata. Fix wrong chunk sizing with dedup enabled. --- main.c | 17 ++++-- rabin/rabin_polynomial.c | 115 ++++++++++++++++++++++++++------------- rabin/rabin_polynomial.h | 7 ++- 3 files changed, 91 insertions(+), 48 deletions(-) diff --git a/main.c b/main.c index d491a4a..5895cfb 100644 --- a/main.c +++ b/main.c @@ -434,7 +434,7 @@ start_decompress(const char *filename, const char *to_filename) if (_init_func) _init_func(&(tdat->data), &(tdat->level), chunksize); if (enable_rabin_scan) - tdat->rctx = create_rabin_context(chunksize, algo); + tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, algo); else tdat->rctx = NULL; if (pthread_create(&(tdat->thr), NULL, perform_decompress, @@ -602,7 +602,7 @@ redo: rbytes = tdat->rbytes; reset_rabin_context(tdat->rctx); rctx->cbuf = tdat->uncompressed_chunk; - rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0); + rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); if (!rctx->valid) { memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes); tdat->rbytes = rbytes; @@ -630,13 +630,15 @@ redo: tdat->rctx->level, 0, tdat->rctx->lzma_data); index_size_cmp += RABIN_HDR_SIZE; + rabin_index_sz += RABIN_HDR_SIZE; if (rv == 0) { /* Compress data chunk. */ - rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz + RABIN_HDR_SIZE, + rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz, _chunksize, compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, tdat->data); /* Now update rabin header with the compressed sizes. */ - rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE , _chunksize); + rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, + _chunksize); } _chunksize += index_size_cmp; } else { @@ -881,7 +883,10 @@ start_compress(const char *filename, uint64_t chunksize, int level) fprintf(stderr, "Out of memory\n"); COMP_BAIL; } - tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL, chunksize); + if (enable_rabin_scan) + tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL, compressed_chunksize + CHDR_SZ); + else + tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL, chunksize); if (!tdat->uncompressed_chunk) { fprintf(stderr, "Out of memory\n"); COMP_BAIL; @@ -897,7 +902,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) if (_init_func) _init_func(&(tdat->data), &(tdat->level), chunksize); if (enable_rabin_scan) - tdat->rctx = create_rabin_context(chunksize, algo); + tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, algo); else tdat->rctx = NULL; diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c index ad08bb6..fcc8306 100755 --- a/rabin/rabin_polynomial.c +++ b/rabin/rabin_polynomial.c @@ -89,7 +89,7 @@ uint32_t rabin_polynomial_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE; * Initialize the algorithm with the default params. */ rabin_context_t * -create_rabin_context(uint64_t chunksize, const char *algo) { +create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *algo) { rabin_context_t *ctx; unsigned char *current_window_data; uint32_t blknum, index; @@ -149,6 +149,7 @@ create_rabin_context(uint64_t chunksize, const char *algo) { */ ctx->current_window_data = current_window_data; + ctx->real_chunksize = real_chunksize; reset_rabin_context(ctx); return (ctx); } @@ -182,11 +183,11 @@ cmpblks(const void *a, const void *b) rabin_blockentry_t *a1 = (rabin_blockentry_t *)a; rabin_blockentry_t *b1 = (rabin_blockentry_t *)b; - if (a1->checksum < b1->checksum) + if (a1->cksum_n_offset < b1->cksum_n_offset) return (-1); - else if (a1->checksum == b1->checksum) + else if (a1->cksum_n_offset == b1->cksum_n_offset) return (0); - else if (a1->checksum > b1->checksum) + else if (a1->cksum_n_offset > b1->cksum_n_offset) return (1); } @@ -195,18 +196,18 @@ cmpblks(const void *a, const void *b) * the rolling checksum and dedup blocks vary in size from 4K-128K. */ uint32_t -rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) +rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos) { ssize_t i, last_offset,j; uint32_t blknum; char *buf1 = (char *)buf; uint32_t length; - ssize_t rabin_index_sz; length = offset; last_offset = 0; blknum = 0; ctx->valid = 0; + ctx->cur_checksum = 0; if (*size < ctx->rabin_poly_avg_block_size) return; for (i=offset; i<*size; i++) { @@ -241,7 +242,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) length >= rabin_polynomial_max_block_size) { ctx->blocks[blknum].offset = last_offset; ctx->blocks[blknum].index = blknum; // Need to store for sorting - ctx->blocks[blknum].checksum = ctx->cur_checksum; + ctx->blocks[blknum].cksum_n_offset = ctx->cur_checksum; ctx->blocks[blknum].length = length; ctx->blocks[blknum].refcount = 0; blknum++; @@ -251,20 +252,25 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) } } + if (rabin_pos) { + *rabin_pos = last_offset; + return (0); + } // If we found at least a few chunks, perform dedup. if (blknum > 2) { uint64_t prev_cksum; - uint32_t blk, prev_length, nblocks; - ssize_t pos, matchlen; + uint32_t blk, prev_length; + ssize_t pos, matchlen, pos1; int valid = 1; char *tmp, *prev_offset; - uint32_t *rabin_index, prev_index, prev_blk; + uint32_t *blkarr, *trans, *rabin_index, prev_index, prev_blk; + ssize_t rabin_index_sz; // Insert the last left-over trailing bytes, if any, into a block. if (last_offset < *size) { ctx->blocks[blknum].offset = last_offset; ctx->blocks[blknum].index = blknum; - ctx->blocks[blknum].checksum = ctx->cur_checksum; + ctx->blocks[blknum].cksum_n_offset = ctx->cur_checksum; ctx->blocks[blknum].length = *size - last_offset; ctx->blocks[blknum].refcount = 0; blknum++; @@ -276,7 +282,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) prev_cksum = 0; prev_length = 0; prev_offset = 0; - pos = rabin_index_sz + RABIN_HDR_SIZE; /* * Now sort the block array based on checksums. This will bring virtually @@ -286,6 +291,13 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) */ qsort(ctx->blocks, blknum, sizeof (rabin_blockentry_t), cmpblks); rabin_index = (uint32_t *)(ctx->cbuf + RABIN_HDR_SIZE); + + /* + * We need 2 temporary arrays. We just use available space in the last + * portion of the buffer that will hold the deduped segment. + */ + blkarr = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (rabin_index_sz * 2 + 1)); + trans = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (rabin_index_sz + 1)); matchlen = 0; /* @@ -299,9 +311,9 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) * blocks. This helps in non-duplicate block merging later. */ for (blk = 0; blk < blknum; blk++) { - rabin_index[ctx->blocks[blk].index] = blk; + blkarr[ctx->blocks[blk].index] = blk; - if (blk > 0 && ctx->blocks[blk].checksum == prev_cksum && + if (blk > 0 && ctx->blocks[blk].cksum_n_offset == prev_cksum && ctx->blocks[blk].length == prev_length && memcmp(prev_offset, buf1 + ctx->blocks[blk].offset, prev_length) == 0) { ctx->blocks[blk].length = 0; @@ -312,7 +324,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) } prev_offset = buf1 + ctx->blocks[blk].offset; - prev_cksum = ctx->blocks[blk].checksum; + prev_cksum = ctx->blocks[blk].cksum_n_offset; prev_length = ctx->blocks[blk].length; prev_index = ctx->blocks[blk].index; prev_blk = blk; @@ -325,29 +337,18 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) /* * Another pass, this time through the block index in the chunk. We insert * block length into unique block entries. For block entries that are - * identical with another one we store the index number + max rabin block length. + * identical with another one we store the index number with msb set. * This way we can differentiate between a unique block length entry and a * pointer to another block without needing a separate flag. */ prev_index = 0; prev_length = 0; - nblocks = 0; + pos = 0; for (blk = 0; blk < blknum; blk++) { rabin_blockentry_t *be; - /* - * If blocks are overflowing the allowed chunk size then dedup did not - * help at all. We invalidate the dedup operation. - */ - if (pos > last_offset) { - valid = 0; - break; - } - be = &(ctx->blocks[rabin_index[blk]]); + be = &(ctx->blocks[blkarr[blk]]); if (be->length > 0) { - prev_offset = buf1 + be->offset; - memcpy(ctx->cbuf + pos, prev_offset, be->length); - pos += be->length; /* * Update Index entry with the length. Also try to merge runs * of unique (non-duplicate) blocks into a single block entry @@ -355,32 +356,67 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) */ if (prev_index == 0) { if (be->refcount == 0) { - prev_index = blk; + prev_index = pos; prev_length = be->length; } - rabin_index[blk] = htonl(be->length); + rabin_index[pos] = be->length; + ctx->blocks[pos].cksum_n_offset = be->offset; + trans[blk] = pos; + pos++; } else { if (be->refcount > 0) { prev_index = 0; prev_length = 0; - rabin_index[blk] = htonl(be->length); + rabin_index[pos] = be->length; + ctx->blocks[pos].cksum_n_offset = be->offset; + trans[blk] = pos; + pos++; } else { if (prev_length + be->length <= RABIN_MAX_BLOCK_SIZE) { prev_length += be->length; - rabin_index[prev_index] = htonl(prev_length); - rabin_index[blk] = 0; - nblocks++; + rabin_index[prev_index] = prev_length; } else { prev_index = 0; prev_length = 0; - rabin_index[blk] = htonl(be->length); + rabin_index[pos] = be->length; + ctx->blocks[pos].cksum_n_offset = be->offset; + trans[blk] = pos; + pos++; } } } } else { prev_index = 0; prev_length = 0; - rabin_index[blk] = htonl(be->index | RABIN_INDEX_FLAG); + blkarr[blk] = htonl(be->index | RABIN_INDEX_FLAG); + rabin_index[pos] = be->index | RABIN_INDEX_FLAG; + trans[blk] = pos; + pos++; + } + } + + /* + * Final pass, copy the data. + */ + blknum = pos; + rabin_index_sz = (ssize_t)pos * RABIN_ENTRY_SIZE; + pos1 = rabin_index_sz + RABIN_HDR_SIZE; + for (blk = 0; blk < blknum; blk++) { + if (rabin_index[blk] & RABIN_INDEX_FLAG) { + j = rabin_index[blk] & RABIN_INDEX_VALUE; + rabin_index[blk] = htonl(trans[j] | RABIN_INDEX_FLAG); + } else { + /* + * If blocks are overflowing the allowed chunk size then dedup did not + * help at all. We invalidate the dedup operation. + */ + if (pos1 > last_offset) { + valid = 0; + break; + } + memcpy(ctx->cbuf + pos1, buf1 + ctx->blocks[blk].cksum_n_offset, rabin_index[blk]); + pos1 += rabin_index[blk]; + rabin_index[blk] = htonl(rabin_index[blk]); } } cont: @@ -393,9 +429,10 @@ cont: entries = (ssize_t *)cbuf; entries[0] = htonll(*size); entries[1] = 0; - entries[2] = htonll(pos - rabin_index_sz - RABIN_HDR_SIZE); - *size = pos; + entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE); + *size = pos1; ctx->valid = 1; + /* * Remaining header entries: size of compressed index and size of * compressed data are inserted later via rabin_update_hdr, after actual compression! diff --git a/rabin/rabin_polynomial.h b/rabin/rabin_polynomial.h index a81d9ad..e6ce575 100644 --- a/rabin/rabin_polynomial.h +++ b/rabin/rabin_polynomial.h @@ -85,7 +85,7 @@ typedef struct { ssize_t offset; - uint64_t checksum; + uint64_t cksum_n_offset; // Dual purpose variable unsigned int index; unsigned int length; unsigned short refcount; @@ -131,16 +131,17 @@ typedef struct { uint32_t rabin_poly_min_block_size; uint32_t rabin_poly_avg_block_size; uint32_t rabin_avg_block_mask; + uint64_t real_chunksize; int dedup; int valid; void *lzma_data; int level; } rabin_context_t; -extern rabin_context_t *create_rabin_context(uint64_t chunksize, const char *algo); +extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *algo); extern void destroy_rabin_context(rabin_context_t *ctx); extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf, - ssize_t *size, ssize_t offset); + ssize_t *size, ssize_t offset, ssize_t *rabin_pos); extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size); extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz, ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,