Change rabin index encoding scheme for better metadata compression.

This commit is contained in:
Moinak Ghosh 2012-07-02 22:08:03 +05:30
parent a1825a2305
commit a13c61e926
2 changed files with 24 additions and 4 deletions

View file

@ -101,6 +101,11 @@ create_rabin_context(uint64_t chunksize) {
if (chunksize % rabin_polynomial_min_block_size)
blknum++;
if (blknum > RABIN_MAX_BLOCKS) {
fprintf(stderr, "Chunk size too large for dedup.\n");
destroy_rabin_context(ctx);
return (NULL);
}
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE);
ctx->blocks = (rabin_blockentry_t *)slab_alloc(NULL,
@ -345,7 +350,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
prev_length = 0;
rabin_index[blk] = htonl(be->length);
} else {
if (prev_length + be->length <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
if (prev_length + be->length <= RABIN_MAX_BLOCK_SIZE) {
prev_length += be->length;
rabin_index[prev_index] = htonl(prev_length);
rabin_index[blk] = 0;
@ -359,7 +364,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
} else {
prev_index = 0;
prev_length = 0;
rabin_index[blk] = htonl(RAB_POLYNOMIAL_MAX_BLOCK_SIZE + be->index + 1);
rabin_index[blk] = htonl(be->index | RABIN_INDEX_FLAG);
}
}
@ -437,13 +442,13 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
ctx->blocks[blk].length = 0;
ctx->blocks[blk].index = 0;
} else if (len <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
} else if (!(len & RABIN_INDEX_FLAG)) {
ctx->blocks[blk].length = len;
ctx->blocks[blk].offset = pos1;
pos1 += len;
} else {
ctx->blocks[blk].length = 0;
ctx->blocks[blk].index = len - RAB_POLYNOMIAL_MAX_BLOCK_SIZE - 1;
ctx->blocks[blk].index = len & RABIN_INDEX_VALUE;
}
}
for (blk = 0; blk < blknum; blk++) {

View file

@ -99,6 +99,21 @@ typedef struct {
// size of deduped data, size of compressed data
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))
// Rabin index entry limits and encoding.
//
// Each index entry is a 32-bit value whose most significant bit says how the
// remaining 31 bits are to be read. Because that bit is reserved, both block
// counts and block lengths must fit in 31 bits, hence the 2^31 - 1 limits below.

// Maximum number of dedup blocks supported (2^31 - 1). A block number that
// still has to fit in the 31-bit value part of an index entry.
#define RABIN_MAX_BLOCKS (0x7fffffff)
// Hard upper limit on the length stored for a single rabin block (also 2^31 - 1).
// This is much larger than RAB_POLYNOMIAL_MAX_BLOCK_SIZE: consecutive
// non-duplicate blocks are merged into one length entry, and the merged length
// may grow up to this limit before a new entry has to be started.
#define RABIN_MAX_BLOCK_SIZE (RABIN_MAX_BLOCKS)
// Flag bit that tells whether a rabin index entry is a length or an index.
// MSB = 1 : entry holds the index of another block (a duplicate reference)
// MSB = 0 : entry holds a block length
#define RABIN_INDEX_FLAG (0x80000000)
// Mask that extracts the 31-bit value (everything except RABIN_INDEX_FLAG)
// from a rabin index entry.
#define RABIN_INDEX_VALUE (0x7fffffff)
typedef struct {
unsigned char *current_window_data;
rabin_blockentry_t *blocks;