Change rabin index encoding scheme for better metadata compression.
This commit is contained in:
parent
a1825a2305
commit
a13c61e926
2 changed files with 24 additions and 4 deletions
|
@ -101,6 +101,11 @@ create_rabin_context(uint64_t chunksize) {
|
|||
if (chunksize % rabin_polynomial_min_block_size)
|
||||
blknum++;
|
||||
|
||||
if (blknum > RABIN_MAX_BLOCKS) {
|
||||
fprintf(stderr, "Chunk size too large for dedup.\n");
|
||||
destroy_rabin_context(ctx);
|
||||
return (NULL);
|
||||
}
|
||||
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
|
||||
current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE);
|
||||
ctx->blocks = (rabin_blockentry_t *)slab_alloc(NULL,
|
||||
|
@ -345,7 +350,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
|
|||
prev_length = 0;
|
||||
rabin_index[blk] = htonl(be->length);
|
||||
} else {
|
||||
if (prev_length + be->length <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
|
||||
if (prev_length + be->length <= RABIN_MAX_BLOCK_SIZE) {
|
||||
prev_length += be->length;
|
||||
rabin_index[prev_index] = htonl(prev_length);
|
||||
rabin_index[blk] = 0;
|
||||
|
@ -359,7 +364,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
|
|||
} else {
|
||||
prev_index = 0;
|
||||
prev_length = 0;
|
||||
rabin_index[blk] = htonl(RAB_POLYNOMIAL_MAX_BLOCK_SIZE + be->index + 1);
|
||||
rabin_index[blk] = htonl(be->index | RABIN_INDEX_FLAG);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -437,13 +442,13 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
|
|||
ctx->blocks[blk].length = 0;
|
||||
ctx->blocks[blk].index = 0;
|
||||
|
||||
} else if (len <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
|
||||
} else if (!(len & RABIN_INDEX_FLAG)) {
|
||||
ctx->blocks[blk].length = len;
|
||||
ctx->blocks[blk].offset = pos1;
|
||||
pos1 += len;
|
||||
} else {
|
||||
ctx->blocks[blk].length = 0;
|
||||
ctx->blocks[blk].index = len - RAB_POLYNOMIAL_MAX_BLOCK_SIZE - 1;
|
||||
ctx->blocks[blk].index = len & RABIN_INDEX_VALUE;
|
||||
}
|
||||
}
|
||||
for (blk = 0; blk < blknum; blk++) {
|
||||
|
|
|
@ -99,6 +99,21 @@ typedef struct {
|
|||
// size of deduped data, size of compressed data
|
||||
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))
|
||||
|
||||
// Maximum number of dedup blocks supported (2^31 - 1). A block index must fit in the lower 31 bits of a rabin index entry.
|
||||
#define RABIN_MAX_BLOCKS (0x7fffffff)
|
||||
|
||||
// Maximum possible block size for a single rabin block. This is a hard limit much
|
||||
// larger than RAB_POLYNOMIAL_MAX_BLOCK_SIZE. Useful when merging non-duplicate blocks.
|
||||
// This is also 2^31 - 1, so a stored length never has the MSB (RABIN_INDEX_FLAG) set.
|
||||
#define RABIN_MAX_BLOCK_SIZE (RABIN_MAX_BLOCKS)
|
||||
|
||||
// Mask to determine whether a rabin index entry holds a length value or a block index value.
|
||||
// MSB = 1 : Index
|
||||
// MSB = 0 : Length
|
||||
#define RABIN_INDEX_FLAG (0x80000000)
|
||||
// Mask to extract the value (lower 31 bits) from a rabin index entry.
|
||||
#define RABIN_INDEX_VALUE (0x7fffffff)
|
||||
|
||||
typedef struct {
|
||||
unsigned char *current_window_data;
|
||||
rabin_blockentry_t *blocks;
|
||||
|
|
Loading…
Reference in a new issue