Change rabin index encoding scheme for better metadata compression.
This commit is contained in:
parent
a1825a2305
commit
a13c61e926
2 changed files with 24 additions and 4 deletions
|
@ -101,6 +101,11 @@ create_rabin_context(uint64_t chunksize) {
|
||||||
if (chunksize % rabin_polynomial_min_block_size)
|
if (chunksize % rabin_polynomial_min_block_size)
|
||||||
blknum++;
|
blknum++;
|
||||||
|
|
||||||
|
if (blknum > RABIN_MAX_BLOCKS) {
|
||||||
|
fprintf(stderr, "Chunk size too large for dedup.\n");
|
||||||
|
destroy_rabin_context(ctx);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
|
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
|
||||||
current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE);
|
current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE);
|
||||||
ctx->blocks = (rabin_blockentry_t *)slab_alloc(NULL,
|
ctx->blocks = (rabin_blockentry_t *)slab_alloc(NULL,
|
||||||
|
@ -345,7 +350,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
|
||||||
prev_length = 0;
|
prev_length = 0;
|
||||||
rabin_index[blk] = htonl(be->length);
|
rabin_index[blk] = htonl(be->length);
|
||||||
} else {
|
} else {
|
||||||
if (prev_length + be->length <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
|
if (prev_length + be->length <= RABIN_MAX_BLOCK_SIZE) {
|
||||||
prev_length += be->length;
|
prev_length += be->length;
|
||||||
rabin_index[prev_index] = htonl(prev_length);
|
rabin_index[prev_index] = htonl(prev_length);
|
||||||
rabin_index[blk] = 0;
|
rabin_index[blk] = 0;
|
||||||
|
@ -359,7 +364,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
|
||||||
} else {
|
} else {
|
||||||
prev_index = 0;
|
prev_index = 0;
|
||||||
prev_length = 0;
|
prev_length = 0;
|
||||||
rabin_index[blk] = htonl(RAB_POLYNOMIAL_MAX_BLOCK_SIZE + be->index + 1);
|
rabin_index[blk] = htonl(be->index | RABIN_INDEX_FLAG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -437,13 +442,13 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
|
||||||
ctx->blocks[blk].length = 0;
|
ctx->blocks[blk].length = 0;
|
||||||
ctx->blocks[blk].index = 0;
|
ctx->blocks[blk].index = 0;
|
||||||
|
|
||||||
} else if (len <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE) {
|
} else if (!(len & RABIN_INDEX_FLAG)) {
|
||||||
ctx->blocks[blk].length = len;
|
ctx->blocks[blk].length = len;
|
||||||
ctx->blocks[blk].offset = pos1;
|
ctx->blocks[blk].offset = pos1;
|
||||||
pos1 += len;
|
pos1 += len;
|
||||||
} else {
|
} else {
|
||||||
ctx->blocks[blk].length = 0;
|
ctx->blocks[blk].length = 0;
|
||||||
ctx->blocks[blk].index = len - RAB_POLYNOMIAL_MAX_BLOCK_SIZE - 1;
|
ctx->blocks[blk].index = len & RABIN_INDEX_VALUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (blk = 0; blk < blknum; blk++) {
|
for (blk = 0; blk < blknum; blk++) {
|
||||||
|
|
|
@ -99,6 +99,21 @@ typedef struct {
|
||||||
// size of deduped data, size of compressed data
|
// size of deduped data, size of compressed data
|
||||||
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))
|
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))
|
||||||
|
|
||||||
|
// Maximum number of dedup blocks supported (2^31 - 1)
|
||||||
|
#define RABIN_MAX_BLOCKS (0x7fffffff)
|
||||||
|
|
||||||
|
// Maximum possible block size for a single rabin block. This is a hard limit much
|
||||||
|
// larger than RAB_POLYNOMIAL_MAX_BLOCK_SIZE. Useful when merging non-duplicate blocks.
|
||||||
|
// This is also 2^31 - 1, so a length always fits in the low 31 bits and leaves the MSB free for RABIN_INDEX_FLAG.
|
||||||
|
#define RABIN_MAX_BLOCK_SIZE (RABIN_MAX_BLOCKS)
|
||||||
|
|
||||||
|
// Mask to determine whether a Rabin index entry is a length value or an index value.
|
||||||
|
// MSB = 1 : Index
|
||||||
|
// MSB = 0 : Length
|
||||||
|
#define RABIN_INDEX_FLAG (0x80000000)
|
||||||
|
// Mask to extract the value from a Rabin index entry.
|
||||||
|
#define RABIN_INDEX_VALUE (0x7fffffff)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned char *current_window_data;
|
unsigned char *current_window_data;
|
||||||
rabin_blockentry_t *blocks;
|
rabin_blockentry_t *blocks;
|
||||||
|
|
Loading…
Reference in a new issue