Reduce dedupe loop checks for slight speed edge.

Beginnings of Fixed-block dedupe.
Update variable name for clarity.
This commit is contained in:
Moinak Ghosh 2012-09-15 11:14:58 +05:30
parent a6b3719d89
commit b9355a5dcc
5 changed files with 109 additions and 83 deletions

View file

@ -115,7 +115,7 @@ Examples
========
Compress "file.tar" using bzip2 level 6, 64MB chunk size and use 4 threads. In
addition perform exact deduplication and delta compression prior to compression.
addition perform identity deduplication and delta compression prior to compression.
pcompress -D -E -c bzip2 -l6 -s64m -t4 file.tar
@ -177,6 +177,3 @@ Normally this utility requires lots of RAM depending on compression algorithm,
compression level, and dedupe being enabled. Larger chunk sizes can give
better compression ratio but at the same time use more RAM.
In some cases, for files smaller than a gigabyte, using Delta Compression in addition
to exact Dedupe can have a slight negative impact on the LZMA compression ratio,
especially when using the large-window ultra compression levels above 10.

65
main.c
View file

@ -79,6 +79,7 @@ static int hide_cmp_stats = 1;
static int enable_rabin_scan = 0;
static int enable_delta_encode = 0;
static int enable_rabin_split = 1;
static int enable_fixed_scan = 0;
static int lzp_preprocess = 0;
static unsigned int chunk_num;
static uint64_t largest_chunk, smallest_chunk, avg_chunk;
@ -261,7 +262,7 @@ perform_decompress(void *dat)
{
struct cmp_data *tdat = (struct cmp_data *)dat;
ssize_t _chunksize;
ssize_t rabin_index_sz, rabin_data_sz, rabin_index_sz_cmp, rabin_data_sz_cmp;
ssize_t dedupe_index_sz, rabin_data_sz, dedupe_index_sz_cmp, rabin_data_sz_cmp;
int type, rv;
unsigned int blknum;
uchar_t checksum[CKSUM_MAX_BYTES];
@ -302,8 +303,8 @@ redo:
uchar_t *cmpbuf, *ubuf;
/* Extract various sizes from rabin header. */
rabin_parse_hdr(cseg, &blknum, &rabin_index_sz, &rabin_data_sz,
&rabin_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize);
rabin_parse_hdr(cseg, &blknum, &dedupe_index_sz, &rabin_data_sz,
&dedupe_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize);
memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE);
/*
@ -312,8 +313,8 @@ redo:
* state/dictionary info. Since data chunk directly follows index
* uncompressing index first corrupts the data.
*/
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
cmpbuf = cseg + RABIN_HDR_SIZE + dedupe_index_sz_cmp;
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + dedupe_index_sz;
if (HDR & COMPRESSED) {
if (HDR & CHUNK_FLAG_PREPROC) {
rv = preproc_decompress(tdat->decompress, cmpbuf, rabin_data_sz_cmp,
@ -334,12 +335,12 @@ redo:
rv = 0;
cmpbuf = cseg + RABIN_HDR_SIZE;
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE;
if (rabin_index_sz >= 90) {
if (dedupe_index_sz >= 90) {
/* Index should be at least 90 bytes to have been compressed. */
rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf,
&rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
rv = lzma_decompress(cmpbuf, dedupe_index_sz_cmp, ubuf,
&dedupe_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
} else {
memcpy(ubuf, cmpbuf, rabin_index_sz);
memcpy(ubuf, cmpbuf, dedupe_index_sz);
}
} else {
if (HDR & COMPRESSED) {
@ -529,6 +530,9 @@ start_decompress(const char *filename, const char *to_filename)
if (flags & FLAG_DEDUP) {
enable_rabin_scan = 1;
} else if (flags & FLAG_DEDUP_FIXED) {
enable_fixed_scan = 1;
}
if (flags & FLAG_SINGLE_CHUNK) {
@ -580,7 +584,7 @@ start_decompress(const char *filename, const char *to_filename)
}
if (enable_rabin_scan) {
tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
algo, enable_delta_encode);
algo, enable_delta_encode, enable_fixed_scan);
if (tdat->rctx == NULL) {
UNCOMP_BAIL;
}
@ -750,7 +754,7 @@ uncomp_done:
static void *
perform_compress(void *dat) {
struct cmp_data *tdat = (struct cmp_data *)dat;
typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp;
typeof (tdat->chunksize) _chunksize, len_cmp, dedupe_index_sz, index_size_cmp;
int type, rv;
uchar_t *compressed_chunk;
ssize_t rbytes;
@ -780,7 +784,7 @@ redo:
rctx = tdat->rctx;
reset_rabin_context(tdat->rctx);
rctx->cbuf = tdat->uncompressed_chunk;
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
dedupe_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
if (!rctx->valid) {
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
tdat->rbytes = rbytes;
@ -798,32 +802,32 @@ redo:
* reducing compression effectiveness of the data chunk. So we separate them.
*/
if (enable_rabin_scan && tdat->rctx->valid) {
_chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE;
index_size_cmp = rabin_index_sz;
_chunksize = tdat->rbytes - dedupe_index_sz - RABIN_HDR_SIZE;
index_size_cmp = dedupe_index_sz;
rv = 0;
if (rabin_index_sz >= 90) {
if (dedupe_index_sz >= 90) {
/* Compress index if it is at least 90 bytes. */
rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE,
rabin_index_sz, compressed_chunk + RABIN_HDR_SIZE,
dedupe_index_sz, compressed_chunk + RABIN_HDR_SIZE,
&index_size_cmp, tdat->rctx->level, 255, tdat->rctx->lzma_data);
} else {
memcpy(compressed_chunk + RABIN_HDR_SIZE,
tdat->uncompressed_chunk + RABIN_HDR_SIZE, rabin_index_sz);
tdat->uncompressed_chunk + RABIN_HDR_SIZE, dedupe_index_sz);
}
index_size_cmp += RABIN_HDR_SIZE;
rabin_index_sz += RABIN_HDR_SIZE;
dedupe_index_sz += RABIN_HDR_SIZE;
if (rv == 0) {
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
/* Compress data chunk. */
if (lzp_preprocess) {
rv = preproc_compress(tdat->compress,
tdat->uncompressed_chunk + rabin_index_sz,
tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, tdat->data);
} else {
rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz,
rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, tdat->data);
}
@ -831,7 +835,7 @@ redo:
/* Can't compress data just retain as-is. */
if (rv < 0)
memcpy(compressed_chunk + index_size_cmp,
tdat->uncompressed_chunk + rabin_index_sz, _chunksize);
tdat->uncompressed_chunk + dedupe_index_sz, _chunksize);
/* Now update rabin header with the compressed sizes. */
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
_chunksize);
@ -1005,8 +1009,11 @@ start_compress(const char *filename, uint64_t chunksize, int level)
}
flags = 0;
if (enable_rabin_scan) {
if (enable_rabin_scan || enable_fixed_scan) {
if (enable_rabin_scan)
flags |= FLAG_DEDUP;
else
flags |= FLAG_DEDUP_FIXED;
/* Additional scratch space for dedup arrays. */
compressed_chunksize += (rabin_buf_extra(chunksize, 0, algo,
enable_delta_encode) - (compressed_chunksize - chunksize));
@ -1132,7 +1139,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
}
if (enable_rabin_scan) {
tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
algo, enable_delta_encode);
algo, enable_delta_encode, enable_fixed_scan);
if (tdat->rctx == NULL) {
COMP_BAIL;
}
@ -1197,7 +1204,8 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* Read the first chunk into a spare buffer (a simple double-buffering).
*/
if (enable_rabin_split) {
rctx = create_rabin_context(chunksize, 0, 0, algo, enable_delta_encode);
rctx = create_rabin_context(chunksize, 0, 0, algo, enable_delta_encode,
enable_fixed_scan);
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx);
} else {
rbytes = Read(uncompfd, cread_buf, chunksize);
@ -1589,6 +1597,10 @@ main(int argc, char *argv[])
enable_delta_encode = 1;
break;
case 'f':
enable_fixed_scan = 1;
break;
case 'L':
lzp_preprocess = 1;
break;
@ -1634,6 +1646,11 @@ main(int argc, char *argv[])
if (!enable_rabin_scan)
enable_rabin_split = 0;
if (enable_fixed_scan && (enable_rabin_scan || enable_delta_encode)) {
fprintf(stderr, "Rabin Deduplication and Fixed block Deduplication are mutually exclusive\n");
exit(1);
}
if (num_rem == 0 && !pipe_mode) {
usage(); /* At least 1 filename needed. */
exit(1);

View file

@ -39,6 +39,7 @@ extern "C" {
#define MIN_CHUNK 2048
#define VERSION 3
/*
 * Bit flags stored in the archive header's flags word. The compressor ORs
 * them into the word and the decompressor tests each one with
 * `flags & FLAG_xxx`, so every flag must occupy its own bit.
 *
 * FLAG_DEDUP        - set when Rabin (content-defined) dedupe is enabled.
 * FLAG_DEDUP_FIXED  - set when fixed-block dedupe is enabled instead.
 * FLAG_SINGLE_CHUNK - tested by the decompressor; its value is kept at 2.
 *
 * FLAG_DEDUP_FIXED takes the next free bit (4) rather than 1: sharing a
 * value with FLAG_DEDUP makes the decompressor's
 * `if (flags & FLAG_DEDUP) ... else if (flags & FLAG_DEDUP_FIXED)` test
 * always pick the Rabin branch. The two existing flags keep their values.
 */
#define FLAG_DEDUP 1
#define FLAG_DEDUP_FIXED 4
#define FLAG_SINGLE_CHUNK 2
#define UTILITY_VERSION "0.8.1"

View file

@ -109,7 +109,7 @@ rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_
*/
rabin_context_t *
create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz,
const char *algo, int delta_flag) {
const char *algo, int delta_flag, int fixed_flag) {
rabin_context_t *ctx;
unsigned char *current_window_data;
uint32_t i;
@ -117,6 +117,11 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
if (rab_blk_sz < 1 || rab_blk_sz > 5)
rab_blk_sz = RAB_BLK_DEFAULT;
if (fixed_flag) {
delta_flag = 0;
inited = 1;
}
/*
* Pre-compute a table of irreducible polynomial evaluations for each
* possible byte value.
@ -163,13 +168,18 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
ctx->fixed_flag = fixed_flag;
ctx->rabin_break_patt = 0;
ctx->delta_flag = delta_flag;
ctx->rabin_poly_avg_block_size = 1 << (rab_blk_sz + RAB_BLK_MIN_BITS);
ctx->rabin_avg_block_mask = ctx->rabin_poly_avg_block_size - 1;
ctx->rabin_poly_min_block_size = rabin_min_blksz(chunksize, rab_blk_sz, algo, delta_flag);
ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size;
if (!fixed_flag)
ctx->blknum = chunksize / ctx->rabin_poly_min_block_size;
else
ctx->blknum = chunksize / ctx->rabin_poly_avg_block_size;
if (chunksize % ctx->rabin_poly_min_block_size)
ctx->blknum++;
@ -198,7 +208,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
lzma_init(&(ctx->lzma_data), &(ctx->level), chunksize);
if (!(ctx->lzma_data)) {
fprintf(stderr,
"Could not initialize LZMA data for rabin index, out of memory\n");
"Could not initialize LZMA data for dedupe index, out of memory\n");
destroy_rabin_context(ctx);
return (NULL);
}
@ -392,7 +402,8 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
if ((cur_pos_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt ||
length >= ctx->rabin_poly_max_block_size) {
if (ctx->blocks[blknum] == 0)
ctx->blocks[blknum] = (rabin_blockentry_t *)slab_alloc(NULL, sizeof (rabin_blockentry_t));
ctx->blocks[blknum] = (rabin_blockentry_t *)slab_alloc(NULL,
sizeof (rabin_blockentry_t));
ctx->blocks[blknum]->offset = last_offset;
ctx->blocks[blknum]->index = blknum; // Need to store for sorting
ctx->blocks[blknum]->length = length;
@ -430,8 +441,8 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
ssize_t pos, matchlen, pos1;
int valid = 1;
char *tmp;
uint32_t *blkarr, *trans, *rabin_index;
ssize_t rabin_index_sz;
uint32_t *blkarr, *trans, *dedupe_index;
ssize_t dedupe_index_sz;
rabin_blockentry_t *prev;
DEBUG_STAT_EN(uint32_t delta_calls, delta_fails);
DEBUG_STAT_EN(delta_calls = 0);
@ -469,7 +480,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
last_offset = *size;
}
rabin_index_sz = (ssize_t)blknum * RABIN_ENTRY_SIZE;
dedupe_index_sz = (ssize_t)blknum * RABIN_ENTRY_SIZE;
/*
* Now sort the block array based on checksums. This will bring virtually
@ -477,14 +488,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
* our checksum is. We are using a maximal super-sketch value.
*/
qsort(ctx->blocks, blknum, sizeof (rabin_blockentry_t *), cmpblks);
rabin_index = (uint32_t *)(ctx->cbuf + RABIN_HDR_SIZE);
dedupe_index = (uint32_t *)(ctx->cbuf + RABIN_HDR_SIZE);
/*
* We need 2 temporary arrays. We just use available space in the last
* portion of the buffer that will hold the deduped segment.
*/
blkarr = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (rabin_index_sz * 2 + 1));
trans = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (rabin_index_sz + 1));
blkarr = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (dedupe_index_sz * 2 + 1));
trans = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - (dedupe_index_sz + 1));
matchlen = 0;
/*
@ -497,12 +508,13 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
* A reference count is maintained for blocks that are similar with other
* blocks. This helps in non-duplicate block merging later.
*/
for (blk = 0; blk < blknum; blk++) {
blkarr[ctx->blocks[0]->index] = 0;
prev = ctx->blocks[0];
for (blk = 1; blk < blknum; blk++) {
blkarr[ctx->blocks[blk]->index] = blk;
if (blk > 0 && ctx->blocks[blk]->cksum_n_offset == prev->cksum_n_offset &&
if (ctx->blocks[blk]->crc == prev->crc &&
ctx->blocks[blk]->length == prev->length &&
ctx->blocks[blk]->crc == prev->crc &&
memcmp(buf1 + prev->offset, buf1 + ctx->blocks[blk]->offset,
prev->length) == 0)
{
@ -526,8 +538,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
*/
if (prev != NULL && ctx->blocks[blk]->ref == 0 &&
ctx->blocks[blk]->cksum_n_offset == prev->cksum_n_offset &&
ctx->blocks[blk]->length - prev->length < 512 &&
ctx->blocks[blk]->mean_n_length == prev->mean_n_length
ctx->blocks[blk]->length - prev->length < 512
) {
ctx->blocks[blk]->index = prev->index;
ctx->blocks[blk]->similar = SIMILAR_PARTIAL;
@ -538,7 +549,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
prev = ctx->blocks[blk];
}
}
if (matchlen < rabin_index_sz) {
if (matchlen < dedupe_index_sz) {
ctx->valid = 0;
return;
}
@ -569,7 +580,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
prev_index = pos;
prev_length = be->length;
}
rabin_index[pos] = be->length;
dedupe_index[pos] = be->length;
ctx->blocks[pos]->cksum_n_offset = be->offset;
trans[blk] = pos;
pos++;
@ -577,18 +588,18 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
if (be->ref > 0) {
prev_index = 0;
prev_length = 0;
rabin_index[pos] = be->length;
dedupe_index[pos] = be->length;
ctx->blocks[pos]->cksum_n_offset = be->offset;
trans[blk] = pos;
pos++;
} else {
if (prev_length + be->length <= RABIN_MAX_BLOCK_SIZE) {
prev_length += be->length;
rabin_index[prev_index] = prev_length;
dedupe_index[prev_index] = prev_length;
} else {
prev_index = 0;
prev_length = 0;
rabin_index[pos] = be->length;
dedupe_index[pos] = be->length;
ctx->blocks[pos]->cksum_n_offset = be->offset;
trans[blk] = pos;
pos++;
@ -599,14 +610,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
prev_index = 0;
prev_length = 0;
ctx->blocks[pos]->cksum_n_offset = be->offset;
ctx->blocks[pos]->mean_n_length = be->length;
ctx->blocks[pos]->alt_length = be->length;
trans[blk] = pos;
if (be->similar == SIMILAR_EXACT) {
rabin_index[pos] = (blkarr[be->index] | RABIN_INDEX_FLAG) &
dedupe_index[pos] = (blkarr[be->index] | RABIN_INDEX_FLAG) &
CLEAR_SIMILARITY_FLAG;
} else {
rabin_index[pos] = blkarr[be->index] | RABIN_INDEX_FLAG |
dedupe_index[pos] = blkarr[be->index] | RABIN_INDEX_FLAG |
SET_SIMILARITY_FLAG;
}
pos++;
@ -617,8 +628,8 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
* Final pass, copy the data and perform delta encoding.
*/
blknum = pos;
rabin_index_sz = (ssize_t)pos * RABIN_ENTRY_SIZE;
pos1 = rabin_index_sz + RABIN_HDR_SIZE;
dedupe_index_sz = (ssize_t)pos * RABIN_ENTRY_SIZE;
pos1 = dedupe_index_sz + RABIN_HDR_SIZE;
for (blk = 0; blk < blknum; blk++) {
uchar_t *old, *new;
int32_t bsz;
@ -631,37 +642,37 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
valid = 0;
break;
}
if (rabin_index[blk] & RABIN_INDEX_FLAG) {
j = rabin_index[blk] & RABIN_INDEX_VALUE;
if (dedupe_index[blk] & RABIN_INDEX_FLAG) {
j = dedupe_index[blk] & RABIN_INDEX_VALUE;
i = ctx->blocks[j]->index;
if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
if (dedupe_index[blk] & GET_SIMILARITY_FLAG) {
old = buf1 + ctx->blocks[j]->offset;
new = buf1 + ctx->blocks[blk]->cksum_n_offset;
matchlen = ctx->real_chunksize - *size;
DEBUG_STAT_EN(delta_calls++);
bsz = bsdiff(old, ctx->blocks[j]->length, new,
ctx->blocks[blk]->mean_n_length, ctx->cbuf + pos1,
ctx->blocks[blk]->alt_length, ctx->cbuf + pos1,
buf1 + *size, matchlen);
if (bsz == 0) {
DEBUG_STAT_EN(delta_fails++);
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk]->mean_n_length);
rabin_index[blk] = htonl(ctx->blocks[blk]->mean_n_length);
pos1 += ctx->blocks[blk]->mean_n_length;
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk]->alt_length);
dedupe_index[blk] = htonl(ctx->blocks[blk]->alt_length);
pos1 += ctx->blocks[blk]->alt_length;
} else {
rabin_index[blk] = htonl(trans[i] |
dedupe_index[blk] = htonl(trans[i] |
RABIN_INDEX_FLAG | SET_SIMILARITY_FLAG);
pos1 += bsz;
}
} else {
rabin_index[blk] = htonl(trans[i] | RABIN_INDEX_FLAG);
dedupe_index[blk] = htonl(trans[i] | RABIN_INDEX_FLAG);
}
} else {
memcpy(ctx->cbuf + pos1, buf1 + ctx->blocks[blk]->cksum_n_offset,
rabin_index[blk]);
pos1 += rabin_index[blk];
rabin_index[blk] = htonl(rabin_index[blk]);
dedupe_index[blk]);
pos1 += dedupe_index[blk];
dedupe_index[blk] = htonl(dedupe_index[blk]);
}
}
cont:
@ -674,7 +685,7 @@ cont:
entries = (ssize_t *)cbuf;
entries[0] = htonll(*size);
entries[1] = 0;
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
entries[2] = htonll(pos1 - dedupe_index_sz - RABIN_HDR_SIZE);
*size = pos1;
ctx->valid = 1;
DEBUG_STAT_EN(printf("Deduped size: %lld, blknum: %u, delta_calls: %u, delta_fails: %u\n",
@ -683,26 +694,26 @@ cont:
* Remaining header entries: size of compressed index and size of
* compressed data are inserted later via rabin_update_hdr, after actual compression!
*/
return (rabin_index_sz);
return (dedupe_index_sz);
}
}
return (0);
}
void
rabin_update_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp, ssize_t rabin_data_sz_cmp)
rabin_update_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_sz_cmp)
{
ssize_t *entries;
buf += sizeof (uint32_t);
entries = (ssize_t *)buf;
entries[1] = htonll(rabin_index_sz_cmp);
entries[1] = htonll(dedupe_index_sz_cmp);
entries[3] = htonll(rabin_data_sz_cmp);
}
void
rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *rabin_index_sz,
ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,
rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz,
ssize_t *rabin_data_sz, ssize_t *dedupe_index_sz_cmp,
ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size)
{
ssize_t *entries;
@ -712,8 +723,8 @@ rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *rabin_index_sz,
entries = (ssize_t *)buf;
*rabin_data_sz = ntohll(entries[0]);
*rabin_index_sz = (ssize_t)(*blknum) * RABIN_ENTRY_SIZE;
*rabin_index_sz_cmp = ntohll(entries[1]);
*dedupe_index_sz = (ssize_t)(*blknum) * RABIN_ENTRY_SIZE;
*dedupe_index_sz_cmp = ntohll(entries[1]);
*rabin_deduped_size = ntohll(entries[2]);
*rabin_data_sz_cmp = ntohll(entries[3]);
}
@ -722,14 +733,14 @@ void
rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
{
uint32_t blknum, blk, oblk, len;
uint32_t *rabin_index;
uint32_t *dedupe_index;
ssize_t data_sz, sz, indx_cmp, data_sz_cmp, deduped_sz;
ssize_t rabin_index_sz, pos1, i;
ssize_t dedupe_index_sz, pos1, i;
uchar_t *pos2;
rabin_parse_hdr(buf, &blknum, &rabin_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz);
rabin_index = (uint32_t *)(buf + RABIN_HDR_SIZE);
pos1 = rabin_index_sz + RABIN_HDR_SIZE;
rabin_parse_hdr(buf, &blknum, &dedupe_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz);
dedupe_index = (uint32_t *)(buf + RABIN_HDR_SIZE);
pos1 = dedupe_index_sz + RABIN_HDR_SIZE;
pos2 = ctx->cbuf;
sz = 0;
ctx->valid = 1;
@ -738,7 +749,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
for (blk = 0; blk < blknum; blk++) {
if (ctx->blocks[blk] == 0)
ctx->blocks[blk] = (rabin_blockentry_t *)slab_alloc(NULL, sizeof (rabin_blockentry_t));
len = ntohl(rabin_index[blk]);
len = ntohl(dedupe_index[blk]);
if (len == 0) {
ctx->blocks[blk]->length = 0;
ctx->blocks[blk]->index = 0;

View file

@ -127,7 +127,7 @@
typedef struct {
ssize_t offset;
uint64_t cksum_n_offset; // Dual purpose variable
uint64_t mean_n_length; // Dual purpose variable
uint64_t alt_length;
uint64_t crc;
unsigned int index;
unsigned int length;
@ -149,11 +149,11 @@ typedef struct {
uint64_t real_chunksize;
short valid;
void *lzma_data;
int level, delta_flag;
int level, delta_flag, fixed_flag;
} rabin_context_t;
extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize,
int rab_blk_sz, const char *algo, int delta_flag);
int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag);
extern void destroy_rabin_context(rabin_context_t *ctx);
extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf,
ssize_t *size, ssize_t offset, ssize_t *rabin_pos);