Add support for Fixed-Block deduplication.

More refactoring of symbol names.
This commit is contained in:
Moinak Ghosh 2012-09-16 11:12:58 +05:30
parent b9355a5dcc
commit e3befd9e16
6 changed files with 116 additions and 76 deletions

View file

@ -97,6 +97,12 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
the fastest in the group, especially on x86 platforms. BLAKE is faster
than SKEIN on a few platforms.
SKEIN 512-256 is about 60% faster than SHA 512-256 on x64 platforms.
'-F' - Perform Fixed-Block Deduplication. In some cases this is faster than
fingerprinting-based content-aware deduplication. However, it is mostly
useful for disk dumps, especially virtual machine images. It generally
gives a lower dedupe ratio than content-aware dedupe (-D) and does not
support delta compression.
'-M' - Display memory allocator statistics
'-C' - Display compression statistics

75
main.c
View file

@ -90,7 +90,7 @@ static int do_uncompress = 0;
static int cksum_bytes;
static int cksum = 0;
static int rab_blk_size = 0;
static rabin_context_t *rctx;
static dedupe_context_t *rctx;
static void
usage(void)
@ -145,6 +145,8 @@ usage(void)
" '-S' <cksum>\n"
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n"
" Default one is SKEIN256.\n"
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n"
" but with lower deduplication ratio.\n"
" '-B' <1..5>\n"
" - Specify a minimum Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
" '-M' - Display memory allocator statistics\n"
@ -299,11 +301,11 @@ redo:
_chunksize = ntohll(*((ssize_t *)rseg));
}
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
if ((enable_rabin_scan || enable_fixed_scan) && (HDR & CHUNK_FLAG_DEDUP)) {
uchar_t *cmpbuf, *ubuf;
/* Extract various sizes from rabin header. */
rabin_parse_hdr(cseg, &blknum, &dedupe_index_sz, &rabin_data_sz,
parse_dedupe_hdr(cseg, &blknum, &dedupe_index_sz, &rabin_data_sz,
&dedupe_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize);
memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE);
@ -363,14 +365,14 @@ redo:
goto cont;
}
/* Rebuild chunk from dedup blocks. */
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
rabin_context_t *rctx;
if ((enable_rabin_scan || enable_fixed_scan) && (HDR & CHUNK_FLAG_DEDUP)) {
dedupe_context_t *rctx;
uchar_t *tmp;
rctx = tdat->rctx;
reset_rabin_context(tdat->rctx);
reset_dedupe_context(tdat->rctx);
rctx->cbuf = tdat->compressed_chunk;
rabin_inverse_dedup(rctx, tdat->uncompressed_chunk, &(tdat->len_cmp));
dedupe_decompress(rctx, tdat->uncompressed_chunk, &(tdat->len_cmp));
if (!rctx->valid) {
fprintf(stderr, "ERROR: Chunk %d, dedup recovery failed.\n", tdat->id);
rv = -1;
@ -582,8 +584,8 @@ start_decompress(const char *filename, const char *to_filename)
UNCOMP_BAIL;
}
}
if (enable_rabin_scan) {
tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
if (enable_rabin_scan || enable_fixed_scan) {
tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size,
algo, enable_delta_encode, enable_fixed_scan);
if (tdat->rctx == NULL) {
UNCOMP_BAIL;
@ -659,7 +661,7 @@ start_decompress(const char *filename, const char *to_filename)
if (!tdat->compressed_chunk) {
tdat->compressed_chunk = (uchar_t *)slab_alloc(NULL,
compressed_chunksize);
if (enable_rabin_scan)
if ((enable_rabin_scan || enable_fixed_scan))
tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL,
compressed_chunksize);
else
@ -735,8 +737,8 @@ uncomp_done:
slab_free(NULL, dary[i]->compressed_chunk);
if (_deinit_func)
_deinit_func(&(dary[i]->data));
if (enable_rabin_scan) {
destroy_rabin_context(dary[i]->rctx);
if ((enable_rabin_scan || enable_fixed_scan)) {
destroy_dedupe_context(dary[i]->rctx);
}
slab_free(NULL, dary[i]);
}
@ -770,8 +772,8 @@ redo:
compressed_chunk = tdat->compressed_chunk + CHUNK_FLAG_SZ;
rbytes = tdat->rbytes;
/* Perform Dedup if enabled. */
if (enable_rabin_scan) {
rabin_context_t *rctx;
if ((enable_rabin_scan || enable_fixed_scan)) {
dedupe_context_t *rctx;
/*
* Compute checksum of original uncompressed chunk. When doing dedup
@ -782,9 +784,9 @@ redo:
compute_checksum(tdat->checksum, cksum, tdat->cmp_seg, tdat->rbytes);
rctx = tdat->rctx;
reset_rabin_context(tdat->rctx);
reset_dedupe_context(tdat->rctx);
rctx->cbuf = tdat->uncompressed_chunk;
dedupe_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
dedupe_index_sz = dedupe_compress(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
if (!rctx->valid) {
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
tdat->rbytes = rbytes;
@ -801,7 +803,7 @@ redo:
* The rabin index array values can pollute the compressor's dictionary thereby
* reducing compression effectiveness of the data chunk. So we separate them.
*/
if (enable_rabin_scan && tdat->rctx->valid) {
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
_chunksize = tdat->rbytes - dedupe_index_sz - RABIN_HDR_SIZE;
index_size_cmp = dedupe_index_sz;
@ -837,7 +839,7 @@ redo:
memcpy(compressed_chunk + index_size_cmp,
tdat->uncompressed_chunk + dedupe_index_sz, _chunksize);
/* Now update rabin header with the compressed sizes. */
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
_chunksize);
} else {
/* If rabin index compression fails, we just drop down to plain
@ -869,7 +871,7 @@ plain_compress:
*/
tdat->len_cmp = _chunksize;
if (_chunksize >= rbytes || rv < 0) {
if (!enable_rabin_scan || !tdat->rctx->valid)
if (!(enable_rabin_scan || enable_fixed_scan) || !tdat->rctx->valid)
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
type = UNCOMPRESSED;
tdat->len_cmp = tdat->rbytes;
@ -877,7 +879,7 @@ plain_compress:
type = COMPRESSED;
}
if (enable_rabin_scan && tdat->rctx->valid) {
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
type |= CHUNK_FLAG_DEDUP;
}
if (lzp_preprocess) {
@ -982,7 +984,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
struct cmp_data **dary = NULL, *tdat;
pthread_t writer_thr;
uchar_t *cread_buf, *pos;
rabin_context_t *rctx;
dedupe_context_t *rctx;
algo_props_t props;
/*
@ -1015,7 +1017,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
else
flags |= FLAG_DEDUP_FIXED;
/* Additional scratch space for dedup arrays. */
compressed_chunksize += (rabin_buf_extra(chunksize, 0, algo,
compressed_chunksize += (dedupe_buf_extra(chunksize, 0, algo,
enable_delta_encode) - (compressed_chunksize - chunksize));
}
@ -1107,7 +1109,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
fprintf(stderr, "\n");
dary = (struct cmp_data **)slab_calloc(NULL, nprocs, sizeof (struct cmp_data *));
if (enable_rabin_scan)
if ((enable_rabin_scan || enable_fixed_scan))
cread_buf = (uchar_t *)slab_alloc(NULL, compressed_chunksize);
else
cread_buf = (uchar_t *)slab_alloc(NULL, chunksize);
@ -1137,8 +1139,8 @@ start_compress(const char *filename, uint64_t chunksize, int level)
COMP_BAIL;
}
}
if (enable_rabin_scan) {
tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
if (enable_rabin_scan || enable_fixed_scan) {
tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size,
algo, enable_delta_encode, enable_fixed_scan);
if (tdat->rctx == NULL) {
COMP_BAIL;
@ -1204,7 +1206,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* Read the first chunk into a spare buffer (a simple double-buffering).
*/
if (enable_rabin_split) {
rctx = create_rabin_context(chunksize, 0, 0, algo, enable_delta_encode,
rctx = create_dedupe_context(chunksize, 0, 0, algo, enable_delta_encode,
enable_fixed_scan);
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx);
} else {
@ -1231,7 +1233,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* Delayed allocation. Allocate chunks if not already done.
*/
if (!tdat->cmp_seg) {
if (enable_rabin_scan) {
if ((enable_rabin_scan || enable_fixed_scan)) {
if (single_chunk)
tdat->cmp_seg = (uchar_t *)1;
else
@ -1266,7 +1268,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
*/
tdat->id = chunk_num;
tdat->rbytes = rbytes;
if (enable_rabin_scan) {
if ((enable_rabin_scan || enable_fixed_scan)) {
tmp = tdat->cmp_seg;
tdat->cmp_seg = cread_buf;
cread_buf = tmp;
@ -1383,8 +1385,8 @@ comp_done:
slab_free(NULL, dary[i]->uncompressed_chunk);
if (dary[i]->cmp_seg != (uchar_t *)1)
slab_free(NULL, dary[i]->cmp_seg);
if (enable_rabin_scan) {
destroy_rabin_context(dary[i]->rctx);
if ((enable_rabin_scan || enable_fixed_scan)) {
destroy_dedupe_context(dary[i]->rctx);
}
if (_deinit_func)
_deinit_func(&(dary[i]->data));
@ -1392,7 +1394,7 @@ comp_done:
}
slab_free(NULL, dary);
}
if (enable_rabin_split) destroy_rabin_context(rctx);
if (enable_rabin_split) destroy_dedupe_context(rctx);
if (cread_buf != (uchar_t *)1)
slab_free(NULL, cread_buf);
if (!pipe_mode) {
@ -1530,7 +1532,7 @@ main(int argc, char *argv[])
level = 6;
slab_init();
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:")) != -1) {
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:F")) != -1) {
int ovr;
switch (opt) {
@ -1597,8 +1599,9 @@ main(int argc, char *argv[])
enable_delta_encode = 1;
break;
case 'f':
case 'F':
enable_fixed_scan = 1;
enable_rabin_split = 0;
break;
case 'L':
@ -1638,15 +1641,15 @@ main(int argc, char *argv[])
exit(1);
}
if (enable_rabin_scan && !do_compress) {
fprintf(stderr, "Rabin Deduplication is only used during compression.\n");
if ((enable_rabin_scan || enable_fixed_scan) && !do_compress) {
fprintf(stderr, "Deduplication is only used during compression.\n");
usage();
exit(1);
}
if (!enable_rabin_scan)
enable_rabin_split = 0;
if (enable_fixed_scan && (enable_rabin_scan || enable_delta_encode)) {
if (enable_fixed_scan && (enable_rabin_scan || enable_delta_encode || enable_rabin_split)) {
fprintf(stderr, "Rabin Deduplication and Fixed block Deduplication are mutually exclusive\n");
exit(1);
}

View file

@ -157,7 +157,7 @@ struct cmp_data {
uchar_t *cmp_seg;
uchar_t *compressed_chunk;
uchar_t *uncompressed_chunk;
rabin_context_t *rctx;
dedupe_context_t *rctx;
ssize_t rbytes;
ssize_t chunksize;
ssize_t len_cmp;

View file

@ -86,7 +86,7 @@ uint64_t ir[256];
static int inited = 0;
static uint32_t
rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag)
dedupe_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag)
{
uint32_t min_blk;
@ -95,22 +95,22 @@ rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_
}
/*
 * Return the number of extra bytes needed for the dedupe index of a chunk:
 * one uint32_t per minimum-sized block that fits into chunksize.
 *
 * chunksize  - size of the chunk being deduplicated.
 * rab_blk_sz - block size selector; values outside 1..5 are replaced by
 *              RAB_BLK_DEFAULT.
 * algo, delta_flag - passed through to the minimum block size computation.
 */
uint32_t
rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag)
dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag)
{
/* Fall back to the default selector when the caller passes an out-of-range value. */
if (rab_blk_sz < 1 || rab_blk_sz > 5)
rab_blk_sz = RAB_BLK_DEFAULT;
return ((chunksize / rabin_min_blksz(chunksize, rab_blk_sz, algo, delta_flag))
return ((chunksize / dedupe_min_blksz(chunksize, rab_blk_sz, algo, delta_flag))
* sizeof (uint32_t));
}
/*
* Initialize the algorithm with the default params.
*/
rabin_context_t *
create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz,
dedupe_context_t *
create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz,
const char *algo, int delta_flag, int fixed_flag) {
rabin_context_t *ctx;
dedupe_context_t *ctx;
unsigned char *current_window_data;
uint32_t i;
@ -165,7 +165,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
* use 4K minimum Rabin block size. For everything else it is 2K based
* on experimentation.
*/
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
ctx = (dedupe_context_t *)slab_alloc(NULL, sizeof (dedupe_context_t));
ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
ctx->fixed_flag = fixed_flag;
@ -173,7 +173,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
ctx->delta_flag = delta_flag;
ctx->rabin_poly_avg_block_size = 1 << (rab_blk_sz + RAB_BLK_MIN_BITS);
ctx->rabin_avg_block_mask = ctx->rabin_poly_avg_block_size - 1;
ctx->rabin_poly_min_block_size = rabin_min_blksz(chunksize, rab_blk_sz, algo, delta_flag);
ctx->rabin_poly_min_block_size = dedupe_min_blksz(chunksize, rab_blk_sz, algo, delta_flag);
ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size;
if (!fixed_flag)
@ -186,7 +186,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
if (ctx->blknum > RABIN_MAX_BLOCKS) {
fprintf(stderr, "Chunk size too large for dedup.\n");
destroy_rabin_context(ctx);
destroy_dedupe_context(ctx);
return (NULL);
}
current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE);
@ -198,7 +198,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
if(ctx == NULL || current_window_data == NULL || (ctx->blocks == NULL && real_chunksize > 0)) {
fprintf(stderr,
"Could not allocate rabin polynomial context, out of memory\n");
destroy_rabin_context(ctx);
destroy_dedupe_context(ctx);
return (NULL);
}
@ -209,7 +209,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
if (!(ctx->lzma_data)) {
fprintf(stderr,
"Could not initialize LZMA data for dedupe index, out of memory\n");
destroy_rabin_context(ctx);
destroy_dedupe_context(ctx);
return (NULL);
}
}
@ -227,19 +227,19 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
slab_cache_add(sizeof (rabin_blockentry_t));
ctx->current_window_data = current_window_data;
ctx->real_chunksize = real_chunksize;
reset_rabin_context(ctx);
reset_dedupe_context(ctx);
return (ctx);
}
/*
 * Reset the per-chunk scanning state of a dedupe context: zero the
 * RAB_POLYNOMIAL_WIN_SIZE bytes of the window buffer and rewind the
 * window position to 0. Other fields of the context are left untouched.
 */
void
reset_rabin_context(rabin_context_t *ctx)
reset_dedupe_context(dedupe_context_t *ctx)
{
memset(ctx->current_window_data, 0, RAB_POLYNOMIAL_WIN_SIZE);
ctx->window_pos = 0;
}
void
destroy_rabin_context(rabin_context_t *ctx)
destroy_dedupe_context(dedupe_context_t *ctx)
{
if (ctx) {
uint32_t i;
@ -288,11 +288,13 @@ cmpblks(const void *a, const void *b)
}
/**
* Perform Deduplication based on Rabin Fingerprinting. A 31-byte window is used for
* the rolling checksum and dedup blocks vary in size from 4K-128K.
* Perform Deduplication.
* Both semi-Rabin fingerprinting-based and Fixed-Block Deduplication are supported.
* For the fingerprinting path a 16-byte window is used for the rolling checksum and
* dedup blocks can vary in size from 4K-128K.
*/
uint32_t
rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos)
dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos)
{
ssize_t i, last_offset, j, fplist_sz;
uint32_t blknum;
@ -302,6 +304,40 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
uint32_t *fplist;
heap_t heap;
length = offset;
last_offset = 0;
blknum = 0;
ctx->valid = 0;
cur_roll_checksum = 0;
cur_sketch = 0;
if (ctx->fixed_flag) {
	/*
	 * Fixed-block mode: no rolling checksum is computed. The input is cut
	 * into blocks of rabin_poly_avg_block_size bytes, each block is hashed,
	 * and control jumps straight to the common block-processing code.
	 */
	blknum = *size / ctx->rabin_poly_avg_block_size;
	j = *size % ctx->rabin_poly_avg_block_size;
	if (j) {
		/* A partial trailing block of j bytes. */
		blknum++;
	} else {
		/*
		 * The size is an exact multiple of the block size, so the
		 * last block is a full one. Without this the last block
		 * would be recorded with length 0 and its data dropped.
		 */
		j = ctx->rabin_poly_avg_block_size;
	}
	last_offset = 0;
	length = ctx->rabin_poly_avg_block_size;
	for (i=0; i<blknum; i++) {
		if (i == blknum-1) {
			length = j;
		}
		/* Block entries are allocated lazily and kept in the context. */
		if (ctx->blocks[i] == 0) {
			ctx->blocks[i] = (rabin_blockentry_t *)slab_alloc(NULL,
			    sizeof (rabin_blockentry_t));
			if (ctx->blocks[i] == 0) {
				/*
				 * Out of memory: leave the context marked invalid.
				 * The compression caller checks ctx->valid and falls
				 * back to the plain, non-deduplicated chunk.
				 */
				fprintf(stderr,
				    "Could not allocate dedupe block entry, out of memory\n");
				ctx->valid = 0;
				return (0);
			}
		}
		ctx->blocks[i]->offset = last_offset;
		ctx->blocks[i]->index = i; // Need to store for sorting
		ctx->blocks[i]->length = length;
		ctx->blocks[i]->ref = 0;
		ctx->blocks[i]->similar = 0;
		/* NOTE(review): buf1 is set up outside this span; presumably the start of the data to scan - confirm. */
		ctx->blocks[i]->crc = XXH_strong32(buf1+last_offset, length, 0);
		ctx->blocks[i]->cksum_n_offset = ctx->blocks[i]->crc;
		last_offset += length;
	}
	goto process_blocks;
}
if (rabin_pos == NULL) {
/*
* Initialize arrays for sketch computation. We re-use memory allocated
@ -312,12 +348,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
memset(fplist, 0, fplist_sz);
reset_heap(&heap, fplist_sz/2);
}
length = offset;
last_offset = 0;
blknum = 0;
ctx->valid = 0;
cur_roll_checksum = 0;
cur_sketch = 0;
/*
* If rabin_pos is non-zero then we are being asked to scan for the last rabin boundary
@ -434,6 +464,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
}
}
process_blocks:
DEBUG_STAT_EN(printf("Original size: %lld, blknum: %u\n", *size, blknum));
// If we found at least a few chunks, perform dedup.
if (blknum > 2) {
@ -701,7 +732,7 @@ cont:
}
void
rabin_update_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_sz_cmp)
update_dedupe_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_sz_cmp)
{
ssize_t *entries;
@ -712,7 +743,7 @@ rabin_update_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_s
}
void
rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz,
parse_dedupe_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz,
ssize_t *rabin_data_sz, ssize_t *dedupe_index_sz_cmp,
ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size)
{
@ -730,7 +761,7 @@ rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz,
}
void
rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size)
{
uint32_t blknum, blk, oblk, len;
uint32_t *dedupe_index;
@ -738,7 +769,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
ssize_t dedupe_index_sz, pos1, i;
uchar_t *pos2;
rabin_parse_hdr(buf, &blknum, &dedupe_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz);
parse_dedupe_hdr(buf, &blknum, &dedupe_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz);
dedupe_index = (uint32_t *)(buf + RABIN_HDR_SIZE);
pos1 = dedupe_index_sz + RABIN_HDR_SIZE;
pos2 = ctx->cbuf;
@ -828,7 +859,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
* TODO: Consolidate rabin dedup and compression/decompression in functions here rather than
* messy code in main program.
int
rabin_compress(rabin_context_t *ctx, uchar_t *from, ssize_t fromlen, uchar_t *to, ssize_t *tolen,
rabin_compress(dedupe_context_t *ctx, uchar_t *from, ssize_t fromlen, uchar_t *to, ssize_t *tolen,
int level, char chdr, void *data, compress_func_ptr cmp)
{
}

View file

@ -150,21 +150,21 @@ typedef struct {
short valid;
void *lzma_data;
int level, delta_flag, fixed_flag;
} rabin_context_t;
} dedupe_context_t;
extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize,
extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize,
int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag);
extern void destroy_rabin_context(rabin_context_t *ctx);
extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf,
extern void destroy_dedupe_context(dedupe_context_t *ctx);
extern unsigned int dedupe_compress(dedupe_context_t *ctx, unsigned char *buf,
ssize_t *size, ssize_t offset, ssize_t *rabin_pos);
extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size);
extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz,
extern void dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size);
extern void parse_dedupe_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz,
ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,
ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size);
extern void rabin_update_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp,
extern void update_dedupe_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp,
ssize_t rabin_data_sz_cmp);
extern void reset_rabin_context(rabin_context_t *ctx);
extern uint32_t rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo,
extern void reset_dedupe_context(dedupe_context_t *ctx);
extern uint32_t dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo,
int delta_flag);
#endif /* _RABIN_POLY_H_ */

View file

@ -223,7 +223,7 @@ Read_Adjusted(int fd, uchar_t *buf, size_t count, ssize_t *rabin_count, void *ct
{
char *buf2;
ssize_t rcount;
rabin_context_t *rctx = (rabin_context_t *)ctx;
dedupe_context_t *rctx = (dedupe_context_t *)ctx;
if (!ctx) return (Read(fd, buf, count));
buf2 = buf;
@ -235,7 +235,7 @@ Read_Adjusted(int fd, uchar_t *buf, size_t count, ssize_t *rabin_count, void *ct
if (rcount > 0) {
rcount += *rabin_count;
if (rcount == count)
rabin_dedup(rctx, buf, &rcount, 0, rabin_count);
dedupe_compress(rctx, buf, &rcount, 0, rabin_count);
else
*rabin_count = 0;
} else {