diff --git a/README.md b/README.md index 1649593..a0ad254 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,12 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library the fastest in the group, especially on x86 platforms. BLAKE is faster than SKEIN on a few platforms. SKEIN 512-256 is about 60% faster than SHA 512-256 on x64 platforms. + + '-F' - Perform Fixed Block Deduplication. This is faster than fingerprinting + based content-aware deduplication in some cases. However this is mostly + usable for disk dumps especially virtual machine images. This generally + gives lower dedupe ratio than content-aware dedupe (-D) and does not + support delta compression. '-M' - Display memory allocator statistics '-C' - Display compression statistics diff --git a/main.c b/main.c index 53f2adb..721d585 100644 --- a/main.c +++ b/main.c @@ -90,7 +90,7 @@ static int do_uncompress = 0; static int cksum_bytes; static int cksum = 0; static int rab_blk_size = 0; -static rabin_context_t *rctx; +static dedupe_context_t *rctx; static void usage(void) @@ -145,6 +145,8 @@ usage(void) " '-S' \n" " - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n" " Default one is SKEIN256.\n" + " '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n" + " but with lower deduplication ratio.\n" " '-B' <1..5>\n" " - Specify a minimum Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n" " '-M' - Display memory allocator statistics\n" @@ -299,11 +301,11 @@ redo: _chunksize = ntohll(*((ssize_t *)rseg)); } - if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) { + if ((enable_rabin_scan || enable_fixed_scan) && (HDR & CHUNK_FLAG_DEDUP)) { uchar_t *cmpbuf, *ubuf; /* Extract various sizes from rabin header. */ - rabin_parse_hdr(cseg, &blknum, &dedupe_index_sz, &rabin_data_sz, + parse_dedupe_hdr(cseg, &blknum, &dedupe_index_sz, &rabin_data_sz, &dedupe_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize); memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE); @@ -363,14 +365,14 @@ redo: goto cont; } /* Rebuild chunk from dedup blocks. */ - if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) { - rabin_context_t *rctx; + if ((enable_rabin_scan || enable_fixed_scan) && (HDR & CHUNK_FLAG_DEDUP)) { + dedupe_context_t *rctx; uchar_t *tmp; rctx = tdat->rctx; - reset_rabin_context(tdat->rctx); + reset_dedupe_context(tdat->rctx); rctx->cbuf = tdat->compressed_chunk; - rabin_inverse_dedup(rctx, tdat->uncompressed_chunk, &(tdat->len_cmp)); + dedupe_decompress(rctx, tdat->uncompressed_chunk, &(tdat->len_cmp)); if (!rctx->valid) { fprintf(stderr, "ERROR: Chunk %d, dedup recovery failed.\n", tdat->id); rv = -1; @@ -582,8 +584,8 @@ start_decompress(const char *filename, const char *to_filename) UNCOMP_BAIL; } } - if (enable_rabin_scan) { - tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size, + if (enable_rabin_scan || enable_fixed_scan) { + tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size, algo, enable_delta_encode, enable_fixed_scan); if (tdat->rctx == NULL) { UNCOMP_BAIL; @@ -659,7 +661,7 @@ start_decompress(const char *filename, const char *to_filename) if (!tdat->compressed_chunk) { tdat->compressed_chunk = (uchar_t *)slab_alloc(NULL, compressed_chunksize); - if (enable_rabin_scan) + if ((enable_rabin_scan || enable_fixed_scan)) tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL, compressed_chunksize); else @@ -735,8 +737,8 @@ uncomp_done: slab_free(NULL, dary[i]->compressed_chunk); if (_deinit_func) _deinit_func(&(dary[i]->data)); - if (enable_rabin_scan) { - destroy_rabin_context(dary[i]->rctx); + if ((enable_rabin_scan || enable_fixed_scan)) { + destroy_dedupe_context(dary[i]->rctx); } slab_free(NULL, dary[i]); } @@ -770,8 +772,8 @@ redo: compressed_chunk = tdat->compressed_chunk + CHUNK_FLAG_SZ; rbytes = tdat->rbytes; /* Perform Dedup if enabled. */ - if (enable_rabin_scan) { - rabin_context_t *rctx; + if ((enable_rabin_scan || enable_fixed_scan)) { + dedupe_context_t *rctx; /* * Compute checksum of original uncompressed chunk. When doing dedup @@ -782,9 +784,9 @@ redo: compute_checksum(tdat->checksum, cksum, tdat->cmp_seg, tdat->rbytes); rctx = tdat->rctx; - reset_rabin_context(tdat->rctx); + reset_dedupe_context(tdat->rctx); rctx->cbuf = tdat->uncompressed_chunk; - dedupe_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); + dedupe_index_sz = dedupe_compress(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); if (!rctx->valid) { memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes); tdat->rbytes = rbytes; @@ -801,7 +803,7 @@ redo: * The rabin index array values can pollute the compressor's dictionary thereby * reducing compression effectiveness of the data chunk. So we separate them. */ - if (enable_rabin_scan && tdat->rctx->valid) { + if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) { _chunksize = tdat->rbytes - dedupe_index_sz - RABIN_HDR_SIZE; index_size_cmp = dedupe_index_sz; @@ -837,7 +839,7 @@ redo: memcpy(compressed_chunk + index_size_cmp, tdat->uncompressed_chunk + dedupe_index_sz, _chunksize); /* Now update rabin header with the compressed sizes. */ - rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, + update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, _chunksize); } else { /* If rabin index compression fails, we just drop down to plain @@ -869,7 +871,7 @@ plain_compress: */ tdat->len_cmp = _chunksize; if (_chunksize >= rbytes || rv < 0) { - if (!enable_rabin_scan || !tdat->rctx->valid) + if (!(enable_rabin_scan || enable_fixed_scan) || !tdat->rctx->valid) memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes); type = UNCOMPRESSED; tdat->len_cmp = tdat->rbytes; @@ -877,7 +879,7 @@ plain_compress: type = COMPRESSED; } - if (enable_rabin_scan && tdat->rctx->valid) { + if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) { type |= CHUNK_FLAG_DEDUP; } if (lzp_preprocess) { @@ -982,7 +984,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) struct cmp_data **dary = NULL, *tdat; pthread_t writer_thr; uchar_t *cread_buf, *pos; - rabin_context_t *rctx; + dedupe_context_t *rctx; algo_props_t props; /* @@ -1015,7 +1017,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) else flags |= FLAG_DEDUP_FIXED; /* Additional scratch space for dedup arrays. */ - compressed_chunksize += (rabin_buf_extra(chunksize, 0, algo, + compressed_chunksize += (dedupe_buf_extra(chunksize, 0, algo, enable_delta_encode) - (compressed_chunksize - chunksize)); } @@ -1107,7 +1109,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) fprintf(stderr, "\n"); dary = (struct cmp_data **)slab_calloc(NULL, nprocs, sizeof (struct cmp_data *)); - if (enable_rabin_scan) + if ((enable_rabin_scan || enable_fixed_scan)) cread_buf = (uchar_t *)slab_alloc(NULL, compressed_chunksize); else cread_buf = (uchar_t *)slab_alloc(NULL, chunksize); @@ -1137,8 +1139,8 @@ start_compress(const char *filename, uint64_t chunksize, int level) COMP_BAIL; } } - if (enable_rabin_scan) { - tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size, + if (enable_rabin_scan || enable_fixed_scan) { + tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size, algo, enable_delta_encode, enable_fixed_scan); if (tdat->rctx == NULL) { COMP_BAIL; @@ -1204,7 +1206,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) * Read the first chunk into a spare buffer (a simple double-buffering). */ if (enable_rabin_split) { - rctx = create_rabin_context(chunksize, 0, 0, algo, enable_delta_encode, + rctx = create_dedupe_context(chunksize, 0, 0, algo, enable_delta_encode, enable_fixed_scan); rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx); } else { @@ -1231,7 +1233,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) * Delayed allocation. Allocate chunks if not already done. */ if (!tdat->cmp_seg) { - if (enable_rabin_scan) { + if ((enable_rabin_scan || enable_fixed_scan)) { if (single_chunk) tdat->cmp_seg = (uchar_t *)1; else @@ -1266,7 +1268,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) */ tdat->id = chunk_num; tdat->rbytes = rbytes; - if (enable_rabin_scan) { + if ((enable_rabin_scan || enable_fixed_scan)) { tmp = tdat->cmp_seg; tdat->cmp_seg = cread_buf; cread_buf = tmp; @@ -1383,8 +1385,8 @@ comp_done: slab_free(NULL, dary[i]->uncompressed_chunk); if (dary[i]->cmp_seg != (uchar_t *)1) slab_free(NULL, dary[i]->cmp_seg); - if (enable_rabin_scan) { - destroy_rabin_context(dary[i]->rctx); + if ((enable_rabin_scan || enable_fixed_scan)) { + destroy_dedupe_context(dary[i]->rctx); } if (_deinit_func) _deinit_func(&(dary[i]->data)); @@ -1392,7 +1394,7 @@ comp_done: } slab_free(NULL, dary); } - if (enable_rabin_split) destroy_rabin_context(rctx); + if (enable_rabin_split) destroy_dedupe_context(rctx); if (cread_buf != (uchar_t *)1) slab_free(NULL, cread_buf); if (!pipe_mode) { @@ -1530,7 +1532,7 @@ main(int argc, char *argv[]) level = 6; slab_init(); - while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:")) != -1) { + while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:F")) != -1) { int ovr; switch (opt) { @@ -1597,8 +1599,9 @@ main(int argc, char *argv[]) enable_delta_encode = 1; break; - case 'f': + case 'F': enable_fixed_scan = 1; + enable_rabin_split = 0; break; case 'L': @@ -1638,15 +1641,15 @@ main(int argc, char *argv[]) exit(1); } - if (enable_rabin_scan && !do_compress) { - fprintf(stderr, "Rabin Deduplication is only used during compression.\n"); + if ((enable_rabin_scan || enable_fixed_scan) && !do_compress) { + fprintf(stderr, "Deduplication is only used during compression.\n"); usage(); exit(1); } if (!enable_rabin_scan) enable_rabin_split = 0; - if (enable_fixed_scan && (enable_rabin_scan || enable_delta_encode)) { + if (enable_fixed_scan && (enable_rabin_scan || enable_delta_encode || enable_rabin_split)) { fprintf(stderr, "Rabin Deduplication and Fixed block Deduplication are mutually exclusive\n"); exit(1); } diff --git a/pcompress.h b/pcompress.h index 20eae3d..b3f01c5 100644 --- a/pcompress.h +++ b/pcompress.h @@ -157,7 +157,7 @@ struct cmp_data { uchar_t *cmp_seg; uchar_t *compressed_chunk; uchar_t *uncompressed_chunk; - rabin_context_t *rctx; + dedupe_context_t *rctx; ssize_t rbytes; ssize_t chunksize; ssize_t len_cmp; diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index c885688..2922be0 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -86,7 +86,7 @@ uint64_t ir[256]; static int inited = 0; static uint32_t -rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag) +dedupe_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag) { uint32_t min_blk; @@ -95,22 +95,22 @@ rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_ } uint32_t -rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag) +dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag) { if (rab_blk_sz < 1 || rab_blk_sz > 5) rab_blk_sz = RAB_BLK_DEFAULT; - return ((chunksize / rabin_min_blksz(chunksize, rab_blk_sz, algo, delta_flag)) + return ((chunksize / dedupe_min_blksz(chunksize, rab_blk_sz, algo, delta_flag)) * sizeof (uint32_t)); } /* * Initialize the algorithm with the default params. */ -rabin_context_t * -create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz, +dedupe_context_t * +create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag) { - rabin_context_t *ctx; + dedupe_context_t *ctx; unsigned char *current_window_data; uint32_t i; @@ -165,7 +165,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz * use 4K minimum Rabin block size. For everything else it is 2K based * on experimentation. */ - ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t)); + ctx = (dedupe_context_t *)slab_alloc(NULL, sizeof (dedupe_context_t)); ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE; ctx->fixed_flag = fixed_flag; @@ -173,7 +173,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz ctx->delta_flag = delta_flag; ctx->rabin_poly_avg_block_size = 1 << (rab_blk_sz + RAB_BLK_MIN_BITS); ctx->rabin_avg_block_mask = ctx->rabin_poly_avg_block_size - 1; - ctx->rabin_poly_min_block_size = rabin_min_blksz(chunksize, rab_blk_sz, algo, delta_flag); + ctx->rabin_poly_min_block_size = dedupe_min_blksz(chunksize, rab_blk_sz, algo, delta_flag); ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size; if (!fixed_flag) @@ -186,7 +186,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz if (ctx->blknum > RABIN_MAX_BLOCKS) { fprintf(stderr, "Chunk size too large for dedup.\n"); - destroy_rabin_context(ctx); + destroy_dedupe_context(ctx); return (NULL); } current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE); @@ -198,7 +198,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz if(ctx == NULL || current_window_data == NULL || (ctx->blocks == NULL && real_chunksize > 0)) { fprintf(stderr, "Could not allocate rabin polynomial context, out of memory\n"); - destroy_rabin_context(ctx); + destroy_dedupe_context(ctx); return (NULL); } @@ -209,7 +209,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz if (!(ctx->lzma_data)) { fprintf(stderr, "Could not initialize LZMA data for dedupe index, out of memory\n"); - destroy_rabin_context(ctx); + destroy_dedupe_context(ctx); return (NULL); } } @@ -227,19 +227,19 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz slab_cache_add(sizeof (rabin_blockentry_t)); ctx->current_window_data = current_window_data; ctx->real_chunksize = real_chunksize; - reset_rabin_context(ctx); + reset_dedupe_context(ctx); return (ctx); } void -reset_rabin_context(rabin_context_t *ctx) +reset_dedupe_context(dedupe_context_t *ctx) { memset(ctx->current_window_data, 0, RAB_POLYNOMIAL_WIN_SIZE); ctx->window_pos = 0; } void -destroy_rabin_context(rabin_context_t *ctx) +destroy_dedupe_context(dedupe_context_t *ctx) { if (ctx) { uint32_t i; @@ -288,11 +288,13 @@ cmpblks(const void *a, const void *b) } /** - * Perform Deduplication based on Rabin Fingerprinting. A 31-byte window is used for - * the rolling checksum and dedup blocks vary in size from 4K-128K. + * Perform Deduplication. + * Both Semi-Rabin fingerprinting based and Fixed Block Deduplication are supported. + * A 16-byte window is used for the rolling checksum and dedup blocks can vary in size + * from 4K-128K. */ uint32_t -rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos) +dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos) { ssize_t i, last_offset, j, fplist_sz; uint32_t blknum; @@ -302,6 +304,40 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s uint32_t *fplist; heap_t heap; + length = offset; + last_offset = 0; + blknum = 0; + ctx->valid = 0; + cur_roll_checksum = 0; + cur_sketch = 0; + + if (ctx->fixed_flag) { + blknum = *size / ctx->rabin_poly_avg_block_size; + j = *size % ctx->rabin_poly_avg_block_size; + if (j) blknum++; + + last_offset = 0; + length = ctx->rabin_poly_avg_block_size; + for (i=0; iblocks[i] == 0) { + ctx->blocks[i] = (rabin_blockentry_t *)slab_alloc(NULL, + sizeof (rabin_blockentry_t)); + } + ctx->blocks[i]->offset = last_offset; + ctx->blocks[i]->index = i; // Need to store for sorting + ctx->blocks[i]->length = length; + ctx->blocks[i]->ref = 0; + ctx->blocks[i]->similar = 0; + ctx->blocks[i]->crc = XXH_strong32(buf1+last_offset, length, 0); + ctx->blocks[i]->cksum_n_offset = ctx->blocks[i]->crc; + last_offset += length; + } + goto process_blocks; + } + if (rabin_pos == NULL) { /* * Initialize arrays for sketch computation. We re-use memory allocated @@ -312,12 +348,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s memset(fplist, 0, fplist_sz); reset_heap(&heap, fplist_sz/2); } - length = offset; - last_offset = 0; - blknum = 0; - ctx->valid = 0; - cur_roll_checksum = 0; - cur_sketch = 0; /* * If rabin_pos is non-zero then we are being asked to scan for the last rabin boundary @@ -434,6 +464,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s } } +process_blocks: DEBUG_STAT_EN(printf("Original size: %lld, blknum: %u\n", *size, blknum)); // If we found at least a few chunks, perform dedup. if (blknum > 2) { @@ -701,7 +732,7 @@ cont: } void -rabin_update_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_sz_cmp) +update_dedupe_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_sz_cmp) { ssize_t *entries; @@ -712,7 +743,7 @@ rabin_update_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp, ssize_t rabin_data_s } void -rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz, +parse_dedupe_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz, ssize_t *rabin_data_sz, ssize_t *dedupe_index_sz_cmp, ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size) { @@ -730,7 +761,7 @@ rabin_parse_hdr(uchar_t *buf, uint32_t *blknum, ssize_t *dedupe_index_sz, } void -rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size) +dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size) { uint32_t blknum, blk, oblk, len; uint32_t *dedupe_index; @@ -738,7 +769,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size) ssize_t dedupe_index_sz, pos1, i; uchar_t *pos2; - rabin_parse_hdr(buf, &blknum, &dedupe_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz); + parse_dedupe_hdr(buf, &blknum, &dedupe_index_sz, &data_sz, &indx_cmp, &data_sz_cmp, &deduped_sz); dedupe_index = (uint32_t *)(buf + RABIN_HDR_SIZE); pos1 = dedupe_index_sz + RABIN_HDR_SIZE; pos2 = ctx->cbuf; @@ -828,7 +859,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size) * TODO: Consolidate rabin dedup and compression/decompression in functions here rather than * messy code in main program. int -rabin_compress(rabin_context_t *ctx, uchar_t *from, ssize_t fromlen, uchar_t *to, ssize_t *tolen, +rabin_compress(dedupe_context_t *ctx, uchar_t *from, ssize_t fromlen, uchar_t *to, ssize_t *tolen, int level, char chdr, void *data, compress_func_ptr cmp) { } diff --git a/rabin/rabin_dedup.h b/rabin/rabin_dedup.h index 15c9075..df00e8e 100644 --- a/rabin/rabin_dedup.h +++ b/rabin/rabin_dedup.h @@ -150,21 +150,21 @@ typedef struct { short valid; void *lzma_data; int level, delta_flag, fixed_flag; -} rabin_context_t; +} dedupe_context_t; -extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, +extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag); -extern void destroy_rabin_context(rabin_context_t *ctx); -extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf, +extern void destroy_dedupe_context(dedupe_context_t *ctx); +extern unsigned int dedupe_compress(dedupe_context_t *ctx, unsigned char *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos); -extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size); -extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz, +extern void dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size); +extern void parse_dedupe_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz, ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp, ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size); -extern void rabin_update_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp, +extern void update_dedupe_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp, ssize_t rabin_data_sz_cmp); -extern void reset_rabin_context(rabin_context_t *ctx); -extern uint32_t rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, +extern void reset_dedupe_context(dedupe_context_t *ctx); +extern uint32_t dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_flag); #endif /* _RABIN_POLY_H_ */ diff --git a/utils/utils.c b/utils/utils.c index 75052cd..057c0f8 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -223,7 +223,7 @@ Read_Adjusted(int fd, uchar_t *buf, size_t count, ssize_t *rabin_count, void *ct { char *buf2; ssize_t rcount; - rabin_context_t *rctx = (rabin_context_t *)ctx; + dedupe_context_t *rctx = (dedupe_context_t *)ctx; if (!ctx) return (Read(fd, buf, count)); buf2 = buf; @@ -235,7 +235,7 @@ Read_Adjusted(int fd, uchar_t *buf, size_t count, ssize_t *rabin_count, void *ct if (rcount > 0) { rcount += *rabin_count; if (rcount == count) - rabin_dedup(rctx, buf, &rcount, 0, rabin_count); + dedupe_compress(rctx, buf, &rcount, 0, rabin_count); else *rabin_count = 0; } else {