diff --git a/main.c b/main.c index 5895cfb..471a877 100644 --- a/main.c +++ b/main.c @@ -602,7 +602,7 @@ redo: rbytes = tdat->rbytes; reset_rabin_context(tdat->rctx); rctx->cbuf = tdat->uncompressed_chunk; - rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); + rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0); if (!rctx->valid) { memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes); tdat->rbytes = rbytes; diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c index fcc8306..2eff7ac 100755 --- a/rabin/rabin_polynomial.c +++ b/rabin/rabin_polynomial.c @@ -105,10 +105,12 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *al ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE; ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK; ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE; + ctx->rabin_break_patt = RAB_POLYNOMIAL_CONST; } else { ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2; ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2; ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2; + ctx->rabin_break_patt = 0; } blknum = chunksize / ctx->rabin_poly_min_block_size; @@ -196,7 +198,7 @@ cmpblks(const void *a, const void *b) * the rolling checksum and dedup blocks vary in size from 4K-128K. */ uint32_t -rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos) +rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset) { ssize_t i, last_offset,j; uint32_t blknum; @@ -214,7 +216,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s char cur_byte = buf1[i]; uint64_t pushed_out = ctx->current_window_data[ctx->window_pos]; ctx->current_window_data[ctx->window_pos] = cur_byte; - int msk; /* * We want to do: * cur_roll_checksum = cur_roll_checksum * RAB_POLYNOMIAL_CONST + cur_byte; @@ -238,7 +239,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s if (length < ctx->rabin_poly_min_block_size) continue; // If we hit our special value or reached the max block size update block offset - if ((ctx->cur_roll_checksum & ctx->rabin_avg_block_mask) == RAB_POLYNOMIAL_CONST || + if ((ctx->cur_roll_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt || length >= rabin_polynomial_max_block_size) { ctx->blocks[blknum].offset = last_offset; ctx->blocks[blknum].index = blknum; // Need to store for sorting @@ -252,10 +253,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s } } - if (rabin_pos) { - *rabin_pos = last_offset; - return (0); - } // If we found at least a few chunks, perform dedup. if (blknum > 2) { uint64_t prev_cksum; @@ -388,7 +385,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s } else { prev_index = 0; prev_length = 0; - blkarr[blk] = htonl(be->index | RABIN_INDEX_FLAG); rabin_index[pos] = be->index | RABIN_INDEX_FLAG; trans[blk] = pos; pos++; diff --git a/rabin/rabin_polynomial.h b/rabin/rabin_polynomial.h index e6ce575..3de307a 100644 --- a/rabin/rabin_polynomial.h +++ b/rabin/rabin_polynomial.h @@ -74,10 +74,10 @@ #define RAB_POLYNOMIAL_MIN_BLOCK_SIZE RAB_POLYNOMIAL_AVG_BLOCK_SIZE #define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024) -#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT2 10 +#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT2 12 #define RAB_POLYNOMIAL_AVG_BLOCK_SIZE2 (1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT) #define RAB_POLYNOMIAL_AVG_BLOCK_MASK2 (RAB_POLYNOMIAL_AVG_BLOCK_SIZE - 1) -#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE2 RAB_POLYNOMIAL_AVG_BLOCK_SIZE +#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE2 2048 #define RAB_POLYNOMIAL_WIN_SIZE 31 #define RAB_POLYNOMIAL_MIN_WIN_SIZE 17 @@ -131,6 +131,7 @@ typedef struct { uint32_t rabin_poly_min_block_size; uint32_t rabin_poly_avg_block_size; uint32_t rabin_avg_block_mask; + uint32_t rabin_break_patt; uint64_t real_chunksize; int dedup; int valid; @@ -138,10 +139,11 @@ typedef struct { int level; } rabin_context_t; -extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *algo); +extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, + const char *algo); extern void destroy_rabin_context(rabin_context_t *ctx); extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf, - ssize_t *size, ssize_t offset, ssize_t *rabin_pos); + ssize_t *size, ssize_t offset); extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size); extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz, ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp, diff --git a/utils.h b/utils.h index 9b58f07..46529bf 100644 --- a/utils.h +++ b/utils.h @@ -98,8 +98,6 @@ extern int parse_numeric(ssize_t *val, const char *str); extern char *bytes_to_size(uint64_t bytes); extern ssize_t Read(int fd, void *buf, size_t count); extern ssize_t Write(int fd, const void *buf, size_t count); -// extern ssize_t Dedup_Read(int fd, uchar_t **buf, size_t count, -// ssize_t *rabin_count, void *ctx); /* Pointer type for compress and decompress functions. */ typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst,