Use different min block size and Rabin break pattern depending on compression algo.
Clean up some cruft.
This commit is contained in:
parent
f5ce45b16e
commit
ea923b84f0
4 changed files with 11 additions and 15 deletions
2
main.c
2
main.c
|
@ -602,7 +602,7 @@ redo:
|
||||||
rbytes = tdat->rbytes;
|
rbytes = tdat->rbytes;
|
||||||
reset_rabin_context(tdat->rctx);
|
reset_rabin_context(tdat->rctx);
|
||||||
rctx->cbuf = tdat->uncompressed_chunk;
|
rctx->cbuf = tdat->uncompressed_chunk;
|
||||||
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
|
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0);
|
||||||
if (!rctx->valid) {
|
if (!rctx->valid) {
|
||||||
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
||||||
tdat->rbytes = rbytes;
|
tdat->rbytes = rbytes;
|
||||||
|
|
|
@ -105,10 +105,12 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *al
|
||||||
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE;
|
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE;
|
||||||
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK;
|
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK;
|
||||||
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE;
|
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE;
|
||||||
|
ctx->rabin_break_patt = RAB_POLYNOMIAL_CONST;
|
||||||
} else {
|
} else {
|
||||||
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
|
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
|
||||||
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
|
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
|
||||||
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
|
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
|
||||||
|
ctx->rabin_break_patt = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
blknum = chunksize / ctx->rabin_poly_min_block_size;
|
blknum = chunksize / ctx->rabin_poly_min_block_size;
|
||||||
|
@ -196,7 +198,7 @@ cmpblks(const void *a, const void *b)
|
||||||
* the rolling checksum and dedup blocks vary in size from 4K-128K.
|
* the rolling checksum and dedup blocks vary in size from 4K-128K.
|
||||||
*/
|
*/
|
||||||
uint32_t
|
uint32_t
|
||||||
rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos)
|
rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset)
|
||||||
{
|
{
|
||||||
ssize_t i, last_offset,j;
|
ssize_t i, last_offset,j;
|
||||||
uint32_t blknum;
|
uint32_t blknum;
|
||||||
|
@ -214,7 +216,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
char cur_byte = buf1[i];
|
char cur_byte = buf1[i];
|
||||||
uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
|
uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
|
||||||
ctx->current_window_data[ctx->window_pos] = cur_byte;
|
ctx->current_window_data[ctx->window_pos] = cur_byte;
|
||||||
int msk;
|
|
||||||
/*
|
/*
|
||||||
* We want to do:
|
* We want to do:
|
||||||
* cur_roll_checksum = cur_roll_checksum * RAB_POLYNOMIAL_CONST + cur_byte;
|
* cur_roll_checksum = cur_roll_checksum * RAB_POLYNOMIAL_CONST + cur_byte;
|
||||||
|
@ -238,7 +239,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
if (length < ctx->rabin_poly_min_block_size) continue;
|
if (length < ctx->rabin_poly_min_block_size) continue;
|
||||||
|
|
||||||
// If we hit our special value or reached the max block size update block offset
|
// If we hit our special value or reached the max block size update block offset
|
||||||
if ((ctx->cur_roll_checksum & ctx->rabin_avg_block_mask) == RAB_POLYNOMIAL_CONST ||
|
if ((ctx->cur_roll_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt ||
|
||||||
length >= rabin_polynomial_max_block_size) {
|
length >= rabin_polynomial_max_block_size) {
|
||||||
ctx->blocks[blknum].offset = last_offset;
|
ctx->blocks[blknum].offset = last_offset;
|
||||||
ctx->blocks[blknum].index = blknum; // Need to store for sorting
|
ctx->blocks[blknum].index = blknum; // Need to store for sorting
|
||||||
|
@ -252,10 +253,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rabin_pos) {
|
|
||||||
*rabin_pos = last_offset;
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
// If we found at least a few chunks, perform dedup.
|
// If we found at least a few chunks, perform dedup.
|
||||||
if (blknum > 2) {
|
if (blknum > 2) {
|
||||||
uint64_t prev_cksum;
|
uint64_t prev_cksum;
|
||||||
|
@ -388,7 +385,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
} else {
|
} else {
|
||||||
prev_index = 0;
|
prev_index = 0;
|
||||||
prev_length = 0;
|
prev_length = 0;
|
||||||
blkarr[blk] = htonl(be->index | RABIN_INDEX_FLAG);
|
|
||||||
rabin_index[pos] = be->index | RABIN_INDEX_FLAG;
|
rabin_index[pos] = be->index | RABIN_INDEX_FLAG;
|
||||||
trans[blk] = pos;
|
trans[blk] = pos;
|
||||||
pos++;
|
pos++;
|
||||||
|
|
|
@ -74,10 +74,10 @@
|
||||||
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE RAB_POLYNOMIAL_AVG_BLOCK_SIZE
|
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE RAB_POLYNOMIAL_AVG_BLOCK_SIZE
|
||||||
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024)
|
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024)
|
||||||
|
|
||||||
#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT2 10
|
#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT2 12
|
||||||
#define RAB_POLYNOMIAL_AVG_BLOCK_SIZE2 (1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT)
|
#define RAB_POLYNOMIAL_AVG_BLOCK_SIZE2 (1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT)
|
||||||
#define RAB_POLYNOMIAL_AVG_BLOCK_MASK2 (RAB_POLYNOMIAL_AVG_BLOCK_SIZE - 1)
|
#define RAB_POLYNOMIAL_AVG_BLOCK_MASK2 (RAB_POLYNOMIAL_AVG_BLOCK_SIZE - 1)
|
||||||
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE2 RAB_POLYNOMIAL_AVG_BLOCK_SIZE
|
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE2 2048
|
||||||
|
|
||||||
#define RAB_POLYNOMIAL_WIN_SIZE 31
|
#define RAB_POLYNOMIAL_WIN_SIZE 31
|
||||||
#define RAB_POLYNOMIAL_MIN_WIN_SIZE 17
|
#define RAB_POLYNOMIAL_MIN_WIN_SIZE 17
|
||||||
|
@ -131,6 +131,7 @@ typedef struct {
|
||||||
uint32_t rabin_poly_min_block_size;
|
uint32_t rabin_poly_min_block_size;
|
||||||
uint32_t rabin_poly_avg_block_size;
|
uint32_t rabin_poly_avg_block_size;
|
||||||
uint32_t rabin_avg_block_mask;
|
uint32_t rabin_avg_block_mask;
|
||||||
|
uint32_t rabin_break_patt;
|
||||||
uint64_t real_chunksize;
|
uint64_t real_chunksize;
|
||||||
int dedup;
|
int dedup;
|
||||||
int valid;
|
int valid;
|
||||||
|
@ -138,10 +139,11 @@ typedef struct {
|
||||||
int level;
|
int level;
|
||||||
} rabin_context_t;
|
} rabin_context_t;
|
||||||
|
|
||||||
extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *algo);
|
extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize,
|
||||||
|
const char *algo);
|
||||||
extern void destroy_rabin_context(rabin_context_t *ctx);
|
extern void destroy_rabin_context(rabin_context_t *ctx);
|
||||||
extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf,
|
extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf,
|
||||||
ssize_t *size, ssize_t offset, ssize_t *rabin_pos);
|
ssize_t *size, ssize_t offset);
|
||||||
extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size);
|
extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size);
|
||||||
extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz,
|
extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz,
|
||||||
ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,
|
ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,
|
||||||
|
|
2
utils.h
2
utils.h
|
@ -98,8 +98,6 @@ extern int parse_numeric(ssize_t *val, const char *str);
|
||||||
extern char *bytes_to_size(uint64_t bytes);
|
extern char *bytes_to_size(uint64_t bytes);
|
||||||
extern ssize_t Read(int fd, void *buf, size_t count);
|
extern ssize_t Read(int fd, void *buf, size_t count);
|
||||||
extern ssize_t Write(int fd, const void *buf, size_t count);
|
extern ssize_t Write(int fd, const void *buf, size_t count);
|
||||||
// extern ssize_t Dedup_Read(int fd, uchar_t **buf, size_t count,
|
|
||||||
// ssize_t *rabin_count, void *ctx);
|
|
||||||
|
|
||||||
/* Pointer type for compress and decompress functions. */
|
/* Pointer type for compress and decompress functions. */
|
||||||
typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst,
|
typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst,
|
||||||
|
|
Loading…
Reference in a new issue