Implement algo-specific minimum distance match for Delta Compression.
This commit is contained in:
parent
d49a088eea
commit
39dbc4be43
12 changed files with 44 additions and 13 deletions
|
@ -99,6 +99,7 @@ void
|
|||
/*
 * Populate the delta-related fields of the algorithm properties structure:
 * delta2_span is set to 200 and deltac_min_distance to EIGHTM
 * (8 * 1024 * 1024). The level and chunksize arguments are not consulted.
 *
 * deltac_min_distance is later copied into the dedupe context, where two
 * similar blocks are paired for delta compression only when their offsets
 * differ by more than this value.
 */
adapt_props(algo_props_t *data, int level, uint64_t chunksize)
|
||||
{
|
||||
data->delta2_span = 200;
|
||||
data->deltac_min_distance = EIGHTM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -51,6 +51,7 @@ bzip2_stats(int show)
|
|||
/*
 * Populate the delta-related fields of the algorithm properties structure:
 * delta2_span is set to 200 and deltac_min_distance to FOURM
 * (4 * 1024 * 1024). The level and chunksize arguments are not consulted.
 *
 * deltac_min_distance is later copied into the dedupe context, where two
 * similar blocks are paired for delta compression only when their offsets
 * differ by more than this value.
 */
void
|
||||
bzip2_props(algo_props_t *data, int level, uint64_t chunksize) {
|
||||
data->delta2_span = 200;
|
||||
data->deltac_min_distance = FOURM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -80,6 +80,10 @@ libbsc_props(algo_props_t *data, int level, uint64_t chunksize) {
|
|||
data->c_max_threads = 8;
|
||||
data->d_max_threads = 8;
|
||||
data->delta2_span = 150;
|
||||
if (chunksize > (EIGHTM * 2))
|
||||
data->deltac_min_distance = FOURM;
|
||||
else
|
||||
data->deltac_min_distance = EIGHTM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -57,6 +57,7 @@ lz4_props(algo_props_t *data, int level, uint64_t chunksize) {
|
|||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = lz4_buf_extra(chunksize);
|
||||
data->delta2_span = 100;
|
||||
data->deltac_min_distance = FOURM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -42,6 +42,7 @@ lz_fx_stats(int show)
|
|||
/*
 * Populate the delta-related fields of the algorithm properties structure:
 * delta2_span is set to 50 and deltac_min_distance to FOURM
 * (4 * 1024 * 1024). The level and chunksize arguments are not consulted.
 *
 * deltac_min_distance is later copied into the dedupe context, where two
 * similar blocks are paired for delta compression only when their offsets
 * differ by more than this value.
 */
void
|
||||
lz_fx_props(algo_props_t *data, int level, uint64_t chunksize) {
|
||||
data->delta2_span = 50;
|
||||
data->deltac_min_distance = FOURM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -53,6 +53,10 @@ lzma_mt_props(algo_props_t *data, int level, uint64_t chunksize) {
|
|||
data->buf_extra = 0;
|
||||
data->c_max_threads = 2;
|
||||
data->delta2_span = 150;
|
||||
if (level < 12)
|
||||
data->deltac_min_distance = (EIGHTM * 16);
|
||||
else
|
||||
data->deltac_min_distance = (EIGHTM * 32);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -61,6 +65,10 @@ lzma_props(algo_props_t *data, int level, uint64_t chunksize) {
|
|||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = 0;
|
||||
data->delta2_span = 150;
|
||||
if (level < 12)
|
||||
data->deltac_min_distance = (EIGHTM * 16);
|
||||
else
|
||||
data->deltac_min_distance = (EIGHTM * 32);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
6
main.c
6
main.c
|
@ -951,7 +951,7 @@ start_decompress(const char *filename, const char *to_filename)
|
|||
}
|
||||
if (enable_rabin_scan || enable_fixed_scan) {
|
||||
tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size,
|
||||
algo, enable_delta_encode, enable_fixed_scan, version, DECOMPRESS);
|
||||
algo, &props, enable_delta_encode, enable_fixed_scan, version, DECOMPRESS);
|
||||
if (tdat->rctx == NULL) {
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
|
@ -1673,7 +1673,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
|
|||
}
|
||||
if (enable_rabin_scan || enable_fixed_scan) {
|
||||
tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size,
|
||||
algo, enable_delta_encode, enable_fixed_scan, VERSION, COMPRESS);
|
||||
algo, &props, enable_delta_encode, enable_fixed_scan, VERSION, COMPRESS);
|
||||
if (tdat->rctx == NULL) {
|
||||
COMP_BAIL;
|
||||
}
|
||||
|
@ -1789,7 +1789,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
|
|||
* Read the first chunk into a spare buffer (a simple double-buffering).
|
||||
*/
|
||||
if (enable_rabin_split) {
|
||||
rctx = create_dedupe_context(chunksize, 0, 0, algo, enable_delta_encode,
|
||||
rctx = create_dedupe_context(chunksize, 0, 0, algo, &props, enable_delta_encode,
|
||||
enable_fixed_scan, VERSION, COMPRESS);
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx);
|
||||
} else {
|
||||
|
|
|
@ -64,6 +64,7 @@ ppmd_stats(int show)
|
|||
/*
 * Populate the delta-related fields of the algorithm properties structure:
 * delta2_span is set to 100 and deltac_min_distance to FOURM
 * (4 * 1024 * 1024). The level and chunksize arguments are not consulted.
 *
 * deltac_min_distance is later copied into the dedupe context, where two
 * similar blocks are paired for delta compression only when their offsets
 * differ by more than this value.
 */
void
|
||||
ppmd_props(algo_props_t *data, int level, uint64_t chunksize) {
|
||||
data->delta2_span = 100;
|
||||
data->deltac_min_distance = FOURM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -116,7 +116,8 @@ dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta
|
|||
*/
|
||||
dedupe_context_t *
|
||||
create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz,
|
||||
const char *algo, int delta_flag, int fixed_flag, int file_version, compress_op_t op) {
|
||||
const char *algo, const algo_props_t *props, int delta_flag, int fixed_flag,
|
||||
int file_version, compress_op_t op) {
|
||||
dedupe_context_t *ctx;
|
||||
uint32_t i;
|
||||
|
||||
|
@ -189,6 +190,7 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
|
|||
ctx->rabin_avg_block_mask = RAB_BLK_MASK;
|
||||
ctx->rabin_poly_min_block_size = dedupe_min_blksz(rab_blk_sz);
|
||||
ctx->delta_flag = 0;
|
||||
ctx->deltac_min_distance = props->deltac_min_distance;
|
||||
|
||||
/*
|
||||
* Scale down similarity percentage based on avg block size unless user specified
|
||||
|
@ -582,12 +584,20 @@ process_blocks:
|
|||
while (1) {
|
||||
if (be->similarity_hash == ctx->blocks[i]->similarity_hash &&
|
||||
be->length == ctx->blocks[i]->length) {
|
||||
ctx->blocks[i]->similar = SIMILAR_PARTIAL;
|
||||
ctx->blocks[i]->other = be;
|
||||
be->similar = SIMILAR_REF;
|
||||
matchlen += (be->length>>1);
|
||||
length = 1;
|
||||
break;
|
||||
uint64_t off_diff;
|
||||
if (be->offset > ctx->blocks[i]->offset)
|
||||
off_diff = be->offset - ctx->blocks[i]->offset;
|
||||
else
|
||||
off_diff = ctx->blocks[i]->offset - be->offset;
|
||||
|
||||
if (off_diff > ctx->deltac_min_distance) {
|
||||
ctx->blocks[i]->similar = SIMILAR_PARTIAL;
|
||||
ctx->blocks[i]->other = be;
|
||||
be->similar = SIMILAR_REF;
|
||||
matchlen += (be->length>>1);
|
||||
length = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (be->next)
|
||||
be = be->next;
|
||||
|
|
|
@ -159,12 +159,12 @@ typedef struct {
|
|||
uint64_t real_chunksize;
|
||||
short valid;
|
||||
void *lzma_data;
|
||||
int level, delta_flag, fixed_flag;
|
||||
int level, delta_flag, fixed_flag, deltac_min_distance;
|
||||
} dedupe_context_t;
|
||||
|
||||
extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize,
|
||||
int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag, int file_version,
|
||||
compress_op_t op);
|
||||
int rab_blk_sz, const char *algo, const algo_props_t *props, int delta_flag, int fixed_flag,
|
||||
int file_version, compress_op_t op);
|
||||
extern void destroy_dedupe_context(dedupe_context_t *ctx);
|
||||
extern unsigned int dedupe_compress(dedupe_context_t *ctx, unsigned char *buf,
|
||||
uint64_t *size, uint64_t offset, uint64_t *rabin_pos);
|
||||
|
|
|
@ -42,6 +42,8 @@ extern "C" {
|
|||
|
||||
#define DATA_TEXT 1
|
||||
#define DATA_BINARY 2
|
||||
/*
 * Named size constants: 8 * 1024 * 1024 and 4 * 1024 * 1024.
 * The per-algorithm *_props functions in this change assign these values
 * (or multiples of EIGHTM) to deltac_min_distance.
 *
 * NOTE(review): both expand to unsigned long expressions, while the
 * deltac_min_distance fields are declared int. The largest value assigned
 * in this change is EIGHTM * 32 (2^28) -- confirm it stays within int range
 * if larger multiples are ever introduced.
 */
#define EIGHTM (8UL * 1024UL * 1024UL)
|
||||
#define FOURM (4UL * 1024UL * 1024UL)
|
||||
|
||||
#if !defined(sun) && !defined(__sun)
|
||||
#define uchar_t u_char
|
||||
|
@ -127,6 +129,7 @@ typedef struct {
|
|||
int c_max_threads;
|
||||
int d_max_threads;
|
||||
int delta2_span;
|
||||
int deltac_min_distance;
|
||||
} algo_props_t;
|
||||
|
||||
typedef enum {
|
||||
|
|
|
@ -92,6 +92,7 @@ zlib_stats(int show)
|
|||
/*
 * Populate the delta-related fields of the algorithm properties structure:
 * delta2_span is set to 100 and deltac_min_distance to EIGHTM
 * (8 * 1024 * 1024). The level and chunksize arguments are not consulted.
 *
 * deltac_min_distance is later copied into the dedupe context, where two
 * similar blocks are paired for delta compression only when their offsets
 * differ by more than this value.
 */
void
|
||||
zlib_props(algo_props_t *data, int level, uint64_t chunksize) {
|
||||
data->delta2_span = 100;
|
||||
data->deltac_min_distance = EIGHTM;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
Loading…
Reference in a new issue