Compare commits
7 commits
master
...
algo-analy
Author | SHA1 | Date | |
---|---|---|---|
|
043cdfc05c | ||
|
ae3ba0858c | ||
|
a2d74dab50 | ||
|
e732e86b91 | ||
|
84944932b0 | ||
|
b8f4a5d411 | ||
|
916f31d62b |
5 changed files with 143 additions and 47 deletions
28
pcompress.c
28
pcompress.c
|
@ -1103,12 +1103,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename)
|
|||
}
|
||||
}
|
||||
|
||||
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
if (pctx->nthreads > 0 && pctx->nthreads < nprocs)
|
||||
nprocs = pctx->nthreads;
|
||||
else
|
||||
pctx->nthreads = nprocs;
|
||||
|
||||
nprocs = 1;
|
||||
set_threadcounts(&props, &(pctx->nthreads), nprocs, DECOMPRESS_THREADS);
|
||||
if (props.is_single_chunk)
|
||||
pctx->nthreads = 1;
|
||||
|
@ -1662,7 +1657,8 @@ repeat:
|
|||
pctx->avg_chunk += tdat->len_cmp;
|
||||
}
|
||||
|
||||
wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp);
|
||||
//wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp);
|
||||
wbytes = tdat->len_cmp;
|
||||
if (unlikely(wbytes != tdat->len_cmp)) {
|
||||
perror("Chunk Write: ");
|
||||
do_cancel:
|
||||
|
@ -1785,12 +1781,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
|
|||
thread = 0;
|
||||
single_chunk = 0;
|
||||
rctx = NULL;
|
||||
|
||||
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
if (pctx->nthreads > 0 && pctx->nthreads < nprocs)
|
||||
nprocs = pctx->nthreads;
|
||||
else
|
||||
pctx->nthreads = nprocs;
|
||||
nprocs = 1;
|
||||
|
||||
/* A host of sanity checks. */
|
||||
if (!pctx->pipe_mode) {
|
||||
|
@ -2057,6 +2048,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
|
|||
COMP_BAIL;
|
||||
}
|
||||
|
||||
tdat->rctx->full_chunking = pctx->full_chunking;
|
||||
tdat->rctx->index_sem = &(tdat->index_sem);
|
||||
tdat->rctx->id = i;
|
||||
}
|
||||
|
@ -2182,6 +2174,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
|
|||
if (pctx->enable_rabin_split) {
|
||||
rctx = create_dedupe_context(chunksize, 0, 0, pctx->algo, &props, pctx->enable_delta_encode,
|
||||
pctx->enable_fixed_scan, VERSION, COMPRESS, 0, NULL, pctx->pipe_mode, nprocs);
|
||||
rctx->full_chunking = 0;
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx);
|
||||
} else {
|
||||
rbytes = Read(uncompfd, cread_buf, chunksize);
|
||||
|
@ -2387,6 +2380,7 @@ comp_done:
|
|||
if (uncompfd != -1) close(uncompfd);
|
||||
}
|
||||
|
||||
dump_frequencies();
|
||||
if (!pctx->hide_cmp_stats) show_compression_stats(pctx);
|
||||
pctx->_stats_func(!pctx->hide_cmp_stats);
|
||||
|
||||
|
@ -2533,6 +2527,7 @@ create_pc_context(void)
|
|||
ctx->hide_mem_stats = 1;
|
||||
ctx->hide_cmp_stats = 1;
|
||||
ctx->enable_rabin_split = 1;
|
||||
ctx->rab_blk_size = 1;
|
||||
|
||||
return (ctx);
|
||||
}
|
||||
|
@ -2579,7 +2574,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
||||
|
||||
pthread_mutex_lock(&opt_parse);
|
||||
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDGEe:w:rLPS:B:Fk:")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDGEe:w:rLPS:B:Fk:f")) != -1) {
|
||||
int ovr;
|
||||
int64_t chunksize;
|
||||
|
||||
|
@ -2716,6 +2711,10 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
}
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
pctx->full_chunking = 1;
|
||||
break;
|
||||
|
||||
case '?':
|
||||
default:
|
||||
return (2);
|
||||
|
@ -2726,6 +2725,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
optind = 0;
|
||||
pthread_mutex_unlock(&opt_parse);
|
||||
|
||||
pctx->nthreads = 1;
|
||||
if ((pctx->do_compress && pctx->do_uncompress) || (!pctx->do_compress && !pctx->do_uncompress)) {
|
||||
return (2);
|
||||
}
|
||||
|
|
|
@ -197,6 +197,7 @@ typedef struct pc_ctx {
|
|||
int enable_fixed_scan;
|
||||
int lzp_preprocess;
|
||||
int encrypt_type;
|
||||
int full_chunking;
|
||||
unsigned int chunk_num;
|
||||
uint64_t largest_chunk, smallest_chunk, avg_chunk;
|
||||
uint64_t chunksize;
|
||||
|
|
|
@ -373,7 +373,7 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim
|
|||
cfg->archive_sz = file_sz;
|
||||
cfg->dedupe_mode = MODE_SIMILARITY;
|
||||
|
||||
if (cfg->archive_sz <= SIXTEEN_GB && (pct_interval == 0 || pct_interval == 100)) {
|
||||
if (pct_interval == 0 || pct_interval == 100) {
|
||||
cfg->dedupe_mode = MODE_SIMPLE;
|
||||
cfg->segment_sz_bytes = user_chunk_sz;
|
||||
cfg->similarity_cksum_sz = cfg->chunk_cksum_sz;
|
||||
|
|
|
@ -114,6 +114,12 @@ uint64_t ir[256], out[256];
|
|||
static int inited = 0;
|
||||
archive_config_t *arc = NULL;
|
||||
|
||||
static uint64_t freqs[RAB_POLYNOMIAL_MAX_BLOCK_SIZE+1];
|
||||
static uint64_t tot_chunks = 0, min_chunk;
|
||||
static uint64_t tot_size = 0, non_hashed_size = 0;
|
||||
static double tot_time = 0;
|
||||
static int full_chunking = 0;
|
||||
|
||||
static uint32_t
|
||||
dedupe_min_blksz(int rab_blk_sz)
|
||||
{
|
||||
|
@ -132,6 +138,46 @@ dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta
|
|||
return ((chunksize / dedupe_min_blksz(rab_blk_sz)) * sizeof (uint32_t));
|
||||
}
|
||||
|
||||
void
|
||||
dump_frequencies()
|
||||
{
|
||||
int i, j, limit;
|
||||
uint64_t tot;
|
||||
double tot_c, tot_s, bytes_sec;
|
||||
|
||||
printf("\nChunk Frequency Distribution\n");
|
||||
printf("====================================\n");
|
||||
|
||||
printf("Min chunk size: %" PRIu64 "\n", min_chunk);
|
||||
|
||||
if (full_chunking)
|
||||
limit = 1024;
|
||||
else
|
||||
limit = 4096;
|
||||
for (i = 1; i <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE;) {
|
||||
tot = 0;
|
||||
for (j = 0; j < limit; j++) tot += freqs[i++];
|
||||
if (tot > 0) {
|
||||
printf("%3d KB: %" PRIu64 "\n", i/1024, tot);
|
||||
}
|
||||
}
|
||||
printf("====================================\n");
|
||||
printf("Number of chunks : %" PRIu64 "\n", tot_chunks);
|
||||
tot_c = tot_chunks;
|
||||
tot_s = tot_size;
|
||||
printf("Average chunk size: %.2F Bytes\n", tot_s / tot_c);
|
||||
|
||||
bytes_sec = tot_s / tot_time * 1000;
|
||||
printf("Average chunking speed: %.3f MB/s\n", BYTES_TO_MB(bytes_sec));
|
||||
|
||||
tot_c = non_hashed_size;
|
||||
printf("Total data length: %" PRIu64 "\n", tot_size);
|
||||
printf("Hashed data length: %" PRIu64 "\n", tot_size - non_hashed_size);
|
||||
printf("%%age of roll hash coverage: %.2f%%\n", (1 - tot_c / tot_s) * 100);
|
||||
|
||||
printf("====================================\n");
|
||||
}
|
||||
|
||||
/*
|
||||
 * Helper function to let caller size the user specific compression chunk/segment
|
||||
* to align with deduplication requirements.
|
||||
|
@ -185,6 +231,7 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
|
|||
int term, pow, j;
|
||||
uint64_t val, poly_pow;
|
||||
|
||||
memset(freqs, 0, sizeof (freqs));
|
||||
poly_pow = 1;
|
||||
for (j = 0; j < RAB_POLYNOMIAL_WIN_SIZE; j++) {
|
||||
poly_pow = (poly_pow * RAB_POLYNOMIAL_CONST) & POLY_MASK;
|
||||
|
@ -281,6 +328,7 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
|
|||
ctx->rabin_poly_avg_block_size = RAB_BLK_AVG_SZ(rab_blk_sz);
|
||||
ctx->rabin_avg_block_mask = RAB_BLK_MASK;
|
||||
ctx->rabin_poly_min_block_size = dedupe_min_blksz(rab_blk_sz);
|
||||
min_chunk = ctx->rabin_poly_min_block_size;
|
||||
ctx->delta_flag = 0;
|
||||
ctx->deltac_min_distance = props->deltac_min_distance;
|
||||
ctx->pagesize = sysconf(_SC_PAGE_SIZE);
|
||||
|
@ -478,9 +526,9 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
uint32_t *ctx_heap;
|
||||
rabin_blockentry_t **htab;
|
||||
MinHeap heap;
|
||||
DEBUG_STAT_EN(uint32_t max_count);
|
||||
DEBUG_STAT_EN(max_count = 0);
|
||||
DEBUG_STAT_EN(double strt, en_1, en);
|
||||
DEBUG_STAT_EN(uint32_t max_count = 0);
|
||||
DEBUG_STAT_EN(double en);
|
||||
double strt, en_1;
|
||||
|
||||
length = offset;
|
||||
last_offset = 0;
|
||||
|
@ -488,8 +536,9 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
window_pos = 0;
|
||||
ctx->valid = 0;
|
||||
cur_roll_checksum = 0;
|
||||
full_chunking = ctx->full_chunking;
|
||||
if (*size < ctx->rabin_poly_avg_block_size) return (0);
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
strt = get_wtime_millis();
|
||||
|
||||
if (ctx->dedupe_flag == RABIN_DEDUPE_FIXED) {
|
||||
blknum = *size / ctx->rabin_poly_avg_block_size;
|
||||
|
@ -516,7 +565,13 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
ctx->blocks[i]->hash = XXH32(buf1+last_offset, length, 0);
|
||||
ctx->blocks[i]->similarity_hash = ctx->blocks[i]->hash;
|
||||
last_offset += length;
|
||||
tot_chunks++;
|
||||
tot_size += length;
|
||||
non_hashed_size += length;
|
||||
}
|
||||
en_1 = get_wtime_millis();
|
||||
tot_time += en_1 - strt;
|
||||
for (i=0; i<blknum; i++) freqs[ctx->blocks[i]->length]++;
|
||||
goto process_blocks;
|
||||
}
|
||||
|
||||
|
@ -538,6 +593,9 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
ary_sz += ctx->rabin_poly_max_block_size;
|
||||
ctx_heap = (uint32_t *)(ctx->cbuf + ctx->real_chunksize - ary_sz);
|
||||
}
|
||||
if (ctx->full_chunking) {
|
||||
ctx->rabin_poly_min_block_size = 1;
|
||||
}
|
||||
#ifndef SSE_MODE
|
||||
memset(ctx->current_window_data, 0, RAB_POLYNOMIAL_WIN_SIZE);
|
||||
#else
|
||||
|
@ -600,8 +658,13 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
* Start our sliding window at a fixed number of bytes before the min window size.
|
||||
* It is pointless to slide the window over the whole length of the chunk.
|
||||
*/
|
||||
if (ctx->full_chunking) {
|
||||
offset = 0;
|
||||
} else {
|
||||
offset = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||
}
|
||||
length = offset;
|
||||
non_hashed_size += offset;
|
||||
for (i=offset; i<j; i++) {
|
||||
uint64_t pc[4];
|
||||
uint32_t cur_byte = buf1[i];
|
||||
|
@ -648,6 +711,7 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
if ((cur_pos_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt ||
|
||||
length >= ctx->rabin_poly_max_block_size) {
|
||||
|
||||
if (!(ctx->full_chunking)) {
|
||||
if (!(ctx->arc)) {
|
||||
if (ctx->blocks[blknum] == 0)
|
||||
ctx->blocks[blknum] = (rabin_blockentry_t *)slab_alloc(NULL,
|
||||
|
@ -659,6 +723,11 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
ctx->g_blocks[blknum].length = length;
|
||||
ctx->g_blocks[blknum].offset = last_offset;
|
||||
}
|
||||
} else {
|
||||
freqs[length]++;
|
||||
}
|
||||
tot_chunks++;
|
||||
tot_size += length;
|
||||
DEBUG_STAT_EN(if (length >= ctx->rabin_poly_max_block_size) ++max_count);
|
||||
|
||||
/*
|
||||
|
@ -688,14 +757,19 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
last_offset = i+1;
|
||||
length = 0;
|
||||
if (*size - last_offset <= ctx->rabin_poly_min_block_size) break;
|
||||
if (ctx->full_chunking == 0) {
|
||||
length = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||
i = i + length;
|
||||
}
|
||||
non_hashed_size += length;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the last left-over trailing bytes, if any, into a block.
|
||||
if (last_offset < *size) {
|
||||
length = *size - last_offset;
|
||||
non_hashed_size += length;
|
||||
if (!(ctx->full_chunking)) {
|
||||
if (!(ctx->arc)) {
|
||||
if (ctx->blocks[blknum] == 0)
|
||||
ctx->blocks[blknum] = (rabin_blockentry_t *)slab_alloc(NULL,
|
||||
|
@ -707,7 +781,12 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
ctx->g_blocks[blknum].length = length;
|
||||
ctx->g_blocks[blknum].offset = last_offset;
|
||||
}
|
||||
} else {
|
||||
freqs[length]++;
|
||||
}
|
||||
|
||||
tot_chunks++;
|
||||
tot_size += length;
|
||||
if (ctx->delta_flag) {
|
||||
uint64_t cur_sketch;
|
||||
uint64_t pc[4];
|
||||
|
@ -735,9 +814,11 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
|
||||
process_blocks:
|
||||
// If we found at least a few chunks, perform dedup.
|
||||
DEBUG_STAT_EN(en_1 = get_wtime_millis());
|
||||
en_1 = get_wtime_millis();
|
||||
tot_time += en_1 - strt;
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Original size: %" PRId64 ", blknum: %u\n", *size, blknum));
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Number of maxlen blocks: %u\n", max_count));
|
||||
if (ctx->full_chunking) blknum = 0;
|
||||
if (blknum <=2 && ctx->arc) {
|
||||
sem_wait(ctx->index_sem);
|
||||
sem_post(ctx->index_sem_next);
|
||||
|
@ -778,6 +859,7 @@ process_blocks:
|
|||
*/
|
||||
g_dedupe_idx = ctx->cbuf + RABIN_HDR_SIZE;
|
||||
dedupe_index_sz = 0;
|
||||
for (i=0; i<blknum; i++) freqs[ctx->g_blocks[i].length]++;
|
||||
|
||||
/*
|
||||
* First entry in table is the original file offset where this
|
||||
|
@ -800,6 +882,7 @@ process_blocks:
|
|||
* predictable serialization of index access in a sequence of
|
||||
* threads without locking.
|
||||
*/
|
||||
printf("Using simple dedupe index.\n");
|
||||
length = 0;
|
||||
DEBUG_STAT_EN(w1 = get_wtime_millis());
|
||||
sem_wait(ctx->index_sem);
|
||||
|
@ -886,6 +969,7 @@ process_blocks:
|
|||
* ======================================================================
|
||||
*/
|
||||
|
||||
printf("Using similarity based dedupe index.\n");
|
||||
cfg = ctx->arc;
|
||||
assert(cfg->similarity_cksum_sz == sizeof (uint64_t));
|
||||
seg_heap = (uchar_t *)(ctx->g_blocks) - cfg->segment_sz * cfg->chunk_cksum_sz;
|
||||
|
@ -957,12 +1041,20 @@ process_blocks:
|
|||
increment = cfg->chunk_cksum_sz / 2;
|
||||
if (increment * sub_i > length)
|
||||
sub_i = length / increment;
|
||||
if (increment/2 == sizeof (uint64_t)) {
|
||||
for (j = 0; j<sub_i; j++) {
|
||||
*((uint64_t *)sim_ck) = *((uint64_t *)tgt);
|
||||
tgt += increment;
|
||||
sim_ck += cfg->similarity_cksum_sz;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j<sub_i; j++) {
|
||||
crc = lzma_crc64(tgt, increment/2, 0);
|
||||
*((uint64_t *)sim_ck) = crc;
|
||||
tgt += increment;
|
||||
sim_ck += cfg->similarity_cksum_sz;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin shared index access and write segment metadata to cache
|
||||
|
@ -1288,6 +1380,7 @@ next_ent:
|
|||
}
|
||||
}
|
||||
|
||||
for (i=0; i<blknum; i++) freqs[ctx->blocks[i]->length]++;
|
||||
ary_sz = (blknum << 1) * sizeof (rabin_blockentry_t *);
|
||||
htab = (rabin_blockentry_t **)(ctx->cbuf + ctx->real_chunksize - ary_sz);
|
||||
memset(htab, 0, ary_sz);
|
||||
|
|
|
@ -80,7 +80,7 @@
|
|||
#define RAB_POLYNOMIAL_WIN_SIZE 16
|
||||
#define RAB_POLYNOMIAL_MIN_WIN_SIZE 8
|
||||
#define RAB_POLYNOMIAL_MAX_WIN_SIZE 64
|
||||
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024)
|
||||
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (64 * 1024)
|
||||
#define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1)
|
||||
#define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS))
|
||||
|
||||
|
@ -178,6 +178,7 @@ typedef struct {
|
|||
short valid;
|
||||
void *lzma_data;
|
||||
int level, delta_flag, dedupe_flag, deltac_min_distance;
|
||||
int full_chunking;
|
||||
uint64_t file_offset; // For global dedupe
|
||||
archive_config_t *arc;
|
||||
sem_t *index_sem;
|
||||
|
@ -206,5 +207,6 @@ extern uint32_t dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char
|
|||
extern int global_dedupe_bufadjust(uint32_t rab_blk_sz, uint64_t *user_chunk_sz, int pct_interval,
|
||||
const char *algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
|
||||
size_t memlimit, int nthreads, int pipe_mode);
|
||||
extern void dump_frequencies();
|
||||
|
||||
#endif /* _RABIN_POLY_H_ */
|
||||
|
|
Loading…
Reference in a new issue