Improve duplicate segment match detection.
This commit is contained in:
parent
6b7d883393
commit
b32f4b3f9a
1 changed files with 28 additions and 7 deletions
|
@ -838,7 +838,7 @@ process_blocks:
|
||||||
ary_sz = cfg->segment_sz * sizeof (global_blockentry_t **);
|
ary_sz = cfg->segment_sz * sizeof (global_blockentry_t **);
|
||||||
htab = (global_blockentry_t **)(seg_heap - ary_sz);
|
htab = (global_blockentry_t **)(seg_heap - ary_sz);
|
||||||
for (i=0; i<blknum;) {
|
for (i=0; i<blknum;) {
|
||||||
uint64_t crc;
|
uint64_t crc, off1;
|
||||||
length = 0;
|
length = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -955,20 +955,41 @@ process_blocks:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now lookup the similarity hashes starting at the highest
|
* Now lookup all the similarity hashes. We sort the hashes first so that
|
||||||
* significance level.
|
* all duplicates can be easily detected.
|
||||||
*/
|
*/
|
||||||
|
qsort(ctx->similarity_cksums, cfg->intervals + sub_i - 1, 8, cmpint);
|
||||||
|
crc = 0;
|
||||||
|
off1 = UINT64_MAX;
|
||||||
for (j=cfg->intervals + sub_i; j > 0; j--) {
|
for (j=cfg->intervals + sub_i; j > 0; j--) {
|
||||||
hash_entry_t *he = NULL, *he1 = NULL;
|
hash_entry_t *he = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for duplicate checksum which need not be looked up
|
||||||
|
* again.
|
||||||
|
*/
|
||||||
|
if (crc == *((uint64_t *)sim_ck)) {
|
||||||
|
he = NULL;
|
||||||
|
} else {
|
||||||
he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
|
he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
|
||||||
if (he && he != he1) {
|
/*
|
||||||
|
* Check for different checksum but same segment match.
|
||||||
|
* This is not a complete check but does help to reduce
|
||||||
|
* wasted processing.
|
||||||
|
*/
|
||||||
|
if (he && off1 == he->item_offset) {
|
||||||
|
crc = *((uint64_t *)sim_ck);
|
||||||
|
he = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (he) {
|
||||||
/*
|
/*
|
||||||
* Match found. Load segment metadata from disk and perform
|
* Match found. Load segment metadata from disk and perform
|
||||||
* identity deduplication with the segment chunks.
|
* identity deduplication with the segment chunks.
|
||||||
*/
|
*/
|
||||||
he1 = he;
|
crc = *((uint64_t *)sim_ck);
|
||||||
offset = he->item_offset;
|
offset = he->item_offset;
|
||||||
|
off1 = offset;
|
||||||
if (db_segcache_map(cfg, ctx->id, &o_blks, &offset,
|
if (db_segcache_map(cfg, ctx->id, &o_blks, &offset,
|
||||||
(uchar_t **)&seg_blocks) == -1) {
|
(uchar_t **)&seg_blocks) == -1) {
|
||||||
fprintf(stderr, "Segment cache mmap failed.\n");
|
fprintf(stderr, "Segment cache mmap failed.\n");
|
||||||
|
|
Loading…
Reference in a new issue