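Several bugfixes.
Avoid matching with self during hash lookup: the item_offset check is now done inside db_lookup_insert_s() instead of at the call site.
Stop decrementing sub_i at 1 so the increment computation cannot divide by zero.
Hash increment/8 instead of increment/2 when computing each interval's CRC64.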
This commit is contained in:
parent
6b23f6a73a
commit
2c4024792a
2 changed files with 7 additions and 10 deletions
|
@ -432,7 +432,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
|||
if (cfg->pct_interval == 0) { // Global dedupe with simple index
|
||||
while (ent) {
|
||||
if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0 &&
|
||||
ent->item_size == item_size) {
|
||||
ent->item_size == item_size && ent->item_offset != item_offset) {
|
||||
return (ent);
|
||||
}
|
||||
pent = &(ent->next);
|
||||
|
@ -440,7 +440,8 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
|||
}
|
||||
} else {
|
||||
while (ent) {
|
||||
if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0) {
|
||||
if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0 &&
|
||||
ent->item_offset != item_offset) {
|
||||
return (ent);
|
||||
}
|
||||
pent = &(ent->next);
|
||||
|
|
|
@ -858,14 +858,14 @@ process_blocks:
|
|||
qsort(seg_heap, length/8, 8, cmpint);
|
||||
|
||||
/*
|
||||
* Compute the range similarity minhashes.
|
||||
* Compute the range similarity hashes.
|
||||
*/
|
||||
sim_ck = ctx->similarity_cksums;
|
||||
crc = 0;
|
||||
sub_i = cfg->sub_intervals;
|
||||
increment = (length / cfg->intervals) / sub_i;
|
||||
tgt = seg_heap;
|
||||
while (increment < cfg->chunk_cksum_sz/4 && sub_i > 0) {
|
||||
while (increment < cfg->chunk_cksum_sz/4 && sub_i > 1) {
|
||||
sub_i--;
|
||||
increment = (length / cfg->intervals) / sub_i;
|
||||
}
|
||||
|
@ -880,7 +880,7 @@ process_blocks:
|
|||
|
||||
increment = length / cfg->intervals;
|
||||
for (j=0; j<cfg->intervals-1; j++) {
|
||||
crc = lzma_crc64(tgt, increment/2, 0);
|
||||
crc = lzma_crc64(tgt, increment/8, 0);
|
||||
*((uint64_t *)sim_ck) = crc;
|
||||
tgt += increment;
|
||||
len -= increment;
|
||||
|
@ -958,11 +958,7 @@ process_blocks:
|
|||
hash_entry_t *he;
|
||||
|
||||
he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
|
||||
|
||||
/*
|
||||
* If match found also check that match is not with self!
|
||||
*/
|
||||
if (he && he->item_offset != seg_offset) {
|
||||
if (he) {
|
||||
/*
|
||||
* Match found. Load segment metadata from disk and perform
|
||||
* identity deduplication with the segment chunks.
|
||||
|
|
Loading…
Reference in a new issue