From 6ecc4005714534dbefb0ddf23b2f3c0f12715149 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 5 May 2013 18:50:52 +0530 Subject: [PATCH] Fix segment offset sorting. Get rid of incorrect duplicate checks in index. --- rabin/global/index.c | 11 +++++------ rabin/rabin_dedup.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rabin/global/index.c b/rabin/global/index.c index 4254c6a..d9ca942 100644 --- a/rabin/global/index.c +++ b/rabin/global/index.c @@ -485,10 +485,11 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, pent = &(htab[htab_entry]); ent = htab[htab_entry]; - if (cfg->pct_interval == 0) { // Global dedupe with simple index + if (cfg->pct_interval == 0) { // Global dedupe with simple index. + assert(cfg->similarity_cksum_sz == cfg->chunk_cksum_sz); while (ent) { if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0 && - ent->item_size == item_size && ent->item_offset != item_offset) { + ent->item_size == item_size) { return (ent); } pent = &(ent->next); @@ -496,8 +497,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, } } else if (cfg->similarity_cksum_sz == 8) {// Fast path for 64-bit keys while (ent) { - if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum) && - ent->item_offset != item_offset) { + if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum)) { return (ent); } pent = &(ent->next); @@ -505,8 +505,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, } } else { while (ent) { - if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0 && - ent->item_offset != item_offset) { + if (mycmp(sim_cksum, ent->cksum, cfg->similarity_cksum_sz) == 0) { return (ent); } pent = &(ent->next); diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index d0fa12d..07e1f78 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -989,7 +989,7 @@ process_blocks: * file. Sort the offsets to avoid subsequent random access. */ tgt = src + 1; - isort_uint64((uint64_t *)tgt, k); + isort_uint64((uint64_t *)tgt, sub_i); /* * Now eliminate duplicate offsets and UINT64_MAX offset entries which