diff --git a/rabin/global/dedupe_config.c b/rabin/global/dedupe_config.c index 5242ea4..02f38e5 100644 --- a/rabin/global/dedupe_config.c +++ b/rabin/global/dedupe_config.c @@ -367,7 +367,8 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim cfg->compress_level = get_compress_level(cfg->algo); cfg->chunk_cksum_sz = get_cksum_sz(cfg->chunk_cksum_type); cfg->similarity_cksum_sz = get_cksum_sz(cfg->similarity_cksum); - cfg->chunk_sz = chunksize; + cfg->chunk_sz = chunksize; // Chunk size indicator 1 - 5. + // Allows segment to be sized appropriately: 1 - 8M .. 5 - 40M cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz); cfg->pct_interval = pct_interval; cfg->archive_sz = file_sz; diff --git a/rabin/global/index.c b/rabin/global/index.c index f5d4af8..5d62fd1 100644 --- a/rabin/global/index.c +++ b/rabin/global/index.c @@ -479,7 +479,17 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, hash_entry_t **htab, *ent, **pent; assert((cfg->similarity_cksum_sz & (sizeof (size_t) - 1)) == 0); - htab_entry = XXH32(sim_cksum, cfg->similarity_cksum_sz, 0); + + /* + * If doing similarity based dedupe, keys will be 64-bit and are portions of + * cryptographic hashes. Since those are already a product of strong hashing + * there is no need to re-hash the keys here. + */ + if (cfg->similarity_cksum_sz == 8) { + htab_entry = *((uint32_t *)sim_cksum); + } else { + htab_entry = XXH32(sim_cksum, cfg->similarity_cksum_sz, 0); + } htab_entry ^= (htab_entry / cfg->similarity_cksum_sz); htab_entry = htab_entry % indx->hash_slots; htab = indx->list[interval].tab;