Avoid unnecessary re-hashing of 64-bit keys of the segment index.

This commit is contained in:
Moinak Ghosh 2013-08-17 22:08:55 +05:30
parent d31c6433c2
commit 58f3113558
2 changed files with 13 additions and 2 deletions

View file

@ -367,7 +367,8 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim
cfg->compress_level = get_compress_level(cfg->algo); cfg->compress_level = get_compress_level(cfg->algo);
cfg->chunk_cksum_sz = get_cksum_sz(cfg->chunk_cksum_type); cfg->chunk_cksum_sz = get_cksum_sz(cfg->chunk_cksum_type);
cfg->similarity_cksum_sz = get_cksum_sz(cfg->similarity_cksum); cfg->similarity_cksum_sz = get_cksum_sz(cfg->similarity_cksum);
cfg->chunk_sz = chunksize; cfg->chunk_sz = chunksize; // Chunk size indicator 1 - 5.
// Allows segments to be sized appropriately: 1 => 8M .. 5 => 40M
cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz); cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz);
cfg->pct_interval = pct_interval; cfg->pct_interval = pct_interval;
cfg->archive_sz = file_sz; cfg->archive_sz = file_sz;

View file

@ -479,7 +479,17 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
hash_entry_t **htab, *ent, **pent; hash_entry_t **htab, *ent, **pent;
assert((cfg->similarity_cksum_sz & (sizeof (size_t) - 1)) == 0); assert((cfg->similarity_cksum_sz & (sizeof (size_t) - 1)) == 0);
/*
* When doing similarity-based dedupe, keys are 64 bits wide and are portions
* of cryptographic hashes. Since those are already the product of strong
* hashing, there is no need to re-hash them here: for 8-byte keys the first
* 4 bytes of the key are used directly as the initial hash value.
*/
if (cfg->similarity_cksum_sz == 8) {
htab_entry = *((uint32_t *)sim_cksum);
} else {
htab_entry = XXH32(sim_cksum, cfg->similarity_cksum_sz, 0); htab_entry = XXH32(sim_cksum, cfg->similarity_cksum_sz, 0);
}
htab_entry ^= (htab_entry / cfg->similarity_cksum_sz); htab_entry ^= (htab_entry / cfg->similarity_cksum_sz);
htab_entry = htab_entry % indx->hash_slots; htab_entry = htab_entry % indx->hash_slots;
htab = indx->list[interval].tab; htab = indx->list[interval].tab;