Avoid unnecessary re-hashing of 64-bit keys of the segment index.
This commit is contained in:
parent
d31c6433c2
commit
58f3113558
2 changed files with 13 additions and 2 deletions
|
@ -367,7 +367,8 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim
|
|||
cfg->compress_level = get_compress_level(cfg->algo);
|
||||
cfg->chunk_cksum_sz = get_cksum_sz(cfg->chunk_cksum_type);
|
||||
cfg->similarity_cksum_sz = get_cksum_sz(cfg->similarity_cksum);
|
||||
cfg->chunk_sz = chunksize;
|
||||
cfg->chunk_sz = chunksize; // Chunk size indicator 1 - 5.
|
||||
// Allows segment to be sized appropriately: 1 - 8M .. 5 - 40M
|
||||
cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz);
|
||||
cfg->pct_interval = pct_interval;
|
||||
cfg->archive_sz = file_sz;
|
||||
|
|
|
@ -479,7 +479,17 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
|||
hash_entry_t **htab, *ent, **pent;
|
||||
|
||||
assert((cfg->similarity_cksum_sz & (sizeof (size_t) - 1)) == 0);
|
||||
|
||||
/*
|
||||
* If doing similarity-based dedupe, keys will be 64-bit and are portions of
|
||||
* cryptographic hashes. Since those are already a product of strong hashing,
|
||||
* there is no need to re-hash the keys here.
|
||||
*/
|
||||
if (cfg->similarity_cksum_sz == 8) {
|
||||
htab_entry = *((uint32_t *)sim_cksum);
|
||||
} else {
|
||||
htab_entry = XXH32(sim_cksum, cfg->similarity_cksum_sz, 0);
|
||||
}
|
||||
htab_entry ^= (htab_entry / cfg->similarity_cksum_sz);
|
||||
htab_entry = htab_entry % indx->hash_slots;
|
||||
htab = indx->list[interval].tab;
|
||||
|
|
Loading…
Reference in a new issue