diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 06cdfe5..9197676 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -76,7 +76,6 @@ #include #include #include -#include #include "rabin_dedup.h" #if defined(__USE_SSE_INTRIN__) && defined(__SSE4_1__) && RAB_POLYNOMIAL_WIN_SIZE == 16 @@ -874,7 +873,7 @@ process_blocks: } else { uchar_t *seg_heap, *sim_ck, *sim_offsets; archive_config_t *cfg; - uint32_t increment, len, blks, o_blks, k; + uint32_t len, blks, o_blks, k; global_blockentry_t *seg_blocks; uint64_t seg_offset, offset; global_blockentry_t **htab, *be; @@ -897,6 +896,7 @@ process_blocks: htab = (global_blockentry_t **)(src - ary_sz); for (i=0; isimilarity_cksums; - sub_i = cfg->sub_intervals; tgt = seg_heap; - increment = cfg->chunk_cksum_sz / 2; - if (increment * sub_i > length) - sub_i = length / increment; - for (j = 0; jsimilarity_cksum_sz; + sub_i = 0; + + *((uint64_t *)sim_ck) = 0; + a = 0; + for (j = 0; j < length && sub_i < cfg->sub_intervals;) { + b = *((uint64_t *)tgt); + tgt += sizeof (uint64_t); + if (b != a) { + *((uint64_t *)sim_ck) = b; + sim_ck += sizeof (uint64_t); + a = b; + sub_i++; + } } /* @@ -984,14 +988,10 @@ process_blocks: } /* - * Now lookup all the similarity hashes. We sort the hashes first so that - * all duplicate hash values can be easily eliminated. - * + * Now lookup all the similarity hashes. * The matching segment offsets in the segcache are stored in a list. Entries * that were not found are stored with offset of UINT64_MAX. */ - isort_uint64((uint64_t *)(ctx->similarity_cksums), sub_i); - sim_ck = ctx->similarity_cksums; tgt = src + 1; // One byte for number of entries crc = 0;