From 3d8f3ada1c91fb77017f6b9396df8358ff54baeb Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Wed, 30 Jan 2013 22:41:13 +0530 Subject: [PATCH] Improve Deduplication performance by another 95%. Start sliding window scanning near minimum chunk size boundaries to avoid scanning whole chunk. --- rabin/rabin_dedup.c | 8 ++++++++ rabin/rabin_dedup.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 16a0e51..6492dbf 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -416,6 +416,12 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of return (0); } + /* + * Start our sliding window at a fixed number of bytes before the min window size. + * It is pointless to slide the window over the whole length of the chunk. + */ + offset = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET; + length = offset; for (i=offset; i< [... text lost in extraction ...] ctx->rabin_poly_min_block_size) break; + length = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET; + i = i + length; } } diff --git a/rabin/rabin_dedup.h b/rabin/rabin_dedup.h index 2aeb4a3..e6d4bbb 100644 --- a/rabin/rabin_dedup.h +++ b/rabin/rabin_dedup.h @@ -76,6 +76,10 @@ #define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1) #define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS)) +// The sliding window starts at min window size - this offset. It is needless +// to slide the window over every byte in the chunk. +#define RAB_WINDOW_SLIDE_OFFSET (256) + // Minimum practical chunk size when doing dedup #define RAB_MIN_CHUNK_SIZE (1048576L)