Improve deduplication performance by another 95%.

Start sliding-window scanning near the minimum chunk size boundary to avoid scanning the whole chunk.
Moinak Ghosh 2013-01-30 22:41:13 +05:30
parent 9983d79e62
commit 3d8f3ada1c
2 changed files with 12 additions and 0 deletions


@@ -416,6 +416,12 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
 		return (0);
 	}
+	/*
+	 * Start our sliding window at a fixed number of bytes before the min window size.
+	 * It is pointless to slide the window over the whole length of the chunk.
+	 */
+	offset = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
+	length = offset;
 	for (i=offset; i<j; i++) {
 		uint64_t pc[4];
 		uint32_t cur_byte = buf1[i];
@@ -498,6 +504,8 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
 			last_offset = i+1;
 			length = 0;
 			if (*size - last_offset <= ctx->rabin_poly_min_block_size) break;
+			length = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
+			i = i + length;
 		}
 	}
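
To make the effect of these two hunks concrete, here is a small self-contained sketch of a content-defined chunker that applies the same skip. It is not the pcompress implementation: the rolling hash is a toy multiplicative hash rather than the real Rabin polynomial, and all names and constants below (WINDOW, MIN_BLOCK, MAX_BLOCK, AVG_MASK, SLIDE_OFFSET, HASH_BASE, chunk) are illustrative assumptions. It demonstrates the same move the diff makes: no cut can be accepted before the minimum block size, so the scan starts SLIDE_OFFSET bytes before that boundary and jumps forward by the same amount after every cut.

/*
 * Toy content-defined chunker illustrating the scan-skip from this commit.
 * NOT pcompress code; hash, constants and names are illustrative stand-ins.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	WINDOW		16		/* rolling-hash window, bytes */
#define	MIN_BLOCK	4096		/* minimum block size */
#define	MAX_BLOCK	(8 * MIN_BLOCK)	/* hard upper bound on block size */
#define	AVG_MASK	0x0FFF		/* cut when low hash bits are all ones */
#define	SLIDE_OFFSET	256		/* start sliding this far before MIN_BLOCK */
#define	HASH_BASE	1664525u	/* odd multiplier for the toy rolling hash */

static size_t
chunk(const uint8_t *buf, size_t len)
{
	uint8_t win[WINDOW] = {0};
	uint32_t hash = 0, bpw = 1;
	size_t blocks = 0, last_cut = 0, i;
	int k, pos = 0, filled = 0;

	for (k = 0; k < WINDOW; k++)	/* HASH_BASE^WINDOW, for removing old bytes */
		bpw *= HASH_BASE;

	/*
	 * No cut can be accepted before MIN_BLOCK bytes, so start the scan
	 * SLIDE_OFFSET bytes before that boundary instead of at byte 0.
	 */
	for (i = MIN_BLOCK - SLIDE_OFFSET; i < len; i++) {
		hash = hash * HASH_BASE + buf[i] - win[pos] * bpw;
		win[pos] = buf[i];
		pos = (pos + 1) % WINDOW;
		if (filled < WINDOW) {
			filled++;
			continue;	/* window not yet full of real data */
		}
		if (i + 1 - last_cut < MIN_BLOCK)
			continue;	/* block still below the minimum size */
		if ((hash & AVG_MASK) == AVG_MASK || i + 1 - last_cut >= MAX_BLOCK) {
			blocks++;
			last_cut = i + 1;
			if (len - last_cut <= MIN_BLOCK)
				break;
			/*
			 * The optimization: jump to just before the next
			 * minimum boundary and refill the window from there.
			 */
			i += MIN_BLOCK - SLIDE_OFFSET;
			memset(win, 0, sizeof (win));
			hash = 0;
			pos = 0;
			filled = 0;
		}
	}
	if (last_cut < len)
		blocks++;	/* trailing remainder forms the final block */
	return (blocks);
}

int
main(void)
{
	size_t n = 1 << 20, i;
	uint8_t *buf = malloc(n);

	if (buf == NULL)
		return (1);
	srand(1);
	for (i = 0; i < n; i++)		/* deterministic pseudo-random input */
		buf[i] = (uint8_t)(rand() & 0xFF);
	printf("%zu blocks over %zu bytes\n", chunk(buf, n), n);
	free(buf);
	return (0);
}

The jump is safe because a rolling fingerprint depends only on the last WINDOW bytes: as long as SLIDE_OFFSET comfortably exceeds the window size, the hash is fully repopulated with real data before the first position at which a boundary may be declared, so the resulting cut points match those of a full scan.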


@@ -76,6 +76,10 @@
 #define	RAB_BLK_MASK		(((1 << RAB_BLK_MIN_BITS) - 1) >> 1)
 #define	RAB_BLK_AVG_SZ(x)	(1 << ((x) + RAB_BLK_MIN_BITS))
+// The sliding window starts at min window size - this offset. It is needless
+// to slide the window over every byte in the chunk.
+#define	RAB_WINDOW_SLIDE_OFFSET	(256)
 // Minimum practical chunk size when doing dedup
 #define	RAB_MIN_CHUNK_SIZE	(1048576L)
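
The new constant only has to satisfy one property: it must exceed the Rabin sliding-window size, since the fingerprint at any position depends only on the last window's worth of bytes. Starting 256 bytes before the minimum block boundary therefore leaves the fingerprint fully populated by the time the first legal cut position is reached. A hedged sketch of a compile-time guard for that invariant (not part of this commit; RAB_POLYNOMIAL_WIN_SIZE is assumed here to be the window-size macro defined elsewhere in this header):

// Illustrative guard, not from the commit: the skip offset must exceed the
// rolling window so the fingerprint is warm before the minimum boundary.
#if RAB_WINDOW_SLIDE_OFFSET <= RAB_POLYNOMIAL_WIN_SIZE
#error "RAB_WINDOW_SLIDE_OFFSET must be larger than the rolling window size"
#endif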