Improve Deduplication performance by another 95%.
Start sliding-window scanning just before the minimum chunk size boundary to avoid scanning the whole chunk.
This commit is contained in:
parent
9983d79e62
commit
3d8f3ada1c
2 changed files with 12 additions and 0 deletions
|
@ -416,6 +416,12 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start the sliding window a fixed number of bytes (RAB_WINDOW_SLIDE_OFFSET) before the minimum block size.
|
||||
* It is pointless to slide the window over the whole length of the chunk.
|
||||
*/
|
||||
offset = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||
length = offset;
|
||||
for (i=offset; i<j; i++) {
|
||||
uint64_t pc[4];
|
||||
uint32_t cur_byte = buf1[i];
|
||||
|
@ -498,6 +504,8 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
|||
last_offset = i+1;
|
||||
length = 0;
|
||||
if (*size - last_offset <= ctx->rabin_poly_min_block_size) break;
|
||||
length = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||
i = i + length;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,10 @@
|
|||
// Bit mask with the low (RAB_BLK_MIN_BITS - 1) bits set: all RAB_BLK_MIN_BITS
// low bits are set first, then the result is shifted right by one.
#define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1)
|
||||
// Evaluates to 2^(x + RAB_BLK_MIN_BITS). The shift is performed on an int
// constant, so (x + RAB_BLK_MIN_BITS) must stay below the bit width of int.
// NOTE(review): presumably x is a small block-size index - confirm the range
// against callers.
#define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS))
|
||||
|
||||
// The sliding window starts at (minimum block size - this offset). It is needless
|
||||
// to slide the window over every byte in the chunk.
|
||||
// NOTE(review): dedupe_compress() subtracts this value from
// ctx->rabin_poly_min_block_size; the minimum block size presumably must be
// at least this large for the subtraction not to wrap - confirm.
#define RAB_WINDOW_SLIDE_OFFSET (256)
|
||||
|
||||
// Minimum practical chunk size when doing dedup
|
||||
// 1048576 == 2^20; the L suffix makes the constant a long.
#define RAB_MIN_CHUNK_SIZE (1048576L)
|
||||
|
||||
|
|
Loading…
Reference in a new issue