Improve Deduplication performance by another 95%.
Start the sliding-window scan shortly before the minimum chunk size boundary, to avoid scanning the whole chunk.
This commit is contained in:
parent
9983d79e62
commit
3d8f3ada1c
2 changed files with 12 additions and 0 deletions
|
@ -416,6 +416,12 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start our sliding window at a fixed number of bytes before the min block size.
|
||||||
|
* It is pointless to slide the window over the whole length of the chunk.
|
||||||
|
*/
|
||||||
|
offset = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||||
|
length = offset;
|
||||||
for (i=offset; i<j; i++) {
|
for (i=offset; i<j; i++) {
|
||||||
uint64_t pc[4];
|
uint64_t pc[4];
|
||||||
uint32_t cur_byte = buf1[i];
|
uint32_t cur_byte = buf1[i];
|
||||||
|
@ -498,6 +504,8 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
|
||||||
last_offset = i+1;
|
last_offset = i+1;
|
||||||
length = 0;
|
length = 0;
|
||||||
if (*size - last_offset <= ctx->rabin_poly_min_block_size) break;
|
if (*size - last_offset <= ctx->rabin_poly_min_block_size) break;
|
||||||
|
length = ctx->rabin_poly_min_block_size - RAB_WINDOW_SLIDE_OFFSET;
|
||||||
|
i = i + length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -76,6 +76,10 @@
|
||||||
#define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1)
|
#define RAB_BLK_MASK (((1 << RAB_BLK_MIN_BITS) - 1) >> 1)
|
||||||
#define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS))
|
#define RAB_BLK_AVG_SZ(x) (1 << ((x) + RAB_BLK_MIN_BITS))
|
||||||
|
|
||||||
|
// The sliding window starts at min block size - this offset. It is needless
|
||||||
|
// to slide the window over every byte in the chunk.
|
||||||
|
#define RAB_WINDOW_SLIDE_OFFSET (256)
|
||||||
|
|
||||||
// Minimum practical chunk size when doing dedup
|
// Minimum practical chunk size when doing dedup
|
||||||
#define RAB_MIN_CHUNK_SIZE (1048576L)
|
#define RAB_MIN_CHUNK_SIZE (1048576L)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue