Separate the initial Rabin boundary detection from block splitting for performance.

Also fix a rare, latent corner-case bug.
This commit is contained in:
Moinak Ghosh 2012-07-22 21:27:44 +05:30
parent 962a2cae8a
commit 7e14909ad1

View file

@ -260,9 +260,36 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
* If rabin_pos is non-zero then we are being asked to scan for the last rabin boundary * If rabin_pos is non-zero then we are being asked to scan for the last rabin boundary
* in the chunk. We start scanning at chunk end - max rabin block size. We avoid doing * in the chunk. We start scanning at chunk end - max rabin block size. We avoid doing
* a full chunk scan. * a full chunk scan.
*
* !!!NOTE!!!: Code duplication below for performance.
*/ */
if (rabin_pos) { if (rabin_pos) {
offset = *size - RAB_POLYNOMIAL_MAX_BLOCK_SIZE; offset = *size - RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
for (i=offset; i<*size; i++) {
char cur_byte = buf1[i];
uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
ctx->current_window_data[ctx->window_pos] = cur_byte;
cur_roll_checksum = (cur_roll_checksum << 1) + cur_byte;
cur_roll_checksum -= (pushed_out << RAB_POLYNOMIAL_WIN_SIZE);
ctx->window_pos = (ctx->window_pos + 1) & (RAB_POLYNOMIAL_WIN_SIZE-1);
length++;
if (length < ctx->rabin_poly_min_block_size) continue;
// If we hit our special value or reached the max block size update block offset
if ((cur_roll_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt ||
length >= rabin_polynomial_max_block_size) {
last_offset = i+1;
length = 0;
j = 0;
}
}
if (last_offset < *size) {
*rabin_pos = last_offset;
}
return (0);
} }
if (*size < ctx->rabin_poly_avg_block_size) return; if (*size < ctx->rabin_poly_avg_block_size) return;
for (i=offset; i<*size; i++) { for (i=offset; i<*size; i++) {
@ -293,7 +320,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
* variant of some approaches detailed in: * variant of some approaches detailed in:
* http://www.armedia.com/wp/SimilarityIndex.pdf * http://www.armedia.com/wp/SimilarityIndex.pdf
*/ */
if (rabin_pos == NULL) {
len1++; len1++;
j = cur_roll_checksum & ctx->rabin_avg_block_mask; j = cur_roll_checksum & ctx->rabin_avg_block_mask;
fplist[j] += cur_roll_checksum; fplist[j] += cur_roll_checksum;
@ -308,7 +334,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
fpos = 0; fpos = 0;
len1 = 0; len1 = 0;
} }
}
/* /*
* Window pos has to rotate from 0 .. RAB_POLYNOMIAL_WIN_SIZE-1 * Window pos has to rotate from 0 .. RAB_POLYNOMIAL_WIN_SIZE-1
* We avoid a branch here by masking. This requires RAB_POLYNOMIAL_WIN_SIZE * We avoid a branch here by masking. This requires RAB_POLYNOMIAL_WIN_SIZE
@ -322,7 +347,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
// If we hit our special value or reached the max block size update block offset // If we hit our special value or reached the max block size update block offset
if ((cur_roll_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt || if ((cur_roll_checksum & ctx->rabin_avg_block_mask) == ctx->rabin_break_patt ||
length >= rabin_polynomial_max_block_size) { length >= rabin_polynomial_max_block_size) {
if (rabin_pos == NULL) {
ctx->blocks[blknum].offset = last_offset; ctx->blocks[blknum].offset = last_offset;
ctx->blocks[blknum].index = blknum; // Need to store for sorting ctx->blocks[blknum].index = blknum; // Need to store for sorting
ctx->blocks[blknum].length = length; ctx->blocks[blknum].length = length;
@ -334,18 +358,12 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
len1 = 0; len1 = 0;
cur_sketch = 0; cur_sketch = 0;
blknum++; blknum++;
}
last_offset = i+1; last_offset = i+1;
length = 0; length = 0;
j = 0; j = 0;
} }
} }
if (rabin_pos && last_offset < *size) {
*rabin_pos = last_offset;
return (0);
}
// If we found at least a few chunks, perform dedup. // If we found at least a few chunks, perform dedup.
if (blknum > 2) { if (blknum > 2) {
uint64_t prev_cksum; uint64_t prev_cksum;