Additional error checks in RLE encoding for bsdiff extra data.

Add a buffer overflow check in the RLE encoder.
Avoid calling the RLE encoder if the extra data length is zero.
Make the 2KB block size the default for non-global deduplication.
Update test cases for the new 2KB block size support.
This commit is contained in:
Moinak Ghosh 2013-08-30 19:51:43 +05:30
parent 2e62be3c9c
commit 12a2b8ed63
7 changed files with 39 additions and 14 deletions

View file

@ -154,9 +154,10 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
gives lower dedupe ratio than content-aware dedupe (-D) and does not gives lower dedupe ratio than content-aware dedupe (-D) and does not
support delta compression. support delta compression.
'-B' <1..5> '-B' <0..5>
- Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K. - Specify an average Dedupe block size. 0 - 2K, 1 - 4K, 2 - 8K ... 5 - 64K.
Default deduplication block size is 4KB. Default deduplication block size is 4KB for Global Deduplication and 2KB
otherwise.
'-B' 0 '-B' 0
- This uses blocks as small as 2KB for deduplication. This option can be - This uses blocks as small as 2KB for deduplication. This option can be
used for datasets of a few GBs to a few hundred TBs in size depending on used for datasets of a few GBs to a few hundred TBs in size depending on

View file

@ -444,10 +444,20 @@ bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
/* Write extra data */ /* Write extra data */
len = newsize - rv; len = newsize - rv;
ulen = len; ulen = len;
if (eblen > 0) {
if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) { if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
rv = 0; rv = 0;
goto out; goto out;
} }
if (ulen >= eblen) {
if (eblen > len) {
rv = 0;
goto out;
}
memcpy(BUFPTR(&pf), eb, eblen);
ulen = eblen;
}
}
/* Output size of extra data */ /* Output size of extra data */
len = ulen; len = ulen;
valouti32(len, header + 4*4); valouti32(len, header + 4*4);

View file

@ -184,12 +184,18 @@ bspatch(u_char *pbuf, u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t *
datalen = len; datalen = len;
len = extralen; len = extralen;
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || if (len > 0) {
if (extralen == lzextralen) {
memcpy(extradata, pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen);
} else if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
len != extralen) { len != extralen) {
fprintf(stderr, "bspatch: Failed to decompress extra data.\n"); fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
rv = 0; rv = 0;
goto out; goto out;
} }
}
extralen = len; extralen = len;
BUFOPEN(&cpf, ctrldata, ctrllen); BUFOPEN(&cpf, ctrldata, ctrllen);
BUFOPEN(&dpf, diffdata, datalen); BUFOPEN(&dpf, diffdata, datalen);

View file

@ -61,6 +61,7 @@ zero_rle_encode(const void *ibuf, const unsigned int ilen,
count |= ZERO_MASK; count |= ZERO_MASK;
*((unsigned short *)(ob + pos2)) = htons(count); *((unsigned short *)(ob + pos2)) = htons(count);
pos2 += 2; pos2 += 2;
if (pos2 > *olen) break;
} else { } else {
unsigned int pos3, pos4, state; unsigned int pos3, pos4, state;
pos3 = pos2; pos3 = pos2;

View file

@ -2595,7 +2595,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
pctx->level = -1; pctx->level = -1;
err = 0; err = 0;
pctx->keylen = DEFAULT_KEYLEN; pctx->keylen = DEFAULT_KEYLEN;
pctx->chunksize = DEFAULT_CHUNKSIZE; pctx->chunksize = -1;
pos = argv[0] + strlen(argv[0]); pos = argv[0] + strlen(argv[0]);
while (*pos != '/' && pos > argv[0]) pos--; while (*pos != '/' && pos > argv[0]) pos--;
if (*pos == '/') pos++; if (*pos == '/') pos++;
@ -2760,6 +2760,13 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
pctx->level = 6; pctx->level = 6;
} }
} }
if (pctx->chunksize == -1) {
if (!pctx->enable_rabin_global)
pctx->chunksize = 0;
else
pctx->chunksize = DEFAULT_CHUNKSIZE;
}
/* /*
* Remaining mandatory arguments are the filenames. * Remaining mandatory arguments are the filenames.
*/ */

View file

@ -10,7 +10,7 @@ do
for tf in `cat files.lst` for tf in `cat files.lst`
do do
rm -f ${tf}.* rm -f ${tf}.*
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \ for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \
"-G -D" "-G -F" "-G -L -P" "-G -B2" "-G -D" "-G -F" "-G -L -P" "-G -B2"
do do
for seg in 2m 100m for seg in 2m 100m

View file

@ -59,7 +59,7 @@ do
rm -f ${tstf}.pz rm -f ${tstf}.pz
done done
for feat in "-B8 -s2m -l1" "-B0 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64" for feat in "-B8 -s2m -l1" "-B-1 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64"
do do
for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma
do do