Additional error checks in RLE encoding for bsdiff extra data.
Add a buffer overflow check to the RLE encoder. Avoid calling the RLE encoder if the extra data length is zero. Make the 2KB block size the default for non-global deduplication. Update the test cases for the new 2KB block size support.
This commit is contained in:
parent
2e62be3c9c
commit
12a2b8ed63
7 changed files with 39 additions and 14 deletions
|
@ -154,9 +154,10 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
|
|||
gives lower dedupe ratio than content-aware dedupe (-D) and does not
|
||||
support delta compression.
|
||||
|
||||
'-B' <1..5>
|
||||
- Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.
|
||||
Default deduplication block size is 4KB.
|
||||
'-B' <0..5>
|
||||
- Specify an average Dedupe block size. 0 - 2K, 1 - 4K, 2 - 8K ... 5 - 64K.
|
||||
Default deduplication block size is 4KB for Global Deduplication and 2KB
|
||||
otherwise.
|
||||
'-B' 0
|
||||
- This uses blocks as small as 2KB for deduplication. This option can be
|
||||
used for datasets of a few GBs to a few hundred TBs in size depending on
|
||||
|
|
|
@ -444,10 +444,20 @@ bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
|
|||
/* Write extra data */
|
||||
len = newsize - rv;
|
||||
ulen = len;
|
||||
if (eblen > 0) {
|
||||
if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
if (ulen >= eblen) {
|
||||
if (eblen > len) {
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
memcpy(BUFPTR(&pf), eb, eblen);
|
||||
ulen = eblen;
|
||||
}
|
||||
}
|
||||
/* Output size of extra data */
|
||||
len = ulen;
|
||||
valouti32(len, header + 4*4);
|
||||
|
|
|
@ -184,12 +184,18 @@ bspatch(u_char *pbuf, u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t *
|
|||
datalen = len;
|
||||
|
||||
len = extralen;
|
||||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||
if (len > 0) {
|
||||
if (extralen == lzextralen) {
|
||||
memcpy(extradata, pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen);
|
||||
|
||||
} else if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||
len != extralen) {
|
||||
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
extralen = len;
|
||||
BUFOPEN(&cpf, ctrldata, ctrllen);
|
||||
BUFOPEN(&dpf, diffdata, datalen);
|
||||
|
|
|
@ -61,6 +61,7 @@ zero_rle_encode(const void *ibuf, const unsigned int ilen,
|
|||
count |= ZERO_MASK;
|
||||
*((unsigned short *)(ob + pos2)) = htons(count);
|
||||
pos2 += 2;
|
||||
if (pos2 > *olen) break;
|
||||
} else {
|
||||
unsigned int pos3, pos4, state;
|
||||
pos3 = pos2;
|
||||
|
|
|
@ -2595,7 +2595,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
pctx->level = -1;
|
||||
err = 0;
|
||||
pctx->keylen = DEFAULT_KEYLEN;
|
||||
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
||||
pctx->chunksize = -1;
|
||||
pos = argv[0] + strlen(argv[0]);
|
||||
while (*pos != '/' && pos > argv[0]) pos--;
|
||||
if (*pos == '/') pos++;
|
||||
|
@ -2760,6 +2760,13 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
pctx->level = 6;
|
||||
}
|
||||
}
|
||||
|
||||
if (pctx->chunksize == -1) {
|
||||
if (!pctx->enable_rabin_global)
|
||||
pctx->chunksize = 0;
|
||||
else
|
||||
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
||||
}
|
||||
/*
|
||||
* Remaining mandatory arguments are the filenames.
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ do
|
|||
for tf in `cat files.lst`
|
||||
do
|
||||
rm -f ${tf}.*
|
||||
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \
|
||||
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \
|
||||
"-G -D" "-G -F" "-G -L -P" "-G -B2"
|
||||
do
|
||||
for seg in 2m 100m
|
||||
|
|
|
@ -59,7 +59,7 @@ do
|
|||
rm -f ${tstf}.pz
|
||||
done
|
||||
|
||||
for feat in "-B8 -s2m -l1" "-B0 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64"
|
||||
for feat in "-B8 -s2m -l1" "-B-1 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64"
|
||||
do
|
||||
for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma
|
||||
do
|
||||
|
|
Loading…
Reference in a new issue