Additional error checks in RLE encoding for bsdiff extra data.
Add a buffer overflow check in the RLE encoder. Avoid calling the RLE encoder if the extra data length is zero. Make the 2KB block size the default for non-global deduplication. Update test cases for the new 2KB block size support.
This commit is contained in:
parent
2e62be3c9c
commit
12a2b8ed63
7 changed files with 39 additions and 14 deletions
|
@ -154,9 +154,10 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
|
||||||
gives lower dedupe ratio than content-aware dedupe (-D) and does not
|
gives lower dedupe ratio than content-aware dedupe (-D) and does not
|
||||||
support delta compression.
|
support delta compression.
|
||||||
|
|
||||||
'-B' <1..5>
|
'-B' <0..5>
|
||||||
- Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.
|
- Specify an average Dedupe block size. 0 - 2K, 1 - 4K, 2 - 8K ... 5 - 64K.
|
||||||
Default deduplication block size is 4KB.
|
Default deduplication block size is 4KB for Global Deduplication and 2KB
|
||||||
|
otherwise.
|
||||||
'-B' 0
|
'-B' 0
|
||||||
- This uses blocks as small as 2KB for deduplication. This option can be
|
- This uses blocks as small as 2KB for deduplication. This option can be
|
||||||
used for datasets of a few GBs to a few hundred TBs in size depending on
|
used for datasets of a few GBs to a few hundred TBs in size depending on
|
||||||
|
|
|
@ -444,10 +444,20 @@ bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
|
||||||
/* Write extra data */
|
/* Write extra data */
|
||||||
len = newsize - rv;
|
len = newsize - rv;
|
||||||
ulen = len;
|
ulen = len;
|
||||||
|
if (eblen > 0) {
|
||||||
if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
|
if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
|
||||||
rv = 0;
|
rv = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
if (ulen >= eblen) {
|
||||||
|
if (eblen > len) {
|
||||||
|
rv = 0;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
memcpy(BUFPTR(&pf), eb, eblen);
|
||||||
|
ulen = eblen;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* Output size of extra data */
|
/* Output size of extra data */
|
||||||
len = ulen;
|
len = ulen;
|
||||||
valouti32(len, header + 4*4);
|
valouti32(len, header + 4*4);
|
||||||
|
|
|
@ -184,12 +184,18 @@ bspatch(u_char *pbuf, u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t *
|
||||||
datalen = len;
|
datalen = len;
|
||||||
|
|
||||||
len = extralen;
|
len = extralen;
|
||||||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
if (len > 0) {
|
||||||
|
if (extralen == lzextralen) {
|
||||||
|
memcpy(extradata, pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen);
|
||||||
|
|
||||||
|
} else if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||||
len != extralen) {
|
len != extralen) {
|
||||||
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
||||||
rv = 0;
|
rv = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
extralen = len;
|
extralen = len;
|
||||||
BUFOPEN(&cpf, ctrldata, ctrllen);
|
BUFOPEN(&cpf, ctrldata, ctrllen);
|
||||||
BUFOPEN(&dpf, diffdata, datalen);
|
BUFOPEN(&dpf, diffdata, datalen);
|
||||||
|
|
|
@ -61,6 +61,7 @@ zero_rle_encode(const void *ibuf, const unsigned int ilen,
|
||||||
count |= ZERO_MASK;
|
count |= ZERO_MASK;
|
||||||
*((unsigned short *)(ob + pos2)) = htons(count);
|
*((unsigned short *)(ob + pos2)) = htons(count);
|
||||||
pos2 += 2;
|
pos2 += 2;
|
||||||
|
if (pos2 > *olen) break;
|
||||||
} else {
|
} else {
|
||||||
unsigned int pos3, pos4, state;
|
unsigned int pos3, pos4, state;
|
||||||
pos3 = pos2;
|
pos3 = pos2;
|
||||||
|
|
|
@ -2595,7 +2595,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
pctx->level = -1;
|
pctx->level = -1;
|
||||||
err = 0;
|
err = 0;
|
||||||
pctx->keylen = DEFAULT_KEYLEN;
|
pctx->keylen = DEFAULT_KEYLEN;
|
||||||
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
pctx->chunksize = -1;
|
||||||
pos = argv[0] + strlen(argv[0]);
|
pos = argv[0] + strlen(argv[0]);
|
||||||
while (*pos != '/' && pos > argv[0]) pos--;
|
while (*pos != '/' && pos > argv[0]) pos--;
|
||||||
if (*pos == '/') pos++;
|
if (*pos == '/') pos++;
|
||||||
|
@ -2760,6 +2760,13 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
pctx->level = 6;
|
pctx->level = 6;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pctx->chunksize == -1) {
|
||||||
|
if (!pctx->enable_rabin_global)
|
||||||
|
pctx->chunksize = 0;
|
||||||
|
else
|
||||||
|
pctx->chunksize = DEFAULT_CHUNKSIZE;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Remaining mandatory arguments are the filenames.
|
* Remaining mandatory arguments are the filenames.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -10,7 +10,7 @@ do
|
||||||
for tf in `cat files.lst`
|
for tf in `cat files.lst`
|
||||||
do
|
do
|
||||||
rm -f ${tf}.*
|
rm -f ${tf}.*
|
||||||
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \
|
for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \
|
||||||
"-G -D" "-G -F" "-G -L -P" "-G -B2"
|
"-G -D" "-G -F" "-G -L -P" "-G -B2"
|
||||||
do
|
do
|
||||||
for seg in 2m 100m
|
for seg in 2m 100m
|
||||||
|
|
|
@ -59,7 +59,7 @@ do
|
||||||
rm -f ${tstf}.pz
|
rm -f ${tstf}.pz
|
||||||
done
|
done
|
||||||
|
|
||||||
for feat in "-B8 -s2m -l1" "-B0 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64"
|
for feat in "-B8 -s2m -l1" "-B-1 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64"
|
||||||
do
|
do
|
||||||
for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma
|
for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma
|
||||||
do
|
do
|
||||||
|
|
Loading…
Reference in a new issue