From 12a2b8ed63f5afda49af3114ebebbc84052f8a6b Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Fri, 30 Aug 2013 19:51:43 +0530 Subject: [PATCH] Additional error checks in RLE encoding for bsdiff extra data. Add a buffer overflow check in RLE encoder. Avoid calling RLE encoding if extra data length is zero. Make 2KB block size default for non-global deduplication. Update test cases for new 2KB block size support. --- README.md | 7 ++++--- bsdiff/bsdiff.c | 16 +++++++++++++--- bsdiff/bspatch.c | 16 +++++++++++----- bsdiff/rle_encoder.c | 1 + pcompress.c | 9 ++++++++- test/t4.tst | 2 +- test/t9.tst | 2 +- 7 files changed, 39 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 74eceae..c3c14e3 100644 --- a/README.md +++ b/README.md @@ -154,9 +154,10 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library gives lower dedupe ratio than content-aware dedupe (-D) and does not support delta compression. - '-B' <1..5> - - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K. - Default deduplication block size is 4KB. + '-B' <0..5> + - Specify an average Dedupe block size. 0 - 2K, 1 - 4K, 2 - 8K ... 5 - 64K. + Default deduplication block size is 4KB for Global Deduplication and 2KB + otherwise. '-B' 0 - This uses blocks as small as 2KB for deduplication. This option can be used for datasets of a few GBs to a few hundred TBs in size depending on diff --git a/bsdiff/bsdiff.c b/bsdiff/bsdiff.c index f5b9b39..4371de1 100644 --- a/bsdiff/bsdiff.c +++ b/bsdiff/bsdiff.c @@ -444,9 +444,19 @@ bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize, /* Write extra data */ len = newsize - rv; ulen = len; - if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) { - rv = 0; - goto out; + if (eblen > 0) { + if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) { + rv = 0; + goto out; + } + if (ulen >= eblen) { + if (eblen > len) { + rv = 0; + goto out; + } + memcpy(BUFPTR(&pf), eb, eblen); + ulen = eblen; + } } /* Output size of extra data */ len = ulen; diff --git a/bsdiff/bspatch.c b/bsdiff/bspatch.c index 474fe0b..84fd417 100644 --- a/bsdiff/bspatch.c +++ b/bsdiff/bspatch.c @@ -184,12 +184,18 @@ bspatch(u_char *pbuf, u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t * datalen = len; len = extralen; - if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || - len != extralen) { - fprintf(stderr, "bspatch: Failed to decompress extra data.\n"); - rv = 0; - goto out; + if (len > 0) { + if (extralen == lzextralen) { + memcpy(extradata, pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen); + + } else if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || + len != extralen) { + fprintf(stderr, "bspatch: Failed to decompress extra data.\n"); + rv = 0; + goto out; + } } + extralen = len; BUFOPEN(&cpf, ctrldata, ctrllen); BUFOPEN(&dpf, diffdata, datalen); diff --git a/bsdiff/rle_encoder.c b/bsdiff/rle_encoder.c index 02200b5..c5077dc 100644 --- a/bsdiff/rle_encoder.c +++ b/bsdiff/rle_encoder.c @@ -61,6 +61,7 @@ zero_rle_encode(const void *ibuf, const unsigned int ilen, count |= ZERO_MASK; *((unsigned short *)(ob + pos2)) = htons(count); pos2 += 2; + if (pos2 > *olen) break; } else { unsigned int pos3, pos4, state; pos3 = pos2; diff --git a/pcompress.c b/pcompress.c index e5b8bc2..ad4175b 100644 --- a/pcompress.c +++ b/pcompress.c @@ -2595,7 +2595,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) pctx->level = -1; err = 0; pctx->keylen = DEFAULT_KEYLEN; - pctx->chunksize = DEFAULT_CHUNKSIZE; + pctx->chunksize = -1; pos = argv[0] + strlen(argv[0]); while (*pos != '/' && pos > argv[0]) pos--; if (*pos == '/') pos++; @@ -2760,6 +2760,13 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) pctx->level = 6; } } + + if (pctx->chunksize == -1) { + if (!pctx->enable_rabin_global) + pctx->chunksize = 0; + else + pctx->chunksize = DEFAULT_CHUNKSIZE; + } /* * Remaining mandatory arguments are the filenames. */ diff --git a/test/t4.tst b/test/t4.tst index a53adb3..a3ed244 100644 --- a/test/t4.tst +++ b/test/t4.tst @@ -10,7 +10,7 @@ do for tf in `cat files.lst` do rm -f ${tf}.* - for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B2 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \ + for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \ "-G -D" "-G -F" "-G -L -P" "-G -B2" do for seg in 2m 100m diff --git a/test/t9.tst b/test/t9.tst index 099462b..a5d2b01 100644 --- a/test/t9.tst +++ b/test/t9.tst @@ -59,7 +59,7 @@ do rm -f ${tstf}.pz done -for feat in "-B8 -s2m -l1" "-B0 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64" +for feat in "-B8 -s2m -l1" "-B-1 -s2m -l1" "-D -s10k -l1" "-D -F -s2m -l1" "-p -e AES -s2m -l1" "-s2m -l15" "-e AES -k64" "-e SALSA20 -k8" "-e AES -k8" "-e SALSA20 -k64" do for algo in lzfx lz4 zlib bzip2 libbsc ppmd lzma do