From fd7c7e9a65a6680755d40fbd30fcd85191cb3980 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Fri, 20 Jul 2012 20:53:46 +0530 Subject: [PATCH] Use 4-byte ints for header values instead of 8-byte size_t. Use RLE on control data if it reduces the size. Update some comments. Use scratch space at end of data chunk, if available. --- bsdiff/bsdiff.c | 75 +++++++++++++++++++--------- bsdiff/bspatch.c | 105 ++++++++++++++++++++++++--------------- bsdiff/rle_encoder.c | 2 +- rabin/rabin_polynomial.c | 8 ++- 4 files changed, 123 insertions(+), 67 deletions(-) diff --git a/bsdiff/bsdiff.c b/bsdiff/bsdiff.c index ebc8148..6575870 100644 --- a/bsdiff/bsdiff.c +++ b/bsdiff/bsdiff.c @@ -226,7 +226,7 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize, bsize_t overlap,Ss,lens; bsize_t i, rv; bsize_t dblen,eblen; - u_char *db,*eb; + u_char *db,*eb, *cb; u_char buf[sizeof (bsize_t)]; u_char header[48]; unsigned int sz, hdrsz, ulen; @@ -252,29 +252,31 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize, BUFOPEN(&pf, diff, newsize); /* Header is - 0 8 length of ctrl block - 8 8 compressed length of diff block - 16 8 actual length of diff block - 24 8 compressed length of extra block - 32 8 actual length of extra block - 40 8 length of new file */ + 0 4 compressed length of ctrl block + 4 4 actual length of ctrl block + 8 4 compressed length of diff block + 12 4 actual length of diff block + 16 4 compressed length of extra block + 20 4 actual length of extra block + 24 4 length of new file */ /* File is - 0 32 Header - 32 ?? ctrl block + 0 28 Header + 28 ?? ctrl block ?? ?? diff block ?? ?? 
extra block */ - valout(0, header); - valout(0, header + sz); - valout(0, header + sz*2); - valout(0, header + sz*3); - valout(0, header + sz*4); - valout(newsize, header + sz*5); - if (BUFWRITE(&pf, header, sz*6) != sz*6) { + valouti32(0, header); + valouti32(0, header + 4); + valouti32(0, header + 4*2); + valouti32(0, header + 4*3); + valouti32(0, header + 4*4); + valouti32(0, header + 4*5); + valouti32(newsize, header + 4*6); + if (BUFWRITE(&pf, header, 4*7) != 4*7) { fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n"); rv = 0; goto out; } - hdrsz = sz*6; + hdrsz = 4*7; /* Compute the differences, writing ctrl as we go */ scan=0;len=0; @@ -356,9 +358,36 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize, goto out; } - /* Compute size of ctrl data */ + /* Compute uncompressed size of the ctrl data. */ len = BUFTELL(&pf); - valout(len-hdrsz, header); + valouti32(len-hdrsz, header+4); + ulen = len-hdrsz; + + /* If our data can fit in the scratch area use it, otherwise alloc. */ + if (ulen > scratchsize) { + cb = slab_alloc(NULL, ulen); + } else { + cb = scratch; + } + + /* + * Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller + * data then retain it. 
+ */ + BUFSEEK(&pf, hdrsz, SEEK_SET); + rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen); + if (rv == 0 && ulen < len-hdrsz) { + BUFWRITE(&pf, cb, ulen); + } else { + BUFSEEK(&pf, len, SEEK_SET); + } + if (len-hdrsz > scratchsize) { + slab_free(NULL, cb); + } + + /* Compute compressed size of ctrl data */ + len = BUFTELL(&pf); + valouti32(len-hdrsz, header); rv = len; /* Write diff data */ @@ -370,8 +399,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize, } /* Output size of diff data */ len = ulen; - valout(len, header + sz); - valout(dblen, header + sz*2); + valouti32(len, header + 4*2); + valouti32(dblen, header + 4*3); rv += len; BUFSEEK(&pf, len, SEEK_CUR); @@ -384,8 +413,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize, } /* Output size of extra data */ len = ulen; - valout(len, header + sz*3); - valout(eblen, header + sz*4); + valouti32(len, header + 4*4); + valouti32(eblen, header + 4*5); rv += len; /* Seek to the beginning, re-write the header.*/ diff --git a/bsdiff/bspatch.c b/bsdiff/bspatch.c index 7c9f53e..e48ef9d 100644 --- a/bsdiff/bspatch.c +++ b/bsdiff/bspatch.c @@ -54,62 +54,63 @@ valini32(u_char *buf) bsize_t get_bsdiff_sz(u_char *pbuf) { bsize_t newsize; - bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen; - int sz, hdrsz, rv; + bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen; + int hdrsz, rv; - sz = sizeof (bsize_t); - hdrsz = sz*6; + hdrsz = 4*7; - ctrllen = valin(pbuf); - lzdatalen = valin(pbuf+sz); - datalen = valin(pbuf+sz*2); - lzextralen = valin(pbuf+sz*3); - extralen = valin(pbuf+sz*4); - newsize = valin(pbuf+sz*5); - return (ctrllen + lzdatalen + lzextralen + hdrsz); + lzctrllen = valini32(pbuf); + ctrllen = valini32(pbuf+4); + lzdatalen = valini32(pbuf+4*2); + datalen = valini32(pbuf+4*3); + lzextralen = valini32(pbuf+4*4); + extralen = valini32(pbuf+4*5); + newsize = valini32(pbuf+4*6); + return (lzctrllen + lzdatalen + lzextralen + hdrsz); } int bspatch(u_char 
*pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize) { bsize_t newsize; - bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen; + bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen; u_char buf[8]; - u_char *diffdata, *extradata; + u_char *diffdata, *extradata, *ctrldata; bsize_t oldpos,newpos; bsize_t ctrl[3]; bsize_t lenread; bsize_t i; bufio_t cpf, dpf, epf; - int sz, hdrsz, rv; + int hdrsz, rv; unsigned int len; /* File format: - 0 8 length of ctrl block (X) - 8 8 compressed length of diff block (Y) - 16 8 actual length of diff block - 24 8 compressed length of extra block (Z) - 32 8 actual length of extra block - 40 8 length of new file - 48 X control block - 48+X Y lzfx(diff block) - 48+X+Y Z lzfx(extra block) + 0 4 compressed length of ctrl block (X) + 4 4 actual length of ctrl block + 8 4 compressed length of diff block (Y) + 12 4 actual length of diff block + 16 4 compressed length of extra block (Z) + 20 4 actual length of extra block + 24 4 length of new file + 28 X ZRLE?(control block) + 28+X Y ZRLE(diff block) + 28+X+Y Z ZRLE(extra block) with control block a set of triples (x,y,z) meaning "add x bytes from oldfile to x bytes from the diff block; copy y bytes from the extra block; seek forwards in oldfile by z bytes". */ - sz = sizeof (bsize_t); - hdrsz = sz*6; + hdrsz = 4*7; rv = 1; /* Read lengths from header first. 
*/ - ctrllen = valin(pbuf); - lzdatalen = valin(pbuf+sz); - datalen = valin(pbuf+sz*2); - lzextralen = valin(pbuf+sz*3); - extralen = valin(pbuf+sz*4); - newsize = valin(pbuf+sz*5); + lzctrllen = valini32(pbuf); + ctrllen = valini32(pbuf+4); + lzdatalen = valini32(pbuf+4*2); + datalen = valini32(pbuf+4*3); + lzextralen = valini32(pbuf+4*4); + extralen = valini32(pbuf+4*5); + newsize = valini32(pbuf+4*6); if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) { fprintf(stderr, "1: Corrupt patch\n"); @@ -122,18 +123,38 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi *_newsize = newsize; /* Allocate buffers. */ - diffdata = malloc(datalen); - extradata = malloc(extralen); + diffdata = slab_alloc(NULL, datalen); + extradata = slab_alloc(NULL, extralen); if (diffdata == NULL || extradata == NULL) { fprintf(stderr, "bspatch: Out of memory.\n"); - if (diffdata) free(diffdata); - if (extradata) free(extradata); + if (diffdata) slab_free(NULL, diffdata); + if (extradata) slab_free(NULL, extradata); return (0); } - /* Decompress diffdata and extradata. */ + /* Decompress ctrldata, diffdata and extradata. */ + if (lzctrllen < ctrllen) { + /* Ctrl data will be RLE-d if RLE size is less. 
*/ + ctrldata = slab_alloc(NULL, ctrllen); + if (ctrldata == NULL) { + fprintf(stderr, "bspatch: Out of memory.\n"); + slab_free(NULL, diffdata); + slab_free(NULL, extradata); + return (0); + } + len = ctrllen; + if (zero_rle_decode(pbuf + hdrsz, lzctrllen, ctrldata, &len) == -1 || + len != ctrllen) { + fprintf(stderr, "bspatch: Failed to decompress control data.\n"); + rv = 0; + goto out; + } + } else { + ctrldata = pbuf + hdrsz; + } + len = datalen; - if (zero_rle_decode(pbuf + hdrsz + ctrllen, lzdatalen, diffdata, &len) == -1 || + if (zero_rle_decode(pbuf + hdrsz + lzctrllen, lzdatalen, diffdata, &len) == -1 || len != datalen) { fprintf(stderr, "bspatch: Failed to decompress diff data.\n"); rv = 0; @@ -142,14 +163,14 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi datalen = len; len = extralen; - if (zero_rle_decode(pbuf + hdrsz + ctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || + if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || len != extralen) { fprintf(stderr, "bspatch: Failed to decompress extra data.\n"); rv = 0; goto out; } extralen = len; - BUFOPEN(&cpf, pbuf + hdrsz, ctrllen); + BUFOPEN(&cpf, ctrldata, ctrllen); BUFOPEN(&dpf, diffdata, datalen); BUFOPEN(&epf, extradata, extralen); @@ -211,8 +232,10 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi }; out: - free(diffdata); - free(extradata); + if (lzctrllen < ctrllen) + slab_free(NULL, ctrldata); + slab_free(NULL, diffdata); + slab_free(NULL, extradata); return (rv); } diff --git a/bsdiff/rle_encoder.c b/bsdiff/rle_encoder.c index fdc5f15..24f581e 100644 --- a/bsdiff/rle_encoder.c +++ b/bsdiff/rle_encoder.c @@ -59,7 +59,7 @@ zero_rle_encode(const void *const ibuf, const unsigned int ilen, cnt = 0; pos4 = pos1; state = 1; - // Lookahead if have ate least 4 consecutive zeroes + // Lookahead if there are at least 4 consecutive zeroes for (;pos4= 4) break; } diff --git 
a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c index ffb9ba5..9d1346a 100755 --- a/rabin/rabin_polynomial.c +++ b/rabin/rabin_polynomial.c @@ -345,7 +345,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s *rabin_pos = last_offset; return (0); } - +printf("Original size: %lld\n", *size); // If we found at least a few chunks, perform dedup. if (blknum > 2) { uint64_t prev_cksum; @@ -538,8 +538,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s if (rabin_index[blk] & GET_SIMILARITY_FLAG) { old = buf1 + ctx->blocks[j].offset; new = buf1 + ctx->blocks[blk].cksum_n_offset; + matchlen = ctx->real_chunksize - *size; + bsz = bsdiff(old, ctx->blocks[j].length, new, - ctx->blocks[blk].new_length, ctx->cbuf + pos1, 0, 0); + ctx->blocks[blk].new_length, ctx->cbuf + pos1, + buf1 + *size, matchlen); if (bsz == 0) { memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length); rabin_index[blk] = htonl(ctx->blocks[blk].new_length); @@ -572,6 +575,7 @@ cont: entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE); *size = pos1; ctx->valid = 1; +printf("Deduped size: %lld\n", *size); /* * Remaining header entries: size of compressed index and size of