Use 4-byte ints for header values instead of 8-byte size_t.

Use RLE on control data if it reduces the size.
Update some comments.
Use scratch space at end of data chunk, if available.
This commit is contained in:
Moinak Ghosh 2012-07-20 20:53:46 +05:30
parent e788eb43b8
commit fd7c7e9a65
4 changed files with 123 additions and 67 deletions

View file

@ -226,7 +226,7 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
bsize_t overlap,Ss,lens; bsize_t overlap,Ss,lens;
bsize_t i, rv; bsize_t i, rv;
bsize_t dblen,eblen; bsize_t dblen,eblen;
u_char *db,*eb; u_char *db,*eb, *cb;
u_char buf[sizeof (bsize_t)]; u_char buf[sizeof (bsize_t)];
u_char header[48]; u_char header[48];
unsigned int sz, hdrsz, ulen; unsigned int sz, hdrsz, ulen;
@ -252,29 +252,31 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
BUFOPEN(&pf, diff, newsize); BUFOPEN(&pf, diff, newsize);
/* Header is /* Header is
0 8 length of ctrl block 0 4 compressed length of ctrl block
8 8 compressed length of diff block 4 4 actual length of ctrl block
16 8 actual length of diff block 8 4 compressed length of diff block
24 8 compressed length of extra block 12 4 actual length of diff block
32 8 actual length of extra block 16 4 compressed length of extra block
40 8 length of new file */ 20 4 actual length of extra block
24 4 length of new file */
/* File is /* File is
0 32 Header 0 28 Header
32 ?? ctrl block 28 ?? ctrl block
?? ?? diff block ?? ?? diff block
?? ?? extra block */ ?? ?? extra block */
valout(0, header); valouti32(0, header);
valout(0, header + sz); valouti32(0, header + 4);
valout(0, header + sz*2); valouti32(0, header + 4*2);
valout(0, header + sz*3); valouti32(0, header + 4*3);
valout(0, header + sz*4); valouti32(0, header + 4*4);
valout(newsize, header + sz*5); valouti32(0, header + 4*5);
if (BUFWRITE(&pf, header, sz*6) != sz*6) { valouti32(newsize, header + 4*6);
if (BUFWRITE(&pf, header, 4*7) != 4*7) {
fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n"); fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
rv = 0; rv = 0;
goto out; goto out;
} }
hdrsz = sz*6; hdrsz = 4*7;
/* Compute the differences, writing ctrl as we go */ /* Compute the differences, writing ctrl as we go */
scan=0;len=0; scan=0;len=0;
@ -356,9 +358,36 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
goto out; goto out;
} }
/* Compute size of ctrl data */ /* Compute uncompressed size of the ctrl data. */
len = BUFTELL(&pf); len = BUFTELL(&pf);
valout(len-hdrsz, header); valouti32(len-hdrsz, header+4);
ulen = len-hdrsz;
/* If our data can fit in the scratch area, use it; otherwise allocate. */
if (ulen > scratchsize) {
cb = slab_alloc(NULL, ulen);
} else {
cb = scratch;
}
/*
* Attempt to RLE the ctrl data. If RLE succeeds and produces smaller
* output, then retain it.
*/
BUFSEEK(&pf, hdrsz, SEEK_SET);
rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
if (rv == 0 && ulen < len-hdrsz) {
BUFWRITE(&pf, cb, ulen);
} else {
BUFSEEK(&pf, len, SEEK_SET);
}
if (len-hdrsz > scratchsize) {
slab_free(NULL, cb);
}
/* Compute compressed size of ctrl data */
len = BUFTELL(&pf);
valouti32(len-hdrsz, header);
rv = len; rv = len;
/* Write diff data */ /* Write diff data */
@ -370,8 +399,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
} }
/* Output size of diff data */ /* Output size of diff data */
len = ulen; len = ulen;
valout(len, header + sz); valouti32(len, header + 4*2);
valout(dblen, header + sz*2); valouti32(dblen, header + 4*3);
rv += len; rv += len;
BUFSEEK(&pf, len, SEEK_CUR); BUFSEEK(&pf, len, SEEK_CUR);
@ -384,8 +413,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
} }
/* Output size of extra data */ /* Output size of extra data */
len = ulen; len = ulen;
valout(len, header + sz*3); valouti32(len, header + 4*4);
valout(eblen, header + sz*4); valouti32(eblen, header + 4*5);
rv += len; rv += len;
/* Seek to the beginning, re-write the header.*/ /* Seek to the beginning, re-write the header.*/

View file

@ -54,62 +54,63 @@ valini32(u_char *buf)
bsize_t bsize_t
get_bsdiff_sz(u_char *pbuf) { get_bsdiff_sz(u_char *pbuf) {
bsize_t newsize; bsize_t newsize;
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen; bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
int sz, hdrsz, rv; int hdrsz, rv;
sz = sizeof (bsize_t); hdrsz = 4*7;
hdrsz = sz*6;
ctrllen = valin(pbuf); lzctrllen = valini32(pbuf);
lzdatalen = valin(pbuf+sz); ctrllen = valini32(pbuf+4);
datalen = valin(pbuf+sz*2); lzdatalen = valini32(pbuf+4*2);
lzextralen = valin(pbuf+sz*3); datalen = valini32(pbuf+4*3);
extralen = valin(pbuf+sz*4); lzextralen = valini32(pbuf+4*4);
newsize = valin(pbuf+sz*5); extralen = valini32(pbuf+4*5);
return (ctrllen + lzdatalen + lzextralen + hdrsz); newsize = valini32(pbuf+4*6);
return (lzctrllen + lzdatalen + lzextralen + hdrsz);
} }
int int
bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize) bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize)
{ {
bsize_t newsize; bsize_t newsize;
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen; bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
u_char buf[8]; u_char buf[8];
u_char *diffdata, *extradata; u_char *diffdata, *extradata, *ctrldata;
bsize_t oldpos,newpos; bsize_t oldpos,newpos;
bsize_t ctrl[3]; bsize_t ctrl[3];
bsize_t lenread; bsize_t lenread;
bsize_t i; bsize_t i;
bufio_t cpf, dpf, epf; bufio_t cpf, dpf, epf;
int sz, hdrsz, rv; int hdrsz, rv;
unsigned int len; unsigned int len;
/* /*
File format: File format:
0 8 length of ctrl block (X) 0 4 compressed length of ctrl block (X)
8 8 compressed length of diff block (Y) 4 4 actual length of ctrl block
16 8 actual length of diff block 8 4 compressed length of diff block (Y)
24 8 compressed length of extra block (Z) 12 4 actual length of diff block
32 8 actual length of extra block 16 4 compressed length of extra block (Z)
40 8 length of new file 20 4 actual length of extra block
48 X control block 24 4 length of new file
48+X Y lzfx(diff block) 28 X ZRLE?(control block)
48+X+Y Z lzfx(extra block) 28+X Y ZRLE(diff block)
28+X+Y Z ZRLE(extra block)
with control block a set of triples (x,y,z) meaning "add x bytes with control block a set of triples (x,y,z) meaning "add x bytes
from oldfile to x bytes from the diff block; copy y bytes from the from oldfile to x bytes from the diff block; copy y bytes from the
extra block; seek forwards in oldfile by z bytes". extra block; seek forwards in oldfile by z bytes".
*/ */
sz = sizeof (bsize_t); hdrsz = 4*7;
hdrsz = sz*6;
rv = 1; rv = 1;
/* Read lengths from header first. */ /* Read lengths from header first. */
ctrllen = valin(pbuf); lzctrllen = valini32(pbuf);
lzdatalen = valin(pbuf+sz); ctrllen = valini32(pbuf+4);
datalen = valin(pbuf+sz*2); lzdatalen = valini32(pbuf+4*2);
lzextralen = valin(pbuf+sz*3); datalen = valini32(pbuf+4*3);
extralen = valin(pbuf+sz*4); lzextralen = valini32(pbuf+4*4);
newsize = valin(pbuf+sz*5); extralen = valini32(pbuf+4*5);
newsize = valini32(pbuf+4*6);
if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) { if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) {
fprintf(stderr, "1: Corrupt patch\n"); fprintf(stderr, "1: Corrupt patch\n");
@ -122,18 +123,38 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
*_newsize = newsize; *_newsize = newsize;
/* Allocate buffers. */ /* Allocate buffers. */
diffdata = malloc(datalen); diffdata = slab_alloc(NULL, datalen);
extradata = malloc(extralen); extradata = slab_alloc(NULL, extralen);
if (diffdata == NULL || extradata == NULL) { if (diffdata == NULL || extradata == NULL) {
fprintf(stderr, "bspatch: Out of memory.\n"); fprintf(stderr, "bspatch: Out of memory.\n");
if (diffdata) free(diffdata); if (diffdata) slab_free(NULL, diffdata);
if (extradata) free(extradata); if (extradata) slab_free(NULL, extradata);
return (0); return (0);
} }
/* Decompress diffdata and extradata. */ /* Decompress ctrldata, diffdata and extradata. */
if (lzctrllen < ctrllen) {
/* Ctrl data will be RLE-d if RLE size is less. */
ctrldata = slab_alloc(NULL, ctrllen);
if (ctrldata == NULL) {
fprintf(stderr, "bspatch: Out of memory.\n");
slab_free(NULL, diffdata);
slab_free(NULL, extradata);
return (0);
}
len = ctrllen;
if (zero_rle_decode(pbuf + hdrsz, lzctrllen, ctrldata, &len) == -1 ||
len != ctrllen) {
fprintf(stderr, "bspatch: Failed to decompress control data.\n");
rv = 0;
goto out;
}
} else {
ctrldata = pbuf + hdrsz;
}
len = datalen; len = datalen;
if (zero_rle_decode(pbuf + hdrsz + ctrllen, lzdatalen, diffdata, &len) == -1 || if (zero_rle_decode(pbuf + hdrsz + lzctrllen, lzdatalen, diffdata, &len) == -1 ||
len != datalen) { len != datalen) {
fprintf(stderr, "bspatch: Failed to decompress diff data.\n"); fprintf(stderr, "bspatch: Failed to decompress diff data.\n");
rv = 0; rv = 0;
@ -142,14 +163,14 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
datalen = len; datalen = len;
len = extralen; len = extralen;
if (zero_rle_decode(pbuf + hdrsz + ctrllen + lzdatalen, lzextralen, extradata, &len) == -1 || if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
len != extralen) { len != extralen) {
fprintf(stderr, "bspatch: Failed to decompress extra data.\n"); fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
rv = 0; rv = 0;
goto out; goto out;
} }
extralen = len; extralen = len;
BUFOPEN(&cpf, pbuf + hdrsz, ctrllen); BUFOPEN(&cpf, ctrldata, ctrllen);
BUFOPEN(&dpf, diffdata, datalen); BUFOPEN(&dpf, diffdata, datalen);
BUFOPEN(&epf, extradata, extralen); BUFOPEN(&epf, extradata, extralen);
@ -211,8 +232,10 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
}; };
out: out:
free(diffdata); if (lzctrllen < ctrllen)
free(extradata); slab_free(NULL, ctrldata);
slab_free(NULL, diffdata);
slab_free(NULL, extradata);
return (rv); return (rv);
} }

View file

@ -59,7 +59,7 @@ zero_rle_encode(const void *const ibuf, const unsigned int ilen,
cnt = 0; cnt = 0;
pos4 = pos1; pos4 = pos1;
state = 1; state = 1;
// Lookahead if have ate least 4 consecutive zeroes // Lookahead if there are at least 4 consecutive zeroes
for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++; for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++;
if (cnt >= 4) break; if (cnt >= 4) break;
} }

View file

@ -345,7 +345,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
*rabin_pos = last_offset; *rabin_pos = last_offset;
return (0); return (0);
} }
printf("Original size: %lld\n", *size);
// If we found at least a few chunks, perform dedup. // If we found at least a few chunks, perform dedup.
if (blknum > 2) { if (blknum > 2) {
uint64_t prev_cksum; uint64_t prev_cksum;
@ -538,8 +538,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
if (rabin_index[blk] & GET_SIMILARITY_FLAG) { if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
old = buf1 + ctx->blocks[j].offset; old = buf1 + ctx->blocks[j].offset;
new = buf1 + ctx->blocks[blk].cksum_n_offset; new = buf1 + ctx->blocks[blk].cksum_n_offset;
matchlen = ctx->real_chunksize - *size;
bsz = bsdiff(old, ctx->blocks[j].length, new, bsz = bsdiff(old, ctx->blocks[j].length, new,
ctx->blocks[blk].new_length, ctx->cbuf + pos1, 0, 0); ctx->blocks[blk].new_length, ctx->cbuf + pos1,
buf1 + *size, matchlen);
if (bsz == 0) { if (bsz == 0) {
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length); memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length);
rabin_index[blk] = htonl(ctx->blocks[blk].new_length); rabin_index[blk] = htonl(ctx->blocks[blk].new_length);
@ -572,6 +575,7 @@ cont:
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE); entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
*size = pos1; *size = pos1;
ctx->valid = 1; ctx->valid = 1;
printf("Deduped size: %lld\n", *size);
/* /*
* Remaining header entries: size of compressed index and size of * Remaining header entries: size of compressed index and size of