Use 4-byte ints for header values instead of 8-byte size_t.

Use RLE on control data if it reduces the size.
Update some comments.
Use scratch space at end of data chunk, if available.
This commit is contained in:
Moinak Ghosh 2012-07-20 20:53:46 +05:30
parent e788eb43b8
commit fd7c7e9a65
4 changed files with 123 additions and 67 deletions

View file

@ -226,7 +226,7 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
bsize_t overlap,Ss,lens;
bsize_t i, rv;
bsize_t dblen,eblen;
u_char *db,*eb;
u_char *db,*eb, *cb;
u_char buf[sizeof (bsize_t)];
u_char header[48];
unsigned int sz, hdrsz, ulen;
@ -252,29 +252,31 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
BUFOPEN(&pf, diff, newsize);
/* Header is
0 8 length of ctrl block
8 8 compressed length of diff block
16 8 actual length of diff block
24 8 compressed length of extra block
32 8 actual length of extra block
40 8 length of new file */
0 4 compressed length of ctrl block
4 4 actual length of ctrl block
8 4 compressed length of diff block
12 4 actual length of diff block
16 4 compressed length of extra block
20 4 actual length of extra block
24 4 length of new file */
/* File is
0 32 Header
32 ?? ctrl block
0 28 Header
28 ?? ctrl block
?? ?? diff block
?? ?? extra block */
valout(0, header);
valout(0, header + sz);
valout(0, header + sz*2);
valout(0, header + sz*3);
valout(0, header + sz*4);
valout(newsize, header + sz*5);
if (BUFWRITE(&pf, header, sz*6) != sz*6) {
valouti32(0, header);
valouti32(0, header + 4);
valouti32(0, header + 4*2);
valouti32(0, header + 4*3);
valouti32(0, header + 4*4);
valouti32(0, header + 4*5);
valouti32(newsize, header + 4*6);
if (BUFWRITE(&pf, header, 4*7) != 4*7) {
fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
rv = 0;
goto out;
}
hdrsz = sz*6;
hdrsz = 4*7;
/* Compute the differences, writing ctrl as we go */
scan=0;len=0;
@ -356,9 +358,36 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
goto out;
}
/* Compute size of ctrl data */
/* Compute uncompressed size of the ctrl data. */
len = BUFTELL(&pf);
valout(len-hdrsz, header);
valouti32(len-hdrsz, header+4);
ulen = len-hdrsz;
/* If our data fits in the scratch area, use it; otherwise allocate. */
if (ulen > scratchsize) {
cb = slab_alloc(NULL, ulen);
} else {
cb = scratch;
}
/*
* Attempt to RLE the ctrl data. If RLE succeeds and produces smaller
* output, retain it.
*/
BUFSEEK(&pf, hdrsz, SEEK_SET);
rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
if (rv == 0 && ulen < len-hdrsz) {
BUFWRITE(&pf, cb, ulen);
} else {
BUFSEEK(&pf, len, SEEK_SET);
}
if (len-hdrsz > scratchsize) {
slab_free(NULL, cb);
}
/* Compute compressed size of ctrl data */
len = BUFTELL(&pf);
valouti32(len-hdrsz, header);
rv = len;
/* Write diff data */
@ -370,8 +399,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
}
/* Output size of diff data */
len = ulen;
valout(len, header + sz);
valout(dblen, header + sz*2);
valouti32(len, header + 4*2);
valouti32(dblen, header + 4*3);
rv += len;
BUFSEEK(&pf, len, SEEK_CUR);
@ -384,8 +413,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
}
/* Output size of extra data */
len = ulen;
valout(len, header + sz*3);
valout(eblen, header + sz*4);
valouti32(len, header + 4*4);
valouti32(eblen, header + 4*5);
rv += len;
/* Seek to the beginning, re-write the header.*/

View file

@ -54,62 +54,63 @@ valini32(u_char *buf)
bsize_t
get_bsdiff_sz(u_char *pbuf) {
bsize_t newsize;
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
int sz, hdrsz, rv;
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
int hdrsz, rv;
sz = sizeof (bsize_t);
hdrsz = sz*6;
hdrsz = 4*7;
ctrllen = valin(pbuf);
lzdatalen = valin(pbuf+sz);
datalen = valin(pbuf+sz*2);
lzextralen = valin(pbuf+sz*3);
extralen = valin(pbuf+sz*4);
newsize = valin(pbuf+sz*5);
return (ctrllen + lzdatalen + lzextralen + hdrsz);
lzctrllen = valini32(pbuf);
ctrllen = valini32(pbuf+4);
lzdatalen = valini32(pbuf+4*2);
datalen = valini32(pbuf+4*3);
lzextralen = valini32(pbuf+4*4);
extralen = valini32(pbuf+4*5);
newsize = valini32(pbuf+4*6);
return (lzctrllen + lzdatalen + lzextralen + hdrsz);
}
int
bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize)
{
bsize_t newsize;
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
u_char buf[8];
u_char *diffdata, *extradata;
u_char *diffdata, *extradata, *ctrldata;
bsize_t oldpos,newpos;
bsize_t ctrl[3];
bsize_t lenread;
bsize_t i;
bufio_t cpf, dpf, epf;
int sz, hdrsz, rv;
int hdrsz, rv;
unsigned int len;
/*
File format:
0 8 length of ctrl block (X)
8 8 compressed length of diff block (Y)
16 8 actual length of diff block
24 8 compressed length of extra block (Z)
32 8 actual length of extra block
40 8 length of new file
48 X control block
48+X Y lzfx(diff block)
48+X+Y Z lzfx(extra block)
0 4 compressed length of ctrl block (X)
4 4 actual length of ctrl block
8 4 compressed length of diff block (Y)
12 4 actual length of diff block
16 4 compressed length of extra block (Z)
20 4 actual length of extra block
24 4 length of new file
28 X ZRLE?(control block)
28+X Y ZRLE(diff block)
28+X+Y Z ZRLE(extra block)
with control block a set of triples (x,y,z) meaning "add x bytes
from oldfile to x bytes from the diff block; copy y bytes from the
extra block; seek forwards in oldfile by z bytes".
*/
sz = sizeof (bsize_t);
hdrsz = sz*6;
hdrsz = 4*7;
rv = 1;
/* Read lengths from header first. */
ctrllen = valin(pbuf);
lzdatalen = valin(pbuf+sz);
datalen = valin(pbuf+sz*2);
lzextralen = valin(pbuf+sz*3);
extralen = valin(pbuf+sz*4);
newsize = valin(pbuf+sz*5);
lzctrllen = valini32(pbuf);
ctrllen = valini32(pbuf+4);
lzdatalen = valini32(pbuf+4*2);
datalen = valini32(pbuf+4*3);
lzextralen = valini32(pbuf+4*4);
extralen = valini32(pbuf+4*5);
newsize = valini32(pbuf+4*6);
if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) {
fprintf(stderr, "1: Corrupt patch\n");
@ -122,18 +123,38 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
*_newsize = newsize;
/* Allocate buffers. */
diffdata = malloc(datalen);
extradata = malloc(extralen);
diffdata = slab_alloc(NULL, datalen);
extradata = slab_alloc(NULL, extralen);
if (diffdata == NULL || extradata == NULL) {
fprintf(stderr, "bspatch: Out of memory.\n");
if (diffdata) free(diffdata);
if (extradata) free(extradata);
if (diffdata) slab_free(NULL, diffdata);
if (extradata) slab_free(NULL, extradata);
return (0);
}
/* Decompress diffdata and extradata. */
/* Decompress ctrldata, diffdata and extradata. */
if (lzctrllen < ctrllen) {
/* Ctrl data will be RLE-d if RLE size is less. */
ctrldata = slab_alloc(NULL, ctrllen);
if (ctrldata == NULL) {
fprintf(stderr, "bspatch: Out of memory.\n");
slab_free(NULL, diffdata);
slab_free(NULL, extradata);
return (0);
}
len = ctrllen;
if (zero_rle_decode(pbuf + hdrsz, lzctrllen, ctrldata, &len) == -1 ||
len != ctrllen) {
fprintf(stderr, "bspatch: Failed to decompress control data.\n");
rv = 0;
goto out;
}
} else {
ctrldata = pbuf + hdrsz;
}
len = datalen;
if (zero_rle_decode(pbuf + hdrsz + ctrllen, lzdatalen, diffdata, &len) == -1 ||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen, lzdatalen, diffdata, &len) == -1 ||
len != datalen) {
fprintf(stderr, "bspatch: Failed to decompress diff data.\n");
rv = 0;
@ -142,14 +163,14 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
datalen = len;
len = extralen;
if (zero_rle_decode(pbuf + hdrsz + ctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
len != extralen) {
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
rv = 0;
goto out;
}
extralen = len;
BUFOPEN(&cpf, pbuf + hdrsz, ctrllen);
BUFOPEN(&cpf, ctrldata, ctrllen);
BUFOPEN(&dpf, diffdata, datalen);
BUFOPEN(&epf, extradata, extralen);
@ -211,8 +232,10 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
};
out:
free(diffdata);
free(extradata);
if (lzctrllen < ctrllen)
slab_free(NULL, ctrldata);
slab_free(NULL, diffdata);
slab_free(NULL, extradata);
return (rv);
}

View file

@ -59,7 +59,7 @@ zero_rle_encode(const void *const ibuf, const unsigned int ilen,
cnt = 0;
pos4 = pos1;
state = 1;
// Lookahead if have ate least 4 consecutive zeroes
// Lookahead if there are at least 4 consecutive zeroes
for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++;
if (cnt >= 4) break;
}

View file

@ -345,7 +345,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
*rabin_pos = last_offset;
return (0);
}
printf("Original size: %lld\n", *size);
// If we found at least a few chunks, perform dedup.
if (blknum > 2) {
uint64_t prev_cksum;
@ -538,8 +538,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
old = buf1 + ctx->blocks[j].offset;
new = buf1 + ctx->blocks[blk].cksum_n_offset;
matchlen = ctx->real_chunksize - *size;
bsz = bsdiff(old, ctx->blocks[j].length, new,
ctx->blocks[blk].new_length, ctx->cbuf + pos1, 0, 0);
ctx->blocks[blk].new_length, ctx->cbuf + pos1,
buf1 + *size, matchlen);
if (bsz == 0) {
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length);
rabin_index[blk] = htonl(ctx->blocks[blk].new_length);
@ -572,6 +575,7 @@ cont:
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
*size = pos1;
ctx->valid = 1;
printf("Deduped size: %lld\n", *size);
/*
* Remaining header entries: size of compressed index and size of