Use 4-byte ints for header values instead of 8-byte size_t.
Use RLE on control data if it reduces the size. Update some comments. Use scratch space at end of data chunk, if available.
This commit is contained in:
parent
e788eb43b8
commit
fd7c7e9a65
4 changed files with 123 additions and 67 deletions
|
@ -226,7 +226,7 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
bsize_t overlap,Ss,lens;
|
||||
bsize_t i, rv;
|
||||
bsize_t dblen,eblen;
|
||||
u_char *db,*eb;
|
||||
u_char *db,*eb, *cb;
|
||||
u_char buf[sizeof (bsize_t)];
|
||||
u_char header[48];
|
||||
unsigned int sz, hdrsz, ulen;
|
||||
|
@ -252,29 +252,31 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
BUFOPEN(&pf, diff, newsize);
|
||||
|
||||
/* Header is
|
||||
0 8 length of ctrl block
|
||||
8 8 compressed length of diff block
|
||||
16 8 actual length of diff block
|
||||
24 8 compressed length of extra block
|
||||
32 8 actual length of extra block
|
||||
40 8 length of new file */
|
||||
0 4 compressed length of ctrl block
|
||||
4 4 actual length of ctrl block
|
||||
8 4 compressed length of diff block
|
||||
12 4 actual length of diff block
|
||||
16 4 compressed length of extra block
|
||||
20 4 actual length of extra block
|
||||
24 4 length of new file */
|
||||
/* File is
|
||||
0 32 Header
|
||||
32 ?? ctrl block
|
||||
0 28 Header
|
||||
28 ?? ctrl block
|
||||
?? ?? diff block
|
||||
?? ?? extra block */
|
||||
valout(0, header);
|
||||
valout(0, header + sz);
|
||||
valout(0, header + sz*2);
|
||||
valout(0, header + sz*3);
|
||||
valout(0, header + sz*4);
|
||||
valout(newsize, header + sz*5);
|
||||
if (BUFWRITE(&pf, header, sz*6) != sz*6) {
|
||||
valouti32(0, header);
|
||||
valouti32(0, header + 4);
|
||||
valouti32(0, header + 4*2);
|
||||
valouti32(0, header + 4*3);
|
||||
valouti32(0, header + 4*4);
|
||||
valouti32(0, header + 4*5);
|
||||
valouti32(newsize, header + 4*6);
|
||||
if (BUFWRITE(&pf, header, 4*7) != 4*7) {
|
||||
fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
hdrsz = sz*6;
|
||||
hdrsz = 4*7;
|
||||
|
||||
/* Compute the differences, writing ctrl as we go */
|
||||
scan=0;len=0;
|
||||
|
@ -356,9 +358,36 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* Compute size of ctrl data */
|
||||
/* Compute uncompressed size of the ctrl data. */
|
||||
len = BUFTELL(&pf);
|
||||
valout(len-hdrsz, header);
|
||||
valouti32(len-hdrsz, header+4);
|
||||
ulen = len-hdrsz;
|
||||
|
||||
/* If our data can fit in the scratch area, use it; otherwise allocate. */
|
||||
if (ulen > scratchsize) {
|
||||
cb = slab_alloc(NULL, ulen);
|
||||
} else {
|
||||
cb = scratch;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to RLE the ctrl data. If RLE succeeds and produces smaller
|
||||
* output, then retain it.
|
||||
*/
|
||||
BUFSEEK(&pf, hdrsz, SEEK_SET);
|
||||
rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
|
||||
if (rv == 0 && ulen < len-hdrsz) {
|
||||
BUFWRITE(&pf, cb, ulen);
|
||||
} else {
|
||||
BUFSEEK(&pf, len, SEEK_SET);
|
||||
}
|
||||
if (len-hdrsz > scratchsize) {
|
||||
slab_free(NULL, cb);
|
||||
}
|
||||
|
||||
/* Compute compressed size of ctrl data */
|
||||
len = BUFTELL(&pf);
|
||||
valouti32(len-hdrsz, header);
|
||||
rv = len;
|
||||
|
||||
/* Write diff data */
|
||||
|
@ -370,8 +399,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
}
|
||||
/* Output size of diff data */
|
||||
len = ulen;
|
||||
valout(len, header + sz);
|
||||
valout(dblen, header + sz*2);
|
||||
valouti32(len, header + 4*2);
|
||||
valouti32(dblen, header + 4*3);
|
||||
rv += len;
|
||||
BUFSEEK(&pf, len, SEEK_CUR);
|
||||
|
||||
|
@ -384,8 +413,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
}
|
||||
/* Output size of extra data */
|
||||
len = ulen;
|
||||
valout(len, header + sz*3);
|
||||
valout(eblen, header + sz*4);
|
||||
valouti32(len, header + 4*4);
|
||||
valouti32(eblen, header + 4*5);
|
||||
rv += len;
|
||||
|
||||
/* Seek to the beginning, re-write the header.*/
|
||||
|
|
105
bsdiff/bspatch.c
105
bsdiff/bspatch.c
|
@ -54,62 +54,63 @@ valini32(u_char *buf)
|
|||
bsize_t
|
||||
get_bsdiff_sz(u_char *pbuf) {
|
||||
bsize_t newsize;
|
||||
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||
int sz, hdrsz, rv;
|
||||
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||
int hdrsz, rv;
|
||||
|
||||
sz = sizeof (bsize_t);
|
||||
hdrsz = sz*6;
|
||||
hdrsz = 4*7;
|
||||
|
||||
ctrllen = valin(pbuf);
|
||||
lzdatalen = valin(pbuf+sz);
|
||||
datalen = valin(pbuf+sz*2);
|
||||
lzextralen = valin(pbuf+sz*3);
|
||||
extralen = valin(pbuf+sz*4);
|
||||
newsize = valin(pbuf+sz*5);
|
||||
return (ctrllen + lzdatalen + lzextralen + hdrsz);
|
||||
lzctrllen = valini32(pbuf);
|
||||
ctrllen = valini32(pbuf+4);
|
||||
lzdatalen = valini32(pbuf+4*2);
|
||||
datalen = valini32(pbuf+4*3);
|
||||
lzextralen = valini32(pbuf+4*4);
|
||||
extralen = valini32(pbuf+4*5);
|
||||
newsize = valini32(pbuf+4*6);
|
||||
return (lzctrllen + lzdatalen + lzextralen + hdrsz);
|
||||
}
|
||||
|
||||
int
|
||||
bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize)
|
||||
{
|
||||
bsize_t newsize;
|
||||
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||
u_char buf[8];
|
||||
u_char *diffdata, *extradata;
|
||||
u_char *diffdata, *extradata, *ctrldata;
|
||||
bsize_t oldpos,newpos;
|
||||
bsize_t ctrl[3];
|
||||
bsize_t lenread;
|
||||
bsize_t i;
|
||||
bufio_t cpf, dpf, epf;
|
||||
int sz, hdrsz, rv;
|
||||
int hdrsz, rv;
|
||||
unsigned int len;
|
||||
|
||||
/*
|
||||
File format:
|
||||
0 8 length of ctrl block (X)
|
||||
8 8 compressed length of diff block (Y)
|
||||
16 8 actual length of diff block
|
||||
24 8 compressed length of extra block (Z)
|
||||
32 8 actual length of extra block
|
||||
40 8 length of new file
|
||||
48 X control block
|
||||
48+X Y lzfx(diff block)
|
||||
48+X+Y Z lzfx(extra block)
|
||||
0 4 compressed length of ctrl block (X)
|
||||
4 4 actual length of ctrl block
|
||||
8 4 compressed length of diff block (Y)
|
||||
12 4 actual length of diff block
|
||||
16 4 compressed length of extra block (Z)
|
||||
20 4 actual length of extra block
|
||||
24 4 length of new file
|
||||
28 X ZRLE?(control block)
|
||||
28+X Y ZRLE(diff block)
|
||||
28+X+Y Z ZRLE(extra block)
|
||||
with control block a set of triples (x,y,z) meaning "add x bytes
|
||||
from oldfile to x bytes from the diff block; copy y bytes from the
|
||||
extra block; seek forwards in oldfile by z bytes".
|
||||
*/
|
||||
sz = sizeof (bsize_t);
|
||||
hdrsz = sz*6;
|
||||
hdrsz = 4*7;
|
||||
rv = 1;
|
||||
|
||||
/* Read lengths from header first. */
|
||||
ctrllen = valin(pbuf);
|
||||
lzdatalen = valin(pbuf+sz);
|
||||
datalen = valin(pbuf+sz*2);
|
||||
lzextralen = valin(pbuf+sz*3);
|
||||
extralen = valin(pbuf+sz*4);
|
||||
newsize = valin(pbuf+sz*5);
|
||||
lzctrllen = valini32(pbuf);
|
||||
ctrllen = valini32(pbuf+4);
|
||||
lzdatalen = valini32(pbuf+4*2);
|
||||
datalen = valini32(pbuf+4*3);
|
||||
lzextralen = valini32(pbuf+4*4);
|
||||
extralen = valini32(pbuf+4*5);
|
||||
newsize = valini32(pbuf+4*6);
|
||||
|
||||
if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) {
|
||||
fprintf(stderr, "1: Corrupt patch\n");
|
||||
|
@ -122,18 +123,38 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
|||
*_newsize = newsize;
|
||||
|
||||
/* Allocate buffers. */
|
||||
diffdata = malloc(datalen);
|
||||
extradata = malloc(extralen);
|
||||
diffdata = slab_alloc(NULL, datalen);
|
||||
extradata = slab_alloc(NULL, extralen);
|
||||
if (diffdata == NULL || extradata == NULL) {
|
||||
fprintf(stderr, "bspatch: Out of memory.\n");
|
||||
if (diffdata) free(diffdata);
|
||||
if (extradata) free(extradata);
|
||||
if (diffdata) slab_free(NULL, diffdata);
|
||||
if (extradata) slab_free(NULL, extradata);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Decompress diffdata and extradata. */
|
||||
/* Decompress ctrldata, diffdata and extradata. */
|
||||
if (lzctrllen < ctrllen) {
|
||||
/* Ctrl data will be RLE-d if RLE size is less. */
|
||||
ctrldata = slab_alloc(NULL, ctrllen);
|
||||
if (ctrldata == NULL) {
|
||||
fprintf(stderr, "bspatch: Out of memory.\n");
|
||||
slab_free(NULL, diffdata);
|
||||
slab_free(NULL, extradata);
|
||||
return (0);
|
||||
}
|
||||
len = ctrllen;
|
||||
if (zero_rle_decode(pbuf + hdrsz, lzctrllen, ctrldata, &len) == -1 ||
|
||||
len != ctrllen) {
|
||||
fprintf(stderr, "bspatch: Failed to decompress control data.\n");
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
ctrldata = pbuf + hdrsz;
|
||||
}
|
||||
|
||||
len = datalen;
|
||||
if (zero_rle_decode(pbuf + hdrsz + ctrllen, lzdatalen, diffdata, &len) == -1 ||
|
||||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen, lzdatalen, diffdata, &len) == -1 ||
|
||||
len != datalen) {
|
||||
fprintf(stderr, "bspatch: Failed to decompress diff data.\n");
|
||||
rv = 0;
|
||||
|
@ -142,14 +163,14 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
|||
datalen = len;
|
||||
|
||||
len = extralen;
|
||||
if (zero_rle_decode(pbuf + hdrsz + ctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||
len != extralen) {
|
||||
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
extralen = len;
|
||||
BUFOPEN(&cpf, pbuf + hdrsz, ctrllen);
|
||||
BUFOPEN(&cpf, ctrldata, ctrllen);
|
||||
BUFOPEN(&dpf, diffdata, datalen);
|
||||
BUFOPEN(&epf, extradata, extralen);
|
||||
|
||||
|
@ -211,8 +232,10 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
|||
};
|
||||
|
||||
out:
|
||||
free(diffdata);
|
||||
free(extradata);
|
||||
if (lzctrllen < ctrllen)
|
||||
slab_free(NULL, ctrldata);
|
||||
slab_free(NULL, diffdata);
|
||||
slab_free(NULL, extradata);
|
||||
|
||||
return (rv);
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ zero_rle_encode(const void *const ibuf, const unsigned int ilen,
|
|||
cnt = 0;
|
||||
pos4 = pos1;
|
||||
state = 1;
|
||||
// Lookahead if have ate least 4 consecutive zeroes
|
||||
// Lookahead if there are at least 4 consecutive zeroes
|
||||
for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++;
|
||||
if (cnt >= 4) break;
|
||||
}
|
||||
|
|
|
@ -345,7 +345,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
*rabin_pos = last_offset;
|
||||
return (0);
|
||||
}
|
||||
|
||||
printf("Original size: %lld\n", *size);
|
||||
// If we found at least a few chunks, perform dedup.
|
||||
if (blknum > 2) {
|
||||
uint64_t prev_cksum;
|
||||
|
@ -538,8 +538,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
|
||||
old = buf1 + ctx->blocks[j].offset;
|
||||
new = buf1 + ctx->blocks[blk].cksum_n_offset;
|
||||
matchlen = ctx->real_chunksize - *size;
|
||||
|
||||
bsz = bsdiff(old, ctx->blocks[j].length, new,
|
||||
ctx->blocks[blk].new_length, ctx->cbuf + pos1, 0, 0);
|
||||
ctx->blocks[blk].new_length, ctx->cbuf + pos1,
|
||||
buf1 + *size, matchlen);
|
||||
if (bsz == 0) {
|
||||
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length);
|
||||
rabin_index[blk] = htonl(ctx->blocks[blk].new_length);
|
||||
|
@ -572,6 +575,7 @@ cont:
|
|||
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
|
||||
*size = pos1;
|
||||
ctx->valid = 1;
|
||||
printf("Deduped size: %lld\n", *size);
|
||||
|
||||
/*
|
||||
* Remaining header entries: size of compressed index and size of
|
||||
|
|
Loading…
Reference in a new issue