Use 4-byte ints for header values instead of 8-byte size_t.
Use RLE on control data if it reduces the size. Update some comments. Use scratch space at end of data chunk, if available.
This commit is contained in:
parent
e788eb43b8
commit
fd7c7e9a65
4 changed files with 123 additions and 67 deletions
|
@ -226,7 +226,7 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
||||||
bsize_t overlap,Ss,lens;
|
bsize_t overlap,Ss,lens;
|
||||||
bsize_t i, rv;
|
bsize_t i, rv;
|
||||||
bsize_t dblen,eblen;
|
bsize_t dblen,eblen;
|
||||||
u_char *db,*eb;
|
u_char *db,*eb, *cb;
|
||||||
u_char buf[sizeof (bsize_t)];
|
u_char buf[sizeof (bsize_t)];
|
||||||
u_char header[48];
|
u_char header[48];
|
||||||
unsigned int sz, hdrsz, ulen;
|
unsigned int sz, hdrsz, ulen;
|
||||||
|
@ -252,29 +252,31 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
||||||
BUFOPEN(&pf, diff, newsize);
|
BUFOPEN(&pf, diff, newsize);
|
||||||
|
|
||||||
/* Header is
|
/* Header is
|
||||||
0 8 length of ctrl block
|
0 4 compressed length of ctrl block
|
||||||
8 8 compressed length of diff block
|
4 4 actual length of ctrl block
|
||||||
16 8 actual length of diff block
|
8 4 compressed length of diff block
|
||||||
24 8 compressed length of extra block
|
12 4 actual length of diff block
|
||||||
32 8 actual length of extra block
|
16 4 compressed length of extra block
|
||||||
40 8 length of new file */
|
20 4 actual length of extra block
|
||||||
|
24 4 length of new file */
|
||||||
/* File is
|
/* File is
|
||||||
0 32 Header
|
0 28 Header
|
||||||
32 ?? ctrl block
|
28 ?? ctrl block
|
||||||
?? ?? diff block
|
?? ?? diff block
|
||||||
?? ?? extra block */
|
?? ?? extra block */
|
||||||
valout(0, header);
|
valouti32(0, header);
|
||||||
valout(0, header + sz);
|
valouti32(0, header + 4);
|
||||||
valout(0, header + sz*2);
|
valouti32(0, header + 4*2);
|
||||||
valout(0, header + sz*3);
|
valouti32(0, header + 4*3);
|
||||||
valout(0, header + sz*4);
|
valouti32(0, header + 4*4);
|
||||||
valout(newsize, header + sz*5);
|
valouti32(0, header + 4*5);
|
||||||
if (BUFWRITE(&pf, header, sz*6) != sz*6) {
|
valouti32(newsize, header + 4*6);
|
||||||
|
if (BUFWRITE(&pf, header, 4*7) != 4*7) {
|
||||||
fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
|
fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
|
||||||
rv = 0;
|
rv = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
hdrsz = sz*6;
|
hdrsz = 4*7;
|
||||||
|
|
||||||
/* Compute the differences, writing ctrl as we go */
|
/* Compute the differences, writing ctrl as we go */
|
||||||
scan=0;len=0;
|
scan=0;len=0;
|
||||||
|
@ -356,9 +358,36 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute size of ctrl data */
|
/* Compute uncompressed size of the ctrl data. */
|
||||||
len = BUFTELL(&pf);
|
len = BUFTELL(&pf);
|
||||||
valout(len-hdrsz, header);
|
valouti32(len-hdrsz, header+4);
|
||||||
|
ulen = len-hdrsz;
|
||||||
|
|
||||||
|
/* If our data can fit in the scratch area use it, otherwise alloc. */
|
||||||
|
if (ulen > scratchsize) {
|
||||||
|
cb = slab_alloc(NULL, ulen);
|
||||||
|
} else {
|
||||||
|
cb = scratch;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller
|
||||||
|
* output, then retain it.
|
||||||
|
*/
|
||||||
|
BUFSEEK(&pf, hdrsz, SEEK_SET);
|
||||||
|
rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
|
||||||
|
if (rv == 0 && ulen < len-hdrsz) {
|
||||||
|
BUFWRITE(&pf, cb, ulen);
|
||||||
|
} else {
|
||||||
|
BUFSEEK(&pf, len, SEEK_SET);
|
||||||
|
}
|
||||||
|
if (len-hdrsz > scratchsize) {
|
||||||
|
slab_free(NULL, cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute compressed size of ctrl data */
|
||||||
|
len = BUFTELL(&pf);
|
||||||
|
valouti32(len-hdrsz, header);
|
||||||
rv = len;
|
rv = len;
|
||||||
|
|
||||||
/* Write diff data */
|
/* Write diff data */
|
||||||
|
@ -370,8 +399,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
||||||
}
|
}
|
||||||
/* Output size of diff data */
|
/* Output size of diff data */
|
||||||
len = ulen;
|
len = ulen;
|
||||||
valout(len, header + sz);
|
valouti32(len, header + 4*2);
|
||||||
valout(dblen, header + sz*2);
|
valouti32(dblen, header + 4*3);
|
||||||
rv += len;
|
rv += len;
|
||||||
BUFSEEK(&pf, len, SEEK_CUR);
|
BUFSEEK(&pf, len, SEEK_CUR);
|
||||||
|
|
||||||
|
@ -384,8 +413,8 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
||||||
}
|
}
|
||||||
/* Output size of extra data */
|
/* Output size of extra data */
|
||||||
len = ulen;
|
len = ulen;
|
||||||
valout(len, header + sz*3);
|
valouti32(len, header + 4*4);
|
||||||
valout(eblen, header + sz*4);
|
valouti32(eblen, header + 4*5);
|
||||||
rv += len;
|
rv += len;
|
||||||
|
|
||||||
/* Seek to the beginning, re-write the header.*/
|
/* Seek to the beginning, re-write the header.*/
|
||||||
|
|
105
bsdiff/bspatch.c
105
bsdiff/bspatch.c
|
@ -54,62 +54,63 @@ valini32(u_char *buf)
|
||||||
bsize_t
|
bsize_t
|
||||||
get_bsdiff_sz(u_char *pbuf) {
|
get_bsdiff_sz(u_char *pbuf) {
|
||||||
bsize_t newsize;
|
bsize_t newsize;
|
||||||
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||||
int sz, hdrsz, rv;
|
int hdrsz, rv;
|
||||||
|
|
||||||
sz = sizeof (bsize_t);
|
hdrsz = 4*7;
|
||||||
hdrsz = sz*6;
|
|
||||||
|
|
||||||
ctrllen = valin(pbuf);
|
lzctrllen = valini32(pbuf);
|
||||||
lzdatalen = valin(pbuf+sz);
|
ctrllen = valini32(pbuf+4);
|
||||||
datalen = valin(pbuf+sz*2);
|
lzdatalen = valini32(pbuf+4*2);
|
||||||
lzextralen = valin(pbuf+sz*3);
|
datalen = valini32(pbuf+4*3);
|
||||||
extralen = valin(pbuf+sz*4);
|
lzextralen = valini32(pbuf+4*4);
|
||||||
newsize = valin(pbuf+sz*5);
|
extralen = valini32(pbuf+4*5);
|
||||||
return (ctrllen + lzdatalen + lzextralen + hdrsz);
|
newsize = valini32(pbuf+4*6);
|
||||||
|
return (lzctrllen + lzdatalen + lzextralen + hdrsz);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize)
|
bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsize)
|
||||||
{
|
{
|
||||||
bsize_t newsize;
|
bsize_t newsize;
|
||||||
bsize_t ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
bsize_t lzctrllen, ctrllen, lzdatalen, datalen, lzextralen, extralen;
|
||||||
u_char buf[8];
|
u_char buf[8];
|
||||||
u_char *diffdata, *extradata;
|
u_char *diffdata, *extradata, *ctrldata;
|
||||||
bsize_t oldpos,newpos;
|
bsize_t oldpos,newpos;
|
||||||
bsize_t ctrl[3];
|
bsize_t ctrl[3];
|
||||||
bsize_t lenread;
|
bsize_t lenread;
|
||||||
bsize_t i;
|
bsize_t i;
|
||||||
bufio_t cpf, dpf, epf;
|
bufio_t cpf, dpf, epf;
|
||||||
int sz, hdrsz, rv;
|
int hdrsz, rv;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
File format:
|
File format:
|
||||||
0 8 length of ctrl block (X)
|
0 4 compressed length of ctrl block (X)
|
||||||
8 8 compressed length of diff block (Y)
|
4 4 actual length of ctrl block
|
||||||
16 8 actual length of diff block
|
8 4 compressed length of diff block (Y)
|
||||||
24 8 compressed length of extra block (Z)
|
12 4 actual length of diff block
|
||||||
32 8 actual length of extra block
|
16 4 compressed length of extra block (Z)
|
||||||
40 8 length of new file
|
20 4 actual length of extra block
|
||||||
48 X control block
|
24 4 length of new file
|
||||||
48+X Y lzfx(diff block)
|
28 X ZRLE?(control block)
|
||||||
48+X+Y Z lzfx(extra block)
|
28+X Y ZRLE(diff block)
|
||||||
|
28+X+Y Z ZRLE(extra block)
|
||||||
with control block a set of triples (x,y,z) meaning "add x bytes
|
with control block a set of triples (x,y,z) meaning "add x bytes
|
||||||
from oldfile to x bytes from the diff block; copy y bytes from the
|
from oldfile to x bytes from the diff block; copy y bytes from the
|
||||||
extra block; seek forwards in oldfile by z bytes".
|
extra block; seek forwards in oldfile by z bytes".
|
||||||
*/
|
*/
|
||||||
sz = sizeof (bsize_t);
|
hdrsz = 4*7;
|
||||||
hdrsz = sz*6;
|
|
||||||
rv = 1;
|
rv = 1;
|
||||||
|
|
||||||
/* Read lengths from header first. */
|
/* Read lengths from header first. */
|
||||||
ctrllen = valin(pbuf);
|
lzctrllen = valini32(pbuf);
|
||||||
lzdatalen = valin(pbuf+sz);
|
ctrllen = valini32(pbuf+4);
|
||||||
datalen = valin(pbuf+sz*2);
|
lzdatalen = valini32(pbuf+4*2);
|
||||||
lzextralen = valin(pbuf+sz*3);
|
datalen = valini32(pbuf+4*3);
|
||||||
extralen = valin(pbuf+sz*4);
|
lzextralen = valini32(pbuf+4*4);
|
||||||
newsize = valin(pbuf+sz*5);
|
extralen = valini32(pbuf+4*5);
|
||||||
|
newsize = valini32(pbuf+4*6);
|
||||||
|
|
||||||
if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) {
|
if((ctrllen<0) || (lzdatalen<0) || (newsize<0) || (lzextralen<0)) {
|
||||||
fprintf(stderr, "1: Corrupt patch\n");
|
fprintf(stderr, "1: Corrupt patch\n");
|
||||||
|
@ -122,18 +123,38 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
||||||
*_newsize = newsize;
|
*_newsize = newsize;
|
||||||
|
|
||||||
/* Allocate buffers. */
|
/* Allocate buffers. */
|
||||||
diffdata = malloc(datalen);
|
diffdata = slab_alloc(NULL, datalen);
|
||||||
extradata = malloc(extralen);
|
extradata = slab_alloc(NULL, extralen);
|
||||||
if (diffdata == NULL || extradata == NULL) {
|
if (diffdata == NULL || extradata == NULL) {
|
||||||
fprintf(stderr, "bspatch: Out of memory.\n");
|
fprintf(stderr, "bspatch: Out of memory.\n");
|
||||||
if (diffdata) free(diffdata);
|
if (diffdata) slab_free(NULL, diffdata);
|
||||||
if (extradata) free(extradata);
|
if (extradata) slab_free(NULL, extradata);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Decompress diffdata and extradata. */
|
/* Decompress ctrldata, diffdata and extradata. */
|
||||||
|
if (lzctrllen < ctrllen) {
|
||||||
|
/* Ctrl data will be RLE-d if RLE size is less. */
|
||||||
|
ctrldata = slab_alloc(NULL, ctrllen);
|
||||||
|
if (ctrldata == NULL) {
|
||||||
|
fprintf(stderr, "bspatch: Out of memory.\n");
|
||||||
|
slab_free(NULL, diffdata);
|
||||||
|
slab_free(NULL, extradata);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
len = ctrllen;
|
||||||
|
if (zero_rle_decode(pbuf + hdrsz, lzctrllen, ctrldata, &len) == -1 ||
|
||||||
|
len != ctrllen) {
|
||||||
|
fprintf(stderr, "bspatch: Failed to decompress control data.\n");
|
||||||
|
rv = 0;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ctrldata = pbuf + hdrsz;
|
||||||
|
}
|
||||||
|
|
||||||
len = datalen;
|
len = datalen;
|
||||||
if (zero_rle_decode(pbuf + hdrsz + ctrllen, lzdatalen, diffdata, &len) == -1 ||
|
if (zero_rle_decode(pbuf + hdrsz + lzctrllen, lzdatalen, diffdata, &len) == -1 ||
|
||||||
len != datalen) {
|
len != datalen) {
|
||||||
fprintf(stderr, "bspatch: Failed to decompress diff data.\n");
|
fprintf(stderr, "bspatch: Failed to decompress diff data.\n");
|
||||||
rv = 0;
|
rv = 0;
|
||||||
|
@ -142,14 +163,14 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
||||||
datalen = len;
|
datalen = len;
|
||||||
|
|
||||||
len = extralen;
|
len = extralen;
|
||||||
if (zero_rle_decode(pbuf + hdrsz + ctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
if (zero_rle_decode(pbuf + hdrsz + lzctrllen + lzdatalen, lzextralen, extradata, &len) == -1 ||
|
||||||
len != extralen) {
|
len != extralen) {
|
||||||
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
fprintf(stderr, "bspatch: Failed to decompress extra data.\n");
|
||||||
rv = 0;
|
rv = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
extralen = len;
|
extralen = len;
|
||||||
BUFOPEN(&cpf, pbuf + hdrsz, ctrllen);
|
BUFOPEN(&cpf, ctrldata, ctrllen);
|
||||||
BUFOPEN(&dpf, diffdata, datalen);
|
BUFOPEN(&dpf, diffdata, datalen);
|
||||||
BUFOPEN(&epf, extradata, extralen);
|
BUFOPEN(&epf, extradata, extralen);
|
||||||
|
|
||||||
|
@ -211,8 +232,10 @@ bspatch(u_char *pbuf, u_char *old, bsize_t oldsize, u_char *new, bsize_t *_newsi
|
||||||
};
|
};
|
||||||
|
|
||||||
out:
|
out:
|
||||||
free(diffdata);
|
if (lzctrllen < ctrllen)
|
||||||
free(extradata);
|
slab_free(NULL, ctrldata);
|
||||||
|
slab_free(NULL, diffdata);
|
||||||
|
slab_free(NULL, extradata);
|
||||||
|
|
||||||
return (rv);
|
return (rv);
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,7 +59,7 @@ zero_rle_encode(const void *const ibuf, const unsigned int ilen,
|
||||||
cnt = 0;
|
cnt = 0;
|
||||||
pos4 = pos1;
|
pos4 = pos1;
|
||||||
state = 1;
|
state = 1;
|
||||||
// Lookahead if have ate least 4 consecutive zeroes
|
// Lookahead if there are at least 4 consecutive zeroes
|
||||||
for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++;
|
for (;pos4<ilen && ib[pos4] == 0; pos4++) cnt++;
|
||||||
if (cnt >= 4) break;
|
if (cnt >= 4) break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -345,7 +345,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
*rabin_pos = last_offset;
|
*rabin_pos = last_offset;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
printf("Original size: %lld\n", *size);
|
||||||
// If we found at least a few chunks, perform dedup.
|
// If we found at least a few chunks, perform dedup.
|
||||||
if (blknum > 2) {
|
if (blknum > 2) {
|
||||||
uint64_t prev_cksum;
|
uint64_t prev_cksum;
|
||||||
|
@ -538,8 +538,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
||||||
if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
|
if (rabin_index[blk] & GET_SIMILARITY_FLAG) {
|
||||||
old = buf1 + ctx->blocks[j].offset;
|
old = buf1 + ctx->blocks[j].offset;
|
||||||
new = buf1 + ctx->blocks[blk].cksum_n_offset;
|
new = buf1 + ctx->blocks[blk].cksum_n_offset;
|
||||||
|
matchlen = ctx->real_chunksize - *size;
|
||||||
|
|
||||||
bsz = bsdiff(old, ctx->blocks[j].length, new,
|
bsz = bsdiff(old, ctx->blocks[j].length, new,
|
||||||
ctx->blocks[blk].new_length, ctx->cbuf + pos1, 0, 0);
|
ctx->blocks[blk].new_length, ctx->cbuf + pos1,
|
||||||
|
buf1 + *size, matchlen);
|
||||||
if (bsz == 0) {
|
if (bsz == 0) {
|
||||||
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length);
|
memcpy(ctx->cbuf + pos1, new, ctx->blocks[blk].new_length);
|
||||||
rabin_index[blk] = htonl(ctx->blocks[blk].new_length);
|
rabin_index[blk] = htonl(ctx->blocks[blk].new_length);
|
||||||
|
@ -572,6 +575,7 @@ cont:
|
||||||
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
|
entries[2] = htonll(pos1 - rabin_index_sz - RABIN_HDR_SIZE);
|
||||||
*size = pos1;
|
*size = pos1;
|
||||||
ctx->valid = 1;
|
ctx->valid = 1;
|
||||||
|
printf("Deduped size: %lld\n", *size);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remaining header entries: size of compressed index and size of
|
* Remaining header entries: size of compressed index and size of
|
||||||
|
|
Loading…
Reference in a new issue