pcompress/bsdiff/bsdiff.c
Moinak Ghosh d49a088eea Fixes and performance improvements for Dedupe Delta Compression
Avoid using fingerprints in minhash computation and fix write amplification
Modify min-heap to use 64bit values
Improve bsdiff performance
Fix pointer comparison in bsdiff
Use 32bit offsets in bsdiff to reduce memory usage
Improve Zero RLE Encoder performance
Add more buffer overflow checks in Zero RLE Decoder
2013-01-13 22:04:59 +05:30

461 lines
11 KiB
C

/*-
* Copyright 2003-2005 Colin Percival
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
* This is a somewhat modified bsdiff implementation. It has been modified
* to do buffer to buffer diffing instead of file to file and also use
* a custom RLE encoding rather than Bzip2 on the diff output.
*/
#if 0
__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
#endif
#include <sys/types.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <allocator.h>
#include <utils.h>
#ifdef __USE_SSE_INTRIN__
#include <emmintrin.h>
#endif
#define __IN_BSDIFF__
#include "bscommon.h"
/*
 * Smaller of two values. Function-like macro: each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define MIN(x,y) (((x)<(y)) ? (x) : (y))
/*
 * Refine one group of the suffix index during qsufsort().
 *
 * I     - index array being sorted; the group occupies I[start .. start+len-1].
 * V     - per-position group numbers; the sort key of entry I[i] is V[I[i]+h].
 * start - first slot of the group inside I.
 * len   - number of slots in the group.
 * h     - key offset used for this pass.
 *
 * On return the group is ordered by key, every run of equal keys has been
 * given a new group number in V (the index of the run's last slot), and any
 * run of length one is flagged by storing -1 in its I slot. qsufsort() treats
 * negative I entries as runs that need no further work.
 */
static void split(bsize_t *I,bsize_t *V,bsize_t start,bsize_t len,bsize_t h)
{
bsize_t i,j,k,x,tmp,jj,kk;
/* Small groups: repeated selection of the minimum key. */
if(len<16) {
for(k=start;k<start+len;k+=j) {
/* x is the smallest key seen so far, j counts entries equal to it. */
j=1;x=V[I[k]+h];
for(i=1;k+i<start+len;i++) {
if(V[I[k+i]+h]<x) {
/* New minimum found: restart the run of equal keys. */
x=V[I[k+i]+h];
j=0;
};
if(V[I[k+i]+h]==x) {
/* Gather entries carrying the minimum key at the front, I[k .. k+j-1]. */
tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
j++;
};
};
/* The j gathered entries form one group, numbered by its last slot. */
for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
/* A group of one is marked with -1. */
if(j==1) I[k]=-1;
};
return;
};
/* Larger groups: three-way partition around the key of the middle entry. */
x=V[I[start+len/2]+h];
/* Count keys below the pivot (jj) and equal to the pivot (kk). */
jj=0;kk=0;
for(i=start;i<start+len;i++) {
if(V[I[i]+h]<x) jj++;
if(V[I[i]+h]==x) kk++;
};
/* jj becomes the first slot of the "equal" region, kk the first slot of the "greater" region. */
jj+=start;kk+=jj;
/* Fill [start, jj) with smaller keys, moving others into their regions. */
i=start;j=0;k=0;
while(i<jj) {
if(V[I[i]+h]<x) {
i++;
} else if(V[I[i]+h]==x) {
tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
j++;
} else {
tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
k++;
};
};
/* Finish the "equal" region [jj, kk); anything not equal here belongs in "greater". */
while(jj+j<kk) {
if(V[I[jj+j]+h]==x) {
j++;
} else {
tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
k++;
};
};
/* Recurse into the "smaller" region. */
if(jj>start) split(I,V,start,jj-start,h);
/* Entries equal to the pivot become one group, numbered by its last slot. */
for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
/* A single entry equal to the pivot is marked with -1. */
if(jj==kk-1) I[jj]=-1;
/* Recurse into the "greater" region. */
if(start+len>kk) split(I,V,kk,start+len-kk,h);
}
/*
 * Sort the suffixes of oldbuf[0 .. oldsize-1] and leave the resulting order
 * in I.
 *
 * I       - output index array; must have room for oldsize+1 entries.
 * V       - work array of the same size; holds group numbers while sorting.
 * oldbuf  - input bytes.
 * oldsize - number of bytes in oldbuf.
 *
 * The first pass is a counting sort on the leading byte of every suffix.
 * Later passes call split() with a doubling offset h until the whole of I is
 * a single negative run (I[0] == -(oldsize+1)). The final loop rebuilds I
 * from V so that I[V[i]] == i for every i in 0 .. oldsize.
 */
static void qsufsort(bsize_t *I,bsize_t *V,u_char *oldbuf,bsize_t oldsize)
{
	bsize_t buckets[257];
	bsize_t *bkts;
	bsize_t i,h,len;

	/*
	 * Clear all 257 counters. The extra last slot is incremented for byte
	 * value 255 and is updated by the prefix-sum loop below, so it must not
	 * be left uninitialised.
	 */
	memset(buckets, 0, sizeof (buckets));

	/* We want to do this:
	 * for(i=0;i<oldsize;i++) buckets[oldbuf[i]]++;
	 * for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
	 * for(i=255;i>0;i--) buckets[i]=buckets[i-1];
	 * buckets[0]=0;
	 *
	 * However the code below uses an array larger by 1 element and is able to
	 * avoid the 3rd loop.
	 */
	bkts = &buckets[1];
	for(i=0;i<oldsize;i++) bkts[oldbuf[i]]++;
	for(i=1;i<256;i++) bkts[i]+=bkts[i-1];
	buckets[0]=0;

	/* Drop every position into the bucket of its leading byte. */
	for(i=0;i<oldsize;i++) I[++buckets[oldbuf[i]]]=i;
	I[0]=oldsize;

	/* Initial group number of each position is the end of its bucket. */
	for(i=0;i<oldsize;i++) V[i]=buckets[oldbuf[i]];
	V[oldsize]=0;

	/* Buckets holding exactly one position are flagged with -1. */
	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
	I[0]=-1;

	for(h=1;I[0]!=-(oldsize+1);h+=h) {
		/* len accumulates the length of the current run of negative entries. */
		len=0;
		for(i=0;i<oldsize+1;) {
			if(I[i]<0) {
				/* Negative entry: skip over a run that is already done. */
				len-=I[i];
				i-=I[i];
			} else {
				/* Record the merged length of the preceding finished runs. */
				if(len) I[i-len]=-len;
				/* V[I[i]] is the last slot of this group. */
				len=V[I[i]]+1-i;
				split(I,V,i,len,h);
				i+=len;
				len=0;
			}
		}
		if(len) I[i-len]=-len;
	}

	/* Rebuild I from the final group numbers. */
	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
}
/*
 * Length of the common prefix of oldbuf and newbuf, looking at no more than
 * oldsize bytes of the former and newsize bytes of the latter.
 */
static bsize_t matchlen(u_char *oldbuf,bsize_t oldsize,u_char *newbuf,bsize_t newsize)
{
	bsize_t n = 0;

	while (n < oldsize && n < newsize && oldbuf[n] == newbuf[n]) {
		n++;
	}
	return n;
}
/*
 * Binary search over the sorted index I for the position in oldbuf that
 * shares the longest prefix with newbuf.
 *
 * I                - index array produced by qsufsort().
 * oldbuf, oldsize  - reference data and its length.
 * newbuf, newsize  - bytes to match and how many of them are available.
 * st, en           - inclusive range of slots in I to search.
 * pos              - receives the matching offset into oldbuf.
 *
 * Returns the length of the match found at *pos.
 */
static bsize_t search(bsize_t *I,u_char *oldbuf,bsize_t oldsize,
u_char *newbuf,bsize_t newsize,bsize_t st,bsize_t en,bsize_t *pos)
{
	bsize_t mid, low_match, high_match;

	/* Narrow the range until at most two candidate slots remain. */
	while (!(en - st < 2)) {
		mid = st + (en - st) / 2;
		if (memcmp(oldbuf + I[mid], newbuf, MIN(oldsize - I[mid], newsize)) < 0) {
			st = mid;
		} else {
			en = mid;
		}
	}

	/* Pick whichever of the two remaining candidates matches further. */
	low_match = matchlen(oldbuf + I[st], oldsize - I[st], newbuf, newsize);
	high_match = matchlen(oldbuf + I[en], oldsize - I[en], newbuf, newsize);
	if (low_match > high_match) {
		*pos = I[st];
		return low_match;
	}
	*pos = I[en];
	return high_match;
}
/*
 * Store x at buf as a 32-bit integer in network byte order.
 *
 * buf must have room for 4 bytes. It need not be aligned: the value is
 * copied with memcpy() rather than stored through a casted pointer, which
 * also avoids accessing a u_char array through an int32_t lvalue.
 */
static void
valouti32(bsize_t x, u_char *buf)
{
	int32_t val;

	val = x;
	val = htonl(val);
	memcpy(buf, &val, sizeof (val));
}
/*
 * Produce a binary delta that turns oldbuf into newbuf.
 *
 * oldbuf, oldsize  - reference data.
 * newbuf, newsize  - data to be encoded against the reference.
 * diff             - output buffer; it is opened with a size of newsize, so
 *                    the delta is only produced if it fits in newsize bytes.
 * scratch,
 * scratchsize      - caller supplied work area. It is used for the RLE pass
 *                    over the ctrl block when that block fits, otherwise a
 *                    temporary buffer is allocated.
 *
 * Returns the total number of bytes placed in diff (header + ctrl block +
 * diff block + extra block), or 0 if no delta was produced: allocation
 * failure, output does not fit, or too much "extra" data.
 */
bsize_t
bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
u_char *diff, u_char *scratch, bsize_t scratchsize)
{
	bsize_t *I,*V;
	bsize_t scan,pos,len;
	bsize_t lastscan,lastpos,lastoffset;
	bsize_t oldscore,scsc;
	bsize_t s,Sf,lenf,Sb,lenb;
	bsize_t overlap,Ss,lens;
	bsize_t i, rv;
	bsize_t dblen,eblen;
	u_char *db,*eb, *cb;
	u_char buf[sizeof (bsize_t)];
	u_char header[48];
	unsigned int sz, hdrsz, ulen;
	bufio_t pf;

	rv = 0;
	sz = sizeof (bsize_t);

	I = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
	V = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
	if(I == NULL || V == NULL) {
		/* Release whichever of the two was obtained. */
		if (I != NULL) slab_free(NULL, I);
		if (V != NULL) slab_free(NULL, V);
		return (0);
	}

	qsufsort(I,V,oldbuf,oldsize);

	/* V is only needed while sorting. */
	slab_free(NULL, V);
	V = NULL;

	db = (u_char *)slab_alloc(NULL, newsize+1);
	eb = (db != NULL) ? (u_char *)slab_alloc(NULL, newsize+1) : NULL;
	if(db == NULL || eb == NULL) {
		fprintf(stderr, "bsdiff: Memory allocation error.\n");
		if (db != NULL) slab_free(NULL, db);
		slab_free(NULL, I);
		return (0);
	}
	dblen=0;
	eblen=0;

	BUFOPEN(&pf, diff, newsize);

	/* Header is
		0	4	compressed length of ctrl block
		4	4	actual length of ctrl block
		8	4	compressed length of diff block
		12	4	actual length of diff block
		16	4	compressed length of extra block
		20	4	actual length of extra block
		24	4	length of new file */
	/* File is
		0	28	Header
		28	??	ctrl block
		??	??	diff block
		??	??	extra block */
	valouti32(0, header);
	valouti32(0, header + 4);
	valouti32(0, header + 4*2);
	valouti32(0, header + 4*3);
	valouti32(0, header + 4*4);
	valouti32(0, header + 4*5);
	valouti32(newsize, header + 4*6);
	/* Placeholder header; it is rewritten once all sizes are known. */
	if (BUFWRITE(&pf, header, 4*7) != 4*7) {
		fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
		rv = 0;
		goto out;
	}
	hdrsz = 4*7;

	/* Compute the differences, writing ctrl as we go */
	scan=0;len=0;
	lastscan=0;lastpos=0;lastoffset=0;
	pos=0;
	while(scan<newsize) {
		oldscore=0;

		for(scsc=scan+=len;scan<newsize;scan++) {
			len=search(I,oldbuf,oldsize,newbuf+scan,newsize-scan,
			    0,oldsize,&pos);

			/* oldscore: bytes that already match at the previous offset. */
			for(;scsc<scan+len;scsc++)
				oldscore += ((scsc+lastoffset<oldsize) &&
				    (oldbuf[scsc+lastoffset] == newbuf[scsc]));

			if(((len==oldscore) && (len!=0)) ||
			    (len>oldscore+sz)) break;

			if((scan+lastoffset<oldsize) &&
			    (oldbuf[scan+lastoffset] == newbuf[scan]))
				oldscore--;
		}

		if((len!=oldscore) || (scan==newsize)) {
			/* Extend the previous match forward while it keeps paying off. */
			s=0;Sf=0;lenf=0;
			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
				s += (oldbuf[lastpos+i]==newbuf[lastscan+i]);
				i++;
				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }
			}

			/* Extend the new match backward in the same way. */
			lenb=0;
			if(scan<newsize) {
				s=0;Sb=0;
				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
					s += (oldbuf[pos-i]==newbuf[scan-i]);
					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }
				}
			}

			/* If the two extensions overlap, pick the best split point. */
			if(lastscan+lenf>scan-lenb) {
				overlap=(lastscan+lenf)-(scan-lenb);
				s=0;Ss=0;lens=0;
				for(i=0;i<overlap;i++) {
					s += (newbuf[lastscan+lenf-overlap+i]==
					    oldbuf[lastpos+lenf-overlap+i]);
					s -= (newbuf[scan-lenb+i]==oldbuf[pos-lenb+i]);
					if(s>Ss) { Ss=s; lens=i+1; }
				}

				lenf+=lens-overlap;
				lenb-=lens;
			}

			/* Diff bytes: new minus old over the forward extension. */
			for(i=0;i<lenf;i++)
				db[dblen+i]=newbuf[lastscan+i]-oldbuf[lastpos+i];
			/* Extra bytes: the unmatched gap, copied verbatim. */
			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
				eb[eblen+i]=newbuf[lastscan+lenf+i];

			dblen+=lenf;
			eblen+=(scan-lenb)-(lastscan+lenf);

			/*
			 * Ctrl triple: diff length, extra length, seek adjustment in
			 * oldbuf. A short write means the delta does not fit in the
			 * output buffer, so give up instead of emitting a truncated
			 * ctrl block.
			 */
			valouti32(lenf, buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}
			valouti32((scan-lenb)-(lastscan+lenf),buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}
			valouti32((pos-lenb)-(lastpos+lenf),buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}

			lastscan=scan-lenb;
			lastpos=pos-lenb;
			lastoffset=pos-scan;
		}
	}

	/* Give up when the extra data exceeds newsize/2 + newsize/5. */
	if (eblen > (newsize/2 + newsize/5)) {
		rv = 0;
		goto out;
	}

	/* Compute uncompressed size of the ctrl data. */
	len = BUFTELL(&pf);
	valouti32(len-hdrsz, header+4);
	ulen = len-hdrsz;

	/* If our data can fit in the scratch area use it otherwise alloc. */
	if (ulen > scratchsize) {
		cb = (u_char *)slab_alloc(NULL, ulen);
		if (cb == NULL) {
			fprintf(stderr, "bsdiff: Memory allocation error.\n");
			rv = 0;
			goto out;
		}
	} else {
		cb = scratch;
	}

	/*
	 * Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller
	 * data then retain it.
	 */
	BUFSEEK(&pf, hdrsz, SEEK_SET);
	rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
	if (rv == 0 && ulen < len-hdrsz) {
		BUFWRITE(&pf, cb, ulen);
	} else {
		/* Keep the raw ctrl data: move back to where it ends. */
		BUFSEEK(&pf, len, SEEK_SET);
	}
	/* Only release cb if it was allocated here. */
	if (cb != scratch) {
		slab_free(NULL, cb);
	}

	/* Compute compressed size of ctrl data */
	len = BUFTELL(&pf);
	valouti32(len-hdrsz, header);
	rv = len;

	/* Write diff data */
	len = newsize - rv;
	ulen = len;
	if (zero_rle_encode(db, dblen, BUFPTR(&pf), &ulen) == -1) {
		rv = 0;
		goto out;
	}
	/* Output size of diff data */
	len = ulen;
	valouti32(len, header + 4*2);
	valouti32(dblen, header + 4*3);
	rv += len;
	BUFSEEK(&pf, len, SEEK_CUR);

	/* Write extra data */
	len = newsize - rv;
	ulen = len;
	if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
		rv = 0;
		goto out;
	}
	/* Output size of extra data */
	len = ulen;
	valouti32(len, header + 4*4);
	valouti32(eblen, header + 4*5);
	rv += len;

	/* Seek to the beginning, re-write the header.*/
	BUFSEEK(&pf, 0, SEEK_SET);
	BUFWRITE(&pf, header, hdrsz);

out:
	/* Free the memory we used */
	slab_free(NULL, db);
	slab_free(NULL, eb);
	slab_free(NULL, I);

	return (rv);
}