pcompress/bsdiff/bsdiff.c
Moinak Ghosh 3888c8d316 Many optimization tweaks
Optimize Rabin Deduplication and Bsdiff
Vectorize XXHash using SSE4
2013-01-20 22:02:26 +05:30

461 lines
11 KiB
C

/*-
* Copyright 2003-2005 Colin Percival
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
* This is a somewhat modified bsdiff implementation. It has been modified
* to do buffer to buffer diffing instead of file to file and also use
* a custom RLE encoding rather than Bzip2 on the diff output.
*/
#if 0
__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
#endif
#include <sys/types.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <allocator.h>
#include <utils.h>
#ifdef __USE_SSE_INTRIN__
#include <emmintrin.h>
#endif
#define __IN_BSDIFF__
#include "bscommon.h"
/*
 * Yields the smaller of x and y. Each argument appears twice in the
 * expansion, so do not pass expressions with side effects.
 */
#define MIN(x,y) (((x)<(y)) ? (x) : (y))
/*
 * Refine the ordering of the index range I[start .. start+len-1] using the
 * key V[I[i]+h] of each entry.
 *
 * After the call, entries with equal keys are contiguous, every entry of such
 * a run has V[I[..]] set to the index of the run's last slot, and a run of
 * exactly one entry is flagged by storing -1 in its slot of I.
 *
 * I     - index array being sorted (modified in place).
 * V     - per-index group numbers (read at offset +h, rewritten per entry).
 * start - first slot of the range to process.
 * len   - number of slots in the range.
 * h     - offset added to an index before looking up its key in V.
 */
static void split(bsize_t *I,bsize_t *V,bsize_t start,bsize_t len,bsize_t h)
{
bsize_t i,j,k,x,tmp,jj,kk;
/* Short ranges: repeatedly pull the entries with the smallest key to the front. */
if(len<16) {
for(k=start;k<start+len;k+=j) {
/* x is the smallest key seen from slot k onward; j counts entries holding it. */
j=1;x=V[I[k]+h];
for(i=1;k+i<start+len;i++) {
/* A new minimum restarts the count of equal-key entries. */
if(V[I[k+i]+h]<x) {
x=V[I[k+i]+h];
j=0;
};
/* Move an entry carrying the current minimum into slot k+j. */
if(V[I[k+i]+h]==x) {
tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
++j;
};
};
/* The j entries in k..k+j-1 share a key: number them by the run's last slot. */
for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
/* A run of one entry is marked with -1 in I. */
if(j==1) I[k]=-1;
};
return;
};
/* Longer ranges: three-way partition around the key of the middle slot. */
x=V[I[start+len/2]+h];
/* Count the entries with keys below (jj) and equal to (kk) the pivot. */
jj=0;kk=0;
for(i=start;i<start+len;i++) {
if(V[I[i]+h]<x) ++jj;
if(V[I[i]+h]==x) ++kk;
};
/* jj becomes the first slot of the equal-key region, kk the first slot of the greater-key region. */
jj+=start;kk+=jj;
i=start;j=0;k=0;
/* Fill the lower region; misplaced entries are swapped into the next free slot of their own region. */
while(i<jj) {
if(V[I[i]+h]<x) {
++i;
} else if(V[I[i]+h]==x) {
tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
++j;
} else {
tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
++k;
};
};
/* Finish the equal-key region; anything else found there is swapped into the greater-key region. */
while(jj+j<kk) {
if(V[I[jj+j]+h]==x) {
++j;
} else {
tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
++k;
};
};
/* Recurse into the lower region if it is not empty. */
if(jj>start) split(I,V,start,jj-start,h);
/* Entries equal to the pivot form one run, numbered by its last slot kk-1. */
for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
/* A run of one entry is marked with -1 in I. */
if(jj==kk-1) I[jj]=-1;
/* Recurse into the greater region if it is not empty. */
if(start+len>kk) split(I,V,kk,start+len-kk,h);
}
/*
 * Sort the suffixes of oldbuf[0 .. oldsize-1] by prefix doubling.
 *
 * I       - output index array; must hold oldsize+1 entries. On return
 *           I[V[i]] == i for every i in 0..oldsize.
 * V       - work array of oldsize+1 entries holding the group number of each
 *           position; its contents are consumed by the final inversion loop.
 * oldbuf  - input bytes.
 * oldsize - number of bytes in oldbuf.
 *
 * While sorting, a negative value -n stored in I marks a run of n slots that
 * need no further processing; the outer loop ends once that run covers all
 * oldsize+1 slots.
 */
static void qsufsort(bsize_t *I,bsize_t *V,u_char *oldbuf,bsize_t oldsize)
{
	bsize_t buckets[257];
	bsize_t *bkts;
	bsize_t i,h,len;

	/*
	 * Clear all 257 counters. The extra slot buckets[256] is incremented
	 * and accumulated into below (as bkts[255]), so it must start from a
	 * defined value even though its final content is never consumed.
	 */
	memset(buckets, 0, sizeof (buckets));

	/* We want to do this:
	 * for(i=0;i<oldsize;i++) buckets[oldbuf[i]]++;
	 * for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
	 * for(i=255;i>0;i--) buckets[i]=buckets[i-1];
	 * buckets[0]=0;
	 *
	 * However the code below counts into an array shifted by one element,
	 * which makes the third loop unnecessary.
	 */
	bkts = &buckets[1];
	for(i=0;i<oldsize;i++) bkts[oldbuf[i]]++;
	for(i=1;i<256;i++) bkts[i]+=bkts[i-1];
	buckets[0]=0;

	/* Distribute positions into I by first byte; slot 0 is kept for index oldsize. */
	for(i=0;i<oldsize;i++) I[++buckets[oldbuf[i]]]=i;
	I[0]=oldsize;
	/* Initial group number of a position is the last slot of its byte bucket. */
	for(i=0;i<oldsize;i++) V[i]=buckets[oldbuf[i]];
	V[oldsize]=0;
	/* Buckets holding exactly one position are marked with -1. */
	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
	I[0]=-1;

	/* Double the comparison offset h until one marked run spans all of I. */
	for(h=1;I[0]!=-(oldsize+1);h+=h) {
		len=0;
		for(i=0;i<oldsize+1;) {
			if(I[i]<0) {
				/* Skip a marked run and remember its length for merging. */
				len-=I[i];
				i-=I[i];
			} else {
				/* Record the merged length of the marked runs just skipped. */
				if(len) I[i-len]=-len;
				/* V[I[i]] is the last slot of this group, giving its length. */
				len=V[I[i]]+1-i;
				split(I,V,i,len,h);
				i+=len;
				len=0;
			};
		};
		if(len) I[i-len]=-len;
	};

	/* Invert V into I. */
	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
}
/*
 * Length of the common prefix of oldbuf and newbuf, looking at no more than
 * oldsize bytes of the former and newsize bytes of the latter.
 */
static bsize_t matchlen(u_char *oldbuf,bsize_t oldsize,u_char *newbuf,bsize_t newsize)
{
	bsize_t limit = (oldsize < newsize) ? oldsize : newsize;
	bsize_t n = 0;

	while (n < limit && oldbuf[n] == newbuf[n])
		n++;
	return n;
}
/*
 * Binary search over the sorted index array I, between slots st and en, for
 * the suffix of oldbuf that shares the longest prefix with newbuf.
 *
 * Returns the length of that shared prefix and stores the matching offset
 * into oldbuf through pos.
 */
static bsize_t search(bsize_t *I,u_char *oldbuf,bsize_t oldsize,
		u_char *newbuf,bsize_t newsize,bsize_t st,bsize_t en,bsize_t *pos)
{
	bsize_t mid, lo_match, hi_match;

	/* Halve the interval until at most two candidate slots remain. */
	while (en - st >= 2) {
		mid = st + (en - st) / 2;
		if (memcmp(oldbuf + I[mid], newbuf, MIN(oldsize - I[mid], newsize)) < 0)
			st = mid;
		else
			en = mid;
	}

	/* Keep whichever of the two remaining candidates matches further. */
	lo_match = matchlen(oldbuf + I[st], oldsize - I[st], newbuf, newsize);
	hi_match = matchlen(oldbuf + I[en], oldsize - I[en], newbuf, newsize);

	if (lo_match > hi_match) {
		*pos = I[st];
		return lo_match;
	}
	*pos = I[en];
	return hi_match;
}
/*
 * Store x at buf as a 32-bit integer in network byte order.
 *
 * x   - value to store; it is narrowed to int32_t first.
 * buf - destination, at least 4 bytes long. No alignment is required: the
 *       bytes are copied with memcpy rather than stored through a cast
 *       pointer, which would break on misaligned addresses and violate the
 *       aliasing rules for a u_char buffer.
 */
static void
valouti32(bsize_t x, u_char *buf)
{
	int32_t val;
	int32_t net;

	val = x;
	net = htonl(val);
	memcpy(buf, &net, sizeof (net));
}
/*
 * Produce a binary diff that turns oldbuf into newbuf and write it to diff.
 *
 * oldbuf, oldsize   - reference data and its length.
 * newbuf, newsize   - target data and its length.
 * diff              - output buffer; it is opened with a capacity of newsize
 *                     bytes.
 * scratch,
 * scratchsize       - caller-supplied work area used for RLE-encoding the
 *                     ctrl block when it is large enough; otherwise a
 *                     temporary buffer is allocated.
 *
 * Output layout: a 28-byte header of seven 32-bit network-order fields,
 * followed by the ctrl block, the diff block and the extra block.
 *
 * Returns the total number of bytes produced in diff, or 0 when no diff was
 * produced: allocation failure, a write that did not fit in the output
 * buffer, an RLE encoder failure, or an extra block larger than
 * newsize/2 + newsize/5.
 */
bsize_t
bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
	u_char *diff, u_char *scratch, bsize_t scratchsize)
{
	bsize_t *I,*V;
	bsize_t scan,pos,len;
	bsize_t lastscan,lastpos,lastoffset;
	bsize_t oldscore,scsc;
	bsize_t s,Sf,lenf,Sb,lenb;
	bsize_t overlap,Ss,lens;
	bsize_t i, rv;
	bsize_t dblen,eblen;
	u_char *db,*eb, *cb;
	u_char buf[sizeof (bsize_t)];
	u_char header[48];
	unsigned int sz, hdrsz, ulen;
	bufio_t pf;

	sz = sizeof (bsize_t);

	I = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
	V = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
	if(I == NULL || V == NULL) {
		/* Release whichever of the two allocations did succeed. */
		if (I != NULL) slab_free(NULL, I);
		if (V != NULL) slab_free(NULL, V);
		return (0);
	}

	qsufsort(I,V,oldbuf,oldsize);

	/* V is only needed while sorting; from here on it must not be touched. */
	slab_free(NULL, V);

	eb = NULL;
	if(((db=(u_char *)slab_alloc(NULL, newsize+1))==NULL) ||
		((eb=(u_char *)slab_alloc(NULL, newsize+1))==NULL)) {
		fprintf(stderr, "bsdiff: Memory allocation error.\n");
		/* eb is NULL on this path; db may have been allocated. */
		if (db != NULL) slab_free(NULL, db);
		slab_free(NULL, I);
		return (0);
	}
	dblen=0;
	eblen=0;
	BUFOPEN(&pf, diff, newsize);

	/* Header is
		0	4	compressed length of ctrl block
		4	4	actual length of ctrl block
		8	4	compressed length of diff block
		12	4	actual length of diff block
		16	4	compressed length of extra block
		20	4	actual length of extra block
		24	4	length of new file */
	/* File is
		0	28	Header
		28	??	ctrl block
		??	??	diff block
		??	??	extra block */
	valouti32(0, header);
	valouti32(0, header + 4);
	valouti32(0, header + 4*2);
	valouti32(0, header + 4*3);
	valouti32(0, header + 4*4);
	valouti32(0, header + 4*5);
	valouti32(newsize, header + 4*6);
	/* Placeholder header; it is rewritten once all lengths are known. */
	if (BUFWRITE(&pf, header, 4*7) != 4*7) {
		fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n");
		rv = 0;
		goto out;
	}
	hdrsz = 4*7;

	/* Compute the differences, writing ctrl as we go */
	scan=0;len=0;
	lastscan=0;lastpos=0;lastoffset=0;
	pos=0;
	while(scan<newsize) {
		oldscore=0;

		for(scsc=scan+=len;scan<newsize;scan++) {
			len=search(I,oldbuf,oldsize,newbuf+scan,newsize-scan,
					0,oldsize,&pos);

			for(;scsc<scan+len;scsc++)
				oldscore += ((scsc+lastoffset<oldsize) &&
					(oldbuf[scsc+lastoffset] == newbuf[scsc]));

			if(((len==oldscore) && (len!=0)) ||
				(len>oldscore+sz)) break;

			if((scan+lastoffset<oldsize) &&
				(oldbuf[scan+lastoffset] == newbuf[scan]))
				oldscore--;
		};

		if((len!=oldscore) || (scan==newsize)) {
			/* Extend the previous match forward. */
			s=0;Sf=0;lenf=0;
			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
				s += (oldbuf[lastpos+i]==newbuf[lastscan+i]);
				++i;
				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
			};

			/* Extend the new match backward. */
			lenb=0;
			if(scan<newsize) {
				s=0;Sb=0;
				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
					s += (oldbuf[pos-i]==newbuf[scan-i]);
					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
				};
			};

			/* Resolve an overlap between the two extensions. */
			if(lastscan+lenf>scan-lenb) {
				overlap=(lastscan+lenf)-(scan-lenb);
				s=0;Ss=0;lens=0;
				for(i=0;i<overlap;i++) {
					s += (newbuf[lastscan+lenf-overlap+i]==
						oldbuf[lastpos+lenf-overlap+i]);
					s -= (newbuf[scan-lenb+i]==oldbuf[pos-lenb+i]);
					if(s>Ss) { Ss=s; lens=i+1; };
				};

				lenf+=lens-overlap;
				lenb-=lens;
			};

			/* Byte-wise differences go to db, unmatched bytes to eb. */
			for(i=0;i<lenf;i++)
				db[dblen+i]=newbuf[lastscan+i]-oldbuf[lastpos+i];
			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
				eb[eblen+i]=newbuf[lastscan+lenf+i];

			dblen+=lenf;
			eblen+=(scan-lenb)-(lastscan+lenf);

			/*
			 * Emit the ctrl triple: diff length, extra length, seek
			 * offset in oldbuf. A triple that does not fit in the
			 * output buffer means no usable diff can be produced.
			 */
			valouti32(lenf, buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}

			valouti32((scan-lenb)-(lastscan+lenf),buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}

			valouti32((pos-lenb)-(lastpos+lenf),buf);
			if (BUFWRITE(&pf, buf, 4) != 4) {
				rv = 0;
				goto out;
			}

			lastscan=scan-lenb;
			lastpos=pos-lenb;
			lastoffset=pos-scan;
		}
	}

	/* Give up when the extra block exceeds newsize/2 + newsize/5 bytes. */
	if (eblen > (newsize/2 + newsize/5)) {
		rv = 0;
		goto out;
	}

	/* Compute uncompressed size of the ctrl data. */
	len = BUFTELL(&pf);
	valouti32(len-hdrsz, header+4);
	ulen = len-hdrsz;

	/* If our data can fit in the scratch area use it otherwise alloc. */
	if (ulen > scratchsize) {
		cb = (u_char *)slab_alloc(NULL, ulen);
		if (cb == NULL) {
			fprintf(stderr, "bsdiff: Memory allocation error.\n");
			rv = 0;
			goto out;
		}
	} else {
		cb = scratch;
	}

	/*
	 * Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller
	 * data then retain it.
	 */
	BUFSEEK(&pf, hdrsz, SEEK_SET);
	rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
	if (rv == 0 && ulen < len-hdrsz) {
		BUFWRITE(&pf, cb, ulen);
	} else {
		BUFSEEK(&pf, len, SEEK_SET);
	}
	/* Only a buffer allocated above is ours to release. */
	if (cb != scratch) {
		slab_free(NULL, cb);
	}

	/* Compute compressed size of ctrl data */
	len = BUFTELL(&pf);
	valouti32(len-hdrsz, header);
	rv = len;

	/* Write diff data */
	len = newsize - rv;
	ulen = len;
	if (zero_rle_encode(db, dblen, BUFPTR(&pf), &ulen) == -1) {
		rv = 0;
		goto out;
	}
	/* Output size of diff data */
	len = ulen;
	valouti32(len, header + 4*2);
	valouti32(dblen, header + 4*3);
	rv += len;
	BUFSEEK(&pf, len, SEEK_CUR);

	/* Write extra data */
	len = newsize - rv;
	ulen = len;
	if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
		rv = 0;
		goto out;
	}
	/* Output size of extra data */
	len = ulen;
	valouti32(len, header + 4*4);
	valouti32(eblen, header + 4*5);
	rv += len;

	/* Seek to the beginning, re-write the header.*/
	BUFSEEK(&pf, 0, SEEK_SET);
	BUFWRITE(&pf, header, hdrsz);

out:
	/* Free the memory we used */
	slab_free(NULL, db);
	slab_free(NULL, eb);
	slab_free(NULL, I);

	return (rv);
}