/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see .
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
/*
* NOTE:
* This is a somewhat modified bsdiff implementation. It has been modified
* to do buffer to buffer diffing instead of file to file and also use
* a custom RLE encoding rather than Bzip2 on the diff output.
*/
/*-
* Copyright 2003-2005 Colin Percival
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#if 0
__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef __USE_SSE_INTRIN__
#include
#endif
#define __IN_BSDIFF__
#include "bscommon.h"
#define BDIFF_MIN(x,y) (((x)<(y)) ? (x) : (y))
static void split(bsize_t *I,bsize_t *V,bsize_t start,bsize_t len,bsize_t h)
{
bsize_t i,j,k,x,tmp,jj,kk;
if(len<16) {
for(k=start;kstart) split(I,V,start,jj-start,h);
for(i=0;ikk) split(I,V,kk,start+len-kk,h);
}
static void qsufsort(bsize_t *I,bsize_t *V,u_char *oldbuf,bsize_t oldsize)
{
bsize_t buckets[257];
bsize_t *bkts;
bsize_t i,h,len;
#ifdef __USE_SSE_INTRIN__
if (((size_t)buckets & (16 - 1)) == 0) { // 16-byte aligned ?
int iters;
uchar_t *pos;
iters = (256 * sizeof (bsize_t)) / (16 * 4);
__m128i zero = _mm_setzero_si128 ();
pos = (uchar_t *)buckets;
for (i=0; i0;i--) buckets[i]=buckets[i-1];
* buckets[0]=0;
*
* However the code below uses an array larger by 1 element and is able to
* avoid the 3rd loop.
*/
bkts = &buckets[1];
for(i=0;iy) {
*pos=I[st];
return x;
} else {
*pos=I[en];
return y;
}
};
x=st+(en-st)/2;
if(memcmp(oldbuf+I[x],newbuf,BDIFF_MIN(oldsize-I[x],newsize))<0) {
return search(I,oldbuf,oldsize,newbuf,newsize,x,en,pos);
} else {
return search(I,oldbuf,oldsize,newbuf,newsize,st,x,pos);
};
}
static void
valouti32(bsize_t x, u_char *buf)
{
int32_t val;
val = x;
I32_P(buf) = htonl(val);
}
bsize_t
bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize,
u_char *diff, u_char *scratch, bsize_t scratchsize)
{
bsize_t *I,*V;
bsize_t scan,pos,len;
bsize_t lastscan,lastpos,lastoffset;
bsize_t oldscore,scsc;
bsize_t s,Sf,lenf,Sb,lenb;
bsize_t overlap,Ss,lens;
bsize_t i, rv;
bsize_t dblen,eblen;
u_char *db,*eb, *cb;
u_char buf[sizeof (bsize_t)];
u_char header[48];
unsigned int sz, hdrsz, ulen;
bufio_t pf;
sz = sizeof (bsize_t);
I = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
V = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz);
if(I == NULL || V == NULL) return (0);
qsufsort(I,V,oldbuf,oldsize);
slab_free(NULL, V);
if(((db=(u_char *)slab_alloc(NULL, newsize+1))==NULL) ||
((eb=(u_char *)slab_alloc(NULL, newsize+1))==NULL)) {
log_msg(LOG_ERR, 0, "bsdiff: Memory allocation error.\n");
slab_free(NULL, I);
slab_free(NULL, V);
return (0);
}
dblen=0;
eblen=0;
BUFOPEN(&pf, diff, newsize);
/* Header is
0 4 compressed length of ctrl block
4 4 actual length of ctrl block
8 4 compressed length of diff block
12 4 actual length of diff block
16 4 compressed length of extra block
20 4 actual length of extra block
24 4 length of new file */
/* File is
0 28 Header
28 ?? ctrl block
?? ?? diff block
?? ?? extra block */
valouti32(0, header);
valouti32(0, header + 4);
valouti32(0, header + 4*2);
valouti32(0, header + 4*3);
valouti32(0, header + 4*4);
valouti32(0, header + 4*5);
valouti32(newsize, header + 4*6);
if (BUFWRITE(&pf, header, 4*7) != 4*7) {
log_msg(LOG_ERR, 0, "bsdiff: Write to compressed buffer failed.\n");
rv = 0;
goto out;
}
hdrsz = 4*7;
/* Compute the differences, writing ctrl as we go */
scan=0;len=0;
lastscan=0;lastpos=0;lastoffset=0;
pos=0;
while(scanoldscore+sz)) break;
if((scan+lastoffsetSf*2-lenf) { Sf=s; lenf=i; };
};
lenb=0;
if(scan=lastscan+i)&&(pos>=i);i++) {
s += (oldbuf[pos-i]==newbuf[scan-i]);
if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
};
};
if(lastscan+lenf>scan-lenb) {
overlap=(lastscan+lenf)-(scan-lenb);
s=0;Ss=0;lens=0;
for(i=0;iSs) { Ss=s; lens=i+1; };
};
lenf+=lens-overlap;
lenb-=lens;
};
for(i=0;i (newsize/2 + newsize/5)) {
rv = 0;
goto out;
}
/* Compute uncompressed size of the ctrl data. */
len = BUFTELL(&pf);
valouti32(len-hdrsz, header+4);
ulen = len-hdrsz;
/* If our data can fit in the scratch area use it otherwise alloc. */
if (ulen > scratchsize) {
cb = (u_char *)slab_alloc(NULL, ulen);
} else {
cb = scratch;
}
/*
* Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller
* data then retain it.
*/
BUFSEEK(&pf, hdrsz, SEEK_SET);
rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen);
if (rv == 0 && ulen < len-hdrsz) {
BUFWRITE(&pf, cb, ulen);
} else {
BUFSEEK(&pf, len, SEEK_SET);
}
if (len-hdrsz > scratchsize) {
slab_free(NULL, cb);
}
/* Compute compressed size of ctrl data */
len = BUFTELL(&pf);
valouti32(len-hdrsz, header);
rv = len;
/* Write diff data */
len = newsize - rv;
ulen = len;
if (zero_rle_encode(db, dblen, BUFPTR(&pf), &ulen) == -1) {
rv = 0;
goto out;
}
/* Output size of diff data */
len = ulen;
valouti32(len, header + 4*2);
valouti32(dblen, header + 4*3);
rv += len;
BUFSEEK(&pf, len, SEEK_CUR);
/* Write extra data */
len = newsize - rv;
ulen = len;
if (eblen > 0) {
if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) {
rv = 0;
goto out;
}
if (ulen >= eblen) {
if (eblen > len) {
rv = 0;
goto out;
}
memcpy(BUFPTR(&pf), eb, eblen);
ulen = eblen;
}
}
/* Output size of extra data */
len = ulen;
valouti32(len, header + 4*4);
valouti32(eblen, header + 4*5);
rv += len;
/* Seek to the beginning, re-write the header.*/
BUFSEEK(&pf, 0, SEEK_SET);
BUFWRITE(&pf, header, hdrsz);
out:
/* Free the memory we used */
slab_free(NULL, db);
slab_free(NULL, eb);
slab_free(NULL, I);
return (rv);
}