/* * This file is a part of Pcompress, a chunked parallel multi- * algorithm lossless compression and decompression program. * * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. * Use is subject to license terms. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program. * If not, see . * * moinakg@belenix.org, http://moinakg.wordpress.com/ */ /* * NOTE: * This is a somewhat modified bsdiff implementation. It has been modified * to do buffer to buffer diffing instead of file to file and also use * a custom RLE encoding rather than Bzip2 on the diff output. */ /*- * Copyright 2003-2005 Colin Percival * All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if 0 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $"); #endif #include #include #include #include #include #include #include #include #include #ifdef __USE_SSE_INTRIN__ #include #endif #define __IN_BSDIFF__ #include "bscommon.h" #define MIN(x,y) (((x)<(y)) ? (x) : (y)) static void split(bsize_t *I,bsize_t *V,bsize_t start,bsize_t len,bsize_t h) { bsize_t i,j,k,x,tmp,jj,kk; if(len<16) { for(k=start;kstart) split(I,V,start,jj-start,h); for(i=0;ikk) split(I,V,kk,start+len-kk,h); } static void qsufsort(bsize_t *I,bsize_t *V,u_char *oldbuf,bsize_t oldsize) { bsize_t buckets[257]; bsize_t *bkts; bsize_t i,h,len; #ifdef __USE_SSE_INTRIN__ if (((size_t)buckets & (16 - 1)) == 0) { // 16-byte aligned ? int iters; uchar_t *pos; iters = (256 * sizeof (bsize_t)) / (16 * 4); __m128i zero = _mm_setzero_si128 (); pos = (uchar_t *)buckets; for (i=0; i0;i--) buckets[i]=buckets[i-1]; * buckets[0]=0; * * However the code below uses an array larger by 1 element and is able to * avoid the 3rd loop. */ bkts = &buckets[1]; for(i=0;iy) { *pos=I[st]; return x; } else { *pos=I[en]; return y; } }; x=st+(en-st)/2; if(memcmp(oldbuf+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) { return search(I,oldbuf,oldsize,newbuf,newsize,x,en,pos); } else { return search(I,oldbuf,oldsize,newbuf,newsize,st,x,pos); }; } static void valouti32(bsize_t x, u_char *buf) { int32_t val; val = x; *((int32_t *)buf) = htonl(val); } bsize_t bsdiff(u_char *oldbuf, bsize_t oldsize, u_char *newbuf, bsize_t newsize, u_char *diff, u_char *scratch, bsize_t scratchsize) { bsize_t *I,*V; bsize_t scan,pos,len; bsize_t lastscan,lastpos,lastoffset; bsize_t oldscore,scsc; bsize_t s,Sf,lenf,Sb,lenb; bsize_t overlap,Ss,lens; bsize_t i, rv; bsize_t dblen,eblen; u_char *db,*eb, *cb; u_char buf[sizeof (bsize_t)]; u_char header[48]; unsigned int sz, hdrsz, ulen; bufio_t pf; sz = sizeof (bsize_t); I = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz); V = (bsize_t *)slab_alloc(NULL, (oldsize+1)*sz); if(I == NULL || V == NULL) return (0); qsufsort(I,V,oldbuf,oldsize); slab_free(NULL, V); if(((db=(u_char *)slab_alloc(NULL, newsize+1))==NULL) || ((eb=(u_char *)slab_alloc(NULL, newsize+1))==NULL)) { fprintf(stderr, "bsdiff: Memory allocation error.\n"); slab_free(NULL, I); slab_free(NULL, V); return (0); } dblen=0; eblen=0; BUFOPEN(&pf, diff, newsize); /* Header is 0 4 compressed length of ctrl block 4 4 actual length of ctrl block 8 4 compressed length of diff block 12 4 actual length of diff block 16 4 compressed length of extra block 20 4 actual length of extra block 24 4 length of new file */ /* File is 0 28 Header 28 ?? ctrl block ?? ?? diff block ?? ?? extra block */ valouti32(0, header); valouti32(0, header + 4); valouti32(0, header + 4*2); valouti32(0, header + 4*3); valouti32(0, header + 4*4); valouti32(0, header + 4*5); valouti32(newsize, header + 4*6); if (BUFWRITE(&pf, header, 4*7) != 4*7) { fprintf(stderr, "bsdiff: Write to compressed buffer failed.\n"); rv = 0; goto out; } hdrsz = 4*7; /* Compute the differences, writing ctrl as we go */ scan=0;len=0; lastscan=0;lastpos=0;lastoffset=0; pos=0; while(scanoldscore+sz)) break; if((scan+lastoffsetSf*2-lenf) { Sf=s; lenf=i; }; }; lenb=0; if(scan=lastscan+i)&&(pos>=i);i++) { s += (oldbuf[pos-i]==newbuf[scan-i]); if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; }; }; if(lastscan+lenf>scan-lenb) { overlap=(lastscan+lenf)-(scan-lenb); s=0;Ss=0;lens=0; for(i=0;iSs) { Ss=s; lens=i+1; }; }; lenf+=lens-overlap; lenb-=lens; }; for(i=0;i (newsize/2 + newsize/5)) { rv = 0; goto out; } /* Compute uncompressed size of the ctrl data. */ len = BUFTELL(&pf); valouti32(len-hdrsz, header+4); ulen = len-hdrsz; /* If our data can fit in the scratch area use it otherwise alloc. */ if (ulen > scratchsize) { cb = (u_char *)slab_alloc(NULL, ulen); } else { cb = scratch; } /* * Attempt to RLE the ctrl data. If RLE succeeds and produces a smaller * data then retain it. */ BUFSEEK(&pf, hdrsz, SEEK_SET); rv = zero_rle_encode(BUFPTR(&pf), ulen, cb, &ulen); if (rv == 0 && ulen < len-hdrsz) { BUFWRITE(&pf, cb, ulen); } else { BUFSEEK(&pf, len, SEEK_SET); } if (len-hdrsz > scratchsize) { slab_free(NULL, cb); } /* Compute compressed size of ctrl data */ len = BUFTELL(&pf); valouti32(len-hdrsz, header); rv = len; /* Write diff data */ len = newsize - rv; ulen = len; if (zero_rle_encode(db, dblen, BUFPTR(&pf), &ulen) == -1) { rv = 0; goto out; } /* Output size of diff data */ len = ulen; valouti32(len, header + 4*2); valouti32(dblen, header + 4*3); rv += len; BUFSEEK(&pf, len, SEEK_CUR); /* Write extra data */ len = newsize - rv; ulen = len; if (eblen > 0) { if (zero_rle_encode(eb, eblen, BUFPTR(&pf), &ulen) == -1) { rv = 0; goto out; } if (ulen >= eblen) { if (eblen > len) { rv = 0; goto out; } memcpy(BUFPTR(&pf), eb, eblen); ulen = eblen; } } /* Output size of extra data */ len = ulen; valouti32(len, header + 4*4); valouti32(eblen, header + 4*5); rv += len; /* Seek to the beginning, re-write the header.*/ BUFSEEK(&pf, 0, SEEK_SET); BUFWRITE(&pf, header, hdrsz); out: /* Free the memory we used */ slab_free(NULL, db); slab_free(NULL, eb); slab_free(NULL, I); return (rv); }