Add LZFX Compression support, a very fast lightweight compressor.
Avoid a branch in the rabin loop.
This commit is contained in:
parent
7e14909ad1
commit
8cfd54fe34
7 changed files with 655 additions and 15 deletions
23
Makefile
23
Makefile
|
@ -23,7 +23,7 @@
|
|||
|
||||
PROG= pcompress
|
||||
MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \
|
||||
lzma_compress.c ppmd_compress.c adaptive_compress.c
|
||||
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c
|
||||
MAINHDRS = allocator.h pcompress.h utils.h
|
||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||
|
||||
|
@ -40,6 +40,10 @@ LZMAHDRS = lzma/CpuArch.h lzma/LzFind.h lzma/LzmaEnc.h lzma/Types.h \
|
|||
lzma/LzHash.h lzma/LzmaDec.h utils.h
|
||||
LZMAOBJS = $(LZMASRCS:.c=.o)
|
||||
|
||||
LZFXSRCS = lzfx/lzfx.c
|
||||
LZFXHDRS = lzfx/lzfx.h
|
||||
LZFXOBJS = $(LZFXSRCS:.c=.o)
|
||||
|
||||
PPMDSRCS = lzma/Ppmd8.c lzma/Ppmd8Enc.c lzma/Ppmd8Dec.c
|
||||
PPMDHDRS = lzma/Ppmd.h lzma/Ppmd8.h
|
||||
PPMDOBJS = $(PPMDSRCS:.c=.o)
|
||||
|
@ -48,11 +52,11 @@ CRCSRCS = lzma/crc64_fast.c lzma/crc64_table.c
|
|||
CRCHDRS = lzma/crc64_table_le.h lzma/crc64_table_be.h lzma/crc_macros.h
|
||||
CRCOBJS = $(CRCSRCS:.c=.o)
|
||||
|
||||
BAKFILES = *~ lzma/*~ rabin/*~ bsdiff/*~
|
||||
BAKFILES = *~ lzma/*~ lzfx/*~ rabin/*~ bsdiff/*~
|
||||
|
||||
RM = rm -f
|
||||
CPPFLAGS = -I. -I./lzma -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEFAULT_PROPS -DFILE_OFFSET_BITS=64 \
|
||||
-D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
|
||||
CPPFLAGS = -I. -I./lzma -I./lzfx -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEFAULT_PROPS \
|
||||
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
|
||||
VEC_FLAGS = -ftree-vectorize
|
||||
LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm
|
||||
|
@ -92,12 +96,17 @@ $(RABINOBJS): $(RABINSRCS) $(RABINHDRS)
|
|||
$(BSDIFFOBJS): $(BSDIFFSRCS) $(BSDIFFHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(LZFXOBJS): $(LZFXSRCS) $(LZFXHDRS)
|
||||
$(COMPILE) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
||||
$(COMPILE) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS)
|
||||
$(LINK) -o $@ $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(LDLIBS)
|
||||
$(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS)
|
||||
$(LINK) -o $@ $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) \
|
||||
$(LZFXOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
$(RM) $(PROG) $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(BAKFILES)
|
||||
$(RM) $(PROG) $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(LZFXOBJS) \
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(BAKFILES)
|
||||
|
||||
|
|
390
lzfx/lzfx.c
Normal file
390
lzfx/lzfx.c
Normal file
|
@ -0,0 +1,390 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
|
||||
* http://lzfx.googlecode.com
|
||||
*
|
||||
* Implements an LZF-compatible compressor/decompressor based on the liblzf
|
||||
* codebase written by Marc Lehmann. This code is released under the BSD
|
||||
* license. License and original copyright statement follow.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modifica-
|
||||
* tion, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
|
||||
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
|
||||
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
|
||||
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*
|
||||
* This file contains the LZFX (LZF-compatible) compressor and decompressor
|
||||
* described in the BSD-licensed notice above, bundled for use by Pcompress.
|
||||
* It compresses and decompresses buffer to buffer.
|
||||
*/
|
||||
|
||||
#include "lzfx.h"
|
||||
|
||||
#define LZFX_HSIZE (1 << (LZFX_HLOG))
|
||||
|
||||
/* We need this for memset */
|
||||
#ifdef __cplusplus
|
||||
# include <cstring>
|
||||
#else
|
||||
# include <string.h>
|
||||
#endif
|
||||
|
||||
#if __GNUC__ >= 3 && !DISABLE_EXPECT
|
||||
# define fx_expect_false(expr) __builtin_expect((expr) != 0, 0)
|
||||
# define fx_expect_true(expr) __builtin_expect((expr) != 0, 1)
|
||||
#else
|
||||
# define fx_expect_false(expr) (expr)
|
||||
# define fx_expect_true(expr) (expr)
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
typedef const u8 *LZSTATE[LZFX_HSIZE];
|
||||
|
||||
/* Define the hash function */
|
||||
#define LZFX_FRST(p) (((p[0]) << 8) | p[1])
|
||||
#define LZFX_NEXT(v,p) (((v) << 8) | p[2])
|
||||
#define LZFX_IDX(h) ((( h >> (3*8 - LZFX_HLOG)) - h ) & (LZFX_HSIZE - 1))
|
||||
|
||||
/* These cannot be changed, as they are related to the compressed format. */
|
||||
#define LZFX_MAX_LIT (1 << 5)
|
||||
#define LZFX_MAX_OFF (1 << 13)
|
||||
#define LZFX_MAX_REF ((1 << 8) + (1 << 3))
|
||||
|
||||
static
|
||||
int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen);
|
||||
|
||||
/* Compressed format
|
||||
|
||||
There are two kinds of structures in LZF/LZFX: literal runs and back
|
||||
references. The length of a literal run is encoded as L - 1, as it must
|
||||
contain at least one byte. Literals are encoded as follows:
|
||||
|
||||
000LLLLL <L+1 bytes>
|
||||
|
||||
Back references are encoded as follows. The smallest possible encoded
|
||||
length value is 1, as otherwise the control byte would be recognized as
|
||||
a literal run. Since at least three bytes must match for a back reference
|
||||
to be inserted, the length is encoded as L - 2 instead of L - 1. The
|
||||
offset (distance to the desired data in the output buffer) is encoded as
|
||||
o - 1, as all offsets are at least 1. The binary format is:
|
||||
|
||||
LLLooooo oooooooo for backrefs of real length < 9 (1 <= L < 7)
|
||||
111ooooo LLLLLLLL oooooooo for backrefs of real length >= 9 (L >= 7)
|
||||
*/
|
||||
#include <stdio.h>
|
||||
int lzfx_compress(const void *const ibuf, const unsigned int ilen,
|
||||
void *obuf, unsigned int *const olen){
|
||||
|
||||
/* Hash table; an array of u8*'s which point
|
||||
to various locations in the input buffer */
|
||||
const u8 *htab[LZFX_HSIZE];
|
||||
|
||||
const u8 **hslot; /* Pointer to entry in hash table */
|
||||
unsigned int hval; /* Hash value generated by macros above */
|
||||
const u8 *ref; /* Pointer to candidate match location in input */
|
||||
|
||||
const u8 *ip = (const u8 *)ibuf;
|
||||
const u8 *const in_end = ip + ilen;
|
||||
|
||||
u8 *op = (u8 *)obuf;
|
||||
const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
|
||||
|
||||
int lit; /* # of bytes in current literal run */
|
||||
|
||||
#if defined (WIN32) && defined (_M_X64)
|
||||
unsigned _int64 off; /* workaround for missing POSIX compliance */
|
||||
#else
|
||||
unsigned long off;
|
||||
#endif
|
||||
|
||||
if(olen == NULL) return LZFX_EARGS;
|
||||
if(ibuf == NULL){
|
||||
if(ilen != 0) return LZFX_EARGS;
|
||||
*olen = 0;
|
||||
return 0;
|
||||
}
|
||||
if(obuf == NULL){
|
||||
if(olen != 0) return LZFX_EARGS;
|
||||
return lzfx_getsize(ibuf, ilen, olen);
|
||||
}
|
||||
|
||||
memset(htab, 0, sizeof(htab));
|
||||
|
||||
/* Start a literal run. Whenever we do this the output pointer is
|
||||
advanced because the current byte will hold the encoded length. */
|
||||
lit = 0; op++;
|
||||
|
||||
hval = LZFX_FRST(ip);
|
||||
|
||||
while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */
|
||||
|
||||
hval = LZFX_NEXT(hval, ip);
|
||||
hslot = htab + LZFX_IDX(hval);
|
||||
|
||||
ref = *hslot; *hslot = ip;
|
||||
|
||||
if( ref < ip
|
||||
&& (off = ip - ref - 1) < LZFX_MAX_OFF
|
||||
&& ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
|
||||
&& ref > (u8 *)ibuf
|
||||
&& ref[0] == ip[0]
|
||||
&& ref[1] == ip[1]
|
||||
&& ref[2] == ip[2] ) {
|
||||
|
||||
unsigned int len = 3; /* We already know 3 bytes match */
|
||||
const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ?
|
||||
LZFX_MAX_REF : in_end - ip - 2;
|
||||
|
||||
/* lit == 0: op + 3 must be < out_end (because we undo the run)
|
||||
lit != 0: op + 3 + 1 must be < out_end */
|
||||
if(fx_expect_false(op - !lit + 3 + 1 >= out_end))
|
||||
return LZFX_ESIZE;
|
||||
|
||||
op [- lit - 1] = lit - 1; /* Terminate literal run */
|
||||
op -= !lit; /* Undo run if length is zero */
|
||||
|
||||
/* Start checking at the fourth byte */
|
||||
while (len < maxlen && ref[len] == ip[len])
|
||||
len++;
|
||||
|
||||
len -= 2; /* We encode the length as #octets - 2 */
|
||||
|
||||
/* Format 1: [LLLooooo oooooooo] */
|
||||
if (len < 7) {
|
||||
*op++ = (off >> 8) + (len << 5);
|
||||
*op++ = off;
|
||||
|
||||
/* Format 2: [111ooooo LLLLLLLL oooooooo] */
|
||||
} else {
|
||||
*op++ = (off >> 8) + (7 << 5);
|
||||
*op++ = len - 7;
|
||||
*op++ = off;
|
||||
}
|
||||
|
||||
lit = 0; op++;
|
||||
|
||||
ip += len + 1; /* ip = initial ip + #octets -1 */
|
||||
|
||||
if (fx_expect_false (ip + 3 >= in_end)){
|
||||
ip++; /* Code following expects exit at bottom of loop */
|
||||
break;
|
||||
}
|
||||
|
||||
hval = LZFX_FRST (ip);
|
||||
hval = LZFX_NEXT (hval, ip);
|
||||
htab[LZFX_IDX (hval)] = ip;
|
||||
|
||||
ip++; /* ip = initial ip + #octets */
|
||||
|
||||
} else {
|
||||
/* Keep copying literal bytes */
|
||||
|
||||
if (fx_expect_false (op >= out_end)) return LZFX_ESIZE;
|
||||
|
||||
lit++; *op++ = *ip++;
|
||||
|
||||
if (fx_expect_false (lit == LZFX_MAX_LIT)) {
|
||||
op [- lit - 1] = lit - 1; /* stop run */
|
||||
lit = 0; op++; /* start run */
|
||||
}
|
||||
|
||||
} /* if() found match in htab */
|
||||
|
||||
} /* while(ip < ilen -2) */
|
||||
|
||||
/* At most 3 bytes remain in input. We therefore need 4 bytes available
|
||||
in the output buffer to store them (3 data + ctrl byte).*/
|
||||
if (op + 3 > out_end) return LZFX_ESIZE;
|
||||
|
||||
while (ip < in_end) {
|
||||
|
||||
lit++; *op++ = *ip++;
|
||||
|
||||
if (fx_expect_false (lit == LZFX_MAX_LIT)){
|
||||
op [- lit - 1] = lit - 1;
|
||||
lit = 0; op++;
|
||||
}
|
||||
}
|
||||
|
||||
op [- lit - 1] = lit - 1;
|
||||
op -= !lit;
|
||||
|
||||
*olen = op - (u8 *)obuf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Decompressor */
|
||||
int lzfx_decompress(const void* ibuf, unsigned int ilen,
|
||||
void* obuf, unsigned int *olen){
|
||||
|
||||
u8 const *ip = (const u8 *)ibuf;
|
||||
u8 const *const in_end = ip + ilen;
|
||||
u8 *op = (u8 *)obuf;
|
||||
u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
|
||||
|
||||
unsigned int remain_len = 0;
|
||||
int rc;
|
||||
|
||||
if(olen == NULL) return LZFX_EARGS;
|
||||
if(ibuf == NULL){
|
||||
if(ilen != 0) return LZFX_EARGS;
|
||||
*olen = 0;
|
||||
return 0;
|
||||
}
|
||||
if(obuf == NULL){
|
||||
if(olen != 0) return LZFX_EARGS;
|
||||
return lzfx_getsize(ibuf, ilen, olen);
|
||||
}
|
||||
|
||||
do {
|
||||
unsigned int ctrl = *ip++;
|
||||
|
||||
/* Format 000LLLLL: a literal byte string follows, of length L+1 */
|
||||
if(ctrl < (1 << 5)) {
|
||||
|
||||
ctrl++;
|
||||
|
||||
if(fx_expect_false(op + ctrl > out_end)){
|
||||
--ip; /* Rewind to control byte */
|
||||
goto guess;
|
||||
}
|
||||
if(fx_expect_false(ip + ctrl > in_end)) return LZFX_ECORRUPT;
|
||||
|
||||
do
|
||||
*op++ = *ip++;
|
||||
while(--ctrl);
|
||||
|
||||
/* Format #1 [LLLooooo oooooooo]: backref of length L+1+2
|
||||
^^^^^ ^^^^^^^^
|
||||
A B
|
||||
#2 [111ooooo LLLLLLLL oooooooo] backref of length L+7+2
|
||||
^^^^^ ^^^^^^^^
|
||||
A B
|
||||
In both cases the location of the backref is computed from the
|
||||
remaining part of the data as follows:
|
||||
|
||||
location = op - A*256 - B - 1
|
||||
*/
|
||||
} else {
|
||||
|
||||
unsigned int len = (ctrl >> 5);
|
||||
u8 *ref = op - ((ctrl & 0x1f) << 8) -1;
|
||||
|
||||
if(len==7) len += *ip++; /* i.e. format #2 */
|
||||
|
||||
len += 2; /* len is now #octets */
|
||||
|
||||
if(fx_expect_false(op + len > out_end)){
|
||||
ip -= (len >= 9) ? 2 : 1; /* Rewind to control byte */
|
||||
goto guess;
|
||||
}
|
||||
if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT;
|
||||
|
||||
ref -= *ip++;
|
||||
|
||||
if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT;
|
||||
|
||||
do
|
||||
*op++ = *ref++;
|
||||
while (--len);
|
||||
}
|
||||
|
||||
} while (ip < in_end);
|
||||
|
||||
*olen = op - (u8 *)obuf;
|
||||
|
||||
return 0;
|
||||
|
||||
guess:
|
||||
rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len);
|
||||
if(rc>=0) *olen = remain_len + (op - (u8*)obuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Guess len. No parameters may be NULL; this is not checked. */
|
||||
static
|
||||
int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
|
||||
|
||||
u8 const *ip = (const u8 *)ibuf;
|
||||
u8 const *const in_end = ip + ilen;
|
||||
int tot_len = 0;
|
||||
|
||||
while (ip < in_end) {
|
||||
|
||||
unsigned int ctrl = *ip++;
|
||||
|
||||
if(ctrl < (1 << 5)) {
|
||||
|
||||
ctrl++;
|
||||
|
||||
if(ip + ctrl > in_end)
|
||||
return LZFX_ECORRUPT;
|
||||
|
||||
tot_len += ctrl;
|
||||
ip += ctrl;
|
||||
|
||||
} else {
|
||||
|
||||
unsigned int len = (ctrl >> 5);
|
||||
|
||||
if(len==7){ /* i.e. format #2 */
|
||||
len += *ip++;
|
||||
}
|
||||
|
||||
len += 2; /* len is now #octets */
|
||||
|
||||
if(ip >= in_end) return LZFX_ECORRUPT;
|
||||
|
||||
ip++; /* skip the ref byte */
|
||||
|
||||
tot_len += len;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
*olen = tot_len;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
122
lzfx/lzfx.h
Normal file
122
lzfx/lzfx.h
Normal file
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
|
||||
* http://lzfx.googlecode.com
|
||||
*
|
||||
* Implements an LZF-compatible compressor/decompressor based on the liblzf
|
||||
* codebase written by Marc Lehmann. This code is released under the BSD
|
||||
* license. License and original copyright statement follow.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modifica-
|
||||
* tion, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
|
||||
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
|
||||
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
|
||||
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*
|
||||
* This header declares the LZFX (LZF-compatible) buffer-to-buffer
|
||||
* compression and decompression API described in the BSD-licensed notice
|
||||
* above, bundled for use by Pcompress.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef LZFX_H
|
||||
#define LZFX_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Documented behavior, including function signatures and error codes,
|
||||
is guaranteed to remain unchanged for releases with the same major
|
||||
version number. Releases of the same major version are also able
|
||||
to read each other's output, although the output itself is not
|
||||
guaranteed to be byte-for-byte identical.
|
||||
*/
|
||||
#define LZFX_VERSION_MAJOR 0
|
||||
#define LZFX_VERSION_MINOR 1
|
||||
#define LZFX_VERSION_STRING "0.1"
|
||||
|
||||
/* Hashtable size (2**LZFX_HLOG entries) */
|
||||
#ifndef LZFX_HLOG
|
||||
# define LZFX_HLOG 16
|
||||
#endif
|
||||
|
||||
/* Predefined errors. */
|
||||
#define LZFX_ESIZE -1 /* Output buffer too small */
|
||||
#define LZFX_ECORRUPT -2 /* Invalid data for decompression */
|
||||
#define LZFX_EARGS -3 /* Arguments invalid (NULL) */
|
||||
|
||||
/* Buffer-to buffer compression.
|
||||
|
||||
Supply pre-allocated input and output buffers via ibuf and obuf, and
|
||||
their size in bytes via ilen and olen. Buffers may not overlap.
|
||||
|
||||
On success, the function returns a non-negative value and the argument
|
||||
olen contains the compressed size in bytes. On failure, a negative
|
||||
value is returned and olen is not modified.
|
||||
*/
|
||||
int lzfx_compress(const void* ibuf, unsigned int ilen,
|
||||
void* obuf, unsigned int *olen);
|
||||
|
||||
/* Buffer-to-buffer decompression.
|
||||
|
||||
Supply pre-allocated input and output buffers via ibuf and obuf, and
|
||||
their size in bytes via ilen and olen. Buffers may not overlap.
|
||||
|
||||
On success, the function returns a non-negative value and the argument
|
||||
olen contains the uncompressed size in bytes. On failure, a negative
|
||||
value is returned.
|
||||
|
||||
If the failure code is LZFX_ESIZE, olen contains the minimum buffer size
|
||||
required to hold the decompressed data. Otherwise, olen is not modified.
|
||||
|
||||
Supplying a zero *olen is a valid and supported strategy to determine the
|
||||
required buffer size. This does not require decompression of the entire
|
||||
stream and is consequently very fast. Argument obuf may be NULL in
|
||||
this case only.
|
||||
*/
|
||||
int lzfx_decompress(const void* ibuf, unsigned int ilen,
|
||||
void* obuf, unsigned int *olen);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
99
lzfx_compress.c
Normal file
99
lzfx_compress.c
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*
|
||||
* This program includes partly-modified public domain source
|
||||
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include <limits.h>
|
||||
#include <utils.h>
|
||||
#include <pcompress.h>
|
||||
#include <lzfx.h>
|
||||
|
||||
/*
 * Statistics hook for the LZFX algorithm. Nothing is collected, so this
 * is intentionally a no-op regardless of 'show'.
 */
void
lz_fx_stats(int show)
{
	(void) show;
}
|
||||
|
||||
/*
 * Prepare for LZFX use: cap the compression level at 9 and verify that a
 * chunk fits in the unsigned int lengths LZFX works with.
 *
 * data       unused here.
 * level      in/out: values above 9 are lowered to 9.
 * chunksize  chunk size in bytes.
 *
 * Returns 0 when the chunk size is acceptable, 1 (after printing a
 * message to stderr) when it exceeds UINT_MAX.
 */
int
lz_fx_init(void **data, int *level, ssize_t chunksize)
{
	int too_big;

	if (*level > 9)
		*level = 9;

	too_big = (chunksize > UINT_MAX);
	if (!too_big)
		return (0);

	fprintf(stderr, "Chunk size too big for LZFX.\n");
	return (1);
}
|
||||
|
||||
void
|
||||
lz_fx_err(int err)
|
||||
{
|
||||
switch (err) {
|
||||
case LZFX_ESIZE:
|
||||
fprintf(stderr, "LZFX: Output buffer too small.\n");
|
||||
break;
|
||||
case LZFX_ECORRUPT:
|
||||
fprintf(stderr, "LZFX: Corrupt data for decompression.\n");
|
||||
break;
|
||||
case LZFX_EARGS:
|
||||
fprintf(stderr, "LZFX: Invalid arguments.\n");
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "LZFX: Unknown error code: %d\n", err);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
lz_fx_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
{
|
||||
int rv;
|
||||
unsigned int _srclen = srclen;
|
||||
unsigned int _dstlen = *dstlen;
|
||||
|
||||
rv = lzfx_compress(src, _srclen, dst, &_dstlen);
|
||||
if (rv == -1) {
|
||||
lz_fx_err(rv);
|
||||
return (-1);
|
||||
}
|
||||
*dstlen = _dstlen;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
lz_fx_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
{
|
||||
int rv;
|
||||
unsigned int _srclen = srclen;
|
||||
unsigned int _dstlen = *dstlen;
|
||||
|
||||
rv = lzfx_decompress(src, _srclen, dst, &_dstlen);
|
||||
if (rv == -1) {
|
||||
lz_fx_err(rv);
|
||||
return (-1);
|
||||
}
|
||||
*dstlen = _dstlen;
|
||||
return (0);
|
||||
}
|
8
main.c
8
main.c
|
@ -1185,6 +1185,14 @@ init_algo(const char *algo, int bail)
|
|||
_stats_func = ppmd_stats;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "lzfx", 4) == 0) {
|
||||
_compress_func = lz_fx_compress;
|
||||
_decompress_func = lz_fx_decompress;
|
||||
_init_func = lz_fx_init;
|
||||
_deinit_func = NULL;
|
||||
_stats_func = lz_fx_stats;
|
||||
rv = 0;
|
||||
|
||||
/* adapt2 and adapt ordering of the checks matter here. */
|
||||
} else if (memcmp(algorithm, "adapt2", 6) == 0) {
|
||||
_compress_func = adapt_compress;
|
||||
|
|
|
@ -65,6 +65,8 @@ extern int adapt_compress(void *src, size_t srclen, void *dst,
|
|||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int ppmd_compress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int lz_fx_compress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
||||
extern int zlib_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
@ -76,6 +78,8 @@ extern int adapt_decompress(void *src, size_t srclen, void *dst,
|
|||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int ppmd_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int lz_fx_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
||||
extern int adapt_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int adapt2_init(void **data, int *level, ssize_t chunksize);
|
||||
|
@ -83,6 +87,7 @@ extern int lzma_init(void **data, int *level, ssize_t chunksize);
|
|||
extern int ppmd_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int bzip2_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int zlib_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
|
||||
|
||||
extern int adapt_deinit(void **data);
|
||||
extern int lzma_deinit(void **data);
|
||||
|
@ -93,6 +98,7 @@ extern void ppmd_stats(int show);
|
|||
extern void lzma_stats(int show);
|
||||
extern void bzip2_stats(int show);
|
||||
extern void zlib_stats(int show);
|
||||
extern void lz_fx_stats(int show);
|
||||
|
||||
/*
|
||||
* Per-thread data structure for compression and decompression threads.
|
||||
|
|
|
@ -235,7 +235,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
uint32_t length;
|
||||
uint64_t cur_roll_checksum, cur_sketch;
|
||||
uint64_t *fplist;
|
||||
uint32_t len1, fpos;
|
||||
uint32_t len1, fpos[2];
|
||||
|
||||
if (rabin_pos == NULL) {
|
||||
/*
|
||||
|
@ -245,7 +245,8 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
fplist_sz = 8 * ctx->rabin_poly_avg_block_size;
|
||||
fplist = (uint64_t *)(ctx->cbuf + ctx->real_chunksize - fplist_sz);
|
||||
memset(fplist, 0, fplist_sz);
|
||||
fpos = 0;
|
||||
fpos[0] = 0;
|
||||
fpos[1] = 0;
|
||||
len1 = 0;
|
||||
}
|
||||
length = offset;
|
||||
|
@ -321,17 +322,22 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
* http://www.armedia.com/wp/SimilarityIndex.pdf
|
||||
*/
|
||||
len1++;
|
||||
j = cur_roll_checksum & ctx->rabin_avg_block_mask;
|
||||
fplist[j] += cur_roll_checksum;
|
||||
if (fplist[j] > fplist[fpos]) fpos = j;
|
||||
fpos[1] = cur_roll_checksum & ctx->rabin_avg_block_mask;
|
||||
fplist[fpos[1]] += cur_roll_checksum;
|
||||
|
||||
/*
|
||||
* Perform the following statement without branching:
|
||||
* if (fplist[fpos[1]] > fplist[fpos[0]]) fpos[0] = fpos[1];
|
||||
*/
|
||||
fpos[0] = fpos[(fplist[fpos[1]] > fplist[fpos[0]])];
|
||||
if (len1 == SKETCH_BASIC_BLOCK_SZ) {
|
||||
/*
|
||||
* Compute the super sketch value by summing all the representative
|
||||
* fingerprints of the block.
|
||||
*/
|
||||
cur_sketch += fplist[fpos];
|
||||
cur_sketch += fplist[fpos[0]];
|
||||
memset(fplist, 0, fplist_sz);
|
||||
fpos = 0;
|
||||
fpos[0] = 0;
|
||||
len1 = 0;
|
||||
}
|
||||
/*
|
||||
|
@ -354,7 +360,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
ctx->blocks[blknum].similar = 0;
|
||||
ctx->blocks[blknum].cksum_n_offset = cur_sketch;
|
||||
memset(fplist, 0, fplist_sz);
|
||||
fpos = 0;
|
||||
fpos[0] = 0;
|
||||
len1 = 0;
|
||||
cur_sketch = 0;
|
||||
blknum++;
|
||||
|
|
Loading…
Reference in a new issue