From 8cfd54fe3401ed649508a0abe5a5385804db3d96 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Mon, 23 Jul 2012 00:15:08 +0530 Subject: [PATCH] Add LZFX Compression support, a very fast lightweight compressor. Avoid a branch in the rabin loop. --- Makefile | 23 ++- lzfx/lzfx.c | 390 +++++++++++++++++++++++++++++++++++++++ lzfx/lzfx.h | 122 ++++++++++++ lzfx_compress.c | 99 ++++++++++ main.c | 8 + pcompress.h | 6 + rabin/rabin_polynomial.c | 22 ++- 7 files changed, 655 insertions(+), 15 deletions(-) create mode 100644 lzfx/lzfx.c create mode 100644 lzfx/lzfx.h create mode 100644 lzfx_compress.c diff --git a/Makefile b/Makefile index 4b092db..1e5d2b9 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ PROG= pcompress MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \ - lzma_compress.c ppmd_compress.c adaptive_compress.c + lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c MAINHDRS = allocator.h pcompress.h utils.h MAINOBJS = $(MAINSRCS:.c=.o) @@ -40,6 +40,10 @@ LZMAHDRS = lzma/CpuArch.h lzma/LzFind.h lzma/LzmaEnc.h lzma/Types.h \ lzma/LzHash.h lzma/LzmaDec.h utils.h LZMAOBJS = $(LZMASRCS:.c=.o) +LZFXSRCS = lzfx/lzfx.c +LZFXHDRS = lzfx/lzfx.h +LZFXOBJS = $(LZFXSRCS:.c=.o) + PPMDSRCS = lzma/Ppmd8.c lzma/Ppmd8Enc.c lzma/Ppmd8Dec.c PPMDHDRS = lzma/Ppmd.h lzma/Ppmd8.h PPMDOBJS = $(PPMDSRCS:.c=.o) @@ -48,11 +52,11 @@ CRCSRCS = lzma/crc64_fast.c lzma/crc64_table.c CRCHDRS = lzma/crc64_table_le.h lzma/crc64_table_be.h lzma/crc_macros.h CRCOBJS = $(CRCSRCS:.c=.o) -BAKFILES = *~ lzma/*~ rabin/*~ bsdiff/*~ +BAKFILES = *~ lzma/*~ lzfx/*~ rabin/*~ bsdiff/*~ RM = rm -f -CPPFLAGS = -I. -I./lzma -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEFAULT_PROPS -DFILE_OFFSET_BITS=64 \ - -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 +CPPFLAGS = -I. 
-I./lzma -I./lzfx -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEFAULT_PROPS \ + -DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 VEC_FLAGS = -ftree-vectorize LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @@ -92,12 +96,17 @@ $(RABINOBJS): $(RABINSRCS) $(RABINHDRS) $(BSDIFFOBJS): $(BSDIFFSRCS) $(BSDIFFHDRS) $(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ +$(LZFXOBJS): $(LZFXSRCS) $(LZFXHDRS) + $(COMPILE) $(CPPFLAGS) $(@:.o=.c) -o $@ + $(MAINOBJS): $(MAINSRCS) $(MAINHDRS) $(COMPILE) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ -$(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) - $(LINK) -o $@ $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(LDLIBS) +$(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) + $(LINK) -o $@ $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) \ + $(LZFXOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(LDLIBS) clean: - $(RM) $(PROG) $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS) $(BAKFILES) + $(RM) $(PROG) $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(CRCOBJS) $(LZFXOBJS) \ + $(RABINOBJS) $(BSDIFFOBJS) $(BAKFILES) diff --git a/lzfx/lzfx.c b/lzfx/lzfx.c new file mode 100644 index 0000000..ca8138e --- /dev/null +++ b/lzfx/lzfx.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2009 Andrew Collette + * http://lzfx.googlecode.com + * + * Implements an LZF-compatible compressor/decompressor based on the liblzf + * codebase written by Marc Lehmann. This code is released under the BSD + * license. License and original copyright statement follow. + * + * + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + * + * This is the LZFX compressor/decompressor as bundled with Pcompress.
It is an + * LZF-compatible buffer to buffer compressor; see the original LZFX + * copyright and license statement at the top of this file. + */ + +#include "lzfx.h" + +#define LZFX_HSIZE (1 << (LZFX_HLOG)) + +/* We need this for memset */ +#ifdef __cplusplus +# include <cstring> +#else +# include <string.h> +#endif + +#if __GNUC__ >= 3 && !DISABLE_EXPECT +# define fx_expect_false(expr) __builtin_expect((expr) != 0, 0) +# define fx_expect_true(expr) __builtin_expect((expr) != 0, 1) +#else +# define fx_expect_false(expr) (expr) +# define fx_expect_true(expr) (expr) +#endif + +typedef unsigned char u8; +typedef const u8 *LZSTATE[LZFX_HSIZE]; + +/* Define the hash function */ +#define LZFX_FRST(p) (((p[0]) << 8) | p[1]) +#define LZFX_NEXT(v,p) (((v) << 8) | p[2]) +#define LZFX_IDX(h) ((( h >> (3*8 - LZFX_HLOG)) - h ) & (LZFX_HSIZE - 1)) + +/* These cannot be changed, as they are related to the compressed format. */ +#define LZFX_MAX_LIT (1 << 5) +#define LZFX_MAX_OFF (1 << 13) +#define LZFX_MAX_REF ((1 << 8) + (1 << 3)) + +static +int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen); + +/* Compressed format + + There are two kinds of structures in LZF/LZFX: literal runs and back + references. The length of a literal run is encoded as L - 1, as it must + contain at least one byte. Literals are encoded as follows: + + 000LLLLL + + Back references are encoded as follows. The smallest possible encoded + length value is 1, as otherwise the control byte would be recognized as + a literal run. Since at least three bytes must match for a back reference + to be inserted, the length is encoded as L - 2 instead of L - 1. The + offset (distance to the desired data in the output buffer) is encoded as + o - 1, as all offsets are at least 1.
The binary format is: + + LLLooooo oooooooo for backrefs of real length < 9 (1 <= L < 7) + 111ooooo LLLLLLLL oooooooo for backrefs of real length >= 9 (L > 7) +*/ +#include +int lzfx_compress(const void *const ibuf, const unsigned int ilen, + void *obuf, unsigned int *const olen){ + + /* Hash table; an array of u8*'s which point + to various locations in the input buffer */ + const u8 *htab[LZFX_HSIZE]; + + const u8 **hslot; /* Pointer to entry in hash table */ + unsigned int hval; /* Hash value generated by macros above */ + const u8 *ref; /* Pointer to candidate match location in input */ + + const u8 *ip = (const u8 *)ibuf; + const u8 *const in_end = ip + ilen; + + u8 *op = (u8 *)obuf; + const u8 *const out_end = (olen == NULL ? NULL : op + *olen); + + int lit; /* # of bytes in current literal run */ + +#if defined (WIN32) && defined (_M_X64) + unsigned _int64 off; /* workaround for missing POSIX compliance */ +#else + unsigned long off; +#endif + + if(olen == NULL) return LZFX_EARGS; + if(ibuf == NULL){ + if(ilen != 0) return LZFX_EARGS; + *olen = 0; + return 0; + } + if(obuf == NULL){ + if(olen != 0) return LZFX_EARGS; /* NOTE(review): this tests the pointer olen, not *olen; olen is known to be non-NULL here, so a NULL obuf always yields LZFX_EARGS and the next line is never reached */ + return lzfx_getsize(ibuf, ilen, olen); + } + + memset(htab, 0, sizeof(htab)); + + /* Start a literal run. Whenever we do this the output pointer is + advanced because the current byte will hold the encoded length. */ + lit = 0; op++; + + hval = LZFX_FRST(ip); /* NOTE(review): reads ip[0] and ip[1] before any comparison against in_end - confirm callers never pass ilen < 2 */ + + while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */ + + hval = LZFX_NEXT(hval, ip); + hslot = htab + LZFX_IDX(hval); + + ref = *hslot; *hslot = ip; + + if( ref < ip + && (off = ip - ref - 1) < LZFX_MAX_OFF + && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */ + && ref > (u8 *)ibuf + && ref[0] == ip[0] + && ref[1] == ip[1] + && ref[2] == ip[2] ) { + + unsigned int len = 3; /* We already know 3 bytes match */ + const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ?
+ LZFX_MAX_REF : in_end - ip - 2; + + /* lit == 0: op + 3 must be < out_end (because we undo the run) + lit != 0: op + 3 + 1 must be < out_end */ + if(fx_expect_false(op - !lit + 3 + 1 >= out_end)) + return LZFX_ESIZE; + + op [- lit - 1] = lit - 1; /* Terminate literal run */ + op -= !lit; /* Undo run if length is zero */ + + /* Start checking at the fourth byte */ + while (len < maxlen && ref[len] == ip[len]) + len++; + + len -= 2; /* We encode the length as #octets - 2 */ + + /* Format 1: [LLLooooo oooooooo] */ + if (len < 7) { + *op++ = (off >> 8) + (len << 5); + *op++ = off; + + /* Format 2: [111ooooo LLLLLLLL oooooooo] */ + } else { + *op++ = (off >> 8) + (7 << 5); + *op++ = len - 7; + *op++ = off; + } + + lit = 0; op++; + + ip += len + 1; /* ip = initial ip + #octets -1 */ + + if (fx_expect_false (ip + 3 >= in_end)){ + ip++; /* Code following expects exit at bottom of loop */ + break; + } + + hval = LZFX_FRST (ip); + hval = LZFX_NEXT (hval, ip); + htab[LZFX_IDX (hval)] = ip; + + ip++; /* ip = initial ip + #octets */ + + } else { + /* Keep copying literal bytes */ + + if (fx_expect_false (op >= out_end)) return LZFX_ESIZE; + + lit++; *op++ = *ip++; + + if (fx_expect_false (lit == LZFX_MAX_LIT)) { + op [- lit - 1] = lit - 1; /* stop run */ + lit = 0; op++; /* start run */ + } + + } /* if() found match in htab */ + + } /* while(ip < ilen -2) */ + + /* At most 3 bytes remain in input. 
We therefore need 4 bytes available + in the output buffer to store them (3 data + ctrl byte).*/ + if (op + 3 > out_end) return LZFX_ESIZE; + + while (ip < in_end) { + + lit++; *op++ = *ip++; + + if (fx_expect_false (lit == LZFX_MAX_LIT)){ + op [- lit - 1] = lit - 1; + lit = 0; op++; + } + } + + op [- lit - 1] = lit - 1; + op -= !lit; + + *olen = op - (u8 *)obuf; + return 0; +} + +/* Decompressor */ +int lzfx_decompress(const void* ibuf, unsigned int ilen, + void* obuf, unsigned int *olen){ + + u8 const *ip = (const u8 *)ibuf; + u8 const *const in_end = ip + ilen; + u8 *op = (u8 *)obuf; + u8 const *const out_end = (olen == NULL ? NULL : op + *olen); + + unsigned int remain_len = 0; + int rc; + + if(olen == NULL) return LZFX_EARGS; + if(ibuf == NULL){ + if(ilen != 0) return LZFX_EARGS; + *olen = 0; + return 0; + } + if(obuf == NULL){ + if(*olen != 0) return LZFX_EARGS; /* a NULL obuf is only valid for a size query, i.e. with *olen == 0 */ + return lzfx_getsize(ibuf, ilen, olen); + } + + do { + unsigned int ctrl = *ip++; + + /* Format 000LLLLL: a literal byte string follows, of length L+1 */ + if(ctrl < (1 << 5)) { + + ctrl++; + + if(fx_expect_false(op + ctrl > out_end)){ + --ip; /* Rewind to control byte */ + goto guess; + } + if(fx_expect_false(ip + ctrl > in_end)) return LZFX_ECORRUPT; + + do + *op++ = *ip++; + while(--ctrl); + + /* Format #1 [LLLooooo oooooooo]: backref of length L+1+2 + ^^^^^ ^^^^^^^^ + A B + #2 [111ooooo LLLLLLLL oooooooo] backref of length L+7+2 + ^^^^^ ^^^^^^^^ + A B + In both cases the location of the backref is computed from the + remaining part of the data as follows: + + location = op - A*256 - B - 1 + */ + } else { + + unsigned int len = (ctrl >> 5); + u8 *ref = op - ((ctrl & 0x1f) << 8) -1; + + if(len==7){ if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT; len += *ip++; } /* i.e. format #2; the extra length byte must lie inside the input */ + + len += 2; /* len is now #octets */ + + if(fx_expect_false(op + len > out_end)){ + ip -= (len >= 9) ?
2 : 1; /* Rewind to control byte */ + goto guess; + } + if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT; + + ref -= *ip++; + + if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT; + + do + *op++ = *ref++; + while (--len); + } + + } while (ip < in_end); + + *olen = op - (u8 *)obuf; + + return 0; + +guess: + rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len); + if(rc>=0){ *olen = remain_len + (op - (u8*)obuf); rc = LZFX_ESIZE; } /* output buffer was too small: report the required size and fail */ + return rc; +} + +/* Guess len. No parameters may be NULL; this is not checked. */ +static +int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){ + + u8 const *ip = (const u8 *)ibuf; + u8 const *const in_end = ip + ilen; + int tot_len = 0; + + while (ip < in_end) { + + unsigned int ctrl = *ip++; + + if(ctrl < (1 << 5)) { + + ctrl++; + + if(ip + ctrl > in_end) + return LZFX_ECORRUPT; + + tot_len += ctrl; + ip += ctrl; + + } else { + + unsigned int len = (ctrl >> 5); + + if(len==7){ /* i.e. format #2 */ + if(ip >= in_end) return LZFX_ECORRUPT; len += *ip++; /* the extra length byte must lie inside the input */ + } + + len += 2; /* len is now #octets */ + + if(ip >= in_end) return LZFX_ECORRUPT; + + ip++; /* skip the ref byte */ + + tot_len += len; + + } + + } + + *olen = tot_len; + + return 0; +} + + + + diff --git a/lzfx/lzfx.h b/lzfx/lzfx.h new file mode 100644 index 0000000..3aedd42 --- /dev/null +++ b/lzfx/lzfx.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2009 Andrew Collette + * http://lzfx.googlecode.com + * + * Implements an LZF-compatible compressor/decompressor based on the liblzf + * codebase written by Marc Lehmann. This code is released under the BSD + * license. License and original copyright statement follow. + * + * + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + * + * This header declares the LZFX buffer to buffer compression and + * decompression API as bundled with Pcompress; see the original LZFX + * copyright and license statement at the top of this file.
+ */ + + +#ifndef LZFX_H +#define LZFX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Documented behavior, including function signatures and error codes, + is guaranteed to remain unchanged for releases with the same major + version number. Releases of the same major version are also able + to read each other's output, although the output itself is not + guaranteed to be byte-for-byte identical. +*/ +#define LZFX_VERSION_MAJOR 0 +#define LZFX_VERSION_MINOR 1 +#define LZFX_VERSION_STRING "0.1" + +/* Hashtable size (2**LZFX_HLOG entries) */ +#ifndef LZFX_HLOG +# define LZFX_HLOG 16 +#endif + +/* Predefined errors. */ +#define LZFX_ESIZE -1 /* Output buffer too small */ +#define LZFX_ECORRUPT -2 /* Invalid data for decompression */ +#define LZFX_EARGS -3 /* Arguments invalid (NULL) */ + +/* Buffer-to buffer compression. + + Supply pre-allocated input and output buffers via ibuf and obuf, and + their size in bytes via ilen and olen. Buffers may not overlap. + + On success, the function returns a non-negative value and the argument + olen contains the compressed size in bytes. On failure, a negative + value is returned and olen is not modified. +*/ +int lzfx_compress(const void* ibuf, unsigned int ilen, + void* obuf, unsigned int *olen); + +/* Buffer-to-buffer decompression. + + Supply pre-allocated input and output buffers via ibuf and obuf, and + their size in bytes via ilen and olen. Buffers may not overlap. + + On success, the function returns a non-negative value and the argument + olen contains the uncompressed size in bytes. On failure, a negative + value is returned. + + If the failure code is LZFX_ESIZE, olen contains the minimum buffer size + required to hold the decompressed data. Otherwise, olen is not modified. + + Supplying a zero *olen is a valid and supported strategy to determine the + required buffer size. This does not require decompression of the entire + stream and is consequently very fast. Argument obuf may be NULL in + this case only. 
+*/ +int lzfx_decompress(const void* ibuf, unsigned int ilen, + void* obuf, unsigned int *olen); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/lzfx_compress.c b/lzfx_compress.c new file mode 100644 index 0000000..045b663 --- /dev/null +++ b/lzfx_compress.c @@ -0,0 +1,99 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + * + * This program includes partly-modified public domain source + * code from the LZMA SDK: http://www.7-zip.org/sdk.html + */ + +#include +#include +#include +#include +#include +#include +#include + +void +lz_fx_stats(int show) +{ +} + +int +lz_fx_init(void **data, int *level, ssize_t chunksize) +{ + if (*level > 9) *level = 9; + if (chunksize > UINT_MAX) { + fprintf(stderr, "Chunk size too big for LZFX.\n"); + return (1); + } + return (0); +} + +void +lz_fx_err(int err) +{ + switch (err) { + case LZFX_ESIZE: + fprintf(stderr, "LZFX: Output buffer too small.\n"); + break; + case LZFX_ECORRUPT: + fprintf(stderr, "LZFX: Corrupt data for decompression.\n"); + break; + case LZFX_EARGS: + fprintf(stderr, "LZFX: Invalid arguments.\n"); + break; + default: + fprintf(stderr, "LZFX: Unknown error code: %d\n", err); + } +} + +int +lz_fx_compress(void *src, size_t srclen, void *dst, size_t *dstlen, + int level, uchar_t chdr, void *data) +{ + int rv; + unsigned int _srclen = srclen; + unsigned int _dstlen = *dstlen; + + rv = lzfx_compress(src, _srclen, dst, &_dstlen); + if (rv == -1) { + lz_fx_err(rv); + return (-1); + } + *dstlen = _dstlen; + + return (0); +} + +int +lz_fx_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, + int level, uchar_t chdr, void *data) +{ + int rv; + unsigned int _srclen = srclen; + unsigned int _dstlen = *dstlen; + + rv = lzfx_decompress(src, _srclen, dst, &_dstlen); + if (rv == -1) { + lz_fx_err(rv); + return (-1); + } + *dstlen = _dstlen; + return (0); +} diff --git a/main.c b/main.c index fa74662..93dea47 100644 --- a/main.c +++ b/main.c @@ -1185,6 +1185,14 @@ init_algo(const char *algo, int bail) _stats_func = ppmd_stats; rv = 0; + } else if (memcmp(algorithm, "lzfx", 4) == 0) { + _compress_func = lz_fx_compress; + _decompress_func = lz_fx_decompress; + _init_func = lz_fx_init; + _deinit_func = NULL; + _stats_func = lz_fx_stats; + rv = 0; + /* adapt2 and adapt ordering 
of the checks matter here. */ } else if (memcmp(algorithm, "adapt2", 6) == 0) { _compress_func = adapt_compress; diff --git a/pcompress.h b/pcompress.h index 76294b3..f391d25 100644 --- a/pcompress.h +++ b/pcompress.h @@ -65,6 +65,8 @@ extern int adapt_compress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); extern int ppmd_compress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); +extern int lz_fx_compress(void *src, size_t srclen, void *dst, + size_t *dstlen, int level, uchar_t chdr, void *data); extern int zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); @@ -76,6 +78,8 @@ extern int adapt_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); extern int ppmd_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); +extern int lz_fx_decompress(void *src, size_t srclen, void *dst, + size_t *dstlen, int level, uchar_t chdr, void *data); extern int adapt_init(void **data, int *level, ssize_t chunksize); extern int adapt2_init(void **data, int *level, ssize_t chunksize); @@ -83,6 +87,7 @@ extern int lzma_init(void **data, int *level, ssize_t chunksize); extern int ppmd_init(void **data, int *level, ssize_t chunksize); extern int bzip2_init(void **data, int *level, ssize_t chunksize); extern int zlib_init(void **data, int *level, ssize_t chunksize); +extern int lz_fx_init(void **data, int *level, ssize_t chunksize); extern int adapt_deinit(void **data); extern int lzma_deinit(void **data); @@ -93,6 +98,7 @@ extern void ppmd_stats(int show); extern void lzma_stats(int show); extern void bzip2_stats(int show); extern void zlib_stats(int show); +extern void lz_fx_stats(int show); /* * Per-thread data structure for compression and decompression threads. 
diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c index b9c5e68..489bd3f 100755 --- a/rabin/rabin_polynomial.c +++ b/rabin/rabin_polynomial.c @@ -235,7 +235,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s uint32_t length; uint64_t cur_roll_checksum, cur_sketch; uint64_t *fplist; - uint32_t len1, fpos; + uint32_t len1, fpos[2]; if (rabin_pos == NULL) { /* @@ -245,7 +245,8 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s fplist_sz = 8 * ctx->rabin_poly_avg_block_size; fplist = (uint64_t *)(ctx->cbuf + ctx->real_chunksize - fplist_sz); memset(fplist, 0, fplist_sz); - fpos = 0; + fpos[0] = 0; + fpos[1] = 0; len1 = 0; } length = offset; @@ -321,17 +322,22 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s * http://www.armedia.com/wp/SimilarityIndex.pdf */ len1++; - j = cur_roll_checksum & ctx->rabin_avg_block_mask; - fplist[j] += cur_roll_checksum; - if (fplist[j] > fplist[fpos]) fpos = j; + fpos[1] = cur_roll_checksum & ctx->rabin_avg_block_mask; + fplist[fpos[1]] += cur_roll_checksum; + + /* + * Perform the following statement without branching: + * if (fplist[fpos[1]] > fplist[fpos[0]]) fpos[0] = fpos[1]; + */ + fpos[0] = fpos[(fplist[fpos[1]] > fplist[fpos[0]])]; if (len1 == SKETCH_BASIC_BLOCK_SZ) { /* * Compute the super sketch value by summing all the representative * fingerprints of the block. */ - cur_sketch += fplist[fpos]; + cur_sketch += fplist[fpos[0]]; memset(fplist, 0, fplist_sz); - fpos = 0; + fpos[0] = 0; len1 = 0; } /* @@ -354,7 +360,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s ctx->blocks[blknum].similar = 0; ctx->blocks[blknum].cksum_n_offset = cur_sketch; memset(fplist, 0, fplist_sz); - fpos = 0; + fpos[0] = 0; len1 = 0; cur_sketch = 0; blknum++;