2012-07-22 18:45:08 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
|
|
|
|
* http://lzfx.googlecode.com
|
|
|
|
*
|
|
|
|
* Implements an LZF-compatible compressor/decompressor based on the liblzf
|
|
|
|
* codebase written by Marc Lehmann. This code is released under the BSD
|
|
|
|
* license. License and original copyright statement follow.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without modifica-
|
|
|
|
* tion, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
|
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
|
|
|
|
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
|
|
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
|
|
|
|
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
|
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
|
|
|
|
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This file is a part of Pcompress, a chunked parallel multi-
|
|
|
|
* algorithm lossless compression and decompression program.
|
|
|
|
*
|
2013-03-07 14:56:48 +00:00
|
|
|
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
2012-07-22 18:45:08 +00:00
|
|
|
* Use is subject to license terms.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 3 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
2013-03-07 14:56:48 +00:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this program.
|
|
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
2012-07-22 18:45:08 +00:00
|
|
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "lzfx.h"
|
2012-07-23 16:13:12 +00:00
|
|
|
#include <allocator.h>
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
/* We need this for memset */
|
|
|
|
#ifdef __cplusplus
|
|
|
|
# include <cstring>
|
|
|
|
#else
|
|
|
|
# include <string.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Branch-prediction hints.  With GCC 3 or newer (and unless DISABLE_EXPECT
 * is set) the condition is normalised to 0/1 and handed to
 * __builtin_expect; otherwise the hints expand to the bare expression.
 */
#if __GNUC__ >= 3 && !DISABLE_EXPECT
# define fx_expect_false(expr)  __builtin_expect(!!(expr), 0)
# define fx_expect_true(expr)   __builtin_expect(!!(expr), 1)
#else
# define fx_expect_false(expr)  (expr)
# define fx_expect_true(expr)   (expr)
#endif
|
|
|
|
|
|
|
|
/*
 * Hash function.
 *
 * LZFX_FRST(p)       seeds the hash value from the two bytes p[0], p[1].
 * LZFX_NEXT(v, p)    shifts the running value v left by 8 and mixes in p[2].
 * LZFX_IDX(h, bits)  folds a hash value into a slot index of a table with
 *                    LZFX_HTAB_SIZE(bits) entries (the mask assumes that
 *                    size is a power of two).
 *
 * Every macro argument is parenthesized so that expressions such as
 * `buf + 1` can be passed safely.
 */
#define LZFX_FRST(p)        ((((p)[0]) << 8) | (p)[1])
#define LZFX_NEXT(v, p)     (((v) << 8) | (p)[2])
#define LZFX_IDX(h, bits)   ((((h) >> (3*8 - (bits))) - (h)) & (LZFX_HTAB_SIZE(bits) - 1))
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
/*
 * Limits imposed by the compressed format; they must not be changed.
 *
 *   LZFX_MAX_LIT  longest literal run (5-bit length field)
 *   LZFX_MAX_OFF  largest back-reference distance (13-bit offset field)
 *   LZFX_MAX_REF  longest back reference (8-bit extended length + 7 + 2,
 *                 rounded as in the original: 256 + 8)
 */
#define LZFX_MAX_LIT        (1 << 5)
#define LZFX_MAX_OFF        (1 << 13)
#define LZFX_MAX_REF        ((1 << 8) + (1 << 3))
|
|
|
|
|
|
|
|
/* Forward declaration: computes the decompressed size of an LZF stream
   without producing any output. */
static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen);
|
|
|
|
|
|
|
|
/* Compressed format
|
|
|
|
|
|
|
|
There are two kinds of structures in LZF/LZFX: literal runs and back
|
|
|
|
references. The length of a literal run is encoded as L - 1, as it must
|
|
|
|
contain at least one byte. Literals are encoded as follows:
|
|
|
|
|
|
|
|
000LLLLL <L+1 bytes>
|
|
|
|
|
|
|
|
Back references are encoded as follows. The smallest possible encoded
|
|
|
|
length value is 1, as otherwise the control byte would be recognized as
|
|
|
|
a literal run. Since at least three bytes must match for a back reference
|
|
|
|
to be inserted, the length is encoded as L - 2 instead of L - 1. The
|
|
|
|
offset (distance to the desired data in the output buffer) is encoded as
|
|
|
|
o - 1, as all offsets are at least 1. The binary format is:
|
|
|
|
|
|
|
|
LLLooooo oooooooo for backrefs of real length < 9 (1 <= L < 7)
|
|
|
|
    111ooooo LLLLLLLL oooooooo  for backrefs of real length >= 9  (L >= 7)
|
|
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
|
|
int lzfx_compress(const void *const ibuf, const unsigned int ilen,
|
2012-07-23 16:13:12 +00:00
|
|
|
void *obuf, unsigned int *const olen,
|
|
|
|
unsigned int htab_bits){
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
/* Hash table; an array of u8*'s which point
|
|
|
|
to various locations in the input buffer */
|
2012-07-23 16:13:12 +00:00
|
|
|
const u8 **htab;
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
const u8 **hslot; /* Pointer to entry in hash table */
|
|
|
|
unsigned int hval; /* Hash value generated by macros above */
|
|
|
|
const u8 *ref; /* Pointer to candidate match location in input */
|
|
|
|
|
|
|
|
const u8 *ip = (const u8 *)ibuf;
|
|
|
|
const u8 *const in_end = ip + ilen;
|
|
|
|
|
|
|
|
u8 *op = (u8 *)obuf;
|
|
|
|
const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
|
|
|
|
|
|
|
|
int lit; /* # of bytes in current literal run */
|
|
|
|
|
|
|
|
#if defined (WIN32) && defined (_M_X64)
|
|
|
|
unsigned _int64 off; /* workaround for missing POSIX compliance */
|
|
|
|
#else
|
|
|
|
unsigned long off;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if(olen == NULL) return LZFX_EARGS;
|
|
|
|
if(ibuf == NULL){
|
|
|
|
if(ilen != 0) return LZFX_EARGS;
|
|
|
|
*olen = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if(obuf == NULL){
|
|
|
|
if(olen != 0) return LZFX_EARGS;
|
|
|
|
return lzfx_getsize(ibuf, ilen, olen);
|
|
|
|
}
|
|
|
|
|
2012-07-23 16:13:12 +00:00
|
|
|
htab = (const u8 **)slab_calloc(NULL, LZFX_HTAB_SIZE(htab_bits), sizeof (u8*));
|
|
|
|
if (htab == NULL) return LZFX_ENOMEM;
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
/* Start a literal run. Whenever we do this the output pointer is
|
|
|
|
advanced because the current byte will hold the encoded length. */
|
|
|
|
lit = 0; op++;
|
|
|
|
|
|
|
|
hval = LZFX_FRST(ip);
|
|
|
|
|
|
|
|
while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */
|
|
|
|
|
|
|
|
hval = LZFX_NEXT(hval, ip);
|
2012-07-23 16:13:12 +00:00
|
|
|
hslot = htab + LZFX_IDX(hval, htab_bits);
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
ref = *hslot; *hslot = ip;
|
|
|
|
|
|
|
|
if( ref < ip
|
|
|
|
&& (off = ip - ref - 1) < LZFX_MAX_OFF
|
|
|
|
&& ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
|
|
|
|
&& ref > (u8 *)ibuf
|
|
|
|
&& ref[0] == ip[0]
|
|
|
|
&& ref[1] == ip[1]
|
|
|
|
&& ref[2] == ip[2] ) {
|
|
|
|
|
|
|
|
unsigned int len = 3; /* We already know 3 bytes match */
|
|
|
|
const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ?
|
|
|
|
LZFX_MAX_REF : in_end - ip - 2;
|
|
|
|
|
|
|
|
/* lit == 0: op + 3 must be < out_end (because we undo the run)
|
|
|
|
lit != 0: op + 3 + 1 must be < out_end */
|
2012-07-23 16:13:12 +00:00
|
|
|
if(fx_expect_false(op - !lit + 3 + 1 >= out_end)) {
|
|
|
|
slab_free(NULL, htab);
|
2012-07-22 18:45:08 +00:00
|
|
|
return LZFX_ESIZE;
|
2012-07-23 16:13:12 +00:00
|
|
|
}
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
op [- lit - 1] = lit - 1; /* Terminate literal run */
|
|
|
|
op -= !lit; /* Undo run if length is zero */
|
|
|
|
|
|
|
|
/* Start checking at the fourth byte */
|
|
|
|
while (len < maxlen && ref[len] == ip[len])
|
|
|
|
len++;
|
|
|
|
|
|
|
|
len -= 2; /* We encode the length as #octets - 2 */
|
|
|
|
|
|
|
|
/* Format 1: [LLLooooo oooooooo] */
|
|
|
|
if (len < 7) {
|
|
|
|
*op++ = (off >> 8) + (len << 5);
|
|
|
|
*op++ = off;
|
|
|
|
|
|
|
|
/* Format 2: [111ooooo LLLLLLLL oooooooo] */
|
|
|
|
} else {
|
|
|
|
*op++ = (off >> 8) + (7 << 5);
|
|
|
|
*op++ = len - 7;
|
|
|
|
*op++ = off;
|
|
|
|
}
|
|
|
|
|
|
|
|
lit = 0; op++;
|
|
|
|
|
|
|
|
ip += len + 1; /* ip = initial ip + #octets -1 */
|
|
|
|
|
|
|
|
if (fx_expect_false (ip + 3 >= in_end)){
|
|
|
|
ip++; /* Code following expects exit at bottom of loop */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
hval = LZFX_FRST (ip);
|
|
|
|
hval = LZFX_NEXT (hval, ip);
|
2012-07-23 16:13:12 +00:00
|
|
|
htab[LZFX_IDX (hval, htab_bits)] = ip;
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
ip++; /* ip = initial ip + #octets */
|
|
|
|
|
|
|
|
} else {
|
|
|
|
/* Keep copying literal bytes */
|
|
|
|
|
2012-07-23 16:13:12 +00:00
|
|
|
if (fx_expect_false (op >= out_end)) {
|
|
|
|
slab_free(NULL, htab);
|
|
|
|
return LZFX_ESIZE;
|
|
|
|
}
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
lit++; *op++ = *ip++;
|
|
|
|
|
|
|
|
if (fx_expect_false (lit == LZFX_MAX_LIT)) {
|
|
|
|
op [- lit - 1] = lit - 1; /* stop run */
|
|
|
|
lit = 0; op++; /* start run */
|
|
|
|
}
|
|
|
|
|
|
|
|
} /* if() found match in htab */
|
|
|
|
|
|
|
|
} /* while(ip < ilen -2) */
|
|
|
|
|
|
|
|
/* At most 3 bytes remain in input. We therefore need 4 bytes available
|
|
|
|
in the output buffer to store them (3 data + ctrl byte).*/
|
2012-07-23 16:13:12 +00:00
|
|
|
if (op + 3 > out_end) {
|
|
|
|
slab_free(NULL, htab);
|
|
|
|
return LZFX_ESIZE;
|
|
|
|
}
|
2012-07-22 18:45:08 +00:00
|
|
|
|
|
|
|
while (ip < in_end) {
|
|
|
|
|
|
|
|
lit++; *op++ = *ip++;
|
|
|
|
|
|
|
|
if (fx_expect_false (lit == LZFX_MAX_LIT)){
|
|
|
|
op [- lit - 1] = lit - 1;
|
|
|
|
lit = 0; op++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
op [- lit - 1] = lit - 1;
|
|
|
|
op -= !lit;
|
|
|
|
|
|
|
|
*olen = op - (u8 *)obuf;
|
2012-07-23 16:13:12 +00:00
|
|
|
slab_free(NULL, htab);
|
2012-07-22 18:45:08 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Decompressor */
|
|
|
|
int lzfx_decompress(const void* ibuf, unsigned int ilen,
|
|
|
|
void* obuf, unsigned int *olen){
|
|
|
|
|
|
|
|
u8 const *ip = (const u8 *)ibuf;
|
|
|
|
u8 const *const in_end = ip + ilen;
|
|
|
|
u8 *op = (u8 *)obuf;
|
|
|
|
u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
|
|
|
|
|
|
|
|
unsigned int remain_len = 0;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
if(olen == NULL) return LZFX_EARGS;
|
|
|
|
if(ibuf == NULL){
|
|
|
|
if(ilen != 0) return LZFX_EARGS;
|
|
|
|
*olen = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if(obuf == NULL){
|
|
|
|
if(olen != 0) return LZFX_EARGS;
|
|
|
|
return lzfx_getsize(ibuf, ilen, olen);
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
unsigned int ctrl = *ip++;
|
|
|
|
|
|
|
|
/* Format 000LLLLL: a literal byte string follows, of length L+1 */
|
|
|
|
if(ctrl < (1 << 5)) {
|
|
|
|
|
|
|
|
ctrl++;
|
|
|
|
|
|
|
|
if(fx_expect_false(op + ctrl > out_end)){
|
|
|
|
--ip; /* Rewind to control byte */
|
|
|
|
goto guess;
|
|
|
|
}
|
|
|
|
if(fx_expect_false(ip + ctrl > in_end)) return LZFX_ECORRUPT;
|
|
|
|
|
|
|
|
do
|
|
|
|
*op++ = *ip++;
|
|
|
|
while(--ctrl);
|
|
|
|
|
|
|
|
/* Format #1 [LLLooooo oooooooo]: backref of length L+1+2
|
|
|
|
^^^^^ ^^^^^^^^
|
|
|
|
A B
|
|
|
|
#2 [111ooooo LLLLLLLL oooooooo] backref of length L+7+2
|
|
|
|
^^^^^ ^^^^^^^^
|
|
|
|
A B
|
|
|
|
In both cases the location of the backref is computed from the
|
|
|
|
remaining part of the data as follows:
|
|
|
|
|
|
|
|
location = op - A*256 - B - 1
|
|
|
|
*/
|
|
|
|
} else {
|
|
|
|
|
|
|
|
unsigned int len = (ctrl >> 5);
|
|
|
|
u8 *ref = op - ((ctrl & 0x1f) << 8) -1;
|
|
|
|
|
|
|
|
if(len==7) len += *ip++; /* i.e. format #2 */
|
|
|
|
|
|
|
|
len += 2; /* len is now #octets */
|
|
|
|
|
|
|
|
if(fx_expect_false(op + len > out_end)){
|
|
|
|
ip -= (len >= 9) ? 2 : 1; /* Rewind to control byte */
|
|
|
|
goto guess;
|
|
|
|
}
|
|
|
|
if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT;
|
|
|
|
|
|
|
|
ref -= *ip++;
|
|
|
|
|
|
|
|
if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT;
|
|
|
|
|
|
|
|
do
|
|
|
|
*op++ = *ref++;
|
|
|
|
while (--len);
|
|
|
|
}
|
|
|
|
|
|
|
|
} while (ip < in_end);
|
|
|
|
|
|
|
|
*olen = op - (u8 *)obuf;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
guess:
|
|
|
|
rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len);
|
|
|
|
if(rc>=0) *olen = remain_len + (op - (u8*)obuf);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Guess len. No parameters may be NULL; this is not checked. */
|
|
|
|
static
|
|
|
|
int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
|
|
|
|
|
|
|
|
u8 const *ip = (const u8 *)ibuf;
|
|
|
|
u8 const *const in_end = ip + ilen;
|
|
|
|
int tot_len = 0;
|
|
|
|
|
|
|
|
while (ip < in_end) {
|
|
|
|
|
|
|
|
unsigned int ctrl = *ip++;
|
|
|
|
|
|
|
|
if(ctrl < (1 << 5)) {
|
|
|
|
|
|
|
|
ctrl++;
|
|
|
|
|
|
|
|
if(ip + ctrl > in_end)
|
|
|
|
return LZFX_ECORRUPT;
|
|
|
|
|
|
|
|
tot_len += ctrl;
|
|
|
|
ip += ctrl;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
unsigned int len = (ctrl >> 5);
|
|
|
|
|
|
|
|
if(len==7){ /* i.e. format #2 */
|
|
|
|
len += *ip++;
|
|
|
|
}
|
|
|
|
|
|
|
|
len += 2; /* len is now #octets */
|
|
|
|
|
|
|
|
if(ip >= in_end) return LZFX_ECORRUPT;
|
|
|
|
|
|
|
|
ip++; /* skip the ref byte */
|
|
|
|
|
|
|
|
tot_len += len;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
*olen = tot_len;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|