Enable building with alternate Zlib and Bzlib.
Update README and comments. Fix setting of output size when using Delta2 without LZP.
This commit is contained in:
parent
5ac47db6d5
commit
fb30b5c295
6 changed files with 132 additions and 45 deletions
10
INSTALL
10
INSTALL
|
@ -76,6 +76,13 @@ not the usual GNU Autoconf script.
|
|||
path to the libbsc source tree must be provided. It
|
||||
links the library statically.
|
||||
|
||||
--with-zlib=<path to zlib installation tree> (Default: System)
|
||||
Enable building against an alternate Zlib installation.
|
||||
|
||||
--with-bzlib=<path to Bzip2 library installation tree> (Default: System)
|
||||
Enable building against an alternate Bzip2 library
|
||||
installation.
|
||||
|
||||
--help Display the help message.
|
||||
|
||||
Steps for building with libbsc support
|
||||
|
@ -95,4 +102,7 @@ Steps for building with libbsc support
|
|||
4) Now run make in the pcompress directory. This will also run make in
|
||||
the libbsc source directory to build it.
|
||||
|
||||
5) Additional compilation flags can be passed to make like this:
|
||||
make EXTRA_CPPFLAGS=<...> EXTRA_LDFLAGS=<...>
|
||||
|
||||
|
||||
|
|
64
Makefile.in
64
Makefile.in
|
@ -22,16 +22,31 @@
|
|||
#
|
||||
|
||||
PROG= pcompress
|
||||
MAINSRCS = main.c utils/utils.c allocator.c zlib_compress.c bzip2_compress.c \
|
||||
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \
|
||||
lz4_compress.c none_compress.c utils/xxhash.c utils/heapq.c utils/cpuid.c \
|
||||
crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
||||
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c
|
||||
MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
|
||||
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
|
||||
utils/xxhash.c utils/heapq.c utils/cpuid.c
|
||||
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \
|
||||
utils/cpuid.h crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
|
||||
crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h
|
||||
utils/cpuid.h
|
||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||
|
||||
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
||||
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c
|
||||
CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
|
||||
crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \
|
||||
$(MAINHDRS)
|
||||
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
|
||||
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
|
||||
|
||||
ZLIB_SRCS = zlib_compress.c
|
||||
ZLIB_HDRS = $(MAINHDRS)
|
||||
ZLIB_OBJS = $(ZLIB_SRCS:.c=.o)
|
||||
ZLIB_CPPFLAGS = @LIBZ_INC@
|
||||
|
||||
BZLIB_SRCS = bzip2_compress.c
|
||||
BZLIB_HDRS = $(MAINHDRS)
|
||||
BZLIB_OBJS = $(BZLIB_SRCS:.c=.o)
|
||||
BZLIB_CPPFLAGS = @LIBBZ2_INC@
|
||||
|
||||
RABINSRCS = rabin/rabin_dedup.c
|
||||
RABINHDRS = rabin/rabin_dedup.h utils/utils.h
|
||||
RABINOBJS = $(RABINSRCS:.c=.o)
|
||||
|
@ -130,17 +145,17 @@ RM = rm -f
|
|||
RM_RF = rm -rf
|
||||
COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT_PROPS \
|
||||
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \
|
||||
-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I@OPENSSL_INCDIR@ \
|
||||
-I./crypto/sha2 -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \
|
||||
@LIBBZ2_INC@ @LIBZ_INC@ -I./crypto/keccak -I./transpose
|
||||
-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I./crypto/sha2 \
|
||||
-I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \
|
||||
-I./crypto/keccak -I./transpose $(EXTRA_CPPFLAGS)
|
||||
COMMON_VEC_FLAGS = -ftree-vectorize
|
||||
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||
LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \
|
||||
-L@OPENSSL_LIBDIR@ -lcrypto -lrt
|
||||
-L@OPENSSL_LIBDIR@ -lcrypto -lrt $(EXTRA_LDFLAGS)
|
||||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
|
||||
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
|
||||
$(TRANSP_OBJS)
|
||||
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS)
|
||||
|
||||
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
||||
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
||||
|
@ -235,6 +250,15 @@ $(LIBBSCWRAPOBJ): $(LIBBSCWRAP) $(LIBBSCLIB)
|
|||
$(TRANSP_OBJS): $(TRANSP_SRCS) $(TRANSP_HDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(CRYPTO_OBJS): $(CRYPTO_SRCS) $(CRYPTO_HDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(CRYPTO_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(ZLIB_OBJS): $(ZLIB_SRCS) $(ZLIB_HDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(ZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(BZLIB_OBJS): $(BZLIB_SRCS) $(BZLIB_HDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(BZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
|
@ -253,13 +277,13 @@ distclean: clean
|
|||
$(RM) Makefile
|
||||
|
||||
install: $(PROG)
|
||||
@mkdir -p $(PREFIX)/bin
|
||||
@chmod 0755 $(PREFIX)/bin
|
||||
@cp $(PROG) $(PREFIX)/bin
|
||||
@chmod 0555 $(PREFIX)/bin/$(PROG)
|
||||
@mkdir -p $(PREFIX)/share/doc/$(PROG)
|
||||
@chmod 0755 $(PREFIX)/share $(PREFIX)/share/doc $(PREFIX)/share/doc/$(PROG)
|
||||
@cp README.md $(PREFIX)/share/doc/$(PROG)/README
|
||||
@chmod 0444 $(PREFIX)/share/doc/$(PROG)/README
|
||||
@mkdir -p $(DESTDIR)$(PREFIX)/bin
|
||||
@chmod 0755 $(DESTDIR)$(PREFIX)/bin
|
||||
@cp $(PROG) $(DESTDIR)$(PREFIX)/bin
|
||||
@chmod 0555 $(DESTDIR)$(PREFIX)/bin/$(PROG)
|
||||
@mkdir -p $(DESTDIR)$(PREFIX)/share/doc/$(PROG)
|
||||
# Every path carries $(DESTDIR) so a staged install only touches the staging tree, never the live $(PREFIX).
@chmod 0755 $(DESTDIR)$(PREFIX)/share $(DESTDIR)$(PREFIX)/share/doc $(DESTDIR)$(PREFIX)/share/doc/$(PROG)
|
||||
@cp README.md $(DESTDIR)$(PREFIX)/share/doc/$(PROG)/README
|
||||
@chmod 0444 $(DESTDIR)$(PREFIX)/share/doc/$(PROG)/README
|
||||
|
||||
|
||||
|
|
26
README.md
26
README.md
|
@ -221,22 +221,28 @@ algorithm can be selected for textual and binary portions.
|
|||
|
||||
Pre-Processing Algorithms
|
||||
=========================
|
||||
As can be seen above a multitude of pre-processing algorithms are available that provide
|
||||
further compression effectiveness beyond what the usual compression algorithms can
|
||||
achieve by themselves. These are summarized below:
|
||||
As can be seen above a multitude of pre-processing algorithms are available that
|
||||
provide further compression effectiveness beyond what the usual compression
|
||||
algorithms can achieve by themselves. These are summarized below:
|
||||
|
||||
1) Deduplication : Per-Chunk (or per-segment) deduplication based on Rabin
|
||||
fingerprinting.
|
||||
|
||||
2) LZP : LZ Prediction is a variant of LZ77 that replaces repeating runs of
|
||||
text with shorter codes.
|
||||
2) Delta Compression : A similarity based (minhash) comparison of Rabin blocks. Two
|
||||
blocks at least 60% similar with each other are diffed using
|
||||
bsdiff.
|
||||
|
||||
3) Adaptive Delta : This is a simple form of Delta Encoding where arithmetic progressions
|
||||
are detected in the data stream and collapsed via Run-Length encoding.
|
||||
3) LZP : LZ Prediction is a variant of LZ77 that replaces repeating
|
||||
runs of text with shorter codes.
|
||||
|
||||
4) Matrix Transpose: This is used automatically in Delta Encoding and Deduplication. This
|
||||
attempts to transpose columnar repeating sequences of bytes into
|
||||
row-wise sequences so that compression algorithms can work better.
|
||||
4) Adaptive Delta : This is a simple form of Delta Encoding where arithmetic
|
||||
progressions are detected in the data stream and collapsed
|
||||
via Run-Length encoding.
|
||||
|
||||
5) Matrix Transpose : This is used automatically in Delta Encoding and Deduplication.
|
||||
This attempts to transpose columnar repeating sequences of
|
||||
bytes into row-wise sequences so that compression algorithms
|
||||
can work better.
|
||||
|
||||
Memory Usage
|
||||
============
|
||||
|
|
57
config
57
config
|
@ -17,6 +17,10 @@ ${prog} [<options>]
|
|||
--with-openssl=<path to OpenSSL installation tree> (Default: System)
|
||||
This defaults to the system's OpenSSL library. You can use this option
|
||||
if you want to use an alternate OpenSSL installation.
|
||||
--with-zlib=<path to zlib installation tree> (Default: System)
|
||||
Enable building against an alternate Zlib installation.
|
||||
--with-bzlib=<path to Bzip2 library installation tree> (Default: System)
|
||||
Enable building against an alternate Bzip2 library installation.
|
||||
--use-key256 Use 256-bit encryption keys. Default key length is 128-bit.
|
||||
--help Display this help message.
|
||||
|
||||
|
@ -47,6 +51,8 @@ keccak_srcs=
|
|||
keccak_hdrs=
|
||||
keccak_srcs_asm=
|
||||
lto_flag=
|
||||
zlib_prefix=
|
||||
bzlib_prefix=
|
||||
|
||||
# Try a simple compilation
|
||||
cat << _EOF > tst.c
|
||||
|
@ -114,6 +120,12 @@ do
|
|||
--with-openssl=*)
|
||||
openssl_prefix=`echo ${arg1} | cut -f2 -d"="`
|
||||
;;
|
||||
--with-zlib=*)
|
||||
zlib_prefix=`echo ${arg1} | cut -f2 -d"="`
|
||||
;;
|
||||
--with-bzlib=*)
|
||||
bzlib_prefix=`echo ${arg1} | cut -f2 -d"="`
|
||||
;;
|
||||
--use-key256)
|
||||
keylen='-DKEYLEN=32'
|
||||
;;
|
||||
|
@ -263,11 +275,26 @@ fi
|
|||
|
||||
|
||||
# Detect other library packages
|
||||
for libname in "libbz2" "libz"
|
||||
for libspec in "libbz2:${bzlib_prefix}" "libz:${zlib_prefix}"
|
||||
do
|
||||
for lib in "/lib64" "/usr/lib64" "/lib" "/usr/lib" "/lib/x86_64-linux-gnu" \
|
||||
"/usr/lib/x86_64-linux-gnu" \
|
||||
"${prefix}/lib64" "${prefix}/lib" "${prefix}/lib/x86_64-linux-gnu"
|
||||
_OIFS="$IFS"
|
||||
IFS=":"
|
||||
set -- ${libspec}
|
||||
libname=$1
|
||||
pref=$2
|
||||
IFS="$_OIFS"
|
||||
|
||||
use_prefix="${pref}"
|
||||
if [ "x${pref}" = "x" ]
|
||||
then
|
||||
use_prefix="$prefix"
|
||||
fi
|
||||
for lib in "${pref}/lib64" "${pref}/usr/lib64" "${pref}/lib" "${pref}/usr/lib" \
|
||||
"${pref}/lib/x86_64-linux-gnu" "${pref}/usr/lib/x86_64-linux-gnu" \
|
||||
"${pref}/local/lib64" "${pref}/usr/local/lib64" "${pref}/local/lib" "${pref}/usr/local/lib" \
|
||||
"${pref}/local/lib/x86_64-linux-gnu" "${pref}/usr/local/lib/x86_64-linux-gnu" \
|
||||
"${use_prefix}/lib64" "${use_prefix}/lib" "${use_prefix}/lib/x86_64-linux-gnu" \
|
||||
"${use_prefix}/usr/lib/x86_64-linux-gnu"
|
||||
do
|
||||
if [ -d ${lib} ]
|
||||
then
|
||||
|
@ -288,31 +315,49 @@ done
|
|||
|
||||
if [ "x${libbz2_libdir}" = "x" ]
|
||||
then
|
||||
if [ "x$bzlib_prefix" = "x" ]
|
||||
then
|
||||
echo "ERROR: Libbz2 not detected."
|
||||
echo " You may have to install libbz2-devel or libbz2-dev"
|
||||
else
|
||||
echo "ERROR: Bzip2 library not detected in given prefix."
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x${libz_libdir}" = "x" ]
|
||||
then
|
||||
if [ "x$zlib_prefix" = "x" ]
|
||||
then
|
||||
echo "ERROR: Zlib not detected."
|
||||
echo " You may have to install libz-devel or libz-dev"
|
||||
else
|
||||
echo "ERROR: Zlib not detected in given prefix."
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
libbz2_inc=
|
||||
libz_inc=
|
||||
# Detect other library headers
|
||||
for hdr in "libbz2_inc:bzlib.h" "libz_inc:zlib.h"
|
||||
for hdr in "libbz2_inc:bzlib.h:${bzlib_prefix}" "libz_inc:zlib.h:${zlib_prefix}"
|
||||
do
|
||||
_OIFS="$IFS"
|
||||
IFS=":"
|
||||
set -- ${hdr}
|
||||
var=$1
|
||||
hdrf=$2
|
||||
pref=$3
|
||||
IFS="$_OIFS"
|
||||
|
||||
for inc in "${prefix}/include" "/usr/include"
|
||||
use_prefix="${pref}"
|
||||
if [ "x${pref}" = "x" ]
|
||||
then
|
||||
use_prefix="$prefix"
|
||||
fi
|
||||
for inc in "${pref}/include" "${pref}/usr/include" \
|
||||
"${pref}/local/include" "${pref}/usr/local/include" \
|
||||
"${use_prefix}/include" "${use_prefix}/usr/include"
|
||||
do
|
||||
if [ -d ${inc} ]
|
||||
then
|
||||
|
|
|
@ -34,8 +34,9 @@
|
|||
* objects are output by the encoder:
|
||||
* 1) A literal run of unmodified bytes. Header: 1 zero byte followed
|
||||
* by a 64bit length in bytes.
|
||||
* 2) A literal run of transposed bytes containing at least 87% below
|
||||
* threshold sequences.
|
||||
* 2) A literal run of transposed bytes containing sequences that are
|
||||
* below threshold and the total span of those sequences is at least
|
||||
* 87% of the entire run.
|
||||
* Header: 1 byte stride length with high bit set.
|
||||
* 64bit length of span in bytes.
|
||||
* 3) An encoded run length of a series in arithmetic progression.
|
||||
|
@ -175,7 +176,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
if (gtot1 > 0) {
|
||||
/*
|
||||
* Encode previous literal run, if any. If the literal run
|
||||
* has enough (90%+) large sequences just below threshold,
|
||||
* has enough (87%+) large sequences just below threshold,
|
||||
* do a matrix transpose on the range in the hope of achieving
|
||||
* a better compression ratio.
|
||||
*/
|
||||
|
|
1
main.c
1
main.c
|
@ -272,6 +272,7 @@ preproc_decompress(compress_func_ptr dec_func, void *src, uint64_t srclen, void
|
|||
if (result != -1) {
|
||||
memcpy(src, dst, _dstlen);
|
||||
srclen = _dstlen;
|
||||
*dstlen = _dstlen;
|
||||
} else {
|
||||
return (result);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue