diff --git a/Makefile.in b/Makefile.in index 9a15e44..f279b31 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,6 +97,10 @@ LIBBSCLIB = @LIBBSCLIB@ LIBBSCGEN_OPT = -fopenmp LIBBSCCPPFLAGS = -I$(LIBBSCDIR)/libbsc -DENABLE_PC_LIBBSC +TRANSP_SRCS = transpose/transpose.c +TRANSP_HDRS = transpose/transpose.h +TRANSP_OBJS = $(TRANSP_SRCS:.c=.o) + KECCAK_SRC_COMMON = crypto/keccak/genKAT.c crypto/keccak/KeccakDuplex.c \ crypto/keccak/KeccakNISTInterface.c crypto/keccak/KeccakSponge.c KECCAK_SRC_OPT64 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-opt64.c @@ -120,7 +124,7 @@ KECCAK_OBJS_ASM = $(KECCAK_SRCS_ASM:.s=.o) BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ lzp/*~ utils/*~ crypto/sha2/*~ \ crypto/sha2/intel/*~ crypto/aes/*~ crypto/scrypt/*~ crypto/*~ rabin/global/*~ \ - delta2/*~ crypto/keccak/*~ + delta2/*~ crypto/keccak/*~ transpose/*~ RM = rm -f RM_RF = rm -rf @@ -128,14 +132,15 @@ COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT -DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \ -I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I@OPENSSL_INCDIR@ \ -I./crypto/sha2 -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \ - @LIBBZ2_INC@ @LIBZ_INC@ -I./crypto/keccak + @LIBBZ2_INC@ @LIBZ_INC@ -I./crypto/keccak -I./transpose COMMON_VEC_FLAGS = -ftree-vectorize COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \ -L@OPENSSL_LIBDIR@ -lcrypto -lrt OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \ -$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) +$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ +$(TRANSP_OBJS) DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ DEBUG_COMPILE = gcc -m64 -g -msse3 -c @@ -227,6 +232,9 @@ $(LIBBSCLIB): 
$(LIBBSCWRAPOBJ): $(LIBBSCWRAP) $(LIBBSCLIB) $(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ +$(TRANSP_OBJS): $(TRANSP_SRCS) $(TRANSP_HDRS) + $(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ + $(MAINOBJS): $(MAINSRCS) $(MAINHDRS) $(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ diff --git a/main.c b/main.c index 84c0aa6..696a820 100644 --- a/main.c +++ b/main.c @@ -46,6 +46,7 @@ #include #include #include +#include /* * We use 5MB chunks by default. @@ -452,13 +453,21 @@ redo: rv = 0; cmpbuf = cseg + RABIN_HDR_SIZE; ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE; - if (dedupe_index_sz >= 90) { + + if (dedupe_index_sz >= 90 && dedupe_index_sz > dedupe_index_sz_cmp) { /* Index should be at least 90 bytes to have been compressed. */ rv = lzma_decompress(cmpbuf, dedupe_index_sz_cmp, ubuf, &dedupe_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data); } else { memcpy(ubuf, cmpbuf, dedupe_index_sz); } + + /* + * Recover from transposed index. + */ + transpose(ubuf, cmpbuf, dedupe_index_sz, sizeof (uint32_t), COL); + memcpy(ubuf, cmpbuf, dedupe_index_sz); + } else { if (HDR & COMPRESSED) { if (HDR & CHUNK_FLAG_PREPROC) { @@ -1150,48 +1159,57 @@ redo: index_size_cmp = dedupe_index_sz; rv = 0; + + /* + * Do a matrix transpose of the index table with the hope of improving + * compression ratio subsequently. + */ + transpose(tdat->uncompressed_chunk + RABIN_HDR_SIZE, + compressed_chunk + RABIN_HDR_SIZE, dedupe_index_sz, + sizeof (uint32_t), ROW); + memcpy(tdat->uncompressed_chunk + RABIN_HDR_SIZE, + compressed_chunk + RABIN_HDR_SIZE, dedupe_index_sz); + if (dedupe_index_sz >= 90) { /* Compress index if it is at least 90 bytes. */ rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE, dedupe_index_sz, compressed_chunk + RABIN_HDR_SIZE, &index_size_cmp, tdat->rctx->level, 255, tdat->rctx->lzma_data); + + /* + * If index compression fails or does not produce a smaller result + * retain it as is. 
In that case compressed size == original size + * and it will be handled correctly during decompression. + */ + if (rv != 0 || index_size_cmp >= dedupe_index_sz) { + index_size_cmp = dedupe_index_sz; + goto plain_index; + } } else { +plain_index: memcpy(compressed_chunk + RABIN_HDR_SIZE, tdat->uncompressed_chunk + RABIN_HDR_SIZE, dedupe_index_sz); } index_size_cmp += RABIN_HDR_SIZE; dedupe_index_sz += RABIN_HDR_SIZE; - if (rv == 0) { - memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE); - /* Compress data chunk. */ - if (lzp_preprocess) { - rv = preproc_compress(tdat->compress, - tdat->uncompressed_chunk + dedupe_index_sz, - _chunksize, compressed_chunk + index_size_cmp, &_chunksize, - tdat->level, 0, tdat->data, tdat->props); - } else { - rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, - _chunksize, compressed_chunk + index_size_cmp, &_chunksize, - tdat->level, 0, tdat->data); - } - - /* Can't compress data just retain as-is. */ - if (rv < 0) - memcpy(compressed_chunk + index_size_cmp, - tdat->uncompressed_chunk + dedupe_index_sz, _chunksize); - /* Now update rabin header with the compressed sizes. */ - update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, - _chunksize); + memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE); + /* Compress data chunk. */ + if (lzp_preprocess) { + rv = preproc_compress(tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz, + _chunksize, compressed_chunk + index_size_cmp, &_chunksize, + tdat->level, 0, tdat->data, tdat->props); } else { - /* If rabin index compression fails, we just drop down to plain - * compression and avoid dedup. Should be pretty rare case. 
- */ - tdat->rctx->valid = 0; - memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes); - tdat->rbytes = rbytes; - goto plain_compress; + rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, _chunksize, + compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, tdat->data); } + + /* Can't compress data just retain as-is. */ + if (rv < 0) + memcpy(compressed_chunk + index_size_cmp, + tdat->uncompressed_chunk + dedupe_index_sz, _chunksize); + /* Now update rabin header with the compressed sizes. */ + update_dedupe_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, _chunksize); _chunksize += index_size_cmp; } else { plain_compress: diff --git a/transpose/transpose.c b/transpose/transpose.c new file mode 100644 index 0000000..760400f --- /dev/null +++ b/transpose/transpose.c @@ -0,0 +1,50 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#include "transpose.h" + +/* + * Perform a simple matrix transpose of the given buffer in "from". + * If the buffer contains tables of numbers or structured data a + * transpose can potentially help improve compression ratio by + * bringing repeating values in columns into row ordering. 
/*
 * Byte-matrix transposition helper: the public declarations
 * (rowcol_t and the transpose() prototype) followed by the
 * implementation of transpose().
 */

/*
 * This file is a part of Pcompress, a chunked parallel multi-
 * algorithm lossless compression and decompression program.
 *
 * Copyright (C) 2012 Moinak Ghosh. All rights reserved.
 * Use is subject to license terms.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * moinakg@belenix.org, http://moinakg.wordpress.com/
 */

#ifndef	_TRANSP_H
#define	_TRANSP_H

#include <stdint.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Selects how transpose() interprets the source buffer:
 *   ROW - source is (buflen / stride) rows of "stride" bytes each.
 *   COL - source is "stride" rows of (buflen / stride) bytes each,
 *         i.e. the layout produced by a previous ROW call, so a COL
 *         call with the same buflen and stride undoes a ROW call.
 */
typedef enum {
	ROW = 0,
	COL = 1
} rowcol_t;

void transpose(unsigned char *from, unsigned char *to, uint64_t buflen,
    uint64_t stride, rowcol_t rc);

#ifdef	__cplusplus
}
#endif

#endif

/*
 * Transpose the byte matrix held in "from" into "to".
 *
 * The byte at (row r, column c) of the source is stored at
 * (row c, column r) of the destination, i.e. to[c * rows + r] =
 * from[r * cols + c], where rows and cols are derived from buflen,
 * stride and rc as described for rowcol_t.
 *
 * Parameters:
 *   from   - source buffer, at least buflen bytes are read.
 *   to     - destination buffer, at least buflen bytes are written.
 *            The routine does not work in place: it reads source bytes
 *            after having written destination bytes at other offsets,
 *            so "from" and "to" are expected to be distinct buffers.
 *   buflen - number of bytes to process.
 *   stride - width in bytes of one table entry.
 *   rc     - ROW for the forward transform, COL for its inverse.
 *
 * Edge cases:
 *   - stride == 0 cannot describe a matrix (and would divide by zero),
 *     so the data is copied through unchanged.
 *   - If buflen is not a multiple of stride, the trailing
 *     buflen % stride bytes are copied through unchanged, so every
 *     byte of "to" in [0, buflen) is defined and ROW followed by COL
 *     restores the original buffer for any length.
 */
void
transpose(unsigned char *from, unsigned char *to, uint64_t buflen, uint64_t stride, rowcol_t rc)
{
	uint64_t rows, cols, r, c, src_off, dst_off, pos;

	if (stride == 0) {
		/* No usable geometry: degrade to a plain copy. */
		for (pos = 0; pos < buflen; pos++) {
			to[pos] = from[pos];
		}
		return;
	}

	if (rc == ROW) {
		rows = buflen / stride;
		cols = stride;
	} else {
		cols = buflen / stride;
		rows = stride;
	}

	/* Walk the source row by row, scattering each row into a column. */
	src_off = 0;
	for (r = 0; r < rows; r++) {
		dst_off = r;
		for (c = 0; c < cols; c++) {
			to[dst_off] = from[src_off + c];
			dst_off += rows;
		}
		src_off += cols;
	}

	/* Bytes beyond the last complete entry are carried over verbatim. */
	for (pos = rows * cols; pos < buflen; pos++) {
		to[pos] = from[pos];
	}
}