diff --git a/Makefile b/Makefile index 513c066..0ea89fc 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ PROG= pcompress MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \ lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \ - lz4_compress.c + lz4_compress.c none_compress.c MAINHDRS = allocator.h pcompress.h utils.h MAINOBJS = $(MAINSRCS:.c=.o) diff --git a/main.c b/main.c index 2b51b0c..2059fe5 100644 --- a/main.c +++ b/main.c @@ -195,47 +195,51 @@ redo: _chunksize = ntohll(*((ssize_t *)rseg)); } - if (HDR & COMPRESSED) { - if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) { - uchar_t *cmpbuf, *ubuf; + if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) { + uchar_t *cmpbuf, *ubuf; - /* Extract various sizes from rabin header. */ - rabin_parse_hdr(cseg, &blknum, &rabin_index_sz, &rabin_data_sz, - &rabin_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize); - memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE); + /* Extract various sizes from rabin header. */ + rabin_parse_hdr(cseg, &blknum, &rabin_index_sz, &rabin_data_sz, + &rabin_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize); + memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE); - /* - * Uncompress the data chunk first and then uncompress the index. - * The uncompress routines can use extra bytes at the end for temporary - * state/dictionary info. Since data chunk directly follows index - * uncompressing index first corrupts the data. - */ - cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp; - ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz; + /* + * Uncompress the data chunk first and then uncompress the index. + * The uncompress routines can use extra bytes at the end for temporary + * state/dictionary info. Since data chunk directly follows index + * uncompressing index first corrupts the data. 
+ */ + cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp; + ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz; + if (HDR & COMPRESSED) { rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize, - tdat->level, HDR, tdat->data); + tdat->level, HDR, tdat->data); if (rv == -1) { tdat->len_cmp = 0; fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id); goto cont; } - - rv = 0; - cmpbuf = cseg + RABIN_HDR_SIZE; - ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE; - if (rabin_index_sz >= 90) { - /* Index should be at least 90 bytes to have been compressed. */ - rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf, - &rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data); - } else { - memcpy(ubuf, cmpbuf, rabin_index_sz); - } } else { - rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk, - &_chunksize, tdat->level, HDR, tdat->data); + memcpy(ubuf, cmpbuf, _chunksize); + } + + rv = 0; + cmpbuf = cseg + RABIN_HDR_SIZE; + ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE; + if (rabin_index_sz >= 90) { + /* Index should be at least 90 bytes to have been compressed. */ + rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf, + &rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data); + } else { + memcpy(ubuf, cmpbuf, rabin_index_sz); } } else { - memcpy(cseg + CHDR_SZ, tdat->uncompressed_chunk, _chunksize); + if (HDR & COMPRESSED) { + rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk, + &_chunksize, tdat->level, HDR, tdat->data); + } else { + memcpy(tdat->uncompressed_chunk, cseg, _chunksize); + } } tdat->len_cmp = _chunksize; @@ -609,6 +613,7 @@ perform_compress(void *dat) { typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp; int type, rv; uchar_t *compressed_chunk; + ssize_t rbytes; redo: sem_wait(&tdat->start_sem); @@ -619,18 +624,20 @@ redo: } compressed_chunk = tdat->compressed_chunk + CHDR_SZ; + rbytes = tdat->rbytes; /* Perform Dedup if enabled. 
*/ if (enable_rabin_scan) { rabin_context_t *rctx; - ssize_t rbytes; /* - * Compute checksum of original uncompressed chunk. + * Compute checksum of original uncompressed chunk. When doing dedup, + * cmp_seg holds the original data instead of uncompressed_chunk. We dedup + * into uncompressed_chunk so that compress transforms uncompressed_chunk + * back into cmp_seg. This avoids an extra memcpy(). */ tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0); rctx = tdat->rctx; - rbytes = tdat->rbytes; reset_rabin_context(tdat->rctx); rctx->cbuf = tdat->uncompressed_chunk; rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); @@ -653,7 +660,6 @@ redo: if (enable_rabin_scan && tdat->rctx->valid) { _chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE; index_size_cmp = rabin_index_sz; - memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE); rv = 0; if (rabin_index_sz >= 90) { @@ -669,16 +675,31 @@ redo: index_size_cmp += RABIN_HDR_SIZE; rabin_index_sz += RABIN_HDR_SIZE; if (rv == 0) { + memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE); /* Compress data chunk. */ rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz, _chunksize, compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, tdat->data); + + /* Can't compress the data, so just retain it as-is. NOTE(review): _chunksize was also passed as dstlen to the failed compress() call above - confirm it still holds the input length here. */ + if (rv < 0) + memcpy(compressed_chunk + index_size_cmp, + tdat->uncompressed_chunk + rabin_index_sz, _chunksize); /* Now update rabin header with the compressed sizes. */ rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, _chunksize); + } else { + /* If rabin index compression fails, we just drop down to plain + * compression and avoid dedup. This should be a pretty rare case. 
+ */ + tdat->rctx->valid = 0; + memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes); + tdat->rbytes = rbytes; + goto plain_compress; } _chunksize += index_size_cmp; } else { +plain_compress: _chunksize = tdat->rbytes; rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes, compressed_chunk, &_chunksize, tdat->level, 0, tdat->data); @@ -690,8 +711,9 @@ redo: * chunk will be left uncompressed. */ tdat->len_cmp = _chunksize; - if (_chunksize >= tdat->chunksize || rv < 0) { - memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes); + if (_chunksize >= rbytes || rv < 0) { + if (!enable_rabin_scan || !tdat->rctx->valid) + memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes); type = UNCOMPRESSED; tdat->len_cmp = tdat->rbytes; } else { @@ -866,8 +888,9 @@ start_compress(const char *filename, uint64_t chunksize, int level) * Adjust chunk size for small files. We then get an archive with * a single chunk for the entire file. */ - if (sbuf.st_size < chunksize) { + if (sbuf.st_size <= chunksize) { chunksize = sbuf.st_size; + enable_rabin_split = 0; // Do not split for whole files. nthreads = 1; } else { if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) { @@ -1243,6 +1266,14 @@ init_algo(const char *algo, int bail) _stats_func = lz4_stats; rv = 0; + } else if (memcmp(algorithm, "none", 4) == 0) { + _compress_func = none_compress; + _decompress_func = none_decompress; + _init_func = none_init; + _deinit_func = none_deinit; + _stats_func = none_stats; + rv = 0; + /* adapt2 and adapt ordering of the checks matter here. */ } else if (memcmp(algorithm, "adapt2", 6) == 0) { _compress_func = adapt_compress; diff --git a/none_compress.c b/none_compress.c new file mode 100644 index 0000000..3adf5c7 --- /dev/null +++ b/none_compress.c @@ -0,0 +1,64 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. 
+ * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + * + * This program includes partly-modified public domain source + * code from the LZMA SDK: http://www.7-zip.org/sdk.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void +none_stats(int show) +{ /* Empty body: show is ignored and nothing is printed. */ +} + +int +none_init(void **data, int *level, ssize_t chunksize) +{ /* No state is set up: data, level and chunksize are left untouched. */ + return (0); +} + +int +none_deinit(void **data) +{ /* Nothing is released; data is not touched. */ + return (0); +} + +int +none_compress(void *src, size_t srclen, void *dst, size_t *dstlen, + int level, uchar_t chdr, void *data) +{ /* Verbatim copy of srclen bytes; level, chdr and data are unused. *dstlen is not written - NOTE(review): confirm callers preset it to srclen. */ + memcpy(dst, src, srclen); + return (0); +} + +int +none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, + int level, uchar_t chdr, void *data) +{ /* Verbatim copy of srclen bytes; level, chdr and data are unused. *dstlen is neither read nor written - NOTE(review): confirm callers do not rely on it being updated. 
*/ + memcpy(dst, src, srclen); + return (0); +} diff --git a/pcompress.h b/pcompress.h index 011f09d..780924d 100644 --- a/pcompress.h +++ b/pcompress.h @@ -72,6 +72,8 @@ extern int lz_fx_compress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); extern int lz4_compress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); +extern int none_compress(void *src, size_t srclen, void *dst, + size_t *dstlen, int level, uchar_t chdr, void *data); extern int zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); @@ -87,6 +89,8 @@ extern int lz_fx_decompress(void *src, size_t srclen, 
void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); extern int lz4_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); +extern int none_decompress(void *src, size_t srclen, void *dst, + size_t *dstlen, int level, uchar_t chdr, void *data); extern int adapt_init(void **data, int *level, ssize_t chunksize); extern int adapt2_init(void **data, int *level, ssize_t chunksize); @@ -96,12 +100,14 @@ extern int bzip2_init(void **data, int *level, ssize_t chunksize); extern int zlib_init(void **data, int *level, ssize_t chunksize); extern int lz_fx_init(void **data, int *level, ssize_t chunksize); extern int lz4_init(void **data, int *level, ssize_t chunksize); +extern int none_init(void **data, int *level, ssize_t chunksize); extern int adapt_deinit(void **data); extern int lzma_deinit(void **data); extern int ppmd_deinit(void **data); extern int lz_fx_deinit(void **data); extern int lz4_deinit(void **data); +extern int none_deinit(void **data); extern void adapt_stats(int show); extern void ppmd_stats(int show); @@ -110,6 +116,7 @@ extern void bzip2_stats(int show); extern void zlib_stats(int show); extern void lz_fx_stats(int show); extern void lz4_stats(int show); +extern void none_stats(int show); /* * Per-thread data structure for compression and decompression threads.