Fix handling of incompressible chunks.
Fix handling of various dedup failures. Add NULL compression option for dedup only compression.
This commit is contained in:
parent
927da81562
commit
a4311f2ede
4 changed files with 140 additions and 38 deletions
2
Makefile
2
Makefile
|
@ -24,7 +24,7 @@
|
|||
PROG= pcompress
|
||||
MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \
|
||||
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \
|
||||
lz4_compress.c
|
||||
lz4_compress.c none_compress.c
|
||||
MAINHDRS = allocator.h pcompress.h utils.h
|
||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||
|
||||
|
|
105
main.c
105
main.c
|
@ -195,47 +195,51 @@ redo:
|
|||
_chunksize = ntohll(*((ssize_t *)rseg));
|
||||
}
|
||||
|
||||
if (HDR & COMPRESSED) {
|
||||
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
|
||||
uchar_t *cmpbuf, *ubuf;
|
||||
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
|
||||
uchar_t *cmpbuf, *ubuf;
|
||||
|
||||
/* Extract various sizes from rabin header. */
|
||||
rabin_parse_hdr(cseg, &blknum, &rabin_index_sz, &rabin_data_sz,
|
||||
&rabin_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize);
|
||||
memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE);
|
||||
/* Extract various sizes from rabin header. */
|
||||
rabin_parse_hdr(cseg, &blknum, &rabin_index_sz, &rabin_data_sz,
|
||||
&rabin_index_sz_cmp, &rabin_data_sz_cmp, &_chunksize);
|
||||
memcpy(tdat->uncompressed_chunk, cseg, RABIN_HDR_SIZE);
|
||||
|
||||
/*
|
||||
* Uncompress the data chunk first and then uncompress the index.
|
||||
* The uncompress routines can use extra bytes at the end for temporary
|
||||
* state/dictionary info. Since data chunk directly follows index
|
||||
* uncompressing index first corrupts the data.
|
||||
*/
|
||||
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
|
||||
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
|
||||
/*
|
||||
* Uncompress the data chunk first and then uncompress the index.
|
||||
* The uncompress routines can use extra bytes at the end for temporary
|
||||
* state/dictionary info. Since data chunk directly follows index
|
||||
* uncompressing index first corrupts the data.
|
||||
*/
|
||||
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
|
||||
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
|
||||
if (HDR & COMPRESSED) {
|
||||
rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize,
|
||||
tdat->level, HDR, tdat->data);
|
||||
tdat->level, HDR, tdat->data);
|
||||
if (rv == -1) {
|
||||
tdat->len_cmp = 0;
|
||||
fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id);
|
||||
goto cont;
|
||||
}
|
||||
|
||||
rv = 0;
|
||||
cmpbuf = cseg + RABIN_HDR_SIZE;
|
||||
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE;
|
||||
if (rabin_index_sz >= 90) {
|
||||
/* Index should be at least 90 bytes to have been compressed. */
|
||||
rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf,
|
||||
&rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
|
||||
} else {
|
||||
memcpy(ubuf, cmpbuf, rabin_index_sz);
|
||||
}
|
||||
} else {
|
||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||
&_chunksize, tdat->level, HDR, tdat->data);
|
||||
memcpy(ubuf, cmpbuf, _chunksize);
|
||||
}
|
||||
|
||||
rv = 0;
|
||||
cmpbuf = cseg + RABIN_HDR_SIZE;
|
||||
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE;
|
||||
if (rabin_index_sz >= 90) {
|
||||
/* Index should be at least 90 bytes to have been compressed. */
|
||||
rv = lzma_decompress(cmpbuf, rabin_index_sz_cmp, ubuf,
|
||||
&rabin_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
|
||||
} else {
|
||||
memcpy(ubuf, cmpbuf, rabin_index_sz);
|
||||
}
|
||||
} else {
|
||||
memcpy(cseg + CHDR_SZ, tdat->uncompressed_chunk, _chunksize);
|
||||
if (HDR & COMPRESSED) {
|
||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||
&_chunksize, tdat->level, HDR, tdat->data);
|
||||
} else {
|
||||
memcpy(tdat->uncompressed_chunk, cseg, _chunksize);
|
||||
}
|
||||
}
|
||||
tdat->len_cmp = _chunksize;
|
||||
|
||||
|
@ -609,6 +613,7 @@ perform_compress(void *dat) {
|
|||
typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp;
|
||||
int type, rv;
|
||||
uchar_t *compressed_chunk;
|
||||
ssize_t rbytes;
|
||||
|
||||
redo:
|
||||
sem_wait(&tdat->start_sem);
|
||||
|
@ -619,18 +624,20 @@ redo:
|
|||
}
|
||||
|
||||
compressed_chunk = tdat->compressed_chunk + CHDR_SZ;
|
||||
rbytes = tdat->rbytes;
|
||||
/* Perform Dedup if enabled. */
|
||||
if (enable_rabin_scan) {
|
||||
rabin_context_t *rctx;
|
||||
ssize_t rbytes;
|
||||
|
||||
/*
|
||||
* Compute checksum of original uncompressed chunk.
|
||||
* Compute checksum of original uncompressed chunk. When doing dedup
|
||||
* cmp_seg hold original data instead of uncompressed_chunk. We dedup
|
||||
* into uncompressed_chunk so that compress transforms uncompressed_chunk
|
||||
* back into cmp_seg. Avoids an extra memcpy().
|
||||
*/
|
||||
tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0);
|
||||
|
||||
rctx = tdat->rctx;
|
||||
rbytes = tdat->rbytes;
|
||||
reset_rabin_context(tdat->rctx);
|
||||
rctx->cbuf = tdat->uncompressed_chunk;
|
||||
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
|
||||
|
@ -653,7 +660,6 @@ redo:
|
|||
if (enable_rabin_scan && tdat->rctx->valid) {
|
||||
_chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE;
|
||||
index_size_cmp = rabin_index_sz;
|
||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
||||
|
||||
rv = 0;
|
||||
if (rabin_index_sz >= 90) {
|
||||
|
@ -669,16 +675,31 @@ redo:
|
|||
index_size_cmp += RABIN_HDR_SIZE;
|
||||
rabin_index_sz += RABIN_HDR_SIZE;
|
||||
if (rv == 0) {
|
||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
||||
/* Compress data chunk. */
|
||||
rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz,
|
||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||
tdat->level, 0, tdat->data);
|
||||
|
||||
/* Can't compress data just retain as-is. */
|
||||
if (rv < 0)
|
||||
memcpy(compressed_chunk + index_size_cmp,
|
||||
tdat->uncompressed_chunk + rabin_index_sz, _chunksize);
|
||||
/* Now update rabin header with the compressed sizes. */
|
||||
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
|
||||
_chunksize);
|
||||
} else {
|
||||
/* If rabin index compression fails, we just drop down to plain
|
||||
* compression and avoid dedup. Should be pretty rare case.
|
||||
*/
|
||||
tdat->rctx->valid = 0;
|
||||
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
||||
tdat->rbytes = rbytes;
|
||||
goto plain_compress;
|
||||
}
|
||||
_chunksize += index_size_cmp;
|
||||
} else {
|
||||
plain_compress:
|
||||
_chunksize = tdat->rbytes;
|
||||
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
||||
|
@ -690,8 +711,9 @@ redo:
|
|||
* chunk will be left uncompressed.
|
||||
*/
|
||||
tdat->len_cmp = _chunksize;
|
||||
if (_chunksize >= tdat->chunksize || rv < 0) {
|
||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
||||
if (_chunksize >= rbytes || rv < 0) {
|
||||
if (!enable_rabin_scan || !tdat->rctx->valid)
|
||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
||||
type = UNCOMPRESSED;
|
||||
tdat->len_cmp = tdat->rbytes;
|
||||
} else {
|
||||
|
@ -866,8 +888,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
|
|||
* Adjust chunk size for small files. We then get an archive with
|
||||
* a single chunk for the entire file.
|
||||
*/
|
||||
if (sbuf.st_size < chunksize) {
|
||||
if (sbuf.st_size <= chunksize) {
|
||||
chunksize = sbuf.st_size;
|
||||
enable_rabin_split = 0; // Do not split for whole files.
|
||||
nthreads = 1;
|
||||
} else {
|
||||
if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) {
|
||||
|
@ -1243,6 +1266,14 @@ init_algo(const char *algo, int bail)
|
|||
_stats_func = lz4_stats;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "none", 4) == 0) {
|
||||
_compress_func = none_compress;
|
||||
_decompress_func = none_decompress;
|
||||
_init_func = none_init;
|
||||
_deinit_func = none_deinit;
|
||||
_stats_func = none_stats;
|
||||
rv = 0;
|
||||
|
||||
/* adapt2 and adapt ordering of the checks matter here. */
|
||||
} else if (memcmp(algorithm, "adapt2", 6) == 0) {
|
||||
_compress_func = adapt_compress;
|
||||
|
|
64
none_compress.c
Normal file
64
none_compress.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*
|
||||
* This program includes partly-modified public domain source
|
||||
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include <limits.h>
|
||||
#include <utils.h>
|
||||
#include <pcompress.h>
|
||||
#include <lzfx.h>
|
||||
#include <allocator.h>
|
||||
|
||||
void
|
||||
none_stats(int show)
|
||||
{
|
||||
}
|
||||
|
||||
int
|
||||
none_init(void **data, int *level, ssize_t chunksize)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
none_deinit(void **data)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
none_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
{
|
||||
memcpy(dst, src, srclen);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
{
|
||||
memcpy(dst, src, srclen);
|
||||
return (0);
|
||||
}
|
|
@ -72,6 +72,8 @@ extern int lz_fx_compress(void *src, size_t srclen, void *dst,
|
|||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int lz4_compress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int none_compress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
||||
extern int zlib_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
@ -87,6 +89,8 @@ extern int lz_fx_decompress(void *src, size_t srclen, void *dst,
|
|||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int lz4_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
extern int none_decompress(void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
|
||||
extern int adapt_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int adapt2_init(void **data, int *level, ssize_t chunksize);
|
||||
|
@ -96,12 +100,14 @@ extern int bzip2_init(void **data, int *level, ssize_t chunksize);
|
|||
extern int zlib_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int lz4_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int none_init(void **data, int *level, ssize_t chunksize);
|
||||
|
||||
extern int adapt_deinit(void **data);
|
||||
extern int lzma_deinit(void **data);
|
||||
extern int ppmd_deinit(void **data);
|
||||
extern int lz_fx_deinit(void **data);
|
||||
extern int lz4_deinit(void **data);
|
||||
extern int none_deinit(void **data);
|
||||
|
||||
extern void adapt_stats(int show);
|
||||
extern void ppmd_stats(int show);
|
||||
|
@ -110,6 +116,7 @@ extern void bzip2_stats(int show);
|
|||
extern void zlib_stats(int show);
|
||||
extern void lz_fx_stats(int show);
|
||||
extern void lz4_stats(int show);
|
||||
extern void none_stats(int show);
|
||||
|
||||
/*
|
||||
* Per-thread data structure for compression and decompression threads.
|
||||
|
|
Loading…
Reference in a new issue