Fix handling of incompressible chunks.
Fix handling of various dedup failures. Add a "none" (pass-through) compression option for dedup-only operation.
This commit is contained in:
parent
927da81562
commit
a4311f2ede
4 changed files with 140 additions and 38 deletions
2
Makefile
2
Makefile
|
@ -24,7 +24,7 @@
|
||||||
PROG= pcompress
|
PROG= pcompress
|
||||||
MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \
|
MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \
|
||||||
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \
|
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \
|
||||||
lz4_compress.c
|
lz4_compress.c none_compress.c
|
||||||
MAINHDRS = allocator.h pcompress.h utils.h
|
MAINHDRS = allocator.h pcompress.h utils.h
|
||||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||||
|
|
||||||
|
|
49
main.c
49
main.c
|
@ -195,7 +195,6 @@ redo:
|
||||||
_chunksize = ntohll(*((ssize_t *)rseg));
|
_chunksize = ntohll(*((ssize_t *)rseg));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (HDR & COMPRESSED) {
|
|
||||||
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
|
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
|
||||||
uchar_t *cmpbuf, *ubuf;
|
uchar_t *cmpbuf, *ubuf;
|
||||||
|
|
||||||
|
@ -212,6 +211,7 @@ redo:
|
||||||
*/
|
*/
|
||||||
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
|
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
|
||||||
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
|
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
|
||||||
|
if (HDR & COMPRESSED) {
|
||||||
rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize,
|
rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize,
|
||||||
tdat->level, HDR, tdat->data);
|
tdat->level, HDR, tdat->data);
|
||||||
if (rv == -1) {
|
if (rv == -1) {
|
||||||
|
@ -219,6 +219,9 @@ redo:
|
||||||
fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id);
|
fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id);
|
||||||
goto cont;
|
goto cont;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
memcpy(ubuf, cmpbuf, _chunksize);
|
||||||
|
}
|
||||||
|
|
||||||
rv = 0;
|
rv = 0;
|
||||||
cmpbuf = cseg + RABIN_HDR_SIZE;
|
cmpbuf = cseg + RABIN_HDR_SIZE;
|
||||||
|
@ -231,11 +234,12 @@ redo:
|
||||||
memcpy(ubuf, cmpbuf, rabin_index_sz);
|
memcpy(ubuf, cmpbuf, rabin_index_sz);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (HDR & COMPRESSED) {
|
||||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||||
&_chunksize, tdat->level, HDR, tdat->data);
|
&_chunksize, tdat->level, HDR, tdat->data);
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
memcpy(cseg + CHDR_SZ, tdat->uncompressed_chunk, _chunksize);
|
memcpy(tdat->uncompressed_chunk, cseg, _chunksize);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tdat->len_cmp = _chunksize;
|
tdat->len_cmp = _chunksize;
|
||||||
|
|
||||||
|
@ -609,6 +613,7 @@ perform_compress(void *dat) {
|
||||||
typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp;
|
typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp;
|
||||||
int type, rv;
|
int type, rv;
|
||||||
uchar_t *compressed_chunk;
|
uchar_t *compressed_chunk;
|
||||||
|
ssize_t rbytes;
|
||||||
|
|
||||||
redo:
|
redo:
|
||||||
sem_wait(&tdat->start_sem);
|
sem_wait(&tdat->start_sem);
|
||||||
|
@ -619,18 +624,20 @@ redo:
|
||||||
}
|
}
|
||||||
|
|
||||||
compressed_chunk = tdat->compressed_chunk + CHDR_SZ;
|
compressed_chunk = tdat->compressed_chunk + CHDR_SZ;
|
||||||
|
rbytes = tdat->rbytes;
|
||||||
/* Perform Dedup if enabled. */
|
/* Perform Dedup if enabled. */
|
||||||
if (enable_rabin_scan) {
|
if (enable_rabin_scan) {
|
||||||
rabin_context_t *rctx;
|
rabin_context_t *rctx;
|
||||||
ssize_t rbytes;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute checksum of original uncompressed chunk.
|
* Compute checksum of original uncompressed chunk. When doing dedup
|
||||||
|
* cmp_seg holds the original data instead of uncompressed_chunk. We dedup
|
||||||
|
* into uncompressed_chunk so that compress transforms uncompressed_chunk
|
||||||
|
* back into cmp_seg. Avoids an extra memcpy().
|
||||||
*/
|
*/
|
||||||
tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0);
|
tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0);
|
||||||
|
|
||||||
rctx = tdat->rctx;
|
rctx = tdat->rctx;
|
||||||
rbytes = tdat->rbytes;
|
|
||||||
reset_rabin_context(tdat->rctx);
|
reset_rabin_context(tdat->rctx);
|
||||||
rctx->cbuf = tdat->uncompressed_chunk;
|
rctx->cbuf = tdat->uncompressed_chunk;
|
||||||
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
|
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
|
||||||
|
@ -653,7 +660,6 @@ redo:
|
||||||
if (enable_rabin_scan && tdat->rctx->valid) {
|
if (enable_rabin_scan && tdat->rctx->valid) {
|
||||||
_chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE;
|
_chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE;
|
||||||
index_size_cmp = rabin_index_sz;
|
index_size_cmp = rabin_index_sz;
|
||||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
|
||||||
|
|
||||||
rv = 0;
|
rv = 0;
|
||||||
if (rabin_index_sz >= 90) {
|
if (rabin_index_sz >= 90) {
|
||||||
|
@ -669,16 +675,31 @@ redo:
|
||||||
index_size_cmp += RABIN_HDR_SIZE;
|
index_size_cmp += RABIN_HDR_SIZE;
|
||||||
rabin_index_sz += RABIN_HDR_SIZE;
|
rabin_index_sz += RABIN_HDR_SIZE;
|
||||||
if (rv == 0) {
|
if (rv == 0) {
|
||||||
|
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
|
||||||
/* Compress data chunk. */
|
/* Compress data chunk. */
|
||||||
rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz,
|
rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz,
|
||||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||||
tdat->level, 0, tdat->data);
|
tdat->level, 0, tdat->data);
|
||||||
|
|
||||||
|
/* Can't compress the data; just retain it as-is. */
|
||||||
|
if (rv < 0)
|
||||||
|
memcpy(compressed_chunk + index_size_cmp,
|
||||||
|
tdat->uncompressed_chunk + rabin_index_sz, _chunksize);
|
||||||
/* Now update rabin header with the compressed sizes. */
|
/* Now update rabin header with the compressed sizes. */
|
||||||
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
|
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
|
||||||
_chunksize);
|
_chunksize);
|
||||||
|
} else {
|
||||||
|
/* If rabin index compression fails, we just drop down to plain
|
||||||
|
* compression and avoid dedup. This should be a pretty rare case.
|
||||||
|
*/
|
||||||
|
tdat->rctx->valid = 0;
|
||||||
|
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
||||||
|
tdat->rbytes = rbytes;
|
||||||
|
goto plain_compress;
|
||||||
}
|
}
|
||||||
_chunksize += index_size_cmp;
|
_chunksize += index_size_cmp;
|
||||||
} else {
|
} else {
|
||||||
|
plain_compress:
|
||||||
_chunksize = tdat->rbytes;
|
_chunksize = tdat->rbytes;
|
||||||
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
|
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
|
||||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
||||||
|
@ -690,7 +711,8 @@ redo:
|
||||||
* chunk will be left uncompressed.
|
* chunk will be left uncompressed.
|
||||||
*/
|
*/
|
||||||
tdat->len_cmp = _chunksize;
|
tdat->len_cmp = _chunksize;
|
||||||
if (_chunksize >= tdat->chunksize || rv < 0) {
|
if (_chunksize >= rbytes || rv < 0) {
|
||||||
|
if (!enable_rabin_scan || !tdat->rctx->valid)
|
||||||
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
|
||||||
type = UNCOMPRESSED;
|
type = UNCOMPRESSED;
|
||||||
tdat->len_cmp = tdat->rbytes;
|
tdat->len_cmp = tdat->rbytes;
|
||||||
|
@ -866,8 +888,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
|
||||||
* Adjust chunk size for small files. We then get an archive with
|
* Adjust chunk size for small files. We then get an archive with
|
||||||
* a single chunk for the entire file.
|
* a single chunk for the entire file.
|
||||||
*/
|
*/
|
||||||
if (sbuf.st_size < chunksize) {
|
if (sbuf.st_size <= chunksize) {
|
||||||
chunksize = sbuf.st_size;
|
chunksize = sbuf.st_size;
|
||||||
|
enable_rabin_split = 0; // Do not split for whole files.
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
} else {
|
} else {
|
||||||
if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) {
|
if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) {
|
||||||
|
@ -1243,6 +1266,14 @@ init_algo(const char *algo, int bail)
|
||||||
_stats_func = lz4_stats;
|
_stats_func = lz4_stats;
|
||||||
rv = 0;
|
rv = 0;
|
||||||
|
|
||||||
|
} else if (memcmp(algorithm, "none", 4) == 0) {
|
||||||
|
_compress_func = none_compress;
|
||||||
|
_decompress_func = none_decompress;
|
||||||
|
_init_func = none_init;
|
||||||
|
_deinit_func = none_deinit;
|
||||||
|
_stats_func = none_stats;
|
||||||
|
rv = 0;
|
||||||
|
|
||||||
/* adapt2 and adapt ordering of the checks matter here. */
|
/* adapt2 and adapt ordering of the checks matter here. */
|
||||||
} else if (memcmp(algorithm, "adapt2", 6) == 0) {
|
} else if (memcmp(algorithm, "adapt2", 6) == 0) {
|
||||||
_compress_func = adapt_compress;
|
_compress_func = adapt_compress;
|
||||||
|
|
64
none_compress.c
Normal file
64
none_compress.c
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
|
* algorithm lossless compression and decompression program.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||||
|
*
|
||||||
|
* This program includes partly-modified public domain source
|
||||||
|
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <limits.h>
#include <utils.h>
#include <pcompress.h>
#include <lzfx.h>
#include <allocator.h>
|
||||||
|
|
||||||
|
/*
 * Statistics hook for the "none" (pass-through) algorithm.
 *
 * The pass-through algorithm keeps no statistics, so this is
 * intentionally a no-op.
 *
 * show - ignored.
 */
void
none_stats(int show)
{
	(void) show;	/* Unused: nothing to report. */
}
|
||||||
|
|
||||||
|
/*
 * Initialization hook for the "none" (pass-through) algorithm.
 *
 * No per-thread state is required, so none of the arguments are read
 * or modified.
 *
 * data      - ignored; left untouched.
 * level     - ignored; left untouched.
 * chunksize - ignored.
 *
 * Returns 0 always.
 */
int
none_init(void **data, int *level, ssize_t chunksize)
{
	(void) data;
	(void) level;
	(void) chunksize;

	return (0);
}
|
||||||
|
|
||||||
|
/*
 * Teardown hook for the "none" (pass-through) algorithm.
 *
 * none_init() allocates nothing, so there is nothing to release here.
 *
 * data - ignored; left untouched.
 *
 * Returns 0 always.
 */
int
none_deinit(void **data)
{
	(void) data;

	return (0);
}
|
||||||
|
|
||||||
|
int
|
||||||
|
none_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
|
int level, uchar_t chdr, void *data)
|
||||||
|
{
|
||||||
|
memcpy(dst, src, srclen);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||||
|
int level, uchar_t chdr, void *data)
|
||||||
|
{
|
||||||
|
memcpy(dst, src, srclen);
|
||||||
|
return (0);
|
||||||
|
}
|
|
@ -72,6 +72,8 @@ extern int lz_fx_compress(void *src, size_t srclen, void *dst,
|
||||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
extern int lz4_compress(void *src, size_t srclen, void *dst,
|
extern int lz4_compress(void *src, size_t srclen, void *dst,
|
||||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
|
extern int none_compress(void *src, size_t srclen, void *dst,
|
||||||
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
|
|
||||||
extern int zlib_decompress(void *src, size_t srclen, void *dst,
|
extern int zlib_decompress(void *src, size_t srclen, void *dst,
|
||||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
|
@ -87,6 +89,8 @@ extern int lz_fx_decompress(void *src, size_t srclen, void *dst,
|
||||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
extern int lz4_decompress(void *src, size_t srclen, void *dst,
|
extern int lz4_decompress(void *src, size_t srclen, void *dst,
|
||||||
size_t *dstlen, int level, uchar_t chdr, void *data);
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
|
extern int none_decompress(void *src, size_t srclen, void *dst,
|
||||||
|
size_t *dstlen, int level, uchar_t chdr, void *data);
|
||||||
|
|
||||||
extern int adapt_init(void **data, int *level, ssize_t chunksize);
|
extern int adapt_init(void **data, int *level, ssize_t chunksize);
|
||||||
extern int adapt2_init(void **data, int *level, ssize_t chunksize);
|
extern int adapt2_init(void **data, int *level, ssize_t chunksize);
|
||||||
|
@ -96,12 +100,14 @@ extern int bzip2_init(void **data, int *level, ssize_t chunksize);
|
||||||
extern int zlib_init(void **data, int *level, ssize_t chunksize);
|
extern int zlib_init(void **data, int *level, ssize_t chunksize);
|
||||||
extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
|
extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
|
||||||
extern int lz4_init(void **data, int *level, ssize_t chunksize);
|
extern int lz4_init(void **data, int *level, ssize_t chunksize);
|
||||||
|
extern int none_init(void **data, int *level, ssize_t chunksize);
|
||||||
|
|
||||||
extern int adapt_deinit(void **data);
|
extern int adapt_deinit(void **data);
|
||||||
extern int lzma_deinit(void **data);
|
extern int lzma_deinit(void **data);
|
||||||
extern int ppmd_deinit(void **data);
|
extern int ppmd_deinit(void **data);
|
||||||
extern int lz_fx_deinit(void **data);
|
extern int lz_fx_deinit(void **data);
|
||||||
extern int lz4_deinit(void **data);
|
extern int lz4_deinit(void **data);
|
||||||
|
extern int none_deinit(void **data);
|
||||||
|
|
||||||
extern void adapt_stats(int show);
|
extern void adapt_stats(int show);
|
||||||
extern void ppmd_stats(int show);
|
extern void ppmd_stats(int show);
|
||||||
|
@ -110,6 +116,7 @@ extern void bzip2_stats(int show);
|
||||||
extern void zlib_stats(int show);
|
extern void zlib_stats(int show);
|
||||||
extern void lz_fx_stats(int show);
|
extern void lz_fx_stats(int show);
|
||||||
extern void lz4_stats(int show);
|
extern void lz4_stats(int show);
|
||||||
|
extern void none_stats(int show);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Per-thread data structure for compression and decompression threads.
|
* Per-thread data structure for compression and decompression threads.
|
||||||
|
|
Loading…
Reference in a new issue