Fix handling of incompressible chunks.

Fix handling of various dedup failures.
Add NULL ("none") compression option for dedup-only compression.
This commit is contained in:
Moinak Ghosh 2012-08-05 22:35:51 +05:30
parent 927da81562
commit a4311f2ede
4 changed files with 140 additions and 38 deletions

View file

@ -24,7 +24,7 @@
PROG= pcompress PROG= pcompress
MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \ MAINSRCS = main.c utils.c allocator.c zlib_compress.c bzip2_compress.c \
lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \ lzma_compress.c ppmd_compress.c adaptive_compress.c lzfx_compress.c \
lz4_compress.c lz4_compress.c none_compress.c
MAINHDRS = allocator.h pcompress.h utils.h MAINHDRS = allocator.h pcompress.h utils.h
MAINOBJS = $(MAINSRCS:.c=.o) MAINOBJS = $(MAINSRCS:.c=.o)

49
main.c
View file

@ -195,7 +195,6 @@ redo:
_chunksize = ntohll(*((ssize_t *)rseg)); _chunksize = ntohll(*((ssize_t *)rseg));
} }
if (HDR & COMPRESSED) {
if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) { if (enable_rabin_scan && (HDR & CHUNK_FLAG_DEDUP)) {
uchar_t *cmpbuf, *ubuf; uchar_t *cmpbuf, *ubuf;
@ -212,6 +211,7 @@ redo:
*/ */
cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp; cmpbuf = cseg + RABIN_HDR_SIZE + rabin_index_sz_cmp;
ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz; ubuf = tdat->uncompressed_chunk + RABIN_HDR_SIZE + rabin_index_sz;
if (HDR & COMPRESSED) {
rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize, rv = tdat->decompress(cmpbuf, rabin_data_sz_cmp, ubuf, &_chunksize,
tdat->level, HDR, tdat->data); tdat->level, HDR, tdat->data);
if (rv == -1) { if (rv == -1) {
@ -219,6 +219,9 @@ redo:
fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id); fprintf(stderr, "ERROR: Chunk %d, decompression failed.\n", tdat->id);
goto cont; goto cont;
} }
} else {
memcpy(ubuf, cmpbuf, _chunksize);
}
rv = 0; rv = 0;
cmpbuf = cseg + RABIN_HDR_SIZE; cmpbuf = cseg + RABIN_HDR_SIZE;
@ -231,11 +234,12 @@ redo:
memcpy(ubuf, cmpbuf, rabin_index_sz); memcpy(ubuf, cmpbuf, rabin_index_sz);
} }
} else { } else {
if (HDR & COMPRESSED) {
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk, rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
&_chunksize, tdat->level, HDR, tdat->data); &_chunksize, tdat->level, HDR, tdat->data);
}
} else { } else {
memcpy(cseg + CHDR_SZ, tdat->uncompressed_chunk, _chunksize); memcpy(tdat->uncompressed_chunk, cseg, _chunksize);
}
} }
tdat->len_cmp = _chunksize; tdat->len_cmp = _chunksize;
@ -609,6 +613,7 @@ perform_compress(void *dat) {
typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp; typeof (tdat->chunksize) _chunksize, len_cmp, rabin_index_sz, index_size_cmp;
int type, rv; int type, rv;
uchar_t *compressed_chunk; uchar_t *compressed_chunk;
ssize_t rbytes;
redo: redo:
sem_wait(&tdat->start_sem); sem_wait(&tdat->start_sem);
@ -619,18 +624,20 @@ redo:
} }
compressed_chunk = tdat->compressed_chunk + CHDR_SZ; compressed_chunk = tdat->compressed_chunk + CHDR_SZ;
rbytes = tdat->rbytes;
/* Perform Dedup if enabled. */ /* Perform Dedup if enabled. */
if (enable_rabin_scan) { if (enable_rabin_scan) {
rabin_context_t *rctx; rabin_context_t *rctx;
ssize_t rbytes;
/* /*
* Compute checksum of original uncompressed chunk. * Compute checksum of original uncompressed chunk. When doing dedup
* cmp_seg holds the original data instead of uncompressed_chunk. We dedup
* into uncompressed_chunk so that compress transforms uncompressed_chunk
* back into cmp_seg. Avoids an extra memcpy().
*/ */
tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0); tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0);
rctx = tdat->rctx; rctx = tdat->rctx;
rbytes = tdat->rbytes;
reset_rabin_context(tdat->rctx); reset_rabin_context(tdat->rctx);
rctx->cbuf = tdat->uncompressed_chunk; rctx->cbuf = tdat->uncompressed_chunk;
rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL); rabin_index_sz = rabin_dedup(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0, NULL);
@ -653,7 +660,6 @@ redo:
if (enable_rabin_scan && tdat->rctx->valid) { if (enable_rabin_scan && tdat->rctx->valid) {
_chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE; _chunksize = tdat->rbytes - rabin_index_sz - RABIN_HDR_SIZE;
index_size_cmp = rabin_index_sz; index_size_cmp = rabin_index_sz;
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
rv = 0; rv = 0;
if (rabin_index_sz >= 90) { if (rabin_index_sz >= 90) {
@ -669,16 +675,31 @@ redo:
index_size_cmp += RABIN_HDR_SIZE; index_size_cmp += RABIN_HDR_SIZE;
rabin_index_sz += RABIN_HDR_SIZE; rabin_index_sz += RABIN_HDR_SIZE;
if (rv == 0) { if (rv == 0) {
memcpy(compressed_chunk, tdat->uncompressed_chunk, RABIN_HDR_SIZE);
/* Compress data chunk. */ /* Compress data chunk. */
rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz, rv = tdat->compress(tdat->uncompressed_chunk + rabin_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize, _chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, tdat->data); tdat->level, 0, tdat->data);
/* Can't compress data just retain as-is. */
if (rv < 0)
memcpy(compressed_chunk + index_size_cmp,
tdat->uncompressed_chunk + rabin_index_sz, _chunksize);
/* Now update rabin header with the compressed sizes. */ /* Now update rabin header with the compressed sizes. */
rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE, rabin_update_hdr(compressed_chunk, index_size_cmp - RABIN_HDR_SIZE,
_chunksize); _chunksize);
} else {
/* If rabin index compression fails, we just drop down to plain
* compression and avoid dedup. This should be a pretty rare case.
*/
tdat->rctx->valid = 0;
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
tdat->rbytes = rbytes;
goto plain_compress;
} }
_chunksize += index_size_cmp; _chunksize += index_size_cmp;
} else { } else {
plain_compress:
_chunksize = tdat->rbytes; _chunksize = tdat->rbytes;
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes, rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data); compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
@ -690,7 +711,8 @@ redo:
* chunk will be left uncompressed. * chunk will be left uncompressed.
*/ */
tdat->len_cmp = _chunksize; tdat->len_cmp = _chunksize;
if (_chunksize >= tdat->chunksize || rv < 0) { if (_chunksize >= rbytes || rv < 0) {
if (!enable_rabin_scan || !tdat->rctx->valid)
memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes); memcpy(compressed_chunk, tdat->uncompressed_chunk, tdat->rbytes);
type = UNCOMPRESSED; type = UNCOMPRESSED;
tdat->len_cmp = tdat->rbytes; tdat->len_cmp = tdat->rbytes;
@ -866,8 +888,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* Adjust chunk size for small files. We then get an archive with * Adjust chunk size for small files. We then get an archive with
* a single chunk for the entire file. * a single chunk for the entire file.
*/ */
if (sbuf.st_size < chunksize) { if (sbuf.st_size <= chunksize) {
chunksize = sbuf.st_size; chunksize = sbuf.st_size;
enable_rabin_split = 0; // Do not split for whole files.
nthreads = 1; nthreads = 1;
} else { } else {
if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) { if (nthreads == 0 || nthreads > sbuf.st_size / chunksize) {
@ -1243,6 +1266,14 @@ init_algo(const char *algo, int bail)
_stats_func = lz4_stats; _stats_func = lz4_stats;
rv = 0; rv = 0;
} else if (memcmp(algorithm, "none", 4) == 0) {
_compress_func = none_compress;
_decompress_func = none_decompress;
_init_func = none_init;
_deinit_func = none_deinit;
_stats_func = none_stats;
rv = 0;
/* adapt2 and adapt ordering of the checks matter here. */ /* adapt2 and adapt ordering of the checks matter here. */
} else if (memcmp(algorithm, "adapt2", 6) == 0) { } else if (memcmp(algorithm, "adapt2", 6) == 0) {
_compress_func = adapt_compress; _compress_func = adapt_compress;

64
none_compress.c Normal file
View file

@ -0,0 +1,64 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
* This program includes partly-modified public domain source
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
*/
#include <sys/types.h>
#include <stdio.h>
#include <strings.h>
#include <limits.h>
#include <utils.h>
#include <pcompress.h>
#include <lzfx.h>
#include <allocator.h>
/*
 * Statistics hook for the "none" algorithm.
 *
 * The pass-through back end gathers no statistics, so there is nothing
 * to print; the body is intentionally empty.
 *
 * show: ignored.
 */
void
none_stats(int show)
{
	(void) show;
}
/*
 * Initialisation hook for the "none" algorithm.
 *
 * No per-thread state is allocated and none of the arguments are read
 * or modified.
 *
 * data, level, chunksize: ignored.
 *
 * Returns 0 unconditionally.
 */
int
none_init(void **data, int *level, ssize_t chunksize)
{
	(void) data;
	(void) level;
	(void) chunksize;

	return 0;
}
/*
 * Tear-down hook for the "none" algorithm.
 *
 * none_init() allocates nothing, so there is nothing to release here;
 * the argument is neither read nor modified.
 *
 * data: ignored.
 *
 * Returns 0 unconditionally.
 */
int
none_deinit(void **data)
{
	(void) data;

	return 0;
}
/*
 * "Compress" a buffer for the "none" algorithm by copying it verbatim.
 *
 * src, srclen: input buffer and its length in bytes.
 * dst:         output buffer. It is written with exactly srclen bytes via
 *              memcpy(), so it must be at least that large and must not
 *              overlap src.
 * dstlen:      when non-NULL, receives the number of bytes written
 *              (always srclen). Callers read this value back as the
 *              produced length, so it is set explicitly instead of
 *              relying on the caller having pre-loaded it with srclen.
 * level, chdr, data: ignored.
 *
 * Returns 0 unconditionally.
 */
int
none_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
    int level, uchar_t chdr, void *data)
{
	(void) level;
	(void) chdr;
	(void) data;

	memcpy(dst, src, srclen);
	if (dstlen != NULL) {
		*dstlen = srclen;
	}
	return (0);
}
/*
 * "Decompress" a buffer for the "none" algorithm by copying it verbatim.
 *
 * src, srclen: stored buffer and its length in bytes.
 * dst:         output buffer. It is written with exactly srclen bytes via
 *              memcpy(), so it must be at least that large and must not
 *              overlap src.
 * dstlen:      when non-NULL, receives the number of bytes written
 *              (always srclen). The caller uses this value as the
 *              restored length after the call, so it is set explicitly
 *              rather than left at whatever the caller passed in.
 * level, chdr, data: ignored.
 *
 * Returns 0 unconditionally.
 */
int
none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
    int level, uchar_t chdr, void *data)
{
	(void) level;
	(void) chdr;
	(void) data;

	memcpy(dst, src, srclen);
	if (dstlen != NULL) {
		*dstlen = srclen;
	}
	return (0);
}

View file

@ -72,6 +72,8 @@ extern int lz_fx_compress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data); size_t *dstlen, int level, uchar_t chdr, void *data);
extern int lz4_compress(void *src, size_t srclen, void *dst, extern int lz4_compress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data); size_t *dstlen, int level, uchar_t chdr, void *data);
extern int none_compress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data);
extern int zlib_decompress(void *src, size_t srclen, void *dst, extern int zlib_decompress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data); size_t *dstlen, int level, uchar_t chdr, void *data);
@ -87,6 +89,8 @@ extern int lz_fx_decompress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data); size_t *dstlen, int level, uchar_t chdr, void *data);
extern int lz4_decompress(void *src, size_t srclen, void *dst, extern int lz4_decompress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data); size_t *dstlen, int level, uchar_t chdr, void *data);
extern int none_decompress(void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data);
extern int adapt_init(void **data, int *level, ssize_t chunksize); extern int adapt_init(void **data, int *level, ssize_t chunksize);
extern int adapt2_init(void **data, int *level, ssize_t chunksize); extern int adapt2_init(void **data, int *level, ssize_t chunksize);
@ -96,12 +100,14 @@ extern int bzip2_init(void **data, int *level, ssize_t chunksize);
extern int zlib_init(void **data, int *level, ssize_t chunksize); extern int zlib_init(void **data, int *level, ssize_t chunksize);
extern int lz_fx_init(void **data, int *level, ssize_t chunksize); extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
extern int lz4_init(void **data, int *level, ssize_t chunksize); extern int lz4_init(void **data, int *level, ssize_t chunksize);
extern int none_init(void **data, int *level, ssize_t chunksize);
extern int adapt_deinit(void **data); extern int adapt_deinit(void **data);
extern int lzma_deinit(void **data); extern int lzma_deinit(void **data);
extern int ppmd_deinit(void **data); extern int ppmd_deinit(void **data);
extern int lz_fx_deinit(void **data); extern int lz_fx_deinit(void **data);
extern int lz4_deinit(void **data); extern int lz4_deinit(void **data);
extern int none_deinit(void **data);
extern void adapt_stats(int show); extern void adapt_stats(int show);
extern void ppmd_stats(int show); extern void ppmd_stats(int show);
@ -110,6 +116,7 @@ extern void bzip2_stats(int show);
extern void zlib_stats(int show); extern void zlib_stats(int show);
extern void lz_fx_stats(int show); extern void lz_fx_stats(int show);
extern void lz4_stats(int show); extern void lz4_stats(int show);
extern void none_stats(int show);
/* /*
* Per-thread data structure for compression and decompression threads. * Per-thread data structure for compression and decompression threads.