diff --git a/adaptive_compress.c b/adaptive_compress.c index 9c48d6e..259d9b1 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -62,11 +62,14 @@ extern int ppmd_decompress(void *src, size_t srclen, void *dst, extern int libbsc_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); -extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern int lzma_deinit(void **data); -extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern int ppmd_deinit(void **data); -extern int libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern int libbsc_deinit(void **data); struct adapt_data { @@ -93,7 +96,8 @@ adapt_stats(int show) } int -adapt_init(void **data, int *level, int nthreads, ssize_t chunksize) +adapt_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { struct adapt_data *adat = (struct adapt_data *)(*data); int rv; @@ -101,7 +105,7 @@ adapt_init(void **data, int *level, int nthreads, ssize_t chunksize) if (!adat) { adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data)); adat->adapt_mode = 1; - rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize); + rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize, file_version, op); adat->lzma_data = NULL; adat->bsc_data = NULL; *data = adat; @@ -115,7 +119,8 @@ adapt_init(void **data, int *level, int nthreads, ssize_t chunksize) } int -adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize) +adapt2_init(void **data, int *level, int nthreads, 
ssize_t chunksize, + int file_version, compress_op_t op) { struct adapt_data *adat = (struct adapt_data *)(*data); int rv, lv; @@ -125,13 +130,13 @@ adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize) adat->adapt_mode = 2; adat->ppmd_data = NULL; lv = *level; - rv = ppmd_init(&(adat->ppmd_data), &lv, nthreads, chunksize); + rv = ppmd_init(&(adat->ppmd_data), &lv, nthreads, chunksize, file_version, op); lv = *level; if (rv == 0) - rv = lzma_init(&(adat->lzma_data), &lv, nthreads, chunksize); + rv = lzma_init(&(adat->lzma_data), &lv, nthreads, chunksize, file_version, op); lv = *level; if (rv == 0) - rv = libbsc_init(&(adat->bsc_data), &lv, nthreads, chunksize); + rv = libbsc_init(&(adat->bsc_data), &lv, nthreads, chunksize, file_version, op); *data = adat; if (*level > 9) *level = 9; } diff --git a/bzip2_compress.c b/bzip2_compress.c index 718e17b..7250f43 100644 --- a/bzip2_compress.c +++ b/bzip2_compress.c @@ -49,7 +49,8 @@ bzip2_stats(int show) } int -bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize) +bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { if (*level > 9) *level = 9; return (0); diff --git a/libbsc_compress.c b/libbsc_compress.c index fe7151d..4eab4c9 100644 --- a/libbsc_compress.c +++ b/libbsc_compress.c @@ -82,7 +82,8 @@ libbsc_props(algo_props_t *data, int level, ssize_t chunksize) { } int -libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize) +libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { struct libbsc_params *bscdat; int rv; diff --git a/lz4_compress.c b/lz4_compress.c index 122e8f7..afc73ce 100644 --- a/lz4_compress.c +++ b/lz4_compress.c @@ -59,7 +59,8 @@ lz4_props(algo_props_t *data, int level, ssize_t chunksize) { } int -lz4_init(void **data, int *level, int nthreads, ssize_t chunksize) +lz4_init(void **data, int *level, int nthreads, ssize_t chunksize, + int 
file_version, compress_op_t op) { struct lz4_params *lzdat; int lev; diff --git a/lzfx_compress.c b/lzfx_compress.c index 01723d8..2bc4826 100644 --- a/lzfx_compress.c +++ b/lzfx_compress.c @@ -40,7 +40,8 @@ lz_fx_stats(int show) } int -lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize) +lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { struct lzfx_params *lzdat; int lev; diff --git a/lzma_compress.c b/lzma_compress.c index cc3f031..2221dc1 100644 --- a/lzma_compress.c +++ b/lzma_compress.c @@ -65,9 +65,10 @@ lzma_props(algo_props_t *data, int level, ssize_t chunksize) { * The two functions below are not thread-safe, by design. */ int -lzma_init(void **data, int *level, int nthreads, ssize_t chunksize) +lzma_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { - if (!p) { + if (!p && op == COMPRESS) { p = (CLzmaEncProps *)slab_alloc(NULL, sizeof (CLzmaEncProps)); LzmaEncProps_Init(p); /* diff --git a/main.c b/main.c index 4324751..942744c 100644 --- a/main.c +++ b/main.c @@ -770,7 +770,7 @@ start_decompress(const char *filename, const char *to_filename) slab_cache_add(chunksize); slab_cache_add(sizeof (struct cmp_data)); - dary = (struct cmp_data **)slab_alloc(NULL, sizeof (struct cmp_data *) * nprocs); + dary = (struct cmp_data **)slab_calloc(NULL, nprocs, sizeof (struct cmp_data *)); for (i = 0; i < nprocs; i++) { dary[i] = (struct cmp_data *)slab_alloc(NULL, sizeof (struct cmp_data)); if (!dary[i]) { @@ -790,13 +790,14 @@ start_decompress(const char *filename, const char *to_filename) sem_init(&(tdat->cmp_done_sem), 0, 0); sem_init(&(tdat->write_done_sem), 0, 1); if (_init_func) { - if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize) != 0) { + if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize, + version, DECOMPRESS) != 0) { UNCOMP_BAIL; } } if (enable_rabin_scan || enable_fixed_scan) { 
tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size, - algo, enable_delta_encode, enable_fixed_scan); + algo, enable_delta_encode, enable_fixed_scan, version, DECOMPRESS); if (tdat->rctx == NULL) { UNCOMP_BAIL; } @@ -963,6 +964,7 @@ uncomp_done: perror("Chown "); if (dary != NULL) { for (i = 0; i < nprocs; i++) { + if (!dary[i]) continue; if (dary[i]->uncompressed_chunk) slab_free(NULL, dary[i]->uncompressed_chunk); if (dary[i]->compressed_chunk) @@ -1467,13 +1469,13 @@ start_compress(const char *filename, uint64_t chunksize, int level) sem_init(&(tdat->cmp_done_sem), 0, 0); sem_init(&(tdat->write_done_sem), 0, 1); if (_init_func) { - if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize) != 0) { + if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize, VERSION, COMPRESS) != 0) { COMP_BAIL; } } if (enable_rabin_scan || enable_fixed_scan) { tdat->rctx = create_dedupe_context(chunksize, compressed_chunksize, rab_blk_size, - algo, enable_delta_encode, enable_fixed_scan); + algo, enable_delta_encode, enable_fixed_scan, VERSION, COMPRESS); if (tdat->rctx == NULL) { COMP_BAIL; } @@ -1580,7 +1582,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) */ if (enable_rabin_split) { rctx = create_dedupe_context(chunksize, 0, 0, algo, enable_delta_encode, - enable_fixed_scan); + enable_fixed_scan, VERSION, COMPRESS); rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx); } else { rbytes = Read(uncompfd, cread_buf, chunksize); diff --git a/none_compress.c b/none_compress.c index 9f84297..7533112 100644 --- a/none_compress.c +++ b/none_compress.c @@ -37,7 +37,8 @@ none_stats(int show) } int -none_init(void **data, int *level, int nthreads, ssize_t chunksize) +none_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { return (0); } diff --git a/pcompress.h b/pcompress.h index 33fdaa4..d1085f0 100644 --- a/pcompress.h +++ 
b/pcompress.h @@ -38,7 +38,7 @@ extern "C" { #define CHUNK_FLAG_SZ 1 #define ALGO_SZ 8 #define MIN_CHUNK 2048 -#define VERSION 4 +#define VERSION 5 #define FLAG_DEDUP 1 #define FLAG_DEDUP_FIXED 2 #define FLAG_SINGLE_CHUNK 4 @@ -113,15 +113,24 @@ extern int lz4_decompress(void *src, size_t srclen, void *dst, extern int none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); -extern int adapt_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int zlib_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int lz4_init(void **data, int *level, int nthreads, ssize_t chunksize); -extern int none_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int adapt_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int zlib_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); 
+extern int lz4_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); +extern int none_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern void lzma_props(algo_props_t *data, int level, ssize_t chunksize); extern void lzma_mt_props(algo_props_t *data, int level, ssize_t chunksize); @@ -149,7 +158,8 @@ extern int libbsc_compress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); extern int libbsc_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); -extern int libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int libbsc_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern void libbsc_props(algo_props_t *data, int level, ssize_t chunksize); extern int libbsc_deinit(void **data); extern void libbsc_stats(int show); diff --git a/ppmd_compress.c b/ppmd_compress.c index 7a9552d..a594e71 100644 --- a/ppmd_compress.c +++ b/ppmd_compress.c @@ -62,7 +62,8 @@ ppmd_stats(int show) } int -ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize) +ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { CPpmd8 *_ppmd; diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index fd0790a..bb497a0 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -71,7 +71,8 @@ #define FIFTY_PCNT(x) ((x) >> 1) #define SIXTY_PCNT(x) (((x) >> 1) + ((x) >> 3)) -extern int lzma_init(void **data, int *level, ssize_t chunksize); +extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); extern int lzma_compress(void *src, size_t srclen, void *dst, size_t *destlen, int level, uchar_t chdr, void *data); extern int lzma_decompress(void *src, size_t srclen, void *dst, @@ -110,9 +111,8 @@ 
dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta */ dedupe_context_t * create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz, - const char *algo, int delta_flag, int fixed_flag) { + const char *algo, int delta_flag, int fixed_flag, int file_version, compress_op_t op) { dedupe_context_t *ctx; - unsigned char *current_window_data; uint32_t i; if (rab_blk_sz < 1 || rab_blk_sz > 5) @@ -213,13 +213,14 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s destroy_dedupe_context(ctx); return (NULL); } - current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE); + ctx->current_window_data = slab_alloc(NULL, RAB_POLYNOMIAL_WIN_SIZE); ctx->blocks = NULL; if (real_chunksize > 0) { ctx->blocks = (rabin_blockentry_t **)slab_calloc(NULL, ctx->blknum, sizeof (rabin_blockentry_t *)); } - if(ctx == NULL || current_window_data == NULL || (ctx->blocks == NULL && real_chunksize > 0)) { + if(ctx == NULL || ctx->current_window_data == NULL || + (ctx->blocks == NULL && real_chunksize > 0)) { fprintf(stderr, "Could not allocate rabin polynomial context, out of memory\n"); destroy_dedupe_context(ctx); @@ -229,27 +230,18 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s ctx->lzma_data = NULL; ctx->level = 14; if (real_chunksize > 0) { - lzma_init(&(ctx->lzma_data), &(ctx->level), chunksize); - if (!(ctx->lzma_data)) { + lzma_init(&(ctx->lzma_data), &(ctx->level), 1, chunksize, file_version, op); + + // The lzma_data member is not needed during decompression + if (!(ctx->lzma_data) && op == COMPRESS) { fprintf(stderr, "Could not initialize LZMA data for dedupe index, out of memory\n"); destroy_dedupe_context(ctx); return (NULL); } } - /* - * We should compute the power for the window size. 
- * static uint64_t polynomial_pow; - * polynomial_pow = 1; - * for(index=0; index<RAB_POLYNOMIAL_WIN_SIZE; index++) { - * polynomial_pow *= RAB_POLYNOMIAL_CONST; - * } - * But since RAB_POLYNOMIAL_CONST == 2, we can use shifts. - */ - - ctx->current_window_data = current_window_data; ctx->real_chunksize = real_chunksize; reset_dedupe_context(ctx); return (ctx); diff --git a/rabin/rabin_dedup.h b/rabin/rabin_dedup.h index d98aeb6..45104e7 100644 --- a/rabin/rabin_dedup.h +++ b/rabin/rabin_dedup.h @@ -163,7 +163,8 @@ typedef struct { } dedupe_context_t; extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, - int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag); + int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag, int file_version, + compress_op_t op); extern void destroy_dedupe_context(dedupe_context_t *ctx); extern unsigned int dedupe_compress(dedupe_context_t *ctx, unsigned char *buf, ssize_t *size, ssize_t offset, ssize_t *rabin_pos); diff --git a/utils/utils.h b/utils/utils.h index 9a6d031..3fa8721 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -147,8 +147,14 @@ extern uint64_t get_total_ram(); typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst, size_t *destlen, int level, uchar_t chdr, void *data); +typedef enum { + COMPRESS, + DECOMPRESS +} compress_op_t; + /* Pointer type for algo specific init/deinit/stats functions. 
*/ -typedef int (*init_func_ptr)(void **data, int *level, int nthreads, ssize_t chunksize); +typedef int (*init_func_ptr)(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op); typedef int (*deinit_func_ptr)(void **data); typedef void (*stats_func_ptr)(int show); typedef void (*props_func_ptr)(algo_props_t *data, int level, ssize_t chunksize); diff --git a/zlib_compress.c b/zlib_compress.c index a4e19fe..84833c8 100644 --- a/zlib_compress.c +++ b/zlib_compress.c @@ -54,7 +54,8 @@ zlib_buf_extra(ssize_t buflen) } int -zlib_init(void **data, int *level, int nthreads, ssize_t chunksize) +zlib_init(void **data, int *level, int nthreads, ssize_t chunksize, + int file_version, compress_op_t op) { z_stream *zs; int ret; @@ -65,7 +66,15 @@ zlib_init(void **data, int *level, int nthreads, ssize_t chunksize) zs->opaque = NULL; if (*level > 9) *level = 9; - ret = deflateInit(zs, *level); + if (op == COMPRESS) { + ret = deflateInit2(zs, *level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + } else { + if (file_version < 5) { + ret = inflateInit(zs); + } else { + ret = inflateInit2(zs, -15); + } + } if (ret != Z_OK) { zerr(ret, 0); return (-1); @@ -189,22 +198,13 @@ int zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data) { - z_stream zs; int err; unsigned int slen, dlen; size_t _srclen = srclen; size_t _dstlen = *dstlen; uchar_t *dst1 = dst; uchar_t *src1 = src; - - zs.zalloc = slab_alloc_ui; - zs.zfree = slab_free; - zs.opaque = NULL; - - if ((err = inflateInit(&zs)) != Z_OK) { - zerr(err, 0); - return (-1); - } + z_stream *zs = (z_stream *)data; while (_srclen > 0) { if (_srclen > SINGLE_CALL_MAX) { @@ -218,21 +218,21 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, dlen = _dstlen; } - zs.next_in = src1; - zs.avail_in = slen; - zs.next_out = dst1; - zs.avail_out = dlen; + zs->next_in = src1; + zs->avail_in = slen; + zs->next_out = dst1; + zs->avail_out = dlen; - 
err = inflate(&zs, Z_NO_FLUSH); + err = inflate(zs, Z_NO_FLUSH); if (err != Z_OK && err != Z_STREAM_END) { zerr(err, 0); return (-1); } - dst1 += (dlen - zs.avail_out); - _dstlen -= (dlen - zs.avail_out); - src1 += (slen - zs.avail_in); - _srclen -= (slen - zs.avail_in); + dst1 += (dlen - zs->avail_out); + _dstlen -= (dlen - zs->avail_out); + src1 += (slen - zs->avail_in); + _srclen -= (slen - zs->avail_in); if (err == Z_STREAM_END) { if (_srclen > 0) { @@ -245,6 +245,10 @@ zlib_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, } *dstlen = *dstlen - _dstlen; - inflateEnd(&zs); + err = inflateReset(zs); + if (err != Z_OK) { + zerr(err, 1); + return (-1); + } return (0); }