diff --git a/Makefile b/Makefile
index 0ea89fc..d1bceac 100644
--- a/Makefile
+++ b/Makefile
@@ -64,6 +64,8 @@ CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEF
 	-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
 VEC_FLAGS = -ftree-vectorize
 LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
+GEN_OPT = -O3
+RABIN_OPT = -O2
 LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm
 
 ifdef DEBUG
@@ -76,8 +78,8 @@ CPPFLAGS += -DDEBUG_NO_SLAB
 endif
 else
 LINK = g++ -m64 -pthread -msse3
-COMPILE = gcc -m64 -O3 -msse3 -c
-COMPILE_cpp = g++ -m64 -O3 -msse3 -c
+COMPILE = gcc -m64 -msse3 -c
+COMPILE_cpp = g++ -m64 -msse3 -c
 CPPFLAGS += -DNDEBUG
 ifdef DEBUG_NO_SLAB
 CPPFLAGS += -DDEBUG_NO_SLAB
@@ -87,28 +89,28 @@ endif
 all: $(PROG)
 
 $(LZMAOBJS): $(LZMASRCS) $(LZMAHDRS)
-	$(COMPILE) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(CRCOBJS): $(CRCSRCS) $(CRCHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(PPMDOBJS): $(PPMDSRCS) $(PPMDHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(RABINOBJS): $(RABINSRCS) $(RABINHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(RABIN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(BSDIFFOBJS): $(BSDIFFSRCS) $(BSDIFFHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(LZFXOBJS): $(LZFXSRCS) $(LZFXHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
-	$(COMPILE) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) \
 $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS)
diff --git a/bsdiff/bsdiff.c b/bsdiff/bsdiff.c
index f81ee8f..d44f53c 100644
--- a/bsdiff/bsdiff.c
+++ b/bsdiff/bsdiff.c
@@ -379,17 +379,17 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
 			lastoffset=pos-scan;
 		}
 	}
-	if (eblen > newsize/2) {
+	if (eblen > (newsize/2 + newsize/5)) {
 		rv = 0;
 		goto out;
 	}
 
-	/* Comput uncompressed size of the ctrl data. */
+	/* Compute uncompressed size of the ctrl data. */
 	len = BUFTELL(&pf);
 	valouti32(len-hdrsz, header+4);
 	ulen = len-hdrsz;
 
-	/* If our data can fit in the scratch area use it other alloc. */
+	/* If our data can fit in the scratch area use it otherwise alloc. */
 	if (ulen > scratchsize) {
 		cb = slab_alloc(NULL, ulen);
 	} else {
diff --git a/main.c b/main.c
index 2059fe5..c1298a6 100644
--- a/main.c
+++ b/main.c
@@ -590,7 +590,7 @@ uncomp_done:
 		slab_free(NULL, dary[i]->uncompressed_chunk);
 		slab_free(NULL, dary[i]->compressed_chunk);
 		if (_deinit_func)
-			_deinit_func(&(tdat->data));
+			_deinit_func(&(dary[i]->data));
 		if (enable_rabin_scan) {
 			destroy_rabin_context(dary[i]->rctx);
 		}
@@ -1222,7 +1222,7 @@ init_algo(const char *algo, int bail)
 		_compress_func = zlib_compress;
 		_decompress_func = zlib_decompress;
 		_init_func = zlib_init;
-		_deinit_func = NULL;
+		_deinit_func = zlib_deinit;
 		_stats_func = zlib_stats;
 		rv = 0;
 
diff --git a/none_compress.c b/none_compress.c
index 3adf5c7..54c95ce 100644
--- a/none_compress.c
+++ b/none_compress.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/pcompress.h b/pcompress.h
index 780924d..b1bfc0f 100644
--- a/pcompress.h
+++ b/pcompress.h
@@ -102,6 +102,7 @@ extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
 extern int lz4_init(void **data, int *level, ssize_t chunksize);
 extern int none_init(void **data, int *level, ssize_t chunksize);
 
+extern int zlib_deinit(void **data);
 extern int adapt_deinit(void **data);
 extern int lzma_deinit(void **data);
 extern int ppmd_deinit(void **data);
diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c
index 92f8e8d..ebebd9c 100755
--- a/rabin/rabin_polynomial.c
+++ b/rabin/rabin_polynomial.c
@@ -118,31 +118,26 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *al
 
 	ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
 	ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
+	ctx->rabin_break_patt = 0;
 	if (((memcmp(algo, "lzma", 4) == 0 || memcmp(algo, "adapt", 5) == 0) &&
 	      chunksize <= LZMA_WINDOW_MAX) || delta_flag) {
 		if (memcmp(algo, "lzfx", 4) == 0 || memcmp(algo, "lz4", 3) == 0 ||
-		    memcmp(algo, "zlib", 4) == 0) {
+		    memcmp(algo, "zlib", 4) == 0 || memcmp(algo, "none", 4) == 0) {
 			ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
 			ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
 			ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
-			ctx->rabin_break_patt = 0;
 		} else {
 			ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE;
 			ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK;
 			ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE;
-			if (memcmp(algo, "bzip2", 5) == 0) {
-				ctx->rabin_break_patt = 0;
-			} else {
-				ctx->rabin_break_patt = RAB_POLYNOMIAL_CONST;
-			}
 		}
 	} else {
 		ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
 		ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
 		ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
-		ctx->rabin_break_patt = 0;
 	}
 
+	ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size;
 	blknum = chunksize / ctx->rabin_poly_min_block_size;
 	if (chunksize % ctx->rabin_poly_min_block_size)
 		blknum++;
@@ -311,9 +306,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 		}
 		return (0);
 	}
+	if (*size < ctx->rabin_poly_avg_block_size) return (0);
 
 	for (i=offset; i<*size; i++) {
-		char cur_byte = buf1[i];
+		uint32_t *splits;
+		uchar_t cur_byte = buf1[i];
 		uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
 		ctx->current_window_data[ctx->window_pos] = cur_byte;
 		/*
@@ -342,7 +339,18 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 		 */
 		len1++;
 		fpos[1] = cur_roll_checksum & ctx->rabin_avg_block_mask;
-		fplist[fpos[1]] += cur_roll_checksum;
+		/*
+		 * View the slot as two 32-bit words: one counts hits, the other
+		 * sums masked checksums. Assumes 64-bit fplist entries - confirm.
+		 */
+		splits = (uint32_t *)(&fplist[fpos[1]]);
+#if BYTE_ORDER == BIG_ENDIAN
+		splits[0]++;
+		splits[1] += cur_roll_checksum & ctx->fp_mask;
+#else
+		splits[1]++;
+		splits[0] += cur_roll_checksum & ctx->fp_mask;
+#endif
 
 		/*
 		 * Perform the following statement without branching:
@@ -466,7 +474,9 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 			if (ctx->blocks[blk].similar) continue;
 
 			if (blk > 0 && ctx->blocks[blk].ref == 0 &&
-			    ctx->blocks[blk].cksum_n_offset == prev_cksum) {
+			    ctx->blocks[blk].cksum_n_offset == prev_cksum &&
+			    ctx->blocks[blk].length - prev_length < 512
+			    ) {
 				ctx->blocks[blk].index = prev_index;
 				ctx->blocks[blk].similar = SIMILAR_PARTIAL;
 				ctx->blocks[prev_blk].ref = 1;
diff --git a/rabin/rabin_polynomial.h b/rabin/rabin_polynomial.h
index 19d792c..bad5d9e 100644
--- a/rabin/rabin_polynomial.h
+++ b/rabin/rabin_polynomial.h
@@ -148,6 +148,7 @@ typedef struct {
 	uint32_t rabin_poly_min_block_size;
 	uint32_t rabin_poly_avg_block_size;
 	uint32_t rabin_avg_block_mask;
+	uint32_t fp_mask;
 	uint32_t rabin_break_patt;
 	uint64_t real_chunksize;
 	short valid;
diff --git a/utils.h b/utils.h
index 51a83d7..a3685f1 100644
--- a/utils.h
+++ b/utils.h
@@ -151,7 +151,6 @@ hash6432shift(uint64_t key)
 	return (uint32_t) key;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/zlib_compress.c b/zlib_compress.c
index 147edeb..f7ce52a 100644
--- a/zlib_compress.c
+++ b/zlib_compress.c
@@ -34,6 +34,8 @@
  */
 #define SINGLE_CALL_MAX (2147483648UL)
 
+static void zerr(int ret);
+
 static void *
 slab_alloc_ui(void *p, unsigned int items, unsigned int size) {
 	void *ptr;
@@ -54,7 +56,28 @@ zlib_buf_extra(ssize_t buflen)
 int
 zlib_init(void **data, int *level, ssize_t chunksize)
 {
+	z_stream *zs;
+	int ret;
+
+	/*
+	 * Set up one deflate stream and hand it back through *data;
+	 * zlib_compress() reuses it and zlib_deinit() releases it.
+	 */
+	zs = slab_alloc(NULL, sizeof (z_stream));
+	zs->zalloc = slab_alloc_ui;
+	zs->zfree = slab_free;
+	zs->opaque = NULL;
+
 	if (*level > 9) *level = 9;
+	ret = deflateInit(zs, *level);
+	if (ret != Z_OK) {
+		/* Do not leak the stream object when initialization fails. */
+		slab_free(NULL, zs);
+		zerr(ret);
+		return (-1);
+	}
+
+	*data = zs;
 	return (0);
 }
 
@@ -63,6 +86,24 @@ zlib_stats(int show)
 {
 }
 
+/*
+ * Release the deflate stream created by zlib_init(). deflateEnd() frees
+ * zlib's internal state before the z_stream object itself is passed to
+ * slab_free(). *data is cleared so a repeated call does nothing.
+ */
+int
+zlib_deinit(void **data)
+{
+	z_stream *zs = (z_stream *)(*data);
+
+	if (zs) {
+		(void) deflateEnd(zs);
+		slab_free(NULL, zs);
+		*data = NULL;
+	}
+	return (0);
+}
+
 static void
 zerr(int ret)
 {
@@ -97,23 +138,13 @@ int
 zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 	      int level, uchar_t chdr, void *data)
 {
-	z_stream zs;
 	int ret, ending;
 	unsigned int slen, dlen;
 	size_t _srclen = srclen;
 	size_t _dstlen = *dstlen;
 	uchar_t *dst1 = dst;
 	uchar_t *src1 = src;
-
-	zs.zalloc = slab_alloc_ui;
-	zs.zfree = slab_free;
-	zs.opaque = NULL;
-
-	ret = deflateInit(&zs, level);
-	if (ret != Z_OK) {
-		zerr(ret);
-		return (-1);
-	}
+	z_stream *zs = (z_stream *)data;
 
 	ending = 0;
 	while (_srclen > 0) {
@@ -129,21 +160,21 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 			dlen = _dstlen;
 		}
 
-		zs.next_in = src1;
-		zs.avail_in = slen;
-		zs.next_out = dst1;
-		zs.avail_out = dlen;
+		zs->next_in = src1;
+		zs->avail_in = slen;
+		zs->next_out = dst1;
+		zs->avail_out = dlen;
 		if (!ending) {
-			ret = deflate(&zs, Z_NO_FLUSH);
+			ret = deflate(zs, Z_NO_FLUSH);
 			if (ret != Z_OK) {
-				deflateEnd(&zs);
+				deflateReset(zs);
 				zerr(ret);
 				return (-1);
 			}
 		} else {
-			ret = deflate(&zs, Z_FINISH);
+			ret = deflate(zs, Z_FINISH);
 			if (ret != Z_STREAM_END) {
-				deflateEnd(&zs);
+				deflateReset(zs);
 				if (ret == Z_OK)
 					zerr(Z_BUF_ERROR);
 				else
@@ -151,14 +182,14 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 				return (-1);
 			}
 		}
-		dst1 += (dlen - zs.avail_out);
-		_dstlen -= (dlen - zs.avail_out);
+		dst1 += (dlen - zs->avail_out);
+		_dstlen -= (dlen - zs->avail_out);
 		src1 += slen;
 		_srclen -= slen;
 	}
 
 	*dstlen = *dstlen - _dstlen;
-	ret = deflateReset(&zs);
+	ret = deflateReset(zs);
 	if (ret != Z_OK) {
 		zerr(ret);
 		return (-1);