From 400d0bfa720f7b9d199cdd74ea53d6b34503b66d Mon Sep 17 00:00:00 2001
From: Moinak Ghosh
Date: Wed, 8 Aug 2012 22:40:58 +0530
Subject: [PATCH] Bias fingerprint value with occurrence counts for a better sketch

Fix latent bug when calling algo deinit in decompression code
Reduce diff threshold for slightly greater delta encoding
Limit similar buffer size difference for less wasted diffing
Change zlib compression wrapper to use faster deflateReset mechanism
Reduce optimization level for Dedupe code, it goes faster
---
Reviewer notes (not part of the commit message):
 - rabin_dedup(): the short-input guard returns (0) instead of using a bare
   "return;", matching the other exit visible in the same hunk.
 - rabin_dedup(): under BIG_ENDIAN the masked fingerprint is added to
   splits[1] (low 32 bits) so that the occurrence count kept in splits[0]
   (high 32 bits) is not clobbered; this mirrors the little-endian branch.
 - zlib_init(): the z_stream is released when deflateInit() fails.
 - zlib_deinit(): deflateEnd() is called before the stream is freed, *data
   is cleared and the function returns 0.
 - The "index" blob ids of rabin/rabin_polynomial.c and zlib_compress.c
   predate these fixes. The #include operands in the none_compress.c hunk
   were already missing from the captured text and are left as found.
 - NOTE(review): "length - prev_length < 512" may wrap if both operands are
   unsigned; confirm the field types before relying on it as a size limit.

 Makefile                 | 22 +++++++-------
 bsdiff/bsdiff.c          |  6 ++--
 main.c                   |  4 +--
 none_compress.c          |  1 +
 pcompress.h              |  1 +
 rabin/rabin_polynomial.c | 28 +++++++++++-------
 rabin/rabin_polynomial.h |  1 +
 utils.h                  |  1 -
 zlib_compress.c          | 63 ++++++++++++++++++++++++++--------------
 9 files changed, 78 insertions(+), 49 deletions(-)

diff --git a/Makefile b/Makefile
index 0ea89fc..d1bceac 100644
--- a/Makefile
+++ b/Makefile
@@ -64,6 +64,8 @@ CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEF
 	-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
 VEC_FLAGS = -ftree-vectorize
 LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
+GEN_OPT = -O3
+RABIN_OPT = -O2
 LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm
 
 ifdef DEBUG
@@ -76,8 +78,8 @@ CPPFLAGS += -DDEBUG_NO_SLAB
 endif
 else
 LINK = g++ -m64 -pthread -msse3
-COMPILE = gcc -m64 -O3 -msse3 -c
-COMPILE_cpp = g++ -m64 -O3 -msse3 -c
+COMPILE = gcc -m64 -msse3 -c
+COMPILE_cpp = g++ -m64 -msse3 -c
 CPPFLAGS += -DNDEBUG
 ifdef DEBUG_NO_SLAB
 CPPFLAGS += -DDEBUG_NO_SLAB
@@ -87,28 +89,28 @@ endif
 all: $(PROG)
 
 $(LZMAOBJS): $(LZMASRCS) $(LZMAHDRS)
-	$(COMPILE) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(CRCOBJS): $(CRCSRCS) $(CRCHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(PPMDOBJS): $(PPMDSRCS) $(PPMDHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(RABINOBJS): $(RABINSRCS) $(RABINHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(RABIN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(BSDIFFOBJS): $(BSDIFFSRCS) $(BSDIFFHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(LZFXOBJS): $(LZFXSRCS) $(LZFXHDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
-	$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
-	$(COMPILE) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
+	$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
 
 $(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) \
 $(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS)
diff --git a/bsdiff/bsdiff.c b/bsdiff/bsdiff.c
index f81ee8f..d44f53c 100644
--- a/bsdiff/bsdiff.c
+++ b/bsdiff/bsdiff.c
@@ -379,17 +379,17 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
 			lastoffset=pos-scan;
 		}
 	}
-	if (eblen > newsize/2) {
+	if (eblen > (newsize/2 + newsize/5)) {
 		rv = 0;
 		goto out;
 	}
 
-	/* Comput uncompressed size of the ctrl data. */
+	/* Compute uncompressed size of the ctrl data. */
 	len = BUFTELL(&pf);
 	valouti32(len-hdrsz, header+4);
 	ulen = len-hdrsz;
 
-	/* If our data can fit in the scratch area use it other alloc. */
+	/* If our data can fit in the scratch area use it otherwise alloc. */
 	if (ulen > scratchsize) {
 		cb = slab_alloc(NULL, ulen);
 	} else {
diff --git a/main.c b/main.c
index 2059fe5..c1298a6 100644
--- a/main.c
+++ b/main.c
@@ -590,7 +590,7 @@ uncomp_done:
 		slab_free(NULL, dary[i]->uncompressed_chunk);
 		slab_free(NULL, dary[i]->compressed_chunk);
 		if (_deinit_func)
-			_deinit_func(&(tdat->data));
+			_deinit_func(&(dary[i]->data));
 		if (enable_rabin_scan) {
 			destroy_rabin_context(dary[i]->rctx);
 		}
@@ -1222,7 +1222,7 @@ init_algo(const char *algo, int bail)
 		_compress_func = zlib_compress;
 		_decompress_func = zlib_decompress;
 		_init_func = zlib_init;
-		_deinit_func = NULL;
+		_deinit_func = zlib_deinit;
 		_stats_func = zlib_stats;
 		rv = 0;
 
diff --git a/none_compress.c b/none_compress.c
index 3adf5c7..54c95ce 100644
--- a/none_compress.c
+++ b/none_compress.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/pcompress.h b/pcompress.h
index 780924d..b1bfc0f 100644
--- a/pcompress.h
+++ b/pcompress.h
@@ -102,6 +102,7 @@ extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
 extern int lz4_init(void **data, int *level, ssize_t chunksize);
 extern int none_init(void **data, int *level, ssize_t chunksize);
 
+extern int zlib_deinit(void **data);
 extern int adapt_deinit(void **data);
 extern int lzma_deinit(void **data);
 extern int ppmd_deinit(void **data);
diff --git a/rabin/rabin_polynomial.c b/rabin/rabin_polynomial.c
index 92f8e8d..ebebd9c 100755
--- a/rabin/rabin_polynomial.c
+++ b/rabin/rabin_polynomial.c
@@ -118,31 +118,26 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *al
 
 	ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
 	ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
+	ctx->rabin_break_patt = 0;
 	if (((memcmp(algo, "lzma", 4) == 0 || memcmp(algo, "adapt", 5) == 0) &&
 	      chunksize <= LZMA_WINDOW_MAX) || delta_flag) {
 		if (memcmp(algo, "lzfx", 4) == 0 || memcmp(algo, "lz4", 3) == 0 ||
-		    memcmp(algo, "zlib", 4) == 0) {
+		    memcmp(algo, "zlib", 4) == 0 || memcmp(algo, "none", 4) == 0) {
 			ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
 			ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
 			ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
-			ctx->rabin_break_patt = 0;
 		} else {
 			ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE;
 			ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK;
 			ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE;
-			if (memcmp(algo, "bzip2", 5) == 0) {
-				ctx->rabin_break_patt = 0;
-			} else {
-				ctx->rabin_break_patt = RAB_POLYNOMIAL_CONST;
-			}
 		}
 	} else {
 		ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
 		ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
 		ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
-		ctx->rabin_break_patt = 0;
 	}
 
+	ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size;
 	blknum = chunksize / ctx->rabin_poly_min_block_size;
 	if (chunksize % ctx->rabin_poly_min_block_size)
 		blknum++;
@@ -311,9 +306,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 		}
 		return (0);
 	}
+	if (*size < ctx->rabin_poly_avg_block_size) return (0);
 
 	for (i=offset; i<*size; i++) {
-		char cur_byte = buf1[i];
+		uint32_t *splits;
+		uchar_t cur_byte = buf1[i];
 		uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
 		ctx->current_window_data[ctx->window_pos] = cur_byte;
 		/*
@@ -342,7 +339,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 		 */
 		len1++;
 		fpos[1] = cur_roll_checksum & ctx->rabin_avg_block_mask;
-		fplist[fpos[1]] += cur_roll_checksum;
+		splits = (uint32_t *)(&fplist[fpos[1]]);
+#if BYTE_ORDER == BIG_ENDIAN
+		splits[0]++;
+		splits[1] += cur_roll_checksum & ctx->fp_mask;
+#else
+		splits[1]++;
+		splits[0] += cur_roll_checksum & ctx->fp_mask;
+#endif
 
 		/*
 		 * Perform the following statement without branching:
@@ -466,7 +470,9 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 				if (ctx->blocks[blk].similar) continue;
 
 				if (blk > 0 && ctx->blocks[blk].ref == 0 &&
-				    ctx->blocks[blk].cksum_n_offset == prev_cksum) {
+				    ctx->blocks[blk].cksum_n_offset == prev_cksum &&
+				    ctx->blocks[blk].length - prev_length < 512
+				) {
 					ctx->blocks[blk].index = prev_index;
 					ctx->blocks[blk].similar = SIMILAR_PARTIAL;
 					ctx->blocks[prev_blk].ref = 1;
diff --git a/rabin/rabin_polynomial.h b/rabin/rabin_polynomial.h
index 19d792c..bad5d9e 100644
--- a/rabin/rabin_polynomial.h
+++ b/rabin/rabin_polynomial.h
@@ -148,6 +148,7 @@ typedef struct {
 	uint32_t rabin_poly_min_block_size;
 	uint32_t rabin_poly_avg_block_size;
 	uint32_t rabin_avg_block_mask;
+	uint32_t fp_mask;
 	uint32_t rabin_break_patt;
 	uint64_t real_chunksize;
 	short valid;
diff --git a/utils.h b/utils.h
index 51a83d7..a3685f1 100644
--- a/utils.h
+++ b/utils.h
@@ -151,7 +151,6 @@ hash6432shift(uint64_t key)
 	return (uint32_t) key;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/zlib_compress.c b/zlib_compress.c
index 147edeb..f7ce52a 100644
--- a/zlib_compress.c
+++ b/zlib_compress.c
@@ -34,6 +34,8 @@
  */
 #define SINGLE_CALL_MAX (2147483648UL)
 
+static void zerr(int ret);
+
 static void *
 slab_alloc_ui(void *p, unsigned int items, unsigned int size) {
 	void *ptr;
@@ -54,7 +56,23 @@ zlib_buf_extra(ssize_t buflen)
 int
 zlib_init(void **data, int *level, ssize_t chunksize)
 {
+	z_stream *zs;
+	int ret;
+
+	zs = slab_alloc(NULL, sizeof (z_stream));
+	zs->zalloc = slab_alloc_ui;
+	zs->zfree = slab_free;
+	zs->opaque = NULL;
+
 	if (*level > 9) *level = 9;
+	ret = deflateInit(zs, *level);
+	if (ret != Z_OK) {
+		slab_free(NULL, zs);
+		zerr(ret);
+		return (-1);
+	}
+
+	*data = zs;
 	return (0);
 }
 
@@ -63,6 +81,17 @@ zlib_stats(int show)
 {
 }
 
+int
+zlib_deinit(void **data)
+{
+	if (*data) {
+		deflateEnd((z_stream *)(*data));
+		slab_free(NULL, *data);
+		*data = NULL;
+	}
+	return (0);
+}
+
 static void
 zerr(int ret)
 {
@@ -97,23 +126,13 @@ int
 zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 	       int level, uchar_t chdr, void *data)
 {
-	z_stream zs;
 	int ret, ending;
 	unsigned int slen, dlen;
 	size_t _srclen = srclen;
 	size_t _dstlen = *dstlen;
 	uchar_t *dst1 = dst;
 	uchar_t *src1 = src;
-
-	zs.zalloc = slab_alloc_ui;
-	zs.zfree = slab_free;
-	zs.opaque = NULL;
-
-	ret = deflateInit(&zs, level);
-	if (ret != Z_OK) {
-		zerr(ret);
-		return (-1);
-	}
+	z_stream *zs = (z_stream *)data;
 
 	ending = 0;
 	while (_srclen > 0) {
@@ -129,21 +148,21 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 			dlen = _dstlen;
 		}
 
-		zs.next_in = src1;
-		zs.avail_in = slen;
-		zs.next_out = dst1;
-		zs.avail_out = dlen;
+		zs->next_in = src1;
+		zs->avail_in = slen;
+		zs->next_out = dst1;
+		zs->avail_out = dlen;
 		if (!ending) {
-			ret = deflate(&zs, Z_NO_FLUSH);
+			ret = deflate(zs, Z_NO_FLUSH);
 			if (ret != Z_OK) {
-				deflateEnd(&zs);
+				deflateReset(zs);
 				zerr(ret);
 				return (-1);
 			}
 		} else {
-			ret = deflate(&zs, Z_FINISH);
+			ret = deflate(zs, Z_FINISH);
 			if (ret != Z_STREAM_END) {
-				deflateEnd(&zs);
+				deflateReset(zs);
 				if (ret == Z_OK)
 					zerr(Z_BUF_ERROR);
 				else
@@ -151,14 +170,14 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 				return (-1);
 			}
 		}
-		dst1 += (dlen - zs.avail_out);
-		_dstlen -= (dlen - zs.avail_out);
+		dst1 += (dlen - zs->avail_out);
+		_dstlen -= (dlen - zs->avail_out);
 		src1 += slen;
 		_srclen -= slen;
 	}
 
 	*dstlen = *dstlen - _dstlen;
-	ret = deflateEnd(&zs);
+	ret = deflateReset(zs);
 	if (ret != Z_OK) {
 		zerr(ret);
 		return (-1);