Bias fingerprint value with occurrence counts for a better sketch
Fix latent bug when calling algo deinit in decompression code. Reduce diff threshold for slightly greater delta encoding. Limit similar buffer size difference for less wasted diffing. Change zlib compression wrapper to use faster deflateReset mechanism. Reduce optimization level for Dedupe code, it goes faster.
This commit is contained in:
parent
a4311f2ede
commit
400d0bfa72
9 changed files with 74 additions and 49 deletions
22
Makefile
22
Makefile
|
@ -64,6 +64,8 @@ CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEF
|
|||
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
|
||||
VEC_FLAGS = -ftree-vectorize
|
||||
LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||
GEN_OPT = -O3
|
||||
RABIN_OPT = -O2
|
||||
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm
|
||||
|
||||
ifdef DEBUG
|
||||
|
@ -76,8 +78,8 @@ CPPFLAGS += -DDEBUG_NO_SLAB
|
|||
endif
|
||||
else
|
||||
LINK = g++ -m64 -pthread -msse3
|
||||
COMPILE = gcc -m64 -O3 -msse3 -c
|
||||
COMPILE_cpp = g++ -m64 -O3 -msse3 -c
|
||||
COMPILE = gcc -m64 -msse3 -c
|
||||
COMPILE_cpp = g++ -m64 -msse3 -c
|
||||
CPPFLAGS += -DNDEBUG
|
||||
ifdef DEBUG_NO_SLAB
|
||||
CPPFLAGS += -DDEBUG_NO_SLAB
|
||||
|
@ -87,28 +89,28 @@ endif
|
|||
all: $(PROG)
|
||||
|
||||
$(LZMAOBJS): $(LZMASRCS) $(LZMAHDRS)
|
||||
$(COMPILE) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(CRCOBJS): $(CRCSRCS) $(CRCHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(PPMDOBJS): $(PPMDSRCS) $(PPMDHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(RABINOBJS): $(RABINSRCS) $(RABINHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(RABIN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(BSDIFFOBJS): $(BSDIFFSRCS) $(BSDIFFHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(LZFXOBJS): $(LZFXSRCS) $(LZFXHDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
|
||||
$(COMPILE) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
||||
$(COMPILE) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(PROG): $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) \
|
||||
$(CRCOBJS) $(RABINOBJS) $(BSDIFFOBJS)
|
||||
|
|
|
@ -379,17 +379,17 @@ bsdiff(u_char *old, bsize_t oldsize, u_char *new, bsize_t newsize,
|
|||
lastoffset=pos-scan;
|
||||
}
|
||||
}
|
||||
if (eblen > newsize/2) {
|
||||
if (eblen > (newsize/2 + newsize/5)) {
|
||||
rv = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Comput uncompressed size of the ctrl data. */
|
||||
/* Compute uncompressed size of the ctrl data. */
|
||||
len = BUFTELL(&pf);
|
||||
valouti32(len-hdrsz, header+4);
|
||||
ulen = len-hdrsz;
|
||||
|
||||
/* If our data can fit in the scratch area use it other alloc. */
|
||||
/* If our data can fit in the scratch area use it otherwise alloc. */
|
||||
if (ulen > scratchsize) {
|
||||
cb = slab_alloc(NULL, ulen);
|
||||
} else {
|
||||
|
|
4
main.c
4
main.c
|
@ -590,7 +590,7 @@ uncomp_done:
|
|||
slab_free(NULL, dary[i]->uncompressed_chunk);
|
||||
slab_free(NULL, dary[i]->compressed_chunk);
|
||||
if (_deinit_func)
|
||||
_deinit_func(&(tdat->data));
|
||||
_deinit_func(&(dary[i]->data));
|
||||
if (enable_rabin_scan) {
|
||||
destroy_rabin_context(dary[i]->rctx);
|
||||
}
|
||||
|
@ -1222,7 +1222,7 @@ init_algo(const char *algo, int bail)
|
|||
_compress_func = zlib_compress;
|
||||
_decompress_func = zlib_decompress;
|
||||
_init_func = zlib_init;
|
||||
_deinit_func = NULL;
|
||||
_deinit_func = zlib_deinit;
|
||||
_stats_func = zlib_stats;
|
||||
rv = 0;
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <utils.h>
|
||||
#include <pcompress.h>
|
||||
|
|
|
@ -102,6 +102,7 @@ extern int lz_fx_init(void **data, int *level, ssize_t chunksize);
|
|||
extern int lz4_init(void **data, int *level, ssize_t chunksize);
|
||||
extern int none_init(void **data, int *level, ssize_t chunksize);
|
||||
|
||||
extern int zlib_deinit(void **data);
|
||||
extern int adapt_deinit(void **data);
|
||||
extern int lzma_deinit(void **data);
|
||||
extern int ppmd_deinit(void **data);
|
||||
|
|
|
@ -118,31 +118,26 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, const char *al
|
|||
ctx = (rabin_context_t *)slab_alloc(NULL, sizeof (rabin_context_t));
|
||||
ctx->rabin_poly_max_block_size = RAB_POLYNOMIAL_MAX_BLOCK_SIZE;
|
||||
|
||||
ctx->rabin_break_patt = 0;
|
||||
if (((memcmp(algo, "lzma", 4) == 0 || memcmp(algo, "adapt", 5) == 0) &&
|
||||
chunksize <= LZMA_WINDOW_MAX) || delta_flag) {
|
||||
if (memcmp(algo, "lzfx", 4) == 0 || memcmp(algo, "lz4", 3) == 0 ||
|
||||
memcmp(algo, "zlib", 4) == 0) {
|
||||
memcmp(algo, "zlib", 4) == 0 || memcmp(algo, "none", 4) == 0) {
|
||||
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
|
||||
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
|
||||
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
|
||||
ctx->rabin_break_patt = 0;
|
||||
} else {
|
||||
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE;
|
||||
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK;
|
||||
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE;
|
||||
if (memcmp(algo, "bzip2", 5) == 0) {
|
||||
ctx->rabin_break_patt = 0;
|
||||
} else {
|
||||
ctx->rabin_break_patt = RAB_POLYNOMIAL_CONST;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ctx->rabin_poly_min_block_size = RAB_POLYNOMIAL_MIN_BLOCK_SIZE2;
|
||||
ctx->rabin_avg_block_mask = RAB_POLYNOMIAL_AVG_BLOCK_MASK2;
|
||||
ctx->rabin_poly_avg_block_size = RAB_POLYNOMIAL_AVG_BLOCK_SIZE2;
|
||||
ctx->rabin_break_patt = 0;
|
||||
}
|
||||
|
||||
ctx->fp_mask = ctx->rabin_avg_block_mask | ctx->rabin_poly_avg_block_size;
|
||||
blknum = chunksize / ctx->rabin_poly_min_block_size;
|
||||
if (chunksize % ctx->rabin_poly_min_block_size)
|
||||
blknum++;
|
||||
|
@ -311,9 +306,11 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (*size < ctx->rabin_poly_avg_block_size) return;
|
||||
for (i=offset; i<*size; i++) {
|
||||
char cur_byte = buf1[i];
|
||||
uint32_t *splits;
|
||||
uchar_t cur_byte = buf1[i];
|
||||
uint64_t pushed_out = ctx->current_window_data[ctx->window_pos];
|
||||
ctx->current_window_data[ctx->window_pos] = cur_byte;
|
||||
/*
|
||||
|
@ -342,7 +339,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
*/
|
||||
len1++;
|
||||
fpos[1] = cur_roll_checksum & ctx->rabin_avg_block_mask;
|
||||
fplist[fpos[1]] += cur_roll_checksum;
|
||||
splits = (uint32_t *)(&fplist[fpos[1]]);
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
splits[0]++;
|
||||
splits[0] += cur_roll_checksum & ctx->fp_mask;
|
||||
#else
|
||||
splits[1]++;
|
||||
splits[0] += cur_roll_checksum & ctx->fp_mask;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform the following statement without branching:
|
||||
|
@ -466,7 +470,9 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
|
|||
if (ctx->blocks[blk].similar) continue;
|
||||
|
||||
if (blk > 0 && ctx->blocks[blk].ref == 0 &&
|
||||
ctx->blocks[blk].cksum_n_offset == prev_cksum) {
|
||||
ctx->blocks[blk].cksum_n_offset == prev_cksum &&
|
||||
ctx->blocks[blk].length - prev_length < 512
|
||||
) {
|
||||
ctx->blocks[blk].index = prev_index;
|
||||
ctx->blocks[blk].similar = SIMILAR_PARTIAL;
|
||||
ctx->blocks[prev_blk].ref = 1;
|
||||
|
|
|
@ -148,6 +148,7 @@ typedef struct {
|
|||
uint32_t rabin_poly_min_block_size;
|
||||
uint32_t rabin_poly_avg_block_size;
|
||||
uint32_t rabin_avg_block_mask;
|
||||
uint32_t fp_mask;
|
||||
uint32_t rabin_break_patt;
|
||||
uint64_t real_chunksize;
|
||||
short valid;
|
||||
|
|
1
utils.h
1
utils.h
|
@ -151,7 +151,6 @@ hash6432shift(uint64_t key)
|
|||
return (uint32_t) key;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -34,6 +34,8 @@
|
|||
*/
|
||||
#define SINGLE_CALL_MAX (2147483648UL)
|
||||
|
||||
static void zerr(int ret);
|
||||
|
||||
static void *
|
||||
slab_alloc_ui(void *p, unsigned int items, unsigned int size) {
|
||||
void *ptr;
|
||||
|
@ -54,7 +56,22 @@ zlib_buf_extra(ssize_t buflen)
|
|||
int
|
||||
zlib_init(void **data, int *level, ssize_t chunksize)
|
||||
{
|
||||
z_stream *zs;
|
||||
int ret;
|
||||
|
||||
zs = slab_alloc(NULL, sizeof (z_stream));
|
||||
zs->zalloc = slab_alloc_ui;
|
||||
zs->zfree = slab_free;
|
||||
zs->opaque = NULL;
|
||||
|
||||
if (*level > 9) *level = 9;
|
||||
ret = deflateInit(zs, *level);
|
||||
if (ret != Z_OK) {
|
||||
zerr(ret);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
*data = zs;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -63,6 +80,14 @@ zlib_stats(int show)
|
|||
{
|
||||
}
|
||||
|
||||
int
|
||||
zlib_deinit(void **data)
|
||||
{
|
||||
if (*data) {
|
||||
slab_free(NULL, *data);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void zerr(int ret)
|
||||
{
|
||||
|
@ -97,23 +122,13 @@ int
|
|||
zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
{
|
||||
z_stream zs;
|
||||
int ret, ending;
|
||||
unsigned int slen, dlen;
|
||||
size_t _srclen = srclen;
|
||||
size_t _dstlen = *dstlen;
|
||||
uchar_t *dst1 = dst;
|
||||
uchar_t *src1 = src;
|
||||
|
||||
zs.zalloc = slab_alloc_ui;
|
||||
zs.zfree = slab_free;
|
||||
zs.opaque = NULL;
|
||||
|
||||
ret = deflateInit(&zs, level);
|
||||
if (ret != Z_OK) {
|
||||
zerr(ret);
|
||||
return (-1);
|
||||
}
|
||||
z_stream *zs = (z_stream *)data;
|
||||
|
||||
ending = 0;
|
||||
while (_srclen > 0) {
|
||||
|
@ -129,21 +144,21 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
dlen = _dstlen;
|
||||
}
|
||||
|
||||
zs.next_in = src1;
|
||||
zs.avail_in = slen;
|
||||
zs.next_out = dst1;
|
||||
zs.avail_out = dlen;
|
||||
zs->next_in = src1;
|
||||
zs->avail_in = slen;
|
||||
zs->next_out = dst1;
|
||||
zs->avail_out = dlen;
|
||||
if (!ending) {
|
||||
ret = deflate(&zs, Z_NO_FLUSH);
|
||||
ret = deflate(zs, Z_NO_FLUSH);
|
||||
if (ret != Z_OK) {
|
||||
deflateEnd(&zs);
|
||||
deflateReset(zs);
|
||||
zerr(ret);
|
||||
return (-1);
|
||||
}
|
||||
} else {
|
||||
ret = deflate(&zs, Z_FINISH);
|
||||
ret = deflate(zs, Z_FINISH);
|
||||
if (ret != Z_STREAM_END) {
|
||||
deflateEnd(&zs);
|
||||
deflateReset(zs);
|
||||
if (ret == Z_OK)
|
||||
zerr(Z_BUF_ERROR);
|
||||
else
|
||||
|
@ -151,14 +166,14 @@ zlib_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
|
|||
return (-1);
|
||||
}
|
||||
}
|
||||
dst1 += (dlen - zs.avail_out);
|
||||
_dstlen -= (dlen - zs.avail_out);
|
||||
dst1 += (dlen - zs->avail_out);
|
||||
_dstlen -= (dlen - zs->avail_out);
|
||||
src1 += slen;
|
||||
_srclen -= slen;
|
||||
}
|
||||
|
||||
*dstlen = *dstlen - _dstlen;
|
||||
ret = deflateEnd(&zs);
|
||||
ret = deflateReset(zs);
|
||||
if (ret != Z_OK) {
|
||||
zerr(ret);
|
||||
return (-1);
|
||||
|
|
Loading…
Reference in a new issue