Implement Adaptive Delta Encoding.
This commit is contained in:
parent
51249c858d
commit
29b0d8fd7b
12 changed files with 93 additions and 22 deletions
11
Makefile.in
11
Makefile.in
|
@ -67,6 +67,10 @@ LZPSRCS = lzp/lzp.c
|
|||
LZPHDRS = lzp/lzp.h
|
||||
LZPOBJS = $(LZPSRCS:.c=.o)
|
||||
|
||||
DELTA2SRCS = delta2/delta2.c
|
||||
DELTA2HDRS = delta2/delta2.h
|
||||
DELTA2OBJS = $(DELTA2SRCS:.c=.o)
|
||||
|
||||
SKEIN_BLOCK_C = crypto/skein/skein_block.c
|
||||
SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s
|
||||
SKEIN_BLOCK_SRC = @SKEIN_BLOCK@
|
||||
|
@ -108,8 +112,8 @@ COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
|||
LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \
|
||||
-L@OPENSSL_LIBDIR@ -lcrypto -lrt
|
||||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ) \
|
||||
@SHA256ASM_OBJS@ @SHA256_OBJS@
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
|
||||
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@
|
||||
|
||||
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
||||
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
||||
|
@ -168,6 +172,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
|
|||
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(DELTA2OBJS): $(DELTA2SRCS) $(DELTA2HDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@
|
||||
|
||||
|
|
|
@ -95,6 +95,12 @@ adapt_stats(int show)
|
|||
ppmd_count = 0;
|
||||
}
|
||||
|
||||
/*
 * Algorithm-properties callback for the adaptive compression modes
 * (init_algo installs it as _props_func next to adapt_init/adapt2_init).
 *
 * The only property written here is data->delta2_stride, which is set
 * to 200. preproc_compress() passes that value as the last argument of
 * delta2_encode() when Adaptive Delta Encoding is enabled and the
 * stride is greater than zero.
 *
 * The level and chunksize parameters are not used by this function.
 */
void
|
||||
adapt_props(algo_props_t *data, int level, ssize_t chunksize)
|
||||
{
|
||||
/* NOTE(review): 200 looks like a per-algorithm tuning value - confirm. */
data->delta2_stride = 200;
|
||||
}
|
||||
|
||||
int
|
||||
adapt_init(void **data, int *level, int nthreads, ssize_t chunksize,
|
||||
int file_version, compress_op_t op)
|
||||
|
|
|
@ -48,6 +48,11 @@ bzip2_stats(int show)
|
|||
{
|
||||
}
|
||||
|
||||
/*
 * Algorithm-properties callback for bzip2 (init_algo installs it as
 * _props_func next to bzip2_init).
 *
 * The only property written here is data->delta2_stride, which is set
 * to 200. preproc_compress() passes that value as the last argument of
 * delta2_encode() when Adaptive Delta Encoding is enabled and the
 * stride is greater than zero.
 *
 * The level and chunksize parameters are not used by this function.
 */
void
|
||||
bzip2_props(algo_props_t *data, int level, ssize_t chunksize) {
|
||||
/* NOTE(review): 200 looks like a per-algorithm tuning value - confirm. */
data->delta2_stride = 200;
|
||||
}
|
||||
|
||||
int
|
||||
bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize,
|
||||
int file_version, compress_op_t op)
|
||||
|
|
|
@ -79,6 +79,7 @@ libbsc_props(algo_props_t *data, int level, ssize_t chunksize) {
|
|||
data->buf_extra = 0;
|
||||
data->c_max_threads = 8;
|
||||
data->d_max_threads = 8;
|
||||
data->delta2_stride = 150;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -56,6 +56,7 @@ lz4_props(algo_props_t *data, int level, ssize_t chunksize) {
|
|||
data->compress_mt_capable = 0;
|
||||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = lz4_buf_extra(chunksize);
|
||||
data->delta2_stride = 50;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -39,6 +39,11 @@ lz_fx_stats(int show)
|
|||
{
|
||||
}
|
||||
|
||||
/*
 * Algorithm-properties callback for lzfx (init_algo installs it as
 * _props_func next to lz_fx_init).
 *
 * The only property written here is data->delta2_stride, which is set
 * to 50. preproc_compress() passes that value as the last argument of
 * delta2_encode() when Adaptive Delta Encoding is enabled and the
 * stride is greater than zero.
 *
 * The level and chunksize parameters are not used by this function.
 */
void
|
||||
lz_fx_props(algo_props_t *data, int level, ssize_t chunksize) {
|
||||
/* NOTE(review): 50 looks like a per-algorithm tuning value - confirm. */
data->delta2_stride = 50;
|
||||
}
|
||||
|
||||
int
|
||||
lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize,
|
||||
int file_version, compress_op_t op)
|
||||
|
|
|
@ -52,6 +52,7 @@ lzma_mt_props(algo_props_t *data, int level, ssize_t chunksize) {
|
|||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = 0;
|
||||
data->c_max_threads = 2;
|
||||
data->delta2_stride = 150;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -59,6 +60,7 @@ lzma_props(algo_props_t *data, int level, ssize_t chunksize) {
|
|||
data->compress_mt_capable = 0;
|
||||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = 0;
|
||||
data->delta2_stride = 150;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
45
main.c
45
main.c
|
@ -80,6 +80,7 @@ static int hide_mem_stats = 1;
|
|||
static int hide_cmp_stats = 1;
|
||||
static int enable_rabin_scan = 0;
|
||||
static int enable_delta_encode = 0;
|
||||
static int enable_delta2_encode = 0;
|
||||
static int enable_rabin_split = 1;
|
||||
static int enable_fixed_scan = 0;
|
||||
static int lzp_preprocess = 0;
|
||||
|
@ -148,6 +149,8 @@ usage(void)
|
|||
"7) Other flags:\n"
|
||||
" '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
|
||||
" algorithms with some extra CPU and very low RAM overhead.\n"
|
||||
" '-P' - Enable Adaptive Delta Encoding. This implies '-L' as well. It improves\n"
|
||||
" compresion ratio further at the cost of more CPU overhead.\n"
|
||||
" '-S' <cksum>\n"
|
||||
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n"
|
||||
" SHA512. Default one is SKEIN256.\n"
|
||||
|
@ -188,7 +191,7 @@ show_compression_stats(uint64_t chunksize)
|
|||
*/
|
||||
int
|
||||
preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
|
||||
{
|
||||
uchar_t *dest = (uchar_t *)dst, type = 0;
|
||||
ssize_t result, _dstlen;
|
||||
|
@ -210,11 +213,14 @@ preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst
|
|||
return (-1);
|
||||
}
|
||||
|
||||
if (enable_delta2_encode && props->delta2_stride > 0) {
|
||||
_dstlen = srclen;
|
||||
result = delta2_encode(src, srclen, dst, &_dstlen, 150);
|
||||
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_stride);
|
||||
if (result != -1) {
|
||||
memcpy(src, dst, _dstlen);
|
||||
srclen = _dstlen;
|
||||
type |= PREPROC_TYPE_DELTA2;
|
||||
}
|
||||
}
|
||||
|
||||
*dest = type;
|
||||
|
@ -225,15 +231,17 @@ preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst
|
|||
*dest |= PREPROC_COMPRESSED;
|
||||
*dstlen = _dstlen + 9;
|
||||
} else {
|
||||
result = -1;
|
||||
}
|
||||
memcpy(dest+1, src, srclen);
|
||||
*dstlen = srclen + 1;
|
||||
result = 0;
|
||||
}
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
int
|
||||
preproc_decompress(compress_func_ptr dec_func, void *src, size_t srclen, void *dst,
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
size_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
|
||||
{
|
||||
uchar_t *sorc = (uchar_t *)src, type;
|
||||
ssize_t result;
|
||||
|
@ -252,6 +260,7 @@ preproc_decompress(compress_func_ptr dec_func, void *src, size_t srclen, void *d
|
|||
srclen = *dstlen;
|
||||
}
|
||||
|
||||
if (type & PREPROC_TYPE_DELTA2) {
|
||||
result = delta2_decode(src, srclen, dst, &_dstlen);
|
||||
if (result != -1) {
|
||||
memcpy(src, dst, _dstlen);
|
||||
|
@ -259,6 +268,7 @@ preproc_decompress(compress_func_ptr dec_func, void *src, size_t srclen, void *d
|
|||
} else {
|
||||
return (result);
|
||||
}
|
||||
}
|
||||
|
||||
if (type & PREPROC_TYPE_LZP) {
|
||||
int hashsize;
|
||||
|
@ -423,7 +433,7 @@ redo:
|
|||
if (HDR & COMPRESSED) {
|
||||
if (HDR & CHUNK_FLAG_PREPROC) {
|
||||
rv = preproc_decompress(tdat->decompress, cmpbuf, dedupe_data_sz_cmp,
|
||||
ubuf, &_chunksize, tdat->level, HDR, tdat->data);
|
||||
ubuf, &_chunksize, tdat->level, HDR, tdat->data, tdat->props);
|
||||
} else {
|
||||
rv = tdat->decompress(cmpbuf, dedupe_data_sz_cmp, ubuf, &_chunksize,
|
||||
tdat->level, HDR, tdat->data);
|
||||
|
@ -452,7 +462,8 @@ redo:
|
|||
if (HDR & COMPRESSED) {
|
||||
if (HDR & CHUNK_FLAG_PREPROC) {
|
||||
rv = preproc_decompress(tdat->decompress, cseg, tdat->len_cmp,
|
||||
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data);
|
||||
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
|
||||
tdat->props);
|
||||
} else {
|
||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||
&_chunksize, tdat->level, HDR, tdat->data);
|
||||
|
@ -875,6 +886,7 @@ start_decompress(const char *filename, const char *to_filename)
|
|||
tdat->cancel = 0;
|
||||
tdat->level = level;
|
||||
tdat->data = NULL;
|
||||
tdat->props = &props;
|
||||
sem_init(&(tdat->start_sem), 0, 0);
|
||||
sem_init(&(tdat->cmp_done_sem), 0, 0);
|
||||
sem_init(&(tdat->write_done_sem), 0, 1);
|
||||
|
@ -1155,7 +1167,7 @@ redo:
|
|||
rv = preproc_compress(tdat->compress,
|
||||
tdat->uncompressed_chunk + dedupe_index_sz,
|
||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||
tdat->level, 0, tdat->data);
|
||||
tdat->level, 0, tdat->data, tdat->props);
|
||||
} else {
|
||||
rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz,
|
||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||
|
@ -1185,7 +1197,8 @@ plain_compress:
|
|||
if (lzp_preprocess) {
|
||||
rv = preproc_compress(tdat->compress,
|
||||
tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data,
|
||||
tdat->props);
|
||||
} else {
|
||||
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
||||
|
@ -1575,6 +1588,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
|
|||
tdat->cancel = 0;
|
||||
tdat->level = level;
|
||||
tdat->data = NULL;
|
||||
tdat->props = &props;
|
||||
sem_init(&(tdat->start_sem), 0, 0);
|
||||
sem_init(&(tdat->cmp_done_sem), 0, 0);
|
||||
sem_init(&(tdat->write_done_sem), 0, 1);
|
||||
|
@ -1927,6 +1941,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = zlib_init;
|
||||
_deinit_func = zlib_deinit;
|
||||
_stats_func = zlib_stats;
|
||||
_props_func = zlib_props;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "lzmaMt", 6) == 0) {
|
||||
|
@ -1953,6 +1968,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = bzip2_init;
|
||||
_deinit_func = NULL;
|
||||
_stats_func = bzip2_stats;
|
||||
_props_func = bzip2_props;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "ppmd", 4) == 0) {
|
||||
|
@ -1961,6 +1977,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = ppmd_init;
|
||||
_deinit_func = ppmd_deinit;
|
||||
_stats_func = ppmd_stats;
|
||||
_props_func = ppmd_props;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "lzfx", 4) == 0) {
|
||||
|
@ -1969,6 +1986,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = lz_fx_init;
|
||||
_deinit_func = lz_fx_deinit;
|
||||
_stats_func = lz_fx_stats;
|
||||
_props_func = lz_fx_props;
|
||||
rv = 0;
|
||||
|
||||
} else if (memcmp(algorithm, "lz4", 3) == 0) {
|
||||
|
@ -1995,6 +2013,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = adapt2_init;
|
||||
_deinit_func = adapt_deinit;
|
||||
_stats_func = adapt_stats;
|
||||
_props_func = adapt_props;
|
||||
adapt_mode = 1;
|
||||
rv = 0;
|
||||
|
||||
|
@ -2004,6 +2023,7 @@ init_algo(const char *algo, int bail)
|
|||
_init_func = adapt_init;
|
||||
_deinit_func = adapt_deinit;
|
||||
_stats_func = adapt_stats;
|
||||
_props_func = adapt_props;
|
||||
adapt_mode = 1;
|
||||
rv = 0;
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
|
@ -2034,7 +2054,7 @@ main(int argc, char *argv[])
|
|||
level = 6;
|
||||
slab_init();
|
||||
|
||||
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEew:rLS:B:F")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEew:rLPS:B:F")) != -1) {
|
||||
int ovr;
|
||||
|
||||
switch (opt) {
|
||||
|
@ -2124,6 +2144,11 @@ main(int argc, char *argv[])
|
|||
lzp_preprocess = 1;
|
||||
break;
|
||||
|
||||
case 'P':
|
||||
lzp_preprocess = 1;
|
||||
enable_delta2_encode = 1;
|
||||
break;
|
||||
|
||||
case 'r':
|
||||
enable_rabin_split = 0;
|
||||
break;
|
||||
|
|
|
@ -56,6 +56,7 @@ extern "C" {
|
|||
#define COMP_EXTN ".pz"
|
||||
|
||||
#define PREPROC_TYPE_LZP 1
|
||||
#define PREPROC_TYPE_DELTA2 2
|
||||
#define PREPROC_COMPRESSED 128
|
||||
|
||||
/*
|
||||
|
@ -135,6 +136,11 @@ extern int none_init(void **data, int *level, int nthreads, ssize_t chunksize,
|
|||
extern void lzma_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void lzma_mt_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void lz4_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void zlib_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void ppmd_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void lz_fx_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void bzip2_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
extern void adapt_props(algo_props_t *data, int level, ssize_t chunksize);
|
||||
|
||||
extern int zlib_deinit(void **data);
|
||||
extern int adapt_deinit(void **data);
|
||||
|
@ -188,6 +194,7 @@ struct cmp_data {
|
|||
void *data;
|
||||
pthread_t thr;
|
||||
mac_ctx_t chunk_hmac;
|
||||
algo_props_t *props;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -61,6 +61,11 @@ ppmd_stats(int show)
|
|||
{
|
||||
}
|
||||
|
||||
/*
 * Algorithm-properties callback for ppmd (init_algo installs it as
 * _props_func next to ppmd_init).
 *
 * The only property written here is data->delta2_stride, which is set
 * to 100. preproc_compress() passes that value as the last argument of
 * delta2_encode() when Adaptive Delta Encoding is enabled and the
 * stride is greater than zero.
 *
 * The level and chunksize parameters are not used by this function.
 */
void
|
||||
ppmd_props(algo_props_t *data, int level, ssize_t chunksize) {
|
||||
/* NOTE(review): 100 looks like a per-algorithm tuning value - confirm. */
data->delta2_stride = 100;
|
||||
}
|
||||
|
||||
int
|
||||
ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize,
|
||||
int file_version, compress_op_t op)
|
||||
|
|
|
@ -111,6 +111,7 @@ typedef struct {
|
|||
int nthreads;
|
||||
int c_max_threads;
|
||||
int d_max_threads;
|
||||
int delta2_stride;
|
||||
} algo_props_t;
|
||||
|
||||
typedef enum {
|
||||
|
@ -205,6 +206,7 @@ init_algo_props(algo_props_t *props)
|
|||
props->nthreads = 1;
|
||||
props->c_max_threads = 1;
|
||||
props->d_max_threads = 1;
|
||||
props->delta2_stride = 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -89,6 +89,11 @@ zlib_stats(int show)
|
|||
{
|
||||
}
|
||||
|
||||
/*
 * Algorithm-properties callback for zlib (init_algo installs it as
 * _props_func next to zlib_init).
 *
 * The only property written here is data->delta2_stride, which is set
 * to 50. preproc_compress() passes that value as the last argument of
 * delta2_encode() when Adaptive Delta Encoding is enabled and the
 * stride is greater than zero.
 *
 * The level and chunksize parameters are not used by this function.
 */
void
|
||||
zlib_props(algo_props_t *data, int level, ssize_t chunksize) {
|
||||
/* NOTE(review): 50 looks like a per-algorithm tuning value - confirm. */
data->delta2_stride = 50;
|
||||
}
|
||||
|
||||
int
|
||||
zlib_deinit(void **data)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue