Implement Adaptive Delta Encoding.

This commit is contained in:
Moinak Ghosh 2012-12-05 00:09:47 +05:30
parent 51249c858d
commit 29b0d8fd7b
12 changed files with 93 additions and 22 deletions

View file

@ -67,6 +67,10 @@ LZPSRCS = lzp/lzp.c
LZPHDRS = lzp/lzp.h
LZPOBJS = $(LZPSRCS:.c=.o)
DELTA2SRCS = delta2/delta2.c
DELTA2HDRS = delta2/delta2.h
DELTA2OBJS = $(DELTA2SRCS:.c=.o)
SKEIN_BLOCK_C = crypto/skein/skein_block.c
SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s
SKEIN_BLOCK_SRC = @SKEIN_BLOCK@
@ -108,8 +112,8 @@ COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \
-L@OPENSSL_LIBDIR@ -lcrypto -lrt
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ) \
@SHA256ASM_OBJS@ @SHA256_OBJS@
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
@ -168,6 +172,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(DELTA2OBJS): $(DELTA2SRCS) $(DELTA2HDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@

View file

@ -95,6 +95,12 @@ adapt_stats(int show)
ppmd_count = 0;
}
/*
 * Fill in the algorithm properties for the adaptive compression modes.
 *
 * The only field written here is delta2_stride, which is set to 200.
 * The level and chunksize arguments are accepted for signature
 * compatibility with the other *_props() routines and are not consulted.
 */
void
adapt_props(algo_props_t *data, int level, ssize_t chunksize)
{
	const int stride = 200;

	(void)level;
	(void)chunksize;
	data->delta2_stride = stride;
}
int
adapt_init(void **data, int *level, int nthreads, ssize_t chunksize,
int file_version, compress_op_t op)

View file

@ -48,6 +48,11 @@ bzip2_stats(int show)
{
}
/*
 * Fill in the algorithm properties for bzip2.
 *
 * Writes delta2_stride = 200 and leaves every other field of *data
 * untouched. Neither level nor chunksize influences the result.
 */
void
bzip2_props(algo_props_t *data, int level, ssize_t chunksize)
{
	enum { BZIP2_DELTA2_STRIDE = 200 };

	(void)level;
	(void)chunksize;
	data->delta2_stride = BZIP2_DELTA2_STRIDE;
}
int
bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize,
int file_version, compress_op_t op)

View file

@ -79,6 +79,7 @@ libbsc_props(algo_props_t *data, int level, ssize_t chunksize) {
data->buf_extra = 0;
data->c_max_threads = 8;
data->d_max_threads = 8;
data->delta2_stride = 150;
}
int

View file

@ -56,6 +56,7 @@ lz4_props(algo_props_t *data, int level, ssize_t chunksize) {
data->compress_mt_capable = 0;
data->decompress_mt_capable = 0;
data->buf_extra = lz4_buf_extra(chunksize);
data->delta2_stride = 50;
}
int

View file

@ -39,6 +39,11 @@ lz_fx_stats(int show)
{
}
/*
 * Fill in the algorithm properties for LZFX.
 *
 * Writes delta2_stride = 50; no other field of *data is modified.
 * The level and chunksize arguments are unused.
 */
void
lz_fx_props(algo_props_t *data, int level, ssize_t chunksize)
{
	const int stride = 50;

	(void)level;
	(void)chunksize;
	data->delta2_stride = stride;
}
int
lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize,
int file_version, compress_op_t op)

View file

@ -52,6 +52,7 @@ lzma_mt_props(algo_props_t *data, int level, ssize_t chunksize) {
data->decompress_mt_capable = 0;
data->buf_extra = 0;
data->c_max_threads = 2;
data->delta2_stride = 150;
}
void
@ -59,6 +60,7 @@ lzma_props(algo_props_t *data, int level, ssize_t chunksize) {
data->compress_mt_capable = 0;
data->decompress_mt_capable = 0;
data->buf_extra = 0;
data->delta2_stride = 150;
}
/*

65
main.c
View file

@ -80,6 +80,7 @@ static int hide_mem_stats = 1;
static int hide_cmp_stats = 1;
static int enable_rabin_scan = 0;
static int enable_delta_encode = 0;
static int enable_delta2_encode = 0;
static int enable_rabin_split = 1;
static int enable_fixed_scan = 0;
static int lzp_preprocess = 0;
@ -148,6 +149,8 @@ usage(void)
"7) Other flags:\n"
" '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
" algorithms with some extra CPU and very low RAM overhead.\n"
" '-P' - Enable Adaptive Delta Encoding. This implies '-L' as well. It improves\n"
" compresion ratio further at the cost of more CPU overhead.\n"
" '-S' <cksum>\n"
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n"
" SHA512. Default one is SKEIN256.\n"
@ -188,7 +191,7 @@ show_compression_stats(uint64_t chunksize)
*/
int
preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data)
size_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
{
uchar_t *dest = (uchar_t *)dst, type = 0;
ssize_t result, _dstlen;
@ -210,11 +213,14 @@ preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst
return (-1);
}
_dstlen = srclen;
result = delta2_encode(src, srclen, dst, &_dstlen, 150);
if (result != -1) {
memcpy(src, dst, _dstlen);
srclen = _dstlen;
if (enable_delta2_encode && props->delta2_stride > 0) {
_dstlen = srclen;
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_stride);
if (result != -1) {
memcpy(src, dst, _dstlen);
srclen = _dstlen;
type |= PREPROC_TYPE_DELTA2;
}
}
*dest = type;
@ -225,15 +231,17 @@ preproc_compress(compress_func_ptr cmp_func, void *src, size_t srclen, void *dst
*dest |= PREPROC_COMPRESSED;
*dstlen = _dstlen + 9;
} else {
result = -1;
memcpy(dest+1, src, srclen);
*dstlen = srclen + 1;
result = 0;
}
result = 0;
return (result);
}
int
preproc_decompress(compress_func_ptr dec_func, void *src, size_t srclen, void *dst,
size_t *dstlen, int level, uchar_t chdr, void *data)
size_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
{
uchar_t *sorc = (uchar_t *)src, type;
ssize_t result;
@ -252,12 +260,14 @@ preproc_decompress(compress_func_ptr dec_func, void *src, size_t srclen, void *d
srclen = *dstlen;
}
result = delta2_decode(src, srclen, dst, &_dstlen);
if (result != -1) {
memcpy(src, dst, _dstlen);
srclen = _dstlen;
} else {
return (result);
if (type & PREPROC_TYPE_DELTA2) {
result = delta2_decode(src, srclen, dst, &_dstlen);
if (result != -1) {
memcpy(src, dst, _dstlen);
srclen = _dstlen;
} else {
return (result);
}
}
if (type & PREPROC_TYPE_LZP) {
@ -423,7 +433,7 @@ redo:
if (HDR & COMPRESSED) {
if (HDR & CHUNK_FLAG_PREPROC) {
rv = preproc_decompress(tdat->decompress, cmpbuf, dedupe_data_sz_cmp,
ubuf, &_chunksize, tdat->level, HDR, tdat->data);
ubuf, &_chunksize, tdat->level, HDR, tdat->data, tdat->props);
} else {
rv = tdat->decompress(cmpbuf, dedupe_data_sz_cmp, ubuf, &_chunksize,
tdat->level, HDR, tdat->data);
@ -452,7 +462,8 @@ redo:
if (HDR & COMPRESSED) {
if (HDR & CHUNK_FLAG_PREPROC) {
rv = preproc_decompress(tdat->decompress, cseg, tdat->len_cmp,
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data);
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
tdat->props);
} else {
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
&_chunksize, tdat->level, HDR, tdat->data);
@ -875,6 +886,7 @@ start_decompress(const char *filename, const char *to_filename)
tdat->cancel = 0;
tdat->level = level;
tdat->data = NULL;
tdat->props = &props;
sem_init(&(tdat->start_sem), 0, 0);
sem_init(&(tdat->cmp_done_sem), 0, 0);
sem_init(&(tdat->write_done_sem), 0, 1);
@ -1155,7 +1167,7 @@ redo:
rv = preproc_compress(tdat->compress,
tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, tdat->data);
tdat->level, 0, tdat->data, tdat->props);
} else {
rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
@ -1185,7 +1197,8 @@ plain_compress:
if (lzp_preprocess) {
rv = preproc_compress(tdat->compress,
tdat->uncompressed_chunk, tdat->rbytes,
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data,
tdat->props);
} else {
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
@ -1575,6 +1588,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
tdat->cancel = 0;
tdat->level = level;
tdat->data = NULL;
tdat->props = &props;
sem_init(&(tdat->start_sem), 0, 0);
sem_init(&(tdat->cmp_done_sem), 0, 0);
sem_init(&(tdat->write_done_sem), 0, 1);
@ -1927,6 +1941,7 @@ init_algo(const char *algo, int bail)
_init_func = zlib_init;
_deinit_func = zlib_deinit;
_stats_func = zlib_stats;
_props_func = zlib_props;
rv = 0;
} else if (memcmp(algorithm, "lzmaMt", 6) == 0) {
@ -1953,6 +1968,7 @@ init_algo(const char *algo, int bail)
_init_func = bzip2_init;
_deinit_func = NULL;
_stats_func = bzip2_stats;
_props_func = bzip2_props;
rv = 0;
} else if (memcmp(algorithm, "ppmd", 4) == 0) {
@ -1961,6 +1977,7 @@ init_algo(const char *algo, int bail)
_init_func = ppmd_init;
_deinit_func = ppmd_deinit;
_stats_func = ppmd_stats;
_props_func = ppmd_props;
rv = 0;
} else if (memcmp(algorithm, "lzfx", 4) == 0) {
@ -1969,6 +1986,7 @@ init_algo(const char *algo, int bail)
_init_func = lz_fx_init;
_deinit_func = lz_fx_deinit;
_stats_func = lz_fx_stats;
_props_func = lz_fx_props;
rv = 0;
} else if (memcmp(algorithm, "lz4", 3) == 0) {
@ -1995,6 +2013,7 @@ init_algo(const char *algo, int bail)
_init_func = adapt2_init;
_deinit_func = adapt_deinit;
_stats_func = adapt_stats;
_props_func = adapt_props;
adapt_mode = 1;
rv = 0;
@ -2004,6 +2023,7 @@ init_algo(const char *algo, int bail)
_init_func = adapt_init;
_deinit_func = adapt_deinit;
_stats_func = adapt_stats;
_props_func = adapt_props;
adapt_mode = 1;
rv = 0;
#ifdef ENABLE_PC_LIBBSC
@ -2034,7 +2054,7 @@ main(int argc, char *argv[])
level = 6;
slab_init();
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEew:rLS:B:F")) != -1) {
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEew:rLPS:B:F")) != -1) {
int ovr;
switch (opt) {
@ -2124,6 +2144,11 @@ main(int argc, char *argv[])
lzp_preprocess = 1;
break;
case 'P':
lzp_preprocess = 1;
enable_delta2_encode = 1;
break;
case 'r':
enable_rabin_split = 0;
break;

View file

@ -56,6 +56,7 @@ extern "C" {
#define COMP_EXTN ".pz"
#define PREPROC_TYPE_LZP 1
#define PREPROC_TYPE_DELTA2 2
#define PREPROC_COMPRESSED 128
/*
@ -135,6 +136,11 @@ extern int none_init(void **data, int *level, int nthreads, ssize_t chunksize,
extern void lzma_props(algo_props_t *data, int level, ssize_t chunksize);
extern void lzma_mt_props(algo_props_t *data, int level, ssize_t chunksize);
extern void lz4_props(algo_props_t *data, int level, ssize_t chunksize);
extern void zlib_props(algo_props_t *data, int level, ssize_t chunksize);
extern void ppmd_props(algo_props_t *data, int level, ssize_t chunksize);
extern void lz_fx_props(algo_props_t *data, int level, ssize_t chunksize);
extern void bzip2_props(algo_props_t *data, int level, ssize_t chunksize);
extern void adapt_props(algo_props_t *data, int level, ssize_t chunksize);
extern int zlib_deinit(void **data);
extern int adapt_deinit(void **data);
@ -188,6 +194,7 @@ struct cmp_data {
void *data;
pthread_t thr;
mac_ctx_t chunk_hmac;
algo_props_t *props;
};
#ifdef __cplusplus

View file

@ -61,6 +61,11 @@ ppmd_stats(int show)
{
}
/*
 * Fill in the algorithm properties for PPMd.
 *
 * Writes delta2_stride = 100 and nothing else. The level and chunksize
 * arguments are part of the common *_props() signature but are ignored.
 */
void
ppmd_props(algo_props_t *data, int level, ssize_t chunksize)
{
	enum { PPMD_DELTA2_STRIDE = 100 };

	(void)level;
	(void)chunksize;
	data->delta2_stride = PPMD_DELTA2_STRIDE;
}
int
ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize,
int file_version, compress_op_t op)

View file

@ -111,6 +111,7 @@ typedef struct {
int nthreads;
int c_max_threads;
int d_max_threads;
int delta2_stride;
} algo_props_t;
typedef enum {
@ -205,6 +206,7 @@ init_algo_props(algo_props_t *props)
props->nthreads = 1;
props->c_max_threads = 1;
props->d_max_threads = 1;
props->delta2_stride = 0;
}
#ifdef __cplusplus

View file

@ -89,6 +89,11 @@ zlib_stats(int show)
{
}
/*
 * Fill in the algorithm properties for zlib.
 *
 * Writes delta2_stride = 50; all remaining fields of *data keep whatever
 * values they already hold. The level and chunksize arguments are unused.
 */
void
zlib_props(algo_props_t *data, int level, ssize_t chunksize)
{
	const int stride = 50;

	(void)level;
	(void)chunksize;
	data->delta2_stride = stride;
}
int
zlib_deinit(void **data)
{