diff --git a/adaptive_compress.c b/adaptive_compress.c index abf7828..608e4e9 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -45,6 +45,7 @@ static unsigned int lzma_count = 0; static unsigned int bzip2_count = 0; static unsigned int bsc_count = 0; static unsigned int ppmd_count = 0; +static unsigned int lz4_count = 0; extern int lzma_compress(void *src, uint64_t srclen, void *dst, uint64_t *destlen, int level, uchar_t chdr, int btype, void *data); @@ -54,6 +55,8 @@ extern int ppmd_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); extern int libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); +extern int lz4_compress(void *src, uint64_t srclen, void *dst, + uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); extern int lzma_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); @@ -63,6 +66,8 @@ extern int ppmd_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); extern int libbsc_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); +extern int lz4_decompress(void *src, uint64_t srclen, void *dst, + uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data); extern int lzma_init(void **data, int *level, int nthreads, uint64_t chunksize, int file_version, compress_op_t op); @@ -73,11 +78,15 @@ extern int ppmd_deinit(void **data); extern int libbsc_init(void **data, int *level, int nthreads, uint64_t chunksize, int file_version, compress_op_t op); extern int libbsc_deinit(void **data); +extern int lz4_init(void **data, int *level, int nthreads, uint64_t chunksize, + int file_version, compress_op_t op); +extern int lz4_deinit(void **data); struct adapt_data { void *lzma_data; void *ppmd_data; void *bsc_data; + 
void *lz4_data; int adapt_mode; }; @@ -86,11 +95,12 @@ adapt_stats(int show) { if (show) { - if (bzip2_count > 0 || bsc_count > 0 || ppmd_count > 0 || lzma_count > 0) { - log_msg(LOG_INFO, 0, "Adaptive mode stats:\n"); - log_msg(LOG_INFO, 0, " BZIP2 chunk count: %u\n", bzip2_count); - log_msg(LOG_INFO, 0, " LIBBSC chunk count: %u\n", bsc_count); - log_msg(LOG_INFO, 0, " PPMd chunk count: %u\n", ppmd_count); - log_msg(LOG_INFO, 0, " LZMA chunk count: %u\n\n", lzma_count); + if (bzip2_count > 0 || bsc_count > 0 || ppmd_count > 0 || lzma_count > 0 || lz4_count > 0) { + log_msg(LOG_INFO, 0, "Adaptive mode stats:"); + log_msg(LOG_INFO, 0, " BZIP2 chunk count: %u", bzip2_count); + log_msg(LOG_INFO, 0, " LIBBSC chunk count: %u", bsc_count); + log_msg(LOG_INFO, 0, " PPMd chunk count: %u", ppmd_count); + log_msg(LOG_INFO, 0, " LZMA chunk count: %u", lzma_count); + log_msg(LOG_INFO, 0, " LZ4 chunk count: %u", lz4_count); } else { log_msg(LOG_INFO, 0, "\n"); } @@ -99,6 +109,7 @@ adapt_stats(int show) bzip2_count = 0; bsc_count = 0; ppmd_count = 0; + lz4_count = 0; } void @@ -119,6 +130,8 @@ adapt_init(void **data, int *level, int nthreads, uint64_t chunksize, adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data)); adat->adapt_mode = 1; rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize, file_version, op); + if (rv == 0) + rv = lz4_init(&(adat->lz4_data), level, nthreads, chunksize, file_version, op); adat->lzma_data = NULL; adat->bsc_data = NULL; *data = adat; @@ -128,6 +141,7 @@ adapt_init(void **data, int *level, int nthreads, uint64_t chunksize, bzip2_count = 0; ppmd_count = 0; bsc_count = 0; + lz4_count = 0; return (rv); } @@ -153,6 +167,8 @@ adapt2_init(void **data, int *level, int nthreads, uint64_t chunksize, if (rv == 0) rv = libbsc_init(&(adat->bsc_data), &lv, nthreads, chunksize, file_version, op); #endif + if (rv == 0) + rv = lz4_init(&(adat->lz4_data), level, nthreads, chunksize, file_version, op); *data = adat; if (*level > 9) *level = 9; } @@ -160,6 +176,7 @@ adapt2_init(void **data, int *level, int nthreads, uint64_t chunksize, 
bzip2_count = 0; ppmd_count = 0; bsc_count = 0; + lz4_count = 0; return (rv); } @@ -173,6 +190,8 @@ adapt_deinit(void **data) rv = ppmd_deinit(&(adat->ppmd_data)); if (adat->lzma_data) rv += lzma_deinit(&(adat->lzma_data)); + if (adat->lz4_data) + rv += lz4_deinit(&(adat->lz4_data)); slab_free(NULL, adat); *data = NULL; } @@ -227,9 +246,18 @@ adapt_compress(void *src, uint64_t srclen, void *dst, /* * Use PPMd if some percentage of source is 7-bit textual bytes, otherwise - * use Bzip2 or LZMA. + * use Bzip2 or LZMA. For totally incompressible data we always use LZ4. There + * is no point trying to compress such data, like Jpegs. However some archive headers + * and zero paddings can exist which LZ4 can easily take care of very fast. */ - if (adat->adapt_mode == 2 && (PC_TYPE(btype) == TYPE_BINARY)) { + if (is_incompressible(btype)) { + rv = lz4_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data); + if (rv < 0) + return (rv); + rv = ADAPT_COMPRESS_LZ4; + lz4_count++; + + } else if (adat->adapt_mode == 2 && (PC_TYPE(btype) == TYPE_BINARY)) { rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data); if (rv < 0) return (rv); @@ -273,9 +301,12 @@ adapt_decompress(void *src, uint64_t srclen, void *dst, struct adapt_data *adat = (struct adapt_data *)(data); uchar_t cmp_flags; - cmp_flags = (chdr>>4) & CHDR_ALGO_MASK; + cmp_flags = CHDR_ALGO(chdr); - if (cmp_flags == ADAPT_COMPRESS_LZMA) { + if (cmp_flags == ADAPT_COMPRESS_LZ4) { + return (lz4_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data)); + + } else if (cmp_flags == ADAPT_COMPRESS_LZMA) { return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data)); } else if (cmp_flags == ADAPT_COMPRESS_BZIP2) { diff --git a/archive/pc_arc_filter.c b/archive/pc_arc_filter.c index a3248f3..2d98679 100644 --- a/archive/pc_arc_filter.c +++ b/archive/pc_arc_filter.c @@ -220,6 +220,7 @@ packjpg_filter(struct filter_info *fi, void 
*filter_private) if ((len = packjpg_filter_process(mapbuf, len, &out)) == 0) { return (FILTER_RETURN_SKIP); } + in_size = LE64(len); rv = archive_write_data(fi->target_arc, &in_size, 8); if (rv != 8) diff --git a/bsdiff/rle_encoder.c b/bsdiff/rle_encoder.c index cfd7a3e..8773fd5 100644 --- a/bsdiff/rle_encoder.c +++ b/bsdiff/rle_encoder.c @@ -138,4 +138,3 @@ zero_rle_decode(const void* ibuf, unsigned int ilen, } } - diff --git a/compressed_file_format.txt b/compressed_file_format.txt index 5b1bfbf..9e81023 100644 --- a/compressed_file_format.txt +++ b/compressed_file_format.txt @@ -66,9 +66,11 @@ X Bytes - Chunk Header CRC32 for normal compression | | | `---- 1 - Chunk was Deduped | | `------- 1 - Chunk was pre-compressed | | - | | 1 - Bzip2 (Adaptive Mode) - | `---------------- 2 - Lzma (Adaptive Mode) - | 3 - PPMD (Adaptive Mode) + | | 1 - Lzma (Adaptive Mode) + | | 2 - Bzip2 (Adaptive Mode) + | `---------------- 3 - PPMD (Adaptive Mode) + | 4 - Libbsc (Adaptive Mode) + | 5 - LZ4 (Adaptive Mode) | `---------------------- 1 - Chunk size flag (if original chunk is of variable length) diff --git a/pcompress.c b/pcompress.c index aba558c..d160a06 100644 --- a/pcompress.c +++ b/pcompress.c @@ -1544,23 +1544,23 @@ plain_index: o_chunksize = _chunksize; /* Compress data chunk. 
*/ - if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0) { - rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz, - _chunksize, compressed_chunk + index_size_cmp, &_chunksize, - tdat->level, 0, pctx->btype, tdat->data, tdat->props); - - } else if (_chunksize > 0) { + if (_chunksize == 0) { + rv = -1; + } else if ((pctx->lzp_preprocess || pctx->enable_delta2_encode)) { + rv = preproc_compress(pctx, tdat->compress, + tdat->uncompressed_chunk + dedupe_index_sz, _chunksize, + compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, + tdat->btype, tdat->data, tdat->props); + } else { DEBUG_STAT_EN(double strt, en); DEBUG_STAT_EN(strt = get_wtime_millis()); - rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, _chunksize, - compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, pctx->btype, - tdat->data); + rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, + _chunksize, compressed_chunk + index_size_cmp, &_chunksize, + tdat->level, 0, tdat->btype, tdat->data); DEBUG_STAT_EN(en = get_wtime_millis()); DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n", get_mb_s(_chunksize, strt, en))); - } else { - rv = -1; } /* Can't compress data just retain as-is. 
*/ @@ -1576,16 +1576,16 @@ plain_index: } else { _chunksize = tdat->rbytes; if (pctx->lzp_preprocess || pctx->enable_delta2_encode) { - rv = preproc_compress(pctx, tdat->compress, - tdat->uncompressed_chunk, tdat->rbytes, - compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data, - tdat->props); + rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk, + tdat->rbytes, compressed_chunk, &_chunksize, tdat->level, 0, + tdat->btype, tdat->data, tdat->props); } else { DEBUG_STAT_EN(double strt, en); DEBUG_STAT_EN(strt = get_wtime_millis()); rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes, - compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data); + compressed_chunk, &_chunksize, tdat->level, 0, tdat->btype, + tdat->data); DEBUG_STAT_EN(en = get_wtime_millis()); DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n", get_mb_s(_chunksize, strt, en))); @@ -3123,7 +3123,6 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) if (pctx->do_compress) { struct stat sbuf; - struct filter_flags ff; if (pctx->filename && stat(pctx->filename, &sbuf) == -1) { log_msg(LOG_ERR, 1, "Cannot stat: %s", pctx->filename); @@ -3133,17 +3132,22 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) /* * Selectively enable filters while compressing. */ - ff.enable_packjpg = 0; - if (pctx->level > 9) ff.enable_packjpg = 1; - init_filters(&ff); + if (pctx->archive_mode) { + struct filter_flags ff; + + ff.enable_packjpg = 0; + if (pctx->level > 9) ff.enable_packjpg = 1; + init_filters(&ff); + pctx->enable_packjpg = ff.enable_packjpg; + } } else if (pctx->do_uncompress) { struct filter_flags ff; - /* * Enable all filters while decompressing. Obviously! 
*/ ff.enable_packjpg = 1; + pctx->enable_packjpg = 1; init_filters(&ff); } pctx->inited = 1; diff --git a/pcompress.h b/pcompress.h index 6cbfb0c..d9ea44b 100644 --- a/pcompress.h +++ b/pcompress.h @@ -78,7 +78,14 @@ extern "C" { #define ADAPT_COMPRESS_BZIP2 2 #define ADAPT_COMPRESS_PPMD 3 #define ADAPT_COMPRESS_BSC 4 +/* + * This is used in adaptive modes in cases where the data is deemed totally incompressible. + * We can still have zero padding and archive headers that can be compressed. So we use the + * fastest algo at our disposal for these cases. + */ +#define ADAPT_COMPRESS_LZ4 5 #define CHDR_ALGO_MASK 7 +#define CHDR_ALGO(x) (((x)>>4) & CHDR_ALGO_MASK) extern uint32_t zlib_buf_extra(uint64_t buflen); extern int lz4_buf_extra(uint64_t buflen); @@ -226,6 +233,7 @@ typedef struct pc_ctx { int arc_closed, arc_writing; uchar_t btype, ctype; int min_chunk; + int enable_packjpg; unsigned int chunk_num; uint64_t largest_chunk, smallest_chunk, avg_chunk; diff --git a/utils/utils.c b/utils/utils.c index a73de5c..6f3a591 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -565,3 +565,13 @@ rm_fname(char *fn) } pthread_mutex_unlock(&f_mutex); } + +int +is_incompressible(int type) +{ + int subtype = PC_SUBTYPE(type); + /* Only JPEG and PackJPG subtypes count as incompressible. */ + if (subtype == TYPE_JPEG || subtype == TYPE_PACKJPG) + return (1); + return (0); +} diff --git a/utils/utils.h b/utils/utils.h index 1b01f92..a4a68ff 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -356,6 +356,11 @@ void handle_signals(); void add_fname(char *fn); void rm_fname(char *fn); +/* + * Some types (like Jpeg) are totally incompressible. + */ +int is_incompressible(int type); + /* * Roundup v to the nearest power of 2. From Bit Twiddling Hacks: * http://graphics.stanford.edu/~seander/bithacks.html