diff --git a/adaptive_compress.c b/adaptive_compress.c index 13991a2..ce63f2b 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -98,7 +98,7 @@ adapt_stats(int show) void adapt_props(algo_props_t *data, int level, int64_t chunksize) { - data->delta2_stride = 200; + data->delta2_span = 200; } int diff --git a/bzip2_compress.c b/bzip2_compress.c index a3896a7..1819df0 100644 --- a/bzip2_compress.c +++ b/bzip2_compress.c @@ -50,7 +50,7 @@ bzip2_stats(int show) void bzip2_props(algo_props_t *data, int level, int64_t chunksize) { - data->delta2_stride = 200; + data->delta2_span = 200; } int diff --git a/delta2/delta2.c b/delta2/delta2.c index c125d30..e7302e4 100644 --- a/delta2/delta2.c +++ b/delta2/delta2.c @@ -30,18 +30,23 @@ * Bytes are packed into integers in big-endian format. * * After an optimal stride length has been identified the encoder - * performs a delta run length encoding on the spans. Two types of + * performs a delta run length encoding on the spans. Three types of * objects are output by the encoder: * 1) A literal run of unmodified bytes. Header: 1 zero byte followed * by a 64bit length in bytes. - * 2) An encoded run length of a series in arithmetic progression. - * Header: 1 byte stride length + * 2) A literal run of transposed bytes containing at least 87% below + * threshold sequences. + * Header: 1 byte stride length with high bit set. + * 64bit length of span in bytes. + * 3) An encoded run length of a series in arithmetic progression. + * Header: 1 byte stride length (must be less than 128) + * 64bit length of span in bytes * 64bit starting value of series * 64bit delta value */ #include <stdio.h> #include <string.h> +#include <transpose.h> #include "delta2.h" // Size of original data. 64 bits. 
@@ -59,6 +64,9 @@ // 64bit delta value #define DELTA_HDR (1 + (sizeof (uint64_t)) * 3) +// Minimum span length +#define MIN_THRESH (50) + int delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh) { @@ -68,7 +76,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int uchar_t *pos, *pos1, *pos2, stride, st1; uchar_t strides[4] = {3, 5, 7, 8}; int st, sz; + DEBUG_STAT_EN(uint32_t num_trans); + if (rle_thresh < MIN_THRESH) + return (-1); gtot1 = ULL_MAX; stride = 0; sz = sizeof (strides) / sizeof (strides[0]); @@ -107,12 +118,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int vl1 = vl2; pos += st1; } - if (snum > 1) { - if (snum > rle_thresh) { - gtot2 += DELTA_HDR; - } else { - gtot2 += snum; - } + if (snum > rle_thresh) { + gtot2 += DELTA_HDR; + } else { + gtot2 += snum; } if (gtot2 < gtot1) { gtot1 = gtot2; @@ -139,6 +148,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int pos1 += MAIN_HDR; pos2 = pos1; pos1 += LIT_HDR; + gtot2 = 0; + DEBUG_STAT_EN(num_trans = 0); vl2 = *((uint64_t *)pos); vl2 = htonll(vl2); @@ -154,13 +165,27 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int if (snum > rle_thresh) { if (gtot1 > 0) { /* - * Encode previous literal run, if any. + * Encode previous literal run, if any. If the literal run + * has enough large sequences just below threshold, do a + * matrix transpose on the range in the hope of achieving + * a better compression ratio. 
*/ - *pos2 = 0; - pos2++; - *((uint64_t *)pos2) = htonll(gtot1); - pos2 += (gtot1 + sizeof (uint64_t)); + if (gtot2 >= ((gtot1 >> 1) + (gtot1 >> 2) + (gtot1 >> 3))) { + *pos2 = stride | 128; + pos2++; + *((uint64_t *)pos2) = htonll(gtot1); + pos2 += sizeof (uint64_t); + transpose(pos - (gtot1+snum), pos2, gtot1, stride, ROW); + DEBUG_STAT_EN(num_trans++); + } else { + *pos2 = 0; + pos2++; + *((uint64_t *)pos2) = htonll(gtot1); + pos2 += sizeof (uint64_t); + } + pos2 += gtot1; gtot1 = 0; + gtot2 = 0; } /* * RLE Encode delta series. @@ -176,6 +201,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int pos1 = pos2 + LIT_HDR; } else { gtot1 += snum; + if (snum >= 50) + gtot2 += snum; } snum = 0; sval = vl2; @@ -230,6 +257,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int } } *dstlen = pos2 - dst; + DEBUG_STAT_EN(printf("%u transpositions\n", num_trans)); return (0); } @@ -262,10 +290,27 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen) if (out + rcnt > *dstlen) { return (-1); } - for (cnt = 0; cnt < rcnt; cnt++) { - *pos1 = *pos; - pos++; pos1++; out++; + memcpy(pos1, pos, rcnt); + pos += rcnt; + pos1 += rcnt; + out += rcnt; + + } else if (*pos & 128) { + int stride; + /* + * Copy over literal run of transposed bytes and inverse-transpose. 
+ */ + stride = (*pos & 127); + pos++; + rcnt = ntohll(*((uint64_t *)pos)); + pos += sizeof (rcnt); + if (out + rcnt > *dstlen) { + return (-1); } + transpose(pos, pos1, rcnt, stride, COL); + pos += rcnt; + pos1 += rcnt; + out += rcnt; } else { stride = *pos; pos++; diff --git a/libbsc_compress.c b/libbsc_compress.c index a7620e5..a3b5dfd 100644 --- a/libbsc_compress.c +++ b/libbsc_compress.c @@ -79,7 +79,7 @@ libbsc_props(algo_props_t *data, int level, int64_t chunksize) { data->buf_extra = 0; data->c_max_threads = 8; data->d_max_threads = 8; - data->delta2_stride = 150; + data->delta2_span = 150; } int diff --git a/lz4_compress.c b/lz4_compress.c index b8dfbdf..96c0541 100644 --- a/lz4_compress.c +++ b/lz4_compress.c @@ -56,7 +56,7 @@ lz4_props(algo_props_t *data, int level, int64_t chunksize) { data->compress_mt_capable = 0; data->decompress_mt_capable = 0; data->buf_extra = lz4_buf_extra(chunksize); - data->delta2_stride = 50; + data->delta2_span = 50; } int diff --git a/lzfx_compress.c b/lzfx_compress.c index 1319936..cd874c7 100644 --- a/lzfx_compress.c +++ b/lzfx_compress.c @@ -41,7 +41,7 @@ lz_fx_stats(int show) void lz_fx_props(algo_props_t *data, int level, int64_t chunksize) { - data->delta2_stride = 50; + data->delta2_span = 50; } int diff --git a/lzma_compress.c b/lzma_compress.c index c7f77f9..dd92530 100644 --- a/lzma_compress.c +++ b/lzma_compress.c @@ -52,7 +52,7 @@ lzma_mt_props(algo_props_t *data, int level, int64_t chunksize) { data->decompress_mt_capable = 0; data->buf_extra = 0; data->c_max_threads = 2; - data->delta2_stride = 150; + data->delta2_span = 150; } void @@ -60,7 +60,7 @@ lzma_props(algo_props_t *data, int level, int64_t chunksize) { data->compress_mt_capable = 0; data->decompress_mt_capable = 0; data->buf_extra = 0; - data->delta2_stride = 150; + data->delta2_span = 150; } /* diff --git a/main.c b/main.c index 696a820..4dd3aef 100644 --- a/main.c +++ b/main.c @@ -215,9 +215,9 @@ preproc_compress(compress_func_ptr cmp_func, void 
*src, uint64_t srclen, void *d return (-1); } - if (enable_delta2_encode && props->delta2_stride > 0) { + if (enable_delta2_encode && props->delta2_span > 0) { _dstlen = srclen; - result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_stride); + result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_span); if (result != -1) { memcpy(src, dst, _dstlen); srclen = _dstlen; diff --git a/ppmd_compress.c b/ppmd_compress.c index 4bb50f9..d0fb0c5 100644 --- a/ppmd_compress.c +++ b/ppmd_compress.c @@ -63,7 +63,7 @@ ppmd_stats(int show) void ppmd_props(algo_props_t *data, int level, int64_t chunksize) { - data->delta2_stride = 100; + data->delta2_span = 100; } int diff --git a/utils/utils.h b/utils/utils.h index 81f2c95..f96777f 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -111,7 +111,7 @@ typedef struct { int nthreads; int c_max_threads; int d_max_threads; - int delta2_stride; + int delta2_span; } algo_props_t; typedef enum { @@ -206,7 +206,7 @@ init_algo_props(algo_props_t *props) props->nthreads = 1; props->c_max_threads = 1; props->d_max_threads = 1; - props->delta2_stride = 0; + props->delta2_span = 0; } #ifdef __cplusplus diff --git a/zlib_compress.c b/zlib_compress.c index 5bbb000..a4aa89a 100644 --- a/zlib_compress.c +++ b/zlib_compress.c @@ -91,7 +91,7 @@ zlib_stats(int show) void zlib_props(algo_props_t *data, int level, int64_t chunksize) { - data->delta2_stride = 50; + data->delta2_span = 50; } int