Add matrix transpose to Delta2 encoding.
Change confusing structure member name.
This commit is contained in:
parent
375ebefa0d
commit
b0f41c2888
11 changed files with 75 additions and 30 deletions
|
@ -98,7 +98,7 @@ adapt_stats(int show)
|
|||
void
|
||||
adapt_props(algo_props_t *data, int level, int64_t chunksize)
|
||||
{
|
||||
data->delta2_stride = 200;
|
||||
data->delta2_span = 200;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -50,7 +50,7 @@ bzip2_stats(int show)
|
|||
|
||||
void
|
||||
bzip2_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||
data->delta2_stride = 200;
|
||||
data->delta2_span = 200;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -30,18 +30,23 @@
|
|||
* Bytes are packed into integers in big-endian format.
|
||||
*
|
||||
* After an optimal stride length has been identified the encoder
|
||||
* performs a delta run length encoding on the spans. Two types of
|
||||
* performs a delta run length encoding on the spans. Three types of
|
||||
* objects are output by the encoder:
|
||||
* 1) A literal run of unmodified bytes. Header: 1 zero byte followed
|
||||
* by a 64bit length in bytes.
|
||||
* 2) An encoded run length of a series in arithmetic progression.
|
||||
* Header: 1 byte stride length
|
||||
* 2) A literal run of transposed bytes containing at least 87% below
|
||||
* threshold sequences.
|
||||
* Header: 1 byte stride length with high bit set.
|
||||
* 64bit length of span in bytes.
|
||||
* 3) An encoded run length of a series in arithmetic progression.
|
||||
* Header: 1 byte stride length (must be less than 128)
|
||||
* 64bit length of span in bytes
|
||||
* 64bit starting value of series
|
||||
* 64bit delta value
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <utils.h>
|
||||
#include <transpose.h>
|
||||
#include "delta2.h"
|
||||
|
||||
// Size of original data. 64 bits.
|
||||
|
@ -59,6 +64,9 @@
|
|||
// 64bit delta value
|
||||
#define DELTA_HDR (1 + (sizeof (uint64_t)) * 3)
|
||||
|
||||
// Minimum span length
|
||||
#define MIN_THRESH (50)
|
||||
|
||||
int
|
||||
delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh)
|
||||
{
|
||||
|
@ -68,7 +76,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
uchar_t *pos, *pos1, *pos2, stride, st1;
|
||||
uchar_t strides[4] = {3, 5, 7, 8};
|
||||
int st, sz;
|
||||
DEBUG_STAT_EN(uint32_t num_trans);
|
||||
|
||||
if (rle_thresh < MIN_THRESH)
|
||||
return (-1);
|
||||
gtot1 = ULL_MAX;
|
||||
stride = 0;
|
||||
sz = sizeof (strides) / sizeof (strides[0]);
|
||||
|
@ -107,13 +118,11 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
vl1 = vl2;
|
||||
pos += st1;
|
||||
}
|
||||
if (snum > 1) {
|
||||
if (snum > rle_thresh) {
|
||||
gtot2 += DELTA_HDR;
|
||||
} else {
|
||||
gtot2 += snum;
|
||||
}
|
||||
}
|
||||
if (gtot2 < gtot1) {
|
||||
gtot1 = gtot2;
|
||||
stride = st1;
|
||||
|
@ -139,6 +148,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
pos1 += MAIN_HDR;
|
||||
pos2 = pos1;
|
||||
pos1 += LIT_HDR;
|
||||
gtot2 = 0;
|
||||
DEBUG_STAT_EN(num_trans = 0);
|
||||
|
||||
vl2 = *((uint64_t *)pos);
|
||||
vl2 = htonll(vl2);
|
||||
|
@ -154,13 +165,27 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
if (snum > rle_thresh) {
|
||||
if (gtot1 > 0) {
|
||||
/*
|
||||
* Encode previous literal run, if any.
|
||||
* Encode previous literal run, if any. If the literal run
|
||||
* has enough large sequences just below threshold, do a
|
||||
* matrix transpose on the range in the hope of achieving
|
||||
* a better compression ratio.
|
||||
*/
|
||||
if (gtot2 >= ((gtot1 >> 1) + (gtot1 >> 2) + (gtot1 >> 3))) {
|
||||
*pos2 = stride | 128;
|
||||
pos2++;
|
||||
*((uint64_t *)pos2) = htonll(gtot1);
|
||||
pos2 += sizeof (uint64_t);
|
||||
transpose(pos - (gtot1+snum), pos2, gtot1, stride, ROW);
|
||||
DEBUG_STAT_EN(num_trans++);
|
||||
} else {
|
||||
*pos2 = 0;
|
||||
pos2++;
|
||||
*((uint64_t *)pos2) = htonll(gtot1);
|
||||
pos2 += (gtot1 + sizeof (uint64_t));
|
||||
pos2 += sizeof (uint64_t);
|
||||
}
|
||||
pos2 += gtot1;
|
||||
gtot1 = 0;
|
||||
gtot2 = 0;
|
||||
}
|
||||
/*
|
||||
* RLE Encode delta series.
|
||||
|
@ -176,6 +201,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
pos1 = pos2 + LIT_HDR;
|
||||
} else {
|
||||
gtot1 += snum;
|
||||
if (snum >= 50)
|
||||
gtot2 += snum;
|
||||
}
|
||||
snum = 0;
|
||||
sval = vl2;
|
||||
|
@ -230,6 +257,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
|||
}
|
||||
}
|
||||
*dstlen = pos2 - dst;
|
||||
DEBUG_STAT_EN(printf("%u transpositions\n", num_trans));
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -262,10 +290,27 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
|||
if (out + rcnt > *dstlen) {
|
||||
return (-1);
|
||||
}
|
||||
for (cnt = 0; cnt < rcnt; cnt++) {
|
||||
*pos1 = *pos;
|
||||
pos++; pos1++; out++;
|
||||
memcpy(pos1, pos, rcnt);
|
||||
pos += rcnt;
|
||||
pos1 += rcnt;
|
||||
out += rcnt;
|
||||
|
||||
} else if (*pos & 128) {
|
||||
int stride;
|
||||
/*
|
||||
* Copy over literal run of transposed bytes and inverse-transpose.
|
||||
*/
|
||||
stride = (*pos & 127);
|
||||
pos++;
|
||||
rcnt = ntohll(*((uint64_t *)pos));
|
||||
pos += sizeof (rcnt);
|
||||
if (out + rcnt > *dstlen) {
|
||||
return (-1);
|
||||
}
|
||||
transpose(pos, pos1, rcnt, stride, COL);
|
||||
pos += rcnt;
|
||||
pos1 += rcnt;
|
||||
out += rcnt;
|
||||
} else {
|
||||
stride = *pos;
|
||||
pos++;
|
||||
|
|
|
@ -79,7 +79,7 @@ libbsc_props(algo_props_t *data, int level, int64_t chunksize) {
|
|||
data->buf_extra = 0;
|
||||
data->c_max_threads = 8;
|
||||
data->d_max_threads = 8;
|
||||
data->delta2_stride = 150;
|
||||
data->delta2_span = 150;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -56,7 +56,7 @@ lz4_props(algo_props_t *data, int level, int64_t chunksize) {
|
|||
data->compress_mt_capable = 0;
|
||||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = lz4_buf_extra(chunksize);
|
||||
data->delta2_stride = 50;
|
||||
data->delta2_span = 50;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -41,7 +41,7 @@ lz_fx_stats(int show)
|
|||
|
||||
void
|
||||
lz_fx_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||
data->delta2_stride = 50;
|
||||
data->delta2_span = 50;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -52,7 +52,7 @@ lzma_mt_props(algo_props_t *data, int level, int64_t chunksize) {
|
|||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = 0;
|
||||
data->c_max_threads = 2;
|
||||
data->delta2_stride = 150;
|
||||
data->delta2_span = 150;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -60,7 +60,7 @@ lzma_props(algo_props_t *data, int level, int64_t chunksize) {
|
|||
data->compress_mt_capable = 0;
|
||||
data->decompress_mt_capable = 0;
|
||||
data->buf_extra = 0;
|
||||
data->delta2_stride = 150;
|
||||
data->delta2_span = 150;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
4
main.c
4
main.c
|
@ -215,9 +215,9 @@ preproc_compress(compress_func_ptr cmp_func, void *src, uint64_t srclen, void *d
|
|||
return (-1);
|
||||
}
|
||||
|
||||
if (enable_delta2_encode && props->delta2_stride > 0) {
|
||||
if (enable_delta2_encode && props->delta2_span > 0) {
|
||||
_dstlen = srclen;
|
||||
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_stride);
|
||||
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_span);
|
||||
if (result != -1) {
|
||||
memcpy(src, dst, _dstlen);
|
||||
srclen = _dstlen;
|
||||
|
|
|
@ -63,7 +63,7 @@ ppmd_stats(int show)
|
|||
|
||||
void
|
||||
ppmd_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||
data->delta2_stride = 100;
|
||||
data->delta2_span = 100;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -111,7 +111,7 @@ typedef struct {
|
|||
int nthreads;
|
||||
int c_max_threads;
|
||||
int d_max_threads;
|
||||
int delta2_stride;
|
||||
int delta2_span;
|
||||
} algo_props_t;
|
||||
|
||||
typedef enum {
|
||||
|
@ -206,7 +206,7 @@ init_algo_props(algo_props_t *props)
|
|||
props->nthreads = 1;
|
||||
props->c_max_threads = 1;
|
||||
props->d_max_threads = 1;
|
||||
props->delta2_stride = 0;
|
||||
props->delta2_span = 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -91,7 +91,7 @@ zlib_stats(int show)
|
|||
|
||||
void
|
||||
zlib_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||
data->delta2_stride = 50;
|
||||
data->delta2_span = 50;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
Loading…
Reference in a new issue