Add matrix transpose to Delta2 encoding.
Change confusing structure member name.
This commit is contained in:
parent
375ebefa0d
commit
b0f41c2888
11 changed files with 75 additions and 30 deletions
|
@ -98,7 +98,7 @@ adapt_stats(int show)
|
||||||
void
|
void
|
||||||
adapt_props(algo_props_t *data, int level, int64_t chunksize)
|
adapt_props(algo_props_t *data, int level, int64_t chunksize)
|
||||||
{
|
{
|
||||||
data->delta2_stride = 200;
|
data->delta2_span = 200;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -50,7 +50,7 @@ bzip2_stats(int show)
|
||||||
|
|
||||||
void
|
void
|
||||||
bzip2_props(algo_props_t *data, int level, int64_t chunksize) {
|
bzip2_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->delta2_stride = 200;
|
data->delta2_span = 200;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -30,18 +30,23 @@
|
||||||
* Bytes are packed into integers in big-endian format.
|
* Bytes are packed into integers in big-endian format.
|
||||||
*
|
*
|
||||||
* After an optimal stride length has been identified the encoder
|
* After an optimal stride length has been identified the encoder
|
||||||
* performs a delta run length encoding on the spans. Two types of
|
* performs a delta run length encoding on the spans. Three types of
|
||||||
* objects are output by the encoder:
|
* objects are output by the encoder:
|
||||||
* 1) A literal run of unmodified bytes. Header: 1 zero byte followed
|
* 1) A literal run of unmodified bytes. Header: 1 zero byte followed
|
||||||
* by a 64bit length in bytes.
|
* by a 64bit length in bytes.
|
||||||
* 2) An encoded run length of a series in arithmetic progression.
|
* 2) A literal run of transposed bytes in which at least 87.5% of the
|
||||||
* Header: 1 byte stride length
|
* bytes belong to sequences just below the RLE threshold.
|
||||||
|
* Header: 1 byte stride length with high bit set.
|
||||||
|
* 64bit length of span in bytes.
|
||||||
|
* 3) An encoded run length of a series in arithmetic progression.
|
||||||
|
* Header: 1 byte stride length (must be less than 128)
|
||||||
* 64bit length of span in bytes
|
* 64bit length of span in bytes
|
||||||
* 64bit starting value of series
|
* 64bit starting value of series
|
||||||
* 64bit delta value
|
* 64bit delta value
|
||||||
*/
|
*/
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
|
#include <transpose.h>
|
||||||
#include "delta2.h"
|
#include "delta2.h"
|
||||||
|
|
||||||
// Size of original data. 64 bits.
|
// Size of original data. 64 bits.
|
||||||
|
@ -59,6 +64,9 @@
|
||||||
// 64bit delta value
|
// 64bit delta value
|
||||||
#define DELTA_HDR (1 + (sizeof (uint64_t)) * 3)
|
#define DELTA_HDR (1 + (sizeof (uint64_t)) * 3)
|
||||||
|
|
||||||
|
// Minimum span length
|
||||||
|
#define MIN_THRESH (50)
|
||||||
|
|
||||||
int
|
int
|
||||||
delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh)
|
delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh)
|
||||||
{
|
{
|
||||||
|
@ -68,7 +76,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
uchar_t *pos, *pos1, *pos2, stride, st1;
|
uchar_t *pos, *pos1, *pos2, stride, st1;
|
||||||
uchar_t strides[4] = {3, 5, 7, 8};
|
uchar_t strides[4] = {3, 5, 7, 8};
|
||||||
int st, sz;
|
int st, sz;
|
||||||
|
DEBUG_STAT_EN(uint32_t num_trans);
|
||||||
|
|
||||||
|
if (rle_thresh < MIN_THRESH)
|
||||||
|
return (-1);
|
||||||
gtot1 = ULL_MAX;
|
gtot1 = ULL_MAX;
|
||||||
stride = 0;
|
stride = 0;
|
||||||
sz = sizeof (strides) / sizeof (strides[0]);
|
sz = sizeof (strides) / sizeof (strides[0]);
|
||||||
|
@ -107,12 +118,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
vl1 = vl2;
|
vl1 = vl2;
|
||||||
pos += st1;
|
pos += st1;
|
||||||
}
|
}
|
||||||
if (snum > 1) {
|
if (snum > rle_thresh) {
|
||||||
if (snum > rle_thresh) {
|
gtot2 += DELTA_HDR;
|
||||||
gtot2 += DELTA_HDR;
|
} else {
|
||||||
} else {
|
gtot2 += snum;
|
||||||
gtot2 += snum;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (gtot2 < gtot1) {
|
if (gtot2 < gtot1) {
|
||||||
gtot1 = gtot2;
|
gtot1 = gtot2;
|
||||||
|
@ -139,6 +148,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
pos1 += MAIN_HDR;
|
pos1 += MAIN_HDR;
|
||||||
pos2 = pos1;
|
pos2 = pos1;
|
||||||
pos1 += LIT_HDR;
|
pos1 += LIT_HDR;
|
||||||
|
gtot2 = 0;
|
||||||
|
DEBUG_STAT_EN(num_trans = 0);
|
||||||
|
|
||||||
vl2 = *((uint64_t *)pos);
|
vl2 = *((uint64_t *)pos);
|
||||||
vl2 = htonll(vl2);
|
vl2 = htonll(vl2);
|
||||||
|
@ -154,13 +165,27 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
if (snum > rle_thresh) {
|
if (snum > rle_thresh) {
|
||||||
if (gtot1 > 0) {
|
if (gtot1 > 0) {
|
||||||
/*
|
/*
|
||||||
* Encode previous literal run, if any.
|
* Encode previous literal run, if any. If the literal run
|
||||||
|
* has enough large sequences just below threshold, do a
|
||||||
|
* matrix transpose on the range in the hope of achieving
|
||||||
|
* a better compression ratio.
|
||||||
*/
|
*/
|
||||||
*pos2 = 0;
|
if (gtot2 >= ((gtot1 >> 1) + (gtot1 >> 2) + (gtot1 >> 3))) {
|
||||||
pos2++;
|
*pos2 = stride | 128;
|
||||||
*((uint64_t *)pos2) = htonll(gtot1);
|
pos2++;
|
||||||
pos2 += (gtot1 + sizeof (uint64_t));
|
*((uint64_t *)pos2) = htonll(gtot1);
|
||||||
|
pos2 += sizeof (uint64_t);
|
||||||
|
transpose(pos - (gtot1+snum), pos2, gtot1, stride, ROW);
|
||||||
|
DEBUG_STAT_EN(num_trans++);
|
||||||
|
} else {
|
||||||
|
*pos2 = 0;
|
||||||
|
pos2++;
|
||||||
|
*((uint64_t *)pos2) = htonll(gtot1);
|
||||||
|
pos2 += sizeof (uint64_t);
|
||||||
|
}
|
||||||
|
pos2 += gtot1;
|
||||||
gtot1 = 0;
|
gtot1 = 0;
|
||||||
|
gtot2 = 0;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* RLE Encode delta series.
|
* RLE Encode delta series.
|
||||||
|
@ -176,6 +201,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
pos1 = pos2 + LIT_HDR;
|
pos1 = pos2 + LIT_HDR;
|
||||||
} else {
|
} else {
|
||||||
gtot1 += snum;
|
gtot1 += snum;
|
||||||
|
if (snum >= 50)
|
||||||
|
gtot2 += snum;
|
||||||
}
|
}
|
||||||
snum = 0;
|
snum = 0;
|
||||||
sval = vl2;
|
sval = vl2;
|
||||||
|
@ -230,6 +257,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*dstlen = pos2 - dst;
|
*dstlen = pos2 - dst;
|
||||||
|
DEBUG_STAT_EN(printf("%u transpositions\n", num_trans));
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,10 +290,27 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
||||||
if (out + rcnt > *dstlen) {
|
if (out + rcnt > *dstlen) {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
for (cnt = 0; cnt < rcnt; cnt++) {
|
memcpy(pos1, pos, rcnt);
|
||||||
*pos1 = *pos;
|
pos += rcnt;
|
||||||
pos++; pos1++; out++;
|
pos1 += rcnt;
|
||||||
|
out += rcnt;
|
||||||
|
|
||||||
|
} else if (*pos & 128) {
|
||||||
|
int stride;
|
||||||
|
/*
|
||||||
|
* Copy over literal run of transposed bytes and inverse-transpose.
|
||||||
|
*/
|
||||||
|
stride = (*pos & 127);
|
||||||
|
pos++;
|
||||||
|
rcnt = ntohll(*((uint64_t *)pos));
|
||||||
|
pos += sizeof (rcnt);
|
||||||
|
if (out + rcnt > *dstlen) {
|
||||||
|
return (-1);
|
||||||
}
|
}
|
||||||
|
transpose(pos, pos1, rcnt, stride, COL);
|
||||||
|
pos += rcnt;
|
||||||
|
pos1 += rcnt;
|
||||||
|
out += rcnt;
|
||||||
} else {
|
} else {
|
||||||
stride = *pos;
|
stride = *pos;
|
||||||
pos++;
|
pos++;
|
||||||
|
|
|
@ -79,7 +79,7 @@ libbsc_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->buf_extra = 0;
|
data->buf_extra = 0;
|
||||||
data->c_max_threads = 8;
|
data->c_max_threads = 8;
|
||||||
data->d_max_threads = 8;
|
data->d_max_threads = 8;
|
||||||
data->delta2_stride = 150;
|
data->delta2_span = 150;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -56,7 +56,7 @@ lz4_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->compress_mt_capable = 0;
|
data->compress_mt_capable = 0;
|
||||||
data->decompress_mt_capable = 0;
|
data->decompress_mt_capable = 0;
|
||||||
data->buf_extra = lz4_buf_extra(chunksize);
|
data->buf_extra = lz4_buf_extra(chunksize);
|
||||||
data->delta2_stride = 50;
|
data->delta2_span = 50;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -41,7 +41,7 @@ lz_fx_stats(int show)
|
||||||
|
|
||||||
void
|
void
|
||||||
lz_fx_props(algo_props_t *data, int level, int64_t chunksize) {
|
lz_fx_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->delta2_stride = 50;
|
data->delta2_span = 50;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -52,7 +52,7 @@ lzma_mt_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->decompress_mt_capable = 0;
|
data->decompress_mt_capable = 0;
|
||||||
data->buf_extra = 0;
|
data->buf_extra = 0;
|
||||||
data->c_max_threads = 2;
|
data->c_max_threads = 2;
|
||||||
data->delta2_stride = 150;
|
data->delta2_span = 150;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -60,7 +60,7 @@ lzma_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->compress_mt_capable = 0;
|
data->compress_mt_capable = 0;
|
||||||
data->decompress_mt_capable = 0;
|
data->decompress_mt_capable = 0;
|
||||||
data->buf_extra = 0;
|
data->buf_extra = 0;
|
||||||
data->delta2_stride = 150;
|
data->delta2_span = 150;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
4
main.c
4
main.c
|
@ -215,9 +215,9 @@ preproc_compress(compress_func_ptr cmp_func, void *src, uint64_t srclen, void *d
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (enable_delta2_encode && props->delta2_stride > 0) {
|
if (enable_delta2_encode && props->delta2_span > 0) {
|
||||||
_dstlen = srclen;
|
_dstlen = srclen;
|
||||||
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_stride);
|
result = delta2_encode(src, srclen, dst, &_dstlen, props->delta2_span);
|
||||||
if (result != -1) {
|
if (result != -1) {
|
||||||
memcpy(src, dst, _dstlen);
|
memcpy(src, dst, _dstlen);
|
||||||
srclen = _dstlen;
|
srclen = _dstlen;
|
||||||
|
|
|
@ -63,7 +63,7 @@ ppmd_stats(int show)
|
||||||
|
|
||||||
void
|
void
|
||||||
ppmd_props(algo_props_t *data, int level, int64_t chunksize) {
|
ppmd_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->delta2_stride = 100;
|
data->delta2_span = 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -111,7 +111,7 @@ typedef struct {
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int c_max_threads;
|
int c_max_threads;
|
||||||
int d_max_threads;
|
int d_max_threads;
|
||||||
int delta2_stride;
|
int delta2_span;
|
||||||
} algo_props_t;
|
} algo_props_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -206,7 +206,7 @@ init_algo_props(algo_props_t *props)
|
||||||
props->nthreads = 1;
|
props->nthreads = 1;
|
||||||
props->c_max_threads = 1;
|
props->c_max_threads = 1;
|
||||||
props->d_max_threads = 1;
|
props->d_max_threads = 1;
|
||||||
props->delta2_stride = 0;
|
props->delta2_span = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -91,7 +91,7 @@ zlib_stats(int show)
|
||||||
|
|
||||||
void
|
void
|
||||||
zlib_props(algo_props_t *data, int level, int64_t chunksize) {
|
zlib_props(algo_props_t *data, int level, int64_t chunksize) {
|
||||||
data->delta2_stride = 50;
|
data->delta2_span = 50;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
Loading…
Reference in a new issue