Major enhancements to Delta2 encoding.

Avoid transposing below-threshold spans, since the transposition reduces the compression ratio.
Use little-endian storage format for numbers to optimize for x86.
Improve embedded table detection.
Reduce header sizes.
Get rid of GCC's LTO flag, since LTO causes a performance drop.
Fix preprocessing behavior when LZP does not compress but Delta2 works.
This commit is contained in:
Moinak Ghosh 2012-12-23 23:50:45 +05:30
parent a43fdd7d2c
commit d597f0f05c
4 changed files with 128 additions and 152 deletions

View file

@ -167,9 +167,9 @@ DEBUG_RABIN_OPT = -O -fno-omit-frame-pointer
DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS) DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
DEBUG_FPTR_FLAG = DEBUG_FPTR_FLAG =
RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ @LTO_FLAG@ RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ @M64_FLAG@
RELEASE_COMPILE = gcc -m64 -msse3 -c @LTO_FLAG@ RELEASE_COMPILE = gcc -m64 -msse3 -c @M64_FLAG@
RELEASE_COMPILE_cpp = g++ -m64 -msse3 -c @LTO_FLAG@ RELEASE_COMPILE_cpp = g++ -m64 -msse3 -c @M64_FLAG@
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS) RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS) RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG

13
config
View file

@ -50,7 +50,7 @@ yasm=yasm
keccak_srcs= keccak_srcs=
keccak_hdrs= keccak_hdrs=
keccak_srcs_asm= keccak_srcs_asm=
lto_flag= m64_flag=
zlib_prefix= zlib_prefix=
bzlib_prefix= bzlib_prefix=
@ -78,10 +78,6 @@ then
exit 1 exit 1
fi fi
# Check GCC version and enable LTO flags if possible
gcc -v 2>&1 | grep lto > /dev/null
[ $? -eq 0 ] && lto_flag="-flto"
# Check bitness of system/toolchain # Check bitness of system/toolchain
bitness=`./tst` bitness=`./tst`
if [ $bitness -lt 8 ] if [ $bitness -lt 8 ]
@ -99,8 +95,7 @@ then
# If m64 compilation succeeds we assume platform to be 64-bit capable but # If m64 compilation succeeds we assume platform to be 64-bit capable but
# explicit flag is reqd. # explicit flag is reqd.
# Instead of setting another variable lets cheat by plugging m64 into lto_flag! m64_flag="-m64"
lto_flag="${lto_flag} -m64"
fi fi
rm -f tst tst.c rm -f tst tst.c
@ -405,7 +400,7 @@ sha256asmobjsvar="SHA256ASM_OBJS"
sha256objsvar="SHA256_OBJS" sha256objsvar="SHA256_OBJS"
yasmvar="YASM" yasmvar="YASM"
fptr_flag_var="FPTR_FLAG" fptr_flag_var="FPTR_FLAG"
lto_flag_var="LTO_FLAG" m64_flag_var="M64_FLAG"
openssllibdirvar="OPENSSL_LIBDIR" openssllibdirvar="OPENSSL_LIBDIR"
opensslincdirvar="OPENSSL_INCDIR" opensslincdirvar="OPENSSL_INCDIR"
@ -458,6 +453,6 @@ s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_hdrs_var}@#${keccak_hdrs}#g s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
s#@${keccak_srcs_var}@#${keccak_srcs}#g s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
s#@${lto_flag_var}@#${lto_flag}#g s#@${m64_flag_var}@#${m64_flag}#g
" > Makefile " > Makefile

View file

@ -30,20 +30,19 @@
* Bytes are packed into integers in big-endian format. * Bytes are packed into integers in big-endian format.
* *
* After an optimal stride length has been identified the encoder * After an optimal stride length has been identified the encoder
* performs a delta run length encoding on the spans. Three types of * performs a delta run length encoding on the spans. Two types of
* objects are output by the encoder: * objects are output by the encoder:
* 1) A literal run of unmodified bytes. Header: 1 zero byte followed * 1) A literal run of unmodified bytes. Header:
* by a 64bit length in bytes. * 64-bit encoded value of the following format
* 2) A literal run of transposed bytes containing sequences that are * Most Significant Byte = 0
* below threshold and the total span of those sequences is at least * Remaining Bytes = Length of literal span in bytes
* 97%+ of the entire run. * 2) An encoded run length of a series in arithmetic progression.
* Header: 1 byte stride length with high bit set. * Header: 64bit encoded value
* 64bit length of span in bytes.
* 3) An encoded run length of a series in arithmetic progression.
* Header: 1 byte stride length (must be less than 128)
* 64bit length of span in bytes
* 64bit starting value of series * 64bit starting value of series
* 64bit delta value * 64bit delta value
* 64-bit encoded value is of the following format
* Most Significant Byte = Stride length
* Remaining Bytes = Number of bytes in the span
*/ */
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
@ -55,25 +54,21 @@
#define MAIN_HDR (sizeof (uint64_t)) #define MAIN_HDR (sizeof (uint64_t))
// Literal text header block: // Literal text header block:
// 1-byte flag // 64bit encoded value.
// 64bit length of run in bytes. #define LIT_HDR (sizeof (uint64_t))
#define LIT_HDR (1 + sizeof (uint64_t))
#define TRANSP_HDR (LIT_HDR)
// Delta encoded header block: // Delta encoded header block:
// 1-byte flag indicating stride length // 64bit encoded value
// 64bit length of span in bytes
// 64bit initial value // 64bit initial value
// 64bit delta value // 64bit delta value
#define DELTA_HDR (1 + (sizeof (uint64_t)) * 3) #define DELTA_HDR ((sizeof (uint64_t)) * 3)
// Minimum span length // Minimum span length
#define MIN_THRESH (50) #define MIN_THRESH (50)
// Maximum data length (16TB) // Maximum data length (16TB)
#define MAX_THRESH (0x100000000000ULL) #define MAX_THRESH (0x100000000000ULL)
#define TRANSP_THRESH (100) #define MSB_SETZERO_MASK (0xffffffffffffffULL)
#define TRANSP_BIT (128) #define MSB_SHIFT (56)
#define TRANSP_MASK (127)
/* /*
* Delta2 algorithm processes data in chunks. The 4K size below is somewhat * Delta2 algorithm processes data in chunks. The 4K size below is somewhat
@ -86,8 +81,20 @@
*/ */
#define DELTA2_CHUNK (4096) #define DELTA2_CHUNK (4096)
/*
 * Byteswap macros. We optimize for little-endian, so values are stored
 * and interpreted in little-endian order.
 *
 * HTONLL(x) converts a 64-bit value from host order to the stored
 * (little-endian) order; NTOHLL(x) converts it back. On a little-endian
 * host both are the identity; on a big-endian host both swap the bytes.
 *
 * Both macros are function-like because every call site passes the value
 * as an argument, e.g. HTONLL(srclen).
 *
 * The endianness test checks that the macros it compares are actually
 * defined: an undefined identifier evaluates to 0 inside #if, so a bare
 * "BYTE_ORDER == BIG_ENDIAN" would be true (0 == 0) whenever the endian
 * header is missing and would wrongly select the byteswap path.
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
    (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN)
#define HTONLL(x) __bswap_64(x)
#define NTOHLL(x) __bswap_64(x)
#else
#define HTONLL(x) (x)
#define NTOHLL(x) (x)
#endif
static int delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, static int delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen,
int rle_thresh, int last_encode, int *transp_count, int *hdr_ovr); int rle_thresh, int last_encode, int *hdr_ovr);
int int
delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh) delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int rle_thresh)
@ -98,14 +105,13 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
} }
if (*dstlen < DELTA2_CHUNK) { if (*dstlen < DELTA2_CHUNK) {
int transp_count, hdr_ovr; int hdr_ovr;
int rv; int rv;
transp_count = 0;
hdr_ovr = 0; hdr_ovr = 0;
rv = delta2_encode_real(src, srclen, dst, dstlen, rle_thresh, 1, &transp_count, &hdr_ovr); rv = delta2_encode_real(src, srclen, dst, dstlen, rle_thresh, 1, &hdr_ovr);
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: srclen: %" PRIu64 ", dstlen: %" PRIu64 "\n", srclen, *dstlen)); DEBUG_STAT_EN(fprintf(stderr, "DELTA2: srclen: %" PRIu64 ", dstlen: %" PRIu64 "\n", srclen, *dstlen));
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: transpositions: %d, header overhead: %d\n", transp_count, hdr_ovr)); DEBUG_STAT_EN(fprintf(stderr, "DELTA2: header overhead: %d\n", hdr_ovr));
} else { } else {
uchar_t *srcpos, *dstpos, *lastdst, *lastsrc, *dstend; uchar_t *srcpos, *dstpos, *lastdst, *lastsrc, *dstend;
uint64_t slen, sz, dsz, pending; uint64_t slen, sz, dsz, pending;
@ -117,11 +123,10 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
dstend = dst + *dstlen; dstend = dst + *dstlen;
slen = srclen; slen = srclen;
pending = 0; pending = 0;
*((uint64_t *)dstpos) = htonll(srclen); *((uint64_t *)dstpos) = HTONLL(srclen);
dstpos += MAIN_HDR; dstpos += MAIN_HDR;
lastdst = dstpos; lastdst = dstpos;
lastsrc = srcpos; lastsrc = srcpos;
transp_count = 0;
hdr_ovr = 0; hdr_ovr = 0;
DEBUG_STAT_EN(strt = get_wtime_millis()); DEBUG_STAT_EN(strt = get_wtime_millis());
@ -135,22 +140,21 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
} }
dsz = sz; dsz = sz;
rem = delta2_encode_real(srcpos, sz, dstpos, &dsz, rle_thresh, lenc, rem = delta2_encode_real(srcpos, sz, dstpos, &dsz, rle_thresh, lenc,
&transp_count, &hdr_ovr); &hdr_ovr);
if (rem == -1) { if (rem == -1) {
if (pending == 0) { if (pending == 0) {
lastdst = dstpos; lastdst = dstpos;
lastsrc = srcpos; lastsrc = srcpos;
dstpos += LIT_HDR; dstpos += LIT_HDR;
} }
pending += sz; pending += dsz;
srcpos += sz; srcpos += dsz;
dstpos += sz; dstpos += dsz;
slen -= sz; slen -= dsz;
} else { } else {
if (pending) { if (pending) {
*lastdst = 0; pending &= MSB_SETZERO_MASK;
lastdst++; *((uint64_t *)lastdst) = HTONLL(pending);
*((uint64_t *)lastdst) = htonll(pending);
lastdst += sizeof (uint64_t); lastdst += sizeof (uint64_t);
memcpy(lastdst, lastsrc, pending); memcpy(lastdst, lastsrc, pending);
pending = 0; pending = 0;
@ -165,9 +169,8 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
} }
} }
if (pending) { if (pending) {
*lastdst = 0; pending &= MSB_SETZERO_MASK;
lastdst++; *((uint64_t *)lastdst) = HTONLL(pending);
*((uint64_t *)lastdst) = htonll(pending);
lastdst += sizeof (uint64_t); lastdst += sizeof (uint64_t);
if (lastdst + pending > dstend) { if (lastdst + pending > dstend) {
DEBUG_STAT_EN(fprintf(stderr, "No Delta\n")); DEBUG_STAT_EN(fprintf(stderr, "No Delta\n"));
@ -178,7 +181,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
*dstlen = dstpos - dst; *dstlen = dstpos - dst;
DEBUG_STAT_EN(en = get_wtime_millis()); DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: srclen: %" PRIu64 ", dstlen: %" PRIu64 "\n", srclen, *dstlen)); DEBUG_STAT_EN(fprintf(stderr, "DELTA2: srclen: %" PRIu64 ", dstlen: %" PRIu64 "\n", srclen, *dstlen));
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: transpositions: %d, header overhead: %d\n", transp_count, hdr_ovr)); DEBUG_STAT_EN(fprintf(stderr, "DELTA2: header overhead: %d\n", hdr_ovr));
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: Processed at %.3f MB/s\n", get_mb_s(srclen, strt, en))); DEBUG_STAT_EN(fprintf(stderr, "DELTA2: Processed at %.3f MB/s\n", get_mb_s(srclen, strt, en)));
} }
return (0); return (0);
@ -186,7 +189,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
static int static int
delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen,
int rle_thresh, int last_encode, int *transp_count, int *hdr_ovr) int rle_thresh, int last_encode, int *hdr_ovr)
{ {
uint64_t snum, gtot1, gtot2, tot; uint64_t snum, gtot1, gtot2, tot;
uint64_t cnt, val, sval; uint64_t cnt, val, sval;
@ -199,8 +202,8 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
return (-1); return (-1);
gtot1 = ULL_MAX; gtot1 = ULL_MAX;
stride = 0; stride = 0;
sval = 0;
val = 0; val = 0;
tot = 0;
sz = sizeof (strides) / sizeof (strides[0]); sz = sizeof (strides) / sizeof (strides[0]);
/* /*
@ -210,16 +213,20 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
int gt; int gt;
snum = 0; snum = 0;
gtot2 = MAIN_HDR + LIT_HDR; gtot2 = LIT_HDR;
vl1 = 0; vl1 = 0;
vld1 = 0; vld1 = 0;
tot = 0; tot = 0;
pos = src; pos = src;
st1 = strides[st]; st1 = strides[st];
sval = st1;
sval = ((sval << 3) - 1);
sval = (1ULL << sval);
sval |= (sval - 1);
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += st1) { for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += st1) {
vl2 = *((uint64_t *)pos); vl2 = *((uint64_t *)pos);
vl2 = htonll(vl2); vl2 = HTONLL(vl2);
vl2 >>= ((sizeof (vl2) - st1) << 3); vl2 &= sval;
vld2 = vl2 - vl1; vld2 = vl2 - vl1;
if (vld1 != vld2) { if (vld1 != vld2) {
if (snum > rle_thresh) { if (snum > rle_thresh) {
@ -241,30 +248,30 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
if (snum > rle_thresh) { if (snum > rle_thresh) {
gtot2 += DELTA_HDR; gtot2 += DELTA_HDR;
/* /*
* If this ended partially into another table reset next scan * If this ended into another table reset next scan
* point to before the table. * point to before the table.
*/ */
val = cnt - snum; val = cnt - snum;
} else { } else {
gtot2 += snum; gtot2 += snum;
/* /*
* If this ended partially into another table reset next scan * If this ended into another table reset next scan
* point to before the table. * point to before the table.
*/ */
if (snum >= st1 * 5) if (snum >= (MIN_THRESH>>1))
val = cnt - snum; val = cnt - snum;
} }
if (gtot2 < gtot1) { if (gtot2 < gtot1) {
gtot1 = gtot2; gtot1 = gtot2;
stride = st1; stride = st1;
sval = val; tot = val;
} }
} }
if ( !(gtot1 < srclen && srclen - gtot1 > (DELTA_HDR + LIT_HDR + MAIN_HDR) && gtot1 < *dstlen) ) { if ( !(gtot1 < srclen && srclen - gtot1 > (DELTA_HDR + LIT_HDR + MAIN_HDR) && gtot1 < *dstlen) ) {
if (srclen >= DELTA2_CHUNK) { if (srclen >= DELTA2_CHUNK) {
if (sval > 0) if (tot > 0)
*dstlen = sval; *dstlen = tot;
} }
return (-1); return (-1);
} }
@ -278,70 +285,48 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
gtot1 = 0; gtot1 = 0;
pos = src; pos = src;
pos2 = dst; pos2 = dst;
gtot2 = 0;
if (rle_thresh <= TRANSP_THRESH) {
tot = rle_thresh/2;
} else {
tot = TRANSP_THRESH;
}
vl2 = *((uint64_t *)pos); vl2 = *((uint64_t *)pos);
vl2 = htonll(vl2); vl2 = HTONLL(vl2);
vl2 >>= ((sizeof (vl2) - stride) << 3); val = stride;
val = ((val << 3) - 1);
val = (1ULL << val);
val |= (val - 1);
vl2 &= val;
sval = vl2; sval = vl2;
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += stride) { for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += stride) {
val = *((uint64_t *)pos); vl2 = *((uint64_t *)pos);
vl2 = htonll(val); vl2 = HTONLL(vl2);
vl2 >>= ((sizeof (vl2) - stride) << 3); vl2 &= val;
vld2 = vl2 - vl1; vld2 = vl2 - vl1;
if (vld1 != vld2) { if (vld1 != vld2) {
if (snum > rle_thresh) { if (snum > rle_thresh) {
if (gtot1 > 0) { if (gtot1 > 0) {
/* gtot1 &= MSB_SETZERO_MASK;
* Encode previous literal run, if any. If the literal run *((uint64_t *)pos2) = HTONLL(gtot1);
* has enough (97%+) large sequences just below threshold,
* do a matrix transpose on the range in the hope of achieving
* a better compression ratio.
*/
if (gtot2 >= ((gtot1 >> 1) + (gtot1 >> 2) + (gtot1 >> 3) +
(gtot1 >> 4) + (gtot1 >> 5))) {
*pos2 = stride | TRANSP_BIT;
pos2++;
*((uint64_t *)pos2) = htonll(gtot1);
pos2 += sizeof (uint64_t);
DEBUG_STAT_EN((*transp_count)++);
DEBUG_STAT_EN(*hdr_ovr += TRANSP_HDR);
transpose(pos - (gtot1+snum), pos2, gtot1, stride, ROW);
} else {
*pos2 = 0;
pos2++;
*((uint64_t *)pos2) = htonll(gtot1);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR); DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
memcpy(pos2, pos - (gtot1+snum), gtot1); memcpy(pos2, pos - (gtot1+snum), gtot1);
}
pos2 += gtot1; pos2 += gtot1;
gtot1 = 0; gtot1 = 0;
gtot2 = 0;
} }
/* /*
* RLE Encode delta series. * RLE Encode delta series.
*/ */
*pos2 = stride; gtot2 = stride;
pos2++; gtot2 <<= MSB_SHIFT;
*((uint64_t *)pos2) = htonll(snum); gtot2 |= (snum & MSB_SETZERO_MASK);
*((uint64_t *)pos2) = HTONLL(gtot2);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
*((uint64_t *)pos2) = htonll(sval); *((uint64_t *)pos2) = HTONLL(sval);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
*((uint64_t *)pos2) = htonll(vld1); *((uint64_t *)pos2) = HTONLL(vld1);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR); DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
} else { } else {
gtot1 += snum; gtot1 += snum;
if (snum >= tot)
gtot2 += snum;
} }
snum = 0; snum = 0;
sval = vl2; sval = vl2;
@ -355,30 +340,29 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
if (snum > 0) { if (snum > 0) {
if (snum > rle_thresh) { if (snum > rle_thresh) {
if (gtot1 > 0) { if (gtot1 > 0) {
*pos2 = 0; gtot1 &= MSB_SETZERO_MASK;
pos2++; *((uint64_t *)pos2) = HTONLL(gtot1);
*((uint64_t *)pos2) = htonll(gtot1);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR); DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
memcpy(pos2, pos - (gtot1+snum), gtot1); memcpy(pos2, pos - (gtot1+snum), gtot1);
pos2 += gtot1; pos2 += gtot1;
gtot1 = 0; gtot1 = 0;
} }
*pos2 = stride; gtot2 = stride;
pos2++; gtot2 <<= MSB_SHIFT;
*((uint64_t *)pos2) = htonll(snum); gtot2 |= (snum & MSB_SETZERO_MASK);
*((uint64_t *)pos2) = HTONLL(gtot2);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
*((uint64_t *)pos2) = htonll(sval); *((uint64_t *)pos2) = HTONLL(sval);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
*((uint64_t *)pos2) = htonll(vld1); *((uint64_t *)pos2) = HTONLL(vld1);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR); DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
} else if (last_encode) { } else if (last_encode) {
gtot1 += snum; gtot1 += snum;
*pos2 = 0; gtot1 &= MSB_SETZERO_MASK;
pos2++; *((uint64_t *)pos2) = HTONLL(gtot1);
*((uint64_t *)pos2) = htonll(gtot1);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR); DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
memcpy(pos2, pos - gtot1, gtot1); memcpy(pos2, pos - gtot1, gtot1);
@ -395,9 +379,8 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
* Encode left over bytes, if any, at the end into a * Encode left over bytes, if any, at the end into a
* literal run. * literal run.
*/ */
*pos2 = 0; val &= MSB_SETZERO_MASK;
pos2++; *((uint64_t *)pos2) = HTONLL(val);
*((uint64_t *)pos2) = htonll(val);
pos2 += sizeof (uint64_t); pos2 += sizeof (uint64_t);
for (cnt = 0; cnt < val; cnt++) { for (cnt = 0; cnt < val; cnt++) {
*pos2 = *pos; *pos2 = *pos;
@ -417,14 +400,16 @@ int
delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen) delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
{ {
uchar_t *pos, *pos1, *last; uchar_t *pos, *pos1, *last;
uint64_t olen, val, sval, delta, rcnt, cnt, out; uint64_t olen, val, sval, delta, rcnt, cnt, out, vl;
uchar_t stride; uchar_t stride, flags;
DEBUG_STAT_EN(double strt, en);
pos = src; pos = src;
pos1 = dst; pos1 = dst;
DEBUG_STAT_EN(strt = get_wtime_millis());
last = pos + srclen; last = pos + srclen;
olen = ntohll(*((uint64_t *)pos)); olen = NTOHLL(*((uint64_t *)pos));
if (*dstlen < olen) { if (*dstlen < olen) {
fprintf(stderr, "DELTA2 Decode: Destination buffer too small.\n"); fprintf(stderr, "DELTA2 Decode: Destination buffer too small.\n");
return (-1); return (-1);
@ -434,12 +419,15 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
pos += MAIN_HDR; pos += MAIN_HDR;
while (pos < last) { while (pos < last) {
if (*pos == 0) { val = *((uint64_t *)pos);
val = NTOHLL(val);
flags = (val >> MSB_SHIFT) & 0xff;
if (flags == 0) {
/* /*
* Copy over literal run of bytes. * Copy over literal run of bytes.
*/ */
pos++; rcnt = val & MSB_SETZERO_MASK;
rcnt = ntohll(*((uint64_t *)pos));
pos += sizeof (rcnt); pos += sizeof (rcnt);
if (out + rcnt > *dstlen) { if (out + rcnt > *dstlen) {
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n"); fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
@ -450,44 +438,31 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
pos1 += rcnt; pos1 += rcnt;
out += rcnt; out += rcnt;
} else if (*pos & TRANSP_BIT) {
int stride;
/*
* Copy over literal run of transposed bytes and inverse-transpose.
*/
stride = (*pos & TRANSP_MASK);
pos++;
rcnt = ntohll(*((uint64_t *)pos));
pos += sizeof (rcnt);
if (out + rcnt > *dstlen) {
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
return (-1);
}
transpose(pos, pos1, rcnt, stride, COL);
pos += rcnt;
pos1 += rcnt;
out += rcnt;
} else { } else {
stride = *pos; stride = flags;
pos++; rcnt = val & MSB_SETZERO_MASK;
rcnt = ntohll(*((uint64_t *)pos));
pos += sizeof (rcnt); pos += sizeof (rcnt);
sval = ntohll(*((uint64_t *)pos)); sval = NTOHLL(*((uint64_t *)pos));
pos += sizeof (sval); pos += sizeof (sval);
delta = ntohll(*((uint64_t *)pos)); delta = NTOHLL(*((uint64_t *)pos));
pos += sizeof (delta); pos += sizeof (delta);
if (out + rcnt > *dstlen) { if (out + rcnt > *dstlen) {
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n"); fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
return (-1); return (-1);
} }
vl = stride;
vl = (vl << 3) - 1;
vl = (1ULL << vl);
vl |= (vl - 1);
/* /*
* Recover original bytes from the arithmetic series using * Recover original bytes from the arithmetic series using
* length, starting value and delta. * length, starting value and delta.
*/ */
for (cnt = 0; cnt < rcnt/stride; cnt++) { for (cnt = 0; cnt < rcnt/stride; cnt++) {
val = sval << ((sizeof (val) - stride) << 3); val = (sval & vl);
*((uint64_t *)pos1) = ntohll(val); *((uint64_t *)pos1) = NTOHLL(val);
out += stride; out += stride;
sval += delta; sval += delta;
pos1 += stride; pos1 += stride;
@ -495,5 +470,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
} }
} }
*dstlen = out; *dstlen = out;
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "DELTA2: Decoded at %.3f MB/s\n", get_mb_s(out, strt, en)));
return (0); return (0);
} }

6
main.c
View file

@ -208,9 +208,13 @@ preproc_compress(compress_func_ptr cmp_func, void *src, uint64_t srclen, void *d
type = PREPROC_TYPE_LZP; type = PREPROC_TYPE_LZP;
hashsize = lzp_hash_size(level); hashsize = lzp_hash_size(level);
result = lzp_compress(src, dst, srclen, hashsize, LZP_DEFAULT_LZPMINLEN, 0); result = lzp_compress(src, dst, srclen, hashsize, LZP_DEFAULT_LZPMINLEN, 0);
if (result < 0 || result == srclen) return (-1); if (result < 0 || result == srclen) {
if (!enable_delta2_encode)
return (-1);
} else {
srclen = result; srclen = result;
memcpy(src, dst, srclen); memcpy(src, dst, srclen);
}
} else if (!enable_delta2_encode) { } else if (!enable_delta2_encode) {
/* /*