Further improvements to Delta2 performance.
Fix the byteswap macros. Start adding assertions.
This commit is contained in:
parent
26a4f42506
commit
36d95276ee
3 changed files with 60 additions and 49 deletions
|
@ -43,7 +43,11 @@
|
||||||
* 64-bit encoded value is of the following format
|
* 64-bit encoded value is of the following format
|
||||||
* Most Significant Byte = Stride length
|
* Most Significant Byte = Stride length
|
||||||
* Remaining Bytes = Number of bytes in the span
|
* Remaining Bytes = Number of bytes in the span
|
||||||
|
*
|
||||||
|
* We optimize for little-endian, so values are stored and interpreted
|
||||||
|
* in little-endian order.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
|
@ -82,16 +86,12 @@
|
||||||
#define DELTA2_CHUNK (4096)
|
#define DELTA2_CHUNK (4096)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Byteswap macros. We optimize for little-endian, so values are stored
|
* Stride values to be checked. As of this implementation strides only
|
||||||
* and interpreted in little-endian order.
|
* upto 8 bytes (uint64_t) are supported.
|
||||||
*/
|
*/
|
||||||
#if BYTE_ORDER == BIG_ENDIAN
|
#define NSTRIDES 4
|
||||||
#define HTONLL htonll
|
static uchar_t strides[NSTRIDES] = {3, 5, 7, 8};
|
||||||
#define NTOHLL ntohll
|
|
||||||
#else
|
|
||||||
#define HTONLL
|
|
||||||
#define NTOHLL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static int delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen,
|
static int delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen,
|
||||||
int rle_thresh, int last_encode, int *hdr_ovr);
|
int rle_thresh, int last_encode, int *hdr_ovr);
|
||||||
|
@ -104,6 +104,9 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rle_thresh < MIN_THRESH)
|
||||||
|
return (-1);
|
||||||
|
|
||||||
if (*dstlen < DELTA2_CHUNK) {
|
if (*dstlen < DELTA2_CHUNK) {
|
||||||
int hdr_ovr;
|
int hdr_ovr;
|
||||||
int rv;
|
int rv;
|
||||||
|
@ -123,7 +126,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
dstend = dst + *dstlen;
|
dstend = dst + *dstlen;
|
||||||
slen = srclen;
|
slen = srclen;
|
||||||
pending = 0;
|
pending = 0;
|
||||||
*((uint64_t *)dstpos) = HTONLL(srclen);
|
*((uint64_t *)dstpos) = LE64(srclen);
|
||||||
dstpos += MAIN_HDR;
|
dstpos += MAIN_HDR;
|
||||||
lastdst = dstpos;
|
lastdst = dstpos;
|
||||||
lastsrc = srcpos;
|
lastsrc = srcpos;
|
||||||
|
@ -154,7 +157,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
} else {
|
} else {
|
||||||
if (pending) {
|
if (pending) {
|
||||||
pending &= MSB_SETZERO_MASK;
|
pending &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)lastdst) = HTONLL(pending);
|
*((uint64_t *)lastdst) = LE64(pending);
|
||||||
lastdst += sizeof (uint64_t);
|
lastdst += sizeof (uint64_t);
|
||||||
memcpy(lastdst, lastsrc, pending);
|
memcpy(lastdst, lastsrc, pending);
|
||||||
pending = 0;
|
pending = 0;
|
||||||
|
@ -170,7 +173,7 @@ delta2_encode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen, int
|
||||||
}
|
}
|
||||||
if (pending) {
|
if (pending) {
|
||||||
pending &= MSB_SETZERO_MASK;
|
pending &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)lastdst) = HTONLL(pending);
|
*((uint64_t *)lastdst) = LE64(pending);
|
||||||
lastdst += sizeof (uint64_t);
|
lastdst += sizeof (uint64_t);
|
||||||
if (lastdst + pending > dstend) {
|
if (lastdst + pending > dstend) {
|
||||||
DEBUG_STAT_EN(fprintf(stderr, "No Delta\n"));
|
DEBUG_STAT_EN(fprintf(stderr, "No Delta\n"));
|
||||||
|
@ -195,21 +198,17 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
uint64_t cnt, val, sval;
|
uint64_t cnt, val, sval;
|
||||||
uint64_t vl1, vl2, vld1, vld2;
|
uint64_t vl1, vl2, vld1, vld2;
|
||||||
uchar_t *pos, *pos2, stride, st1;
|
uchar_t *pos, *pos2, stride, st1;
|
||||||
uchar_t strides[4] = {3, 5, 7, 8};
|
int st;
|
||||||
int st, sz;
|
|
||||||
|
|
||||||
if (rle_thresh < MIN_THRESH)
|
assert(srclen == *dstlen);
|
||||||
return (-1);
|
|
||||||
gtot1 = ULL_MAX;
|
gtot1 = ULL_MAX;
|
||||||
stride = 0;
|
stride = 0;
|
||||||
val = 0;
|
|
||||||
tot = 0;
|
tot = 0;
|
||||||
sz = sizeof (strides) / sizeof (strides[0]);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Estimate which stride length gives the max reduction given rle_thresh.
|
* Estimate which stride length gives the max reduction given rle_thresh.
|
||||||
*/
|
*/
|
||||||
for (st = 0; st < sz; st++) {
|
for (st = 0; st < NSTRIDES; st++) {
|
||||||
int gt;
|
int gt;
|
||||||
|
|
||||||
snum = 0;
|
snum = 0;
|
||||||
|
@ -223,9 +222,10 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
sval = ((sval << 3) - 1);
|
sval = ((sval << 3) - 1);
|
||||||
sval = (1ULL << sval);
|
sval = (1ULL << sval);
|
||||||
sval |= (sval - 1);
|
sval |= (sval - 1);
|
||||||
|
val = 0;
|
||||||
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += st1) {
|
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += st1) {
|
||||||
vl2 = *((uint64_t *)pos);
|
vl2 = *((uint64_t *)pos);
|
||||||
vl2 = HTONLL(vl2);
|
vl2 = LE64(vl2);
|
||||||
vl2 &= sval;
|
vl2 &= sval;
|
||||||
vld2 = vl2 - vl1;
|
vld2 = vl2 - vl1;
|
||||||
if (vld1 != vld2) {
|
if (vld1 != vld2) {
|
||||||
|
@ -268,8 +268,12 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !(gtot1 < srclen && srclen - gtot1 > (DELTA_HDR + LIT_HDR + MAIN_HDR) && gtot1 < *dstlen) ) {
|
/*
|
||||||
if (srclen >= DELTA2_CHUNK) {
|
* No need to check for destination buffer overflow since
|
||||||
|
* dstlen == srclen always.
|
||||||
|
*/
|
||||||
|
if ( gtot1 > (srclen - (DELTA_HDR + LIT_HDR + MAIN_HDR)) ) {
|
||||||
|
if (srclen == DELTA2_CHUNK) {
|
||||||
if (tot > 0)
|
if (tot > 0)
|
||||||
*dstlen = tot;
|
*dstlen = tot;
|
||||||
}
|
}
|
||||||
|
@ -287,7 +291,7 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
pos2 = dst;
|
pos2 = dst;
|
||||||
|
|
||||||
vl2 = *((uint64_t *)pos);
|
vl2 = *((uint64_t *)pos);
|
||||||
vl2 = HTONLL(vl2);
|
vl2 = LE64(vl2);
|
||||||
val = stride;
|
val = stride;
|
||||||
val = ((val << 3) - 1);
|
val = ((val << 3) - 1);
|
||||||
val = (1ULL << val);
|
val = (1ULL << val);
|
||||||
|
@ -297,14 +301,14 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
|
|
||||||
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += stride) {
|
for (cnt = 0; cnt < (srclen - sizeof (cnt)); cnt += stride) {
|
||||||
vl2 = *((uint64_t *)pos);
|
vl2 = *((uint64_t *)pos);
|
||||||
vl2 = HTONLL(vl2);
|
vl2 = LE64(vl2);
|
||||||
vl2 &= val;
|
vl2 &= val;
|
||||||
vld2 = vl2 - vl1;
|
vld2 = vl2 - vl1;
|
||||||
if (vld1 != vld2) {
|
if (vld1 != vld2) {
|
||||||
if (snum > rle_thresh) {
|
if (snum > rle_thresh) {
|
||||||
if (gtot1 > 0) {
|
if (gtot1 > 0) {
|
||||||
gtot1 &= MSB_SETZERO_MASK;
|
gtot1 &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)pos2) = HTONLL(gtot1);
|
*((uint64_t *)pos2) = LE64(gtot1);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
||||||
memcpy(pos2, pos - (gtot1+snum), gtot1);
|
memcpy(pos2, pos - (gtot1+snum), gtot1);
|
||||||
|
@ -318,11 +322,11 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
gtot2 = stride;
|
gtot2 = stride;
|
||||||
gtot2 <<= MSB_SHIFT;
|
gtot2 <<= MSB_SHIFT;
|
||||||
gtot2 |= (snum & MSB_SETZERO_MASK);
|
gtot2 |= (snum & MSB_SETZERO_MASK);
|
||||||
*((uint64_t *)pos2) = HTONLL(gtot2);
|
*((uint64_t *)pos2) = LE64(gtot2);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
*((uint64_t *)pos2) = HTONLL(sval);
|
*((uint64_t *)pos2) = LE64(sval);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
*((uint64_t *)pos2) = HTONLL(vld1);
|
*((uint64_t *)pos2) = LE64(vld1);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
|
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
|
||||||
} else {
|
} else {
|
||||||
|
@ -341,7 +345,7 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
if (snum > rle_thresh) {
|
if (snum > rle_thresh) {
|
||||||
if (gtot1 > 0) {
|
if (gtot1 > 0) {
|
||||||
gtot1 &= MSB_SETZERO_MASK;
|
gtot1 &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)pos2) = HTONLL(gtot1);
|
*((uint64_t *)pos2) = LE64(gtot1);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
||||||
memcpy(pos2, pos - (gtot1+snum), gtot1);
|
memcpy(pos2, pos - (gtot1+snum), gtot1);
|
||||||
|
@ -351,18 +355,18 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
gtot2 = stride;
|
gtot2 = stride;
|
||||||
gtot2 <<= MSB_SHIFT;
|
gtot2 <<= MSB_SHIFT;
|
||||||
gtot2 |= (snum & MSB_SETZERO_MASK);
|
gtot2 |= (snum & MSB_SETZERO_MASK);
|
||||||
*((uint64_t *)pos2) = HTONLL(gtot2);
|
*((uint64_t *)pos2) = LE64(gtot2);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
*((uint64_t *)pos2) = HTONLL(sval);
|
*((uint64_t *)pos2) = LE64(sval);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
*((uint64_t *)pos2) = HTONLL(vld1);
|
*((uint64_t *)pos2) = LE64(vld1);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
|
DEBUG_STAT_EN(*hdr_ovr += DELTA_HDR);
|
||||||
|
|
||||||
} else if (last_encode) {
|
} else if (last_encode) {
|
||||||
gtot1 += snum;
|
gtot1 += snum;
|
||||||
gtot1 &= MSB_SETZERO_MASK;
|
gtot1 &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)pos2) = HTONLL(gtot1);
|
*((uint64_t *)pos2) = LE64(gtot1);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
DEBUG_STAT_EN(*hdr_ovr += LIT_HDR);
|
||||||
memcpy(pos2, pos - gtot1, gtot1);
|
memcpy(pos2, pos - gtot1, gtot1);
|
||||||
|
@ -380,7 +384,7 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
* literal run.
|
* literal run.
|
||||||
*/
|
*/
|
||||||
val &= MSB_SETZERO_MASK;
|
val &= MSB_SETZERO_MASK;
|
||||||
*((uint64_t *)pos2) = HTONLL(val);
|
*((uint64_t *)pos2) = LE64(val);
|
||||||
pos2 += sizeof (uint64_t);
|
pos2 += sizeof (uint64_t);
|
||||||
for (cnt = 0; cnt < val; cnt++) {
|
for (cnt = 0; cnt < val; cnt++) {
|
||||||
*pos2 = *pos;
|
*pos2 = *pos;
|
||||||
|
@ -409,7 +413,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
||||||
|
|
||||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
last = pos + srclen;
|
last = pos + srclen;
|
||||||
olen = NTOHLL(*((uint64_t *)pos));
|
olen = LE64(*((uint64_t *)pos));
|
||||||
if (*dstlen < olen) {
|
if (*dstlen < olen) {
|
||||||
fprintf(stderr, "DELTA2 Decode: Destination buffer too small.\n");
|
fprintf(stderr, "DELTA2 Decode: Destination buffer too small.\n");
|
||||||
return (-1);
|
return (-1);
|
||||||
|
@ -420,7 +424,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
||||||
|
|
||||||
while (pos < last) {
|
while (pos < last) {
|
||||||
val = *((uint64_t *)pos);
|
val = *((uint64_t *)pos);
|
||||||
val = NTOHLL(val);
|
val = LE64(val);
|
||||||
flags = (val >> MSB_SHIFT) & 0xff;
|
flags = (val >> MSB_SHIFT) & 0xff;
|
||||||
|
|
||||||
if (flags == 0) {
|
if (flags == 0) {
|
||||||
|
@ -442,9 +446,9 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
||||||
stride = flags;
|
stride = flags;
|
||||||
rcnt = val & MSB_SETZERO_MASK;
|
rcnt = val & MSB_SETZERO_MASK;
|
||||||
pos += sizeof (rcnt);
|
pos += sizeof (rcnt);
|
||||||
sval = NTOHLL(*((uint64_t *)pos));
|
sval = LE64(*((uint64_t *)pos));
|
||||||
pos += sizeof (sval);
|
pos += sizeof (sval);
|
||||||
delta = NTOHLL(*((uint64_t *)pos));
|
delta = LE64(*((uint64_t *)pos));
|
||||||
pos += sizeof (delta);
|
pos += sizeof (delta);
|
||||||
if (out + rcnt > *dstlen) {
|
if (out + rcnt > *dstlen) {
|
||||||
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
|
fprintf(stderr, "DELTA2 Decode: Destination buffer overflow. Corrupt data.\n");
|
||||||
|
@ -462,7 +466,7 @@ delta2_decode(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen)
|
||||||
*/
|
*/
|
||||||
for (cnt = 0; cnt < rcnt/stride; cnt++) {
|
for (cnt = 0; cnt < rcnt/stride; cnt++) {
|
||||||
val = (sval & vl);
|
val = (sval & vl);
|
||||||
*((uint64_t *)pos1) = NTOHLL(val);
|
*((uint64_t *)pos1) = LE64(val);
|
||||||
out += stride;
|
out += stride;
|
||||||
sval += delta;
|
sval += delta;
|
||||||
pos1 += stride;
|
pos1 += stride;
|
||||||
|
|
|
@ -70,8 +70,8 @@ _siftupmax(heap_t *h, __TYPE spos, __TYPE epos)
|
||||||
endpos = h->len;
|
endpos = h->len;
|
||||||
heap = h->ary;
|
heap = h->ary;
|
||||||
#ifdef ERROR_CHK
|
#ifdef ERROR_CHK
|
||||||
if (pos >= endpos) {
|
if (spos >= endpos) {
|
||||||
fprintf(stderr, "_siftupmax: index out of range: %u, len: %u\n", pos, endpos);
|
fprintf(stderr, "_siftupmax: index out of range: %u, len: %u\n", spos, endpos);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -117,8 +117,8 @@ _siftupmax_s(heap_t *h, __TYPE spos)
|
||||||
endpos = h->len;
|
endpos = h->len;
|
||||||
heap = h->ary;
|
heap = h->ary;
|
||||||
#ifdef ERROR_CHK
|
#ifdef ERROR_CHK
|
||||||
if (pos >= endpos) {
|
if (spos >= endpos) {
|
||||||
fprintf(stderr, "_siftupmax: index out of range: %u, len: %u\n", pos, endpos);
|
fprintf(stderr, "_siftupmax: index out of range: %u, len: %u\n", spos, endpos);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
|
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -67,6 +68,11 @@ typedef int64_t bsize_t;
|
||||||
# ifndef ntonll
|
# ifndef ntonll
|
||||||
# define ntohll(x) (x)
|
# define ntohll(x) (x)
|
||||||
# endif
|
# endif
|
||||||
|
# if !defined(sun) && !defined (__sun)
|
||||||
|
# define LE64(x) __bswap_64(x)
|
||||||
|
# else
|
||||||
|
# define LE64(x) BSWAP_64(x)
|
||||||
|
# endif
|
||||||
#else
|
#else
|
||||||
# if !defined(sun) && !defined (__sun)
|
# if !defined(sun) && !defined (__sun)
|
||||||
# ifndef htonll
|
# ifndef htonll
|
||||||
|
@ -76,6 +82,7 @@ typedef int64_t bsize_t;
|
||||||
# define ntohll(x) __bswap_64(x)
|
# define ntohll(x) __bswap_64(x)
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
# define LE64(x) (x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue