Add SSE2 optimizations for Segmented Dedupe.
This commit is contained in:
parent
6ecc400571
commit
c27317d7da
2 changed files with 32 additions and 4 deletions
|
@ -495,6 +495,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
||||||
pent = &(ent->next);
|
pent = &(ent->next);
|
||||||
ent = ent->next;
|
ent = ent->next;
|
||||||
}
|
}
|
||||||
|
// The following two cases are for Segmented Dedupe approximate matching
|
||||||
} else if (cfg->similarity_cksum_sz == 8) {// Fast path for 64-bit keys
|
} else if (cfg->similarity_cksum_sz == 8) {// Fast path for 64-bit keys
|
||||||
while (ent) {
|
while (ent) {
|
||||||
if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum)) {
|
if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum)) {
|
||||||
|
|
|
@ -902,10 +902,37 @@ process_blocks:
|
||||||
if (blks > blknum-i) blks = blknum-i;
|
if (blks > blknum-i) blks = blknum-i;
|
||||||
length = 0;
|
length = 0;
|
||||||
tgt = seg_heap;
|
tgt = seg_heap;
|
||||||
for (j=0; j<blks; j++) {
|
#ifdef __USE_SSE_INTRIN__
|
||||||
memcpy(tgt, ctx->g_blocks[j+i].cksum, cfg->chunk_cksum_sz);
|
if ((cfg->chunk_cksum_sz & 15) == 0) {
|
||||||
length += cfg->chunk_cksum_sz;
|
for (j=0; j<blks; j++) {
|
||||||
tgt += cfg->chunk_cksum_sz;
|
__m128i s;
|
||||||
|
uchar_t *sc;
|
||||||
|
k = cfg->chunk_cksum_sz;
|
||||||
|
sc = ctx->g_blocks[j+i].cksum;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use SSE2 to copy 16 bytes at a time avoiding a call
|
||||||
|
* to memcpy() since hash sizes are typically multiples
|
||||||
|
* of 16 bytes: 256-bit or 512-bit.
|
||||||
|
*/
|
||||||
|
while (k > 0) {
|
||||||
|
s = _mm_loadu_si128((__m128i *)sc);
|
||||||
|
_mm_storeu_si128((__m128i *)tgt, s);
|
||||||
|
tgt += 16;
|
||||||
|
sc += 16;
|
||||||
|
k -= 16;
|
||||||
|
}
|
||||||
|
length += cfg->chunk_cksum_sz;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
for (j=0; j<blks; j++) {
|
||||||
|
memcpy(tgt, ctx->g_blocks[j+i].cksum, cfg->chunk_cksum_sz);
|
||||||
|
length += cfg->chunk_cksum_sz;
|
||||||
|
tgt += cfg->chunk_cksum_sz;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
*((uint32_t *)src) = blks;
|
*((uint32_t *)src) = blks;
|
||||||
src += sizeof (blks);
|
src += sizeof (blks);
|
||||||
|
|
Loading…
Reference in a new issue