Add SSE2 optimizations for Segmented Dedupe.
This commit is contained in:
parent
6ecc400571
commit
c27317d7da
2 changed files with 32 additions and 4 deletions
|
@@ -495,6 +495,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
|||
pent = &(ent->next);
|
||||
ent = ent->next;
|
||||
}
|
||||
// The following two cases are for Segmented Dedupe approximate matching
|
||||
} else if (cfg->similarity_cksum_sz == 8) {// Fast path for 64-bit keys
|
||||
while (ent) {
|
||||
if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum)) {
|
||||
|
|
|
@@ -902,11 +902,38 @@ process_blocks:
|
|||
if (blks > blknum-i) blks = blknum-i;
|
||||
length = 0;
|
||||
tgt = seg_heap;
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
if ((cfg->chunk_cksum_sz & 15) == 0) {
|
||||
for (j=0; j<blks; j++) {
|
||||
__m128i s;
|
||||
uchar_t *sc;
|
||||
k = cfg->chunk_cksum_sz;
|
||||
sc = ctx->g_blocks[j+i].cksum;
|
||||
|
||||
/*
|
||||
* Use SSE2 to copy 16 bytes at a time avoiding a call
|
||||
* to memcpy() since hash sizes are typically multiple
|
||||
* of 16 bytes: 256-bit or 512-bit.
|
||||
*/
|
||||
while (k > 0) {
|
||||
s = _mm_loadu_si128((__m128i *)sc);
|
||||
_mm_storeu_si128((__m128i *)tgt, s);
|
||||
tgt += 16;
|
||||
sc += 16;
|
||||
k -= 16;
|
||||
}
|
||||
length += cfg->chunk_cksum_sz;
|
||||
}
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
for (j=0; j<blks; j++) {
|
||||
memcpy(tgt, ctx->g_blocks[j+i].cksum, cfg->chunk_cksum_sz);
|
||||
length += cfg->chunk_cksum_sz;
|
||||
tgt += cfg->chunk_cksum_sz;
|
||||
}
|
||||
}
|
||||
*((uint32_t *)src) = blks;
|
||||
src += sizeof (blks);
|
||||
blks = j+i;
|
||||
|
|
Loading…
Reference in a new issue