Add SSE2 optimizations for Segmented Dedupe.

Moinak Ghosh 2013-05-05 23:34:26 +05:30
parent 6ecc400571
commit c27317d7da
2 changed files with 32 additions and 4 deletions


@@ -495,6 +495,7 @@ db_lookup_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
pent = &(ent->next);
ent = ent->next;
}
// The following two cases are for Segmented Dedupe approximate matching
} else if (cfg->similarity_cksum_sz == 8) { // Fast path for 64-bit keys
while (ent) {
if (*((uint64_t *)sim_cksum) == *((uint64_t *)ent->cksum)) {

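Note: the fast path shown above compares two 8-byte similarity checksums as single 64-bit words instead of calling memcmp(). The following is a minimal, self-contained sketch of that idea, not code from this commit; cksums_equal_64 is a hypothetical helper name, and the sketch loads the bytes through memcpy() (which compilers reduce to one 64-bit load) rather than a direct pointer cast, to avoid the alignment and aliasing assumptions the original makes about its stored digests.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Compare two 8-byte checksums as single 64-bit words
 * instead of calling memcmp().
 */
static int
cksums_equal_64(const unsigned char *a, const unsigned char *b)
{
	uint64_t x, y;

	memcpy(&x, a, sizeof (x));
	memcpy(&y, b, sizeof (y));
	return (x == y);
}

int
main(void)
{
	unsigned char c1[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	unsigned char c2[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	printf("%s\n", cksums_equal_64(c1, c2) ? "match" : "mismatch");
	return (0);
}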

@@ -902,11 +902,38 @@ process_blocks:
if (blks > blknum-i) blks = blknum-i;
length = 0;
tgt = seg_heap;
#ifdef __USE_SSE_INTRIN__
if ((cfg->chunk_cksum_sz & 15) == 0) {
for (j=0; j<blks; j++) {
__m128i s;
uchar_t *sc;
k = cfg->chunk_cksum_sz;
sc = ctx->g_blocks[j+i].cksum;
/*
* Use SSE2 to copy 16 bytes at a time avoiding a call
* to memcpy() since hash sizes are typically multiple
* of 16 bytes: 256-bit or 512-bit.
*/
while (k > 0) {
s = _mm_loadu_si128((__m128i *)sc);
_mm_storeu_si128((__m128i *)tgt, s);
tgt += 16;
sc += 16;
k -= 16;
}
length += cfg->chunk_cksum_sz;
}
} else {
#else
{
#endif
for (j=0; j<blks; j++) {
memcpy(tgt, ctx->g_blocks[j+i].cksum, cfg->chunk_cksum_sz);
length += cfg->chunk_cksum_sz;
tgt += cfg->chunk_cksum_sz;
}
}
*((uint32_t *)src) = blks;
src += sizeof (blks);
blks = j+i;
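
For readers outside the pcompress tree, here is a minimal, self-contained sketch of the copy technique this hunk adds: when the per-chunk checksum length is a multiple of 16 bytes (e.g. 256-bit or 512-bit digests), copy it 16 bytes at a time with unaligned SSE2 loads/stores, otherwise fall back to memcpy(). The function copy_cksum and the 64-byte test buffer are hypothetical names/values for illustration, not part of the commit; build with SSE2 enabled (the default on x86-64).

#include <emmintrin.h>	/* SSE2 intrinsics */
#include <stdio.h>
#include <string.h>

/*
 * Copy len bytes from src to dst. If len is a multiple of 16,
 * copy 16 bytes per iteration with unaligned SSE2 loads/stores;
 * otherwise fall back to memcpy().
 */
static void
copy_cksum(unsigned char *dst, const unsigned char *src, size_t len)
{
	if ((len & 15) == 0) {
		while (len > 0) {
			__m128i s = _mm_loadu_si128((const __m128i *)src);
			_mm_storeu_si128((__m128i *)dst, s);
			dst += 16;
			src += 16;
			len -= 16;
		}
	} else {
		memcpy(dst, src, len);
	}
}

int
main(void)
{
	unsigned char digest[64], copy[64];	/* 512-bit digest */
	size_t i;

	for (i = 0; i < sizeof (digest); i++)
		digest[i] = (unsigned char)i;
	copy_cksum(copy, digest, sizeof (digest));
	printf("%s\n", memcmp(copy, digest, sizeof (digest)) == 0 ? "match" : "mismatch");
	return (0);
}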