Use the SSE3 lddqu unaligned load (_mm_lddqu_si128) in the match finder when SSE3 is enabled; otherwise keep the SSE2 _mm_loadu_si128 load.

This commit is contained in:
Moinak Ghosh 2013-01-06 19:28:40 +05:30
parent 976a12afbe
commit 87aa12206e

View file

@@ -3,8 +3,12 @@
#include <string.h> #include <string.h>
#ifdef __USE_SSE_INTRIN__ #ifdef __USE_SSE_INTRIN__
#ifdef __SSE3__
#include <pmmintrin.h>
#else
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
#endif
#include "LzFind.h" #include "LzFind.h"
#include "LzHash.h" #include "LzHash.h"
@@ -409,8 +413,13 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
int mask; int mask;
UInt32 byt; UInt32 byt;
while (lenLimit - len > 16) { while (lenLimit - len > 16) {
#ifdef __SSE3__
__m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len));
__m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len));
#else
__m128i span1 = _mm_loadu_si128((__m128i *)(pb+len)); __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len));
__m128i span2 = _mm_loadu_si128((__m128i *)(cur+len)); __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len));
#endif
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (mask) { if (mask) {
byt = __builtin_ctz(mask); byt = __builtin_ctz(mask);
@@ -504,8 +513,13 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
int mask; int mask;
UInt32 byt; UInt32 byt;
while (lenLimit - len > 16) { while (lenLimit - len > 16) {
#ifdef __SSE3__
__m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len));
__m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len));
#else
__m128i span1 = _mm_loadu_si128((__m128i *)(pb+len)); __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len));
__m128i span2 = _mm_loadu_si128((__m128i *)(cur+len)); __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len));
#endif
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
if (mask) { if (mask) {
byt = __builtin_ctz(mask); byt = __builtin_ctz(mask);