diff --git a/lzma/LzFind.c b/lzma/LzFind.c index 1bcd5c1..160fed0 100644 --- a/lzma/LzFind.c +++ b/lzma/LzFind.c @@ -3,8 +3,12 @@ #include #ifdef __USE_SSE_INTRIN__ +#ifdef __SSE3__ +#include <pmmintrin.h> +#else #include <emmintrin.h> #endif +#endif #include "LzFind.h" #include "LzHash.h" @@ -409,8 +413,13 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt int mask; UInt32 byt; while (lenLimit - len > 16) { +#ifdef __SSE3__ + __m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len)); + __m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len)); +#else __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len)); __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len)); +#endif mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; if (mask) { byt = __builtin_ctz(mask); @@ -504,8 +513,13 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const int mask; UInt32 byt; while (lenLimit - len > 16) { +#ifdef __SSE3__ + __m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len)); + __m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len)); +#else __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len)); __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len)); +#endif mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff; if (mask) { byt = __builtin_ctz(mask);