From 87aa12206e7c8d30ec298df9c1a2fed8b8605b85 Mon Sep 17 00:00:00 2001
From: Moinak Ghosh
Date: Sun, 6 Jan 2013 19:28:40 +0530
Subject: [PATCH] Use SSE3 lddqu in the matchfinder if SSE3 is enabled.

---
 lzma/LzFind.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lzma/LzFind.c b/lzma/LzFind.c
index 1bcd5c1..160fed0 100644
--- a/lzma/LzFind.c
+++ b/lzma/LzFind.c
@@ -3,8 +3,12 @@
 
 #include <string.h>
 #ifdef __USE_SSE_INTRIN__
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#else
 #include <emmintrin.h>
 #endif
+#endif
 
 #include "LzFind.h"
 #include "LzHash.h"
@@ -409,8 +413,13 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
       int mask;
       UInt32 byt;
       while (lenLimit - len > 16) {
+#ifdef __SSE3__
+        __m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len));
+        __m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len));
+#else
         __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len));
         __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len));
+#endif
         mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
         if (mask) {
           byt = __builtin_ctz(mask);
@@ -504,8 +513,13 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
       int mask;
       UInt32 byt;
       while (lenLimit - len > 16) {
+#ifdef __SSE3__
+        __m128i span1 = _mm_lddqu_si128((__m128i *)(pb+len));
+        __m128i span2 = _mm_lddqu_si128((__m128i *)(cur+len));
+#else
         __m128i span1 = _mm_loadu_si128((__m128i *)(pb+len));
         __m128i span2 = _mm_loadu_si128((__m128i *)(cur+len));
+#endif
         mask = _mm_movemask_epi8(_mm_cmpeq_epi8(span1, span2)) ^ 0xffff;
         if (mask) {
           byt = __builtin_ctz(mask);