From 7055a8fc71c89ca33a83c5ecf6a7373d7f358efe Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 4 May 2014 22:03:40 +0530 Subject: [PATCH] Add AVX detection and usage of -mavx[2] flags. Force GCC to use Clang assembler always on MAC OS X for AVX support. --- Makefile.in | 8 ++++---- config | 36 ++++++++++++++++++++++++++++++++---- utils/sse_level.c | 44 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 70 insertions(+), 18 deletions(-) diff --git a/Makefile.in b/Makefile.in index 606e7a5..dd8f73c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -239,8 +239,8 @@ $(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS) $(BLAKE2 @CRYPTO_COMPAT_OBJS@ $(CRYPTO_ASM_OBJS) $(ARCHIVEOBJS) $(PJPGOBJS) $(DISPACKOBJS) DEBUG_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC -DEBUG_COMPILE = $(GCC) -g -c @EXTRA_OPT_FLAGS@ -fPIC -DEBUG_COMPILE_cpp = $(GPP) -g -c @EXTRA_OPT_FLAGS@ -fPIC +DEBUG_COMPILE = $(GCC) -g -c @EXTRA_OPT_FLAGS@ -fPIC @USE_CLANG_AS@ +DEBUG_COMPILE_cpp = $(GPP) -g -c @EXTRA_OPT_FLAGS@ -fPIC @USE_CLANG_AS@ DEBUG_VEC_FLAGS = DEBUG_LOOP_OPTFLAGS = DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ -fopenmp @@ -248,8 +248,8 @@ DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS) DEBUG_FPTR_FLAG = RELEASE_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC -RELEASE_COMPILE = $(GCC) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden -RELEASE_COMPILE_cpp = $(GPP) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden +RELEASE_COMPILE = $(GCC) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden @USE_CLANG_AS@ +RELEASE_COMPILE_cpp = $(GPP) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden @USE_CLANG_AS@ RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS) RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS) RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG diff --git a/config b/config index 333a63d..7021fff 100755 --- a/config +++ b/config @@ -40,7 +40,9 @@ ${prog} [] Enable building against an alternate Bzip2 and library installation. 
--with-libarchive= (Default: System) Enable building against an alternate libarchive installation. ---no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags. +--no-sse-detect Do NOT attempt to probe the system's SSE capability for build flags. + Implies '--no-avx-detect' below. +--no-avx-detect Do NOT attempt to probe the system's AVX capability for build flags. --no-1.3-archive-compat Disable compatibility with compressed archives created with Pcompress version 1.3 (default: retain compatibility). Hash formats changed from version 1.3 to 1.4 so this option is required if files created using @@ -86,6 +88,7 @@ zlib_prefix= bzlib_prefix= libarchive_prefix= sse_detect=1 +avx_detect=1 sse_opt_flags="-msse2" crypto_compat_objs='\$\(CRYPTO_COMPAT_OBJS\)' crypto_compat_flags="-D__HASH_COMPATIBILITY_" @@ -195,6 +198,7 @@ do ;; --no-sse-detect) sse_detect=0 + avx_detect=0 ;; --no-1.3-archive-compat) crypto_compat_objs="" @@ -254,8 +258,6 @@ then exit 1 fi -# SSE Detection -${ECHO} -n "Checking for CPU SSE version ... " if [ $sse_detect -eq 1 ] then ${GCC} -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils @@ -266,6 +268,12 @@ then echo "" exit 1 fi +fi + +if [ $sse_detect -eq 1 ] +then + # SSE Detection + ${ECHO} -n "Checking for CPU SSE version ... " sse_ver=`./sse_level` if [ $? -ne 0 ] then @@ -276,10 +284,30 @@ then exit 1 fi echo $sse_ver - rm -f sse_level sse_opt_flags="-m${sse_ver}" fi +if [ $avx_detect -eq 1 -a $sse_detect -eq 1 ] +then + ${ECHO} -n "Checking for CPU AVX version ... " + avx_ver=`./sse_level --avx` + if [ $? -ne 0 ] + then + rm -f sse_level + echo "ERROR:" + echo "SSE/AVX version detection utility. Try configuring with --no-sse-detect option." + echo "" + exit 1 + fi + if [ "x$avx_ver" = "x" ] + then + echo None + else + echo $avx_ver + sse_opt_flags="${sse_opt_flags} -m${avx_ver}" + fi +fi +rm -f sse_level echo $plat | egrep 'x86_64|amd64' > /dev/null if [ $? 
-eq 0 ] diff --git a/utils/sse_level.c b/utils/sse_level.c index 0ddb2e4..bfda960 100644 --- a/utils/sse_level.c +++ b/utils/sse_level.c @@ -23,24 +23,48 @@ */ #include +#include #include #include +void +usage(void) +{ + printf("Usage: sse_level [--avx]\n"); + exit(1); +} + int -main(void) +main(int argc, char *argv[]) { processor_cap_t pc; + int avx_detect = 0; cpuid_basic_identify(&pc); - if (pc.sse_level == 3 && pc.sse_sub_level == 1) { - printf("ssse%d", pc.sse_level); - pc.sse_sub_level = 0; - } else { - printf("sse%d", pc.sse_level); + + if (argc > 1) { + if (strcmp(argv[1], "--avx") == 0) + avx_detect = 1; + else + usage(); } - if (pc.sse_sub_level > 0) - printf(".%d\n", pc.sse_sub_level); - else - printf("\n"); + if (!avx_detect) { + if (pc.sse_level == 3 && pc.sse_sub_level == 1) { + printf("ssse%d", pc.sse_level); + pc.sse_sub_level = 0; + } else { + printf("sse%d", pc.sse_level); + } + if (pc.sse_sub_level > 0) + printf(".%d\n", pc.sse_sub_level); + else + printf("\n"); + } else { + if (pc.avx_level == 1) + printf("avx\n"); + else if (pc.avx_level == 2) + printf("avx2\n"); + } + return (0); }