Add AVX detection and use of the -mavx/-mavx2 flags.
Always force GCC to use the Clang assembler on Mac OS X for AVX support.
This commit is contained in:
parent
63bef473cc
commit
7055a8fc71
3 changed files with 70 additions and 18 deletions
|
@ -239,8 +239,8 @@ $(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS) $(BLAKE2
|
||||||
@CRYPTO_COMPAT_OBJS@ $(CRYPTO_ASM_OBJS) $(ARCHIVEOBJS) $(PJPGOBJS) $(DISPACKOBJS)
|
@CRYPTO_COMPAT_OBJS@ $(CRYPTO_ASM_OBJS) $(ARCHIVEOBJS) $(PJPGOBJS) $(DISPACKOBJS)
|
||||||
|
|
||||||
DEBUG_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC
|
DEBUG_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC
|
||||||
DEBUG_COMPILE = $(GCC) -g -c @EXTRA_OPT_FLAGS@ -fPIC
|
DEBUG_COMPILE = $(GCC) -g -c @EXTRA_OPT_FLAGS@ -fPIC @USE_CLANG_AS@
|
||||||
DEBUG_COMPILE_cpp = $(GPP) -g -c @EXTRA_OPT_FLAGS@ -fPIC
|
DEBUG_COMPILE_cpp = $(GPP) -g -c @EXTRA_OPT_FLAGS@ -fPIC @USE_CLANG_AS@
|
||||||
DEBUG_VEC_FLAGS =
|
DEBUG_VEC_FLAGS =
|
||||||
DEBUG_LOOP_OPTFLAGS =
|
DEBUG_LOOP_OPTFLAGS =
|
||||||
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ -fopenmp
|
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ -fopenmp
|
||||||
|
@ -248,8 +248,8 @@ DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
|
||||||
DEBUG_FPTR_FLAG =
|
DEBUG_FPTR_FLAG =
|
||||||
|
|
||||||
RELEASE_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC
|
RELEASE_LINK = $(GPP) -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC
|
||||||
RELEASE_COMPILE = $(GCC) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden
|
RELEASE_COMPILE = $(GCC) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden @USE_CLANG_AS@
|
||||||
RELEASE_COMPILE_cpp = $(GPP) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden
|
RELEASE_COMPILE_cpp = $(GPP) -c @EXTRA_OPT_FLAGS@ -fPIC -fvisibility=hidden @USE_CLANG_AS@
|
||||||
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
|
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
|
||||||
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
|
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
|
||||||
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
|
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
|
||||||
|
|
36
config
36
config
|
@ -40,7 +40,9 @@ ${prog} [<options>]
|
||||||
Enable building against an alternate Bzip2 and library installation.
|
Enable building against an alternate Bzip2 and library installation.
|
||||||
--with-libarchive=<path to libarchive installation tree> (Default: System)
|
--with-libarchive=<path to libarchive installation tree> (Default: System)
|
||||||
Enable building against an alternate libarchive installation.
|
Enable building against an alternate libarchive installation.
|
||||||
--no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags.
|
--no-sse-detect Do NOT attempt to probe the system's SSE capability for build flags.
|
||||||
|
Implies '--no-avx-detect' below.
|
||||||
|
--no-avx-detect Do NOT attempt to probe the system's AVX capability for build flags.
|
||||||
--no-1.3-archive-compat Disable compatibility with compressed archives created with Pcompress
|
--no-1.3-archive-compat Disable compatibility with compressed archives created with Pcompress
|
||||||
version 1.3 (default: retain compatibility). Hash formats changed from
|
version 1.3 (default: retain compatibility). Hash formats changed from
|
||||||
version 1.3 to 1.4 so this option is required if files created using
|
version 1.3 to 1.4 so this option is required if files created using
|
||||||
|
@ -86,6 +88,7 @@ zlib_prefix=
|
||||||
bzlib_prefix=
|
bzlib_prefix=
|
||||||
libarchive_prefix=
|
libarchive_prefix=
|
||||||
sse_detect=1
|
sse_detect=1
|
||||||
|
avx_detect=1
|
||||||
sse_opt_flags="-msse2"
|
sse_opt_flags="-msse2"
|
||||||
crypto_compat_objs='\$\(CRYPTO_COMPAT_OBJS\)'
|
crypto_compat_objs='\$\(CRYPTO_COMPAT_OBJS\)'
|
||||||
crypto_compat_flags="-D__HASH_COMPATIBILITY_"
|
crypto_compat_flags="-D__HASH_COMPATIBILITY_"
|
||||||
|
@ -195,6 +198,7 @@ do
|
||||||
;;
|
;;
|
||||||
--no-sse-detect)
|
--no-sse-detect)
|
||||||
sse_detect=0
|
sse_detect=0
|
||||||
|
avx_detect=0
|
||||||
;;
|
;;
|
||||||
--no-1.3-archive-compat)
|
--no-1.3-archive-compat)
|
||||||
crypto_compat_objs=""
|
crypto_compat_objs=""
|
||||||
|
@ -254,8 +258,6 @@ then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# SSE Detection
|
|
||||||
${ECHO} -n "Checking for CPU SSE version ... "
|
|
||||||
if [ $sse_detect -eq 1 ]
|
if [ $sse_detect -eq 1 ]
|
||||||
then
|
then
|
||||||
${GCC} -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils
|
${GCC} -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils
|
||||||
|
@ -266,6 +268,12 @@ then
|
||||||
echo ""
|
echo ""
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $sse_detect -eq 1 ]
|
||||||
|
then
|
||||||
|
# SSE Detection
|
||||||
|
${ECHO} -n "Checking for CPU SSE version ... "
|
||||||
sse_ver=`./sse_level`
|
sse_ver=`./sse_level`
|
||||||
if [ $? -ne 0 ]
|
if [ $? -ne 0 ]
|
||||||
then
|
then
|
||||||
|
@ -276,10 +284,30 @@ then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo $sse_ver
|
echo $sse_ver
|
||||||
rm -f sse_level
|
|
||||||
sse_opt_flags="-m${sse_ver}"
|
sse_opt_flags="-m${sse_ver}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ $avx_detect -eq 1 -a $sse_detect -eq 1 ]
|
||||||
|
then
|
||||||
|
${ECHO} -n "Checking for CPU AVX version ... "
|
||||||
|
avx_ver=`./sse_level --avx`
|
||||||
|
if [ $? -ne 0 ]
|
||||||
|
then
|
||||||
|
rm -f sse_level
|
||||||
|
echo "ERROR:"
|
||||||
|
echo "SSE/AVX version detection utility failed. Try configuring with --no-sse-detect option."
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ "x$avx_ver" = "x" ]
|
||||||
|
then
|
||||||
|
echo None
|
||||||
|
else
|
||||||
|
echo $avx_ver
|
||||||
|
sse_opt_flags="${sse_opt_flags} -m${avx_ver}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
rm -f sse_level
|
||||||
|
|
||||||
echo $plat | egrep 'x86_64|amd64' > /dev/null
|
echo $plat | egrep 'x86_64|amd64' > /dev/null
|
||||||
if [ $? -eq 0 ]
|
if [ $? -eq 0 ]
|
||||||
|
|
|
@ -23,24 +23,48 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
#include <cpuid.h>
|
#include <cpuid.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
usage(void)
|
||||||
|
{
|
||||||
|
printf("Usage: sse_level [--avx]\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(void)
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
processor_cap_t pc;
|
processor_cap_t pc;
|
||||||
|
int avx_detect = 0;
|
||||||
cpuid_basic_identify(&pc);
|
cpuid_basic_identify(&pc);
|
||||||
if (pc.sse_level == 3 && pc.sse_sub_level == 1) {
|
|
||||||
printf("ssse%d", pc.sse_level);
|
if (argc > 1) {
|
||||||
pc.sse_sub_level = 0;
|
if (strcmp(argv[1], "--avx") == 0)
|
||||||
} else {
|
avx_detect = 1;
|
||||||
printf("sse%d", pc.sse_level);
|
else
|
||||||
|
usage();
|
||||||
}
|
}
|
||||||
if (pc.sse_sub_level > 0)
|
if (!avx_detect) {
|
||||||
printf(".%d\n", pc.sse_sub_level);
|
if (pc.sse_level == 3 && pc.sse_sub_level == 1) {
|
||||||
else
|
printf("ssse%d", pc.sse_level);
|
||||||
printf("\n");
|
pc.sse_sub_level = 0;
|
||||||
|
} else {
|
||||||
|
printf("sse%d", pc.sse_level);
|
||||||
|
}
|
||||||
|
if (pc.sse_sub_level > 0)
|
||||||
|
printf(".%d\n", pc.sse_sub_level);
|
||||||
|
else
|
||||||
|
printf("\n");
|
||||||
|
} else {
|
||||||
|
if (pc.avx_level == 1)
|
||||||
|
printf("avx\n");
|
||||||
|
else if (pc.avx_level == 2)
|
||||||
|
printf("avx2\n");
|
||||||
|
}
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue