From e9e3e1e632ab9842b180315dc3de9b4774d2f21b Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 20 Jan 2013 22:53:36 +0530 Subject: [PATCH] Improve SSE version detection. Add SSE4 detection. Fix setting of some opt flags in Makefile.in. --- Makefile.in | 12 ++++++------ config | 44 ++++++++++++++++++++++++++++++++++++++------ utils/cpuid.c | 11 ++++++++++- utils/cpuid.h | 2 ++ utils/sse_level.c | 17 +++++++++++++++++ utils/utils.h | 1 + 6 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 utils/sse_level.c diff --git a/Makefile.in b/Makefile.in index b40bd83..a9a26b9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -158,18 +158,18 @@ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ $(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) -DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ -DEBUG_COMPILE = gcc -m64 -g -msse3 -c -DEBUG_COMPILE_cpp = g++ -m64 -g -msse3 -c +DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ +DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@ +DEBUG_COMPILE_cpp = g++ -g -c @EXTRA_OPT_FLAGS@ DEBUG_VEC_FLAGS = DEBUG_LOOP_OPTFLAGS = DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS) DEBUG_FPTR_FLAG = -RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ @M64_FLAG@ -RELEASE_COMPILE = gcc -m64 -msse3 -c @M64_FLAG@ -RELEASE_COMPILE_cpp = g++ -m64 -msse3 -c @M64_FLAG@ +RELEASE_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ +RELEASE_COMPILE = gcc -c @EXTRA_OPT_FLAGS@ +RELEASE_COMPILE_cpp = g++ -c @EXTRA_OPT_FLAGS@ RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS) RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS) RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG diff --git a/config b/config index 94a3322..cb7cb24 100755 --- a/config +++ b/config @@ -50,9 +50,11 @@ yasm=yasm keccak_srcs= keccak_hdrs= keccak_srcs_asm= -m64_flag= +extra_opt_flags= zlib_prefix= 
bzlib_prefix= +sse_detect=1 +default_sse="-msse2" rm -rf ./buildtmp mkdir ./buildtmp @@ -98,7 +100,7 @@ then # If m64 compilation succeeds we assume platform to be 64-bit capable but # explicit flag is reqd. - m64_flag="-m64" + extra_opt_flags="-m64" fi rm -f tst tst.c @@ -138,6 +140,9 @@ do --use-key256) keylen='-DKEYLEN=32' ;; + --no-sse-check) + sse_detect=0 + ;; --help) usage $0;; *) echo "Unrecognized option: ${arg1}" @@ -183,6 +188,33 @@ then exit 1 fi +# SSE Detection +if [ $sse_detect -eq 1 ] +then + gcc -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils + if [ $? -ne 0 ] + then + echo "ERROR:" + echo "Failed to build SSE detection utility." + echo "" + exit 1 + fi + sse_ver=`./sse_level` + if [ $? -ne 0 ] + then + rm -f sse_level + echo "ERROR:" + echo "SSE version detection utility. Try configuring with --no-sse-check option." + echo "" + exit 1 + fi + rm -f sse_level + extra_opt_flags="${extra_opt_flags} -msse${sse_ver}" +else + extra_opt_flags="${extra_opt_flags} ${default_sse}" +fi + + echo $plat | egrep 'x86_64|amd64' > /dev/null if [ $? -eq 0 ] then @@ -299,7 +331,7 @@ main(void) } __EOF -gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst +gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst if [ $? -ne 0 ] then echo "Unable to compile OpenSSL test program please check OpenSSL installation." @@ -335,7 +367,7 @@ main(void) } __EOF -gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1 +gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1 if [ $? 
-ne 0 ] then openssl_incdir="${openssl_incdir} -D__OSSL_OLD__" @@ -465,7 +497,7 @@ sha256asmobjsvar="SHA256ASM_OBJS" sha256objsvar="SHA256_OBJS" yasmvar="YASM" fptr_flag_var="FPTR_FLAG" -m64_flag_var="M64_FLAG" +extra_opt_flags_var="EXTRA_OPT_FLAGS" openssllibdirvar="OPENSSL_LIBDIR" opensslincdirvar="OPENSSL_INCDIR" @@ -517,6 +549,6 @@ s#@${keccak_srcs_var}@#${keccak_srcs}#g s#@${keccak_hdrs_var}@#${keccak_hdrs}#g s#@${keccak_srcs_var}@#${keccak_srcs}#g s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g -s#@${m64_flag_var}@#${m64_flag}#g +s#@${extra_opt_flags_var}@#${extra_opt_flags}#g " > Makefile diff --git a/utils/cpuid.c b/utils/cpuid.c index da6b24a..b8ef1df 100644 --- a/utils/cpuid.c +++ b/utils/cpuid.c @@ -29,6 +29,10 @@ #include "cpuid.h" #ifdef __x86_64__ + +#define SSE4_1_FLAG 0x080000 +#define SSE4_2_FLAG 0x100000 + void exec_cpuid(uint32_t *regs) { @@ -107,6 +111,7 @@ cpuid_basic_identify(processor_info_t *pc) raw.vendor_str[12] = 0; pc->avx_level = 0; pc->sse_level = 0; + pc->sse_sub_level = 0; if (strcmp(raw.vendor_str, "GenuineIntel") == 0) { pc->proc_type = PROC_X64_INTEL; @@ -119,8 +124,12 @@ cpuid_basic_identify(processor_info_t *pc) if (raw.basic_cpuid[0][0] >= 1) { // ECX has SSE 4.2 and AVX flags // Bit 20 is SSE 4.2 and bit 28 indicates AVX - if (raw.basic_cpuid[1][2] & (1 << 20)) { + if (raw.basic_cpuid[1][2] & SSE4_1_FLAG) { pc->sse_level = 4; + pc->sse_sub_level = 1; + if (raw.basic_cpuid[1][2] & SSE4_2_FLAG) { + pc->sse_sub_level = 2; + } } else { pc->sse_level = 3; } diff --git a/utils/cpuid.h b/utils/cpuid.h index 65d0590..59b6597 100644 --- a/utils/cpuid.h +++ b/utils/cpuid.h @@ -26,6 +26,8 @@ #ifndef __CPUID_H__ #define __CPUID_H__ +#include "utils.h" + #ifdef __x86_64__ #define VENDOR_STR_MAX 16 #define BRAND_STR_MAX 64 diff --git a/utils/sse_level.c b/utils/sse_level.c new file mode 100644 index 0000000..b69837d --- /dev/null +++ b/utils/sse_level.c @@ -0,0 +1,17 @@ +#include <stdio.h> +#include <utils.h> +#include <cpuid.h> + +int +main(void) +{ + processor_info_t 
pc; + cpuid_basic_identify(&pc); + printf("%d", pc.sse_level); + if (pc.sse_sub_level > 0) + printf(".%d\n", pc.sse_sub_level); + else + printf("\n"); + return (0); +} + diff --git a/utils/utils.h b/utils/utils.h index d55bb45..e9017b9 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -146,6 +146,7 @@ typedef enum { typedef struct { int sse_level; + int sse_sub_level; int avx_level; proc_type_t proc_type; } processor_info_t;