Improve SSE version detection.

Add SSE4 detection.
Fix setting of some opt flags in Makefile.in.
This commit is contained in:
Moinak Ghosh 2013-01-20 22:53:36 +05:30
parent 3888c8d316
commit e9e3e1e632
6 changed files with 74 additions and 13 deletions

View file

@ -158,18 +158,18 @@ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ $(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS)
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
DEBUG_COMPILE = gcc -m64 -g -msse3 -c DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
DEBUG_COMPILE_cpp = g++ -m64 -g -msse3 -c DEBUG_COMPILE_cpp = g++ -g -c @EXTRA_OPT_FLAGS@
DEBUG_VEC_FLAGS = DEBUG_VEC_FLAGS =
DEBUG_LOOP_OPTFLAGS = DEBUG_LOOP_OPTFLAGS =
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@
DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS) DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
DEBUG_FPTR_FLAG = DEBUG_FPTR_FLAG =
RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ @M64_FLAG@ RELEASE_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
RELEASE_COMPILE = gcc -m64 -msse3 -c @M64_FLAG@ RELEASE_COMPILE = gcc -c @EXTRA_OPT_FLAGS@
RELEASE_COMPILE_cpp = g++ -m64 -msse3 -c @M64_FLAG@ RELEASE_COMPILE_cpp = g++ -c @EXTRA_OPT_FLAGS@
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS) RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS) RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG

44
config
View file

@ -50,9 +50,11 @@ yasm=yasm
keccak_srcs= keccak_srcs=
keccak_hdrs= keccak_hdrs=
keccak_srcs_asm= keccak_srcs_asm=
m64_flag= extra_opt_flags=
zlib_prefix= zlib_prefix=
bzlib_prefix= bzlib_prefix=
sse_detect=1
default_sse="-msse2"
rm -rf ./buildtmp rm -rf ./buildtmp
mkdir ./buildtmp mkdir ./buildtmp
@ -98,7 +100,7 @@ then
# If m64 compilation succeeds we assume platform to be 64-bit capable but # If m64 compilation succeeds we assume platform to be 64-bit capable but
# explicit flag is reqd. # explicit flag is reqd.
m64_flag="-m64" extra_opt_flags="-m64"
fi fi
rm -f tst tst.c rm -f tst tst.c
@ -138,6 +140,9 @@ do
--use-key256) --use-key256)
keylen='-DKEYLEN=32' keylen='-DKEYLEN=32'
;; ;;
--no-sse-check)
sse_detect=0
;;
--help) usage $0;; --help) usage $0;;
*) *)
echo "Unrecognized option: ${arg1}" echo "Unrecognized option: ${arg1}"
@ -183,6 +188,33 @@ then
exit 1 exit 1
fi fi
# SSE Detection
# Build and run a small helper (utils/sse_level.c) that prints the SSE level
# reported by cpuid_basic_identify(), e.g. "3", "4.1" or "4.2". The printed
# text is appended to "-msse" and added to extra_opt_flags. When detection is
# disabled with --no-sse-check, ${default_sse} is used instead.
if [ $sse_detect -eq 1 ]
then
	# extra_opt_flags carries -m64 when the platform needs it explicitly.
	# The cpuid code is guarded by #ifdef __x86_64__, so the helper must be
	# built with the same flag as the rest of the project.
	gcc ${extra_opt_flags} -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils
	if [ $? -ne 0 ]
	then
		echo "ERROR:"
		echo "Failed to build SSE detection utility."
		echo ""
		exit 1
	fi

	sse_ver=`./sse_level`
	sse_rv=$?
	rm -f sse_level

	# Treat a failing exit status, empty output or a level of 0 as a detection
	# failure; any of them would otherwise yield an invalid -msse flag.
	if [ $sse_rv -ne 0 ] || [ -z "${sse_ver}" ] || [ "${sse_ver}" = "0" ]
	then
		echo "ERROR:"
		echo "SSE version detection utility failed. Try configuring with --no-sse-check option."
		echo ""
		exit 1
	fi
	extra_opt_flags="${extra_opt_flags} -msse${sse_ver}"
else
	extra_opt_flags="${extra_opt_flags} ${default_sse}"
fi
echo $plat | egrep 'x86_64|amd64' > /dev/null echo $plat | egrep 'x86_64|amd64' > /dev/null
if [ $? -eq 0 ] if [ $? -eq 0 ]
then then
@ -299,7 +331,7 @@ main(void)
} }
__EOF __EOF
gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst
if [ $? -ne 0 ] if [ $? -ne 0 ]
then then
echo "Unable to compile OpenSSL test program please check OpenSSL installation." echo "Unable to compile OpenSSL test program please check OpenSSL installation."
@ -335,7 +367,7 @@ main(void)
} }
__EOF __EOF
gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1 gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1
if [ $? -ne 0 ] if [ $? -ne 0 ]
then then
openssl_incdir="${openssl_incdir} -D__OSSL_OLD__" openssl_incdir="${openssl_incdir} -D__OSSL_OLD__"
@ -465,7 +497,7 @@ sha256asmobjsvar="SHA256ASM_OBJS"
sha256objsvar="SHA256_OBJS" sha256objsvar="SHA256_OBJS"
yasmvar="YASM" yasmvar="YASM"
fptr_flag_var="FPTR_FLAG" fptr_flag_var="FPTR_FLAG"
m64_flag_var="M64_FLAG" extra_opt_flags_var="EXTRA_OPT_FLAGS"
openssllibdirvar="OPENSSL_LIBDIR" openssllibdirvar="OPENSSL_LIBDIR"
opensslincdirvar="OPENSSL_INCDIR" opensslincdirvar="OPENSSL_INCDIR"
@ -517,6 +549,6 @@ s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_hdrs_var}@#${keccak_hdrs}#g s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
s#@${keccak_srcs_var}@#${keccak_srcs}#g s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
s#@${m64_flag_var}@#${m64_flag}#g s#@${extra_opt_flags_var}@#${extra_opt_flags}#g
" > Makefile " > Makefile

View file

@ -29,6 +29,10 @@
#include "cpuid.h" #include "cpuid.h"
#ifdef __x86_64__ #ifdef __x86_64__
/*
 * SSE4 feature masks. cpuid_basic_identify() tests them against
 * raw.basic_cpuid[1][2] (the ECX value, per the comment at the test site).
 * 0x080000 is bit 19 (SSE 4.1) and 0x100000 is bit 20 (SSE 4.2).
 */
#define SSE4_1_FLAG 0x080000
#define SSE4_2_FLAG 0x100000
void void
exec_cpuid(uint32_t *regs) exec_cpuid(uint32_t *regs)
{ {
@ -107,6 +111,7 @@ cpuid_basic_identify(processor_info_t *pc)
raw.vendor_str[12] = 0; raw.vendor_str[12] = 0;
pc->avx_level = 0; pc->avx_level = 0;
pc->sse_level = 0; pc->sse_level = 0;
pc->sse_sub_level = 0;
if (strcmp(raw.vendor_str, "GenuineIntel") == 0) { if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
pc->proc_type = PROC_X64_INTEL; pc->proc_type = PROC_X64_INTEL;
@ -119,8 +124,12 @@ cpuid_basic_identify(processor_info_t *pc)
if (raw.basic_cpuid[0][0] >= 1) { if (raw.basic_cpuid[0][0] >= 1) {
// ECX has SSE 4.2 and AVX flags // ECX has SSE 4.2 and AVX flags
// Bit 20 is SSE 4.2 and bit 28 indicates AVX // Bit 20 is SSE 4.2 and bit 28 indicates AVX
if (raw.basic_cpuid[1][2] & (1 << 20)) { if (raw.basic_cpuid[1][2] & SSE4_1_FLAG) {
pc->sse_level = 4; pc->sse_level = 4;
pc->sse_sub_level = 1;
if (raw.basic_cpuid[1][2] & SSE4_2_FLAG) {
pc->sse_sub_level = 2;
}
} else { } else {
pc->sse_level = 3; pc->sse_level = 3;
} }

View file

@ -26,6 +26,8 @@
#ifndef __CPUID_H__ #ifndef __CPUID_H__
#define __CPUID_H__ #define __CPUID_H__
#include "utils.h"
#ifdef __x86_64__ #ifdef __x86_64__
#define VENDOR_STR_MAX 16 #define VENDOR_STR_MAX 16
#define BRAND_STR_MAX 64 #define BRAND_STR_MAX 64

17
utils/sse_level.c Normal file
View file

@ -0,0 +1,17 @@
#include <stdio.h>
#include <utils.h>
#include <cpuid.h>
/*
 * Print the SSE level reported by cpuid_basic_identify() on stdout, as
 * "<level>" or "<level>.<sub_level>" followed by a newline. The configure
 * script appends this text to "-msse" to form a compiler flag.
 *
 * Returns 0 on success. Returns 1 and prints nothing on stdout when no SSE
 * level was detected (sse_level <= 0), so that the configure script's exit
 * status check fires instead of an invalid "-msse0" flag being generated.
 */
int
main(void)
{
	processor_info_t pc;

	cpuid_basic_identify(&pc);
	if (pc.sse_level <= 0) {
		fprintf(stderr, "Unable to determine SSE level.\n");
		return (1);
	}

	if (pc.sse_sub_level > 0) {
		printf("%d.%d\n", pc.sse_level, pc.sse_sub_level);
	} else {
		printf("%d\n", pc.sse_level);
	}
	return (0);
}

View file

@ -146,6 +146,7 @@ typedef enum {
typedef struct { typedef struct {
int sse_level; int sse_level;
int sse_sub_level;
int avx_level; int avx_level;
proc_type_t proc_type; proc_type_t proc_type;
} processor_info_t; } processor_info_t;