Improve SSE version detection.
Add SSE4 detection. Fix setting of some opt flags in Makefile.in.
This commit is contained in:
parent
3888c8d316
commit
e9e3e1e632
6 changed files with 74 additions and 13 deletions
12
Makefile.in
12
Makefile.in
|
@ -158,18 +158,18 @@ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
|
|||
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
|
||||
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS)
|
||||
|
||||
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
||||
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
||||
DEBUG_COMPILE_cpp = g++ -m64 -g -msse3 -c
|
||||
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
||||
DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
|
||||
DEBUG_COMPILE_cpp = g++ -g -c @EXTRA_OPT_FLAGS@
|
||||
DEBUG_VEC_FLAGS =
|
||||
DEBUG_LOOP_OPTFLAGS =
|
||||
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@
|
||||
DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
|
||||
DEBUG_FPTR_FLAG =
|
||||
|
||||
RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@ @M64_FLAG@
|
||||
RELEASE_COMPILE = gcc -m64 -msse3 -c @M64_FLAG@
|
||||
RELEASE_COMPILE_cpp = g++ -m64 -msse3 -c @M64_FLAG@
|
||||
RELEASE_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
||||
RELEASE_COMPILE = gcc -c @EXTRA_OPT_FLAGS@
|
||||
RELEASE_COMPILE_cpp = g++ -c @EXTRA_OPT_FLAGS@
|
||||
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
|
||||
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
|
||||
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
|
||||
|
|
44
config
44
config
|
@ -50,9 +50,11 @@ yasm=yasm
|
|||
keccak_srcs=
|
||||
keccak_hdrs=
|
||||
keccak_srcs_asm=
|
||||
m64_flag=
|
||||
extra_opt_flags=
|
||||
zlib_prefix=
|
||||
bzlib_prefix=
|
||||
sse_detect=1
|
||||
default_sse="-msse2"
|
||||
|
||||
rm -rf ./buildtmp
|
||||
mkdir ./buildtmp
|
||||
|
@ -98,7 +100,7 @@ then
|
|||
|
||||
# If m64 compilation succeeds we assume platform to be 64-bit capable but
|
||||
# explicit flag is reqd.
|
||||
m64_flag="-m64"
|
||||
extra_opt_flags="-m64"
|
||||
fi
|
||||
rm -f tst tst.c
|
||||
|
||||
|
@ -138,6 +140,9 @@ do
|
|||
--use-key256)
|
||||
keylen='-DKEYLEN=32'
|
||||
;;
|
||||
--no-sse-check)
|
||||
sse_detect=0
|
||||
;;
|
||||
--help) usage $0;;
|
||||
*)
|
||||
echo "Unrecognized option: ${arg1}"
|
||||
|
@ -183,6 +188,33 @@ then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
# SSE Detection
#
# Unless --no-sse-check was given, build and run the small sse_level helper
# (utils/sse_level.c + utils/cpuid.c). It prints the SSE level detected on
# the machine running configure, e.g. "3", "4.1" or "4.2", which is turned
# into the matching -msse<level> compiler option and appended to
# extra_opt_flags. When detection is disabled, or the helper reports no
# usable level, the conservative ${default_sse} option is used instead.
if [ $sse_detect -eq 1 ]
then
	gcc -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils
	if [ $? -ne 0 ]
	then
		echo "ERROR:"
		echo "Failed to build SSE detection utility."
		echo ""
		exit 1
	fi

	sse_ver=`./sse_level`
	if [ $? -ne 0 ]
	then
		rm -f sse_level
		echo "ERROR:"
		echo "SSE version detection utility failed. Try configuring with --no-sse-check option."
		echo ""
		exit 1
	fi
	rm -f sse_level

	case "${sse_ver}" in
	""|0)
		# The helper prints 0 when no SSE level was identified; "-msse0"
		# is not a valid gcc option, so fall back to the default.
		extra_opt_flags="${extra_opt_flags} ${default_sse}"
		;;
	*)
		extra_opt_flags="${extra_opt_flags} -msse${sse_ver}"
		;;
	esac
else
	extra_opt_flags="${extra_opt_flags} ${default_sse}"
fi
|
||||
|
||||
|
||||
echo $plat | egrep 'x86_64|amd64' > /dev/null
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
|
@ -299,7 +331,7 @@ main(void)
|
|||
}
|
||||
__EOF
|
||||
|
||||
gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst
|
||||
gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} tst.c -o tst
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Unable to compile OpenSSL test program please check OpenSSL installation."
|
||||
|
@ -335,7 +367,7 @@ main(void)
|
|||
}
|
||||
__EOF
|
||||
|
||||
gcc ${m64_flag} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1
|
||||
gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o tst -lcrypto >/dev/null 2>&1
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
openssl_incdir="${openssl_incdir} -D__OSSL_OLD__"
|
||||
|
@ -465,7 +497,7 @@ sha256asmobjsvar="SHA256ASM_OBJS"
|
|||
sha256objsvar="SHA256_OBJS"
|
||||
yasmvar="YASM"
|
||||
fptr_flag_var="FPTR_FLAG"
|
||||
m64_flag_var="M64_FLAG"
|
||||
extra_opt_flags_var="EXTRA_OPT_FLAGS"
|
||||
|
||||
openssllibdirvar="OPENSSL_LIBDIR"
|
||||
opensslincdirvar="OPENSSL_INCDIR"
|
||||
|
@ -517,6 +549,6 @@ s#@${keccak_srcs_var}@#${keccak_srcs}#g
|
|||
s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
|
||||
s#@${keccak_srcs_var}@#${keccak_srcs}#g
|
||||
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
|
||||
s#@${m64_flag_var}@#${m64_flag}#g
|
||||
s#@${extra_opt_flags_var}@#${extra_opt_flags}#g
|
||||
" > Makefile
|
||||
|
||||
|
|
|
@ -29,6 +29,10 @@
|
|||
#include "cpuid.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#define SSE4_1_FLAG 0x080000
|
||||
#define SSE4_2_FLAG 0x100000
|
||||
|
||||
void
|
||||
exec_cpuid(uint32_t *regs)
|
||||
{
|
||||
|
@ -107,6 +111,7 @@ cpuid_basic_identify(processor_info_t *pc)
|
|||
raw.vendor_str[12] = 0;
|
||||
pc->avx_level = 0;
|
||||
pc->sse_level = 0;
|
||||
pc->sse_sub_level = 0;
|
||||
|
||||
if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
|
||||
pc->proc_type = PROC_X64_INTEL;
|
||||
|
@ -119,8 +124,12 @@ cpuid_basic_identify(processor_info_t *pc)
|
|||
if (raw.basic_cpuid[0][0] >= 1) {
|
||||
// ECX has SSE 4.1, SSE 4.2 and AVX flags
|
||||
// Bit 19 is SSE 4.1, bit 20 is SSE 4.2 and bit 28 indicates AVX
|
||||
if (raw.basic_cpuid[1][2] & (1 << 20)) {
|
||||
if (raw.basic_cpuid[1][2] & SSE4_1_FLAG) {
|
||||
pc->sse_level = 4;
|
||||
pc->sse_sub_level = 1;
|
||||
if (raw.basic_cpuid[1][2] & SSE4_2_FLAG) {
|
||||
pc->sse_sub_level = 2;
|
||||
}
|
||||
} else {
|
||||
pc->sse_level = 3;
|
||||
}
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#ifndef __CPUID_H__
|
||||
#define __CPUID_H__
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define VENDOR_STR_MAX 16
|
||||
#define BRAND_STR_MAX 64
|
||||
|
|
17
utils/sse_level.c
Normal file
17
utils/sse_level.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
#include <stdio.h>
|
||||
#include <utils.h>
|
||||
#include <cpuid.h>
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
processor_info_t pc;
|
||||
cpuid_basic_identify(&pc);
|
||||
printf("%d", pc.sse_level);
|
||||
if (pc.sse_sub_level > 0)
|
||||
printf(".%d\n", pc.sse_sub_level);
|
||||
else
|
||||
printf("\n");
|
||||
return (0);
|
||||
}
|
||||
|
|
@ -146,6 +146,7 @@ typedef enum {
|
|||
|
||||
/*
 * Processor capability summary filled in by cpuid_basic_identify().
 */
typedef struct {
|
||||
/* Major SSE version; cpuid_basic_identify() starts it at 0 and sets 3 or 4. */
int sse_level;
|
||||
/* SSE minor version: 1 for SSE 4.1, 2 for SSE 4.2, 0 when not applicable. */
int sse_sub_level;
|
||||
/* AVX level; initialised to 0 by cpuid_basic_identify() before detection. */
int avx_level;
|
||||
/* CPU vendor classification, e.g. PROC_X64_INTEL for "GenuineIntel". */
proc_type_t proc_type;
|
||||
} processor_info_t;
|
||||
|
|
Loading…
Reference in a new issue