diff --git a/INSTALL b/INSTALL index b2dbc4c..e4ed9bd 100644 --- a/INSTALL +++ b/INSTALL @@ -1,5 +1,17 @@ Copyright (c) 2012 Moinak Ghosh +Prerequisites +============= +64-bit System. +GCC 4.4 (with mpfr, ppl and cloog support packages for loop + vectorization). +libz (zlib) and development packages. +Libbz2 and development packages. +Libbsc source tree if BSC support is desired. + See below: + "Steps for building with libbsc support". +OpenSSL version 0.9.8 or greater. + Basic Installation ================== The simplest process to build and install this utility is: @@ -96,6 +108,13 @@ is not the usual GNU Autoconf script. Enable building against an alternate Bzip2 and library installation. +--no-sse-detect Do not try to detect the CPU's SSE capability. This + mode will simply use SSE2 as the fallback default. + Using SSE4 and later improves performance significantly. + +--use-key256 Use 256-bit encryption keys. Default key length is + 128-bit. + --help Display the help message. Steps for building with libbsc support diff --git a/Makefile.in b/Makefile.in index a9a26b9..de63368 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,9 +24,9 @@ PROG= pcompress MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \ adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \ - utils/xxhash.c utils/heapq.c utils/cpuid.c + utils/xxhash_base.c utils/heapq.c utils/cpuid.c MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \ - utils/cpuid.h + utils/cpuid.h utils/xxhash.h MAINOBJS = $(MAINSRCS:.c=.o) CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \ @@ -37,6 +37,12 @@ CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \ CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o) CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@ +XXHASH_SRCS = utils/xxhash.c +XXHASH_SSE4_SRCS = utils/xxhash_sse4.c +XXHASH_SSE2_SRCS = utils/xxhash_sse2.c +XXHASH_OBJS = utils/xxhash_sse4.o utils/xxhash_sse2.o +XXHASH_HDRS = utils/xxhash.h 
+ ZLIB_SRCS = zlib_compress.c ZLIB_HDRS = $(MAINHDRS) ZLIB_OBJS = $(ZLIB_SRCS:.c=.o) @@ -156,7 +162,7 @@ LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \ $(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ -$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) +$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS) DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@ @@ -185,8 +191,12 @@ COMPILE_cpp = @COMPILE_cpp@ VEC_FLAGS = @VEC_FLAGS@ LOOP_OPTFLAGS = @LOOP_OPTFLAGS@ CPPFLAGS = @CPPFLAGS@ @NO_SLAB_CPPFLAGS@ @DEBUG_STATS_CPPFLAGS@ -GEN_OPT = @GEN_OPT@ +GEN_OPT = @GEN_OPT@ @SSE_OPT_FLAGS@ +BASE_OPT = @GEN_OPT@ PREFIX=@PREFIX@ +SSE4_OPT_FLAG = -msse4.2 +SSE3_OPT_FLAG = -mssse3 +SSE2_OPT_FLAG = -msse2 SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@ SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@ @@ -257,6 +267,10 @@ $(ZLIB_OBJS): $(ZLIB_SRCS) $(ZLIB_HDRS) $(BZLIB_OBJS): $(BZLIB_SRCS) $(BZLIB_HDRS) $(COMPILE) $(GEN_OPT) $(BZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ +$(XXHASH_OBJS): $(XXHASH_SSE4_SRCS) $(XXHASH_SSE2_SRCS) $(XXHASH_HDRS) $(XXHASH_SRCS) + $(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE4_SRCS) -o $(XXHASH_SSE4_SRCS:.c=.o) + $(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE2_SRCS) -o $(XXHASH_SSE2_SRCS:.c=.o) + $(MAINOBJS): $(MAINSRCS) $(MAINHDRS) $(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ diff --git a/config b/config index cb7cb24..12139e2 100755 --- a/config +++ b/config @@ -21,6 +21,7 @@ ${prog} [] Enable building against an alternate Zlib installation. --with-bzlib= (Default: System) Enable building against an alternate Bzip2 and library installation. 
+--no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags. --use-key256 Use 256-bit encryption keys. Default key length is 128-bit. --help Display this help message. @@ -54,7 +55,7 @@ extra_opt_flags= zlib_prefix= bzlib_prefix= sse_detect=1 -default_sse="-msse2" +sse_opt_flags="-msse2" rm -rf ./buildtmp mkdir ./buildtmp @@ -76,7 +77,7 @@ gcc tst.c -o tst if [ $? -ne 0 ] then echo "ERROR:" - echo "Cannot compile a simple program. GCC 4.1 and above is required" + echo "Cannot compile a simple program. GCC 4.4 and above is required" echo "to build this program. Please include installation bindir of GCC in the PATH." echo "" rm -f tst.c @@ -140,7 +141,7 @@ do --use-key256) keylen='-DKEYLEN=32' ;; - --no-sse-check) + --no-sse-detect) sse_detect=0 ;; --help) usage $0;; @@ -180,10 +181,10 @@ IFS=. set -- ${vers} IFS="$OIFS" -if [ $1 -lt 4 -o $2 -lt 1 ] +if [ "$1" -lt 4 ] || { [ "$1" -eq 4 ] && [ "$2" -lt 4 ]; } then echo "ERROR:" - echo "GCC version 4.1 or above is required." + echo "GCC version 4.4 or above is required." 
echo "" exit 1 fi @@ -209,9 +210,7 @@ then exit 1 fi rm -f sse_level - extra_opt_flags="${extra_opt_flags} -msse${sse_ver}" -else - extra_opt_flags="${extra_opt_flags} ${default_sse}" + sse_opt_flags="-m${sse_ver}" fi @@ -498,6 +497,7 @@ sha256objsvar="SHA256_OBJS" yasmvar="YASM" fptr_flag_var="FPTR_FLAG" extra_opt_flags_var="EXTRA_OPT_FLAGS" +sse_opt_flags_var="SSE_OPT_FLAGS" openssllibdirvar="OPENSSL_LIBDIR" opensslincdirvar="OPENSSL_INCDIR" @@ -550,5 +550,6 @@ s#@${keccak_hdrs_var}@#${keccak_hdrs}#g s#@${keccak_srcs_var}@#${keccak_srcs}#g s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g s#@${extra_opt_flags_var}@#${extra_opt_flags}#g +s#@${sse_opt_flags_var}@#${sse_opt_flags}#g " > Makefile diff --git a/crypto/crypto_utils.c b/crypto/crypto_utils.c index 81148ae..a300103 100644 --- a/crypto/crypto_utils.c +++ b/crypto/crypto_utils.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "crypto_utils.h" @@ -54,17 +54,29 @@ static int geturandom_bytes(uchar_t rbytes[32]); typedef void (*ckinit_func_ptr)(void); static struct { const char *name; + const char *desc; cksum_t cksum_id; int bytes, mac_bytes; ckinit_func_ptr init_func; } cksum_props[] = { - {"CRC64", CKSUM_CRC64, 8, 32, NULL}, - {"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL}, - {"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL}, - {"SHA256", CKSUM_SHA256, 32, 32, init_sha256}, - {"SHA512", CKSUM_SHA512, 64, 64, NULL}, - {"KECCAK256", CKSUM_KECCAK256, 32, 32, NULL}, - {"KECCAK512", CKSUM_KECCAK512, 64, 64, NULL} + {"CRC64", "Fast 64-bit CRC from LZMA SDK.", + CKSUM_CRC64, 8, 32, NULL}, + {"SKEIN256", "256-bit SKEIN a NIST SHA3 runners-up (90% faster than Keccak).", + CKSUM_SKEIN256, 32, 32, NULL}, + {"SKEIN512", "512-bit SKEIN", + CKSUM_SKEIN512, 64, 64, NULL}, + {"SHA256", "Intel's optimized (SSE,AVX) 256-bit SHA2 implementation for x86.", + CKSUM_SHA256, 32, 32, init_sha256}, + {"SHA512", "512-bit SHA2 from OpenSSL's crypto library.", + CKSUM_SHA512, 64, 64, NULL}, + {"KECCAK256", "Official 
256-bit NIST SHA3 optimized implementation.", + CKSUM_KECCAK256, 32, 32, NULL}, + {"KECCAK512", "Official 512-bit NIST SHA3 optimized implementation.", + CKSUM_KECCAK512, 64, 64, NULL}, + {"BLAKE256", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.", + CKSUM_BLAKE256, 32, 32, NULL}, + {"BLAKE512", "Very fast 512-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.", + CKSUM_BLAKE512, 64, 64, NULL} }; static int cksum_provider = PROVIDER_OPENSSL; @@ -213,12 +225,9 @@ init_sha256(void) cksum_provider = PROVIDER_OPENSSL; #else #ifdef __x86_64__ - processor_info_t pc; - cksum_provider = PROVIDER_OPENSSL; - cpuid_basic_identify(&pc); - if (pc.proc_type == PROC_X64_INTEL || pc.proc_type == PROC_X64_AMD) { - if (opt_Init_SHA(&pc) == 0) { + if (proc_info.proc_type == PROC_X64_INTEL || proc_info.proc_type == PROC_X64_AMD) { + if (opt_Init_SHA(&proc_info) == 0) { cksum_provider = PROVIDER_X64_OPT; } } @@ -226,6 +235,15 @@ init_sha256(void) #endif } +void +list_checksums(FILE *strm, char *pad) /* Writes one "<pad><name> - <desc>" line to strm for each cksum_props entry. */ +{ + int i; + for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) { + fprintf(strm, "%s%10s - %s\n", pad, cksum_props[i].name, cksum_props[i].desc); + } +} + /* * Check if either the given checksum name or id is valid and * return it's properties. 
@@ -235,7 +253,7 @@ get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_byte { int i; - for (i=0; ibuf)); dat = _mm_loadu_si128((__m128i *)(inbuf+pos)); odat = _mm_xor_si128(cblk, dat); diff --git a/crypto/sha2/sha256.c b/crypto/sha2/sha256.c index 86b7930..f8960ea 100644 --- a/crypto/sha2/sha256.c +++ b/crypto/sha2/sha256.c @@ -89,7 +89,7 @@ APS_NAMESPACE(Init_SHA) (processor_info_t *pc) if (pc->proc_type == PROC_X64_INTEL || pc->proc_type == PROC_X64_AMD) { if (pc->avx_level > 0) { sha_update_func = sha256_avx; - + } else if (pc->sse_level >= 4) { sha_update_func = sha256_sse4; diff --git a/main.c b/main.c index d06c73b..fb54d0c 100644 --- a/main.c +++ b/main.c @@ -158,15 +158,16 @@ usage(void) " NOTE - Both -L and -P can be used together to give maximum benefit on most.\n" " datasets.\n" " '-S' \n" - " - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n" - " SHA512. Default one is SKEIN256.\n" + " - Specify chunk checksum to use:\n\n", + UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name); + list_checksums(stderr, " "); + fprintf(stderr, "\n" " '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n" " but with lower deduplication ratio.\n" " '-B' <1..5>\n" " - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 
5 - 64K.\n" " '-M' - Display memory allocator statistics\n" - " '-C' - Display compression statistics\n\n", - UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name); + " '-C' - Display compression statistics\n\n"); } void @@ -716,7 +717,7 @@ start_decompress(const char *filename, const char *to_filename) err = 1; goto uncomp_done; } - if (version < VERSION-2) { + if (version < VERSION-3) { fprintf(stderr, "Unsupported version: %d\n", version); err = 1; goto uncomp_done; @@ -2340,6 +2341,7 @@ main(int argc, char *argv[]) exit(1); } main_cancel = 0; + init_pcompress(); if (cksum == 0) get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes); diff --git a/pcompress.h b/pcompress.h index f3d6fb5..042538f 100644 --- a/pcompress.h +++ b/pcompress.h @@ -38,7 +38,7 @@ extern "C" { #define CHUNK_FLAG_SZ 1 #define ALGO_SZ 8 #define MIN_CHUNK 2048 -#define VERSION 5 +#define VERSION 6 #define FLAG_DEDUP 1 #define FLAG_DEDUP_FIXED 2 #define FLAG_SINGLE_CHUNK 4 diff --git a/utils/cpuid.c b/utils/cpuid.c index b8ef1df..917846e 100644 --- a/utils/cpuid.c +++ b/utils/cpuid.c @@ -32,6 +32,10 @@ #define SSE4_1_FLAG 0x080000 #define SSE4_2_FLAG 0x100000 +#define SSE3_FLAG 0x1 +#define SSSE3_FLAG 0x200 +#define AVX_FLAG 0x10000000 +#define XOP_FLAG 0x800 void exec_cpuid(uint32_t *regs) @@ -112,6 +116,7 @@ cpuid_basic_identify(processor_info_t *pc) pc->avx_level = 0; pc->sse_level = 0; pc->sse_sub_level = 0; + pc->xop_avail = 0; if (strcmp(raw.vendor_str, "GenuineIntel") == 0) { pc->proc_type = PROC_X64_INTEL; @@ -131,12 +136,23 @@ cpuid_basic_identify(processor_info_t *pc) pc->sse_sub_level = 2; } } else { - pc->sse_level = 3; + if (raw.basic_cpuid[1][2] & SSE3_FLAG) { + pc->sse_level = 3; + if (raw.basic_cpuid[1][2] & SSSE3_FLAG) { + pc->sse_sub_level = 1; + } + } else { + pc->sse_level = 2; + } } pc->avx_level = 0; - if (raw.basic_cpuid[1][2] & (1 << 28)) { + if (raw.basic_cpuid[1][2] & AVX_FLAG) { pc->avx_level = 1; } + + if 
(raw.ext_cpuid[1][2] & XOP_FLAG) { + pc->xop_avail = 1; + } } } diff --git a/utils/sse_level.c b/utils/sse_level.c index b69837d..106f72d 100644 --- a/utils/sse_level.c +++ b/utils/sse_level.c @@ -7,7 +7,12 @@ main(void) { processor_info_t pc; cpuid_basic_identify(&pc); - printf("%d", pc.sse_level); + if (pc.sse_level == 3 && pc.sse_sub_level == 1) { + printf("ssse%d", pc.sse_level); + pc.sse_sub_level = 0; + } else { + printf("sse%d", pc.sse_level); + } if (pc.sse_sub_level > 0) printf(".%d\n", pc.sse_sub_level); else diff --git a/utils/utils.c b/utils/utils.c index d1e4c81..795bd2f 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -36,9 +36,20 @@ #include #include #include +#include +#include +#define _IN_UTILS_ #include "utils.h" +processor_info_t proc_info; + +void +init_pcompress() { + cpuid_basic_identify(&proc_info); + XXH32_module_init(); +} + void err_exit(int show_errno, const char *format, ...) { diff --git a/utils/utils.h b/utils/utils.h index e9017b9..f8228d6 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -91,13 +91,17 @@ typedef int32_t bsize_t; // These allow helping the compiler in some often-executed branches, whose // result is almost always the same. 
#ifdef __GNUC__ -# define likely(expr) __builtin_expect(expr, 1) -# define unlikely(expr) __builtin_expect(expr, 0) -# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val) -# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val) +# define likely(expr) __builtin_expect(expr, 1) +# define unlikely(expr) __builtin_expect(expr, 0) +# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val) +# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val) +# define PREFETCH_WRITE(x, n) __builtin_prefetch((x), 1, (n)) +# define PREFETCH_READ(x, n) __builtin_prefetch((x), 0, (n)) #else # define likely(expr) (expr) # define unlikely(expr) (expr) +# define PREFETCH_WRITE(x, n) +# define PREFETCH_READ(x, n) # if defined(sun) || defined (__sun) # include # define ATOMIC_ADD(var, val) atomic_add_int(&var, val) @@ -148,9 +152,14 @@ typedef struct { int sse_level; int sse_sub_level; int avx_level; + int xop_avail; proc_type_t proc_type; } processor_info_t; +#ifndef _IN_UTILS_ +extern processor_info_t proc_info; +#endif + extern void err_exit(int show_errno, const char *format, ...); extern const char *get_execname(const char *); extern int parse_numeric(int64_t *val, const char *str); @@ -165,6 +174,7 @@ extern uint64_t get_total_ram(); extern double get_wtime_millis(void); extern double get_mb_s(uint64_t bytes, double strt, double en); extern void init_algo_props(algo_props_t *props); +extern void init_pcompress(); /* Pointer type for compress and decompress functions. 
*/ typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst, diff --git a/utils/xxhash.c b/utils/xxhash.c index 1f7bae3..26966ba 100644 --- a/utils/xxhash.c +++ b/utils/xxhash.c @@ -82,8 +82,6 @@ # define XXH_BIG_ENDIAN 0 #endif - - //************************************** // Compiler-specific Options & Functions //************************************** @@ -141,7 +139,7 @@ static inline __m128i _x_mm_rotl_epi32(const __m128i a, int bits) // Simple Hash Functions //**************************** -unsigned int XXH32(const void* input, int len, unsigned int seed) +unsigned int CPUCAP_NM(XXH32)(const void* input, int len, unsigned int seed) { #if 0 // Simple version, good for code maintenance, but unfortunately slow for small inputs @@ -284,7 +282,7 @@ struct XXH_state32_t }; -void* XXH32_init (unsigned int seed) +void* CPUCAP_NM(XXH32_init) (unsigned int seed) { struct XXH_state32_t * state = (struct XXH_state32_t *) malloc ( sizeof(struct XXH_state32_t)); state->seed = seed; @@ -303,7 +301,7 @@ void* XXH32_init (unsigned int seed) } -int XXH32_feed (void* state_in, const void* input, int len) +int CPUCAP_NM(XXH32_feed) (void* state_in, const void* input, int len) { struct XXH_state32_t * state = state_in; const unsigned char* p = (const unsigned char*)input; @@ -437,7 +435,7 @@ int XXH32_feed (void* state_in, const void* input, int len) } -unsigned int XXH32_getIntermediateResult (void* state_in) +unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state_in) { struct XXH_state32_t * state = state_in; unsigned char * p = (unsigned char*)state->memory; @@ -489,9 +487,9 @@ unsigned int XXH32_getIntermediateResult (void* state_in) } -unsigned int XXH32_result (void* state_in) +unsigned int CPUCAP_NM(XXH32_result) (void* state_in) { - unsigned int h32 = XXH32_getIntermediateResult(state_in); + unsigned int h32 = CPUCAP_NM(XXH32_getIntermediateResult)(state_in); free(state_in); diff --git a/utils/xxhash.h b/utils/xxhash.h index d253fa0..a336a47 100644 
--- a/utils/xxhash.h +++ b/utils/xxhash.h @@ -63,12 +63,16 @@ It depends on successfully passing SMHasher test set. extern "C" { #endif +#ifndef CPUCAP_NM +#define CPUCAP_NM(x) x +#endif + //**************************** // Simple Hash Functions //**************************** -unsigned int XXH32 (const void* input, int len, unsigned int seed); +unsigned int CPUCAP_NM(XXH32) (const void* input, int len, unsigned int seed); /* XXH32() : @@ -86,9 +90,9 @@ XXH32() : // Advanced Hash Functions //**************************** -void* XXH32_init (unsigned int seed); -int XXH32_feed (void* state, const void* input, int len); -unsigned int XXH32_result (void* state); +void* CPUCAP_NM(XXH32_init) (unsigned int seed); +int CPUCAP_NM(XXH32_feed) (void* state, const void* input, int len); +unsigned int CPUCAP_NM(XXH32_result) (void* state); /* These functions calculate the xxhash of an input provided in several small packets, @@ -113,7 +117,7 @@ Memory will be freed by XXH32_result(). */ -unsigned int XXH32_getIntermediateResult (void* state); +unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state); /* This function does the same as XXH32_result(), generating a 32-bit hash, but preserve memory context. @@ -121,7 +125,7 @@ This way, it becomes possible to generate intermediate hashes, and then continue To free memory context, use XXH32_result(). 
*/ - +void XXH32_module_init(); #if defined (__cplusplus) } diff --git a/utils/xxhash_base.c b/utils/xxhash_base.c new file mode 100644 index 0000000..868994d --- /dev/null +++ b/utils/xxhash_base.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include + +extern void* XXH32_init_SSE4 (unsigned int seed); +extern int XXH32_feed_SSE4 (void* state, const void* input, int len); +extern unsigned int XXH32_result_SSE4 (void* state); +extern unsigned int XXH32_getIntermediateResult_SSE4 (void* state); +extern unsigned int XXH32_SSE4 (const void* input, int len, unsigned int seed); + +extern void* XXH32_init_SSE2 (unsigned int seed); +extern int XXH32_feed_SSE2 (void* state, const void* input, int len); +extern unsigned int XXH32_result_SSE2 (void* state); +extern unsigned int XXH32_getIntermediateResult_SSE2 (void* state); +extern unsigned int XXH32_SSE2 (const void* input, int len, unsigned int seed); + +unsigned int (*xxh32)(const void* input, int len, unsigned int seed) = NULL; +void * (*xxh32_init)(unsigned int seed) = NULL; +int (*xxh32_feed)(void* state, const void* input, int len) = NULL; +unsigned int (*xxh32_result)(void* state) = NULL; +unsigned int (*xxh32_getIntermediateResult)(void* state) = NULL; +#include + +void +XXH32_module_init() { + if (proc_info.sse_level >= 4) { + xxh32 = XXH32_SSE4; + xxh32_init = XXH32_init_SSE4; + xxh32_feed = XXH32_feed_SSE4; + xxh32_result = XXH32_result_SSE4; + xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE4; + } else { + xxh32 = XXH32_SSE2; + xxh32_init = XXH32_init_SSE2; + xxh32_feed = XXH32_feed_SSE2; + xxh32_result = XXH32_result_SSE2; + xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE2; + } +} + +unsigned int +XXH32(const void* input, int len, unsigned int seed) +{ + return xxh32(input, len, seed); +} + +void* +XXH32_init(unsigned int seed) +{ + return xxh32_init(seed); +} + +int +XXH32_feed(void* state, const void* input, int len) +{ + return xxh32_feed(state, input, len); +} + +unsigned int 
+XXH32_result(void* state) +{ + return xxh32_result(state); +} + +unsigned int +XXH32_getIntermediateResult(void* state) +{ + return xxh32_getIntermediateResult(state); +} + diff --git a/utils/xxhash_sse2.c b/utils/xxhash_sse2.c new file mode 100644 index 0000000..1f13d2c --- /dev/null +++ b/utils/xxhash_sse2.c @@ -0,0 +1,3 @@ +#define CPUCAP_NM(x) x##_SSE2 +#include "xxhash.c" + diff --git a/utils/xxhash_sse4.c b/utils/xxhash_sse4.c new file mode 100644 index 0000000..4561c7e --- /dev/null +++ b/utils/xxhash_sse4.c @@ -0,0 +1,3 @@ +#define CPUCAP_NM(x) x##_SSE4 +#include "xxhash.c" +