Changes for generalized runtime SSE/AVX/XOP detection.

Multi instruction set XXhash build with runtime selection.
Extend CPUID code to detect more instruction sets.
Add options for BLAKE2 hash.
Move GCC builtins into utils header.
Bump file format version number due to extended digest flags.
Add descriptions to digest list.
This commit is contained in:
Moinak Ghosh 2013-01-25 00:10:12 +05:30
parent 7b7c85dab4
commit 26bb137257
18 changed files with 236 additions and 58 deletions

19
INSTALL
View file

@ -1,5 +1,17 @@
Copyright (c) 2012 Moinak Ghosh
Prerequisites
=============
64-bit System.
GCC 4.4 (with mpfr, ppl and cloog support packages for loop
vectorization).
libz (zlib) and development packages.
Libbz2 and development packages.
Libbsc source tree if BSC support is desired.
See below:
"Steps for building with libbsc support".
OpenSSL version 0.9.8 or greater.
Basic Installation
==================
The simplest process to build and install this utility is:
@ -96,6 +108,13 @@ is not the usual GNU Autoconf script.
Enable building against an alternate Bzip2 library
installation.
--no-sse-detect Do not try to detect the CPU's SSE capability. This
mode will simply use SSE2 as the fallback default.
Using SSE4 and later improves performance significantly.
--use-key256 Use 256-bit encryption keys. Default key length is
128-bit.
--help Display the help message.
Steps for building with libbsc support

View file

@ -24,9 +24,9 @@
PROG= pcompress
MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
utils/xxhash.c utils/heapq.c utils/cpuid.c
utils/xxhash_base.c utils/heapq.c utils/cpuid.c
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \
utils/cpuid.h
utils/cpuid.h utils/xxhash.h
MAINOBJS = $(MAINSRCS:.c=.o)
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
@ -37,6 +37,12 @@ CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
# xxHash is compiled once per instruction set. Each per-ISA source is a thin
# wrapper that includes the shared implementation listed in XXHASH_SRCS.
XXHASH_HDRS = utils/xxhash.h
XXHASH_SRCS = utils/xxhash.c
XXHASH_SSE2_SRCS = utils/xxhash_sse2.c
XXHASH_SSE4_SRCS = utils/xxhash_sse4.c
# Object names are derived from the wrapper sources so the lists cannot drift.
XXHASH_OBJS = $(XXHASH_SSE4_SRCS:.c=.o) $(XXHASH_SSE2_SRCS:.c=.o)
ZLIB_SRCS = zlib_compress.c
ZLIB_HDRS = $(MAINHDRS)
ZLIB_OBJS = $(ZLIB_SRCS:.c=.o)
@ -156,7 +162,7 @@ LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS)
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS)
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
@ -185,8 +191,12 @@ COMPILE_cpp = @COMPILE_cpp@
VEC_FLAGS = @VEC_FLAGS@
LOOP_OPTFLAGS = @LOOP_OPTFLAGS@
CPPFLAGS = @CPPFLAGS@ @NO_SLAB_CPPFLAGS@ @DEBUG_STATS_CPPFLAGS@
GEN_OPT = @GEN_OPT@
GEN_OPT = @GEN_OPT@ @SSE_OPT_FLAGS@
BASE_OPT = @GEN_OPT@
PREFIX=@PREFIX@
SSE4_OPT_FLAG = -msse4.2
SSE3_OPT_FLAG = -mssse3
SSE2_OPT_FLAG = -msse2
SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
@ -257,6 +267,10 @@ $(ZLIB_OBJS): $(ZLIB_SRCS) $(ZLIB_HDRS)
$(BZLIB_OBJS): $(BZLIB_SRCS) $(BZLIB_HDRS)
$(COMPILE) $(GEN_OPT) $(BZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
# Each xxHash variant gets its own rule so that a recipe produces exactly its
# target ($@). A single rule with both objects as targets would run the whole
# recipe once per target, recompiling both files each time and racing under
# "make -j". Both variants depend on the shared implementation and header
# because the wrapper sources #include utils/xxhash.c.
$(XXHASH_SSE4_SRCS:.c=.o): $(XXHASH_SSE4_SRCS) $(XXHASH_SRCS) $(XXHASH_HDRS)
	$(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE4_SRCS) -o $@

$(XXHASH_SSE2_SRCS:.c=.o): $(XXHASH_SSE2_SRCS) $(XXHASH_SRCS) $(XXHASH_HDRS)
	$(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE2_SRCS) -o $@
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@

17
config
View file

@ -21,6 +21,7 @@ ${prog} [<options>]
Enable building against an alternate Zlib installation.
--with-bzlib=<path to Bzip2 library installation tree> (Default: System)
Enable building against an alternate Bzip2 and library installation.
--no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags.
--use-key256 Use 256-bit encryption keys. Default key length is 128-bit.
--help Display this help message.
@ -54,7 +55,7 @@ extra_opt_flags=
zlib_prefix=
bzlib_prefix=
sse_detect=1
default_sse="-msse2"
sse_opt_flags="-msse2"
rm -rf ./buildtmp
mkdir ./buildtmp
@ -76,7 +77,7 @@ gcc tst.c -o tst
if [ $? -ne 0 ]
then
echo "ERROR:"
echo "Cannot compile a simple program. GCC 4.1 and above is required"
echo "Cannot compile a simple program. GCC 4.4 and above is required"
echo "to build this program. Please include installation bindir of GCC in the PATH."
echo ""
rm -f tst.c
@ -140,7 +141,7 @@ do
--use-key256)
keylen='-DKEYLEN=32'
;;
--no-sse-check)
--no-sse-detect)
sse_detect=0
;;
--help) usage $0;;
@ -180,10 +181,10 @@ IFS=.
set -- ${vers}
IFS="$OIFS"
if [ $1 -lt 4 -o $2 -lt 1 ]
# Require GCC 4.4 or newer. The minor number is only significant when the
# major number is exactly 4; otherwise releases such as 5.1 or 9.3 would be
# rejected because their minor number is below 4.
if [ $1 -lt 4 -o \( $1 -eq 4 -a $2 -lt 4 \) ]
then
echo "ERROR:"
echo "GCC version 4.1 or above is required."
echo "GCC version 4.4 or above is required."
echo ""
exit 1
fi
@ -209,9 +210,7 @@ then
exit 1
fi
rm -f sse_level
extra_opt_flags="${extra_opt_flags} -msse${sse_ver}"
else
extra_opt_flags="${extra_opt_flags} ${default_sse}"
sse_opt_flags="-m${sse_ver}"
fi
@ -498,6 +497,7 @@ sha256objsvar="SHA256_OBJS"
yasmvar="YASM"
fptr_flag_var="FPTR_FLAG"
extra_opt_flags_var="EXTRA_OPT_FLAGS"
sse_opt_flags_var="SSE_OPT_FLAGS"
openssllibdirvar="OPENSSL_LIBDIR"
opensslincdirvar="OPENSSL_INCDIR"
@ -550,5 +550,6 @@ s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
s#@${extra_opt_flags_var}@#${extra_opt_flags}#g
s#@${sse_opt_flags_var}@#${sse_opt_flags}#g
" > Makefile

View file

@ -39,7 +39,7 @@
#include <sha256.h>
#include <crypto_aes.h>
#include <KeccakNISTInterface.h>
#include <cpuid.h>
#include <utils.h>
#include "crypto_utils.h"
@ -54,17 +54,29 @@ static int geturandom_bytes(uchar_t rbytes[32]);
typedef void (*ckinit_func_ptr)(void);
static struct {
const char *name;
const char *desc;
cksum_t cksum_id;
int bytes, mac_bytes;
ckinit_func_ptr init_func;
} cksum_props[] = {
{"CRC64", CKSUM_CRC64, 8, 32, NULL},
{"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL},
{"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL},
{"SHA256", CKSUM_SHA256, 32, 32, init_sha256},
{"SHA512", CKSUM_SHA512, 64, 64, NULL},
{"KECCAK256", CKSUM_KECCAK256, 32, 32, NULL},
{"KECCAK512", CKSUM_KECCAK512, 64, 64, NULL}
{"CRC64", "Fast 64-bit CRC from LZMA SDK.",
CKSUM_CRC64, 8, 32, NULL},
{"SKEIN256", "256-bit SKEIN a NIST SHA3 runners-up (90% faster than Keccak).",
CKSUM_SKEIN256, 32, 32, NULL},
{"SKEIN512", "512-bit SKEIN",
CKSUM_SKEIN512, 64, 64, NULL},
{"SHA256", "Intel's optimized (SSE,AVX) 256-bit SHA2 implementation for x86.",
CKSUM_SHA256, 32, 32, init_sha256},
{"SHA512", "512-bit SHA2 from OpenSSL's crypto library.",
CKSUM_SHA512, 64, 64, NULL},
{"KECCAK256", "Official 256-bit NIST SHA3 optimized implementation.",
CKSUM_KECCAK256, 32, 32, NULL},
{"KECCAK512", "Official 512-bit NIST SHA3 optimized implementation.",
CKSUM_KECCAK512, 64, 64, NULL},
{"BLAKE256", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
CKSUM_BLAKE256, 32, 32, NULL},
/* 64-byte digest: the description must say 512-bit, not 256-bit. */
{"BLAKE512", "Very fast 512-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
CKSUM_BLAKE512, 64, 64, NULL}
};
static int cksum_provider = PROVIDER_OPENSSL;
@ -213,12 +225,9 @@ init_sha256(void)
cksum_provider = PROVIDER_OPENSSL;
#else
#ifdef __x86_64__
processor_info_t pc;
cksum_provider = PROVIDER_OPENSSL;
cpuid_basic_identify(&pc);
if (pc.proc_type == PROC_X64_INTEL || pc.proc_type == PROC_X64_AMD) {
if (opt_Init_SHA(&pc) == 0) {
if (proc_info.proc_type == PROC_X64_INTEL || proc_info.proc_type == PROC_X64_AMD) {
if (opt_Init_SHA(&proc_info) == 0) {
cksum_provider = PROVIDER_X64_OPT;
}
}
@ -226,6 +235,15 @@ init_sha256(void)
#endif
}
/*
 * Print every entry of cksum_props to the given stream, one per line, as
 * "<pad><name> - <description>" with the name right-aligned in a field of
 * 10 characters.
 *
 * strm - stream to write to.
 * pad  - string emitted at the start of every line (indentation).
 */
void
list_checksums(FILE *strm, char *pad)
{
	/* sizeof yields an unsigned value, so index with size_t rather than int. */
	const size_t count = sizeof (cksum_props) / sizeof (cksum_props[0]);
	size_t i;

	for (i = 0; i < count; i++) {
		fprintf(strm, "%s%10s - %s\n", pad, cksum_props[i].name, cksum_props[i].desc);
	}
}
/*
* Check if either the given checksum name or id is valid and
* return it's properties.
@ -235,7 +253,7 @@ get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_byte
{
int i;
for (i=0; i<sizeof (cksum_props); i++) {
for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) {
if ((name != NULL && strcmp(name, cksum_props[i].name) == 0) ||
(*cksum != 0 && *cksum == cksum_props[i].cksum_id)) {
*cksum = cksum_props[i].cksum_id;

View file

@ -33,7 +33,7 @@ extern "C" {
#endif
#define MAX_PW_LEN 16
#define CKSUM_MASK 0x700
/*
 * The cksum_t ids run from 0x100 to 0x900, i.e. they occupy bits 8-11.
 * A mask of 0x800 keeps only bit 11 and would map 0x100..0x700 to zero,
 * so all four bits are covered here.
 * NOTE(review): presumably used to extract the checksum id from the header
 * flags -- confirm at the use sites.
 */
#define CKSUM_MASK 0xF00
#define CKSUM_MAX_BYTES 64
#define DEFAULT_CKSUM "SKEIN256"
@ -54,7 +54,9 @@ typedef enum {
CKSUM_SHA256 = 0x400,
CKSUM_SHA512 = 0x500,
CKSUM_KECCAK256 = 0x600,
CKSUM_KECCAK512 = 0x700
CKSUM_KECCAK512 = 0x700,
CKSUM_BLAKE256 = 0x800,
CKSUM_BLAKE512 = 0x900
} cksum_t;
typedef struct {
@ -75,6 +77,7 @@ typedef struct {
* Generic message digest functions.
*/
int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes);
void list_checksums(FILE *strm, char *pad);
int get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_bytes);
void serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);

View file

@ -116,8 +116,8 @@ do_last:
for (; pos < buflen-15; pos += 16) {
__m128i cblk, dat, odat;
__builtin_prefetch(outbuf+pos, 1, 0);
__builtin_prefetch(inbuf+pos, 0, 0);
PREFETCH_WRITE(outbuf+pos, 0);
PREFETCH_READ(inbuf+pos, 0);
cblk = _mm_load_si128((__m128i *)(stream->buf));
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
odat = _mm_xor_si128(cblk, dat);

12
main.c
View file

@ -158,15 +158,16 @@ usage(void)
" NOTE - Both -L and -P can be used together to give maximum benefit on most.\n"
" datasets.\n"
" '-S' <cksum>\n"
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n"
" SHA512. Default one is SKEIN256.\n"
" - Specify chunk checksum to use:\n\n",
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
list_checksums(stderr, " ");
fprintf(stderr, "\n"
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n"
" but with lower deduplication ratio.\n"
" '-B' <1..5>\n"
" - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
" '-M' - Display memory allocator statistics\n"
" '-C' - Display compression statistics\n\n",
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
" '-C' - Display compression statistics\n\n");
}
void
@ -716,7 +717,7 @@ start_decompress(const char *filename, const char *to_filename)
err = 1;
goto uncomp_done;
}
if (version < VERSION-2) {
if (version < VERSION-3) {
fprintf(stderr, "Unsupported version: %d\n", version);
err = 1;
goto uncomp_done;
@ -2340,6 +2341,7 @@ main(int argc, char *argv[])
exit(1);
}
main_cancel = 0;
init_pcompress();
if (cksum == 0)
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes);

View file

@ -38,7 +38,7 @@ extern "C" {
#define CHUNK_FLAG_SZ 1
#define ALGO_SZ 8
#define MIN_CHUNK 2048
#define VERSION 5
#define VERSION 6
#define FLAG_DEDUP 1
#define FLAG_DEDUP_FIXED 2
#define FLAG_SINGLE_CHUNK 4

View file

@ -32,6 +32,10 @@
#define SSE4_1_FLAG 0x080000
#define SSE4_2_FLAG 0x100000
/*
 * Feature bits tested by cpuid_basic_identify(). SSE3_FLAG, SSSE3_FLAG and
 * AVX_FLAG are checked against raw.basic_cpuid[1][2]; XOP_FLAG is checked
 * against raw.ext_cpuid[1][2].
 * NOTE(review): index [1][2] is presumably the ECX output of CPUID leaf 1
 * and of extended leaf 0x80000001 respectively -- confirm against the raw
 * data layout.
 */
#define SSE3_FLAG 0x1
#define SSSE3_FLAG 0x200
#define AVX_FLAG 0x10000000
#define XOP_FLAG 0x800
void
exec_cpuid(uint32_t *regs)
@ -112,6 +116,7 @@ cpuid_basic_identify(processor_info_t *pc)
pc->avx_level = 0;
pc->sse_level = 0;
pc->sse_sub_level = 0;
pc->xop_avail = 0;
if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
pc->proc_type = PROC_X64_INTEL;
@ -131,12 +136,23 @@ cpuid_basic_identify(processor_info_t *pc)
pc->sse_sub_level = 2;
}
} else {
pc->sse_level = 3;
if (raw.basic_cpuid[1][2] & SSE3_FLAG) {
pc->sse_level = 3;
if (raw.basic_cpuid[1][2] & SSSE3_FLAG) {
pc->sse_sub_level = 1;
}
} else {
pc->sse_level = 2;
}
}
pc->avx_level = 0;
if (raw.basic_cpuid[1][2] & (1 << 28)) {
if (raw.basic_cpuid[1][2] & AVX_FLAG) {
pc->avx_level = 1;
}
if (raw.ext_cpuid[1][2] & XOP_FLAG) {
pc->xop_avail = 1;
}
}
}

View file

@ -7,7 +7,12 @@ main(void)
{
processor_info_t pc;
cpuid_basic_identify(&pc);
printf("%d", pc.sse_level);
if (pc.sse_level == 3 && pc.sse_sub_level == 1) {
printf("ssse%d", pc.sse_level);
pc.sse_sub_level = 0;
} else {
printf("sse%d", pc.sse_level);
}
if (pc.sse_sub_level > 0)
printf(".%d\n", pc.sse_sub_level);
else

View file

@ -36,9 +36,20 @@
#include <errno.h>
#include <link.h>
#include <rabin_dedup.h>
#include <cpuid.h>
#include <xxhash.h>
#define _IN_UTILS_
#include "utils.h"
processor_info_t proc_info;
/*
 * Process-wide initialization: fill the global proc_info with the detected
 * CPU capabilities, then initialize the xxHash module.
 */
void
init_pcompress() {
/* Must come first: XXH32_module_init() reads proc_info.sse_level. */
cpuid_basic_identify(&proc_info);
XXH32_module_init();
}
void
err_exit(int show_errno, const char *format, ...)
{

View file

@ -91,13 +91,17 @@ typedef int32_t bsize_t;
// These allow helping the compiler in some often-executed branches, whose
// result is almost always the same.
#ifdef __GNUC__
# define likely(expr) __builtin_expect(expr, 1)
# define unlikely(expr) __builtin_expect(expr, 0)
# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val)
# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val)
# define likely(expr) __builtin_expect(expr, 1)
# define unlikely(expr) __builtin_expect(expr, 0)
# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val)
# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val)
# define PREFETCH_WRITE(x, n) __builtin_prefetch((x), 1, (n))
# define PREFETCH_READ(x, n) __builtin_prefetch((x), 0, (n))
#else
# define likely(expr) (expr)
# define unlikely(expr) (expr)
# define PREFETCH_WRITE(x, n)
# define PREFETCH_READ(x, n)
# if defined(sun) || defined (__sun)
# include <atomic.h>
# define ATOMIC_ADD(var, val) atomic_add_int(&var, val)
@ -148,9 +152,14 @@ typedef struct {
int sse_level;
int sse_sub_level;
int avx_level;
int xop_avail;
proc_type_t proc_type;
} processor_info_t;
#ifndef _IN_UTILS_
extern processor_info_t proc_info;
#endif
extern void err_exit(int show_errno, const char *format, ...);
extern const char *get_execname(const char *);
extern int parse_numeric(int64_t *val, const char *str);
@ -165,6 +174,7 @@ extern uint64_t get_total_ram();
extern double get_wtime_millis(void);
extern double get_mb_s(uint64_t bytes, double strt, double en);
extern void init_algo_props(algo_props_t *props);
extern void init_pcompress();
/* Pointer type for compress and decompress functions. */
typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst,

View file

@ -82,8 +82,6 @@
# define XXH_BIG_ENDIAN 0
#endif
//**************************************
// Compiler-specific Options & Functions
//**************************************
@ -141,7 +139,7 @@ static inline __m128i _x_mm_rotl_epi32(const __m128i a, int bits)
// Simple Hash Functions
//****************************
unsigned int XXH32(const void* input, int len, unsigned int seed)
unsigned int CPUCAP_NM(XXH32)(const void* input, int len, unsigned int seed)
{
#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
@ -284,7 +282,7 @@ struct XXH_state32_t
};
void* XXH32_init (unsigned int seed)
void* CPUCAP_NM(XXH32_init) (unsigned int seed)
{
struct XXH_state32_t * state = (struct XXH_state32_t *) malloc ( sizeof(struct XXH_state32_t));
state->seed = seed;
@ -303,7 +301,7 @@ void* XXH32_init (unsigned int seed)
}
int XXH32_feed (void* state_in, const void* input, int len)
int CPUCAP_NM(XXH32_feed) (void* state_in, const void* input, int len)
{
struct XXH_state32_t * state = state_in;
const unsigned char* p = (const unsigned char*)input;
@ -437,7 +435,7 @@ int XXH32_feed (void* state_in, const void* input, int len)
}
unsigned int XXH32_getIntermediateResult (void* state_in)
unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state_in)
{
struct XXH_state32_t * state = state_in;
unsigned char * p = (unsigned char*)state->memory;
@ -489,9 +487,9 @@ unsigned int XXH32_getIntermediateResult (void* state_in)
}
unsigned int XXH32_result (void* state_in)
unsigned int CPUCAP_NM(XXH32_result) (void* state_in)
{
unsigned int h32 = XXH32_getIntermediateResult(state_in);
unsigned int h32 = CPUCAP_NM(XXH32_getIntermediateResult)(state_in);
free(state_in);

View file

@ -63,12 +63,16 @@ It depends on successfully passing SMHasher test set.
extern "C" {
#endif
#ifndef CPUCAP_NM
#define CPUCAP_NM(x) x
#endif
//****************************
// Simple Hash Functions
//****************************
unsigned int XXH32 (const void* input, int len, unsigned int seed);
unsigned int CPUCAP_NM(XXH32) (const void* input, int len, unsigned int seed);
/*
XXH32() :
@ -86,9 +90,9 @@ XXH32() :
// Advanced Hash Functions
//****************************
void* XXH32_init (unsigned int seed);
int XXH32_feed (void* state, const void* input, int len);
unsigned int XXH32_result (void* state);
void* CPUCAP_NM(XXH32_init) (unsigned int seed);
int CPUCAP_NM(XXH32_feed) (void* state, const void* input, int len);
unsigned int CPUCAP_NM(XXH32_result) (void* state);
/*
These functions calculate the xxhash of an input provided in several small packets,
@ -113,7 +117,7 @@ Memory will be freed by XXH32_result().
*/
unsigned int XXH32_getIntermediateResult (void* state);
unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state);
/*
This function does the same as XXH32_result(), generating a 32-bit hash,
but preserve memory context.
@ -121,7 +125,7 @@ This way, it becomes possible to generate intermediate hashes, and then continue
To free memory context, use XXH32_result().
*/
void XXH32_module_init();
#if defined (__cplusplus)
}

71
utils/xxhash_base.c Normal file
View file

@ -0,0 +1,71 @@
#include <inttypes.h>
#include <xxhash.h>
#include <pthread.h>
#include <utils.h>
/*
 * Per-instruction-set entry points. xxhash_sse4.c and xxhash_sse2.c each
 * include xxhash.c with CPUCAP_NM() defined to append the _SSE4 / _SSE2
 * suffix to every public function name.
 */
extern void* XXH32_init_SSE4 (unsigned int seed);
extern int XXH32_feed_SSE4 (void* state, const void* input, int len);
extern unsigned int XXH32_result_SSE4 (void* state);
extern unsigned int XXH32_getIntermediateResult_SSE4 (void* state);
extern unsigned int XXH32_SSE4 (const void* input, int len, unsigned int seed);
extern void* XXH32_init_SSE2 (unsigned int seed);
extern int XXH32_feed_SSE2 (void* state, const void* input, int len);
extern unsigned int XXH32_result_SSE2 (void* state);
extern unsigned int XXH32_getIntermediateResult_SSE2 (void* state);
extern unsigned int XXH32_SSE2 (const void* input, int len, unsigned int seed);
/*
 * Dispatch pointers used by the public XXH32* wrappers in this file.
 * They stay NULL until XXH32_module_init() has run.
 */
unsigned int (*xxh32)(const void* input, int len, unsigned int seed) = NULL;
void * (*xxh32_init)(unsigned int seed) = NULL;
int (*xxh32_feed)(void* state, const void* input, int len) = NULL;
unsigned int (*xxh32_result)(void* state) = NULL;
unsigned int (*xxh32_getIntermediateResult)(void* state) = NULL;
#include <stdio.h>
/*
 * Bind the xxh32* dispatch pointers to the implementation matching the
 * running CPU, as recorded in the global proc_info. Must be called after
 * proc_info has been filled in and before any XXH32* wrapper is used.
 */
void
XXH32_module_init(void)
{
	/*
	 * The *_SSE4 objects are compiled with -msse4.2, so they may only be
	 * selected when SSE4.2 itself is present. sse_level == 4 alone also
	 * matches CPUs that stop at SSE4.1 (sse_sub_level 1), which must use
	 * the SSE2 build instead.
	 */
	int have_sse4_2 = proc_info.sse_level > 4 ||
	    (proc_info.sse_level == 4 && proc_info.sse_sub_level >= 2);

	if (have_sse4_2) {
		xxh32 = XXH32_SSE4;
		xxh32_init = XXH32_init_SSE4;
		xxh32_feed = XXH32_feed_SSE4;
		xxh32_result = XXH32_result_SSE4;
		xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE4;
	} else {
		/* Baseline build, also used when detection reports nothing better. */
		xxh32 = XXH32_SSE2;
		xxh32_init = XXH32_init_SSE2;
		xxh32_feed = XXH32_feed_SSE2;
		xxh32_result = XXH32_result_SSE2;
		xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE2;
	}
}
unsigned int
XXH32(const void* input, int len, unsigned int seed)
{
return xxh32(input, len, seed);
}
void*
XXH32_init(unsigned int seed)
{
return xxh32_init(seed);
}
int
XXH32_feed(void* state, const void* input, int len)
{
return xxh32_feed(state, input, len);
}
unsigned int
XXH32_result(void* state)
{
return xxh32_result(state);
}
unsigned int
XXH32_getIntermediateResult(void* state)
{
return xxh32_getIntermediateResult(state);
}

3
utils/xxhash_sse2.c Normal file
View file

@ -0,0 +1,3 @@
/*
 * SSE2 build of xxHash: include the shared implementation with every
 * CPUCAP_NM()-wrapped function name given an _SSE2 suffix.
 */
#define CPUCAP_NM(x) x##_SSE2
#include "xxhash.c"

3
utils/xxhash_sse4.c Normal file
View file

@ -0,0 +1,3 @@
/*
 * SSE4 build of xxHash: include the shared implementation with every
 * CPUCAP_NM()-wrapped function name given an _SSE4 suffix.
 */
#define CPUCAP_NM(x) x##_SSE4
#include "xxhash.c"