Changes for generalized runtime SSE/AVX/XOP detection.
Multi instruction set XXhash build with runtime selection. Extend CPUID code to detect more instruction sets. Add options for BLAKE2 hash. Move GCC builtins into utils header. Bump file format version number due to extended digest flags. Add descriptions to digest list.
This commit is contained in:
parent
7b7c85dab4
commit
26bb137257
18 changed files with 236 additions and 58 deletions
19
INSTALL
19
INSTALL
|
@ -1,5 +1,17 @@
|
||||||
Copyright (c) 2012 Moinak Ghosh
|
Copyright (c) 2012 Moinak Ghosh
|
||||||
|
|
||||||
|
Prerequisites
|
||||||
|
=============
|
||||||
|
64-bit System.
|
||||||
|
GCC 4.4 (with mpfr, ppl and cloog support packages for loop
|
||||||
|
vectorization).
|
||||||
|
libz (zlib) and development packages.
|
||||||
|
Libbz2 and development packages.
|
||||||
|
Libbsc source tree if BSC support is desired.
|
||||||
|
See below:
|
||||||
|
"Steps for building with libbsc support".
|
||||||
|
OpenSSL version 0.9.8 or greater.
|
||||||
|
|
||||||
Basic Installation
|
Basic Installation
|
||||||
==================
|
==================
|
||||||
The simplest process to build and install this utility is:
|
The simplest process to build and install this utility is:
|
||||||
|
@ -96,6 +108,13 @@ is not the usual GNU Autoconf script.
|
||||||
Enable building against an alternate Bzip2 and library
|
Enable building against an alternate Bzip2 and library
|
||||||
installation.
|
installation.
|
||||||
|
|
||||||
|
--no-sse-detect Do not try to detect the CPU's SSE capability. This
|
||||||
|
mode will simply use SSE2 as the fallback default.
|
||||||
|
Using SSE4 and later improves performance significantly.
|
||||||
|
|
||||||
|
--use-key256 Use 256-bit encryption keys. Default key length is
|
||||||
|
128-bit.
|
||||||
|
|
||||||
--help Display the help message.
|
--help Display the help message.
|
||||||
|
|
||||||
Steps for building with libbsc support
|
Steps for building with libbsc support
|
||||||
|
|
22
Makefile.in
22
Makefile.in
|
@ -24,9 +24,9 @@
|
||||||
PROG= pcompress
|
PROG= pcompress
|
||||||
MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
|
MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
|
||||||
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
|
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
|
||||||
utils/xxhash.c utils/heapq.c utils/cpuid.c
|
utils/xxhash_base.c utils/heapq.c utils/cpuid.c
|
||||||
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \
|
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \
|
||||||
utils/cpuid.h
|
utils/cpuid.h utils/xxhash.h
|
||||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||||
|
|
||||||
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
||||||
|
@ -37,6 +37,12 @@ CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
|
||||||
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
|
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
|
||||||
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
|
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
|
||||||
|
|
||||||
|
XXHASH_SRCS = utils/xxhash.c
|
||||||
|
XXHASH_SSE4_SRCS = utils/xxhash_sse4.c
|
||||||
|
XXHASH_SSE2_SRCS = utils/xxhash_sse2.c
|
||||||
|
XXHASH_OBJS = utils/xxhash_sse4.o utils/xxhash_sse2.o
|
||||||
|
XXHASH_HDRS = utils/xxhash.h
|
||||||
|
|
||||||
ZLIB_SRCS = zlib_compress.c
|
ZLIB_SRCS = zlib_compress.c
|
||||||
ZLIB_HDRS = $(MAINHDRS)
|
ZLIB_HDRS = $(MAINHDRS)
|
||||||
ZLIB_OBJS = $(ZLIB_SRCS:.c=.o)
|
ZLIB_OBJS = $(ZLIB_SRCS:.c=.o)
|
||||||
|
@ -156,7 +162,7 @@ LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR
|
||||||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
|
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
|
||||||
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
|
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
|
||||||
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS)
|
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS)
|
||||||
|
|
||||||
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
||||||
DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
|
DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
|
||||||
|
@ -185,8 +191,12 @@ COMPILE_cpp = @COMPILE_cpp@
|
||||||
VEC_FLAGS = @VEC_FLAGS@
|
VEC_FLAGS = @VEC_FLAGS@
|
||||||
LOOP_OPTFLAGS = @LOOP_OPTFLAGS@
|
LOOP_OPTFLAGS = @LOOP_OPTFLAGS@
|
||||||
CPPFLAGS = @CPPFLAGS@ @NO_SLAB_CPPFLAGS@ @DEBUG_STATS_CPPFLAGS@
|
CPPFLAGS = @CPPFLAGS@ @NO_SLAB_CPPFLAGS@ @DEBUG_STATS_CPPFLAGS@
|
||||||
GEN_OPT = @GEN_OPT@
|
GEN_OPT = @GEN_OPT@ @SSE_OPT_FLAGS@
|
||||||
|
BASE_OPT = @GEN_OPT@
|
||||||
PREFIX=@PREFIX@
|
PREFIX=@PREFIX@
|
||||||
|
SSE4_OPT_FLAG = -msse4.2
|
||||||
|
SSE3_OPT_FLAG = -mssse3
|
||||||
|
SSE2_OPT_FLAG = -msse2
|
||||||
|
|
||||||
SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
|
SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
|
||||||
SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
|
SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
|
||||||
|
@ -257,6 +267,10 @@ $(ZLIB_OBJS): $(ZLIB_SRCS) $(ZLIB_HDRS)
|
||||||
$(BZLIB_OBJS): $(BZLIB_SRCS) $(BZLIB_HDRS)
|
$(BZLIB_OBJS): $(BZLIB_SRCS) $(BZLIB_HDRS)
|
||||||
$(COMPILE) $(GEN_OPT) $(BZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
$(COMPILE) $(GEN_OPT) $(BZLIB_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||||
|
|
||||||
|
# The xxhash implementation (utils/xxhash.c) is compiled once per instruction
# set through the thin wrapper sources, each with its own -m flag; BASE_OPT is
# used instead of GEN_OPT so the configure-detected SSE flag is not added.
# One rule per object: a single rule listing both objects as targets would run
# both compile commands for each target, writing files other than $@ and
# racing under "make -j".
$(XXHASH_SSE4_SRCS:.c=.o): $(XXHASH_SSE4_SRCS) $(XXHASH_SRCS) $(XXHASH_HDRS)
	$(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE4_SRCS) -o $@

$(XXHASH_SSE2_SRCS:.c=.o): $(XXHASH_SSE2_SRCS) $(XXHASH_SRCS) $(XXHASH_HDRS)
	$(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE2_SRCS) -o $@
|
||||||
|
|
||||||
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
||||||
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||||
|
|
||||||
|
|
17
config
17
config
|
@ -21,6 +21,7 @@ ${prog} [<options>]
|
||||||
Enable building against an alternate Zlib installation.
|
Enable building against an alternate Zlib installation.
|
||||||
--with-bzlib=<path to Bzip2 library installation tree> (Default: System)
|
--with-bzlib=<path to Bzip2 library installation tree> (Default: System)
|
||||||
Enable building against an alternate Bzip2 and library installation.
|
Enable building against an alternate Bzip2 and library installation.
|
||||||
|
--no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags.
|
||||||
--use-key256 Use 256-bit encryption keys. Default key length is 128-bit.
|
--use-key256 Use 256-bit encryption keys. Default key length is 128-bit.
|
||||||
--help Display this help message.
|
--help Display this help message.
|
||||||
|
|
||||||
|
@ -54,7 +55,7 @@ extra_opt_flags=
|
||||||
zlib_prefix=
|
zlib_prefix=
|
||||||
bzlib_prefix=
|
bzlib_prefix=
|
||||||
sse_detect=1
|
sse_detect=1
|
||||||
default_sse="-msse2"
|
sse_opt_flags="-msse2"
|
||||||
|
|
||||||
rm -rf ./buildtmp
|
rm -rf ./buildtmp
|
||||||
mkdir ./buildtmp
|
mkdir ./buildtmp
|
||||||
|
@ -76,7 +77,7 @@ gcc tst.c -o tst
|
||||||
if [ $? -ne 0 ]
|
if [ $? -ne 0 ]
|
||||||
then
|
then
|
||||||
echo "ERROR:"
|
echo "ERROR:"
|
||||||
echo "Cannot compile a simple program. GCC 4.1 and above is required"
|
echo "Cannot compile a simple program. GCC 4.4 and above is required"
|
||||||
echo "to build this program. Please include installation bindir of GCC in the PATH."
|
echo "to build this program. Please include installation bindir of GCC in the PATH."
|
||||||
echo ""
|
echo ""
|
||||||
rm -f tst.c
|
rm -f tst.c
|
||||||
|
@ -140,7 +141,7 @@ do
|
||||||
--use-key256)
|
--use-key256)
|
||||||
keylen='-DKEYLEN=32'
|
keylen='-DKEYLEN=32'
|
||||||
;;
|
;;
|
||||||
--no-sse-check)
|
--no-sse-detect)
|
||||||
sse_detect=0
|
sse_detect=0
|
||||||
;;
|
;;
|
||||||
--help) usage $0;;
|
--help) usage $0;;
|
||||||
|
@ -180,10 +181,10 @@ IFS=.
|
||||||
set -- ${vers}
|
set -- ${vers}
|
||||||
IFS="$OIFS"
|
IFS="$OIFS"
|
||||||
|
|
||||||
# Require GCC 4.4 or newer. $1 and $2 are the major and minor components of
# ${vers}, split on '.' by the "set --" above.
# The minor number is only relevant when the major number is exactly 4;
# testing "major < 4 OR minor < 4" would wrongly reject e.g. GCC 5.1.
# A missing component (version string with no '.') is treated as 0.
if [ "${1:-0}" -lt 4 ] || { [ "${1:-0}" -eq 4 ] && [ "${2:-0}" -lt 4 ]; }
then
	echo "ERROR:"
	echo "GCC version 4.4 or above is required."
	echo ""
	exit 1
fi
|
||||||
|
@ -209,9 +210,7 @@ then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
rm -f sse_level
|
rm -f sse_level
|
||||||
extra_opt_flags="${extra_opt_flags} -msse${sse_ver}"
|
sse_opt_flags="-m${sse_ver}"
|
||||||
else
|
|
||||||
extra_opt_flags="${extra_opt_flags} ${default_sse}"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
@ -498,6 +497,7 @@ sha256objsvar="SHA256_OBJS"
|
||||||
yasmvar="YASM"
|
yasmvar="YASM"
|
||||||
fptr_flag_var="FPTR_FLAG"
|
fptr_flag_var="FPTR_FLAG"
|
||||||
extra_opt_flags_var="EXTRA_OPT_FLAGS"
|
extra_opt_flags_var="EXTRA_OPT_FLAGS"
|
||||||
|
sse_opt_flags_var="SSE_OPT_FLAGS"
|
||||||
|
|
||||||
openssllibdirvar="OPENSSL_LIBDIR"
|
openssllibdirvar="OPENSSL_LIBDIR"
|
||||||
opensslincdirvar="OPENSSL_INCDIR"
|
opensslincdirvar="OPENSSL_INCDIR"
|
||||||
|
@ -550,5 +550,6 @@ s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
|
||||||
s#@${keccak_srcs_var}@#${keccak_srcs}#g
|
s#@${keccak_srcs_var}@#${keccak_srcs}#g
|
||||||
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
|
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
|
||||||
s#@${extra_opt_flags_var}@#${extra_opt_flags}#g
|
s#@${extra_opt_flags_var}@#${extra_opt_flags}#g
|
||||||
|
s#@${sse_opt_flags_var}@#${sse_opt_flags}#g
|
||||||
" > Makefile
|
" > Makefile
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#include <sha256.h>
|
#include <sha256.h>
|
||||||
#include <crypto_aes.h>
|
#include <crypto_aes.h>
|
||||||
#include <KeccakNISTInterface.h>
|
#include <KeccakNISTInterface.h>
|
||||||
#include <cpuid.h>
|
#include <utils.h>
|
||||||
|
|
||||||
#include "crypto_utils.h"
|
#include "crypto_utils.h"
|
||||||
|
|
||||||
|
@ -54,17 +54,29 @@ static int geturandom_bytes(uchar_t rbytes[32]);
|
||||||
typedef void (*ckinit_func_ptr)(void);
|
typedef void (*ckinit_func_ptr)(void);
|
||||||
static struct {
|
static struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
|
const char *desc;
|
||||||
cksum_t cksum_id;
|
cksum_t cksum_id;
|
||||||
int bytes, mac_bytes;
|
int bytes, mac_bytes;
|
||||||
ckinit_func_ptr init_func;
|
ckinit_func_ptr init_func;
|
||||||
} cksum_props[] = {
|
} cksum_props[] = {
|
||||||
{"CRC64", CKSUM_CRC64, 8, 32, NULL},
|
{"CRC64", "Fast 64-bit CRC from LZMA SDK.",
|
||||||
{"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL},
|
CKSUM_CRC64, 8, 32, NULL},
|
||||||
{"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL},
|
{"SKEIN256", "256-bit SKEIN a NIST SHA3 runners-up (90% faster than Keccak).",
|
||||||
{"SHA256", CKSUM_SHA256, 32, 32, init_sha256},
|
CKSUM_SKEIN256, 32, 32, NULL},
|
||||||
{"SHA512", CKSUM_SHA512, 64, 64, NULL},
|
{"SKEIN512", "512-bit SKEIN",
|
||||||
{"KECCAK256", CKSUM_KECCAK256, 32, 32, NULL},
|
CKSUM_SKEIN512, 64, 64, NULL},
|
||||||
{"KECCAK512", CKSUM_KECCAK512, 64, 64, NULL}
|
{"SHA256", "Intel's optimized (SSE,AVX) 256-bit SHA2 implementation for x86.",
|
||||||
|
CKSUM_SHA256, 32, 32, init_sha256},
|
||||||
|
{"SHA512", "512-bit SHA2 from OpenSSL's crypto library.",
|
||||||
|
CKSUM_SHA512, 64, 64, NULL},
|
||||||
|
{"KECCAK256", "Official 256-bit NIST SHA3 optimized implementation.",
|
||||||
|
CKSUM_KECCAK256, 32, 32, NULL},
|
||||||
|
{"KECCAK512", "Official 512-bit NIST SHA3 optimized implementation.",
|
||||||
|
CKSUM_KECCAK512, 64, 64, NULL},
|
||||||
|
{"BLAKE256", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
|
||||||
|
CKSUM_BLAKE256, 32, 32, NULL},
|
||||||
|
{"BLAKE512", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
|
||||||
|
CKSUM_BLAKE512, 64, 64, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int cksum_provider = PROVIDER_OPENSSL;
|
static int cksum_provider = PROVIDER_OPENSSL;
|
||||||
|
@ -213,12 +225,9 @@ init_sha256(void)
|
||||||
cksum_provider = PROVIDER_OPENSSL;
|
cksum_provider = PROVIDER_OPENSSL;
|
||||||
#else
|
#else
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
processor_info_t pc;
|
|
||||||
|
|
||||||
cksum_provider = PROVIDER_OPENSSL;
|
cksum_provider = PROVIDER_OPENSSL;
|
||||||
cpuid_basic_identify(&pc);
|
if (proc_info.proc_type == PROC_X64_INTEL || proc_info.proc_type == PROC_X64_AMD) {
|
||||||
if (pc.proc_type == PROC_X64_INTEL || pc.proc_type == PROC_X64_AMD) {
|
if (opt_Init_SHA(&proc_info) == 0) {
|
||||||
if (opt_Init_SHA(&pc) == 0) {
|
|
||||||
cksum_provider = PROVIDER_X64_OPT;
|
cksum_provider = PROVIDER_X64_OPT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,6 +235,15 @@ init_sha256(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
list_checksums(FILE *strm, char *pad)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) {
|
||||||
|
fprintf(strm, "%s%10s - %s\n", pad, cksum_props[i].name, cksum_props[i].desc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if either the given checksum name or id is valid and
|
* Check if either the given checksum name or id is valid and
|
||||||
* return it's properties.
|
* return it's properties.
|
||||||
|
@ -235,7 +253,7 @@ get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_byte
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i=0; i<sizeof (cksum_props); i++) {
|
for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) {
|
||||||
if ((name != NULL && strcmp(name, cksum_props[i].name) == 0) ||
|
if ((name != NULL && strcmp(name, cksum_props[i].name) == 0) ||
|
||||||
(*cksum != 0 && *cksum == cksum_props[i].cksum_id)) {
|
(*cksum != 0 && *cksum == cksum_props[i].cksum_id)) {
|
||||||
*cksum = cksum_props[i].cksum_id;
|
*cksum = cksum_props[i].cksum_id;
|
||||||
|
|
|
@ -33,7 +33,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MAX_PW_LEN 16
|
#define MAX_PW_LEN 16
|
||||||
#define CKSUM_MASK 0x700
|
#define CKSUM_MASK 0x800
|
||||||
#define CKSUM_MAX_BYTES 64
|
#define CKSUM_MAX_BYTES 64
|
||||||
#define DEFAULT_CKSUM "SKEIN256"
|
#define DEFAULT_CKSUM "SKEIN256"
|
||||||
|
|
||||||
|
@ -54,7 +54,9 @@ typedef enum {
|
||||||
CKSUM_SHA256 = 0x400,
|
CKSUM_SHA256 = 0x400,
|
||||||
CKSUM_SHA512 = 0x500,
|
CKSUM_SHA512 = 0x500,
|
||||||
CKSUM_KECCAK256 = 0x600,
|
CKSUM_KECCAK256 = 0x600,
|
||||||
CKSUM_KECCAK512 = 0x700
|
CKSUM_KECCAK512 = 0x700,
|
||||||
|
CKSUM_BLAKE256 = 0x800,
|
||||||
|
CKSUM_BLAKE512 = 0x900
|
||||||
} cksum_t;
|
} cksum_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -75,6 +77,7 @@ typedef struct {
|
||||||
* Generic message digest functions.
|
* Generic message digest functions.
|
||||||
*/
|
*/
|
||||||
int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes);
|
int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes);
|
||||||
|
void list_checksums(FILE *strm, char *pad);
|
||||||
int get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_bytes);
|
int get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_bytes);
|
||||||
void serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
void serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
||||||
void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
||||||
|
|
|
@ -116,8 +116,8 @@ do_last:
|
||||||
for (; pos < buflen-15; pos += 16) {
|
for (; pos < buflen-15; pos += 16) {
|
||||||
__m128i cblk, dat, odat;
|
__m128i cblk, dat, odat;
|
||||||
|
|
||||||
__builtin_prefetch(outbuf+pos, 1, 0);
|
PREFETCH_WRITE(outbuf+pos, 0);
|
||||||
__builtin_prefetch(inbuf+pos, 0, 0);
|
PREFETCH_READ(inbuf+pos, 0);
|
||||||
cblk = _mm_load_si128((__m128i *)(stream->buf));
|
cblk = _mm_load_si128((__m128i *)(stream->buf));
|
||||||
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
|
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
|
||||||
odat = _mm_xor_si128(cblk, dat);
|
odat = _mm_xor_si128(cblk, dat);
|
||||||
|
|
12
main.c
12
main.c
|
@ -158,15 +158,16 @@ usage(void)
|
||||||
" NOTE - Both -L and -P can be used together to give maximum benefit on most.\n"
|
" NOTE - Both -L and -P can be used together to give maximum benefit on most.\n"
|
||||||
" datasets.\n"
|
" datasets.\n"
|
||||||
" '-S' <cksum>\n"
|
" '-S' <cksum>\n"
|
||||||
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512, SHA256 and\n"
|
" - Specify chunk checksum to use:\n\n",
|
||||||
" SHA512. Default one is SKEIN256.\n"
|
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
|
||||||
|
list_checksums(stderr, " ");
|
||||||
|
fprintf(stderr, "\n"
|
||||||
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n"
|
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n"
|
||||||
" but with lower deduplication ratio.\n"
|
" but with lower deduplication ratio.\n"
|
||||||
" '-B' <1..5>\n"
|
" '-B' <1..5>\n"
|
||||||
" - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
|
" - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
|
||||||
" '-M' - Display memory allocator statistics\n"
|
" '-M' - Display memory allocator statistics\n"
|
||||||
" '-C' - Display compression statistics\n\n",
|
" '-C' - Display compression statistics\n\n");
|
||||||
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -716,7 +717,7 @@ start_decompress(const char *filename, const char *to_filename)
|
||||||
err = 1;
|
err = 1;
|
||||||
goto uncomp_done;
|
goto uncomp_done;
|
||||||
}
|
}
|
||||||
if (version < VERSION-2) {
|
if (version < VERSION-3) {
|
||||||
fprintf(stderr, "Unsupported version: %d\n", version);
|
fprintf(stderr, "Unsupported version: %d\n", version);
|
||||||
err = 1;
|
err = 1;
|
||||||
goto uncomp_done;
|
goto uncomp_done;
|
||||||
|
@ -2340,6 +2341,7 @@ main(int argc, char *argv[])
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
main_cancel = 0;
|
main_cancel = 0;
|
||||||
|
init_pcompress();
|
||||||
|
|
||||||
if (cksum == 0)
|
if (cksum == 0)
|
||||||
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes);
|
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes);
|
||||||
|
|
|
@ -38,7 +38,7 @@ extern "C" {
|
||||||
#define CHUNK_FLAG_SZ 1
|
#define CHUNK_FLAG_SZ 1
|
||||||
#define ALGO_SZ 8
|
#define ALGO_SZ 8
|
||||||
#define MIN_CHUNK 2048
|
#define MIN_CHUNK 2048
|
||||||
#define VERSION 5
|
#define VERSION 6
|
||||||
#define FLAG_DEDUP 1
|
#define FLAG_DEDUP 1
|
||||||
#define FLAG_DEDUP_FIXED 2
|
#define FLAG_DEDUP_FIXED 2
|
||||||
#define FLAG_SINGLE_CHUNK 4
|
#define FLAG_SINGLE_CHUNK 4
|
||||||
|
|
|
@ -32,6 +32,10 @@
|
||||||
|
|
||||||
#define SSE4_1_FLAG 0x080000
|
#define SSE4_1_FLAG 0x080000
|
||||||
#define SSE4_2_FLAG 0x100000
|
#define SSE4_2_FLAG 0x100000
|
||||||
|
#define SSE3_FLAG 0x1
|
||||||
|
#define SSSE3_FLAG 0x200
|
||||||
|
#define AVX_FLAG 0x10000000
|
||||||
|
#define XOP_FLAG 0x800
|
||||||
|
|
||||||
void
|
void
|
||||||
exec_cpuid(uint32_t *regs)
|
exec_cpuid(uint32_t *regs)
|
||||||
|
@ -112,6 +116,7 @@ cpuid_basic_identify(processor_info_t *pc)
|
||||||
pc->avx_level = 0;
|
pc->avx_level = 0;
|
||||||
pc->sse_level = 0;
|
pc->sse_level = 0;
|
||||||
pc->sse_sub_level = 0;
|
pc->sse_sub_level = 0;
|
||||||
|
pc->xop_avail = 0;
|
||||||
|
|
||||||
if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
|
if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
|
||||||
pc->proc_type = PROC_X64_INTEL;
|
pc->proc_type = PROC_X64_INTEL;
|
||||||
|
@ -131,12 +136,23 @@ cpuid_basic_identify(processor_info_t *pc)
|
||||||
pc->sse_sub_level = 2;
|
pc->sse_sub_level = 2;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (raw.basic_cpuid[1][2] & SSE3_FLAG) {
|
||||||
pc->sse_level = 3;
|
pc->sse_level = 3;
|
||||||
|
if (raw.basic_cpuid[1][2] & SSSE3_FLAG) {
|
||||||
|
pc->sse_sub_level = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pc->sse_level = 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pc->avx_level = 0;
|
pc->avx_level = 0;
|
||||||
if (raw.basic_cpuid[1][2] & (1 << 28)) {
|
if (raw.basic_cpuid[1][2] & AVX_FLAG) {
|
||||||
pc->avx_level = 1;
|
pc->avx_level = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (raw.ext_cpuid[1][2] & XOP_FLAG) {
|
||||||
|
pc->xop_avail = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,12 @@ main(void)
|
||||||
{
|
{
|
||||||
processor_info_t pc;
|
processor_info_t pc;
|
||||||
cpuid_basic_identify(&pc);
|
cpuid_basic_identify(&pc);
|
||||||
printf("%d", pc.sse_level);
|
if (pc.sse_level == 3 && pc.sse_sub_level == 1) {
|
||||||
|
printf("ssse%d", pc.sse_level);
|
||||||
|
pc.sse_sub_level = 0;
|
||||||
|
} else {
|
||||||
|
printf("sse%d", pc.sse_level);
|
||||||
|
}
|
||||||
if (pc.sse_sub_level > 0)
|
if (pc.sse_sub_level > 0)
|
||||||
printf(".%d\n", pc.sse_sub_level);
|
printf(".%d\n", pc.sse_sub_level);
|
||||||
else
|
else
|
||||||
|
|
|
@ -36,9 +36,20 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <link.h>
|
#include <link.h>
|
||||||
#include <rabin_dedup.h>
|
#include <rabin_dedup.h>
|
||||||
|
#include <cpuid.h>
|
||||||
|
#include <xxhash.h>
|
||||||
|
|
||||||
|
#define _IN_UTILS_
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
|
processor_info_t proc_info;
|
||||||
|
|
||||||
|
void
|
||||||
|
init_pcompress() {
|
||||||
|
cpuid_basic_identify(&proc_info);
|
||||||
|
XXH32_module_init();
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
err_exit(int show_errno, const char *format, ...)
|
err_exit(int show_errno, const char *format, ...)
|
||||||
{
|
{
|
||||||
|
|
|
@ -95,9 +95,13 @@ typedef int32_t bsize_t;
|
||||||
# define unlikely(expr) __builtin_expect(expr, 0)
|
# define unlikely(expr) __builtin_expect(expr, 0)
|
||||||
# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val)
|
# define ATOMIC_ADD(var, val) __sync_fetch_and_add(&var, val)
|
||||||
# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val)
|
# define ATOMIC_SUB(var, val) __sync_fetch_and_sub(&var, val)
|
||||||
|
# define PREFETCH_WRITE(x, n) __builtin_prefetch((x), 1, (n))
|
||||||
|
# define PREFETCH_READ(x, n) __builtin_prefetch((x), 0, (n))
|
||||||
#else
|
#else
|
||||||
# define likely(expr) (expr)
|
# define likely(expr) (expr)
|
||||||
# define unlikely(expr) (expr)
|
# define unlikely(expr) (expr)
|
||||||
|
# define PREFETCH_WRITE(x, n)
|
||||||
|
# define PREFETCH_READ(x, n)
|
||||||
# if defined(sun) || defined (__sun)
|
# if defined(sun) || defined (__sun)
|
||||||
# include <atomic.h>
|
# include <atomic.h>
|
||||||
# define ATOMIC_ADD(var, val) atomic_add_int(&var, val)
|
# define ATOMIC_ADD(var, val) atomic_add_int(&var, val)
|
||||||
|
@ -148,9 +152,14 @@ typedef struct {
|
||||||
int sse_level;
|
int sse_level;
|
||||||
int sse_sub_level;
|
int sse_sub_level;
|
||||||
int avx_level;
|
int avx_level;
|
||||||
|
int xop_avail;
|
||||||
proc_type_t proc_type;
|
proc_type_t proc_type;
|
||||||
} processor_info_t;
|
} processor_info_t;
|
||||||
|
|
||||||
|
#ifndef _IN_UTILS_
|
||||||
|
extern processor_info_t proc_info;
|
||||||
|
#endif
|
||||||
|
|
||||||
extern void err_exit(int show_errno, const char *format, ...);
|
extern void err_exit(int show_errno, const char *format, ...);
|
||||||
extern const char *get_execname(const char *);
|
extern const char *get_execname(const char *);
|
||||||
extern int parse_numeric(int64_t *val, const char *str);
|
extern int parse_numeric(int64_t *val, const char *str);
|
||||||
|
@ -165,6 +174,7 @@ extern uint64_t get_total_ram();
|
||||||
extern double get_wtime_millis(void);
|
extern double get_wtime_millis(void);
|
||||||
extern double get_mb_s(uint64_t bytes, double strt, double en);
|
extern double get_mb_s(uint64_t bytes, double strt, double en);
|
||||||
extern void init_algo_props(algo_props_t *props);
|
extern void init_algo_props(algo_props_t *props);
|
||||||
|
extern void init_pcompress();
|
||||||
|
|
||||||
/* Pointer type for compress and decompress functions. */
|
/* Pointer type for compress and decompress functions. */
|
||||||
typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst,
|
typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst,
|
||||||
|
|
|
@ -82,8 +82,6 @@
|
||||||
# define XXH_BIG_ENDIAN 0
|
# define XXH_BIG_ENDIAN 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************
|
//**************************************
|
||||||
// Compiler-specific Options & Functions
|
// Compiler-specific Options & Functions
|
||||||
//**************************************
|
//**************************************
|
||||||
|
@ -141,7 +139,7 @@ static inline __m128i _x_mm_rotl_epi32(const __m128i a, int bits)
|
||||||
// Simple Hash Functions
|
// Simple Hash Functions
|
||||||
//****************************
|
//****************************
|
||||||
|
|
||||||
unsigned int XXH32(const void* input, int len, unsigned int seed)
|
unsigned int CPUCAP_NM(XXH32)(const void* input, int len, unsigned int seed)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
// Simple version, good for code maintenance, but unfortunately slow for small inputs
|
// Simple version, good for code maintenance, but unfortunately slow for small inputs
|
||||||
|
@ -284,7 +282,7 @@ struct XXH_state32_t
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void* XXH32_init (unsigned int seed)
|
void* CPUCAP_NM(XXH32_init) (unsigned int seed)
|
||||||
{
|
{
|
||||||
struct XXH_state32_t * state = (struct XXH_state32_t *) malloc ( sizeof(struct XXH_state32_t));
|
struct XXH_state32_t * state = (struct XXH_state32_t *) malloc ( sizeof(struct XXH_state32_t));
|
||||||
state->seed = seed;
|
state->seed = seed;
|
||||||
|
@ -303,7 +301,7 @@ void* XXH32_init (unsigned int seed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int XXH32_feed (void* state_in, const void* input, int len)
|
int CPUCAP_NM(XXH32_feed) (void* state_in, const void* input, int len)
|
||||||
{
|
{
|
||||||
struct XXH_state32_t * state = state_in;
|
struct XXH_state32_t * state = state_in;
|
||||||
const unsigned char* p = (const unsigned char*)input;
|
const unsigned char* p = (const unsigned char*)input;
|
||||||
|
@ -437,7 +435,7 @@ int XXH32_feed (void* state_in, const void* input, int len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned int XXH32_getIntermediateResult (void* state_in)
|
unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state_in)
|
||||||
{
|
{
|
||||||
struct XXH_state32_t * state = state_in;
|
struct XXH_state32_t * state = state_in;
|
||||||
unsigned char * p = (unsigned char*)state->memory;
|
unsigned char * p = (unsigned char*)state->memory;
|
||||||
|
@ -489,9 +487,9 @@ unsigned int XXH32_getIntermediateResult (void* state_in)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned int XXH32_result (void* state_in)
|
unsigned int CPUCAP_NM(XXH32_result) (void* state_in)
|
||||||
{
|
{
|
||||||
unsigned int h32 = XXH32_getIntermediateResult(state_in);
|
unsigned int h32 = CPUCAP_NM(XXH32_getIntermediateResult)(state_in);
|
||||||
|
|
||||||
free(state_in);
|
free(state_in);
|
||||||
|
|
||||||
|
|
|
@ -63,12 +63,16 @@ It depends on successfully passing SMHasher test set.
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef CPUCAP_NM
|
||||||
|
#define CPUCAP_NM(x) x
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
//****************************
|
//****************************
|
||||||
// Simple Hash Functions
|
// Simple Hash Functions
|
||||||
//****************************
|
//****************************
|
||||||
|
|
||||||
unsigned int XXH32 (const void* input, int len, unsigned int seed);
|
unsigned int CPUCAP_NM(XXH32) (const void* input, int len, unsigned int seed);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
XXH32() :
|
XXH32() :
|
||||||
|
@ -86,9 +90,9 @@ XXH32() :
|
||||||
// Advanced Hash Functions
|
// Advanced Hash Functions
|
||||||
//****************************
|
//****************************
|
||||||
|
|
||||||
void* XXH32_init (unsigned int seed);
|
void* CPUCAP_NM(XXH32_init) (unsigned int seed);
|
||||||
int XXH32_feed (void* state, const void* input, int len);
|
int CPUCAP_NM(XXH32_feed) (void* state, const void* input, int len);
|
||||||
unsigned int XXH32_result (void* state);
|
unsigned int CPUCAP_NM(XXH32_result) (void* state);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
These functions calculate the xxhash of an input provided in several small packets,
|
These functions calculate the xxhash of an input provided in several small packets,
|
||||||
|
@ -113,7 +117,7 @@ Memory will be freed by XXH32_result().
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
unsigned int XXH32_getIntermediateResult (void* state);
|
unsigned int CPUCAP_NM(XXH32_getIntermediateResult) (void* state);
|
||||||
/*
|
/*
|
||||||
This function does the same as XXH32_result(), generating a 32-bit hash,
|
This function does the same as XXH32_result(), generating a 32-bit hash,
|
||||||
but preserve memory context.
|
but preserve memory context.
|
||||||
|
@ -121,7 +125,7 @@ This way, it becomes possible to generate intermediate hashes, and then continue
|
||||||
To free memory context, use XXH32_result().
|
To free memory context, use XXH32_result().
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
void XXH32_module_init();
|
||||||
|
|
||||||
#if defined (__cplusplus)
|
#if defined (__cplusplus)
|
||||||
}
|
}
|
||||||
|
|
71
utils/xxhash_base.c
Normal file
71
utils/xxhash_base.c
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <xxhash.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <utils.h>
|
||||||
|
|
||||||
|
extern void* XXH32_init_SSE4 (unsigned int seed);
|
||||||
|
extern int XXH32_feed_SSE4 (void* state, const void* input, int len);
|
||||||
|
extern unsigned int XXH32_result_SSE4 (void* state);
|
||||||
|
extern unsigned int XXH32_getIntermediateResult_SSE4 (void* state);
|
||||||
|
extern unsigned int XXH32_SSE4 (const void* input, int len, unsigned int seed);
|
||||||
|
|
||||||
|
extern void* XXH32_init_SSE2 (unsigned int seed);
|
||||||
|
extern int XXH32_feed_SSE2 (void* state, const void* input, int len);
|
||||||
|
extern unsigned int XXH32_result_SSE2 (void* state);
|
||||||
|
extern unsigned int XXH32_getIntermediateResult_SSE2 (void* state);
|
||||||
|
extern unsigned int XXH32_SSE2 (const void* input, int len, unsigned int seed);
|
||||||
|
|
||||||
|
unsigned int (*xxh32)(const void* input, int len, unsigned int seed) = NULL;
|
||||||
|
void * (*xxh32_init)(unsigned int seed) = NULL;
|
||||||
|
int (*xxh32_feed)(void* state, const void* input, int len) = NULL;
|
||||||
|
unsigned int (*xxh32_result)(void* state) = NULL;
|
||||||
|
unsigned int (*xxh32_getIntermediateResult)(void* state) = NULL;
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
XXH32_module_init() {
|
||||||
|
if (proc_info.sse_level >= 4) {
|
||||||
|
xxh32 = XXH32_SSE4;
|
||||||
|
xxh32_init = XXH32_init_SSE4;
|
||||||
|
xxh32_feed = XXH32_feed_SSE4;
|
||||||
|
xxh32_result = XXH32_result_SSE4;
|
||||||
|
xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE4;
|
||||||
|
} else {
|
||||||
|
xxh32 = XXH32_SSE2;
|
||||||
|
xxh32_init = XXH32_init_SSE2;
|
||||||
|
xxh32_feed = XXH32_feed_SSE2;
|
||||||
|
xxh32_result = XXH32_result_SSE2;
|
||||||
|
xxh32_getIntermediateResult = XXH32_getIntermediateResult_SSE2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
XXH32(const void* input, int len, unsigned int seed)
|
||||||
|
{
|
||||||
|
return xxh32(input, len, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void*
|
||||||
|
XXH32_init(unsigned int seed)
|
||||||
|
{
|
||||||
|
return xxh32_init(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
XXH32_feed(void* state, const void* input, int len)
|
||||||
|
{
|
||||||
|
return xxh32_feed(state, input, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
XXH32_result(void* state)
|
||||||
|
{
|
||||||
|
return xxh32_result(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
XXH32_getIntermediateResult(void* state)
|
||||||
|
{
|
||||||
|
return xxh32_getIntermediateResult(state);
|
||||||
|
}
|
||||||
|
|
3
utils/xxhash_sse2.c
Normal file
3
utils/xxhash_sse2.c
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#define CPUCAP_NM(x) x##_SSE2
|
||||||
|
#include "xxhash.c"
|
||||||
|
|
3
utils/xxhash_sse4.c
Normal file
3
utils/xxhash_sse4.c
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#define CPUCAP_NM(x) x##_SSE4
|
||||||
|
#include "xxhash.c"
|
||||||
|
|
Loading…
Reference in a new issue