Add optimized BLAKE2 implementations with runtime detection of CPU capability (SSE/AVX).
Minor cleanups.
This commit is contained in:
parent
43af97042a
commit
d08b5ea399
25 changed files with 18504 additions and 101 deletions
39
Makefile.in
39
Makefile.in
|
@ -43,6 +43,22 @@ XXHASH_SSE2_SRCS = utils/xxhash_sse2.c
|
|||
XXHASH_OBJS = utils/xxhash_sse4.o utils/xxhash_sse2.o
|
||||
XXHASH_HDRS = utils/xxhash.h
|
||||
|
||||
BLAKE2b_SSE2 = crypto/blake2/blake2b_sse2.c
|
||||
BLAKE2b_SSE3 = crypto/blake2/blake2b_ssse3.c
|
||||
BLAKE2b_SSE4 = crypto/blake2/blake2b_sse41.c
|
||||
BLAKE2b_AVX = crypto/blake2/blake2b_avx.c
|
||||
BLAKE2bp_SSE2 = crypto/blake2/blake2bp_sse2.c
|
||||
BLAKE2bp_SSE3 = crypto/blake2/blake2bp_ssse3.c
|
||||
BLAKE2bp_SSE4 = crypto/blake2/blake2bp_sse41.c
|
||||
BLAKE2bp_AVX = crypto/blake2/blake2bp_avx.c
|
||||
BLAKE2_BASE_SRCS = crypto/blake2/blake2b.c crypto/blake2/blake2bp.c
|
||||
BLAKE2_HDRS = crypto/blake2/blake2.h crypto/blake2/blake2-impl.h crypto/blake2/blake2-config.h \
|
||||
crypto/blake2/blake2-kat.h crypto/blake2/blake2b-round.h crypto/blake2/blake2b-load-sse2.h \
|
||||
crypto/blake2/blake2b-load-sse41.h
|
||||
BLAKE2_SRCS = $(BLAKE2b_SSE2) $(BLAKE2b_SSE3) $(BLAKE2b_SSE4) $(BLAKE2b_AVX) \
|
||||
$(BLAKE2bp_SSE2) $(BLAKE2bp_SSE3) $(BLAKE2bp_SSE4) $(BLAKE2bp_AVX)
|
||||
BLAKE2_OBJS = $(BLAKE2_SRCS:.c=.o)
|
||||
|
||||
ZLIB_SRCS = zlib_compress.c
|
||||
ZLIB_HDRS = $(MAINHDRS)
|
||||
ZLIB_OBJS = $(ZLIB_SRCS:.c=.o)
|
||||
|
@ -152,7 +168,7 @@ COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT
|
|||
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \
|
||||
-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I./crypto/sha2 \
|
||||
-I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \
|
||||
-I./crypto/keccak -I./transpose $(EXTRA_CPPFLAGS) -pedantic -Wall -std=gnu99 \
|
||||
-I./crypto/keccak -I./transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) -pedantic -Wall -std=gnu99 \
|
||||
-fno-strict-aliasing -Wno-unused-but-set-variable -Wno-enum-compare
|
||||
COMMON_VEC_FLAGS = -ftree-vectorize
|
||||
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||
|
@ -161,24 +177,24 @@ LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR
|
|||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
|
||||
$(SKEIN_BLOCK_OBJ) @SHA2ASM_OBJS@ @SHA2_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \
|
||||
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS)
|
||||
$(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS) $(BLAKE2_OBJS)
|
||||
|
||||
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
||||
DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp
|
||||
DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@
|
||||
DEBUG_COMPILE_cpp = g++ -g -c @EXTRA_OPT_FLAGS@
|
||||
DEBUG_VEC_FLAGS =
|
||||
DEBUG_LOOP_OPTFLAGS =
|
||||
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@
|
||||
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@ -fopenmp
|
||||
DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
|
||||
DEBUG_FPTR_FLAG =
|
||||
|
||||
RELEASE_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@
|
||||
RELEASE_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp
|
||||
RELEASE_COMPILE = gcc -c @EXTRA_OPT_FLAGS@
|
||||
RELEASE_COMPILE_cpp = g++ -c @EXTRA_OPT_FLAGS@
|
||||
RELEASE_VEC_FLAGS = $(COMMON_VEC_FLAGS)
|
||||
RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
|
||||
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
|
||||
RELEASE_GEN_OPT = -O3 @LIBBSCGEN_OPT@
|
||||
RELEASE_GEN_OPT = -O3 @LIBBSCGEN_OPT@ -fopenmp
|
||||
RELEASE_FPTR_FLAG = -fomit-frame-pointer
|
||||
|
||||
NO_SLAB_CPPFLAGS = -DDEBUG_NO_SLAB
|
||||
|
@ -193,6 +209,7 @@ CPPFLAGS = @CPPFLAGS@ @NO_SLAB_CPPFLAGS@ @DEBUG_STATS_CPPFLAGS@
|
|||
GEN_OPT = @GEN_OPT@ @SSE_OPT_FLAGS@
|
||||
BASE_OPT = @GEN_OPT@
|
||||
PREFIX=@PREFIX@
|
||||
AVX_OPT_FLAG = -mavx
|
||||
SSE4_OPT_FLAG = -msse4.2
|
||||
SSE3_OPT_FLAG = -mssse3
|
||||
SSE2_OPT_FLAG = -msse2
|
||||
|
@ -270,6 +287,16 @@ $(XXHASH_OBJS): $(XXHASH_SSE4_SRCS) $(XXHASH_SSE2_SRCS) $(XXHASH_HDRS) $(XXHASH_
|
|||
$(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE4_SRCS) -o $(XXHASH_SSE4_SRCS:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(XXHASH_SSE2_SRCS) -o $(XXHASH_SSE2_SRCS:.c=.o)
|
||||
|
||||
$(BLAKE2_OBJS): $(BLAKE2_SRCS) $(BLAKE2_BASE_SRCS) $(BLAKE2_HDRS)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(BLAKE2b_SSE2) -o $(BLAKE2b_SSE2:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE3_OPT_FLAG) $(CPPFLAGS) $(BLAKE2b_SSE3) -o $(BLAKE2b_SSE3:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(BLAKE2b_SSE4) -o $(BLAKE2b_SSE4:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(AVX_OPT_FLAG) $(CPPFLAGS) $(BLAKE2b_AVX) -o $(BLAKE2b_AVX:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE2_OPT_FLAG) $(CPPFLAGS) $(BLAKE2bp_SSE2) -o $(BLAKE2bp_SSE2:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE3_OPT_FLAG) $(CPPFLAGS) $(BLAKE2bp_SSE3) -o $(BLAKE2bp_SSE3:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(SSE4_OPT_FLAG) $(CPPFLAGS) $(BLAKE2bp_SSE4) -o $(BLAKE2bp_SSE4:.c=.o)
|
||||
$(COMPILE) $(BASE_OPT) $(AVX_OPT_FLAG) $(CPPFLAGS) $(BLAKE2bp_AVX) -o $(BLAKE2bp_AVX:.c=.o)
|
||||
|
||||
$(MAINOBJS): $(MAINSRCS) $(MAINHDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(LOOP_OPTFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
|
|
72
crypto/blake2/blake2-config.h
Normal file
72
crypto/blake2/blake2-config.h
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2_CONFIG_H__
|
||||
#define __BLAKE2_CONFIG_H__
|
||||
|
||||
// These don't work everywhere
|
||||
#if defined(__SSE2__)
|
||||
#define HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
#define HAVE_SSSE3
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
#define HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(__AVX__)
|
||||
#define HAVE_AVX
|
||||
#endif
|
||||
|
||||
#if defined(__XOP__)
|
||||
#define HAVE_XOP
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_AVX2
|
||||
#ifndef HAVE_AVX
|
||||
#define HAVE_AVX
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_XOP
|
||||
#ifndef HAVE_AVX
|
||||
#define HAVE_AVX
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_AVX
|
||||
#ifndef HAVE_SSE41
|
||||
#define HAVE_SSE41
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SSE41
|
||||
#ifndef HAVE_SSSE3
|
||||
#define HAVE_SSSE3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SSSE3
|
||||
#define HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_SSE2)
|
||||
#error "This code requires at least SSE2."
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
133
crypto/blake2/blake2-impl.h
Normal file
133
crypto/blake2/blake2-impl.h
Normal file
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2_IMPL_H__
|
||||
#define __BLAKE2_IMPL_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static inline uint32_t load32( const void *src )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *( uint32_t * )( src );
|
||||
#else
|
||||
const uint8_t *p = ( uint8_t * )src;
|
||||
uint32_t w = *p++;
|
||||
w |= ( uint32_t )( *p++ ) << 8;
|
||||
w |= ( uint32_t )( *p++ ) << 16;
|
||||
w |= ( uint32_t )( *p++ ) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint64_t load64( const void *src )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *( uint64_t * )( src );
|
||||
#else
|
||||
const uint8_t *p = ( uint8_t * )src;
|
||||
uint64_t w = *p++;
|
||||
w |= ( uint64_t )( *p++ ) << 8;
|
||||
w |= ( uint64_t )( *p++ ) << 16;
|
||||
w |= ( uint64_t )( *p++ ) << 24;
|
||||
w |= ( uint64_t )( *p++ ) << 32;
|
||||
w |= ( uint64_t )( *p++ ) << 40;
|
||||
w |= ( uint64_t )( *p++ ) << 48;
|
||||
w |= ( uint64_t )( *p++ ) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store32( void *dst, uint32_t w )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*( uint32_t * )( dst ) = w;
|
||||
#else
|
||||
uint8_t *p = ( uint8_t * )dst;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store64( void *dst, uint64_t w )
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*( uint64_t * )( dst ) = w;
|
||||
#else
|
||||
uint8_t *p = ( uint8_t * )dst;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint64_t load48( const void *src )
|
||||
{
|
||||
const uint8_t *p = ( const uint8_t * )src;
|
||||
uint64_t w = *p++;
|
||||
w |= ( uint64_t )( *p++ ) << 8;
|
||||
w |= ( uint64_t )( *p++ ) << 16;
|
||||
w |= ( uint64_t )( *p++ ) << 24;
|
||||
w |= ( uint64_t )( *p++ ) << 32;
|
||||
w |= ( uint64_t )( *p++ ) << 40;
|
||||
return w;
|
||||
}
|
||||
|
||||
static inline void store48( void *dst, uint64_t w )
|
||||
{
|
||||
uint8_t *p = ( uint8_t * )dst;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w; w >>= 8;
|
||||
*p++ = ( uint8_t )w;
|
||||
}
|
||||
|
||||
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
|
||||
{
|
||||
return ( w << c ) | ( w >> ( 32 - c ) );
|
||||
}
|
||||
|
||||
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
|
||||
{
|
||||
return ( w << c ) | ( w >> ( 64 - c ) );
|
||||
}
|
||||
|
||||
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
|
||||
{
|
||||
return ( w >> c ) | ( w << ( 32 - c ) );
|
||||
}
|
||||
|
||||
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
|
||||
{
|
||||
return ( w >> c ) | ( w << ( 64 - c ) );
|
||||
}
|
||||
|
||||
/* prevents compiler optimizing out memset() */
|
||||
static inline void secure_zero_memory( void *v, size_t n )
|
||||
{
|
||||
volatile uint8_t *p = ( volatile uint8_t * )v;
|
||||
|
||||
while( n-- ) *p++ = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
16467
crypto/blake2/blake2-kat.h
Normal file
16467
crypto/blake2/blake2-kat.h
Normal file
File diff suppressed because it is too large
Load diff
133
crypto/blake2/blake2.h
Normal file
133
crypto/blake2/blake2.h
Normal file
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2_H__
|
||||
#define __BLAKE2_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define BLAKE_ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define BLAKE_ALIGN(x) __attribute__ ((__aligned__(x)))
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2b_param
|
||||
{
|
||||
uint8_t digest_length; // 1
|
||||
uint8_t key_length; // 2
|
||||
uint8_t fanout; // 3
|
||||
uint8_t depth; // 4
|
||||
uint32_t leaf_length; // 8
|
||||
uint64_t node_offset; // 16
|
||||
uint8_t node_depth; // 17
|
||||
uint8_t inner_length; // 18
|
||||
uint8_t reserved[14]; // 32
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; // 48
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64
|
||||
} blake2b_param;
|
||||
|
||||
BLAKE_ALIGN( 64 ) typedef struct __blake2b_state
|
||||
{
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[2 * BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
BLAKE_ALIGN( 64 ) typedef struct __blake2bp_state
|
||||
{
|
||||
blake2b_state S[4][1];
|
||||
blake2b_state R[1];
|
||||
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
} blake2bp_state;
|
||||
#pragma pack(pop)
|
||||
|
||||
// Streaming API
|
||||
int blake2b_init_sse2( blake2b_state *S, const uint8_t outlen );
|
||||
int blake2b_init_key_sse2( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2b_init_param_sse2( blake2b_state *S, const blake2b_param *P );
|
||||
int blake2b_update_sse2( blake2b_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2b_final_sse2( blake2b_state *S, uint8_t *out, uint8_t outlen );
|
||||
int blake2bp_init_sse2( blake2bp_state *S, const uint8_t outlen );
|
||||
int blake2bp_init_key_sse2( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2bp_update_sse2( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2bp_final_sse2( blake2bp_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
int blake2b_init_ssse3( blake2b_state *S, const uint8_t outlen );
|
||||
int blake2b_init_key_ssse3( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2b_init_param_ssse3( blake2b_state *S, const blake2b_param *P );
|
||||
int blake2b_update_ssse3( blake2b_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2b_final_ssse3( blake2b_state *S, uint8_t *out, uint8_t outlen );
|
||||
int blake2bp_init_ssse3( blake2bp_state *S, const uint8_t outlen );
|
||||
int blake2bp_init_key_ssse3( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2bp_update_ssse3( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2bp_final_ssse3( blake2bp_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
int blake2b_init_sse41( blake2b_state *S, const uint8_t outlen );
|
||||
int blake2b_init_key_sse41( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2b_init_param_sse41( blake2b_state *S, const blake2b_param *P );
|
||||
int blake2b_update_sse41( blake2b_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2b_final_sse41( blake2b_state *S, uint8_t *out, uint8_t outlen );
|
||||
int blake2bp_init_sse41( blake2bp_state *S, const uint8_t outlen );
|
||||
int blake2bp_init_key_sse41( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2bp_update_sse41( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2bp_final_sse41( blake2bp_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
int blake2b_init_avx( blake2b_state *S, const uint8_t outlen );
|
||||
int blake2b_init_key_avx( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2b_init_param_avx( blake2b_state *S, const blake2b_param *P );
|
||||
int blake2b_update_avx( blake2b_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2b_final_avx( blake2b_state *S, uint8_t *out, uint8_t outlen );
|
||||
int blake2bp_init_avx( blake2bp_state *S, const uint8_t outlen );
|
||||
int blake2bp_init_key_avx( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2bp_update_avx( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2bp_final_avx( blake2bp_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
// Simple API
|
||||
int blake2b_sse2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
int blake2bp_sse2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
int blake2b_ssse3( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
int blake2bp_ssse3( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
int blake2b_sse41( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
int blake2bp_sse41( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
int blake2b_avx( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
int blake2bp_avx( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
101
crypto/blake2/blake2_digest.h
Normal file
101
crypto/blake2/blake2_digest.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
#ifndef __BLAKE2_DIGEST_H__
|
||||
#define __BLAKE2_DIGEST_H__
|
||||
|
||||
#include "blake2.h"
|
||||
#include <cpuid.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int (*blake2b_init_funcptr)( blake2b_state *S, const uint8_t outlen );
|
||||
typedef int (*blake2b_init_key_funcptr)( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
typedef int (*blake2b_init_param_funcptr)( blake2b_state *S, const blake2b_param *P );
|
||||
typedef int (*blake2b_update_funcptr)( blake2b_state *S, const uint8_t *in, uint64_t inlen );
|
||||
typedef int (*blake2b_final_funcptr)( blake2b_state *S, uint8_t *out, uint8_t outlen );
|
||||
typedef int (*blake2bp_init_funcptr)( blake2bp_state *S, const uint8_t outlen );
|
||||
typedef int (*blake2bp_init_key_funcptr)( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
typedef int (*blake2bp_update_funcptr)( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
|
||||
typedef int (*blake2bp_final_funcptr)( blake2bp_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
typedef int (*blake2b_funcptr)( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
typedef int (*blake2bp_funcptr)( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
/*
|
||||
* BLAKE2 function pointers. These are set to the optimized routines
|
||||
* based on CPU capabilities.
|
||||
*/
|
||||
struct blake2_dispatch {
|
||||
blake2b_init_funcptr blake2b_init;
|
||||
blake2b_init_key_funcptr blake2b_init_key;
|
||||
blake2b_init_param_funcptr blake2b_init_param;
|
||||
blake2b_update_funcptr blake2b_update;
|
||||
blake2b_final_funcptr blake2b_final;
|
||||
blake2bp_init_funcptr blake2bp_init;
|
||||
blake2bp_init_key_funcptr blake2bp_init_key;
|
||||
blake2bp_update_funcptr blake2bp_update;
|
||||
blake2bp_final_funcptr blake2bp_final;
|
||||
blake2b_funcptr blake2b;
|
||||
blake2bp_funcptr blake2bp;
|
||||
};
|
||||
|
||||
static void blake2_module_init(struct blake2_dispatch *dsp, processor_info_t *pc)
|
||||
{
|
||||
dsp->blake2b_init = blake2b_init_sse2;
|
||||
dsp->blake2b_init_key = blake2b_init_key_sse2;
|
||||
dsp->blake2b_init_param = blake2b_init_param_sse2;
|
||||
dsp->blake2b_update = blake2b_update_sse2;
|
||||
dsp->blake2b_final = blake2b_final_sse2;
|
||||
dsp->blake2bp_init = blake2bp_init_sse2;
|
||||
dsp->blake2bp_init_key = blake2bp_init_key_sse2;
|
||||
dsp->blake2bp_update = blake2bp_update_sse2;
|
||||
dsp->blake2bp_final = blake2bp_final_sse2;
|
||||
dsp->blake2b = blake2b_sse2;
|
||||
dsp->blake2bp = blake2bp_sse2;
|
||||
|
||||
if (pc->sse_level == 3 && pc->sse_sub_level == 1) {
|
||||
dsp->blake2b_init = blake2b_init_ssse3;
|
||||
dsp->blake2b_init_key = blake2b_init_key_ssse3;
|
||||
dsp->blake2b_init_param = blake2b_init_param_ssse3;
|
||||
dsp->blake2b_update = blake2b_update_ssse3;
|
||||
dsp->blake2b_final = blake2b_final_ssse3;
|
||||
dsp->blake2bp_init = blake2bp_init_ssse3;
|
||||
dsp->blake2bp_init_key = blake2bp_init_key_ssse3;
|
||||
dsp->blake2bp_update = blake2bp_update_ssse3;
|
||||
dsp->blake2bp_final = blake2bp_final_ssse3;
|
||||
dsp->blake2b = blake2b_ssse3;
|
||||
dsp->blake2bp = blake2bp_ssse3;
|
||||
|
||||
} else if (pc->sse_level == 4 && pc->sse_sub_level >= 1) {
|
||||
dsp->blake2b_init = blake2b_init_sse41;
|
||||
dsp->blake2b_init_key = blake2b_init_key_sse41;
|
||||
dsp->blake2b_init_param = blake2b_init_param_sse41;
|
||||
dsp->blake2b_update = blake2b_update_sse41;
|
||||
dsp->blake2b_final = blake2b_final_sse41;
|
||||
dsp->blake2bp_init = blake2bp_init_sse41;
|
||||
dsp->blake2bp_init_key = blake2bp_init_key_sse41;
|
||||
dsp->blake2bp_update = blake2bp_update_sse41;
|
||||
dsp->blake2bp_final = blake2bp_final_sse41;
|
||||
dsp->blake2b = blake2b_sse41;
|
||||
dsp->blake2bp = blake2bp_sse41;
|
||||
}
|
||||
if (pc->avx_level >= 1) {
|
||||
dsp->blake2b_init = blake2b_init_avx;
|
||||
dsp->blake2b_init_key = blake2b_init_key_avx;
|
||||
dsp->blake2b_init_param = blake2b_init_param_avx;
|
||||
dsp->blake2b_update = blake2b_update_avx;
|
||||
dsp->blake2b_final = blake2b_final_avx;
|
||||
dsp->blake2bp_init = blake2bp_init_avx;
|
||||
dsp->blake2bp_init_key = blake2bp_init_key_avx;
|
||||
dsp->blake2bp_update = blake2bp_update_avx;
|
||||
dsp->blake2bp_final = blake2bp_final_avx;
|
||||
dsp->blake2b = blake2b_avx;
|
||||
dsp->blake2bp = blake2bp_avx;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
68
crypto/blake2/blake2b-load-sse2.h
Normal file
68
crypto/blake2/blake2b-load-sse2.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_LOAD_SSE2_H__
|
||||
#define __BLAKE2B_LOAD_SSE2_H__
|
||||
|
||||
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
||||
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
||||
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
||||
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
||||
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
||||
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
||||
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
||||
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
||||
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
|
||||
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
|
||||
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
|
||||
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
|
||||
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
|
||||
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
|
||||
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
|
||||
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
|
||||
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
|
||||
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
|
||||
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
|
||||
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
|
||||
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
|
||||
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
|
||||
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
|
||||
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
|
||||
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
|
||||
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
|
||||
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
|
||||
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
|
||||
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
|
||||
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
|
||||
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
|
||||
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
|
||||
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
|
||||
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
|
||||
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
|
||||
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
|
||||
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
|
||||
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
|
||||
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
|
||||
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
|
||||
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
||||
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
||||
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
||||
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
||||
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
||||
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
||||
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
||||
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
||||
|
||||
|
||||
#endif
|
||||
|
402
crypto/blake2/blake2b-load-sse41.h
Normal file
402
crypto/blake2/blake2b-load-sse41.h
Normal file
|
@ -0,0 +1,402 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_LOAD_SSE41_H__
|
||||
#define __BLAKE2B_LOAD_SSE41_H__
|
||||
|
||||
#define LOAD_MSG_0_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
b1 = _mm_unpacklo_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
b1 = _mm_unpackhi_epi64(m4, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m6, m5, 8); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m0); \
|
||||
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m3); \
|
||||
b1 = _mm_alignr_epi8(m2, m0, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m0); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m3, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m2); \
|
||||
b1 = _mm_unpacklo_epi64(m1, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m6, m0, 8); \
|
||||
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m1, m3); \
|
||||
b1 = _mm_unpacklo_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
|
||||
b1 = _mm_unpackhi_epi64(m7, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m6, m2); \
|
||||
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
|
||||
b1 = _mm_unpacklo_epi64(m7, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
b1 = _mm_alignr_epi8(m5, m6, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m3); \
|
||||
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m6, m3); \
|
||||
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
b1 = _mm_unpackhi_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
b1 = _mm_unpacklo_epi64(m4, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m2); \
|
||||
b1 = _mm_unpacklo_epi64(m3, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m3, m7); \
|
||||
b1 = _mm_alignr_epi8(m0, m5, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
b1 = _mm_alignr_epi8(m4, m1, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = m6; \
|
||||
b1 = _mm_alignr_epi8(m5, m0, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
|
||||
b1 = m2; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m1, m2); \
|
||||
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
b1 = _mm_unpackhi_epi64(m1, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
b1 = _mm_unpacklo_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
b1 = _mm_unpackhi_epi64(m4, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#endif
|
||||
|
160
crypto/blake2/blake2b-round.h
Normal file
160
crypto/blake2/blake2b-round.h
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_ROUND_H__
|
||||
#define __BLAKE2B_ROUND_H__
|
||||
|
||||
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
|
||||
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
|
||||
|
||||
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
|
||||
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
|
||||
|
||||
#define TOF(reg) _mm_castsi128_ps((reg))
|
||||
#define TOI(reg) _mm_castps_si128((reg))
|
||||
|
||||
#define LIKELY(x) __builtin_expect((x),1)
|
||||
|
||||
|
||||
/* Microarchitecture-specific macros */
|
||||
#ifndef HAVE_XOP
|
||||
#ifdef HAVE_SSSE3
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
||||
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
||||
#else
|
||||
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-c) ))
|
||||
#endif
|
||||
#else
|
||||
/* ... */
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -32); \
|
||||
row4h = _mm_roti_epi64(row4h, -32); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -24); \
|
||||
row2h = _mm_roti_epi64(row2h, -24); \
|
||||
|
||||
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -16); \
|
||||
row4h = _mm_roti_epi64(row4h, -16); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -63); \
|
||||
row2h = _mm_roti_epi64(row2h, -63); \
|
||||
|
||||
#if defined(HAVE_SSSE3)
|
||||
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
#else
|
||||
|
||||
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = row4l;\
|
||||
t1 = row2l;\
|
||||
row4l = row3l;\
|
||||
row3l = row3h;\
|
||||
row3h = row4l;\
|
||||
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
|
||||
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
|
||||
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
|
||||
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
|
||||
|
||||
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = row3l;\
|
||||
row3l = row3h;\
|
||||
row3h = t0;\
|
||||
t0 = row2l;\
|
||||
t1 = row4l;\
|
||||
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
|
||||
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
|
||||
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
|
||||
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_SSE41)
|
||||
#include "blake2b-load-sse41.h"
|
||||
#else
|
||||
#include "blake2b-load-sse2.h"
|
||||
#endif
|
||||
|
||||
#define ROUND(r) \
|
||||
LOAD_MSG_ ##r ##_1(b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_2(b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
||||
LOAD_MSG_ ##r ##_3(b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_4(b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
||||
|
||||
#endif
|
||||
|
435
crypto/blake2/blake2b.c
Normal file
435
crypto/blake2/blake2b.c
Normal file
|
@ -0,0 +1,435 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#include "blake2-config.h"
|
||||
|
||||
|
||||
#include <emmintrin.h>
|
||||
#if defined(HAVE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#if defined(HAVE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#if defined(HAVE_XOP)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include "blake2b-round.h"
|
||||
|
||||
#ifndef BLAKE_NAMESPACE
|
||||
#define BLAKE_NAMESPACE(x) x
|
||||
#endif
|
||||
|
||||
BLAKE_ALIGN( 64 ) static const uint64_t blake2b_IV[8] =
|
||||
{
|
||||
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
||||
};
|
||||
|
||||
static const uint8_t blake2b_sigma[12][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static inline int blake2b_set_lastnode( blake2b_state *S )
|
||||
{
|
||||
S->f[1] = ~0ULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_clear_lastnode( blake2b_state *S )
|
||||
{
|
||||
S->f[1] = 0ULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_set_lastblock( blake2b_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2b_set_lastnode( S );
|
||||
|
||||
S->f[0] = ~0ULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_clear_lastblock( blake2b_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2b_clear_lastnode( S );
|
||||
|
||||
S->f[0] = 0ULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
|
||||
{
|
||||
#if __x86_64__
|
||||
// ADD/ADC chain
|
||||
__uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
|
||||
t += inc;
|
||||
S->t[0] = ( uint64_t )( t >> 0 );
|
||||
S->t[1] = ( uint64_t )( t >> 64 );
|
||||
#else
|
||||
S->t[0] += inc;
|
||||
S->t[1] += ( S->t[0] < inc );
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Parameter-related functions
|
||||
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
|
||||
{
|
||||
P->digest_length = digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
|
||||
{
|
||||
P->fanout = fanout;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
|
||||
{
|
||||
P->depth = depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
|
||||
{
|
||||
P->leaf_length = leaf_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
|
||||
{
|
||||
P->node_offset = node_offset;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
|
||||
{
|
||||
P->node_depth = node_depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
|
||||
{
|
||||
P->inner_length = inner_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
|
||||
{
|
||||
memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
|
||||
{
|
||||
memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2b_init0( blake2b_state *S )
|
||||
{
|
||||
memset( S, 0, sizeof( blake2b_state ) );
|
||||
|
||||
for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* init xors IV with input parameter block */
|
||||
int BLAKE_NAMESPACE(blake2b_init_param) ( blake2b_state *S, const blake2b_param *P )
|
||||
{
|
||||
uint8_t *p, *h, *v;
|
||||
//blake2b_init0( S );
|
||||
v = ( uint8_t * )( blake2b_IV );
|
||||
h = ( uint8_t * )( S->h );
|
||||
p = ( uint8_t * )( P );
|
||||
/* IV XOR ParamBlock */
|
||||
memset( S, 0, sizeof( blake2b_state ) );
|
||||
|
||||
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Some sort of default parameter block initialization, for sequential blake2b */
|
||||
int BLAKE_NAMESPACE(blake2b_init) ( blake2b_state *S, const uint8_t outlen )
|
||||
{
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
||||
|
||||
const blake2b_param P =
|
||||
{
|
||||
outlen,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{0},
|
||||
{0},
|
||||
{0}
|
||||
};
|
||||
return BLAKE_NAMESPACE(blake2b_init_param) ( S, &P );
|
||||
}
|
||||
|
||||
int BLAKE_NAMESPACE(blake2b_init_key) ( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
|
||||
{
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
||||
|
||||
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||
|
||||
const blake2b_param P =
|
||||
{
|
||||
outlen,
|
||||
keylen,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{0},
|
||||
{0},
|
||||
{0}
|
||||
};
|
||||
|
||||
if( BLAKE_NAMESPACE(blake2b_init_param) ( S, &P ) < 0 )
|
||||
return 0;
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2B_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S, block, BLAKE2B_BLOCKBYTES );
|
||||
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int BLAKE_NAMESPACE(blake2b_compress) ( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
||||
{
|
||||
__m128i row1l, row1h;
|
||||
__m128i row2l, row2h;
|
||||
__m128i row3l, row3h;
|
||||
__m128i row4l, row4h;
|
||||
__m128i b0, b1;
|
||||
__m128i t0, t1;
|
||||
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
|
||||
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
|
||||
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
|
||||
#endif
|
||||
#if defined(HAVE_SSE41)
|
||||
const __m128i m0 = LOADU( block + 00 );
|
||||
const __m128i m1 = LOADU( block + 16 );
|
||||
const __m128i m2 = LOADU( block + 32 );
|
||||
const __m128i m3 = LOADU( block + 48 );
|
||||
const __m128i m4 = LOADU( block + 64 );
|
||||
const __m128i m5 = LOADU( block + 80 );
|
||||
const __m128i m6 = LOADU( block + 96 );
|
||||
const __m128i m7 = LOADU( block + 112 );
|
||||
#else
|
||||
const uint64_t m0 = ( ( uint64_t * )block )[ 0];
|
||||
const uint64_t m1 = ( ( uint64_t * )block )[ 1];
|
||||
const uint64_t m2 = ( ( uint64_t * )block )[ 2];
|
||||
const uint64_t m3 = ( ( uint64_t * )block )[ 3];
|
||||
const uint64_t m4 = ( ( uint64_t * )block )[ 4];
|
||||
const uint64_t m5 = ( ( uint64_t * )block )[ 5];
|
||||
const uint64_t m6 = ( ( uint64_t * )block )[ 6];
|
||||
const uint64_t m7 = ( ( uint64_t * )block )[ 7];
|
||||
const uint64_t m8 = ( ( uint64_t * )block )[ 8];
|
||||
const uint64_t m9 = ( ( uint64_t * )block )[ 9];
|
||||
const uint64_t m10 = ( ( uint64_t * )block )[10];
|
||||
const uint64_t m11 = ( ( uint64_t * )block )[11];
|
||||
const uint64_t m12 = ( ( uint64_t * )block )[12];
|
||||
const uint64_t m13 = ( ( uint64_t * )block )[13];
|
||||
const uint64_t m14 = ( ( uint64_t * )block )[14];
|
||||
const uint64_t m15 = ( ( uint64_t * )block )[15];
|
||||
#endif
|
||||
row1l = LOAD( &S->h[0] );
|
||||
row1h = LOAD( &S->h[2] );
|
||||
row2l = LOAD( &S->h[4] );
|
||||
row2h = LOAD( &S->h[6] );
|
||||
row3l = LOAD( &blake2b_IV[0] );
|
||||
row3h = LOAD( &blake2b_IV[2] );
|
||||
row4l = _mm_xor_si128( LOAD( &blake2b_IV[4] ), LOAD( &S->t[0] ) );
|
||||
row4h = _mm_xor_si128( LOAD( &blake2b_IV[6] ), LOAD( &S->f[0] ) );
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
ROUND( 10 );
|
||||
ROUND( 11 );
|
||||
row1l = _mm_xor_si128( row3l, row1l );
|
||||
row1h = _mm_xor_si128( row3h, row1h );
|
||||
STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) );
|
||||
STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) );
|
||||
row2l = _mm_xor_si128( row4l, row2l );
|
||||
row2h = _mm_xor_si128( row4h, row2h );
|
||||
STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) );
|
||||
STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2b_update) ( blake2b_state *S, const uint8_t *in, uint64_t inlen )
|
||||
{
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
|
||||
|
||||
if( inlen > fill )
|
||||
{
|
||||
memcpy( S->buf + left, in, fill ); // Fill buffer
|
||||
S->buflen += fill;
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
BLAKE_NAMESPACE(blake2b_compress) ( S, S->buf ); // Compress
|
||||
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
}
|
||||
else // inlen <= fill
|
||||
{
|
||||
memcpy( S->buf + left, in, inlen );
|
||||
S->buflen += inlen; // Be lazy, do not compress
|
||||
in += inlen;
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2b_final) ( blake2b_state *S, uint8_t *out, uint8_t outlen )
|
||||
{
|
||||
if( S->buflen > BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
|
||||
BLAKE_NAMESPACE(blake2b_compress) ( S, S->buf );
|
||||
S->buflen -= BLAKE2B_BLOCKBYTES;
|
||||
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
|
||||
}
|
||||
|
||||
blake2b_increment_counter( S, S->buflen );
|
||||
blake2b_set_lastblock( S );
|
||||
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
|
||||
BLAKE_NAMESPACE(blake2b_compress) ( S, S->buf );
|
||||
memcpy( out, &S->h[0], outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2b) ( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
|
||||
{
|
||||
blake2b_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if( NULL == key ) keylen = 0;
|
||||
|
||||
if( keylen )
|
||||
{
|
||||
if( BLAKE_NAMESPACE(blake2b_init_key) ( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( BLAKE_NAMESPACE(blake2b_init) ( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S, ( uint8_t * )in, inlen );
|
||||
BLAKE_NAMESPACE(blake2b_final) ( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(SUPERCOP)
|
||||
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
|
||||
{
|
||||
return BLAKE_NAMESPACE(blake2b) ( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(BLAKE2B_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
uint8_t key[BLAKE2B_KEYBYTES];
|
||||
uint8_t buf[KAT_LENGTH];
|
||||
|
||||
for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2B_OUTBYTES];
|
||||
BLAKE_NAMESPACE(blake2b) ( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
|
||||
{
|
||||
puts( "error" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
4
crypto/blake2/blake2b_avx.c
Normal file
4
crypto/blake2/blake2b_avx.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_AVX
|
||||
#define BLAKE_NAMESPACE(x) x##_avx
|
||||
#include "blake2b.c"
|
||||
|
4
crypto/blake2/blake2b_sse2.c
Normal file
4
crypto/blake2/blake2b_sse2.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSE2
|
||||
#define BLAKE_NAMESPACE(x) x##_sse2
|
||||
#include "blake2b.c"
|
||||
|
4
crypto/blake2/blake2b_sse41.c
Normal file
4
crypto/blake2/blake2b_sse41.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSE41
|
||||
#define BLAKE_NAMESPACE(x) x##_sse41
|
||||
#include "blake2b.c"
|
||||
|
4
crypto/blake2/blake2b_ssse3.c
Normal file
4
crypto/blake2/blake2b_ssse3.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSSE3
|
||||
#define BLAKE_NAMESPACE(x) x##_ssse3
|
||||
#include "blake2b.c"
|
||||
|
301
crypto/blake2/blake2bp.c
Normal file
301
crypto/blake2/blake2bp.c
Normal file
|
@ -0,0 +1,301 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#define PARALLELISM_DEGREE 4
|
||||
|
||||
#ifndef BLAKE_NAMESPACE
|
||||
#define BLAKE_NAMESPACE(x) x
|
||||
#endif
|
||||
|
||||
|
||||
static inline int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
|
||||
{
|
||||
blake2b_param P[1];
|
||||
P->digest_length = outlen;
|
||||
P->key_length = keylen;
|
||||
P->fanout = PARALLELISM_DEGREE;
|
||||
P->depth = 2;
|
||||
P->leaf_length = 0;
|
||||
P->node_offset = offset;
|
||||
P->node_depth = 0;
|
||||
P->inner_length = outlen;
|
||||
memset( P->reserved, 0, sizeof( P->reserved ) );
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
return BLAKE_NAMESPACE(blake2b_init_param) ( S, P );
|
||||
}
|
||||
|
||||
static inline int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
|
||||
{
|
||||
blake2b_param P[1];
|
||||
P->digest_length = outlen;
|
||||
P->key_length = keylen;
|
||||
P->fanout = PARALLELISM_DEGREE;
|
||||
P->depth = 2;
|
||||
P->leaf_length = 0;
|
||||
P->node_offset = 0;
|
||||
P->node_depth = 1;
|
||||
P->inner_length = outlen;
|
||||
memset( P->reserved, 0, sizeof( P->reserved ) );
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
return BLAKE_NAMESPACE(blake2b_init_param) ( S, P );
|
||||
}
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2bp_init) ( blake2bp_state *S, const uint8_t outlen )
|
||||
{
|
||||
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||
|
||||
memset( S->buf, 0, sizeof( S->buf ) );
|
||||
S->buflen = 0;
|
||||
|
||||
if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
|
||||
return -1;
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
|
||||
|
||||
S->R->last_node = 1;
|
||||
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BLAKE_NAMESPACE(blake2bp_init_key) ( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
|
||||
{
|
||||
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||
|
||||
if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||
|
||||
memset( S->buf, 0, sizeof( S->buf ) );
|
||||
S->buflen = 0;
|
||||
|
||||
if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
|
||||
return -1;
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||
|
||||
S->R->last_node = 1;
|
||||
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2B_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S->S[i], block, BLAKE2B_BLOCKBYTES );
|
||||
|
||||
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2bp_update) ( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
size_t fill = sizeof( S->buf ) - left;
|
||||
|
||||
if( left && inlen >= fill )
|
||||
{
|
||||
memcpy( S->buf + left, in, fill );
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
|
||||
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
left = 0;
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
|
||||
#else
|
||||
|
||||
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
size_t id__ = omp_get_thread_num();
|
||||
#endif
|
||||
uint64_t inlen__ = inlen;
|
||||
const uint8_t *in__ = ( const uint8_t * )in;
|
||||
in__ += id__ * BLAKE2B_BLOCKBYTES;
|
||||
|
||||
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S->S[id__], in__, BLAKE2B_BLOCKBYTES );
|
||||
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
}
|
||||
|
||||
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
|
||||
inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
|
||||
if( inlen > 0 )
|
||||
memcpy( S->buf + left, in, inlen );
|
||||
|
||||
S->buflen = left + inlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int BLAKE_NAMESPACE(blake2bp_final) ( blake2bp_state *S, uint8_t *out, const uint8_t outlen )
|
||||
{
|
||||
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
{
|
||||
if( S->buflen > i * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;
|
||||
|
||||
if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
|
||||
}
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_final) ( S->S[i], hash[i], BLAKE2B_OUTBYTES );
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S->R, hash[i], BLAKE2B_OUTBYTES );
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_final) ( S->R, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BLAKE_NAMESPACE(blake2bp) ( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
|
||||
{
|
||||
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
|
||||
blake2b_state S[PARALLELISM_DEGREE][1];
|
||||
blake2b_state FS[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key ) keylen = 0;
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||
|
||||
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2B_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S[i], block, BLAKE2B_BLOCKBYTES );
|
||||
|
||||
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
|
||||
#else
|
||||
|
||||
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
size_t id__ = omp_get_thread_num();
|
||||
#endif
|
||||
uint64_t inlen__ = inlen;
|
||||
const uint8_t *in__ = ( const uint8_t * )in;
|
||||
in__ += id__ * BLAKE2B_BLOCKBYTES;
|
||||
|
||||
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S[id__], in__, BLAKE2B_BLOCKBYTES );
|
||||
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
|
||||
if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
|
||||
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
|
||||
BLAKE_NAMESPACE(blake2b_update) ( S[id__], in__, len );
|
||||
}
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_final) ( S[id__], hash[id__], BLAKE2B_OUTBYTES );
|
||||
}
|
||||
|
||||
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
|
||||
return -1;
|
||||
|
||||
FS->last_node = 1; // Mark as last node
|
||||
|
||||
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
BLAKE_NAMESPACE(blake2b_update) ( FS, hash[i], BLAKE2B_OUTBYTES );
|
||||
|
||||
BLAKE_NAMESPACE(blake2b_final) ( FS, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#if defined(BLAKE2BP_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
uint8_t key[BLAKE2B_KEYBYTES];
|
||||
uint8_t buf[KAT_LENGTH];
|
||||
|
||||
for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2B_OUTBYTES];
|
||||
//blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
|
||||
blake2bp_state S[1];
|
||||
BLAKE_NAMESPACE(blake2bp_init_key) ( S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES );
|
||||
BLAKE_NAMESPACE(blake2bp_update) ( S, buf, i );
|
||||
BLAKE_NAMESPACE(blake2bp_final) ( S, hash, BLAKE2B_OUTBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
|
||||
{
|
||||
puts( "error" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
4
crypto/blake2/blake2bp_avx.c
Normal file
4
crypto/blake2/blake2bp_avx.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_AVX
|
||||
#define BLAKE_NAMESPACE(x) x##_avx
|
||||
#include "blake2bp.c"
|
||||
|
4
crypto/blake2/blake2bp_sse2.c
Normal file
4
crypto/blake2/blake2bp_sse2.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSE2
|
||||
#define BLAKE_NAMESPACE(x) x##_sse2
|
||||
#include "blake2bp.c"
|
||||
|
4
crypto/blake2/blake2bp_sse41.c
Normal file
4
crypto/blake2/blake2bp_sse41.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSE41
|
||||
#define BLAKE_NAMESPACE(x) x##_sse41
|
||||
#include "blake2bp.c"
|
||||
|
4
crypto/blake2/blake2bp_ssse3.c
Normal file
4
crypto/blake2/blake2bp_ssse3.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
#define HAVE_SSSE3
|
||||
#define BLAKE_NAMESPACE(x) x##_ssse3
|
||||
#include "blake2bp.c"
|
||||
|
|
@ -36,8 +36,8 @@
|
|||
#include <openssl/rand.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <openssl/hmac.h>
|
||||
//#include <sha256.h>
|
||||
#include <sha512.h>
|
||||
#include <blake2_digest.h>
|
||||
#include <crypto_aes.h>
|
||||
#include <KeccakNISTInterface.h>
|
||||
#include <utils.h>
|
||||
|
@ -48,7 +48,10 @@
|
|||
#define PROVIDER_X64_OPT 1
|
||||
|
||||
static void init_sha512(void);
|
||||
static void init_blake2(void);
|
||||
static int geturandom_bytes(uchar_t rbytes[32]);
|
||||
static struct blake2_dispatch bdsp;
|
||||
|
||||
/*
|
||||
* Checksum properties
|
||||
*/
|
||||
|
@ -59,25 +62,26 @@ static struct {
|
|||
cksum_t cksum_id;
|
||||
int bytes, mac_bytes;
|
||||
ckinit_func_ptr init_func;
|
||||
int compatible;
|
||||
} cksum_props[] = {
|
||||
{"CRC64", "Fast 64-bit CRC from LZMA SDK.",
|
||||
CKSUM_CRC64, 8, 32, NULL},
|
||||
{"CRC64", "Extremely Fast 64-bit CRC from LZMA SDK.",
|
||||
CKSUM_CRC64, 8, 32, NULL, 0},
|
||||
{"SKEIN256", "256-bit SKEIN a NIST SHA3 runners-up (90% faster than Keccak).",
|
||||
CKSUM_SKEIN256, 32, 32, NULL},
|
||||
CKSUM_SKEIN256, 32, 32, NULL, 1},
|
||||
{"SKEIN512", "512-bit SKEIN",
|
||||
CKSUM_SKEIN512, 64, 64, NULL},
|
||||
{"SHA256", "Intel's optimized (SSE,AVX) 256-bit SHA2 implementation for x86.",
|
||||
CKSUM_SHA256, 32, 32, init_sha512},
|
||||
{"SHA512", "512-bit SHA2 from OpenSSL's crypto library.",
|
||||
CKSUM_SHA512, 64, 64, init_sha512},
|
||||
CKSUM_SKEIN512, 64, 64, NULL, 1},
|
||||
{"SHA256", "SHA512/256 version of Intel's optimized (SSE,AVX) SHA2 for x86.",
|
||||
CKSUM_SHA256, 32, 32, init_sha512, 0},
|
||||
{"SHA512", "SHA512 version of Intel's optimized (SSE,AVX) SHA2 for x86.",
|
||||
CKSUM_SHA512, 64, 64, init_sha512, 0},
|
||||
{"KECCAK256", "Official 256-bit NIST SHA3 optimized implementation.",
|
||||
CKSUM_KECCAK256, 32, 32, NULL},
|
||||
CKSUM_KECCAK256, 32, 32, NULL, 0},
|
||||
{"KECCAK512", "Official 512-bit NIST SHA3 optimized implementation.",
|
||||
CKSUM_KECCAK512, 64, 64, NULL},
|
||||
CKSUM_KECCAK512, 64, 64, NULL, 0},
|
||||
{"BLAKE256", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
|
||||
CKSUM_BLAKE256, 32, 32, NULL},
|
||||
CKSUM_BLAKE256, 32, 32, init_blake2, 0},
|
||||
{"BLAKE512", "Very fast 256-bit BLAKE2, derived from the NIST SHA3 runner-up BLAKE.",
|
||||
CKSUM_BLAKE512, 64, 64, NULL}
|
||||
CKSUM_BLAKE512, 64, 64, init_blake2, 0}
|
||||
};
|
||||
|
||||
static int cksum_provider = PROVIDER_OPENSSL;
|
||||
|
@ -169,6 +173,14 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes)
|
|||
uint64_t *ck = (uint64_t *)cksum_buf;
|
||||
*ck = lzma_crc64(buf, bytes, 0);
|
||||
|
||||
} else if (cksum == CKSUM_BLAKE256) {
|
||||
if (bdsp.blake2b(cksum_buf, buf, NULL, 32, bytes, 0) != 0)
|
||||
return (-1);
|
||||
|
||||
} else if (cksum == CKSUM_BLAKE512) {
|
||||
if (bdsp.blake2b(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
|
||||
return (-1);
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256) {
|
||||
Skein_512_Ctxt_t ctx;
|
||||
|
||||
|
@ -244,12 +256,19 @@ init_sha512(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
init_blake2(void)
|
||||
{
|
||||
blake2_module_init(&bdsp, &proc_info);
|
||||
}
|
||||
|
||||
void
|
||||
list_checksums(FILE *strm, char *pad)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) {
|
||||
fprintf(strm, "%s%10s - %s\n", pad, cksum_props[i].name, cksum_props[i].desc);
|
||||
if (!cksum_props[i].compatible)
|
||||
fprintf(strm, "%s%10s - %s\n", pad, cksum_props[i].name, cksum_props[i].desc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -258,13 +277,16 @@ list_checksums(FILE *strm, char *pad)
|
|||
* return it's properties.
|
||||
*/
|
||||
int
|
||||
get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_bytes)
|
||||
get_checksum_props(const char *name, int *cksum, int *cksum_bytes,
|
||||
int *mac_bytes, int accept_comptible)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<(sizeof (cksum_props)/sizeof (cksum_props[0])); i++) {
|
||||
if ((name != NULL && strcmp(name, cksum_props[i].name) == 0) ||
|
||||
(*cksum != 0 && *cksum == cksum_props[i].cksum_id)) {
|
||||
if (!accept_comptible && cksum_props[i].compatible)
|
||||
break;
|
||||
*cksum = cksum_props[i].cksum_id;
|
||||
*cksum_bytes = cksum_props[i].bytes;
|
||||
*mac_bytes = cksum_props[i].mac_bytes;
|
||||
|
@ -316,7 +338,35 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
|
|||
aes_ctx_t *actx = (aes_ctx_t *)(cctx->crypto_ctx);
|
||||
mctx->mac_cksum = cksum;
|
||||
|
||||
if (cksum == CKSUM_SKEIN256) {
|
||||
if (cksum == CKSUM_BLAKE256) {
|
||||
blake2b_state *ctx = (blake2b_state *)malloc(sizeof (blake2b_state));
|
||||
if (!ctx) return (-1);
|
||||
if (bdsp.blake2b_init_key(ctx, 32, actx->pkey, KEYLEN) != 0)
|
||||
return (-1);
|
||||
mctx->mac_ctx = ctx;
|
||||
ctx = (blake2b_state *)malloc(sizeof (blake2b_state));
|
||||
if (!ctx) {
|
||||
free(mctx->mac_ctx);
|
||||
return (-1);
|
||||
}
|
||||
memcpy(ctx, mctx->mac_ctx, sizeof (blake2b_state));
|
||||
mctx->mac_ctx_reinit = ctx;
|
||||
|
||||
} else if (cksum == CKSUM_BLAKE512) {
|
||||
blake2b_state *ctx = (blake2b_state *)malloc(sizeof (blake2b_state));
|
||||
if (!ctx) return (-1);
|
||||
if (bdsp.blake2b_init_key(ctx, 64, actx->pkey, KEYLEN) != 0)
|
||||
return (-1);
|
||||
mctx->mac_ctx = ctx;
|
||||
ctx = (blake2b_state *)malloc(sizeof (blake2b_state));
|
||||
if (!ctx) {
|
||||
free(mctx->mac_ctx);
|
||||
return (-1);
|
||||
}
|
||||
memcpy(ctx, mctx->mac_ctx, sizeof (blake2b_state));
|
||||
mctx->mac_ctx_reinit = ctx;
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256) {
|
||||
Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)malloc(sizeof (Skein_512_Ctxt_t));
|
||||
if (!ctx) return (-1);
|
||||
Skein_512_InitExt(ctx, 256, SKEIN_CFG_TREE_INFO_SEQUENTIAL,
|
||||
|
@ -364,19 +414,6 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
|
|||
}
|
||||
mctx->mac_ctx_reinit = ctx;
|
||||
} else {
|
||||
/* HMAC_SHA256_Context *ctx = (HMAC_SHA256_Context *)malloc(sizeof (HMAC_SHA256_Context));
|
||||
if (!ctx) return (-1);
|
||||
opt_HMAC_SHA256_Init(ctx, actx->pkey, KEYLEN);
|
||||
mctx->mac_ctx = ctx;
|
||||
|
||||
ctx = (HMAC_SHA256_Context *)malloc(sizeof (HMAC_SHA256_Context));
|
||||
if (!ctx) {
|
||||
free(mctx->mac_ctx);
|
||||
return (-1);
|
||||
}
|
||||
memcpy(ctx, mctx->mac_ctx, sizeof (HMAC_SHA256_Context));
|
||||
mctx->mac_ctx_reinit = ctx;*/
|
||||
|
||||
HMAC_SHA512_Context *ctx = (HMAC_SHA512_Context *)malloc(sizeof (HMAC_SHA512_Context));
|
||||
if (!ctx) return (-1);
|
||||
opt_HMAC_SHA512t256_Init(ctx, actx->pkey, KEYLEN);
|
||||
|
@ -457,7 +494,10 @@ hmac_reinit(mac_ctx_t *mctx)
|
|||
{
|
||||
int cksum = mctx->mac_cksum;
|
||||
|
||||
if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
if (cksum == CKSUM_BLAKE256 || cksum == CKSUM_BLAKE512) {
|
||||
memcpy(mctx->mac_ctx, mctx->mac_ctx_reinit, sizeof (blake2b_state));
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
memcpy(mctx->mac_ctx, mctx->mac_ctx_reinit, sizeof (Skein_512_Ctxt_t));
|
||||
|
||||
} else if (cksum == CKSUM_SHA256 || cksum == CKSUM_SHA512 || cksum == CKSUM_CRC64) {
|
||||
|
@ -480,7 +520,10 @@ hmac_update(mac_ctx_t *mctx, uchar_t *data, uint64_t len)
|
|||
{
|
||||
int cksum = mctx->mac_cksum;
|
||||
|
||||
if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
if (cksum == CKSUM_BLAKE256 || cksum == CKSUM_BLAKE512) {
|
||||
bdsp.blake2b_update((blake2b_state *)(mctx->mac_ctx), (uint8_t *)data, len);
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
Skein_512_Update((Skein_512_Ctxt_t *)(mctx->mac_ctx), data, len);
|
||||
|
||||
} else if (cksum == CKSUM_SHA256 || cksum == CKSUM_CRC64) {
|
||||
|
@ -529,7 +572,15 @@ hmac_final(mac_ctx_t *mctx, uchar_t *hash, unsigned int *len)
|
|||
{
|
||||
int cksum = mctx->mac_cksum;
|
||||
|
||||
if (cksum == CKSUM_SKEIN256) {
|
||||
if (cksum == CKSUM_BLAKE256) {
|
||||
bdsp.blake2b_final((blake2b_state *)(mctx->mac_ctx), hash, 32);
|
||||
*len = 32;
|
||||
|
||||
} else if (cksum == CKSUM_BLAKE512) {
|
||||
bdsp.blake2b_final((blake2b_state *)(mctx->mac_ctx), hash, 64);
|
||||
*len = 64;
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256) {
|
||||
Skein_512_Final((Skein_512_Ctxt_t *)(mctx->mac_ctx), hash);
|
||||
*len = 32;
|
||||
|
||||
|
@ -569,7 +620,11 @@ hmac_cleanup(mac_ctx_t *mctx)
|
|||
{
|
||||
int cksum = mctx->mac_cksum;
|
||||
|
||||
if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
if (cksum == CKSUM_BLAKE256 || cksum == CKSUM_BLAKE512) {
|
||||
memset(mctx->mac_ctx, 0, sizeof (blake2b_state));
|
||||
memset(mctx->mac_ctx_reinit, 0, sizeof (blake2b_state));
|
||||
|
||||
} else if (cksum == CKSUM_SKEIN256 || cksum == CKSUM_SKEIN512) {
|
||||
memset(mctx->mac_ctx, 0, sizeof (Skein_512_Ctxt_t));
|
||||
memset(mctx->mac_ctx_reinit, 0, sizeof (Skein_512_Ctxt_t));
|
||||
|
||||
|
|
|
@ -49,14 +49,19 @@ extern "C" {
|
|||
*/
|
||||
typedef enum {
|
||||
CKSUM_CRC64 = 0x100,
|
||||
CKSUM_SKEIN256 = 0x200,
|
||||
CKSUM_SKEIN512 = 0x300,
|
||||
CKSUM_BLAKE256 = 0x200,
|
||||
CKSUM_BLAKE512 = 0x300,
|
||||
CKSUM_SHA256 = 0x400,
|
||||
CKSUM_SHA512 = 0x500,
|
||||
CKSUM_KECCAK256 = 0x600,
|
||||
CKSUM_KECCAK512 = 0x700,
|
||||
CKSUM_BLAKE256 = 0x800,
|
||||
CKSUM_BLAKE512 = 0x900
|
||||
/*
|
||||
* Backwards compatibility options. SKEIN in release 1.2 was replaced with
|
||||
* Blake2 from 1.3 onwards (for sheer speed of Blake2). We want to be able
|
||||
* to decode archives created with 1.2. New archives do not use SKEIN.
|
||||
*/
|
||||
CKSUM_SKEIN256 = 0x800,
|
||||
CKSUM_SKEIN512 = 0x900
|
||||
} cksum_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -78,7 +83,8 @@ typedef struct {
|
|||
*/
|
||||
int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes);
|
||||
void list_checksums(FILE *strm, char *pad);
|
||||
int get_checksum_props(const char *name, int *cksum, int *cksum_bytes, int *mac_bytes);
|
||||
int get_checksum_props(const char *name, int *cksum, int *cksum_bytes,
|
||||
int *mac_bytes, int accept_compatible);
|
||||
void serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
||||
void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
|
||||
|
||||
|
|
|
@ -125,25 +125,25 @@ APS_NAMESPACE(Init_SHA512) (processor_info_t *pc)
|
|||
static void
|
||||
_init (SHA512_Context *sc, const uint64_t iv[SHA512_HASH_WORDS])
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
sc->totalLength[0] = 0LL;
|
||||
sc->totalLength[1] = 0LL;
|
||||
for (i = 0; i < SHA512_HASH_WORDS; i++)
|
||||
sc->hash[i] = iv[i];
|
||||
sc->bufferLength = 0L;
|
||||
sc->totalLength[0] = 0LL;
|
||||
sc->totalLength[1] = 0LL;
|
||||
for (i = 0; i < SHA512_HASH_WORDS; i++)
|
||||
sc->hash[i] = iv[i];
|
||||
sc->bufferLength = 0L;
|
||||
}
|
||||
|
||||
void
|
||||
APS_NAMESPACE(SHA512_Init) (SHA512_Context *sc)
|
||||
{
|
||||
_init (sc, iv512);
|
||||
_init (sc, iv512);
|
||||
}
|
||||
|
||||
void
|
||||
APS_NAMESPACE(SHA512t256_Init) (SHA512_Context *sc)
|
||||
{
|
||||
_init (sc, iv256);
|
||||
_init (sc, iv256);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -207,50 +207,50 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len)
|
|||
void
|
||||
APS_NAMESPACE(SHA512t256_Update) (SHA512_Context *sc, const void *data, size_t len)
|
||||
{
|
||||
APS_NAMESPACE(SHA512_Update) (sc, data, len);
|
||||
APS_NAMESPACE(SHA512_Update) (sc, data, len);
|
||||
}
|
||||
|
||||
static void
|
||||
_final (SHA512_Context *sc, uint8_t *hash, int hashWords, int halfWord)
|
||||
{
|
||||
uint32_t bytesToPad;
|
||||
uint64_t lengthPad[2];
|
||||
int i;
|
||||
|
||||
bytesToPad = 240L - sc->bufferLength;
|
||||
if (bytesToPad > 128L)
|
||||
bytesToPad -= 128L;
|
||||
|
||||
lengthPad[0] = BYTESWAP64(sc->totalLength[0]);
|
||||
lengthPad[1] = BYTESWAP64(sc->totalLength[1]);
|
||||
|
||||
APS_NAMESPACE(SHA512_Update) (sc, padding, bytesToPad);
|
||||
APS_NAMESPACE(SHA512_Update) (sc, lengthPad, 16L);
|
||||
|
||||
if (hash) {
|
||||
for (i = 0; i < hashWords; i++) {
|
||||
*((uint64_t *) hash) = BYTESWAP64(sc->hash[i]);
|
||||
hash += 8;
|
||||
}
|
||||
if (halfWord) {
|
||||
hash[0] = (uint8_t) (sc->hash[i] >> 56);
|
||||
hash[1] = (uint8_t) (sc->hash[i] >> 48);
|
||||
hash[2] = (uint8_t) (sc->hash[i] >> 40);
|
||||
hash[3] = (uint8_t) (sc->hash[i] >> 32);
|
||||
}
|
||||
}
|
||||
uint32_t bytesToPad;
|
||||
uint64_t lengthPad[2];
|
||||
int i;
|
||||
|
||||
bytesToPad = 240L - sc->bufferLength;
|
||||
if (bytesToPad > 128L)
|
||||
bytesToPad -= 128L;
|
||||
|
||||
lengthPad[0] = BYTESWAP64(sc->totalLength[0]);
|
||||
lengthPad[1] = BYTESWAP64(sc->totalLength[1]);
|
||||
|
||||
APS_NAMESPACE(SHA512_Update) (sc, padding, bytesToPad);
|
||||
APS_NAMESPACE(SHA512_Update) (sc, lengthPad, 16L);
|
||||
|
||||
if (hash) {
|
||||
for (i = 0; i < hashWords; i++) {
|
||||
*((uint64_t *) hash) = BYTESWAP64(sc->hash[i]);
|
||||
hash += 8;
|
||||
}
|
||||
if (halfWord) {
|
||||
hash[0] = (uint8_t) (sc->hash[i] >> 56);
|
||||
hash[1] = (uint8_t) (sc->hash[i] >> 48);
|
||||
hash[2] = (uint8_t) (sc->hash[i] >> 40);
|
||||
hash[3] = (uint8_t) (sc->hash[i] >> 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
APS_NAMESPACE(SHA512_Final) (SHA512_Context *sc, uint8_t hash[SHA512_HASH_SIZE])
|
||||
{
|
||||
_final (sc, hash, SHA512_HASH_WORDS, 0);
|
||||
_final (sc, hash, SHA512_HASH_WORDS, 0);
|
||||
}
|
||||
|
||||
void
|
||||
APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HASH_SIZE])
|
||||
{
|
||||
_final (sc, hash, SHA512t256_HASH_WORDS, 0);
|
||||
_final (sc, hash, SHA512t256_HASH_WORDS, 0);
|
||||
}
|
||||
|
||||
#define HASH_CONTEXT SHA512_Context
|
||||
|
|
18
main.c
18
main.c
|
@ -745,11 +745,23 @@ start_decompress(const char *filename, const char *to_filename)
|
|||
}
|
||||
|
||||
cksum = flags & CKSUM_MASK;
|
||||
if (get_checksum_props(NULL, &cksum, &cksum_bytes, &mac_bytes) == -1) {
|
||||
|
||||
/*
|
||||
* Backward compatibility check for SKEIN in archives version 5 or below.
|
||||
* In newer versions BLAKE uses same IDs as SKEIN.
|
||||
*/
|
||||
if (version <= 5) {
|
||||
if (cksum == CKSUM_BLAKE256) cksum = CKSUM_SKEIN256;
|
||||
if (cksum == CKSUM_BLAKE512) cksum = CKSUM_SKEIN512;
|
||||
}
|
||||
if (get_checksum_props(NULL, &cksum, &cksum_bytes, &mac_bytes, 1) == -1) {
|
||||
fprintf(stderr, "Invalid checksum algorithm code: %d. File corrupt ?\n", cksum);
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Archives older than 5 did not support MACs.
|
||||
*/
|
||||
if (version < 5)
|
||||
mac_bytes = 0;
|
||||
|
||||
|
@ -2250,7 +2262,7 @@ main(int argc, char *argv[])
|
|||
break;
|
||||
|
||||
case 'S':
|
||||
if (get_checksum_props(optarg, &cksum, &cksum_bytes, &mac_bytes) == -1) {
|
||||
if (get_checksum_props(optarg, &cksum, &cksum_bytes, &mac_bytes, 0) == -1) {
|
||||
err_exit(0, "Invalid checksum type %s", optarg);
|
||||
}
|
||||
break;
|
||||
|
@ -2344,7 +2356,7 @@ main(int argc, char *argv[])
|
|||
main_cancel = 0;
|
||||
|
||||
if (cksum == 0)
|
||||
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes);
|
||||
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes, 0);
|
||||
|
||||
if (!encrypt_type) {
|
||||
/*
|
||||
|
|
|
@ -26,8 +26,6 @@
|
|||
#ifndef __CPUID_H__
|
||||
#define __CPUID_H__
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define VENDOR_STR_MAX 16
|
||||
#define BRAND_STR_MAX 64
|
||||
|
@ -36,6 +34,21 @@
|
|||
#define MAX_EXT_CPUID_LEVEL 32
|
||||
#define MAX_INTELFN4_LEVEL 4
|
||||
|
||||
typedef enum {
|
||||
PROC_BIGENDIAN_GENERIC = 1,
|
||||
PROC_LITENDIAN_GENERIC,
|
||||
PROC_X64_INTEL,
|
||||
PROC_X64_AMD
|
||||
} proc_type_t;
|
||||
|
||||
typedef struct {
|
||||
int sse_level;
|
||||
int sse_sub_level;
|
||||
int avx_level;
|
||||
int xop_avail;
|
||||
proc_type_t proc_type;
|
||||
} processor_info_t;
|
||||
|
||||
/**
|
||||
* This contains only the most basic CPU data, required to do identification
|
||||
* and feature recognition. Every processor should be identifiable using this
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <cpuid.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -141,21 +142,6 @@ typedef enum {
|
|||
DECOMPRESS_THREADS
|
||||
} algo_threads_type_t;
|
||||
|
||||
typedef enum {
|
||||
PROC_BIGENDIAN_GENERIC = 1,
|
||||
PROC_LITENDIAN_GENERIC,
|
||||
PROC_X64_INTEL,
|
||||
PROC_X64_AMD
|
||||
} proc_type_t;
|
||||
|
||||
typedef struct {
|
||||
int sse_level;
|
||||
int sse_sub_level;
|
||||
int avx_level;
|
||||
int xop_avail;
|
||||
proc_type_t proc_type;
|
||||
} processor_info_t;
|
||||
|
||||
#ifndef _IN_UTILS_
|
||||
extern processor_info_t proc_info;
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue