# Makefile.in portion of a patch that wires the Keccak sources in crypto/keccak into the build.
# (These lines sit ahead of the first "diff --git" header and belong to no hunk.)
#   * KECCAK_SRC_* / KECCAK_HDRS_* list the source and header sets for each variant.
#   * KECCAK_SRCS, KECCAK_SRCS_ASM and KECCAK_HDRS are @NAME@ placeholders, like the
#     existing @GEN_OPT@ / @PREFIX@ ones; presumably substituted at configure time.
#   * DEBUG_FPTR_FLAG is empty and RELEASE_FPTR_FLAG is -fomit-frame-pointer; the new
#     SKEIN_FLAGS / SHA256_FLAGS / KECCAK_FLAGS variables append @FPTR_FLAG@ to the
#     $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) set the recipes used to spell out.
diff --git a/Makefile.in b/Makefile.in
index cfd3ce4..19441e4 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -97,9 +97,30 @@ LIBBSCLIB = @LIBBSCLIB@
 LIBBSCGEN_OPT = -fopenmp
 LIBBSCCPPFLAGS = -I$(LIBBSCDIR)/libbsc -DENABLE_PC_LIBBSC
 
+KECCAK_SRC_COMMON = crypto/keccak/genKAT.c crypto/keccak/KeccakDuplex.c \
+	crypto/keccak/KeccakNISTInterface.c crypto/keccak/KeccakSponge.c
+KECCAK_SRC_OPT64 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-opt64.c
+KECCAK_SRC_OPT64_ASM1 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-x86-64-asm.c
+KECCAK_SRC_OPT64_ASM2 = crypto/keccak/KeccakF-1600-x86-64-gas.s
+
+KECCAK_HDRS_COMMON = crypto/keccak/KeccakDuplex.h crypto/keccak/KeccakNISTInterface.h \
+	crypto/keccak/KeccakSponge.h crypto/keccak/KeccakF-1600-interface.h
+KECCAK_HDRS_OPT = $(KECCAK_HDRS_COMMON) \
+	crypto/keccak/brg_endian.h crypto/keccak/KeccakF-1600-unrolling.macros
+KECCAK_HDRS_OPT64 = $(KECCAK_HDRS_OPT) crypto/keccak/KeccakF-1600-opt64-settings.h \
+	crypto/keccak/KeccakF-1600-64.macros crypto/keccak/KeccakF-1600-simd64.macros \
+	crypto/keccak/KeccakF-1600-simd128.macros
+KECCAK_HDRS_OPT64_ASM = $(KECCAK_HDRS_OPT64)
+
+KECCAK_SRCS = @KECCAK_SRCS@
+KECCAK_SRCS_ASM = @KECCAK_SRCS_ASM@
+KECCAK_HDRS = @KECCAK_HDRS@
+KECCAK_OBJS = $(KECCAK_SRCS:.c=.o)
+KECCAK_OBJS_ASM = $(KECCAK_SRCS_ASM:.s=.o)
+
 BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ lzp/*~ utils/*~ crypto/sha2/*~ \
 	crypto/sha2/intel/*~ crypto/aes/*~ crypto/scrypt/*~ crypto/*~ rabin/global/*~ \
-	delta2/*~
+	delta2/*~ crypto/keccak/*~
 RM = rm -f
 RM_RF = rm -rf
 
@@ -107,14 +128,14 @@ COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT
 	-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \
 	-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I@OPENSSL_INCDIR@ \
 	-I./crypto/sha2 -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \
-	@LIBBZ2_INC@ @LIBZ_INC@
+	@LIBBZ2_INC@ @LIBZ_INC@ -I./crypto/keccak
 COMMON_VEC_FLAGS = -ftree-vectorize
 COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
 LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \
 	-L@OPENSSL_LIBDIR@ -lcrypto -lrt
 OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
 $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
-$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@
+$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM)
 
 DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
 DEBUG_COMPILE = gcc -m64 -g -msse3 -c
@@ -124,6 +145,7 @@ DEBUG_LOOP_OPTFLAGS =
 DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@
 DEBUG_RABIN_OPT = -O -fno-omit-frame-pointer
 DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
+DEBUG_FPTR_FLAG =
 
 RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
 RELEASE_COMPILE = gcc -m64 -msse3 -c
@@ -133,6 +155,7 @@ RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
 RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
 RELEASE_GEN_OPT = -O3 @LIBBSCGEN_OPT@
 RELEASE_RABIN_OPT = -O2
+RELEASE_FPTR_FLAG = -fomit-frame-pointer
 
 NO_SLAB_CPPFLAGS = -DDEBUG_NO_SLAB
 DEBUG_STATS_CPPFLAGS = -DDEBUG_STATS
@@ -147,6 +170,10 @@ GEN_OPT = @GEN_OPT@
 RABIN_OPT = @RABIN_OPT@
 PREFIX=@PREFIX@
 
+SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
+SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
+KECCAK_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
+
 all: $(PROG)
 
 $(LZMAOBJS): $(LZMASRCS) $(LZMAHDRS)
@@ -177,17 +204,23 @@ $(DELTA2OBJS): $(DELTA2SRCS) $(DELTA2HDRS)
 	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC) - $(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@ + $(COMPILE) $(SKEIN_FLAGS) $(SKEIN_BLOCK_SRC) -o $@ $(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS) - $(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ + $(COMPILE) $(SKEIN_FLAGS) $(@:.o=.c) -o $@ $(SHA256_OBJS): $(SHA256_SRCS) $(SHA256_HDRS) - $(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@ + $(COMPILE) $(SHA256_FLAGS) $(@:.o=.c) -o $@ $(SHA256ASM_OBJS): $(SHA256ASM_SRCS) $(YASM) -o $@ $(@:.o=.asm) +$(KECCAK_OBJS): $(KECCAK_SRCS) $(KECCAK_HDRS) + $(COMPILE) $(KECCAK_FLAGS) $(@:.o=.c) -o $@ + +$(KECCAK_OBJS_ASM): $(KECCAK_SRCS_ASM) $(KECCAK_HDRS) + $(COMPILE) $(KECCAK_FLAGS) $(@:.o=.s) -o $@ + $(LIBBSCLIB): (cd $(LIBBSCDIR); make) diff --git a/config b/config index 90aa13b..5ece5a5 100755 --- a/config +++ b/config @@ -43,6 +43,44 @@ sha256asmobjs= sha256objs= keylen= yasm=yasm +keccak_srcs= +keccak_hdrs= +keccak_srcs_asm= + +# Try a simple compilation +cat << _EOF > tst.c +#include + +int +main(void) +{ + long l; + printf("%d\n", sizeof (l)); + return (0); +} +_EOF + +gcc tst.c -o tst +if [ $? -ne 0 ] +then + echo "ERROR:" + echo "Cannot compile a simple program. GCC 4.1 and above is required" + echo "to build this program. Please include installation bindir of GCC in the PATH." + echo "" + rm -f tst.c + exit 1 +fi + +# Check bitness of system +bitness=`./tst` +rm -f tst tst.c +if [ $bitness -lt 8 ] +then + echo "ERROR:" + echo "Only 64-bit platforms are supported." + echo "" + exit 1 +fi while [ "${arg1}" != "" ] do @@ -104,6 +142,21 @@ else exit 1 fi +# Check GCC version +vers=`gcc -dumpversion` +OIFS="$IFS" +IFS=. +set -- ${vers} +IFS="$OIFS" + +if [ $1 -lt 4 -o $2 -lt 1 ] +then + echo "ERROR:" + echo "GCC version 4.1 or above is required." + echo "" + exit 1 +fi + echo $plat | egrep 'x86_64|amd64' > /dev/null if [ $? 
-eq 0 ] then @@ -135,6 +188,18 @@ then echo "Yasm version 1.1 or later is required to build on x64 platforms" exit 1 fi + + if [ $debug -eq 1 ] + then + keccak_srcs='\$\(KECCAK_SRC_OPT64\)' + keccak_hdrs='\$\(KECCAK_HDRS_OPT64\)' + else + keccak_srcs='\$\(KECCAK_SRC_OPT64_ASM1\)' + keccak_srcs_asm='\$\(KECCAK_SRC_OPT64_ASM2\)' + keccak_hdrs='\$\(KECCAK_HDRS_OPT64_ASM\)' + fi +else + keccak_srcs='\$\(KECCAK_SRC_OPT64\)' fi # Detect OpenSSL library @@ -278,6 +343,7 @@ libbsccppflagsvar="LIBBSCCPPFLAGS" sha256asmobjsvar="SHA256ASM_OBJS" sha256objsvar="SHA256_OBJS" yasmvar="YASM" +fptr_flag_var="FPTR_FLAG" openssllibdirvar="OPENSSL_LIBDIR" opensslincdirvar="OPENSSL_INCDIR" @@ -286,6 +352,10 @@ libzlibdirvar="LIBZ_DIR" libbz2incvar="LIBBZ2_INC" libzincvar="LIBZ_INC" +keccak_srcs_var="KECCAK_SRCS" +keccak_hdrs_var="KECCAK_HDRS" +keccak_srcs_asm_var="KECCAK_SRCS_ASM" + noslabcppflagsval= debugstatscppflagsval= @@ -301,6 +371,7 @@ s#@${loopoptflagsvar}@#\\\$\\(${typ}_${loopoptflagsvar}\\)#g s#@${cppflagsvar}@#\\\$\\(${typ}_${cppflagsvar}\\)#g s#@${genoptvar}@#\\\$\\(${typ}_${genoptvar}\\)#g s#@${rabinoptvar}@#\\\$\\(${typ}_${rabinoptvar}\\)#g +s#@${fptr_flag_var}@#\\\$\\(${typ}_${fptr_flag_var}\\)#g s#@${noslabcppflagsvar}@#${noslabcppflagsval}#g s#@${debugstatscppflagsvar}@#${debugstatscppflagsval}#g s#@${prefixvar}@#${prefix}#g @@ -321,5 +392,9 @@ s#@${libbz2libdirvar}@#${libbz2_libdir}#g s#@${libzlibdirvar}@#${libz_libdir}#g s#@${libbz2incvar}@#${libbz2_inc}#g s#@${libzincvar}@#${libz_inc}#g +s#@${keccak_srcs_var}@#${keccak_srcs}#g +s#@${keccak_hdrs_var}@#${keccak_hdrs}#g +s#@${keccak_srcs_var}@#${keccak_srcs}#g +s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g " > Makefile diff --git a/crypto/crypto_utils.c b/crypto/crypto_utils.c index 7dd114e..caebdf4 100644 --- a/crypto/crypto_utils.c +++ b/crypto/crypto_utils.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "crypto_utils.h" #include "cpuid.h" @@ -57,11 +58,13 @@ static struct { int bytes, mac_bytes; 
ckinit_func_ptr init_func; } cksum_props[] = { - {"CRC64", CKSUM_CRC64, 8, 32, NULL}, - {"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL}, - {"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL}, - {"SHA256", CKSUM_SHA256, 32, 32, init_sha256}, - {"SHA512", CKSUM_SHA512, 64, 64, NULL} + {"CRC64", CKSUM_CRC64, 8, 32, NULL}, + {"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL}, + {"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL}, + {"SHA256", CKSUM_SHA256, 32, 32, init_sha256}, + {"SHA512", CKSUM_SHA512, 64, 64, NULL}, + {"KECCAK256", CKSUM_KECCAK256, 32, 32, NULL}, + {"KECCAK512", CKSUM_KECCAK512, 64, 64, NULL} }; static int cksum_provider = PROVIDER_OPENSSL, ossl_inited = 0; @@ -111,6 +114,14 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes) SHA512_Init(&ctx); SHA512_Update(&ctx, buf, bytes); SHA512_Final(cksum_buf, &ctx); + + } else if (cksum == CKSUM_KECCAK256) { + if (Keccak_Hash(256, buf, bytes, cksum_buf) != 0) + return (-1); + + } else if (cksum == CKSUM_KECCAK512) { + if (Keccak_Hash(512, buf, bytes, cksum_buf) != 0) + return (-1); } else { return (-1); } @@ -279,6 +290,29 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) return (-1); } mctx->mac_ctx_reinit = ctx; + + } else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) { + hashState *ctx = malloc(sizeof (hashState)); + if (!ctx) return (-1); + + if (cksum == CKSUM_KECCAK256) { + if (Keccak_Init(ctx, 256) != 0) + return (-1); + } else { + if (Keccak_Init(ctx, 512) != 0) + return (-1); + } + if (Keccak_Update(ctx, actx->pkey, KEYLEN << 3) != 0) + return (-1); + mctx->mac_ctx = ctx; + + ctx = malloc(sizeof (hashState)); + if (!ctx) { + free(mctx->mac_ctx); + return (-1); + } + memcpy(ctx, mctx->mac_ctx, sizeof (hashState)); + mctx->mac_ctx_reinit = ctx; } else { return (-1); } @@ -301,6 +335,9 @@ hmac_reinit(mac_ctx_t *mctx) } } else if (cksum == CKSUM_SHA512) { HMAC_CTX_copy(mctx->mac_ctx, mctx->mac_ctx_reinit); + + } else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) { + 
memcpy(mctx->mac_ctx, mctx->mac_ctx_reinit, sizeof (hashState)); } else { return (-1); } @@ -325,6 +362,19 @@ hmac_update(mac_ctx_t *mctx, uchar_t *data, size_t len) } else if (cksum == CKSUM_SHA512) { if (HMAC_Update(mctx->mac_ctx, data, len) == 0) return (-1); + + } else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) { + // Keccak takes data length in bits so we have to scale + while (len > KECCAK_MAX_SEG) { + uint64_t blen; + + blen = KECCAK_MAX_SEG; + if (Keccak_Update(mctx->mac_ctx, data, blen << 3) != 0) + return (-1); + len -= KECCAK_MAX_SEG; + } + if (Keccak_Update(mctx->mac_ctx, data, len << 3) != 0) + return (-1); } else { return (-1); } @@ -353,6 +403,14 @@ hmac_final(mac_ctx_t *mctx, uchar_t *hash, unsigned int *len) } } else if (cksum == CKSUM_SHA512) { HMAC_Final(mctx->mac_ctx, hash, len); + + } else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) { + if (Keccak_Final(mctx->mac_ctx, hash) != 0) + return (-1); + if (cksum == CKSUM_KECCAK256) + *len = 32; + else + *len = 64; } else { return (-1); } @@ -379,6 +437,10 @@ hmac_cleanup(mac_ctx_t *mctx) } else if (cksum == CKSUM_SHA512) { HMAC_CTX_cleanup(mctx->mac_ctx); HMAC_CTX_cleanup(mctx->mac_ctx_reinit); + + } else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) { + memset(mctx->mac_ctx, 0, sizeof (hashState)); + memset(mctx->mac_ctx_reinit, 0, sizeof (hashState)); } else { return (-1); } diff --git a/crypto/crypto_utils.h b/crypto/crypto_utils.h index 56469f5..16f9ec4 100644 --- a/crypto/crypto_utils.h +++ b/crypto/crypto_utils.h @@ -42,6 +42,7 @@ extern "C" { #define CRYPTO_ALG_AES 0x10 #define MAX_SALTLEN 64 +#define KECCAK_MAX_SEG (2305843009213693950ULL) /* * Public checksum properties. CKSUM_MAX_BYTES must be updated if a * newer larger checksum is added to the list. 
@@ -51,7 +52,9 @@ typedef enum {
 	CKSUM_SKEIN256 = 0x200,
 	CKSUM_SKEIN512 = 0x300,
 	CKSUM_SHA256 = 0x400,
-	CKSUM_SHA512 = 0x500
+	CKSUM_SHA512 = 0x500,
+	CKSUM_KECCAK256 = 0x600,
+	CKSUM_KECCAK512 = 0x700
 } cksum_t;
 
 typedef struct {
diff --git a/crypto/keccak/KeccakDuplex.c b/crypto/keccak/KeccakDuplex.c
new file mode 100755
index 0000000..b059141
--- /dev/null
+++ b/crypto/keccak/KeccakDuplex.c
@@ -0,0 +1,68 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakDuplex.h"
+#include "KeccakF-1600-interface.h"
+#ifdef KeccakReference
+#include "displayIntermediateValues.h"
+#endif
+
+int InitDuplex(duplexState *state, unsigned int rate, unsigned int capacity)
+{
+    if (rate+capacity != 1600)
+        return 1;
+    if ((rate < 2) || (rate > 1600))
+        return 1; // rate < 2 would make the unsigned rho_max = rate-2 below wrap around
+    KeccakInitialize();
+    state->rate = rate;
+    state->capacity = capacity;
+    state->rho_max = rate-2;
+    KeccakInitializeState(state->state);
+    return 0;
+}
+
+int Duplexing(duplexState *state, const unsigned char *in, unsigned int inBitLen, unsigned char *out, unsigned int outBitLen)
+{
+    ALIGN unsigned char block[KeccakPermutationSizeInBytes];
+
+    if (inBitLen > state->rho_max)
+        return 1;
+    if ((inBitLen % 8) != 0) {
+        unsigned char mask = ~((1 << (inBitLen % 8)) - 1);
+        if ((in[inBitLen/8] & mask) != 0)
+            return 1; // The bits of the last incomplete byte must be aligned on the LSB
+    }
+    if (outBitLen > state->rate)
+        return 1; // The output length must not be greater than the rate
+
+    memcpy(block, in, (inBitLen+7)/8);
+    memset(block+(inBitLen+7)/8, 0, ((state->rate+63)/64)*8 - (inBitLen+7)/8);
+
+    block[inBitLen/8] |= 1 << (inBitLen%8);
+    block[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8);
+
+    #ifdef KeccakReference
+    displayBytes(1, "Block to be absorbed (after padding)", block, (state->rate+7)/8);
+    #endif
+    KeccakAbsorb(state->state, block, (state->rate+63)/64);
+
+    KeccakExtract(state->state, block, (state->rate+63)/64);
+    memcpy(out, block, (outBitLen+7)/8);
+    if ((outBitLen % 8) != 0) {
+        unsigned char mask = (1 << (outBitLen % 8)) - 1;
+        out[outBitLen/8] &= mask;
+    }
+
+    return 0;
+}
diff --git a/crypto/keccak/KeccakDuplex.h b/crypto/keccak/KeccakDuplex.h
new file mode 100755
index 0000000..e864993
--- /dev/null
+++ b/crypto/keccak/KeccakDuplex.h
@@ -0,0 +1,59 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakDuplex_h_
+#define _KeccakDuplex_h_
+
+#define KeccakPermutationSize 1600
+#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
+
+#if defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(32)))
+#elif defined(_MSC_VER)
+#define ALIGN __declspec(align(32))
+#else
+#define ALIGN
+#endif
+
+ALIGN typedef struct duplexStateStruct {
+    ALIGN unsigned char state[KeccakPermutationSizeInBytes];
+    unsigned int rate;
+    unsigned int capacity;
+    unsigned int rho_max;
+} duplexState;
+
+/**
+ * Function to initialize a duplex object Duplex[Keccak-f[r+c], pad10*1, r].
+ * @param state Pointer to the state of the duplex object to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @pre One must have r+c=1600 in this implementation. (The value of the rate is unrestricted.)
+ * @return Zero if successful, 1 if the parameters are rejected (for example r+c != 1600).
+ */
+int InitDuplex(duplexState *state, unsigned int rate, unsigned int capacity);
+/**
+ * Function to make a duplexing call to the duplex object initialized with InitDuplex().
+ * @param state Pointer to the state of the duplex object initialized by InitDuplex().
+ * @param in Pointer to the input data.
+ * When @a inBitLen is not a multiple of 8, the last bits of data must be
+ * in the least significant bits of the last byte.
+ * @param inBitLen The number of input bits provided in the input data.
+ * @param out Pointer to the buffer where to store the output data.
+ * @param outBitLen The number of output bits desired.
+ * @pre inBitLen ≤ (r-2)
+ * @pre outBitLen ≤ r
+ * @return Zero if successful, 1 if a precondition is violated or the unused high bits of a partial last input byte are set.
+ */
+int Duplexing(duplexState *state, const unsigned char *in, unsigned int inBitLen, unsigned char *out, unsigned int outBitLen);
+
+#endif
diff --git a/crypto/keccak/KeccakF-1600-64.macros b/crypto/keccak/KeccakF-1600-64.macros
new file mode 100755
index 0000000..0c20bca
--- /dev/null
+++ b/crypto/keccak/KeccakF-1600-64.macros
@@ -0,0 +1,728 @@
+/*
+Code automatically generated by KeccakTools!
+
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +// --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') +// --- 64-bit lanes mapped to 64-bit words +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo 
); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +// --- Code for round (lane complementing pattern 'bebigokimisa') +// --- 64-bit lanes mapped to 64-bit words +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 
43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else // UseBebigokimisa +// --- Code for round, with prepare-theta +// --- 64-bit lanes mapped to 64-bit words +#define 
thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= 
E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +// --- Code for round +// --- 64-bit lanes mapped to 64-bit words +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ 
+ E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif // UseBebigokimisa + +const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + 
X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 
4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]^input[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = 
state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + X##sa = state[20]^input[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git 
a/crypto/keccak/KeccakF-1600-int-set.h b/crypto/keccak/KeccakF-1600-int-set.h new file mode 100755 index 0000000..0ed1d80 --- /dev/null +++ b/crypto/keccak/KeccakF-1600-int-set.h @@ -0,0 +1,6 @@ +#define ProvideFast576 +#define ProvideFast832 +#define ProvideFast1024 +#define ProvideFast1088 +#define ProvideFast1152 +#define ProvideFast1344 diff --git a/crypto/keccak/KeccakF-1600-interface.h b/crypto/keccak/KeccakF-1600-interface.h new file mode 100755 index 0000000..22185a4 --- /dev/null +++ b/crypto/keccak/KeccakF-1600-interface.h @@ -0,0 +1,46 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakPermutationInterface_h_ +#define _KeccakPermutationInterface_h_ + +#include "KeccakF-1600-int-set.h" + +void KeccakInitialize( void ); +void KeccakInitializeState(unsigned char *state); +void KeccakPermutation(unsigned char *state); +#ifdef ProvideFast576 +void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast832 +void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1024 +void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1088 +void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1152 +void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1344 +void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data); +#endif +void KeccakAbsorb(unsigned char *state, const 
unsigned char *data, unsigned int laneCount); +#ifdef ProvideFast1024 +void KeccakExtract1024bits(const unsigned char *state, unsigned char *data); +#endif +void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount); + +#endif diff --git a/crypto/keccak/KeccakF-1600-opt64-settings.h b/crypto/keccak/KeccakF-1600-opt64-settings.h new file mode 100755 index 0000000..8f16ada --- /dev/null +++ b/crypto/keccak/KeccakF-1600-opt64-settings.h @@ -0,0 +1,7 @@ +#define Unrolling 24 +#define UseBebigokimisa +//#define UseSSE +//#define UseOnlySIMD64 +//#define UseMMX +//#define UseSHLD +//#define UseXOP diff --git a/crypto/keccak/KeccakF-1600-opt64.c b/crypto/keccak/KeccakF-1600-opt64.c new file mode 100755 index 0000000..9349f03 --- /dev/null +++ b/crypto/keccak/KeccakF-1600-opt64.c @@ -0,0 +1,504 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include "brg_endian.h" +#include "KeccakF-1600-opt64-settings.h" +#include "KeccakF-1600-interface.h" + +typedef unsigned char UINT8; +typedef unsigned long long int UINT64; + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + +#if defined(UseSSE) + #include + typedef __m128i V64; + typedef __m128i V128; + typedef union { + V128 v128; + UINT64 v64[2]; + } V6464; + + #define ANDnu64(a, b) _mm_andnot_si128(a, b) + #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b) + #define XOR64(a, b) _mm_xor_si128(a, b) + #define XOReq64(a, b) a = _mm_xor_si128(a, b) + #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b) + + #define ANDnu128(a, b) _mm_andnot_si128(a, b) + #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) + #define CONST128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) + #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) + #define XOR128(a, b) _mm_xor_si128(a, b) + #define XOReq128(a, b) a = _mm_xor_si128(a, b) + #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b) + #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b) + #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE) + #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44) + #define ZERO128() _mm_setzero_si128() + + #ifdef UseOnlySIMD64 + #include "KeccakF-1600-simd64.macros" + #else +ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09}; + #include "KeccakF-1600-simd128.macros" + #endif + + #ifdef UseBebigokimisa + #error 
"UseBebigokimisa cannot be used in combination with UseSSE" + #endif +#elif defined(UseXOP) + #include + typedef __m128i V64; + typedef __m128i V128; + + #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b) + #define XOR64(a, b) _mm_xor_si128(a, b) + #define XOReq64(a, b) a = _mm_xor_si128(a, b) + + #define ANDnu128(a, b) _mm_andnot_si128(a, b) + #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) + #define CONST128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) + #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) + #define XOR128(a, b) _mm_xor_si128(a, b) + #define XOReq128(a, b) a = _mm_xor_si128(a, b) + #define ZERO128() _mm_setzero_si128() + + #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E) + #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b) + #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b) + #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2)) + #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a)) + #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE) + #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44) + + #define ROL6464same(a, o) _mm_roti_epi64(a, o) + #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 )) +ALIGN const UINT64 rot_0_20[2] = { 0, 20}; +ALIGN const UINT64 rot_44_3[2] = {44, 3}; +ALIGN const UINT64 rot_43_45[2] = {43, 45}; +ALIGN const UINT64 rot_21_61[2] = {21, 61}; +ALIGN const UINT64 rot_14_28[2] = {14, 28}; +ALIGN const UINT64 rot_1_36[2] = { 1, 36}; +ALIGN const UINT64 rot_6_10[2] = { 6, 10}; +ALIGN const UINT64 rot_25_15[2] = {25, 15}; +ALIGN const UINT64 rot_8_56[2] = { 8, 56}; +ALIGN const UINT64 rot_18_27[2] = {18, 27}; +ALIGN const UINT64 rot_62_55[2] = {62, 55}; +ALIGN const UINT64 rot_39_41[2] = {39, 41}; + +#if defined(UseSimulatedXOP) + // For debugging purposes, when XOP is 
not available + #undef ROL6464 + #undef ROL6464same + #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + V128 ROL6464(V128 a, int r0, int r1) + { + V128 a0 = ROL64(a, r0); + V128 a1 = COPY64HI2LO(ROL64(a, r1)); + return GET64LOLO(a0, a1); + } +#endif + + #include "KeccakF-1600-xop.macros" + + #ifdef UseBebigokimisa + #error "UseBebigokimisa cannot be used in combination with UseXOP" + #endif +#elif defined(UseMMX) + #include + typedef __m64 V64; + #define ANDnu64(a, b) _mm_andnot_si64(a, b) + + #if (defined(_MSC_VER) || defined (__INTEL_COMPILER)) + #define LOAD64(a) *(V64*)&(a) + #define CONST64(a) *(V64*)&(a) + #define STORE64(a, b) *(V64*)&(a) = b + #else + #define LOAD64(a) (V64)a + #define CONST64(a) (V64)a + #define STORE64(a, b) a = (UINT64)b + #endif + #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o))) + #define XOR64(a, b) _mm_xor_si64(a, b) + #define XOReq64(a, b) a = _mm_xor_si64(a, b) + + #include "KeccakF-1600-simd64.macros" + + #ifdef UseBebigokimisa + #error "UseBebigokimisa cannot be used in combination with UseMMX" + #endif +#else + #if defined(_MSC_VER) + #define ROL64(a, offset) _rotl64(a, offset) + #elif defined(UseSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) + #else + #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) + #endif + + #include "KeccakF-1600-64.macros" +#endif + +#include "KeccakF-1600-unrolling.macros" + +void KeccakPermutationOnWords(UINT64 *state) +{ + declareABCDE +#if (Unrolling != 24) + unsigned int i; +#endif + + copyFromState(A, state) + rounds +#if defined(UseMMX) + _mm_empty(); +#endif +} + +void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount) +{ + declareABCDE +#if (Unrolling != 24) + unsigned int i; +#endif + unsigned int j; + + for(j=0; j> (8*i)) & 0xFF; 
+} + +#ifdef ProvideFast1024 +void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, 128); +#else + unsigned int i; + + for(i=0; i<16; i++) + fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); +#endif +#ifdef UseBebigokimisa + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; +#endif +} +#endif + +void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} diff --git a/crypto/keccak/KeccakF-1600-simd128.macros b/crypto/keccak/KeccakF-1600-simd128.macros new file mode 100755 index 0000000..6301622 --- /dev/null +++ b/crypto/keccak/KeccakF-1600-simd128.macros @@ -0,0 +1,651 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + V6464 Abage, Abegi, Abigo, Abogu, Abuga; \ + V6464 Akame, Akemi, Akimo, Akomu, Akuma; \ + V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \ + V64 Aba, Abe, Abi, Abo, Abu; \ + V64 Aga, Age, Agi, Ago, Agu; \ + V64 Aka, Ake, Aki, Ako, Aku; \ + V64 Ama, Ame, Ami, Amo, Amu; \ + V64 Asa, Ase, Asi, Aso, Asu; \ + V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \ + V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \ + V64 Bba, Bbe, Bbi, Bbo, Bbu; \ + V64 Bga, Bge, Bgi, Bgo, Bgu; \ + V64 Bka, Bke, Bki, Bko, Bku; \ + V64 Bma, Bme, Bmi, Bmo, Bmu; \ + V64 Bsa, Bse, Bsi, Bso, Bsu; \ + V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \ + V64 Ca, Ce, Ci, Co, Cu; \ + V64 Da, De, Di, Do, Du; \ + V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \ + V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \ + V64 Eba, Ebe, Ebi, Ebo, Ebu; \ + V64 Ega, Ege, Egi, Ego, Egu; \ + V64 Eka, Eke, Eki, Eko, Eku; \ + V64 Ema, Eme, Emi, Emo, Emu; \ + V64 Esa, Ese, Esi, Eso, Esu; \ + V128 Zero; + +#define prepareTheta + +#define computeD \ + Cua = GET64LOLO(Cu, Cae); \ + Dei = XOR128(Cae, ROL64in128(Cio, 1)); \ + Dou = XOR128(Cio, ROL64in128(Cua, 1)); \ + Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \ + De = Dei; \ + Di = COPY64HI2LO(Dei); \ + Do = Dou; \ + Du = COPY64HI2LO(Dou); + +// --- Theta Rho Pi Chi Iota Prepare-theta +// --- 64-bit lanes mapped to 64-bit and 128-bit words +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + computeD \ + \ + A##ba = LOAD64(A##bage.v64[0]); \ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + Bbage = GET64LOLO(Bba, Bge); \ + A##ge = LOAD64(A##bage.v64[1]); \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + A##ka = LOAD64(A##kame.v64[0]); \ + XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + Bbegi = GET64LOLO(Bbe, Bgi); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + A##me = LOAD64(A##kame.v64[1]); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 45); 
\ + Bbigo = GET64LOLO(Bbi, Bgo); \ + E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \ + XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \ + Cae = E##bage.v128; \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + Bbogu = GET64LOLO(Bbo, Bgu); \ + E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \ + Cei = E##begi.v128; \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + Bbuga = GET64LOLO(Bbu, Bga); \ + E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \ + E##bi = E##bigo.v128; \ + E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \ + Cio = E##bigo.v128; \ + E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \ + E##bo = E##bogu.v128; \ + E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \ + Cou = E##bogu.v128; \ + E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \ + E##bu = E##buga.v128; \ + E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \ + Cua = E##buga.v128; \ +\ + A##be = LOAD64(A##begi.v64[0]); \ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + Bkame = GET64LOLO(Bka, Bme); \ + A##gi = LOAD64(A##begi.v64[1]); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + A##ke = LOAD64(A##kemi.v64[0]); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + Bkemi = GET64LOLO(Bke, Bmi); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + A##mi = LOAD64(A##kemi.v64[1]); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + Bkimo = GET64LOLO(Bki, Bmo); \ + E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \ + XOReq128(Cae, E##kame.v128); \ + Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \ + Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \ + E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \ + XOReq128(Cei, E##kemi.v128); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + Bkuma = GET64LOLO(Bku, 
Bma); \ + E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \ + E##ki = E##kimo.v128; \ + E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \ + XOReq128(Cio, E##kimo.v128); \ + E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \ + E##ko = E##komu.v128; \ + E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \ + XOReq128(Cou, E##komu.v128); \ + E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \ + E##ku = E##kuma.v128; \ + E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \ + XOReq128(Cua, E##kuma.v128); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + Ca = E##sa; \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ + Ce = E##se; \ + XOReq128(Cae, GET64LOLO(Ca, Ce)); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + Ci = E##si; \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + Co = E##so; \ + XOReq128(Cio, GET64LOLO(Ci, Co)); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ + Cu = E##su; \ +\ + Zero = ZERO128(); \ + XOReq128(Cae, GET64HIHI(Cua, Zero)); \ + XOReq128(Cae, GET64LOLO(Zero, Cei)); \ + XOReq128(Cio, GET64HIHI(Cei, Zero)); \ + XOReq128(Cio, GET64LOLO(Zero, Cou)); \ + XOReq128(Cua, GET64HIHI(Cou, Zero)); \ + XOReq64(Cu, Cua); \ + +// --- Theta Rho Pi Chi Iota +// --- 64-bit lanes mapped to 64-bit and 128-bit words +#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E) + +const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 
0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = LOAD64(state[ 9]); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = LOAD128(state[10]); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = LOAD128(state[12]); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = 
X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, 
X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + 
X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = 
XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + 
XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, 
X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromState(X, state) \ + X##bae.v128 = LOAD128(state[ 0]); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = LOAD128(state[ 2]); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = LOAD64(state[ 4]); \ + Cu = X##bu; \ + X##gae.v128 = LOAD128u(state[ 5]); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = LOAD128u(state[ 7]); \ + 
X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = LOAD64(state[ 9]); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = LOAD128(state[10]); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = LOAD128(state[12]); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyToState(state, X) \ + state[ 0] = A##bage.v64[0]; \ + state[ 1] = A##begi.v64[0]; \ + STORE64(state[ 2], X##bi); \ + STORE64(state[ 3], X##bo); \ + STORE64(state[ 4], X##bu); \ + STORE64(state[ 5], X##ga); \ + state[ 6] = A##bage.v64[1]; \ + state[ 7] = A##begi.v64[1]; \ + STORE64(state[ 8], X##go); \ + STORE64(state[ 9], X##gu); \ + state[10] = X##kame.v64[0]; \ + state[11] = X##kemi.v64[0]; \ + STORE64(state[12], X##ki); \ + STORE64(state[13], X##ko); \ + STORE64(state[14], X##ku); \ + STORE64(state[15], X##ma); \ + state[16] = X##kame.v64[1]; \ + state[17] = X##kemi.v64[1]; \ + STORE64(state[18], X##mo); \ + STORE64(state[19], X##mu); \ + 
STORE64(state[20], X##sa); \ + STORE64(state[21], X##se); \ + STORE64(state[22], X##si); \ + STORE64(state[23], X##so); \ + STORE64(state[24], X##su); \ + +#define copyStateVariables(X, Y) \ + X##bage = Y##bage; \ + X##begi = Y##begi; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##kame = Y##kame; \ + X##kemi = Y##kemi; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git a/crypto/keccak/KeccakF-1600-simd64.macros b/crypto/keccak/KeccakF-1600-simd64.macros new file mode 100755 index 0000000..c067304 --- /dev/null +++ b/crypto/keccak/KeccakF-1600-simd64.macros @@ -0,0 +1,517 @@ +/* +Code automatically generated by KeccakTools! + +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + V64 Aba, Abe, Abi, Abo, Abu; \ + V64 Aga, Age, Agi, Ago, Agu; \ + V64 Aka, Ake, Aki, Ako, Aku; \ + V64 Ama, Ame, Ami, Amo, Amu; \ + V64 Asa, Ase, Asi, Aso, Asu; \ + V64 Bba, Bbe, Bbi, Bbo, Bbu; \ + V64 Bga, Bge, Bgi, Bgo, Bgu; \ + V64 Bka, Bke, Bki, Bko, Bku; \ + V64 Bma, Bme, Bmi, Bmo, Bmu; \ + V64 Bsa, Bse, Bsi, Bso, Bsu; \ + V64 Ca, Ce, Ci, Co, Cu; \ + V64 Da, De, Di, Do, Du; \ + V64 Eba, Ebe, Ebi, Ebo, Ebu; \ + V64 Ega, Ege, Egi, Ego, Egu; \ + V64 Eka, Eke, Eki, Eko, Eku; \ + V64 Ema, Eme, Emi, Emo, Emu; \ + V64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \ + Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \ + Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \ + Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \ + Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \ + +// --- Code for round, with prepare-theta +// --- 64-bit lanes mapped to 64-bit words +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = XOR64(Cu, ROL64(Ce, 1)); \ + De = XOR64(Ca, ROL64(Ci, 1)); \ + Di = XOR64(Ce, ROL64(Co, 1)); \ + Do = XOR64(Ci, ROL64(Cu, 1)); \ + Du = XOR64(Co, ROL64(Ca, 1)); \ +\ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ + XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ + Ca = E##ba; \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ + Ce = E##be; \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ + Ci = E##bi; \ + E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ + Co = E##bo; \ + E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ + Cu = E##bu; \ +\ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + 
XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ + XOReq64(Ca, E##ga); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 45); \ + E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ + XOReq64(Ce, E##ge); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ + XOReq64(Ci, E##gi); \ + E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ + XOReq64(Co, E##go); \ + E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ + XOReq64(Cu, E##gu); \ +\ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ + XOReq64(Ca, E##ka); \ + XOReq64(A##mu, Du); \ + Bko = ROL64(A##mu, 8); \ + E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ + XOReq64(Ce, E##ke); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ + XOReq64(Ci, E##ki); \ + E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \ + XOReq64(Co, E##ko); \ + E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ + XOReq64(Cu, E##ku); \ +\ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ + XOReq64(Ca, E##ma); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ + XOReq64(Ce, E##me); \ + XOReq64(A##so, Do); \ + Bmu = ROL64(A##so, 56); \ + E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ + XOReq64(Ci, E##mi); \ + E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ + XOReq64(Co, E##mo); \ + E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ + XOReq64(Cu, E##mu); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + XOReq64(Ca, E##sa); \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); 
\ + XOReq64(Ce, E##se); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + XOReq64(Ci, E##si); \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + XOReq64(Co, E##so); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ + XOReq64(Cu, E##su); \ +\ + +// --- Code for round +// --- 64-bit lanes mapped to 64-bit words +#define thetaRhoPiChiIota(i, A, E) \ + Da = XOR64(Cu, ROL64(Ce, 1)); \ + De = XOR64(Ca, ROL64(Ci, 1)); \ + Di = XOR64(Ce, ROL64(Co, 1)); \ + Do = XOR64(Ci, ROL64(Cu, 1)); \ + Du = XOR64(Co, ROL64(Ca, 1)); \ +\ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ + XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ + E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ + E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ +\ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 45); \ + E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ + E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ + E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ +\ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ + XOReq64(A##mu, Du); \ + Bko = ROL64(A##mu, 8); \ + E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ + E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \ + 
E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ +\ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ + XOReq64(A##so, Do); \ + Bmu = ROL64(A##so, 56); \ + E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ + E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ + E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ +\ + +const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + 
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = LOAD64(state[ 9]); \ + X##ka = LOAD64(state[10]); \ + X##ke = LOAD64(state[11]); \ + X##ki = LOAD64(state[12]); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 
0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + 
X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 
3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ + X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \ + X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ + X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromState(X, state) \ + X##ba = LOAD64(state[ 0]); \ + X##be = LOAD64(state[ 1]); \ + X##bi = LOAD64(state[ 2]); \ + X##bo = LOAD64(state[ 3]); \ + X##bu = LOAD64(state[ 4]); \ + X##ga = LOAD64(state[ 5]); \ + X##ge = LOAD64(state[ 6]); \ + X##gi = LOAD64(state[ 7]); \ + X##go = LOAD64(state[ 8]); \ + X##gu = LOAD64(state[ 9]); \ + X##ka = LOAD64(state[10]); \ + X##ke = LOAD64(state[11]); \ + X##ki = LOAD64(state[12]); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyToState(state, X) \ + STORE64(state[ 0], X##ba); \ + STORE64(state[ 1], 
X##be); \ + STORE64(state[ 2], X##bi); \ + STORE64(state[ 3], X##bo); \ + STORE64(state[ 4], X##bu); \ + STORE64(state[ 5], X##ga); \ + STORE64(state[ 6], X##ge); \ + STORE64(state[ 7], X##gi); \ + STORE64(state[ 8], X##go); \ + STORE64(state[ 9], X##gu); \ + STORE64(state[10], X##ka); \ + STORE64(state[11], X##ke); \ + STORE64(state[12], X##ki); \ + STORE64(state[13], X##ko); \ + STORE64(state[14], X##ku); \ + STORE64(state[15], X##ma); \ + STORE64(state[16], X##me); \ + STORE64(state[17], X##mi); \ + STORE64(state[18], X##mo); \ + STORE64(state[19], X##mu); \ + STORE64(state[20], X##sa); \ + STORE64(state[21], X##se); \ + STORE64(state[22], X##si); \ + STORE64(state[23], X##so); \ + STORE64(state[24], X##su); \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git a/crypto/keccak/KeccakF-1600-unrolling.macros b/crypto/keccak/KeccakF-1600-unrolling.macros new file mode 100755 index 0000000..83c694c --- /dev/null +++ b/crypto/keccak/KeccakF-1600-unrolling.macros @@ -0,0 +1,124 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (Unrolling == 24) /* rounds, fully unrolled: rounds 0-22 use the PrepareTheta variant, round 23 the plain one; A and E swap roles every round and the result is stored from A */ +#define rounds \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + copyToState(state, A) +#elif (Unrolling == 12) /* rounds as 2 loop passes of 12 rounds; the loop counter i is not declared by this macro */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 8) /* rounds as 3 loop passes of 8 rounds */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=8) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + 
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 6) /* rounds as 4 loop passes of 6 rounds */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 4) /* rounds as 6 loop passes of 4 rounds */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 3) /* rounds as 8 loop passes of 3 rounds */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) /* the third round of the pass wrote E, so copy it into A before the next pass */ \ + } \ + copyToState(state, A) +#elif (Unrolling == 2) /* rounds as 12 loop passes of 2 rounds */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 1) /* rounds as 24 loop passes of 1 round */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) /* the round wrote E, so copy it into A before the next pass */ \ + } \ + copyToState(state, A) +#else /* any other value of Unrolling is rejected at compile time */ +#error "Unrolling is not correctly specified!" 
+#endif diff --git a/crypto/keccak/KeccakF-1600-x86-64-asm.c b/crypto/keccak/KeccakF-1600-x86-64-asm.c new file mode 100755 index 0000000..68fb9bd --- /dev/null +++ b/crypto/keccak/KeccakF-1600-x86-64-asm.c @@ -0,0 +1,62 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by Ronny Van Keer, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include "KeccakF-1600-interface.h" + +#define UseBebigokimisa + +typedef unsigned char UINT8; +typedef unsigned long long int UINT64; + +void KeccakInitialize() +{ +} + +void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) +{ + memcpy(data, state, laneCount*8); +#ifdef UseBebigokimisa + if (laneCount > 8) + { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + + if (laneCount > 12) + { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) + { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) + { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + else + { + if (laneCount > 1) + { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) + { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + } + } + } + +#endif +} diff --git a/crypto/keccak/KeccakF-1600-x86-64-gas.s b/crypto/keccak/KeccakF-1600-x86-64-gas.s new file mode 100755 index 0000000..289a84e --- /dev/null +++ b/crypto/keccak/KeccakF-1600-x86-64-gas.s @@ -0,0 +1,766 @@ +# +# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +# Michaël Peeters and Gilles Van Assche. 
For more information, feedback or +# questions, please refer to our website: http://keccak.noekeon.org/ +# +# Implementation by Ronny Van Keer, +# hereby denoted as "the implementer". +# +# To the extent possible under law, the implementer has waived all copyright +# and related or neighboring rights to the source code in this file. +# http://creativecommons.org/publicdomain/zero/1.0/ +# + + .text + + +#// --- defines + +.equ UseSIMD, 1 + + +.equ _ba, 0*8 +.equ _be, 1*8 +.equ _bi, 2*8 +.equ _bo, 3*8 +.equ _bu, 4*8 +.equ _ga, 5*8 +.equ _ge, 6*8 +.equ _gi, 7*8 +.equ _go, 8*8 +.equ _gu, 9*8 +.equ _ka, 10*8 +.equ _ke, 11*8 +.equ _ki, 12*8 +.equ _ko, 13*8 +.equ _ku, 14*8 +.equ _ma, 15*8 +.equ _me, 16*8 +.equ _mi, 17*8 +.equ _mo, 18*8 +.equ _mu, 19*8 +.equ _sa, 20*8 +.equ _se, 21*8 +.equ _si, 22*8 +.equ _so, 23*8 +.equ _su, 24*8 + + +# arguments +.equ apState, %rdi +.equ apInput, %rsi +.equ aNbrWords, %rdx + +# xor input into state section +.equ xpState, %r9 + +# round vars +.equ rT1, %rax +.equ rpState, %rdi +.equ rpStack, %rsp + +.equ rDa, %rbx +.equ rDe, %rcx +.equ rDi, %rdx +.equ rDo, %r8 +.equ rDu, %r9 + +.equ rBa, %r10 +.equ rBe, %r11 +.equ rBi, %r12 +.equ rBo, %r13 +.equ rBu, %r14 + +.equ rCa, %rsi +.equ rCe, %rbp +.equ rCi, rBi +.equ rCo, rBo +.equ rCu, %r15 + +.macro mKeccakRound iState, oState, rc, lastRound + + movq rCe, rDa + rolq rDa + + movq _bi(\iState), rCi + xorq _gi(\iState), rDi + xorq rCu, rDa + xorq _ki(\iState), rCi + xorq _mi(\iState), rDi + xorq rDi, rCi + + movq rCi, rDe + rolq rDe + + movq _bo(\iState), rCo + xorq _go(\iState), rDo + xorq rCa, rDe + xorq _ko(\iState), rCo + xorq _mo(\iState), rDo + xorq rDo, rCo + + movq rCo, rDi + rolq rDi + + movq rCu, rDo + xorq rCe, rDi + rolq rDo + + movq rCa, rDu + xorq rCi, rDo + rolq rDu + + movq _ba(\iState), rBa + movq _ge(\iState), rBe + xorq rCo, rDu + movq _ki(\iState), rBi + movq _mo(\iState), rBo + movq _su(\iState), rBu + xorq rDe, rBe + rolq $44, rBe + xorq rDi, rBi + xorq rDa, rBa + rolq $43, 
rBi + + movq rBe, rCa + movq $\rc, rT1 + orq rBi, rCa + xorq rBa, rT1 + xorq rT1, rCa + movq rCa, _ba(\oState) + + xorq rDu, rBu + rolq $14, rBu + movq rBa, rCu + andq rBe, rCu + xorq rBu, rCu + movq rCu, _bu(\oState) + + xorq rDo, rBo + rolq $21, rBo + movq rBo, rT1 + andq rBu, rT1 + xorq rBi, rT1 + movq rT1, _bi(\oState) + + notq rBi + orq rBa, rBu + orq rBo, rBi + xorq rBo, rBu + xorq rBe, rBi + movq rBu, _bo(\oState) + movq rBi, _be(\oState) + .if \lastRound == 0 + movq rBi, rCe + .endif + + + movq _gu(\iState), rBe + xorq rDu, rBe + movq _ka(\iState), rBi + rolq $20, rBe + xorq rDa, rBi + rolq $3, rBi + movq _bo(\iState), rBa + movq rBe, rT1 + orq rBi, rT1 + xorq rDo, rBa + movq _me(\iState), rBo + movq _si(\iState), rBu + rolq $28, rBa + xorq rBa, rT1 + movq rT1, _ga(\oState) + .if \lastRound == 0 + xor rT1, rCa + .endif + + xorq rDe, rBo + rolq $45, rBo + movq rBi, rT1 + andq rBo, rT1 + xorq rBe, rT1 + movq rT1, _ge(\oState) + .if \lastRound == 0 + xorq rT1, rCe + .endif + + xorq rDi, rBu + rolq $61, rBu + movq rBu, rT1 + orq rBa, rT1 + xorq rBo, rT1 + movq rT1, _go(\oState) + + andq rBe, rBa + xorq rBu, rBa + movq rBa, _gu(\oState) + notq rBu + .if \lastRound == 0 + xorq rBa, rCu + .endif + + orq rBu, rBo + xorq rBi, rBo + movq rBo, _gi(\oState) + + + movq _be(\iState), rBa + movq _gi(\iState), rBe + movq _ko(\iState), rBi + movq _mu(\iState), rBo + movq _sa(\iState), rBu + xorq rDi, rBe + rolq $6, rBe + xorq rDo, rBi + rolq $25, rBi + movq rBe, rT1 + orq rBi, rT1 + xorq rDe, rBa + rolq $1, rBa + xorq rBa, rT1 + movq rT1, _ka(\oState) + .if \lastRound == 0 + xor rT1, rCa + .endif + + xorq rDu, rBo + rolq $8, rBo + movq rBi, rT1 + andq rBo, rT1 + xorq rBe, rT1 + movq rT1, _ke(\oState) + .if \lastRound == 0 + xorq rT1, rCe + .endif + + xorq rDa, rBu + rolq $18, rBu + notq rBo + movq rBo, rT1 + andq rBu, rT1 + xorq rBi, rT1 + movq rT1, _ki(\oState) + + movq rBu, rT1 + orq rBa, rT1 + xorq rBo, rT1 + movq rT1, _ko(\oState) + + andq rBe, rBa + xorq rBu, rBa + 
movq rBa, _ku(\oState) + .if \lastRound == 0 + xorq rBa, rCu + .endif + + movq _ga(\iState), rBe + xorq rDa, rBe + movq _ke(\iState), rBi + rolq $36, rBe + xorq rDe, rBi + movq _bu(\iState), rBa + rolq $10, rBi + movq rBe, rT1 + movq _mi(\iState), rBo + andq rBi, rT1 + xorq rDu, rBa + movq _so(\iState), rBu + rolq $27, rBa + xorq rBa, rT1 + movq rT1, _ma(\oState) + .if \lastRound == 0 + xor rT1, rCa + .endif + + xorq rDi, rBo + rolq $15, rBo + movq rBi, rT1 + orq rBo, rT1 + xorq rBe, rT1 + movq rT1, _me(\oState) + .if \lastRound == 0 + xorq rT1, rCe + .endif + + xorq rDo, rBu + rolq $56, rBu + notq rBo + movq rBo, rT1 + orq rBu, rT1 + xorq rBi, rT1 + movq rT1, _mi(\oState) + + orq rBa, rBe + xorq rBu, rBe + movq rBe, _mu(\oState) + + andq rBa, rBu + xorq rBo, rBu + movq rBu, _mo(\oState) + .if \lastRound == 0 + xorq rBe, rCu + .endif + + + movq _bi(\iState), rBa + movq _go(\iState), rBe + movq _ku(\iState), rBi + xorq rDi, rBa + movq _ma(\iState), rBo + rolq $62, rBa + xorq rDo, rBe + movq _se(\iState), rBu + rolq $55, rBe + + xorq rDu, rBi + movq rBa, rDu + xorq rDe, rBu + rolq $2, rBu + andq rBe, rDu + xorq rBu, rDu + movq rDu, _su(\oState) + + rolq $39, rBi + .if \lastRound == 0 + xorq rDu, rCu + .endif + notq rBe + xorq rDa, rBo + movq rBe, rDa + andq rBi, rDa + xorq rBa, rDa + movq rDa, _sa(\oState) + .if \lastRound == 0 + xor rDa, rCa + .endif + + rolq $41, rBo + movq rBi, rDe + orq rBo, rDe + xorq rBe, rDe + movq rDe, _se(\oState) + .if \lastRound == 0 + xorq rDe, rCe + .endif + + movq rBo, rDi + movq rBu, rDo + andq rBu, rDi + orq rBa, rDo + xorq rBi, rDi + xorq rBo, rDo + movq rDi, _si(\oState) + movq rDo, _so(\oState) + + .endm + +.macro mKeccakPermutation + + subq $8*25, %rsp + + movq _ba(rpState), rCa + movq _be(rpState), rCe + movq _bu(rpState), rCu + + xorq _ga(rpState), rCa + xorq _ge(rpState), rCe + xorq _gu(rpState), rCu + + xorq _ka(rpState), rCa + xorq _ke(rpState), rCe + xorq _ku(rpState), rCu + + xorq _ma(rpState), rCa + xorq _me(rpState), rCe 
+ xorq _mu(rpState), rCu + + xorq _sa(rpState), rCa + xorq _se(rpState), rCe + movq _si(rpState), rDi + movq _so(rpState), rDo + xorq _su(rpState), rCu + + + mKeccakRound rpState, rpStack, 0x0000000000000001, 0 + mKeccakRound rpStack, rpState, 0x0000000000008082, 0 + mKeccakRound rpState, rpStack, 0x800000000000808a, 0 + mKeccakRound rpStack, rpState, 0x8000000080008000, 0 + mKeccakRound rpState, rpStack, 0x000000000000808b, 0 + mKeccakRound rpStack, rpState, 0x0000000080000001, 0 + + mKeccakRound rpState, rpStack, 0x8000000080008081, 0 + mKeccakRound rpStack, rpState, 0x8000000000008009, 0 + mKeccakRound rpState, rpStack, 0x000000000000008a, 0 + mKeccakRound rpStack, rpState, 0x0000000000000088, 0 + mKeccakRound rpState, rpStack, 0x0000000080008009, 0 + mKeccakRound rpStack, rpState, 0x000000008000000a, 0 + + mKeccakRound rpState, rpStack, 0x000000008000808b, 0 + mKeccakRound rpStack, rpState, 0x800000000000008b, 0 + mKeccakRound rpState, rpStack, 0x8000000000008089, 0 + mKeccakRound rpStack, rpState, 0x8000000000008003, 0 + mKeccakRound rpState, rpStack, 0x8000000000008002, 0 + mKeccakRound rpStack, rpState, 0x8000000000000080, 0 + + mKeccakRound rpState, rpStack, 0x000000000000800a, 0 + mKeccakRound rpStack, rpState, 0x800000008000000a, 0 + mKeccakRound rpState, rpStack, 0x8000000080008081, 0 + mKeccakRound rpStack, rpState, 0x8000000000008080, 0 + mKeccakRound rpState, rpStack, 0x0000000080000001, 0 + mKeccakRound rpStack, rpState, 0x8000000080008008, 1 + + addq $8*25, %rsp + + .endm + +.macro mPushRegs + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + .endm + + +.macro mPopRegs + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + + .endm + + +.macro mXorState128 input, state, offset + .if UseSIMD == 0 + movq \offset(\input), %rax + movq \offset+8(\input), %rcx + xorq %rax, \offset(\state) + xorq %rcx, \offset+8(\state) + .else + movdqu \offset(\input), %xmm0 + pxor \offset(\state), %xmm0 + movdqu %xmm0, 
\offset(\state) + .endif + .endm + +.macro mXorState256 input, state, offset + .if UseSIMD == 0 + movq \offset(\input), %rax + movq \offset+8(\input), %r10 + movq \offset+16(\input), %rcx + movq \offset+24(\input), %r8 + xorq %rax, \offset(\state) + xorq %r10, \offset+8(\state) + xorq %rcx, \offset+16(\state) + xorq %r8, \offset+24(\state) + .else + movdqu \offset(\input), %xmm0 + pxor \offset(\state), %xmm0 + movdqu \offset+16(\input), %xmm1 + pxor \offset+16(\state), %xmm1 + movdqu %xmm0, \offset(\state) + movdqu %xmm1, \offset+16(\state) + .endif + .endm + +.macro mXorState512 input, state, offset + .if UseSIMD == 0 + mXorState256 \input, \state, \offset + mXorState256 \input, \state, \offset+32 + .else + movdqu \offset(\input), %xmm0 + movdqu \offset+16(\input), %xmm1 + pxor \offset(\state), %xmm0 + movdqu \offset+32(\input), %xmm2 + pxor \offset+16(\state), %xmm1 + movdqu %xmm0, \offset(\state) + movdqu \offset+48(\input), %xmm3 + pxor \offset+32(\state), %xmm2 + movdqu %xmm1, \offset+16(\state) + pxor \offset+48(\state), %xmm3 + movdqu %xmm2, \offset+32(\state) + movdqu %xmm3, \offset+48(\state) + .endif + .endm + +# ------------------------------------------------------------------------- + + .size KeccakPermutation, .-KeccakPermutation + .align 2 + .global KeccakPermutation + .type KeccakPermutation, %function +KeccakPermutation: + + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb576bits, .-KeccakAbsorb576bits + .align 2 + .global KeccakAbsorb576bits + .type KeccakAbsorb576bits, %function +KeccakAbsorb576bits: + + mXorState512 apInput, apState, 0 + movq 64(apInput), %rax + xorq %rax, 64(apState) + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb832bits, .-KeccakAbsorb832bits + .align 2 + .global KeccakAbsorb832bits + .type KeccakAbsorb832bits, %function 
+KeccakAbsorb832bits: + + mXorState512 apInput, apState, 0 + mXorState256 apInput, apState, 64 + movq 96(apInput), %rax + xorq %rax, 96(apState) + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits + .align 2 + .global KeccakAbsorb1024bits + .type KeccakAbsorb1024bits, %function +KeccakAbsorb1024bits: + + mXorState512 apInput, apState, 0 + mXorState512 apInput, apState, 64 + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits + .align 2 + .global KeccakAbsorb1088bits + .type KeccakAbsorb1088bits, %function +KeccakAbsorb1088bits: + + mXorState512 apInput, apState, 0 + mXorState512 apInput, apState, 64 + movq 128(apInput), %rax + xorq %rax, 128(apState) + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits + .align 2 + .global KeccakAbsorb1152bits + .type KeccakAbsorb1152bits, %function +KeccakAbsorb1152bits: + + mXorState512 apInput, apState, 0 + mXorState512 apInput, apState, 64 + mXorState128 apInput, apState, 128 + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits + .align 2 + .global KeccakAbsorb1344bits + .type KeccakAbsorb1344bits, %function +KeccakAbsorb1344bits: + + mXorState512 apInput, apState, 0 + mXorState512 apInput, apState, 64 + mXorState256 apInput, apState, 128 + movq 160(apInput), %rax + xorq %rax, 160(apState) + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakAbsorb, .-KeccakAbsorb + .align 2 + .global KeccakAbsorb + .type KeccakAbsorb, 
%function +KeccakAbsorb: + + movq apState, xpState + + test $16, aNbrWords + jz xorInputToState8 + mXorState512 apInput, xpState, 0 + mXorState512 apInput, xpState, 64 + addq $128, apInput + addq $128, xpState + +xorInputToState8: + test $8, aNbrWords + jz xorInputToState4 + mXorState512 apInput, xpState, 0 + addq $64, apInput + addq $64, xpState + +xorInputToState4: + test $4, aNbrWords + jz xorInputToState2 + mXorState256 apInput, xpState, 0 + addq $32, apInput + addq $32, xpState + +xorInputToState2: + test $2, aNbrWords + jz xorInputToState1 + mXorState128 apInput, xpState, 0 + addq $16, apInput + addq $16, xpState + +xorInputToState1: + test $1, aNbrWords + jz xorInputToStateDone + movq (apInput), %rax + xorq %rax, (xpState) + +xorInputToStateDone: + + mPushRegs + mKeccakPermutation + mPopRegs + ret + +# ------------------------------------------------------------------------- + + .size KeccakInitializeState, .-KeccakInitializeState + .align 2 + .global KeccakInitializeState + .type KeccakInitializeState, %function +KeccakInitializeState: + xorq %rax, %rax + xorq %rcx, %rcx + notq %rcx + + .if UseSIMD == 0 + movq %rax, 0*8(apState) + movq %rcx, 1*8(apState) + movq %rcx, 2*8(apState) + movq %rax, 3*8(apState) + movq %rax, 4*8(apState) + movq %rax, 5*8(apState) + movq %rax, 6*8(apState) + movq %rax, 7*8(apState) + movq %rcx, 8*8(apState) + movq %rax, 9*8(apState) + movq %rax, 10*8(apState) + movq %rax, 11*8(apState) + movq %rcx, 12*8(apState) + movq %rax, 13*8(apState) + movq %rax, 14*8(apState) + movq %rax, 15*8(apState) + movq %rax, 16*8(apState) + movq %rcx, 17*8(apState) + movq %rax, 18*8(apState) + movq %rax, 19*8(apState) + movq %rcx, 20*8(apState) + movq %rax, 21*8(apState) + movq %rax, 22*8(apState) + movq %rax, 23*8(apState) + movq %rax, 24*8(apState) + .else + pxor %xmm0, %xmm0 + + movq %rax, 0*8(apState) + movq %rcx, 1*8(apState) + movq %rcx, 2*8(apState) + movq %rax, 3*8(apState) + movdqu %xmm0, 4*8(apState) + movdqu %xmm0, 6*8(apState) + movq %rcx, 
8*8(apState) + movq %rax, 9*8(apState) + movdqu %xmm0, 10*8(apState) + movq %rcx, 12*8(apState) + movq %rax, 13*8(apState) + movdqu %xmm0, 14*8(apState) + movq %rax, 16*8(apState) + movq %rcx, 17*8(apState) + movdqu %xmm0, 18*8(apState) + movq %rcx, 20*8(apState) + movq %rax, 21*8(apState) + movdqu %xmm0, 22*8(apState) + movq %rax, 24*8(apState) + .endif + ret + +# ------------------------------------------------------------------------- + + .size KeccakExtract1024bits, .-KeccakExtract1024bits + .align 2 + .global KeccakExtract1024bits + .type KeccakExtract1024bits, %function +KeccakExtract1024bits: + + movq 0*8(apState), %rax + movq 1*8(apState), %rcx + movq 2*8(apState), %rdx + movq 3*8(apState), %r8 + notq %rcx + notq %rdx + movq %rax, 0*8(%rsi) + movq %rcx, 1*8(%rsi) + movq %rdx, 2*8(%rsi) + movq %r8, 3*8(%rsi) + + movq 4*8(apState), %rax + movq 5*8(apState), %rcx + movq 6*8(apState), %rdx + movq 7*8(apState), %r8 + movq %rax, 4*8(%rsi) + movq %rcx, 5*8(%rsi) + movq %rdx, 6*8(%rsi) + movq %r8, 7*8(%rsi) + + movq 8*8(apState), %rax + movq 9*8(apState), %rcx + movq 10*8(apState), %rdx + movq 11*8(apState), %r8 + notq %rax + movq %rax, 8*8(%rsi) + movq %rcx, 9*8(%rsi) + movq %rdx, 10*8(%rsi) + movq %r8, 11*8(%rsi) + + movq 12*8(apState), %rax + movq 13*8(apState), %rcx + movq 14*8(apState), %rdx + movq 15*8(apState), %r8 + notq %rax + movq %rax, 12*8(%rsi) + movq %rcx, 13*8(%rsi) + movq %rdx, 14*8(%rsi) + movq %r8, 15*8(%rsi) + ret + diff --git a/crypto/keccak/KeccakNISTInterface.c b/crypto/keccak/KeccakNISTInterface.c new file mode 100755 index 0000000..ee4ae17 --- /dev/null +++ b/crypto/keccak/KeccakNISTInterface.c @@ -0,0 +1,81 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". 
+ +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include "KeccakNISTInterface.h" +#include "KeccakF-1600-interface.h" + +HashReturn Keccak_Init(hashState *state, int hashbitlen) /* picks (rate, capacity) for hashbitlen, resets the sponge and records the output length */ +{ + switch(hashbitlen) { + case 0: // Default parameters, arbitrary length output + InitSponge((spongeState*)state, 1024, 576); + break; + case 224: + InitSponge((spongeState*)state, 1152, 448); + break; + case 256: + InitSponge((spongeState*)state, 1088, 512); + break; + case 384: + InitSponge((spongeState*)state, 832, 768); + break; + case 512: + InitSponge((spongeState*)state, 576, 1024); + break; + default: + return BAD_HASHLEN; + } + state->fixedOutputLength = hashbitlen; + return SUCCESS; +} + +HashReturn Keccak_Update(hashState *state, const BitSequence *data, DataLength databitlen) /* absorbs the whole bytes, then the trailing 1-7 bits moved to the low end of one byte */ +{ + if ((databitlen % 8) == 0) + return Absorb((spongeState*)state, data, databitlen); + else { + HashReturn ret = Absorb((spongeState*)state, data, databitlen - (databitlen % 8)); + if (ret == SUCCESS) { + unsigned char lastByte; + // Align the last partial byte to the least significant bits + lastByte = data[databitlen/8] >> (8 - (databitlen % 8)); + return Absorb((spongeState*)state, &lastByte, databitlen % 8); + } + else + return ret; + } +} + +HashReturn Keccak_Final(hashState *state, BitSequence *hashval) /* squeezes fixedOutputLength bits into hashval; Squeeze() applies the padding on its first call */ +{ + return Squeeze(state, hashval, state->fixedOutputLength); +} + +HashReturn Keccak_Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval) /* one-shot Init + Update + Final, stopping at the first failure */ +{ + hashState state; + HashReturn result; + + if ((hashbitlen != 224) && (hashbitlen != 256) && (hashbitlen != 384) && (hashbitlen != 512)) + return BAD_HASHLEN; // Only the four fixed output lengths available through this API + result = Keccak_Init(&state, hashbitlen); + if (result != SUCCESS) + return result; + result = Keccak_Update(&state, data,
databitlen); + if (result != SUCCESS) + return result; + result = Keccak_Final(&state, hashval); + return result; +} + diff --git a/crypto/keccak/KeccakNISTInterface.h b/crypto/keccak/KeccakNISTInterface.h new file mode 100755 index 0000000..a8f3c93 --- /dev/null +++ b/crypto/keccak/KeccakNISTInterface.h @@ -0,0 +1,70 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakNISTInterface_h_ +#define _KeccakNISTInterface_h_ + +#include "KeccakSponge.h" + +typedef unsigned char BitSequence; +typedef unsigned long long DataLength; +typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; + +typedef spongeState hashState; + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The rate r and capacity c values are determined from @a hashbitlen. + * @param state Pointer to the state of the sponge function to be initialized. + * @param hashbitlen The desired number of output bits, + * or 0 for Keccak[] with default parameters + * and arbitrarily-long output. + * @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512. + * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect. + */ +HashReturn Keccak_Init(hashState *state, int hashbitlen); +/** + * Function to give input data for the sponge function to absorb. + * @param state Pointer to the state of the sponge function initialized by Init(). + * @param data Pointer to the input data.
+ * When @a databitlen is not a multiple of 8, the last bits of data must be + * in the most significant bits of the last byte. + * @param databitlen The number of input bits provided in the input data. + * @pre In the previous call to Keccak_Update(), databitlen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_Update(hashState *state, const BitSequence *data, DataLength databitlen); +/** + * Function to squeeze output data from the sponge function. + * If @a hashbitlen was not 0 in the call to Keccak_Init(), the number of output bits is equal to @a hashbitlen. + * If @a hashbitlen was 0 in the call to Keccak_Init(), the output bits must be extracted using the Squeeze() function. + * @param state Pointer to the state of the sponge function initialized by Keccak_Init(). + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_Final(hashState *state, BitSequence *hashval); +/** + * Function to compute a hash using the Keccak[r, c] sponge function. + * The rate r and capacity c values are determined from @a hashbitlen. + * @param hashbitlen The desired number of output bits. + * @param data Pointer to the input data. + * When @a databitlen is not a multiple of 8, the last bits of data must be + * in the most significant bits of the last byte. + * @param databitlen The number of input bits provided in the input data. + * @param hashval Pointer to the buffer where to store the output data. + * @pre The value of hashbitlen must be one of 224, 256, 384 and 512. + * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
+ */ +HashReturn Keccak_Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); + +#endif diff --git a/crypto/keccak/KeccakSponge.c b/crypto/keccak/KeccakSponge.c new file mode 100755 index 0000000..5939ba4 --- /dev/null +++ b/crypto/keccak/KeccakSponge.c @@ -0,0 +1,266 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include "KeccakSponge.h" +#include "KeccakF-1600-interface.h" +#ifdef KeccakReference +#include "displayIntermediateValues.h" +#endif + +int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity) /* returns 1 unless rate+capacity == 1600 and rate is a non-zero multiple of 64 below 1600; otherwise resets the sponge */ +{ + if (rate+capacity != 1600) + return 1; + if ((rate <= 0) || (rate >= 1600) || ((rate % 64) != 0)) + return 1; + KeccakInitialize(); + state->rate = rate; + state->capacity = capacity; + state->fixedOutputLength = 0; + KeccakInitializeState(state->state); + memset(state->dataQueue, 0, KeccakMaximumRateInBytes); + state->bitsInQueue = 0; + state->squeezing = 0; + state->bitsAvailableForSqueezing = 0; + + return 0; +} + +void AbsorbQueue(spongeState *state) /* hands the queued block to the absorb routine matching state->rate, then empties the queue */ +{ + // state->bitsInQueue is assumed to be equal to state->rate + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8); + #endif +#ifdef ProvideFast576 + if (state->rate == 576) + KeccakAbsorb576bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast832 + if (state->rate == 832) + KeccakAbsorb832bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1024 + if (state->rate == 1024) + KeccakAbsorb1024bits(state->state,
state->dataQueue); + else +#endif +#ifdef ProvideFast1088 + if (state->rate == 1088) + KeccakAbsorb1088bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1152 + if (state->rate == 1152) + KeccakAbsorb1152bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1344 + if (state->rate == 1344) + KeccakAbsorb1344bits(state->state, state->dataQueue); + else +#endif + KeccakAbsorb(state->state, state->dataQueue, state->rate/64); + state->bitsInQueue = 0; +} + +int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen) /* whole rate-sized blocks are absorbed straight from data when the queue is empty; the remainder is buffered in dataQueue */ +{ + unsigned long long i, j, wholeBlocks; + unsigned int partialBlock, partialByte; + const unsigned char *curData; + + if ((state->bitsInQueue % 8) != 0) + return 1; // Only the last call may contain a partial byte + if (state->squeezing) + return 1; // Too late for additional input + + i = 0; + while(i < databitlen) { + if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) { + wholeBlocks = (databitlen-i)/state->rate; + curData = data+i/8; +#ifdef ProvideFast576 + if (state->rate == 576) { + for(j=0; j<wholeBlocks; j++, curData+=576/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb576bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast832 + if (state->rate == 832) { + for(j=0; j<wholeBlocks; j++, curData+=832/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb832bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1024 + if (state->rate == 1024) { + for(j=0; j<wholeBlocks; j++, curData+=1024/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1024bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1088 + if (state->rate == 1088) { + for(j=0; j<wholeBlocks; j++, curData+=1088/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1088bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1152 + if (state->rate == 1152) { + for(j=0; j<wholeBlocks; j++, curData+=1152/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1152bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1344 + if (state->rate == 1344) { + for(j=0; j<wholeBlocks; j++, curData+=1344/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1344bits(state->state, curData); + } + } + else +#endif + { +
for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb(state->state, curData, state->rate/64); + } + } + i += wholeBlocks*state->rate; + } + else { + partialBlock = (unsigned int)(databitlen - i); + if (partialBlock+state->bitsInQueue > state->rate) + partialBlock = state->rate-state->bitsInQueue; + partialByte = partialBlock % 8; + partialBlock -= partialByte; + memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8); + state->bitsInQueue += partialBlock; + i += partialBlock; + if (state->bitsInQueue == state->rate) + AbsorbQueue(state); + if (partialByte > 0) { + unsigned char mask = (1 << partialByte)-1; + state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask; + state->bitsInQueue += partialByte; + i += partialByte; + } + } + } + return 0; +} + +void PadAndSwitchToSqueezingPhase(spongeState *state) /* sets the first free bit and the last bit of the block as padding, absorbs, and extracts the first output block */ +{ + // Note: the bits are numbered from 0=LSB to 7=MSB + if (state->bitsInQueue + 1 == state->rate) { + state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); + AbsorbQueue(state); + memset(state->dataQueue, 0, state->rate/8); + } + else { + memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8); + state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); + } + state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8); + AbsorbQueue(state); + + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif +#ifdef ProvideFast1024 + if (state->rate == 1024) { + KeccakExtract1024bits(state->state, state->dataQueue); + state->bitsAvailableForSqueezing = 1024; + } + else +#endif + { + KeccakExtract(state->state, state->dataQueue, state->rate/64); + state->bitsAvailableForSqueezing = state->rate; + } + #ifdef KeccakReference + displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8); + #endif + state->squeezing = 1;
+} + +int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength) /* copies outputLength bits to output, permuting and re-extracting whenever the queue runs dry */ +{ + unsigned long long i; + unsigned int partialBlock; + + if (!state->squeezing) + PadAndSwitchToSqueezingPhase(state); + if ((outputLength % 8) != 0) + return 1; // Only multiple of 8 bits are allowed, truncation can be done at user level + + i = 0; + while(i < outputLength) { + if (state->bitsAvailableForSqueezing == 0) { + KeccakPermutation(state->state); +#ifdef ProvideFast1024 + if (state->rate == 1024) { + KeccakExtract1024bits(state->state, state->dataQueue); + state->bitsAvailableForSqueezing = 1024; + } + else +#endif + { + KeccakExtract(state->state, state->dataQueue, state->rate/64); + state->bitsAvailableForSqueezing = state->rate; + } + #ifdef KeccakReference + displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8); + #endif + } + partialBlock = state->bitsAvailableForSqueezing; + if ((unsigned long long)partialBlock > outputLength - i) + partialBlock = (unsigned int)(outputLength - i); + memcpy(output+i/8, state->dataQueue+(state->rate-state->bitsAvailableForSqueezing)/8, partialBlock/8); + state->bitsAvailableForSqueezing -= partialBlock; + i += partialBlock; + } + return 0; +} diff --git a/crypto/keccak/KeccakSponge.h b/crypto/keccak/KeccakSponge.h new file mode 100755 index 0000000..df3d797 --- /dev/null +++ b/crypto/keccak/KeccakSponge.h @@ -0,0 +1,76 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +#define KeccakPermutationSize 1600 +#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8) +#define KeccakMaximumRate 1536 +#define KeccakMaximumRateInBytes (KeccakMaximumRate/8) + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + +ALIGN typedef struct spongeStateStruct { + ALIGN unsigned char state[KeccakPermutationSizeInBytes]; /* permutation state handed to the KeccakF-1600 routines */ + ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes]; /* input bits awaiting absorption, or extracted output awaiting Squeeze() */ + unsigned int rate; /* in bits; InitSponge() requires a multiple of 64 */ + unsigned int capacity; /* in bits; InitSponge() requires rate+capacity == 1600 */ + unsigned int bitsInQueue; /* number of input bits currently held in dataQueue */ + unsigned int fixedOutputLength; /* output length in bits as set by Keccak_Init(); InitSponge() resets it to 0 */ + int squeezing; /* 0 while absorbing, set to 1 by PadAndSwitchToSqueezingPhase() */ + unsigned int bitsAvailableForSqueezing; /* output bits in dataQueue not yet copied out by Squeeze() */ +} spongeState; + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The sponge function is set to the absorbing phase. + * @param state Pointer to the state of the sponge function to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation. + * @return Zero if successful, 1 otherwise. + */ +int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity); +/** + * Function to give input data for the sponge function to absorb. + * @param state Pointer to the state of the sponge function initialized by InitSponge(). + * @param data Pointer to the input data. + * When @a databitlen is not a multiple of 8, the last bits of data must be + * in the least significant bits of the last byte. + * @param databitlen The number of input bits provided in the input data. + * @pre In the previous call to Absorb(), databitlen was a multiple of 8. + * @pre The sponge function must be in the absorbing phase, + * i.e., Squeeze() must not have been called before. + * @return Zero if successful, 1 otherwise.
+ */ +int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen); +/** + * Function to squeeze output data from the sponge function. + * If the sponge function was in the absorbing phase, this function + * switches it to the squeezing phase (also when it then returns 1). + * @param state Pointer to the state of the sponge function initialized by InitSponge(). + * @param output Pointer to the buffer where to store the output data. + * @param outputLength The number of output bits desired. + * It must be a multiple of 8. + * @return Zero if successful, 1 if @a outputLength is not a multiple of 8. + */ +int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength); + +#endif diff --git a/crypto/keccak/brg_endian.h b/crypto/keccak/brg_endian.h new file mode 100755 index 0000000..7226eb3 --- /dev/null +++ b/crypto/keccak/brg_endian.h @@ -0,0 +1,142 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose.
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# 
ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/crypto/keccak/genKAT.c b/crypto/keccak/genKAT.c new file mode 100755 index 0000000..6f0f55f --- /dev/null +++ b/crypto/keccak/genKAT.c @@ -0,0 +1,692 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ +*/ + +#include +#include +#include +#include +#include + +#include "KeccakDuplex.h" +#include "KeccakNISTInterface.h" +#include "KeccakSponge.h" + +#define MAX_MARKER_LEN 50 +#define SUBMITTER_INFO_LEN 128 + +typedef enum { KAT_SUCCESS = 0, KAT_FILE_OPEN_ERROR = 1, KAT_HEADER_ERROR = 2, KAT_DATA_ERROR = 3, KAT_HASH_ERROR = 4 } STATUS_CODES; + +#define AllowExtendedFunctions +#define ExcludeExtremelyLong + +#ifdef AllowExtendedFunctions +#define SqueezingOutputLength 4096 +#endif + +STATUS_CODES genShortMsg(int hashbitlen); +STATUS_CODES genLongMsg(int hashbitlen); +STATUS_CODES genExtremelyLongMsg(int hashbitlen); +STATUS_CODES genMonteCarlo(int hashbitlen); +#ifdef AllowExtendedFunctions +STATUS_CODES genMonteCarloSqueezing(int hashbitlen); +STATUS_CODES genShortMsgSponge(unsigned int rate, unsigned int capacity, int outputLength, const char *fileName); +STATUS_CODES genDuplexKAT(unsigned int rate, unsigned int capacity, const char *fileName); +#endif +int FindMarker(FILE *infile, const char *marker); +int ReadHex(FILE *infile, BitSequence *A, int Length, char *str); +void fprintBstr(FILE *fp, char *S, BitSequence *A, int L); + + +STATUS_CODES +genKAT_main() +{ + 
int i, ret_val, bitlens[4] = { 224, 256, 384, 512 }; + +#ifdef AllowExtendedFunctions + if ( (ret_val = genShortMsgSponge(1024, 576, 4096, "ShortMsgKAT_0.txt")) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; + if ( (ret_val = genLongMsg(0)) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; +#ifndef ExcludeExtremelyLong + if ( (ret_val = genExtremelyLongMsg(0)) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; +#endif + if ( (ret_val = genMonteCarloSqueezing(0)) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; +#endif + + for ( i=0; i<4; i++ ) { + if ( (ret_val = genShortMsg(bitlens[i])) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; + if ( (ret_val = genLongMsg(bitlens[i])) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; +#ifndef ExcludeExtremelyLong + if ( (ret_val = genExtremelyLongMsg(bitlens[i])) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; +#endif + if ( (ret_val = genMonteCarlo(bitlens[i])) != KAT_SUCCESS ) + return (STATUS_CODES)ret_val; + } + +#ifdef AllowExtendedFunctions + /* Other case examples */ + genShortMsgSponge(1344, 256, 4096, "ShortMsgKAT_r1344c256.txt"); + /* Duplexing */ + //genDuplexKAT(1024, 576, "DuplexKAT_r1024c576.txt"); + //genDuplexKAT(1025, 575, "DuplexKAT_r1025c575.txt"); + genDuplexKAT(1026, 574, "DuplexKAT_r1026c574.txt"); + genDuplexKAT(1027, 573, "DuplexKAT_r1027c573.txt"); + //genDuplexKAT(1028, 572, "DuplexKAT_r1028c572.txt"); + //genDuplexKAT(1029, 571, "DuplexKAT_r1029c571.txt"); + //genDuplexKAT(1030, 570, "DuplexKAT_r1030c570.txt"); + //genDuplexKAT(1031, 569, "DuplexKAT_r1031c569.txt"); + //genDuplexKAT(1032, 568, "DuplexKAT_r1032c568.txt"); +#endif + + return KAT_SUCCESS; +} + +STATUS_CODES +genShortMsg(int hashbitlen) +{ + char fn[32], line[SUBMITTER_INFO_LEN]; + int msglen, msgbytelen, done; + BitSequence Msg[256], MD[64]; + FILE *fp_in, *fp_out; + + if ( (fp_in = fopen("ShortMsgKAT.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + sprintf(fn, "ShortMsgKAT_%d.txt", 
hashbitlen); + if ( (fp_out = fopen(fn, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fn); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fn); + if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genShortMsg: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n", line); + } + else { + printf("genShortMsg: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + done = 0; + do { + if ( FindMarker(fp_in, "Len = ") ) + fscanf(fp_in, "%d", &msglen); + else { + done = 1; + break; + } + msgbytelen = (msglen+7)/8; + + if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) { + printf("ERROR: unable to read 'Msg' from \n"); + return KAT_DATA_ERROR; + } + Hash(hashbitlen, Msg, msglen, MD); + fprintf(fp_out, "\nLen = %d\n", msglen); + fprintBstr(fp_out, "Msg = ", Msg, msgbytelen); + fprintBstr(fp_out, "MD = ", MD, hashbitlen/8); + } while ( !done ); + printf("finished ShortMsgKAT for <%d>\n", hashbitlen); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} + +#ifdef AllowExtendedFunctions +STATUS_CODES +genShortMsgSponge(unsigned int rate, unsigned int capacity, int outputLength, const char *fileName) +{ + char line[SUBMITTER_INFO_LEN]; + int msglen, msgbytelen, done; + BitSequence Msg[256]; + BitSequence Squeezed[SqueezingOutputLength/8]; + spongeState state; + FILE *fp_in, *fp_out; + + if (outputLength > SqueezingOutputLength) { + printf("Requested output length too long.\n"); + return KAT_HASH_ERROR; + } + + if ( (fp_in = fopen("ShortMsgKAT.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + if ( (fp_out = fopen(fileName, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fileName); + return KAT_FILE_OPEN_ERROR; + 
} + fprintf(fp_out, "# %s\n", fileName); + if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genShortMsg: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n", line); + } + else { + printf("genShortMsg: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + done = 0; + do { + if ( FindMarker(fp_in, "Len = ") ) + fscanf(fp_in, "%d", &msglen); + else { + done = 1; + break; + } + msgbytelen = (msglen+7)/8; + + if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) { + printf("ERROR: unable to read 'Msg' from \n"); + return KAT_DATA_ERROR; + } + fprintf(fp_out, "\nLen = %d\n", msglen); + fprintBstr(fp_out, "Msg = ", Msg, msgbytelen); + InitSponge(&state, rate, capacity); + if ((msglen % 8 ) != 0) + // From NIST convention to internal convention for last byte + Msg[msgbytelen - 1] >>= 8 - (msglen % 8); + Absorb(&state, Msg, msglen); + Squeeze(&state, Squeezed, outputLength); + fprintBstr(fp_out, "Squeezed = ", Squeezed, SqueezingOutputLength/8); + } while ( !done ); + printf("finished ShortMsgKAT for <%s>\n", fileName); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} +#endif + +STATUS_CODES +genLongMsg(int hashbitlen) +{ + char fn[32], line[SUBMITTER_INFO_LEN]; + int msglen, msgbytelen, done; + BitSequence Msg[4288], MD[64]; +#ifdef AllowExtendedFunctions + BitSequence Squeezed[SqueezingOutputLength/8]; + hashState state; +#endif + FILE *fp_in, *fp_out; + + if ( (fp_in = fopen("LongMsgKAT.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + sprintf(fn, "LongMsgKAT_%d.txt", hashbitlen); + if ( (fp_out = fopen(fn, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fn); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fn); + 
if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genLongMsg: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n\n", line); + } + else { + printf("genLongMsg: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + done = 0; + do { + if ( FindMarker(fp_in, "Len = ") ) + fscanf(fp_in, "%d", &msglen); + else + break; + msgbytelen = (msglen+7)/8; + + if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) { + printf("ERROR: unable to read 'Msg' from \n"); + return KAT_DATA_ERROR; + } +#ifdef AllowExtendedFunctions + if (hashbitlen > 0) + Hash(hashbitlen, Msg, msglen, MD); + else { + Init(&state, hashbitlen); + Update(&state, Msg, msglen); + Final(&state, 0); + Squeeze(&state, Squeezed, SqueezingOutputLength); + } +#else + Hash(hashbitlen, Msg, msglen, MD); +#endif + fprintf(fp_out, "Len = %d\n", msglen); + fprintBstr(fp_out, "Msg = ", Msg, msgbytelen); +#ifdef AllowExtendedFunctions + if (hashbitlen > 0) + fprintBstr(fp_out, "MD = ", MD, hashbitlen/8); + else + fprintBstr(fp_out, "Squeezed = ", Squeezed, SqueezingOutputLength/8); +#else + fprintBstr(fp_out, "MD = ", MD, hashbitlen/8); +#endif + } while ( !done ); + printf("finished LongMsgKAT for <%d>\n", hashbitlen); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} + +STATUS_CODES +genExtremelyLongMsg(int hashbitlen) +{ + char fn[32], line[SUBMITTER_INFO_LEN]; + BitSequence Text[65], MD[64]; +#ifdef AllowExtendedFunctions + BitSequence Squeezed[SqueezingOutputLength/8]; +#endif + int i, repeat; + FILE *fp_in, *fp_out; + hashState state; + HashReturn retval; + + if ( (fp_in = fopen("ExtremelyLongMsgKAT.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + sprintf(fn, 
"ExtremelyLongMsgKAT_%d.txt", hashbitlen); + if ( (fp_out = fopen(fn, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fn); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fn); + if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genExtremelyLongMsg: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n\n", line); + } + else { + printf("genExtremelyLongMsg: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + if ( FindMarker(fp_in, "Repeat = ") ) + fscanf(fp_in, "%d", &repeat); + else { + printf("ERROR: unable to read 'Repeat' from \n"); + return KAT_DATA_ERROR; + } + + if ( FindMarker(fp_in, "Text = ") ) + fscanf(fp_in, "%s", Text); + else { + printf("ERROR: unable to read 'Text' from \n"); + return KAT_DATA_ERROR; + } + +// memcpy(Text, "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno", 64); + + if ( (retval = Init(&state, hashbitlen)) != KAT_SUCCESS ) { + printf("Init returned <%d> in genExtremelyLongMsg\n", retval); + return KAT_HASH_ERROR; + } + for ( i=0; i in genExtremelyLongMsg\n", retval); + return KAT_HASH_ERROR; + } + if ( (retval = Final(&state, MD)) != KAT_SUCCESS ) { + printf("Final returned <%d> in genExtremelyLongMsg\n", retval); + return KAT_HASH_ERROR; + } +#ifdef AllowExtendedFunctions + if (hashbitlen == 0) + Squeeze(&state, Squeezed, SqueezingOutputLength); +#endif + fprintf(fp_out, "Repeat = %d\n", repeat); + fprintf(fp_out, "Text = %s\n", Text); +#ifdef AllowExtendedFunctions + if (hashbitlen > 0) + fprintBstr(fp_out, "MD = ", MD, hashbitlen/8); + else + fprintBstr(fp_out, "Squeezed = ", Squeezed, SqueezingOutputLength/8); +#else + fprintBstr(fp_out, "MD = ", MD, hashbitlen/8); +#endif + printf("finished ExtremelyLongMsgKAT for 
<%d>\n", hashbitlen); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} + +STATUS_CODES +genMonteCarlo(int hashbitlen) +{ + char fn[32], line[SUBMITTER_INFO_LEN]; + BitSequence Seed[128], Msg[128], MD[64], Temp[128]; + int i, j, bytelen; + FILE *fp_in, *fp_out; + + if ( (fp_in = fopen("MonteCarlo.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + sprintf(fn, "MonteCarlo_%d.txt", hashbitlen); + if ( (fp_out = fopen(fn, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fn); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fn); + if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genMonteCarlo: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n\n", line); + } + else { + printf("genMonteCarlo: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + if ( !ReadHex(fp_in, Seed, 128, "Seed = ") ) { + printf("ERROR: unable to read 'Seed' from \n"); + return KAT_DATA_ERROR; + } + + bytelen = hashbitlen / 8; + memcpy(Msg, Seed, 128); + fprintBstr(fp_out, "Seed = ", Seed, 128); + for ( j=0; j<100; j++ ) { + for ( i=0; i<1000; i++ ) { + Hash(hashbitlen, Msg, 1024, MD); + memcpy(Temp, Msg, 128-bytelen); + memcpy(Msg, MD, bytelen); + memcpy(Msg+bytelen, Temp, 128-bytelen); + } + fprintf(fp_out, "\nj = %d\n", j); + fprintBstr(fp_out, "MD = ", MD, bytelen); + } + printf("finished MonteCarloKAT for <%d>\n", hashbitlen); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} + +#ifdef AllowExtendedFunctions +STATUS_CODES +genMonteCarloSqueezing(int hashbitlen) +{ + char fn[32], line[SUBMITTER_INFO_LEN]; + BitSequence Seed[128], MD[64]; + int i, j, bytelen; + FILE *fp_in, *fp_out; + hashState state; + HashReturn retval; + 
+ if ( (fp_in = fopen("MonteCarlo.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + sprintf(fn, "MonteCarlo_%d.txt", hashbitlen); + if ( (fp_out = fopen(fn, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fn); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fn); + if ( FindMarker(fp_in, "# Algorithm Name:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Algorithm Name:%s\n", line); + } + else { + printf("genMonteCarlo: Couldn't read Algorithm Name\n"); + return KAT_HEADER_ERROR; + } + if ( FindMarker(fp_in, "# Principal Submitter:") ) { + fscanf(fp_in, "%[^\n]\n", line); + fprintf(fp_out, "# Principal Submitter:%s\n\n", line); + } + else { + printf("genMonteCarlo: Couldn't read Principal Submitter\n"); + return KAT_HEADER_ERROR; + } + + if ( !ReadHex(fp_in, Seed, 128, "Seed = ") ) { + printf("ERROR: unable to read 'Seed' from \n"); + return KAT_DATA_ERROR; + } + + fprintBstr(fp_out, "Seed = ", Seed, 128); + + if ( (retval = Init(&state, hashbitlen)) != KAT_SUCCESS ) { + printf("Init returned <%d> in genMonteCarloSqueezing\n", retval); + return KAT_HASH_ERROR; + } + if ( (retval = Update(&state, Seed, 128*8)) != KAT_SUCCESS ) { + printf("Update returned <%d> in genMonteCarloSqueezing\n", retval); + return KAT_HASH_ERROR; + } + if ( (retval = Final(&state, 0)) != KAT_SUCCESS ) { + printf("Final returned <%d> in genMonteCarloSqueezing\n", retval); + return KAT_HASH_ERROR; + } + bytelen = 64; + for ( j=0; j<100; j++ ) { + for ( i=0; i<1000; i++ ) { + if ( (retval = Squeeze(&state, MD, bytelen*8)) != KAT_SUCCESS ) { + printf("Squeeze returned <%d> in genMonteCarloSqueezing\n", retval); + return KAT_HASH_ERROR; + } + } + fprintf(fp_out, "\nj = %d\n", j); + fprintBstr(fp_out, "MD = ", MD, bytelen); + } + printf("finished MonteCarloKAT for <%d>\n", hashbitlen); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} + +STATUS_CODES +genDuplexKAT(unsigned int rate, unsigned int 
capacity, const char *fileName) +{ + int inLen, inByteLen, outLen, outByteLen, done; + BitSequence in[256]; + BitSequence out[256]; + FILE *fp_in, *fp_out; + duplexState state; + + if ( (fp_in = fopen("DuplexKAT.txt", "r")) == NULL ) { + printf("Couldn't open for read\n"); + return KAT_FILE_OPEN_ERROR; + } + + if ( (fp_out = fopen(fileName, "w")) == NULL ) { + printf("Couldn't open <%s> for write\n", fileName); + return KAT_FILE_OPEN_ERROR; + } + fprintf(fp_out, "# %s\n", fileName); + fprintf(fp_out, "# Algorithm: Duplex[f=Keccak-f[1600], pad=pad10*1, r=%d, c=%d, \xCF\x81max=%d]\n", rate, capacity, rate-2); + + InitDuplex(&state, rate, capacity); + done = 0; + outLen = rate; + outByteLen = (outLen+7)/8; + do { + if ( FindMarker(fp_in, "InLen = ") ) + fscanf(fp_in, "%d", &inLen); + else { + done = 1; + break; + } + inByteLen = (inLen+7)/8; + + if ( !ReadHex(fp_in, in, inByteLen, "In = ") ) { + printf("ERROR: unable to read 'In' from \n"); + return KAT_DATA_ERROR; + } + if (inLen <= rate-2) { + fprintf(fp_out, "\nInLen = %d\n", inLen); + fprintBstr(fp_out, "In = ", in, inByteLen); + Duplexing(&state, in, inLen, out, outLen); + fprintf(fp_out, "OutLen = %d\n", outLen); + fprintBstr(fp_out, "Out = ", out, outByteLen); + } + } while ( !done ); + printf("finished DuplexKAT for <%s>\n", fileName); + + fclose(fp_in); + fclose(fp_out); + + return KAT_SUCCESS; +} +#endif + +// +// ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.) 
+// +int +FindMarker(FILE *infile, const char *marker) +{ + char line[MAX_MARKER_LEN]; + int i, len; + + len = (int)strlen(marker); + if ( len > MAX_MARKER_LEN-1 ) + len = MAX_MARKER_LEN-1; + + for ( i=0; i= '0') && (ch <= '9') ) + ich = ch - '0'; + else if ( (ch >= 'A') && (ch <= 'F') ) + ich = ch - 'A' + 10; + else if ( (ch >= 'a') && (ch <= 'f') ) + ich = ch - 'a' + 10; + + for ( i=0; i> 4); + A[Length-1] = (A[Length-1] << 4) | ich; + } + else + return 0; + + return 1; +} + +void +fprintBstr(FILE *fp, char *S, BitSequence *A, int L) +{ + int i; + + fprintf(fp, "%s", S); + + for ( i=0; i /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t2.tst b/test/t2.tst index 8453f31..fa2bd62 100644 --- a/test/t2.tst +++ b/test/t2.tst @@ -9,14 +9,14 @@ for algo in zlib ppmd do for tf in bin.dat share.dat inc.dat do - for cksum in CRC64 SHA256 SHA512 SKEIN256 SKEIN512 + for cksum in CRC64 SHA256 SHA512 SKEIN256 SKEIN512 KECCAK256 KECCAK512 do cmd="../../pcompress -c ${algo} -l 6 -s 1m -S ${cksum} ${tf}" echo "Running $cmd" eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression failed." exit 1 fi cmd="../../pcompress -d ${tf}.pz ${tf}.1" @@ -24,14 +24,14 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Decompression failed." exit 1 fi diff ${tf} ${tf}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t3.tst b/test/t3.tst index f71c66a..4fc51e4 100644 --- a/test/t3.tst +++ b/test/t3.tst @@ -29,7 +29,7 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: ${cmd} errored." exit 1 fi mv ${tf}.pz ${tf}.${algo} @@ -62,13 +62,13 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Decompression failed." 
exit 1 fi diff ${tf}.${algo} ${tf}.${algo}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.${algo}.pz ${tf}.${algo}.1 diff --git a/test/t4.tst b/test/t4.tst index b65a933..52f8787 100644 --- a/test/t4.tst +++ b/test/t4.tst @@ -33,7 +33,7 @@ do diff ${tf} ${tf}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t5.tst b/test/t5.tst index 2d3fd7e..b574dc3 100644 --- a/test/t5.tst +++ b/test/t5.tst @@ -9,7 +9,7 @@ for algo in lzfx adapt2 do for tf in comb_d.dat do - for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64" + for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64" "-e -P" "-e -P -S KECCAK256" "-D -e -L -S KECCAK512" do for seg in 2m 100m do @@ -26,7 +26,7 @@ do pw=`cat /tmp/pwf` if [ "$pw" = "sillypassword" ] then - echo "ERROR: Password file /tmp/pwf not zeroed!" + echo "FATAL: Password file /tmp/pwf not zeroed!" exit 1 fi @@ -50,7 +50,21 @@ do pw=`cat /tmp/pwf` if [ "$pw" = "sillypassword" ] then - echo "ERROR: Password file /tmp/pwf not zeroed!" + echo "FATAL: Password file /tmp/pwf not zeroed!" + exit 1 + fi + + # + # Now try decompression with invalid password. It should + # fail. + # + rm -f ${tf}.1 + cmd="../../pcompress -d -w /tmp/pwf ${tf}.pz ${tf}.1" + echo "Running $cmd" + eval $cmd + if [ $? -eq 0 ] + then + echo "FATAL: Decompression did not fail where expected." exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t6.tst b/test/t6.tst index 3de85bc..1bdddac 100644 --- a/test/t6.tst +++ b/test/t6.tst @@ -21,7 +21,7 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression errored." exit 1 fi cmd="../../pcompress -d ${tf}.pz ${tf}.1" @@ -29,13 +29,13 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." 
+ echo "FATAL: Decompression errored." exit 1 fi diff ${tf} ${tf}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t7.tst b/test/t7.tst index 3fdd1e0..dd81e7e 100644 --- a/test/t7.tst +++ b/test/t7.tst @@ -19,14 +19,14 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression errored." exit 1 fi pw=`cat /tmp/pwf` if [ "$pw" = "sillypassword" ] then - echo "ERROR: Password file /tmp/pwf not zeroed!" + echo "FATAL: Password file /tmp/pwf not zeroed!" exit 1 fi @@ -36,21 +36,21 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Decompression errored." exit 1 fi diff ${tf} ${tf}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi pw=`cat /tmp/pwf` if [ "$pw" = "sillypassword" ] then - echo "ERROR: Password file /tmp/pwf not zeroed!" + echo "FATAL: Password file /tmp/pwf not zeroed!" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t8.tst b/test/t8.tst index 302cf7b..2df068b 100644 --- a/test/t8.tst +++ b/test/t8.tst @@ -26,14 +26,14 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Decompression errored." exit 1 fi diff ${tf} ${tf}.1 > /dev/null if [ $? -ne 0 ] then - echo "${cmd}: Decompression was not correct" + echo "FATAL: Decompression was not correct" exit 1 fi rm -f ${tf}.pz ${tf}.1 diff --git a/test/t9.tst b/test/t9.tst index fcfbc60..7cdf1c7 100644 --- a/test/t9.tst +++ b/test/t9.tst @@ -12,7 +12,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected" + echo "FATAL: Compression DID NOT ERROR where expected" exit 1 fi done @@ -26,14 +26,14 @@ do eval $cmd if [ $? 
-eq 0 ] then - echo "${cmd} DID NOT ERROR where expected" + echo "FATAL: Compression DID NOT ERROR where expected" rm -f combined.dat.pz exit 1 fi done done -for feat in "-S CRC64" "-S SKEIN256" "-S SKEIN512" "-S SHA256" "-S SHA512" +for feat in "-S CRC64" "-S SKEIN256" "-S SKEIN512" "-S SHA256" "-S SHA512" "-S KECCAK256" "-S KECCAK512" do rm -f combined.dat.1.pz rm -f combined.dat.pz @@ -44,7 +44,7 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression errored." rm -f combined.dat.pz exit 1 fi @@ -55,7 +55,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected." + echo "FATAL: Decompression DID NOT ERROR where expected." rm -f combined.dat.pz rm -f combined.dat.1 exit 1 @@ -69,7 +69,7 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression errored." rm -f combined.dat.pz exit 1 fi @@ -81,7 +81,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected." + echo "FATAL: Decompression DID NOT ERROR where expected." rm -f combined.dat.pz rm -f combined.dat.1 rm -f combined.dat.1.pz @@ -96,7 +96,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected." + echo "FATAL: Decompression DID NOT ERROR where expected." rm -f combined.dat.pz rm -f combined.dat.1 rm -f combined.dat.1.pz @@ -110,14 +110,14 @@ do eval $cmd if [ $? -ne 0 ] then - echo "${cmd} errored." + echo "FATAL: Compression errored." rm -f combined.dat.pz exit 1 fi pw=`cat /tmp/pwf` if [ "$pw" = "plainpasswd" ] then - echo "ERROR: Password file was not zeroed" + echo "FATAL: Password file was not zeroed" rm -f /tmp/pwf combined.dat.pz exit 1 fi @@ -130,7 +130,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected." + echo "FATAL: Decompression DID NOT ERROR where expected." rm -f combined.dat.pz rm -f combined.dat.1 rm -f combined.dat.1.pz @@ -146,7 +146,7 @@ do eval $cmd if [ $? -eq 0 ] then - echo "${cmd} DID NOT ERROR where expected." 
+ echo "FATAL: Decompression DID NOT ERROR where expected." rm -f combined.dat.pz rm -f combined.dat.1 rm -f combined.dat.1.pz