Add support for 64-bit Keccak implementation.

Sanitize error message from tests.
Add more tests.
Improve platform detection in config script.
This commit is contained in:
Moinak Ghosh 2012-12-08 14:19:01 +05:30
parent 970d8163a2
commit 75c81b5e9c
32 changed files with 5104 additions and 53 deletions

View file

@ -97,9 +97,30 @@ LIBBSCLIB = @LIBBSCLIB@
LIBBSCGEN_OPT = -fopenmp
LIBBSCCPPFLAGS = -I$(LIBBSCDIR)/libbsc -DENABLE_PC_LIBBSC
KECCAK_SRC_COMMON = crypto/keccak/genKAT.c crypto/keccak/KeccakDuplex.c \
crypto/keccak/KeccakNISTInterface.c crypto/keccak/KeccakSponge.c
KECCAK_SRC_OPT64 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-opt64.c
KECCAK_SRC_OPT64_ASM1 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-x86-64-asm.c
KECCAK_SRC_OPT64_ASM2 = crypto/keccak/KeccakF-1600-x86-64-gas.s
KECCAK_HDRS_COMMON = crypto/keccak/KeccakDuplex.h crypto/keccak/KeccakNISTInterface.h \
crypto/keccak/KeccakSponge.h crypto/keccak/KeccakF-1600-interface.h
KECCAK_HDRS_OPT = $(KECCAK_HDRS_COMMON) \
crypto/keccak/brg_endian.h crypto/keccak/KeccakF-1600-unrolling.macros
KECCAK_HDRS_OPT64 = $(KECCAK_HDRS_OPT) crypto/keccak/KeccakF-1600-opt64-settings.h \
crypto/keccak/KeccakF-1600-64.macros crypto/keccak/KeccakF-1600-simd64.macros \
crypto/keccak/KeccakF-1600-simd128.macros
KECCAK_HDRS_OPT64_ASM = $(KECCAK_HDRS_OPT64)
KECCAK_SRCS = @KECCAK_SRCS@
KECCAK_SRCS_ASM = @KECCAK_SRCS_ASM@
KECCAK_HDRS = @KECCAK_HDRS@
KECCAK_OBJS = $(KECCAK_SRCS:.c=.o)
KECCAK_OBJS_ASM = $(KECCAK_SRCS_ASM:.s=.o)
BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ lzp/*~ utils/*~ crypto/sha2/*~ \
crypto/sha2/intel/*~ crypto/aes/*~ crypto/scrypt/*~ crypto/*~ rabin/global/*~ \
delta2/*~
delta2/*~ crypto/keccak/*~
RM = rm -f
RM_RF = rm -rf
@ -107,14 +128,14 @@ COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \
-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I@OPENSSL_INCDIR@ \
-I./crypto/sha2 -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ \
@LIBBZ2_INC@ @LIBZ_INC@
@LIBBZ2_INC@ @LIBZ_INC@ -I./crypto/keccak
COMMON_VEC_FLAGS = -ftree-vectorize
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
LDLIBS = -ldl -L@LIBBZ2_DIR@ -lbz2 -L@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \
-L@OPENSSL_LIBDIR@ -lcrypto -lrt
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@
$(SKEIN_BLOCK_OBJ) @SHA256ASM_OBJS@ @SHA256_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM)
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
@ -124,6 +145,7 @@ DEBUG_LOOP_OPTFLAGS =
DEBUG_GEN_OPT = -O -fno-omit-frame-pointer @LIBBSCGEN_OPT@
DEBUG_RABIN_OPT = -O -fno-omit-frame-pointer
DEBUG_CPPFLAGS = $(COMMON_CPPFLAGS)
DEBUG_FPTR_FLAG =
RELEASE_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
RELEASE_COMPILE = gcc -m64 -msse3 -c
@ -133,6 +155,7 @@ RELEASE_LOOP_OPTFLAGS = $(COMMON_LOOP_OPTFLAGS)
RELEASE_CPPFLAGS = $(COMMON_CPPFLAGS) -DNDEBUG
RELEASE_GEN_OPT = -O3 @LIBBSCGEN_OPT@
RELEASE_RABIN_OPT = -O2
RELEASE_FPTR_FLAG = -fomit-frame-pointer
NO_SLAB_CPPFLAGS = -DDEBUG_NO_SLAB
DEBUG_STATS_CPPFLAGS = -DDEBUG_STATS
@ -147,6 +170,10 @@ GEN_OPT = @GEN_OPT@
RABIN_OPT = @RABIN_OPT@
PREFIX=@PREFIX@
SKEIN_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
SHA256_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
KECCAK_FLAGS = $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) @FPTR_FLAG@
all: $(PROG)
$(LZMAOBJS): $(LZMASRCS) $(LZMAHDRS)
@ -177,17 +204,23 @@ $(DELTA2OBJS): $(DELTA2SRCS) $(DELTA2HDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@
$(COMPILE) $(SKEIN_FLAGS) $(SKEIN_BLOCK_SRC) -o $@
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(COMPILE) $(SKEIN_FLAGS) $(@:.o=.c) -o $@
$(SHA256_OBJS): $(SHA256_SRCS) $(SHA256_HDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(COMPILE) $(SHA256_FLAGS) $(@:.o=.c) -o $@
$(SHA256ASM_OBJS): $(SHA256ASM_SRCS)
$(YASM) -o $@ $(@:.o=.asm)
# Build the Keccak C objects. $(@:.o=.c) maps the object being built back to
# its .c source. Every object lists all Keccak sources and headers as
# prerequisites, so touching any one of them rebuilds all Keccak objects.
$(KECCAK_OBJS): $(KECCAK_SRCS) $(KECCAK_HDRS)
$(COMPILE) $(KECCAK_FLAGS) $(@:.o=.c) -o $@
# Build the Keccak assembly objects from their .s sources using the same
# $(COMPILE) command and flags. KECCAK_SRCS_ASM is filled in by the config
# script and may be empty.
$(KECCAK_OBJS_ASM): $(KECCAK_SRCS_ASM) $(KECCAK_HDRS)
$(COMPILE) $(KECCAK_FLAGS) $(@:.o=.s) -o $@
# Build the bundled libbsc library with a sub-make in $(LIBBSCDIR).
# $(MAKE) is used instead of a literal "make" so that -j/jobserver, -n and
# other flags are passed down; "&&" stops the recipe if the cd fails instead
# of running make in the wrong directory.
$(LIBBSCLIB):
	cd $(LIBBSCDIR) && $(MAKE)

75
config
View file

@ -43,6 +43,44 @@ sha256asmobjs=
sha256objs=
keylen=
yasm=yasm
keccak_srcs=
keccak_hdrs=
keccak_srcs_asm=
# Try a simple compilation. The probe program prints sizeof (long), which is
# used below to decide whether this is a 64-bit platform.
cat << _EOF > tst.c
#include <stdio.h>
int
main(void)
{
	long l;
	/* sizeof yields a size_t; cast it so it matches the %d conversion. */
	printf("%d\n", (int) sizeof (l));
	return (0);
}
_EOF
gcc tst.c -o tst
if [ $? -ne 0 ]
then
	echo "ERROR:"
	echo "Cannot compile a simple program. GCC 4.1 and above is required"
	echo "to build this program. Please include installation bindir of GCC in the PATH."
	echo ""
	rm -f tst.c
	exit 1
fi
# Check bitness of system
bitness=`./tst`
rm -f tst tst.c
# Quote and default the value: if the probe printed nothing, an unquoted
# test would be a syntax error and the check would silently pass.
if [ "${bitness:-0}" -lt 8 ]
then
	echo "ERROR:"
	echo "Only 64-bit platforms are supported."
	echo ""
	exit 1
fi
while [ "${arg1}" != "" ]
do
@ -104,6 +142,21 @@ else
exit 1
fi
# Check GCC version: require 4.1 or later.
# gcc -dumpversion prints e.g. "4.7.2"; newer compilers may print only the
# major number, so a missing minor component defaults to 0.
vers=`gcc -dumpversion`
OIFS="$IFS"
IFS=.
set -- ${vers}
IFS="$OIFS"
gcc_major=${1:-0}
gcc_minor=${2:-0}
# Only look at the minor number when the major number is exactly 4.
# Testing "minor < 1" on its own would wrongly reject releases such as 5.0.
if [ "$gcc_major" -lt 4 ] || { [ "$gcc_major" -eq 4 ] && [ "$gcc_minor" -lt 1 ]; }
then
	echo "ERROR:"
	echo "GCC version 4.1 or above is required."
	echo ""
	exit 1
fi
echo $plat | egrep 'x86_64|amd64' > /dev/null
if [ $? -eq 0 ]
then
@ -135,6 +188,18 @@ then
echo "Yasm version 1.1 or later is required to build on x64 platforms"
exit 1
fi
# Select the Keccak implementation substituted into the Makefile. Debug
# builds use the C permutation (KECCAK_SRC_OPT64); other builds use the
# assembly variant, which needs both KECCAK_SRC_OPT64_ASM1 (C) and
# KECCAK_SRC_OPT64_ASM2 (the .s file).
if [ $debug -eq 1 ]
then
	keccak_srcs='\$\(KECCAK_SRC_OPT64\)'
	keccak_hdrs='\$\(KECCAK_HDRS_OPT64\)'
else
	keccak_srcs='\$\(KECCAK_SRC_OPT64_ASM1\)'
	keccak_srcs_asm='\$\(KECCAK_SRC_OPT64_ASM2\)'
	keccak_hdrs='\$\(KECCAK_HDRS_OPT64_ASM\)'
fi
else
	# Fallback branch (presumably non-x86_64 platforms): C implementation.
	# The header list must be set here too, otherwise KECCAK_HDRS is empty
	# in the generated Makefile and header edits never trigger a rebuild.
	keccak_srcs='\$\(KECCAK_SRC_OPT64\)'
	keccak_hdrs='\$\(KECCAK_HDRS_OPT64\)'
fi
# Detect OpenSSL library
@ -278,6 +343,7 @@ libbsccppflagsvar="LIBBSCCPPFLAGS"
sha256asmobjsvar="SHA256ASM_OBJS"
sha256objsvar="SHA256_OBJS"
yasmvar="YASM"
fptr_flag_var="FPTR_FLAG"
openssllibdirvar="OPENSSL_LIBDIR"
opensslincdirvar="OPENSSL_INCDIR"
@ -286,6 +352,10 @@ libzlibdirvar="LIBZ_DIR"
libbz2incvar="LIBBZ2_INC"
libzincvar="LIBZ_INC"
keccak_srcs_var="KECCAK_SRCS"
keccak_hdrs_var="KECCAK_HDRS"
keccak_srcs_asm_var="KECCAK_SRCS_ASM"
noslabcppflagsval=
debugstatscppflagsval=
@ -301,6 +371,7 @@ s#@${loopoptflagsvar}@#\\\$\\(${typ}_${loopoptflagsvar}\\)#g
s#@${cppflagsvar}@#\\\$\\(${typ}_${cppflagsvar}\\)#g
s#@${genoptvar}@#\\\$\\(${typ}_${genoptvar}\\)#g
s#@${rabinoptvar}@#\\\$\\(${typ}_${rabinoptvar}\\)#g
s#@${fptr_flag_var}@#\\\$\\(${typ}_${fptr_flag_var}\\)#g
s#@${noslabcppflagsvar}@#${noslabcppflagsval}#g
s#@${debugstatscppflagsvar}@#${debugstatscppflagsval}#g
s#@${prefixvar}@#${prefix}#g
@ -321,5 +392,9 @@ s#@${libbz2libdirvar}@#${libbz2_libdir}#g
s#@${libzlibdirvar}@#${libz_libdir}#g
s#@${libbz2incvar}@#${libbz2_inc}#g
s#@${libzincvar}@#${libz_inc}#g
s#@${keccak_srcs_var}@#${keccak_srcs}#g
s#@${keccak_hdrs_var}@#${keccak_hdrs}#g
s#@${keccak_srcs_asm_var}@#${keccak_srcs_asm}#g
" > Makefile

View file

@ -38,6 +38,7 @@
#include <openssl/hmac.h>
#include <sha256.h>
#include <crypto_aes.h>
#include <KeccakNISTInterface.h>
#include "crypto_utils.h"
#include "cpuid.h"
@ -61,7 +62,9 @@ static struct {
{"SKEIN256", CKSUM_SKEIN256, 32, 32, NULL},
{"SKEIN512", CKSUM_SKEIN512, 64, 64, NULL},
{"SHA256", CKSUM_SHA256, 32, 32, init_sha256},
{"SHA512", CKSUM_SHA512, 64, 64, NULL}
{"SHA512", CKSUM_SHA512, 64, 64, NULL},
{"KECCAK256", CKSUM_KECCAK256, 32, 32, NULL},
{"KECCAK512", CKSUM_KECCAK512, 64, 64, NULL}
};
static int cksum_provider = PROVIDER_OPENSSL, ossl_inited = 0;
@ -111,6 +114,14 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes)
SHA512_Init(&ctx);
SHA512_Update(&ctx, buf, bytes);
SHA512_Final(cksum_buf, &ctx);
} else if (cksum == CKSUM_KECCAK256) {
if (Keccak_Hash(256, buf, bytes, cksum_buf) != 0)
return (-1);
} else if (cksum == CKSUM_KECCAK512) {
if (Keccak_Hash(512, buf, bytes, cksum_buf) != 0)
return (-1);
} else {
return (-1);
}
@ -279,6 +290,29 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
return (-1);
}
mctx->mac_ctx_reinit = ctx;
} else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) {
	/*
	 * Keyed Keccak: initialize a hash state and absorb the key first
	 * (Keccak_Update takes its length in bits, hence KEYLEN << 3).
	 * A second copy of the keyed state is kept in mac_ctx_reinit so that
	 * hmac_reinit() can restore it with a memcpy.
	 * Both pointers are published only once everything has succeeded, so
	 * a failure neither leaks memory nor leaves a dangling mac_ctx.
	 */
	hashState *ctx, *ctx_reinit;
	int hashbits = (cksum == CKSUM_KECCAK256) ? 256 : 512;

	ctx = malloc(sizeof (hashState));
	if (!ctx) return (-1);
	if (Keccak_Init(ctx, hashbits) != 0 ||
	    Keccak_Update(ctx, actx->pkey, KEYLEN << 3) != 0) {
		free(ctx);
		return (-1);
	}
	ctx_reinit = malloc(sizeof (hashState));
	if (!ctx_reinit) {
		free(ctx);
		return (-1);
	}
	memcpy(ctx_reinit, ctx, sizeof (hashState));
	mctx->mac_ctx = ctx;
	mctx->mac_ctx_reinit = ctx_reinit;
} else {
return (-1);
}
@ -301,6 +335,9 @@ hmac_reinit(mac_ctx_t *mctx)
}
} else if (cksum == CKSUM_SHA512) {
HMAC_CTX_copy(mctx->mac_ctx, mctx->mac_ctx_reinit);
} else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) {
memcpy(mctx->mac_ctx, mctx->mac_ctx_reinit, sizeof (hashState));
} else {
return (-1);
}
@ -325,6 +362,19 @@ hmac_update(mac_ctx_t *mctx, uchar_t *data, size_t len)
} else if (cksum == CKSUM_SHA512) {
if (HMAC_Update(mctx->mac_ctx, data, len) == 0)
return (-1);
} else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) {
	// Keccak takes data length in bits so we have to scale.
	// Inputs longer than KECCAK_MAX_SEG bytes are fed in segments so that
	// the byte count shifted left by 3 still fits in 64 bits.
	while (len > KECCAK_MAX_SEG) {
		uint64_t blen;

		blen = KECCAK_MAX_SEG;
		if (Keccak_Update(mctx->mac_ctx, data, blen << 3) != 0)
			return (-1);
		// Step past the segment just absorbed; without this the same
		// leading bytes would be hashed again on the next pass.
		data += KECCAK_MAX_SEG;
		len -= KECCAK_MAX_SEG;
	}
	if (Keccak_Update(mctx->mac_ctx, data, len << 3) != 0)
		return (-1);
} else {
return (-1);
}
@ -353,6 +403,14 @@ hmac_final(mac_ctx_t *mctx, uchar_t *hash, unsigned int *len)
}
} else if (cksum == CKSUM_SHA512) {
HMAC_Final(mctx->mac_ctx, hash, len);
} else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) {
if (Keccak_Final(mctx->mac_ctx, hash) != 0)
return (-1);
if (cksum == CKSUM_KECCAK256)
*len = 32;
else
*len = 64;
} else {
return (-1);
}
@ -379,6 +437,10 @@ hmac_cleanup(mac_ctx_t *mctx)
} else if (cksum == CKSUM_SHA512) {
HMAC_CTX_cleanup(mctx->mac_ctx);
HMAC_CTX_cleanup(mctx->mac_ctx_reinit);
} else if (cksum == CKSUM_KECCAK256 || cksum == CKSUM_KECCAK512) {
memset(mctx->mac_ctx, 0, sizeof (hashState));
memset(mctx->mac_ctx_reinit, 0, sizeof (hashState));
} else {
return (-1);
}

View file

@ -42,6 +42,7 @@ extern "C" {
#define CRYPTO_ALG_AES 0x10
#define MAX_SALTLEN 64
/*
 * Largest number of bytes hmac_update() hands to a single Keccak_Update()
 * call. Keccak_Update() takes its length in bits (byte count << 3), so longer
 * inputs are split into segments of this size. The value is 2^61 - 2.
 */
#define KECCAK_MAX_SEG (2305843009213693950ULL)
/*
* Public checksum properties. CKSUM_MAX_BYTES must be updated if a
* newer larger checksum is added to the list.
@ -51,7 +52,9 @@ typedef enum {
CKSUM_SKEIN256 = 0x200,
CKSUM_SKEIN512 = 0x300,
CKSUM_SHA256 = 0x400,
CKSUM_SHA512 = 0x500
CKSUM_SHA512 = 0x500,
CKSUM_KECCAK256 = 0x600,
CKSUM_KECCAK512 = 0x700
} cksum_t;
typedef struct {

68
crypto/keccak/KeccakDuplex.c Executable file
View file

@ -0,0 +1,68 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakDuplex.h"
#include "KeccakF-1600-interface.h"
#ifdef KeccakReference
#include "displayIntermediateValues.h"
#endif
/*
 * Initialize a duplex object with the given rate and capacity (in bits).
 *
 * state    - duplex object to initialize.
 * rate     - rate r; must satisfy 2 <= r <= 1600.
 * capacity - capacity c; r + c must equal 1600.
 *
 * Returns 0 on success, 1 if the parameters are rejected.
 */
int InitDuplex(duplexState *state, unsigned int rate, unsigned int capacity)
{
    if (rate+capacity != 1600)
        return 1;
    /*
     * rho_max is computed as rate-2 below. A rate below 2 would wrap that
     * unsigned subtraction to a huge value and disable the input-length
     * check in Duplexing(), so such rates are rejected here.
     */
    if ((rate < 2) || (rate > 1600))
        return 1;
    KeccakInitialize();
    state->rate = rate;
    state->capacity = capacity;
    state->rho_max = rate-2;
    KeccakInitializeState(state->state);
    return 0;
}
/*
 * Perform one duplexing call: pad and absorb up to rho_max input bits, then
 * extract up to `rate` output bits.
 *
 * Returns 0 on success, 1 when the input is too long, the unused high bits
 * of a trailing partial input byte are not zero, or more output than the
 * rate is requested.
 */
int Duplexing(duplexState *state, const unsigned char *in, unsigned int inBitLen, unsigned char *out, unsigned int outBitLen)
{
    ALIGN unsigned char block[KeccakPermutationSizeInBytes];
    const unsigned int inBytes = (inBitLen+7)/8;        /* input bytes, rounded up */
    const unsigned int inTailBits = inBitLen % 8;       /* bits used in the last input byte */
    const unsigned int outBytes = (outBitLen+7)/8;      /* output bytes, rounded up */
    const unsigned int outTailBits = outBitLen % 8;     /* bits used in the last output byte */
    unsigned int laneCount;
    unsigned int lastRateBit;

    if (inBitLen > state->rho_max)
        return 1;
    if (inTailBits != 0) {
        unsigned char highMask = ~((1 << inTailBits) - 1);

        if (in[inBitLen/8] & highMask)
            return 1; // The bits of the last incomplete byte must be aligned on the LSB
    }
    if (outBitLen > state->rate)
        return 1; // The output length must not be greater than the rate

    laneCount = (state->rate+63)/64;    /* 64-bit lanes covering the rate */
    lastRateBit = state->rate-1;

    /* Copy the input and zero the rest of the lanes that will be absorbed. */
    memcpy(block, in, inBytes);
    memset(block+inBytes, 0, laneCount*8 - inBytes);

    /* Padding: one bit right after the input, one at the last bit of the rate. */
    block[inBitLen/8] |= 1 << inTailBits;
    block[lastRateBit/8] |= 1 << (lastRateBit % 8);

    #ifdef KeccakReference
    displayBytes(1, "Block to be absorbed (after padding)", block, (state->rate+7)/8);
    #endif
    KeccakAbsorb(state->state, block, laneCount);
    KeccakExtract(state->state, block, laneCount);

    memcpy(out, block, outBytes);
    if (outTailBits != 0) {
        /* Clear the bits beyond outBitLen in the final output byte. */
        out[outBitLen/8] &= (unsigned char)((1 << outTailBits) - 1);
    }
    return 0;
}

59
crypto/keccak/KeccakDuplex.h Executable file
View file

@ -0,0 +1,59 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakDuplex_h_
#define _KeccakDuplex_h_
#define KeccakPermutationSize 1600
#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/* State of one duplex object; filled in by InitDuplex(). */
ALIGN typedef struct duplexStateStruct {
/* Permutation state: KeccakPermutationSizeInBytes (1600/8) bytes. */
ALIGN unsigned char state[KeccakPermutationSizeInBytes];
/* Rate r in bits, as passed to InitDuplex(). */
unsigned int rate;
/* Capacity c in bits, as passed to InitDuplex(); InitDuplex() requires r+c == 1600. */
unsigned int capacity;
/* Set to rate-2 by InitDuplex(); Duplexing() rejects inputs longer than this many bits. */
unsigned int rho_max;
} duplexState;
/**
* Function to initialize a duplex object Duplex[Keccak-f[r+c], pad10*1, r].
* @param state Pointer to the state of the duplex object to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @pre One must have r+c=1600 in this implementation. (The value of the rate is unrestricted.)
* @return Zero if successful, 1 otherwise.
*/
int InitDuplex(duplexState *state, unsigned int rate, unsigned int capacity);
/**
* Function to make a duplexing call to the duplex object initialized with InitDuplex().
* @param state Pointer to the state of the duplex object initialized by InitDuplex().
* @param in Pointer to the input data.
* When @a inBitLen is not a multiple of 8, the last bits of data must be
* in the least significant bits of the last byte.
* @param inBitLen The number of input bits provided in the input data.
* @param out Pointer to the buffer where to store the output data.
* @param outBitLen The number of output bits desired.
* @pre inBitLen <= (r-2)
* @pre outBitLen <= r
* @return Zero if successful, 1 otherwise.
*/
int Duplexing(duplexState *state, const unsigned char *in, unsigned int inBitLen, unsigned char *out, unsigned int outBitLen);
#endif

View file

@ -0,0 +1,728 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#define declareABCDE \
UINT64 Aba, Abe, Abi, Abo, Abu; \
UINT64 Aga, Age, Agi, Ago, Agu; \
UINT64 Aka, Ake, Aki, Ako, Aku; \
UINT64 Ama, Ame, Ami, Amo, Amu; \
UINT64 Asa, Ase, Asi, Aso, Asu; \
UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
UINT64 Bka, Bke, Bki, Bko, Bku; \
UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
UINT64 Ca, Ce, Ci, Co, Cu; \
UINT64 Da, De, Di, Do, Du; \
UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
UINT64 Ega, Ege, Egi, Ego, Egu; \
UINT64 Eka, Eke, Eki, Eko, Eku; \
UINT64 Ema, Eme, Emi, Emo, Emu; \
UINT64 Esa, Ese, Esi, Eso, Esu; \
#define prepareTheta \
Ca = Aba^Aga^Aka^Ama^Asa; \
Ce = Abe^Age^Ake^Ame^Ase; \
Ci = Abi^Agi^Aki^Ami^Asi; \
Co = Abo^Ago^Ako^Amo^Aso; \
Cu = Abu^Agu^Aku^Amu^Asu; \
#ifdef UseBebigokimisa
// --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa')
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^( Bbe | Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
Ca = E##ba; \
E##be = Bbe ^((~Bbi)| Bbo ); \
Ce = E##be; \
E##bi = Bbi ^( Bbo & Bbu ); \
Ci = E##bi; \
E##bo = Bbo ^( Bbu | Bba ); \
Co = E##bo; \
E##bu = Bbu ^( Bba & Bbe ); \
Cu = E##bu; \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^( Bge | Bgi ); \
Ca ^= E##ga; \
E##ge = Bge ^( Bgi & Bgo ); \
Ce ^= E##ge; \
E##gi = Bgi ^( Bgo |(~Bgu)); \
Ci ^= E##gi; \
E##go = Bgo ^( Bgu | Bga ); \
Co ^= E##go; \
E##gu = Bgu ^( Bga & Bge ); \
Cu ^= E##gu; \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^( Bke | Bki ); \
Ca ^= E##ka; \
E##ke = Bke ^( Bki & Bko ); \
Ce ^= E##ke; \
E##ki = Bki ^((~Bko)& Bku ); \
Ci ^= E##ki; \
E##ko = (~Bko)^( Bku | Bka ); \
Co ^= E##ko; \
E##ku = Bku ^( Bka & Bke ); \
Cu ^= E##ku; \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^( Bme & Bmi ); \
Ca ^= E##ma; \
E##me = Bme ^( Bmi | Bmo ); \
Ce ^= E##me; \
E##mi = Bmi ^((~Bmo)| Bmu ); \
Ci ^= E##mi; \
E##mo = (~Bmo)^( Bmu & Bma ); \
Co ^= E##mo; \
E##mu = Bmu ^( Bma | Bme ); \
Cu ^= E##mu; \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
Ca ^= E##sa; \
E##se = (~Bse)^( Bsi | Bso ); \
Ce ^= E##se; \
E##si = Bsi ^( Bso & Bsu ); \
Ci ^= E##si; \
E##so = Bso ^( Bsu | Bsa ); \
Co ^= E##so; \
E##su = Bsu ^( Bsa & Bse ); \
Cu ^= E##su; \
\
// --- Code for round (lane complementing pattern 'bebigokimisa')
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIota(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^( Bbe | Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
E##be = Bbe ^((~Bbi)| Bbo ); \
E##bi = Bbi ^( Bbo & Bbu ); \
E##bo = Bbo ^( Bbu | Bba ); \
E##bu = Bbu ^( Bba & Bbe ); \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^( Bge | Bgi ); \
E##ge = Bge ^( Bgi & Bgo ); \
E##gi = Bgi ^( Bgo |(~Bgu)); \
E##go = Bgo ^( Bgu | Bga ); \
E##gu = Bgu ^( Bga & Bge ); \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^( Bke | Bki ); \
E##ke = Bke ^( Bki & Bko ); \
E##ki = Bki ^((~Bko)& Bku ); \
E##ko = (~Bko)^( Bku | Bka ); \
E##ku = Bku ^( Bka & Bke ); \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^( Bme & Bmi ); \
E##me = Bme ^( Bmi | Bmo ); \
E##mi = Bmi ^((~Bmo)| Bmu ); \
E##mo = (~Bmo)^( Bmu & Bma ); \
E##mu = Bmu ^( Bma | Bme ); \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
E##se = (~Bse)^( Bsi | Bso ); \
E##si = Bsi ^( Bso & Bsu ); \
E##so = Bso ^( Bsu | Bsa ); \
E##su = Bsu ^( Bsa & Bse ); \
\
#else // UseBebigokimisa
// --- Code for round, with prepare-theta
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^((~Bbe)& Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
Ca = E##ba; \
E##be = Bbe ^((~Bbi)& Bbo ); \
Ce = E##be; \
E##bi = Bbi ^((~Bbo)& Bbu ); \
Ci = E##bi; \
E##bo = Bbo ^((~Bbu)& Bba ); \
Co = E##bo; \
E##bu = Bbu ^((~Bba)& Bbe ); \
Cu = E##bu; \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^((~Bge)& Bgi ); \
Ca ^= E##ga; \
E##ge = Bge ^((~Bgi)& Bgo ); \
Ce ^= E##ge; \
E##gi = Bgi ^((~Bgo)& Bgu ); \
Ci ^= E##gi; \
E##go = Bgo ^((~Bgu)& Bga ); \
Co ^= E##go; \
E##gu = Bgu ^((~Bga)& Bge ); \
Cu ^= E##gu; \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^((~Bke)& Bki ); \
Ca ^= E##ka; \
E##ke = Bke ^((~Bki)& Bko ); \
Ce ^= E##ke; \
E##ki = Bki ^((~Bko)& Bku ); \
Ci ^= E##ki; \
E##ko = Bko ^((~Bku)& Bka ); \
Co ^= E##ko; \
E##ku = Bku ^((~Bka)& Bke ); \
Cu ^= E##ku; \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^((~Bme)& Bmi ); \
Ca ^= E##ma; \
E##me = Bme ^((~Bmi)& Bmo ); \
Ce ^= E##me; \
E##mi = Bmi ^((~Bmo)& Bmu ); \
Ci ^= E##mi; \
E##mo = Bmo ^((~Bmu)& Bma ); \
Co ^= E##mo; \
E##mu = Bmu ^((~Bma)& Bme ); \
Cu ^= E##mu; \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
Ca ^= E##sa; \
E##se = Bse ^((~Bsi)& Bso ); \
Ce ^= E##se; \
E##si = Bsi ^((~Bso)& Bsu ); \
Ci ^= E##si; \
E##so = Bso ^((~Bsu)& Bsa ); \
Co ^= E##so; \
E##su = Bsu ^((~Bsa)& Bse ); \
Cu ^= E##su; \
\
// --- Code for round
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIota(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^((~Bbe)& Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
E##be = Bbe ^((~Bbi)& Bbo ); \
E##bi = Bbi ^((~Bbo)& Bbu ); \
E##bo = Bbo ^((~Bbu)& Bba ); \
E##bu = Bbu ^((~Bba)& Bbe ); \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^((~Bge)& Bgi ); \
E##ge = Bge ^((~Bgi)& Bgo ); \
E##gi = Bgi ^((~Bgo)& Bgu ); \
E##go = Bgo ^((~Bgu)& Bga ); \
E##gu = Bgu ^((~Bga)& Bge ); \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^((~Bke)& Bki ); \
E##ke = Bke ^((~Bki)& Bko ); \
E##ki = Bki ^((~Bko)& Bku ); \
E##ko = Bko ^((~Bku)& Bka ); \
E##ku = Bku ^((~Bka)& Bke ); \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^((~Bme)& Bmi ); \
E##me = Bme ^((~Bmi)& Bmo ); \
E##mi = Bmi ^((~Bmo)& Bmu ); \
E##mo = Bmo ^((~Bmu)& Bma ); \
E##mu = Bmu ^((~Bma)& Bme ); \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
E##se = Bse ^((~Bsi)& Bso ); \
E##si = Bsi ^((~Bso)& Bsu ); \
E##so = Bso ^((~Bsu)& Bsa ); \
E##su = Bsu ^((~Bsa)& Bse ); \
\
#endif // UseBebigokimisa
/*
 * The 24 64-bit round constants, indexed by round number i. The round macros
 * (thetaRhoPiChiIota and thetaRhoPiChiIotaPrepareTheta) XOR entry [i] into
 * lane "ba" of the output state.
 */
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
#define copyFromStateAndXor576bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]; \
X##ka = state[10]; \
X##ke = state[11]; \
X##ki = state[12]; \
X##ko = state[13]; \
X##ku = state[14]; \
X##ma = state[15]; \
X##me = state[16]; \
X##mi = state[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromStateAndXor832bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]^input[ 9]; \
X##ka = state[10]^input[10]; \
X##ke = state[11]^input[11]; \
X##ki = state[12]^input[12]; \
X##ko = state[13]; \
X##ku = state[14]; \
X##ma = state[15]; \
X##me = state[16]; \
X##mi = state[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromStateAndXor1024bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]^input[ 9]; \
X##ka = state[10]^input[10]; \
X##ke = state[11]^input[11]; \
X##ki = state[12]^input[12]; \
X##ko = state[13]^input[13]; \
X##ku = state[14]^input[14]; \
X##ma = state[15]^input[15]; \
X##me = state[16]; \
X##mi = state[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromStateAndXor1088bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]^input[ 9]; \
X##ka = state[10]^input[10]; \
X##ke = state[11]^input[11]; \
X##ki = state[12]^input[12]; \
X##ko = state[13]^input[13]; \
X##ku = state[14]^input[14]; \
X##ma = state[15]^input[15]; \
X##me = state[16]^input[16]; \
X##mi = state[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromStateAndXor1152bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]^input[ 9]; \
X##ka = state[10]^input[10]; \
X##ke = state[11]^input[11]; \
X##ki = state[12]^input[12]; \
X##ko = state[13]^input[13]; \
X##ku = state[14]^input[14]; \
X##ma = state[15]^input[15]; \
X##me = state[16]^input[16]; \
X##mi = state[17]^input[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromStateAndXor1344bits(X, state, input) \
X##ba = state[ 0]^input[ 0]; \
X##be = state[ 1]^input[ 1]; \
X##bi = state[ 2]^input[ 2]; \
X##bo = state[ 3]^input[ 3]; \
X##bu = state[ 4]^input[ 4]; \
X##ga = state[ 5]^input[ 5]; \
X##ge = state[ 6]^input[ 6]; \
X##gi = state[ 7]^input[ 7]; \
X##go = state[ 8]^input[ 8]; \
X##gu = state[ 9]^input[ 9]; \
X##ka = state[10]^input[10]; \
X##ke = state[11]^input[11]; \
X##ki = state[12]^input[12]; \
X##ko = state[13]^input[13]; \
X##ku = state[14]^input[14]; \
X##ma = state[15]^input[15]; \
X##me = state[16]^input[16]; \
X##mi = state[17]^input[17]; \
X##mo = state[18]^input[18]; \
X##mu = state[19]^input[19]; \
X##sa = state[20]^input[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyFromState(X, state) \
X##ba = state[ 0]; \
X##be = state[ 1]; \
X##bi = state[ 2]; \
X##bo = state[ 3]; \
X##bu = state[ 4]; \
X##ga = state[ 5]; \
X##ge = state[ 6]; \
X##gi = state[ 7]; \
X##go = state[ 8]; \
X##gu = state[ 9]; \
X##ka = state[10]; \
X##ke = state[11]; \
X##ki = state[12]; \
X##ko = state[13]; \
X##ku = state[14]; \
X##ma = state[15]; \
X##me = state[16]; \
X##mi = state[17]; \
X##mo = state[18]; \
X##mu = state[19]; \
X##sa = state[20]; \
X##se = state[21]; \
X##si = state[22]; \
X##so = state[23]; \
X##su = state[24]; \
#define copyToState(state, X) \
state[ 0] = X##ba; \
state[ 1] = X##be; \
state[ 2] = X##bi; \
state[ 3] = X##bo; \
state[ 4] = X##bu; \
state[ 5] = X##ga; \
state[ 6] = X##ge; \
state[ 7] = X##gi; \
state[ 8] = X##go; \
state[ 9] = X##gu; \
state[10] = X##ka; \
state[11] = X##ke; \
state[12] = X##ki; \
state[13] = X##ko; \
state[14] = X##ku; \
state[15] = X##ma; \
state[16] = X##me; \
state[17] = X##mi; \
state[18] = X##mo; \
state[19] = X##mu; \
state[20] = X##sa; \
state[21] = X##se; \
state[22] = X##si; \
state[23] = X##so; \
state[24] = X##su; \
/* Assign each of the 25 Y-prefixed lane variables to the X-prefixed variable
 * of the same lane name (X##ba = Y##ba ... X##su = Y##su), one plane per line.
 * The final line carries no line-continuation backslash, so the macro no
 * longer depends on the following source line being blank. */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; X##be = Y##be; X##bi = Y##bi; X##bo = Y##bo; X##bu = Y##bu; \
    X##ga = Y##ga; X##ge = Y##ge; X##gi = Y##gi; X##go = Y##go; X##gu = Y##gu; \
    X##ka = Y##ka; X##ke = Y##ke; X##ki = Y##ki; X##ko = Y##ko; X##ku = Y##ku; \
    X##ma = Y##ma; X##me = Y##me; X##mi = Y##mi; X##mo = Y##mo; X##mu = Y##mu; \
    X##sa = Y##sa; X##se = Y##se; X##si = Y##si; X##so = Y##so; X##su = Y##su;

View file

@ -0,0 +1,6 @@
/* Feature switches: each ProvideFastNNN macro enables the declaration (in
 * KeccakF-1600-interface.h) of the matching specialised
 * KeccakAbsorbNNNbits() routine; ProvideFast1024 additionally enables
 * KeccakExtract1024bits(). */
#define ProvideFast576
#define ProvideFast832
#define ProvideFast1024
#define ProvideFast1088
#define ProvideFast1152
#define ProvideFast1344

View file

@ -0,0 +1,46 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakPermutationInterface_h_
#define _KeccakPermutationInterface_h_
/* Pulls in the ProvideFastNNN switches that gate the optional prototypes below. */
#include "KeccakF-1600-int-set.h"
/* Global initialisation hook; takes no arguments. */
void KeccakInitialize( void );
/* Put `state` into its initial value. The opt64 C implementation treats
 * `state` as 200 bytes (25 lanes of 64 bits). */
void KeccakInitializeState(unsigned char *state);
/* Apply the permutation to `state` in place. */
void KeccakPermutation(unsigned char *state);
/* Fixed-size absorb routines: XOR the first NNN bits of `data` into `state`
 * and then permute. Each one exists only when ProvideFastNNN is defined. */
#ifdef ProvideFast576
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast832
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1024
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1088
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1152
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1344
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data);
#endif
/* Generic absorb: `laneCount` is the number of 64-bit lanes of `data` to use. */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount);
/* Copy the first 1024 bits (16 lanes) of `state` out to `data`. */
#ifdef ProvideFast1024
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data);
#endif
/* Generic extract: copy the first `laneCount` 64-bit lanes of `state` to `data`. */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount);
#endif

View file

@ -0,0 +1,7 @@
/* Build-time settings for KeccakF-1600-opt64.c. */
/* Unrolling: when this is not 24, the permutation functions additionally
 * declare a loop counter `i` (presumably consumed by the `rounds` macro in
 * KeccakF-1600-unrolling.macros -- confirm there). */
#define Unrolling 24
/* UseBebigokimisa: lanes 1, 2, 8, 12, 17 and 20 of the state are kept
 * complemented (see KeccakInitializeState / KeccakExtract). KeccakF-1600-opt64.c
 * raises #error if this is combined with UseSSE, UseXOP or UseMMX. */
#define UseBebigokimisa
/* The switches below are disabled. In KeccakF-1600-opt64.c they select,
 * respectively: the SSE intrinsics path, the 64-bit-only SIMD macro set
 * (only consulted under UseSSE), the MMX path, an inline-asm `shld` rotate
 * for the scalar path, and the XOP intrinsics path. */
//#define UseSSE
//#define UseOnlySIMD64
//#define UseMMX
//#define UseSHLD
//#define UseXOP

View file

@ -0,0 +1,504 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "brg_endian.h"
#include "KeccakF-1600-opt64-settings.h"
#include "KeccakF-1600-interface.h"
/* Fixed-width helper types used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
/* ALIGN: compiler-specific request for 32-byte alignment of a definition;
 * expands to nothing on compilers that are neither GCC-compatible nor MSVC. */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/* --- SSE path: lanes live in __m128i values; the wrappers below map the
 * abstract operations used by the round macros onto SSE intrinsics. */
#if defined(UseSSE)
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
/* Gives 64-bit access to the two halves of a 128-bit value. */
typedef union {
V128 v128;
UINT64 v64[2];
} V6464;
/* Note: _mm_andnot_si128(a, b) computes (~a) & b. */
#define ANDnu64(a, b) _mm_andnot_si128(a, b)
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
/* CONST128/LOAD128 use the aligned load; LOAD128u is the unaligned one. */
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ZERO128() _mm_setzero_si128()
#ifdef UseOnlySIMD64
#include "KeccakF-1600-simd64.macros"
#else
/* Byte-shuffle mask: rotates the low 64-bit half left by 8 bits and the
 * high half left by 56 bits in a single SHUFFLEBYTES128. */
ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
#include "KeccakF-1600-simd128.macros"
#endif
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseSSE"
#endif
/* --- XOP path: like the SSE path, but rotations use the XOP rotate
 * intrinsics (_mm_roti_epi64 / _mm_rot_epi64). */
#elif defined(UseXOP)
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define ZERO128() _mm_setzero_si128()
#define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
#define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
/* ROL6464same rotates both halves by the same amount; ROL6464 rotates the
 * two halves by different amounts, taken from the rot_<r1>_<r2> table
 * named by token pasting. */
#define ROL6464same(a, o) _mm_roti_epi64(a, o)
#define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
/* Per-half rotation counts, one table per (r1, r2) pair used with ROL6464. */
ALIGN const UINT64 rot_0_20[2] = { 0, 20};
ALIGN const UINT64 rot_44_3[2] = {44, 3};
ALIGN const UINT64 rot_43_45[2] = {43, 45};
ALIGN const UINT64 rot_21_61[2] = {21, 61};
ALIGN const UINT64 rot_14_28[2] = {14, 28};
ALIGN const UINT64 rot_1_36[2] = { 1, 36};
ALIGN const UINT64 rot_6_10[2] = { 6, 10};
ALIGN const UINT64 rot_25_15[2] = {25, 15};
ALIGN const UINT64 rot_8_56[2] = { 8, 56};
ALIGN const UINT64 rot_18_27[2] = {18, 27};
ALIGN const UINT64 rot_62_55[2] = {62, 55};
ALIGN const UINT64 rot_39_41[2] = {39, 41};
#if defined(UseSimulatedXOP)
// For debugging purposes, when XOP is not available
#undef ROL6464
#undef ROL6464same
#define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
/* NOTE(review): this helper calls ROL64, but no ROL64 macro is defined
 * anywhere in this UseXOP branch -- confirm whether UseSimulatedXOP builds. */
V128 ROL6464(V128 a, int r0, int r1)
{
V128 a0 = ROL64(a, r0);
V128 a1 = COPY64HI2LO(ROL64(a, r1));
return GET64LOLO(a0, a1);
}
#endif
#include "KeccakF-1600-xop.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseXOP"
#endif
/* --- MMX path: each lane is an __m64; uses the 64-bit SIMD macro set. */
#elif defined(UseMMX)
#include <mmintrin.h>
typedef __m64 V64;
#define ANDnu64(a, b) _mm_andnot_si64(a, b)
/* MSVC / Intel compilers go through a pointer cast; other compilers use a
 * direct value cast between UINT64 and __m64. */
#if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
#define LOAD64(a) *(V64*)&(a)
#define CONST64(a) *(V64*)&(a)
#define STORE64(a, b) *(V64*)&(a) = b
#else
#define LOAD64(a) (V64)a
#define CONST64(a) (V64)a
#define STORE64(a, b) a = (UINT64)b
#endif
#define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
#define XOR64(a, b) _mm_xor_si64(a, b)
#define XOReq64(a, b) a = _mm_xor_si64(a, b)
#include "KeccakF-1600-simd64.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseMMX"
#endif
#else
/* --- Scalar path: lanes are plain UINT64 values.
 * ROL64(a, offset) rotates a 64-bit value left by `offset` bits. */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
/* GCC statement-expression using the x86-64 `shld` instruction; N must be a
 * compile-time constant because of the "i" constraint. */
#define ROL64(x,N) ({ \
register UINT64 __out; \
register UINT64 __in = x; \
__asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
__out; \
})
#else
/* Portable shift-based rotate. Both arguments are fully parenthesised so
 * that expression arguments (e.g. an offset of `n+1`) expand correctly in
 * `64-(offset)`. `offset` must be in 1..63: a shift by 64 is undefined in C. */
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
#include "KeccakF-1600-64.macros"
#endif
#include "KeccakF-1600-unrolling.macros"
/*
 * Run the permutation on a state held as 25 64-bit words.
 *
 * The working variables come from declareABCDE, the state is loaded into the
 * A-prefixed variables and the `rounds` macro does the rest (it is defined in
 * KeccakF-1600-unrolling.macros; presumably it also stores the result back
 * into `state` -- confirm there).
 */
void KeccakPermutationOnWords(UINT64 *state)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromState(A, state)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
/*
 * XOR the first `laneCount` words of `input` into `state`, then run the
 * permutation on `state`.
 *
 * No bound check is made on `laneCount`; the caller must keep it within the
 * size of both arrays.
 */
void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif
    unsigned int j;

    /* Absorb the input lanes directly into the stored state first. */
    j = 0;
    while (j < laneCount) {
        state[j] ^= input[j];
        j++;
    }

    copyFromState(A, state)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#ifdef ProvideFast576
/*
 * Permute `state` after XORing 576 bits (9 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor576bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
#ifdef ProvideFast832
/*
 * Permute `state` after XORing 832 bits (13 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor832bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
#ifdef ProvideFast1024
/*
 * Permute `state` after XORing 1024 bits (16 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor1024bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
#ifdef ProvideFast1088
/*
 * Permute `state` after XORing 1088 bits (17 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor1088bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
#ifdef ProvideFast1152
/*
 * Permute `state` after XORing 1152 bits (18 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor1152bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
#ifdef ProvideFast1344
/*
 * Permute `state` after XORing 1344 bits (21 lanes) of `input` into it.
 * The XOR is folded into the load of the working variables by
 * copyFromStateAndXor1344bits; `rounds` then performs the permutation.
 */
void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    unsigned int i;     /* only needed when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* leave MMX mode before returning */
#endif
}
#endif
/*
 * Global initialisation hook required by the permutation interface.
 * This implementation needs no set-up, so the body is empty.  The parameter
 * list is spelled `(void)` so the definition matches the prototype in
 * KeccakF-1600-interface.h instead of being an old-style declarator.
 */
void KeccakInitialize(void)
{
}
/*
 * Reset the 200-byte state to its initial value.
 *
 * All bytes are cleared.  When UseBebigokimisa is defined, lanes 1, 2, 8, 12,
 * 17 and 20 are then set to all-ones, i.e. those lanes hold the complement
 * of zero.  The all-ones lanes are written with memset() rather than through
 * a UINT64* cast of the byte buffer: the resulting bytes are identical on any
 * byte order, and no alignment or aliasing assumption is made about `state`.
 */
void KeccakInitializeState(unsigned char *state)
{
    memset(state, 0, 200);
#ifdef UseBebigokimisa
    {
        static const unsigned char complementedLanes[] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k;

        for (k = 0; k < sizeof(complementedLanes); k++)
            memset(state + 8u * complementedLanes[k], 0xFF, 8);
    }
#endif
}
void KeccakPermutation(unsigned char *state)
{
// We assume the state is always stored as words
KeccakPermutationOnWords((UINT64*)state);
}
/*
 * Assemble a 64-bit word from 8 bytes, least-significant byte first:
 * bytes[0] supplies bits 0..7, bytes[7] supplies bits 56..63.
 * The result is written to *word.
 */
void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
{
    unsigned int shift;

    *word = 0;
    for (shift = 0; shift < 64; shift += 8)
        *word |= (UINT64)(*bytes++) << shift;
}
#ifdef ProvideFast576
/*
 * Absorb 576 bits (9 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[9];
    unsigned int n = 0;

    while (n < 9) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, lanes);
#endif
}
#endif
#ifdef ProvideFast832
/*
 * Absorb 832 bits (13 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[13];
    unsigned int n = 0;

    while (n < 13) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, lanes);
#endif
}
#endif
#ifdef ProvideFast1024
/*
 * Absorb 1024 bits (16 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[16];
    unsigned int n = 0;

    while (n < 16) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, lanes);
#endif
}
#endif
#ifdef ProvideFast1088
/*
 * Absorb 1088 bits (17 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[17];
    unsigned int n = 0;

    while (n < 17) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, lanes);
#endif
}
#endif
#ifdef ProvideFast1152
/*
 * Absorb 1152 bits (18 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[18];
    unsigned int n = 0;

    while (n < 18) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, lanes);
#endif
}
#endif
#ifdef ProvideFast1344
/*
 * Absorb 1344 bits (21 lanes) of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord().
 */
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* NOTE(review): `data` is read through a UINT64 pointer here -- confirm
     * callers can tolerate that on the target. */
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[21];
    unsigned int n = 0;

    while (n < 21) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, lanes);
#endif
}
#endif
/*
 * Absorb `laneCount` 64-bit lanes of `data` into `state` and permute.
 * On little-endian platforms the input bytes are used in place as 64-bit
 * words; otherwise each lane is first assembled with fromBytesToWord() into
 * a 25-entry scratch array.  `laneCount` is not checked here, so the caller
 * must not pass more than 25.
 */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
#else
    UINT64 lanes[25];
    unsigned int n = 0;

    while (n < laneCount) {
        fromBytesToWord(&lanes[n], data + 8*n);
        n++;
    }
    KeccakPermutationOnWordsAfterXoring((UINT64*)state, lanes, laneCount);
#endif
}
/*
 * Split a 64-bit word into 8 bytes, least-significant byte first:
 * bytes[0] receives bits 0..7, bytes[7] receives bits 56..63.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8)
        *bytes++ = (word >> shift) & 0xFF;
}
#ifdef ProvideFast1024
/*
 * Copy the first 1024 bits (16 lanes, 128 bytes) of `state` into `data`.
 *
 * Little-endian platforms copy the bytes directly; other platforms serialise
 * each lane with fromWordToBytes().  When UseBebigokimisa is defined, lanes
 * 1, 2, 8 and 12 are stored complemented in the state, so they are
 * complemented again in the output.  That complement is done byte by byte:
 * `data` is a caller-supplied byte buffer, and no alignment or aliasing
 * assumption is made about it.
 */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;
    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned char complementedLanes[] = { 1, 2, 8, 12 };
        unsigned int k, b;

        for (k = 0; k < sizeof(complementedLanes); k++) {
            unsigned char *lane = data + 8u * complementedLanes[k];
            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
#endif
/*
 * Copy the first `laneCount` 64-bit lanes of `state` into `data`.
 *
 * Little-endian platforms copy the bytes directly; other platforms serialise
 * each lane with fromWordToBytes().  When UseBebigokimisa is defined, lanes
 * 1, 2, 8, 12, 17 and 20 are stored complemented in the state, so each of
 * those lanes that lies below `laneCount` is complemented again in the
 * output.  The complement is done byte by byte: `data` is a caller-supplied
 * byte buffer, and no alignment or aliasing assumption is made about it.
 */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;
    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order, so the scan can stop at the first lane that was not extracted. */
        static const unsigned char complementedLanes[] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k, b;

        for (k = 0; k < sizeof(complementedLanes) && complementedLanes[k] < laneCount; k++) {
            unsigned char *lane = data + 8u * complementedLanes[k];
            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}

View file

@ -0,0 +1,651 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Declares every working variable used by the round and load macros in this
 * file: the A- and E-prefixed lane variables (V64) and packed lane pairs
 * (V6464), the B-prefixed intermediates, the C-prefixed column-parity
 * accumulators, the D-prefixed theta values, and a Zero scratch value.
 * Must be expanded at the start of a block, before any statement. */
#define declareABCDE \
V6464 Abage, Abegi, Abigo, Abogu, Abuga; \
V6464 Akame, Akemi, Akimo, Akomu, Akuma; \
V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \
V64 Aba, Abe, Abi, Abo, Abu; \
V64 Aga, Age, Agi, Ago, Agu; \
V64 Aka, Ake, Aki, Ako, Aku; \
V64 Ama, Ame, Ami, Amo, Amu; \
V64 Asa, Ase, Asi, Aso, Asu; \
V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
V64 Bba, Bbe, Bbi, Bbo, Bbu; \
V64 Bga, Bge, Bgi, Bgo, Bgu; \
V64 Bka, Bke, Bki, Bko, Bku; \
V64 Bma, Bme, Bmi, Bmo, Bmu; \
V64 Bsa, Bse, Bsi, Bso, Bsu; \
V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \
V64 Ca, Ce, Ci, Co, Cu; \
V64 Da, De, Di, Do, Du; \
V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
V64 Eba, Ebe, Ebi, Ebo, Ebu; \
V64 Ega, Ege, Egi, Ego, Egu; \
V64 Eka, Eke, Eki, Eko, Eku; \
V64 Ema, Eme, Emi, Emo, Emu; \
V64 Esa, Ese, Esi, Eso, Esu; \
V128 Zero;
/* Intentionally empty in this macro set: the column parities (Cae, Cio, Cu)
 * are already accumulated by the copyFromStateAndXor* load macros and by
 * each round. */
#define prepareTheta
/* Derive the five theta values Da..Du from the column parities held in
 * Cae (columns a,e), Cio (columns i,o) and Cu.  Dei and Dou each hold two
 * values packed in one 128-bit word; De/Do take the low halves and Di/Du
 * the high halves. */
#define computeD \
Cua = GET64LOLO(Cu, Cae); \
Dei = XOR128(Cae, ROL64in128(Cio, 1)); \
Dou = XOR128(Cio, ROL64in128(Cua, 1)); \
Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \
De = Dei; \
Di = COPY64HI2LO(Dei); \
Do = Dou; \
Du = COPY64HI2LO(Dou);
// --- Theta Rho Pi Chi Iota Prepare-theta
// --- 64-bit lanes mapped to 64-bit and 128-bit words
/* One round of the permutation.
 *   i - index into KeccakF1600RoundConstants for this round
 *   A - prefix of the variables holding the input state (they are modified:
 *       the theta values are XORed into them in place)
 *   E - prefix of the variables receiving the output state
 * Lanes are processed partly as packed 128-bit pairs and partly as single
 * 64-bit values.  While the E values are produced, their column parities are
 * accumulated into Cae, Cio and Cu so that the next round's computeD can
 * start immediately.  Statement order matters; do not reorder. */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
computeD \
\
A##ba = LOAD64(A##bage.v64[0]); \
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
Bbage = GET64LOLO(Bba, Bge); \
A##ge = LOAD64(A##bage.v64[1]); \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
A##ka = LOAD64(A##kame.v64[0]); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
Bbegi = GET64LOLO(Bbe, Bgi); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
A##me = LOAD64(A##kame.v64[1]); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
Bbigo = GET64LOLO(Bbi, Bgo); \
E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \
Cae = E##bage.v128; \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
Bbogu = GET64LOLO(Bbo, Bgu); \
E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
Cei = E##begi.v128; \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
Bbuga = GET64LOLO(Bbu, Bga); \
E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
E##bi = E##bigo.v128; \
E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \
Cio = E##bigo.v128; \
E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
E##bo = E##bogu.v128; \
E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \
Cou = E##bogu.v128; \
E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
E##bu = E##buga.v128; \
E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \
Cua = E##buga.v128; \
\
A##be = LOAD64(A##begi.v64[0]); \
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
Bkame = GET64LOLO(Bka, Bme); \
A##gi = LOAD64(A##begi.v64[1]); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
A##ke = LOAD64(A##kemi.v64[0]); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
Bkemi = GET64LOLO(Bke, Bmi); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
A##mi = LOAD64(A##kemi.v64[1]); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
Bkimo = GET64LOLO(Bki, Bmo); \
E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
XOReq128(Cae, E##kame.v128); \
Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \
Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \
E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
XOReq128(Cei, E##kemi.v128); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
Bkuma = GET64LOLO(Bku, Bma); \
E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
E##ki = E##kimo.v128; \
E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \
XOReq128(Cio, E##kimo.v128); \
E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
E##ko = E##komu.v128; \
E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \
XOReq128(Cou, E##komu.v128); \
E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
E##ku = E##kuma.v128; \
E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \
XOReq128(Cua, E##kuma.v128); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
Ca = E##sa; \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
Ce = E##se; \
XOReq128(Cae, GET64LOLO(Ca, Ce)); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
Ci = E##si; \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
Co = E##so; \
XOReq128(Cio, GET64LOLO(Ci, Co)); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
Cu = E##su; \
\
Zero = ZERO128(); \
XOReq128(Cae, GET64HIHI(Cua, Zero)); \
XOReq128(Cae, GET64LOLO(Zero, Cei)); \
XOReq128(Cio, GET64HIHI(Cei, Zero)); \
XOReq128(Cio, GET64LOLO(Zero, Cou)); \
XOReq128(Cua, GET64HIHI(Cou, Zero)); \
XOReq64(Cu, Cua); \
// --- Theta Rho Pi Chi Iota
// --- 64-bit lanes mapped to 64-bit and 128-bit words
/* Alias: in this macro set the plain round is the same code as the round
 * that also prepares the next theta step. */
#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
/* The 24 per-round constants, indexed by round number; the round macro XORs
 * entry [i] into the low 64 bits of the E##bage pair (i.e. lane "ba"). */
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/* Load the 25-lane state into the X-prefixed working variables, XORing the
 * first 9 lanes (576 bits) with input[0..8] on the way in.
 * Lanes are read two at a time as 128-bit pairs (X##bae, X##bio, ...) and
 * split into single-lane variables; the packed pairs X##bage, X##begi,
 * X##kame and X##kemi used by the round macro are built here as well, and
 * the column parities are accumulated into Cae, Cio and Cu.
 * State pairs at even lane indices use the aligned LOAD128; every input
 * load and the odd-index state pairs use LOAD128u.
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what the next source line is. */
#define copyFromStateAndXor576bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/* Load the 25-lane state into the X-prefixed working variables, XORing the
 * first 13 lanes (832 bits) with input[0..12] on the way in.  Lane 12 is the
 * low half of the X##kio pair, so only 64 bits of input are loaded there.
 * The packed pairs X##bage, X##begi, X##kame and X##kemi are built here as
 * well, and the column parities are accumulated into Cae, Cio and Cu.
 * State pairs at even lane indices use the aligned LOAD128; input loads and
 * the odd-index state pairs use LOAD128u / LOAD64.
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what the next source line is. */
#define copyFromStateAndXor832bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/* Load the 25-lane state into the X-prefixed working variables, XORing the
 * first 16 lanes (1024 bits) with input[0..15] on the way in.  Lane 15 is
 * the low half of the X##mae pair, so only 64 bits of input are loaded there.
 * The packed pairs X##bage, X##begi, X##kame and X##kemi are built here as
 * well, and the column parities are accumulated into Cae, Cio and Cu.
 * State pairs at even lane indices use the aligned LOAD128; input loads and
 * the odd-index state pairs use LOAD128u / LOAD64.
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what the next source line is. */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/* Load the 25-lane state into the X-prefixed working variables, XORing the
 * first 17 lanes (1088 bits) with input[0..16] on the way in.
 * The packed pairs X##bage, X##begi, X##kame and X##kemi are built here as
 * well, and the column parities are accumulated into Cae, Cio and Cu.
 * State pairs at even lane indices use the aligned LOAD128; input loads and
 * the odd-index state pairs use LOAD128u / LOAD64.
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what the next source line is. */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/* Load the 25-lane state into the X-prefixed working variables, XORing the
 * first 18 lanes (1152 bits) with input[0..17] on the way in.  Lane 17 is
 * the low half of the X##mio pair, so only 64 bits of input are loaded there.
 * The packed pairs X##bage, X##begi, X##kame and X##kemi are built here as
 * well, and the column parities are accumulated into Cae, Cio and Cu.
 * State pairs at even lane indices use the aligned LOAD128; input loads and
 * the odd-index state pairs use LOAD128u / LOAD64.
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what the next source line is. */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor1344bits(X, state, input)  -- 128-bit SIMD variant.
 *
 * Loads all 25 lanes of state[] into the X-prefixed working variables and
 * XORs input lanes 0..20 (21 lanes = 1344 bits) into them on the way;
 * state lanes 21..24 are loaded unchanged.  state[] itself is not written.
 *
 * Side effects on names that must exist at the expansion site:
 *   - X##bae, X##bio, X##gae, X##gio, X##kae, X##kio, X##mae, X##mio,
 *     X##sae, X##sio receive the lane pairs (through their .v128 member);
 *   - X##bage, X##begi, X##kame, X##kemi are built with GET64LOLO;
 *   - Cae, Cio and Cu end up holding the XOR of the five loaded "ae" pairs,
 *     "io" pairs and "u" lanes respectively.
 * LOAD128u is used for every input load and for the state pairs starting at
 * lanes 5, 7, 15 and 17 (presumably the unaligned-load flavour -- confirm
 * against the header that defines it).
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    /* plane b: lanes 0..4 */ \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    /* plane g: lanes 5..9 */ \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    /* plane k: lanes 10..14 */ \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    /* plane m: lanes 15..19 */ \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    XOReq64(Cu, X##mu); \
    /* plane s: lanes 20..24 -- only lane 20 still has input (LOAD64) */ \
    X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromState(X, state)  -- 128-bit SIMD variant.
 *
 * Loads all 25 lanes of state[] into the X-prefixed working variables without
 * mixing in any input.  state[] is only read.
 *
 * Side effects on names that must exist at the expansion site:
 *   - X##bae, X##bio, X##gae, X##gio, X##kae, X##kio, X##mae, X##mio,
 *     X##sae, X##sio receive the lane pairs (through their .v128 member);
 *   - X##bage, X##begi, X##kame, X##kemi are built with GET64LOLO;
 *   - Cae, Cio and Cu end up holding the XOR of the five loaded "ae" pairs,
 *     "io" pairs and "u" lanes respectively.
 * The pairs starting at lanes 5, 7, 15 and 17 are read with LOAD128u, the
 * others with LOAD128 (presumably unaligned vs. aligned loads -- confirm
 * against the header that defines them).
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromState(X, state) \
    /* plane b: lanes 0..4 */ \
    X##bae.v128 = LOAD128(state[ 0]); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = LOAD128(state[ 2]); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = LOAD64(state[ 4]); \
    Cu = X##bu; \
    /* plane g: lanes 5..9 */ \
    X##gae.v128 = LOAD128u(state[ 5]); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = LOAD128u(state[ 7]); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    /* plane k: lanes 10..14 */ \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    /* plane m: lanes 15..19 */ \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    /* plane s: lanes 20..24 */ \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyToState(state, X)  -- 128-bit SIMD variant.
 *
 * Writes the 25 lanes held in the X-prefixed working variables back to
 * state[0..24].  Lanes 0/6, 1/7, 10/16 and 11/17 are taken from the two
 * .v64 halves of X##bage, X##begi, X##kame and X##kemi; every other lane is
 * written with STORE64 from its own variable.
 *
 * Lanes 0, 1, 6 and 7 used to paste a literal "A" instead of the X parameter,
 * which silently read Abage/Abegi whatever prefix the caller passed.  They
 * now honour X like the remaining lanes; expansions with X = A are unchanged.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyToState(state, X) \
    state[ 0] = X##bage.v64[0]; \
    state[ 1] = X##begi.v64[0]; \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    state[ 6] = X##bage.v64[1]; \
    state[ 7] = X##begi.v64[1]; \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    state[10] = X##kame.v64[0]; \
    state[11] = X##kemi.v64[0]; \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    state[16] = X##kame.v64[1]; \
    state[17] = X##kemi.v64[1]; \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/*
 * copyStateVariables(X, Y)  -- 128-bit SIMD variant.
 *
 * Assigns each Y-prefixed working variable to the X-prefixed variable of the
 * same name, one plane per line.  Lanes that live inside the packed
 * bage/begi/kame/kemi variables are copied through those and have no
 * assignment of their own.
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage; X##begi = Y##begi; X##bi = Y##bi; X##bo = Y##bo; X##bu = Y##bu; \
    X##ga = Y##ga; X##go = Y##go; X##gu = Y##gu; \
    X##kame = Y##kame; X##kemi = Y##kemi; X##ki = Y##ki; X##ko = Y##ko; X##ku = Y##ku; \
    X##ma = Y##ma; X##mo = Y##mo; X##mu = Y##mu; \
    X##sa = Y##sa; X##se = Y##se; X##si = Y##si; X##so = Y##so; X##su = Y##su;

View file

@ -0,0 +1,517 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * declareABCDE
 *
 * Declares, as V64 objects, every working variable the 64-bit round macros
 * refer to: the 25 lanes of the A, B and E sets plus the five-lane C and D
 * sets.  Expand it once at the top of the block that runs the rounds.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define declareABCDE \
    V64 Aba, Abe, Abi, Abo, Abu; \
    V64 Aga, Age, Agi, Ago, Agu; \
    V64 Aka, Ake, Aki, Ako, Aku; \
    V64 Ama, Ame, Ami, Amo, Amu; \
    V64 Asa, Ase, Asi, Aso, Asu; \
    V64 Bba, Bbe, Bbi, Bbo, Bbu; \
    V64 Bga, Bge, Bgi, Bgo, Bgu; \
    V64 Bka, Bke, Bki, Bko, Bku; \
    V64 Bma, Bme, Bmi, Bmo, Bmu; \
    V64 Bsa, Bse, Bsi, Bso, Bsu; \
    V64 Ca, Ce, Ci, Co, Cu; \
    V64 Da, De, Di, Do, Du; \
    V64 Eba, Ebe, Ebi, Ebo, Ebu; \
    V64 Ega, Ege, Egi, Ego, Egu; \
    V64 Eka, Eke, Eki, Eko, Eku; \
    V64 Ema, Eme, Emi, Emo, Emu; \
    V64 Esa, Ese, Esi, Eso, Esu;
/*
 * prepareTheta
 *
 * Fills Ca, Ce, Ci, Co and Cu with the XOR of the five A lanes that share
 * the same final letter (b, g, k, m and s variants of each).
 */
#define prepareTheta \
    Ca = XOR64(XOR64(XOR64(XOR64(Asa, Ama), Aka), Aga), Aba); \
    Ce = XOR64(XOR64(XOR64(XOR64(Ase, Ame), Ake), Age), Abe); \
    Ci = XOR64(XOR64(XOR64(XOR64(Asi, Ami), Aki), Agi), Abi); \
    Co = XOR64(XOR64(XOR64(XOR64(Aso, Amo), Ako), Ago), Abo); \
    Cu = XOR64(XOR64(XOR64(XOR64(Asu, Amu), Aku), Agu), Abu);
// --- Code for round, with prepare-theta
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = XOR64(Cu, ROL64(Ce, 1)); \
De = XOR64(Ca, ROL64(Ci, 1)); \
Di = XOR64(Ce, ROL64(Co, 1)); \
Do = XOR64(Ci, ROL64(Cu, 1)); \
Du = XOR64(Co, ROL64(Ca, 1)); \
\
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
Ca = E##ba; \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
Ce = E##be; \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
Ci = E##bi; \
E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
Co = E##bo; \
E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
Cu = E##bu; \
\
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
XOReq64(Ca, E##ga); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
XOReq64(Ce, E##ge); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
XOReq64(Ci, E##gi); \
E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
XOReq64(Co, E##go); \
E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
XOReq64(Cu, E##gu); \
\
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
XOReq64(Ca, E##ka); \
XOReq64(A##mu, Du); \
Bko = ROL64(A##mu, 8); \
E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
XOReq64(Ce, E##ke); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
XOReq64(Ci, E##ki); \
E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
XOReq64(Co, E##ko); \
E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
XOReq64(Cu, E##ku); \
\
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
XOReq64(Ca, E##ma); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
XOReq64(Ce, E##me); \
XOReq64(A##so, Do); \
Bmu = ROL64(A##so, 56); \
E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
XOReq64(Ci, E##mi); \
E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
XOReq64(Co, E##mo); \
E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
XOReq64(Cu, E##mu); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
XOReq64(Ca, E##sa); \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
XOReq64(Ce, E##se); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
XOReq64(Ci, E##si); \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
XOReq64(Co, E##so); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
XOReq64(Cu, E##su); \
\
// --- Code for round
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIota(i, A, E)
 *
 * One round that reads the 25 lanes with prefix A and writes the 25 lanes
 * with prefix E, XORing KeccakF1600RoundConstants[i] into E##ba.
 *   - On entry Ca..Cu must hold the XOR of the five A lanes ending in
 *     a, e, i, o, u respectively.
 *   - The A lanes are modified in place (each is XORed with its D value
 *     before being rotated), so A does not keep the round input.
 *   - Ca..Cu are only read, never rewritten: unlike
 *     thetaRhoPiChiIotaPrepareTheta this variant prepares nothing for a
 *     following round.
 * Uses the B and D variables as scratch.  ANDnu64 is defined elsewhere
 * (presumably AND with the complement of its first argument -- confirm).
 *
 * The definition ends on a line without a trailing backslash: a dangling
 * continuation here would splice whatever follows the macro (in this file,
 * the start of the round-constant table) into the macro body.
 */
#define thetaRhoPiChiIota(i, A, E) \
    /* D values: each is one C value XOR another C value rotated left by 1 */ \
    Da = XOR64(Cu, ROL64(Ce, 1)); \
    De = XOR64(Ca, ROL64(Ci, 1)); \
    Di = XOR64(Ce, ROL64(Co, 1)); \
    Do = XOR64(Ci, ROL64(Cu, 1)); \
    Du = XOR64(Co, ROL64(Ca, 1)); \
    \
    /* E##b* from A lanes ba, ge, ki, mo, su */ \
    XOReq64(A##ba, Da); \
    Bba = A##ba; \
    XOReq64(A##ge, De); \
    Bbe = ROL64(A##ge, 44); \
    XOReq64(A##ki, Di); \
    Bbi = ROL64(A##ki, 43); \
    E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
    /* the round constant goes into lane ba only */ \
    XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
    XOReq64(A##mo, Do); \
    Bbo = ROL64(A##mo, 21); \
    E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
    XOReq64(A##su, Du); \
    Bbu = ROL64(A##su, 14); \
    E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
    E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
    E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
    \
    /* E##g* from A lanes bo, gu, ka, me, si */ \
    XOReq64(A##bo, Do); \
    Bga = ROL64(A##bo, 28); \
    XOReq64(A##gu, Du); \
    Bge = ROL64(A##gu, 20); \
    XOReq64(A##ka, Da); \
    Bgi = ROL64(A##ka, 3); \
    E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
    XOReq64(A##me, De); \
    Bgo = ROL64(A##me, 45); \
    E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
    XOReq64(A##si, Di); \
    Bgu = ROL64(A##si, 61); \
    E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
    E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
    E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
    \
    /* E##k* from A lanes be, gi, ko, mu, sa */ \
    XOReq64(A##be, De); \
    Bka = ROL64(A##be, 1); \
    XOReq64(A##gi, Di); \
    Bke = ROL64(A##gi, 6); \
    XOReq64(A##ko, Do); \
    Bki = ROL64(A##ko, 25); \
    E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
    XOReq64(A##mu, Du); \
    Bko = ROL64(A##mu, 8); \
    E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
    XOReq64(A##sa, Da); \
    Bku = ROL64(A##sa, 18); \
    E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
    E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
    E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
    \
    /* E##m* from A lanes bu, ga, ke, mi, so */ \
    XOReq64(A##bu, Du); \
    Bma = ROL64(A##bu, 27); \
    XOReq64(A##ga, Da); \
    Bme = ROL64(A##ga, 36); \
    XOReq64(A##ke, De); \
    Bmi = ROL64(A##ke, 10); \
    E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
    XOReq64(A##mi, Di); \
    Bmo = ROL64(A##mi, 15); \
    E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
    XOReq64(A##so, Do); \
    Bmu = ROL64(A##so, 56); \
    E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
    E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
    E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
    \
    /* E##s* from A lanes bi, go, ku, ma, se */ \
    XOReq64(A##bi, Di); \
    Bsa = ROL64(A##bi, 62); \
    XOReq64(A##go, Do); \
    Bse = ROL64(A##go, 55); \
    XOReq64(A##ku, Du); \
    Bsi = ROL64(A##ku, 39); \
    E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
    XOReq64(A##ma, Da); \
    Bso = ROL64(A##ma, 41); \
    E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
    XOReq64(A##se, De); \
    Bsu = ROL64(A##se, 2); \
    E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
    E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
    E##su = XOR64(Bsu, ANDnu64(Bsa, Bse));
/*
 * Per-round 64-bit constants, indexed by round number 0..23.  The round
 * macros XOR entry [i] into lane ba of the round output.
 */
const UINT64 KeccakF1600RoundConstants[24] = {
    /* rounds  0,  1 */ 0x0000000000000001ULL, 0x0000000000008082ULL,
    /* rounds  2,  3 */ 0x800000000000808aULL, 0x8000000080008000ULL,
    /* rounds  4,  5 */ 0x000000000000808bULL, 0x0000000080000001ULL,
    /* rounds  6,  7 */ 0x8000000080008081ULL, 0x8000000000008009ULL,
    /* rounds  8,  9 */ 0x000000000000008aULL, 0x0000000000000088ULL,
    /* rounds 10, 11 */ 0x0000000080008009ULL, 0x000000008000000aULL,
    /* rounds 12, 13 */ 0x000000008000808bULL, 0x800000000000008bULL,
    /* rounds 14, 15 */ 0x8000000000008089ULL, 0x8000000000008003ULL,
    /* rounds 16, 17 */ 0x8000000000008002ULL, 0x8000000000000080ULL,
    /* rounds 18, 19 */ 0x000000000000800aULL, 0x800000008000000aULL,
    /* rounds 20, 21 */ 0x8000000080008081ULL, 0x8000000000008080ULL,
    /* rounds 22, 23 */ 0x0000000080000001ULL, 0x8000000080008008ULL
};
/*
 * copyFromStateAndXor576bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..8 (9 lanes = 576 bits) into the first nine; lanes
 * 9..24 are loaded unchanged.  Neither state[] nor input[] is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    /* lanes 9..24: no input left, plain load */ \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor832bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..12 (13 lanes = 832 bits) into the first thirteen;
 * lanes 13..24 are loaded unchanged.  Neither array is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    /* lanes 13..24: no input left, plain load */ \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1024bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..15 (16 lanes = 1024 bits) into the first sixteen;
 * lanes 16..24 are loaded unchanged.  Neither array is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    /* lanes 16..24: no input left, plain load */ \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1088bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..16 (17 lanes = 1088 bits) into the first seventeen;
 * lanes 17..24 are loaded unchanged.  Neither array is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    /* lanes 17..24: no input left, plain load */ \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1152bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..17 (18 lanes = 1152 bits) into the first eighteen;
 * lanes 18..24 are loaded unchanged.  Neither array is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    /* lanes 18..24: no input left, plain load */ \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1344bits(X, state, input)
 *
 * Loads the 25 lanes of state[] into the X-prefixed working variables,
 * XORing input lanes 0..20 (21 lanes = 1344 bits) into the first
 * twenty-one; lanes 21..24 are loaded unchanged.  Neither array is written.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \
    /* lanes 21..24: no input left, plain load */ \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromState(X, state)
 *
 * Loads the 25 lanes state[0..24] into the X-prefixed working variables
 * (X##ba .. X##su) with LOAD64.  state[] is only read.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyFromState(X, state) \
    X##ba = LOAD64(state[ 0]); \
    X##be = LOAD64(state[ 1]); \
    X##bi = LOAD64(state[ 2]); \
    X##bo = LOAD64(state[ 3]); \
    X##bu = LOAD64(state[ 4]); \
    X##ga = LOAD64(state[ 5]); \
    X##ge = LOAD64(state[ 6]); \
    X##gi = LOAD64(state[ 7]); \
    X##go = LOAD64(state[ 8]); \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyToState(state, X)
 *
 * Stores the 25 X-prefixed working variables (X##ba .. X##su) into
 * state[0..24] with STORE64, in lane order.
 *
 * The definition ends on a line without a trailing backslash so that the
 * text following the macro can never be spliced into its body.
 */
#define copyToState(state, X) \
    STORE64(state[ 0], X##ba); \
    STORE64(state[ 1], X##be); \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    STORE64(state[ 6], X##ge); \
    STORE64(state[ 7], X##gi); \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    STORE64(state[10], X##ka); \
    STORE64(state[11], X##ke); \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    STORE64(state[16], X##me); \
    STORE64(state[17], X##mi); \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/*
 * copyStateVariables(X, Y)
 *
 * Assigns each of the 25 Y-prefixed working variables to the X-prefixed
 * variable of the same lane name, one plane (five lanes) per line.
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; X##be = Y##be; X##bi = Y##bi; X##bo = Y##bo; X##bu = Y##bu; \
    X##ga = Y##ga; X##ge = Y##ge; X##gi = Y##gi; X##go = Y##go; X##gu = Y##gu; \
    X##ka = Y##ka; X##ke = Y##ke; X##ki = Y##ki; X##ko = Y##ko; X##ku = Y##ku; \
    X##ma = Y##ma; X##me = Y##me; X##mi = Y##mi; X##mo = Y##mo; X##mu = Y##mu; \
    X##sa = Y##sa; X##se = Y##se; X##si = Y##si; X##so = Y##so; X##su = Y##su;

View file

@ -0,0 +1,124 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * rounds -- the 24-round sequence, selected by the compile-time value of
 * Unrolling (24, 12, 8, 6, 4, 3, 2 or 1; anything else is a build error).
 *
 * Every variant starts with prepareTheta and finishes with
 * copyToState(state, A), so the expansion site must provide `state` and the
 * working variables the round macros use.  All variants except 24 also need
 * an integer loop variable `i` in scope.
 *
 * Rounds alternate the roles of the A and E variable sets (A->E, then E->A).
 * When a loop body holds an odd number of rounds (Unrolling 3 and 1) the
 * result ends up in E, so copyStateVariables(A, E) moves it back to A before
 * the next iteration.
 */
#if (Unrolling == 24)
/* Fully unrolled; only this variant uses the lighter thetaRhoPiChiIota for the final round (23). */
#define rounds \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
copyToState(state, A)
#elif (Unrolling == 12)
/* 2 loop iterations of 12 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=12) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 8)
/* 3 loop iterations of 8 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=8) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 6)
/* 4 loop iterations of 6 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 4)
/* 6 loop iterations of 4 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 3)
/* 8 loop iterations of 3 rounds each; odd count, so E is copied back to A every iteration. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#elif (Unrolling == 2)
/* 12 loop iterations of 2 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 1)
/* 24 loop iterations of a single round; E is copied back to A after every round. */
#define rounds \
prepareTheta \
for(i=0; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#else
#error "Unrolling is not correctly specified!"
#endif

View file

@ -0,0 +1,62 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakF-1600-interface.h"
/* Compile-time switch: when defined, the "#ifdef UseBebigokimisa" section of
 * KeccakExtract() below is built, which bit-inverts selected lanes of the
 * extracted data. */
#define UseBebigokimisa
/* Local integer aliases.  UINT64 is treated as one 8-byte lane, which assumes
 * unsigned long long is exactly 64 bits wide on the target -- TODO confirm. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
/*
 * KeccakInitialize -- intentionally empty; nothing is set up at run time.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype rather than an old-style "unspecified arguments" function.
 */
void KeccakInitialize(void)
{
}
/*
 * KeccakExtract -- copy the first laneCount 8-byte lanes of state into data.
 *
 *   state     : source buffer, at least laneCount*8 bytes; only read.
 *   data      : destination buffer, at least laneCount*8 bytes.
 *   laneCount : number of 8-byte lanes to copy.
 *
 * When UseBebigokimisa is defined, lanes 1, 2, 8, 12, 17 and 20 are
 * bit-inverted in the copy, each only if it lies inside the copied range
 * (lane index < laneCount).  Presumably this undoes a complemented-lane
 * representation kept by the permutation code -- confirm against it.
 *
 * The inversion works byte by byte on `data` instead of going through a
 * UINT64 pointer, so it places no alignment requirement on `data` and does
 * not type-pun the caller's buffer.  The bytes produced are the same.
 */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
    memcpy(data, state, laneCount*8);
#ifdef UseBebigokimisa
    {
        /* Lanes to invert, in ascending order. */
        static const unsigned int complementedLanes[] = { 1, 2, 8, 12, 17, 20 };
        const unsigned int tableSize =
            (unsigned int)(sizeof(complementedLanes) / sizeof(complementedLanes[0]));
        unsigned int entry;
        unsigned int byteIndex;

        for (entry = 0; entry < tableSize; entry++)
        {
            unsigned char *lane;

            /* The table is ascending: once one lane is out of range, all later ones are too. */
            if (complementedLanes[entry] >= laneCount)
                break;

            lane = data + complementedLanes[entry] * 8;
            for (byteIndex = 0; byteIndex < 8; byteIndex++)
                lane[byteIndex] = (unsigned char)~lane[byteIndex];
        }
    }
#endif
}

View file

@ -0,0 +1,766 @@
#
# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
# Michaël Peeters and Gilles Van Assche. For more information, feedback or
# questions, please refer to our website: http://keccak.noekeon.org/
#
# Implementation by Ronny Van Keer,
# hereby denoted as "the implementer".
#
# To the extent possible under law, the implementer has waived all copyright
# and related or neighboring rights to the source code in this file.
# http://creativecommons.org/publicdomain/zero/1.0/
#
.text
#// --- defines
# UseSIMD is set to 1 here; it is not referenced in the part of the file
# reviewed, so its effect should be confirmed further down.
.equ UseSIMD, 1
# Byte offsets of the 25 lanes inside a state block: lane index times 8.
# They are used as displacements, e.g. _ba(reg) .. _su(reg).
.equ _ba, 0*8
.equ _be, 1*8
.equ _bi, 2*8
.equ _bo, 3*8
.equ _bu, 4*8
.equ _ga, 5*8
.equ _ge, 6*8
.equ _gi, 7*8
.equ _go, 8*8
.equ _gu, 9*8
.equ _ka, 10*8
.equ _ke, 11*8
.equ _ki, 12*8
.equ _ko, 13*8
.equ _ku, 14*8
.equ _ma, 15*8
.equ _me, 16*8
.equ _mi, 17*8
.equ _mo, 18*8
.equ _mu, 19*8
.equ _sa, 20*8
.equ _se, 21*8
.equ _si, 22*8
.equ _so, 23*8
.equ _su, 24*8
# arguments
# Register names for the incoming arguments.  Note the overlaps with the
# round registers below: apInput is the same register as rCa, and aNbrWords
# the same as rDi.
.equ apState, %rdi
.equ apInput, %rsi
.equ aNbrWords, %rdx
# xor input into state section
# xpState is the same register as rDu.
.equ xpState, %r9
# round vars
# rT1 is a scratch register; rpState is the same register as apState;
# rpStack is the stack pointer.
.equ rT1, %rax
.equ rpState, %rdi
.equ rpStack, %rsp
.equ rDa, %rbx
.equ rDe, %rcx
.equ rDi, %rdx
.equ rDo, %r8
.equ rDu, %r9
.equ rBa, %r10
.equ rBe, %r11
.equ rBi, %r12
.equ rBo, %r13
.equ rBu, %r14
# rCi and rCo do not have registers of their own: they are aliases of rBi
# and rBo, so their values are lost as soon as those B registers are loaded.
.equ rCa, %rsi
.equ rCe, %rbp
.equ rCi, rBi
.equ rCo, rBo
.equ rCu, %r15
#// mKeccakRound iState, oState, rc, lastRound
#//
#// Emits the code for one round.  The 25 lanes are read from the block
#// addressed by iState and the 25 result lanes are written to the block
#// addressed by oState; the immediate rc is XORed into result lane ba.
#//
#// iState and oState must not overlap: result lanes are stored to oState
#// before all of the iState lanes have been read.
#//
#// Expected on entry:
#//   rCa, rCe, rCu = XOR of the five iState lanes ending in a, e, u
#//   rDi, rDo      = iState lanes si and so
#// Left on exit:
#//   rDi, rDo      = result lanes si and so
#//   rCa, rCe, rCu = the same XORs taken over the result lanes, but only
#//                   when lastRound is 0; the accumulation is not assembled
#//                   otherwise
#// rT1, rBa..rBu and rDa..rDu are all overwritten.
.macro mKeccakRound iState, oState, rc, lastRound
#// D values.  rCi and rCo are finished here from the partial sums carried in
#// rDi and rDo (rCi = bi ^ ki ^ gi ^ mi ^ si, likewise rCo), then:
#//   rDa = rCu ^ rol(rCe, 1)    rDe = rCa ^ rol(rCi, 1)
#//   rDi = rCe ^ rol(rCo, 1)    rDo = rCi ^ rol(rCu, 1)
#//   rDu = rCo ^ rol(rCa, 1)
#// (rolq with a single operand rotates by one bit.)
movq rCe, rDa
rolq rDa
movq _bi(\iState), rCi
xorq _gi(\iState), rDi
xorq rCu, rDa
xorq _ki(\iState), rCi
xorq _mi(\iState), rDi
xorq rDi, rCi
movq rCi, rDe
rolq rDe
movq _bo(\iState), rCo
xorq _go(\iState), rDo
xorq rCa, rDe
xorq _ko(\iState), rCo
xorq _mo(\iState), rDo
xorq rDo, rCo
movq rCo, rDi
rolq rDi
movq rCu, rDo
xorq rCe, rDi
rolq rDo
movq rCa, rDu
xorq rCi, rDo
rolq rDu
#// Result lanes ba..bu, from input lanes ba, ge, ki, mo, su (each XORed with
#// its D value; rotations 44, 43, 21, 14, none for ba).  The xorq that
#// completes rDu is interleaved with the first loads.  rCa and rCu are
#// restarted from result lanes ba and bu here.
movq _ba(\iState), rBa
movq _ge(\iState), rBe
xorq rCo, rDu
movq _ki(\iState), rBi
movq _mo(\iState), rBo
movq _su(\iState), rBu
xorq rDe, rBe
rolq $44, rBe
xorq rDi, rBi
xorq rDa, rBa
rolq $43, rBi
movq rBe, rCa
#// the round constant is folded into result lane ba
movq $\rc, rT1
orq rBi, rCa
xorq rBa, rT1
xorq rT1, rCa
movq rCa, _ba(\oState)
xorq rDu, rBu
rolq $14, rBu
movq rBa, rCu
andq rBe, rCu
xorq rBu, rCu
movq rCu, _bu(\oState)
xorq rDo, rBo
rolq $21, rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _bi(\oState)
notq rBi
orq rBa, rBu
orq rBo, rBi
xorq rBo, rBu
xorq rBe, rBi
movq rBu, _bo(\oState)
movq rBi, _be(\oState)
.if \lastRound == 0
movq rBi, rCe
.endif
#// Result lanes ga..gu, from input lanes bo, gu, ka, me, si
#// (rotations 28, 20, 3, 45, 61).
movq _gu(\iState), rBe
xorq rDu, rBe
movq _ka(\iState), rBi
rolq $20, rBe
xorq rDa, rBi
rolq $3, rBi
movq _bo(\iState), rBa
movq rBe, rT1
orq rBi, rT1
xorq rDo, rBa
movq _me(\iState), rBo
movq _si(\iState), rBu
rolq $28, rBa
xorq rBa, rT1
movq rT1, _ga(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDe, rBo
rolq $45, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ge(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDi, rBu
rolq $61, rBu
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _go(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _gu(\oState)
notq rBu
.if \lastRound == 0
xorq rBa, rCu
.endif
orq rBu, rBo
xorq rBi, rBo
movq rBo, _gi(\oState)
#// Result lanes ka..ku, from input lanes be, gi, ko, mu, sa
#// (rotations 1, 6, 25, 8, 18).
movq _be(\iState), rBa
movq _gi(\iState), rBe
movq _ko(\iState), rBi
movq _mu(\iState), rBo
movq _sa(\iState), rBu
xorq rDi, rBe
rolq $6, rBe
xorq rDo, rBi
rolq $25, rBi
movq rBe, rT1
orq rBi, rT1
xorq rDe, rBa
rolq $1, rBa
xorq rBa, rT1
movq rT1, _ka(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDu, rBo
rolq $8, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ke(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDa, rBu
rolq $18, rBu
notq rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _ki(\oState)
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _ko(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _ku(\oState)
.if \lastRound == 0
xorq rBa, rCu
.endif
#// Result lanes ma..mu, from input lanes bu, ga, ke, mi, so
#// (rotations 27, 36, 10, 15, 56).
movq _ga(\iState), rBe
xorq rDa, rBe
movq _ke(\iState), rBi
rolq $36, rBe
xorq rDe, rBi
movq _bu(\iState), rBa
rolq $10, rBi
movq rBe, rT1
movq _mi(\iState), rBo
andq rBi, rT1
xorq rDu, rBa
movq _so(\iState), rBu
rolq $27, rBa
xorq rBa, rT1
movq rT1, _ma(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDi, rBo
rolq $15, rBo
movq rBi, rT1
orq rBo, rT1
xorq rBe, rT1
movq rT1, _me(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDo, rBu
rolq $56, rBu
notq rBo
movq rBo, rT1
orq rBu, rT1
xorq rBi, rT1
movq rT1, _mi(\oState)
orq rBa, rBe
xorq rBu, rBe
movq rBe, _mu(\oState)
andq rBa, rBu
xorq rBo, rBu
movq rBu, _mo(\oState)
.if \lastRound == 0
xorq rBe, rCu
.endif
#// Result lanes sa..su, from input lanes bi, go, ku, ma, se
#// (rotations 62, 55, 39, 41, 2).  The D registers are reused as
#// destinations once their D value has been consumed, which is how rDi and
#// rDo come to hold result lanes si and so at the end.
movq _bi(\iState), rBa
movq _go(\iState), rBe
movq _ku(\iState), rBi
xorq rDi, rBa
movq _ma(\iState), rBo
rolq $62, rBa
xorq rDo, rBe
movq _se(\iState), rBu
rolq $55, rBe
xorq rDu, rBi
movq rBa, rDu
xorq rDe, rBu
rolq $2, rBu
andq rBe, rDu
xorq rBu, rDu
movq rDu, _su(\oState)
rolq $39, rBi
.if \lastRound == 0
xorq rDu, rCu
.endif
notq rBe
xorq rDa, rBo
movq rBe, rDa
andq rBi, rDa
xorq rBa, rDa
movq rDa, _sa(\oState)
.if \lastRound == 0
xor rDa, rCa
.endif
rolq $41, rBo
movq rBi, rDe
orq rBo, rDe
xorq rBe, rDe
movq rDe, _se(\oState)
.if \lastRound == 0
xorq rDe, rCe
.endif
movq rBo, rDi
movq rBu, rDo
andq rBu, rDi
orq rBa, rDo
xorq rBi, rDi
xorq rBo, rDo
movq rDi, _si(\oState)
movq rDo, _so(\oState)
.endm
# mKeccakPermutation: applies the full 24-round permutation to the state
# addressed by rpState. Rounds alternate between rpState and rpStack as
# input and output buffers; because the round count is even, the final
# round writes its result back to rpState.
# NOTE(review): rpState, rpStack and the rC*/rD* names are defined outside
# this section; rpStack presumably aliases the area reserved on %rsp below.
.macro mKeccakPermutation
# Reserve a scratch copy of the state: 25 lanes of 8 bytes each.
subq $8*25, %rsp
# Seed the running column parities for columns a, e and u.
movq _ba(rpState), rCa
movq _be(rpState), rCe
movq _bu(rpState), rCu
xorq _ga(rpState), rCa
xorq _ge(rpState), rCe
xorq _gu(rpState), rCu
xorq _ka(rpState), rCa
xorq _ke(rpState), rCe
xorq _ku(rpState), rCu
xorq _ma(rpState), rCa
xorq _me(rpState), rCe
xorq _mu(rpState), rCu
xorq _sa(rpState), rCa
xorq _se(rpState), rCe
# Lanes si and so are preloaded into rDi and rDo for the first round.
movq _si(rpState), rDi
movq _so(rpState), rDo
xorq _su(rpState), rCu
# Round list: input buffer, output buffer, round constant, last-round flag.
# The constants are the standard Keccak-f[1600] round constants in order.
mKeccakRound rpState, rpStack, 0x0000000000000001, 0
mKeccakRound rpStack, rpState, 0x0000000000008082, 0
mKeccakRound rpState, rpStack, 0x800000000000808a, 0
mKeccakRound rpStack, rpState, 0x8000000080008000, 0
mKeccakRound rpState, rpStack, 0x000000000000808b, 0
mKeccakRound rpStack, rpState, 0x0000000080000001, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008009, 0
mKeccakRound rpState, rpStack, 0x000000000000008a, 0
mKeccakRound rpStack, rpState, 0x0000000000000088, 0
mKeccakRound rpState, rpStack, 0x0000000080008009, 0
mKeccakRound rpStack, rpState, 0x000000008000000a, 0
mKeccakRound rpState, rpStack, 0x000000008000808b, 0
mKeccakRound rpStack, rpState, 0x800000000000008b, 0
mKeccakRound rpState, rpStack, 0x8000000000008089, 0
mKeccakRound rpStack, rpState, 0x8000000000008003, 0
mKeccakRound rpState, rpStack, 0x8000000000008002, 0
mKeccakRound rpStack, rpState, 0x8000000000000080, 0
mKeccakRound rpState, rpStack, 0x000000000000800a, 0
mKeccakRound rpStack, rpState, 0x800000008000000a, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008080, 0
mKeccakRound rpState, rpStack, 0x0000000080000001, 0
# Final round: the flag value 1 disables the parity updates guarded by
# "lastRound == 0" inside mKeccakRound.
mKeccakRound rpStack, rpState, 0x8000000080008008, 1
# Release the scratch area.
addq $8*25, %rsp
.endm
# mPushRegs: saves rbx, rbp and r12-r15 on the stack. These are the
# callee-saved general-purpose registers of the System V AMD64 ABI.
# Must be paired with mPopRegs, which restores them in reverse order.
.macro mPushRegs
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.endm
# mPopRegs: restores the registers saved by mPushRegs, popping them in the
# exact reverse of the push order (r15 first, rbx last).
.macro mPopRegs
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
# mXorState128 input, state, offset
# XORs 16 bytes (two 64-bit lanes) read at offset from the input pointer
# into the 16 bytes at the same offset from the state pointer.
# Clobbers rax and rcx when UseSIMD is 0, xmm0 otherwise.
.macro mXorState128 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %rcx
xorq %rax, \offset(\state)
xorq %rcx, \offset+8(\state)
.else
# Input is loaded with an unaligned move; the state is used as a direct
# memory operand of pxor, which in this (non-VEX) encoding requires the
# state address to be 16-byte aligned.
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu %xmm0, \offset(\state)
.endif
.endm
# mXorState256 input, state, offset
# XORs 32 bytes (four 64-bit lanes) read at offset from the input pointer
# into the 32 bytes at the same offset from the state pointer.
# Clobbers rax, r10, rcx and r8 when UseSIMD is 0, xmm0 and xmm1 otherwise.
.macro mXorState256 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %r10
movq \offset+16(\input), %rcx
movq \offset+24(\input), %r8
xorq %rax, \offset(\state)
xorq %r10, \offset+8(\state)
xorq %rcx, \offset+16(\state)
xorq %r8, \offset+24(\state)
.else
# Unaligned loads from the input; pxor reads the state as a memory operand,
# so the state address must be 16-byte aligned.
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu %xmm1, \offset+16(\state)
.endif
.endm
# mXorState512 input, state, offset
# XORs 64 bytes (eight 64-bit lanes) read at offset from the input pointer
# into the 64 bytes at the same offset from the state pointer.
# Without SIMD this is two mXorState256 expansions (same scratch registers);
# with SIMD it clobbers xmm0 through xmm3.
.macro mXorState512 input, state, offset
.if UseSIMD == 0
mXorState256 \input, \state, \offset
mXorState256 \input, \state, \offset+32
.else
# Loads, XORs and stores are interleaved across four xmm registers.
# The state is a direct pxor memory operand and must be 16-byte aligned.
movdqu \offset(\input), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset(\state), %xmm0
movdqu \offset+32(\input), %xmm2
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu \offset+48(\input), %xmm3
pxor \offset+32(\state), %xmm2
movdqu %xmm1, \offset+16(\state)
pxor \offset+48(\state), %xmm3
movdqu %xmm2, \offset+32(\state)
movdqu %xmm3, \offset+48(\state)
.endif
.endm
# -------------------------------------------------------------------------
# KeccakPermutation: applies the 24-round permutation in place to the state
# addressed by rpState (called from C as KeccakPermutation(state)).
# Saves and restores rbx, rbp and r12-r15 around the permutation.
.align 2
.global KeccakPermutation
.type KeccakPermutation, %function
KeccakPermutation:
mPushRegs
mKeccakPermutation
mPopRegs
ret
# The size expression uses the current location, so it has to follow the
# last instruction of the function to yield the real code size.
.size KeccakPermutation, .-KeccakPermutation
# -------------------------------------------------------------------------
# KeccakAbsorb576bits: XORs 576 bits (9 lanes, 72 bytes) from apInput into
# the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb576bits(state, data).
.align 2
.global KeccakAbsorb576bits
.type KeccakAbsorb576bits, %function
KeccakAbsorb576bits:
# Lanes 0-7.
mXorState512 apInput, apState, 0
# Lane 8.
movq 64(apInput), %rax
xorq %rax, 64(apState)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb576bits, .-KeccakAbsorb576bits
# -------------------------------------------------------------------------
# KeccakAbsorb832bits: XORs 832 bits (13 lanes, 104 bytes) from apInput into
# the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb832bits(state, data).
.align 2
.global KeccakAbsorb832bits
.type KeccakAbsorb832bits, %function
KeccakAbsorb832bits:
# Lanes 0-7, then lanes 8-11.
mXorState512 apInput, apState, 0
mXorState256 apInput, apState, 64
# Lane 12.
movq 96(apInput), %rax
xorq %rax, 96(apState)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb832bits, .-KeccakAbsorb832bits
# -------------------------------------------------------------------------
# KeccakAbsorb1024bits: XORs 1024 bits (16 lanes, 128 bytes) from apInput
# into the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb1024bits(state, data).
.align 2
.global KeccakAbsorb1024bits
.type KeccakAbsorb1024bits, %function
KeccakAbsorb1024bits:
# Lanes 0-7, then lanes 8-15.
mXorState512 apInput, apState, 0
mXorState512 apInput, apState, 64
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits
# -------------------------------------------------------------------------
# KeccakAbsorb1088bits: XORs 1088 bits (17 lanes, 136 bytes) from apInput
# into the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb1088bits(state, data).
.align 2
.global KeccakAbsorb1088bits
.type KeccakAbsorb1088bits, %function
KeccakAbsorb1088bits:
# Lanes 0-15.
mXorState512 apInput, apState, 0
mXorState512 apInput, apState, 64
# Lane 16.
movq 128(apInput), %rax
xorq %rax, 128(apState)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits
# -------------------------------------------------------------------------
# KeccakAbsorb1152bits: XORs 1152 bits (18 lanes, 144 bytes) from apInput
# into the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb1152bits(state, data).
.align 2
.global KeccakAbsorb1152bits
.type KeccakAbsorb1152bits, %function
KeccakAbsorb1152bits:
# Lanes 0-15, then lanes 16-17.
mXorState512 apInput, apState, 0
mXorState512 apInput, apState, 64
mXorState128 apInput, apState, 128
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits
# -------------------------------------------------------------------------
# KeccakAbsorb1344bits: XORs 1344 bits (21 lanes, 168 bytes) from apInput
# into the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb1344bits(state, data).
.align 2
.global KeccakAbsorb1344bits
.type KeccakAbsorb1344bits, %function
KeccakAbsorb1344bits:
# Lanes 0-15, then lanes 16-19.
mXorState512 apInput, apState, 0
mXorState512 apInput, apState, 64
mXorState256 apInput, apState, 128
# Lane 20.
movq 160(apInput), %rax
xorq %rax, 160(apState)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits
# -------------------------------------------------------------------------
# KeccakAbsorb: generic absorb. XORs aNbrWords 64-bit lanes from apInput
# into the state at apState, then applies the permutation.
# Called from C as KeccakAbsorb(state, data, laneCount).
# The lane count is decomposed by its bits 16, 8, 4, 2 and 1; each set bit
# XORs a block of that many lanes and advances both working pointers.
# xpState is a moving copy of the state pointer, so apState itself is left
# untouched for the permutation.
.align 2
.global KeccakAbsorb
.type KeccakAbsorb, %function
KeccakAbsorb:
movq apState, xpState
# Bit 16 set: absorb 16 lanes (128 bytes).
test $16, aNbrWords
jz xorInputToState8
mXorState512 apInput, xpState, 0
mXorState512 apInput, xpState, 64
addq $128, apInput
addq $128, xpState
xorInputToState8:
# Bit 8 set: absorb 8 lanes (64 bytes).
test $8, aNbrWords
jz xorInputToState4
mXorState512 apInput, xpState, 0
addq $64, apInput
addq $64, xpState
xorInputToState4:
# Bit 4 set: absorb 4 lanes (32 bytes).
test $4, aNbrWords
jz xorInputToState2
mXorState256 apInput, xpState, 0
addq $32, apInput
addq $32, xpState
xorInputToState2:
# Bit 2 set: absorb 2 lanes (16 bytes).
test $2, aNbrWords
jz xorInputToState1
mXorState128 apInput, xpState, 0
addq $16, apInput
addq $16, xpState
xorInputToState1:
# Bit 1 set: absorb the final single lane.
test $1, aNbrWords
jz xorInputToStateDone
movq (apInput), %rax
xorq %rax, (xpState)
xorInputToStateDone:
mPushRegs
mKeccakPermutation
mPopRegs
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakAbsorb, .-KeccakAbsorb
# -------------------------------------------------------------------------
# KeccakInitializeState: writes the initial value of all 25 state lanes at
# apState. Lanes 1, 2, 8, 12, 17 and 20 are set to all ones; every other
# lane is set to zero.
# NOTE(review): the all-ones lanes look like a complemented-lane internal
# representation (KeccakExtract1024bits inverts lanes 1, 2, 8 and 12 on
# output) - confirm against the round macro.
# Clobbers rax and rcx, plus xmm0 when UseSIMD is non-zero.
.align 2
.global KeccakInitializeState
.type KeccakInitializeState, %function
KeccakInitializeState:
# rax = 0, rcx = all ones.
xorq %rax, %rax
xorq %rcx, %rcx
notq %rcx
.if UseSIMD == 0
movq %rax, 0*8(apState)
movq %rcx, 1*8(apState)
movq %rcx, 2*8(apState)
movq %rax, 3*8(apState)
movq %rax, 4*8(apState)
movq %rax, 5*8(apState)
movq %rax, 6*8(apState)
movq %rax, 7*8(apState)
movq %rcx, 8*8(apState)
movq %rax, 9*8(apState)
movq %rax, 10*8(apState)
movq %rax, 11*8(apState)
movq %rcx, 12*8(apState)
movq %rax, 13*8(apState)
movq %rax, 14*8(apState)
movq %rax, 15*8(apState)
movq %rax, 16*8(apState)
movq %rcx, 17*8(apState)
movq %rax, 18*8(apState)
movq %rax, 19*8(apState)
movq %rcx, 20*8(apState)
movq %rax, 21*8(apState)
movq %rax, 22*8(apState)
movq %rax, 23*8(apState)
movq %rax, 24*8(apState)
.else
# Same lane values as above; pairs of adjacent zero lanes are written with
# a single 16-byte store from the zeroed xmm0.
pxor %xmm0, %xmm0
movq %rax, 0*8(apState)
movq %rcx, 1*8(apState)
movq %rcx, 2*8(apState)
movq %rax, 3*8(apState)
movdqu %xmm0, 4*8(apState)
movdqu %xmm0, 6*8(apState)
movq %rcx, 8*8(apState)
movq %rax, 9*8(apState)
movdqu %xmm0, 10*8(apState)
movq %rcx, 12*8(apState)
movq %rax, 13*8(apState)
movdqu %xmm0, 14*8(apState)
movq %rax, 16*8(apState)
movq %rcx, 17*8(apState)
movdqu %xmm0, 18*8(apState)
movq %rcx, 20*8(apState)
movq %rax, 21*8(apState)
movdqu %xmm0, 22*8(apState)
movq %rax, 24*8(apState)
.endif
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakInitializeState, .-KeccakInitializeState
# -------------------------------------------------------------------------
# KeccakExtract1024bits: copies the first 16 state lanes (1024 bits) from
# apState to the output buffer addressed by rsi, inverting lanes 1, 2, 8
# and 12 on the way out. The state itself is not modified.
# Called from C as KeccakExtract1024bits(state, data).
# NOTE(review): the output pointer is the literal rsi rather than a symbolic
# argument name like the other routines - presumably rsi is what apInput
# names; confirm against the register definitions.
# Clobbers rax, rcx, rdx and r8.
.align 2
.global KeccakExtract1024bits
.type KeccakExtract1024bits, %function
KeccakExtract1024bits:
# Lanes 0-3 (lanes 1 and 2 inverted).
movq 0*8(apState), %rax
movq 1*8(apState), %rcx
movq 2*8(apState), %rdx
movq 3*8(apState), %r8
notq %rcx
notq %rdx
movq %rax, 0*8(%rsi)
movq %rcx, 1*8(%rsi)
movq %rdx, 2*8(%rsi)
movq %r8, 3*8(%rsi)
# Lanes 4-7 (copied unchanged).
movq 4*8(apState), %rax
movq 5*8(apState), %rcx
movq 6*8(apState), %rdx
movq 7*8(apState), %r8
movq %rax, 4*8(%rsi)
movq %rcx, 5*8(%rsi)
movq %rdx, 6*8(%rsi)
movq %r8, 7*8(%rsi)
# Lanes 8-11 (lane 8 inverted).
movq 8*8(apState), %rax
movq 9*8(apState), %rcx
movq 10*8(apState), %rdx
movq 11*8(apState), %r8
notq %rax
movq %rax, 8*8(%rsi)
movq %rcx, 9*8(%rsi)
movq %rdx, 10*8(%rsi)
movq %r8, 11*8(%rsi)
# Lanes 12-15 (lane 12 inverted).
movq 12*8(apState), %rax
movq 13*8(apState), %rcx
movq 14*8(apState), %rdx
movq 15*8(apState), %r8
notq %rax
movq %rax, 12*8(%rsi)
movq %rcx, 13*8(%rsi)
movq %rdx, 14*8(%rsi)
movq %r8, 15*8(%rsi)
ret
# Emitted after the body so that the symbol size covers the function.
.size KeccakExtract1024bits, .-KeccakExtract1024bits

View file

@ -0,0 +1,81 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakNISTInterface.h"
#include "KeccakF-1600-interface.h"
/*
 * Set up a sponge for the requested digest size.
 *
 * hashbitlen selects the rate; the capacity is always the remainder of the
 * 1600-bit permutation width:
 *     0   -> rate 1024 (default parameters, arbitrary-length output)
 *     224 -> rate 1152
 *     256 -> rate 1088
 *     384 -> rate  832
 *     512 -> rate  576
 * Any other value returns BAD_HASHLEN and leaves *state untouched.
 * On success the requested length is recorded in state->fixedOutputLength.
 */
HashReturn Keccak_Init(hashState *state, int hashbitlen)
{
    unsigned int rate;

    switch (hashbitlen) {
    case 0:
        rate = 1024;
        break;
    case 224:
        rate = 1152;
        break;
    case 256:
        rate = 1088;
        break;
    case 384:
        rate = 832;
        break;
    case 512:
        rate = 576;
        break;
    default:
        return BAD_HASHLEN;
    }

    /* rate + capacity must equal 1600 for InitSponge to accept the pair. */
    InitSponge((spongeState*)state, rate, 1600 - rate);
    state->fixedOutputLength = hashbitlen;
    return SUCCESS;
}
/*
 * Feed databitlen bits of data into the sponge.
 *
 * Whole bytes are passed straight to Absorb(). If the length is not a
 * multiple of 8, the trailing bits (held in the most significant bits of the
 * last input byte) are shifted down to the least significant bits, which is
 * the layout Absorb() expects, and absorbed in a second call.
 * Returns the result of the Absorb() call that ended the processing.
 */
HashReturn Keccak_Update(hashState *state, const BitSequence *data, DataLength databitlen)
{
    const unsigned int trailingBits = (unsigned int)(databitlen % 8);
    HashReturn status;
    unsigned char tail;

    if (trailingBits == 0)
        return Absorb((spongeState*)state, data, databitlen);

    status = Absorb((spongeState*)state, data, databitlen - trailingBits);
    if (status != SUCCESS)
        return status;

    /* Move the partial byte from the top bits to the bottom bits. */
    tail = data[databitlen/8] >> (8 - trailingBits);
    return Absorb((spongeState*)state, &tail, trailingBits);
}
/*
 * Produce the digest: squeezes state->fixedOutputLength bits (the length
 * recorded by Keccak_Init) into hashval and returns the Squeeze() result.
 */
HashReturn Keccak_Final(hashState *state, BitSequence *hashval)
{
    const unsigned int digestBits = state->fixedOutputLength;

    return Squeeze(state, hashval, digestBits);
}
/*
 * One-shot hash: Init, Update and Final on a local state.
 *
 * Only the four fixed output lengths (224, 256, 384, 512) are accepted here;
 * anything else, including 0, returns BAD_HASHLEN. The first failing step
 * ends the sequence and its status is returned.
 */
HashReturn Keccak_Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{
    hashState ctx;
    HashReturn status;

    switch (hashbitlen) {
    case 224:
    case 256:
    case 384:
    case 512:
        break;
    default:
        return BAD_HASHLEN; // Only the four fixed output lengths available through this API
    }

    status = Keccak_Init(&ctx, hashbitlen);
    if (status == SUCCESS)
        status = Keccak_Update(&ctx, data, databitlen);
    if (status == SUCCESS)
        status = Keccak_Final(&ctx, hashval);
    return status;
}

View file

@ -0,0 +1,70 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakNISTInterface_h_
#define _KeccakNISTInterface_h_
#include "KeccakSponge.h"
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
typedef spongeState hashState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen.
* @param state Pointer to the state of the sponge function to be initialized.
* @param hashbitlen The desired number of output bits,
* or 0 for Keccak[] with default parameters
* and arbitrarily-long output.
* @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
HashReturn Keccak_Init(hashState *state, int hashbitlen);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by Keccak_Init().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Keccak_Update(), databitlen was a multiple of 8.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_Update(hashState *state, const BitSequence *data, DataLength databitlen);
/**
* Function to squeeze output data from the sponge function.
* If @a hashbitlen was not 0 in the call to Keccak_Init(), the number of output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Keccak_Init(), the output bits must be extracted using the Squeeze() function.
* @param state Pointer to the state of the sponge function initialized by Keccak_Init().
* @param hashval Pointer to the buffer where to store the output data.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_Final(hashState *state, BitSequence *hashval);
/**
* Function to compute a hash using the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen.
* @param hashbitlen The desired number of output bits.
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @param hashval Pointer to the buffer where to store the output data.
* @pre The value of hashbitlen must be one of 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
HashReturn Keccak_Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
#endif

266
crypto/keccak/KeccakSponge.c Executable file
View file

@ -0,0 +1,266 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakSponge.h"
#include "KeccakF-1600-interface.h"
#ifdef KeccakReference
#include "displayIntermediateValues.h"
#endif
/*
 * Put a sponge into its initial absorbing state.
 *
 * Accepts only rate/capacity pairs that sum to 1600 with a rate that is a
 * non-zero multiple of 64 strictly below 1600; returns 1 otherwise without
 * touching *state. On success the permutation state is initialized, the data
 * queue is zeroed, all counters are reset and 0 is returned.
 */
int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity)
{
    const int widthMatches = (rate + capacity == 1600);
    const int rateUsable = (rate > 0) && (rate < 1600) && ((rate % 64) == 0);

    if (!widthMatches || !rateUsable)
        return 1;

    KeccakInitialize();
    KeccakInitializeState(state->state);
    memset(state->dataQueue, 0, KeccakMaximumRateInBytes);

    state->rate = rate;
    state->capacity = capacity;
    state->fixedOutputLength = 0;
    state->bitsInQueue = 0;
    state->bitsAvailableForSqueezing = 0;
    state->squeezing = 0;
    return 0;
}
/*
 * Absorb the block currently held in state->dataQueue and empty the queue.
 *
 * The caller must have filled the queue completely (bitsInQueue == rate).
 * A rate-specific absorb routine is used when the matching ProvideFastNNNN
 * macro is defined; every other rate goes through the generic KeccakAbsorb()
 * with the rate expressed in 64-bit lanes.
 */
void AbsorbQueue(spongeState *state)
{
#ifdef KeccakReference
    displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8);
#endif

    switch (state->rate) {
#ifdef ProvideFast576
    case 576:
        KeccakAbsorb576bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast832
    case 832:
        KeccakAbsorb832bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1024
    case 1024:
        KeccakAbsorb1024bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1088
    case 1088:
        KeccakAbsorb1088bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1152
    case 1152:
        KeccakAbsorb1152bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1344
    case 1344:
        KeccakAbsorb1344bits(state->state, state->dataQueue);
        break;
#endif
    default:
        KeccakAbsorb(state->state, state->dataQueue, state->rate/64);
        break;
    }

    state->bitsInQueue = 0;
}
/*
 * Absorb databitlen bits of input into the sponge.
 *
 * state       sponge in the absorbing phase
 * data        input bytes; if databitlen is not a multiple of 8 the final
 *             bits sit in the least significant bits of the last byte
 * databitlen  number of input bits
 *
 * Returns 0 on success, 1 if a previous call already supplied a partial byte
 * or if the sponge has switched to the squeezing phase.
 *
 * While the queue is empty, whole blocks are absorbed directly from the
 * caller's buffer. Otherwise the input is copied into the queue until a full
 * block is available, and a trailing partial byte is stored masked.
 */
int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen)
{
    unsigned long long i, j, wholeBlocks, remaining;
    unsigned int partialBlock, partialByte, room;
    const unsigned char *curData;

    if ((state->bitsInQueue % 8) != 0)
        return 1; // Only the last call may contain a partial byte
    if (state->squeezing)
        return 1; // Too late for additional input

    i = 0;
    while(i < databitlen) {
        if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) {
            // Fast path: the queue is empty and at least one whole block remains
            wholeBlocks = (databitlen-i)/state->rate;
            curData = data+i/8;
#ifdef ProvideFast576
            if (state->rate == 576) {
                for(j=0; j<wholeBlocks; j++, curData+=576/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb576bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast832
            if (state->rate == 832) {
                for(j=0; j<wholeBlocks; j++, curData+=832/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb832bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1024
            if (state->rate == 1024) {
                for(j=0; j<wholeBlocks; j++, curData+=1024/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1024bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1088
            if (state->rate == 1088) {
                for(j=0; j<wholeBlocks; j++, curData+=1088/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1088bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1152
            if (state->rate == 1152) {
                for(j=0; j<wholeBlocks; j++, curData+=1152/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1152bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1344
            if (state->rate == 1344) {
                for(j=0; j<wholeBlocks; j++, curData+=1344/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1344bits(state->state, curData);
                }
            }
            else
#endif
            {
                for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb(state->state, curData, state->rate/64);
                }
            }
            i += wholeBlocks*state->rate;
        }
        else {
            // Slow path: top up the queue. The remaining bit count is 64-bit
            // and must be clamped to the free space in the queue BEFORE it is
            // narrowed to unsigned int. Narrowing first would truncate counts
            // of 2^32 bits or more, and the 32-bit sum with bitsInQueue could
            // wrap, allowing an oversized memcpy or a loop that never advances.
            remaining = databitlen - i;
            room = state->rate - state->bitsInQueue;
            if (remaining > room)
                partialBlock = room;
            else
                partialBlock = (unsigned int)remaining;
            partialByte = partialBlock % 8;
            partialBlock -= partialByte;
            memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8);
            state->bitsInQueue += partialBlock;
            i += partialBlock;
            if (state->bitsInQueue == state->rate)
                AbsorbQueue(state);
            if (partialByte > 0) {
                // Keep only the valid low-order bits of the final byte
                unsigned char mask = (1 << partialByte)-1;
                state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask;
                state->bitsInQueue += partialByte;
                i += partialByte;
            }
        }
    }
    return 0;
}
/*
 * Finish the absorbing phase: append the padding (a 1 bit, as many 0 bits
 * as needed, and a final 1 bit in the last position of the block), absorb
 * the padded block(s), extract the first output block into dataQueue and
 * mark the sponge as squeezing.
 */
void PadAndSwitchToSqueezingPhase(spongeState *state)
{
// Note: the bits are numbered from 0=LSB to 7=MSB
if (state->bitsInQueue + 1 == state->rate) {
// Only one free bit is left: the first padding bit fills the block, which
// is absorbed, and the closing padding bit goes into a fresh zeroed block.
state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8);
AbsorbQueue(state);
memset(state->dataQueue, 0, state->rate/8);
}
else {
// Zero every byte after the last (possibly partial) data byte, then set
// the first padding bit right after the data bits.
memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8);
state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8);
}
// Closing padding bit: the last bit of the block.
state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8);
AbsorbQueue(state);
#ifdef KeccakReference
displayText(1, "--- Switching to squeezing phase ---");
#endif
// Copy the first rate bits of the state into dataQueue for Squeeze() to use.
#ifdef ProvideFast1024
if (state->rate == 1024) {
KeccakExtract1024bits(state->state, state->dataQueue);
state->bitsAvailableForSqueezing = 1024;
}
else
#endif
{
KeccakExtract(state->state, state->dataQueue, state->rate/64);
state->bitsAvailableForSqueezing = state->rate;
}
#ifdef KeccakReference
displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
state->squeezing = 1;
}
/*
 * Write outputLength bits of sponge output to output.
 *
 * If the sponge is still absorbing it is first padded and switched to the
 * squeezing phase. outputLength must be a multiple of 8, otherwise 1 is
 * returned (after the phase switch has already happened). Output is served
 * from the block held in dataQueue; each time that block is exhausted the
 * permutation is applied and a fresh block of rate bits is extracted.
 * Returns 0 on success.
 */
int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength)
{
    unsigned long long produced;
    unsigned int take;

    if (!state->squeezing)
        PadAndSwitchToSqueezingPhase(state);
    if ((outputLength % 8) != 0)
        return 1; // Only multiple of 8 bits are allowed, truncation can be done at user level

    for (produced = 0; produced < outputLength; produced += take) {
        if (state->bitsAvailableForSqueezing == 0) {
            // Current block used up: permute and extract the next one.
            KeccakPermutation(state->state);
#ifdef ProvideFast1024
            if (state->rate == 1024)
                KeccakExtract1024bits(state->state, state->dataQueue);
            else
#endif
                KeccakExtract(state->state, state->dataQueue, state->rate/64);
            state->bitsAvailableForSqueezing = state->rate;
#ifdef KeccakReference
            displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
        }

        // Hand out whatever is left in the block, capped by what is still wanted.
        take = state->bitsAvailableForSqueezing;
        if ((unsigned long long)take > outputLength - produced)
            take = (unsigned int)(outputLength - produced);

        memcpy(output + produced/8,
               state->dataQueue + (state->rate - state->bitsAvailableForSqueezing)/8,
               take/8);
        state->bitsAvailableForSqueezing -= take;
    }
    return 0;
}

76
crypto/keccak/KeccakSponge.h Executable file
View file

@ -0,0 +1,76 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakSponge_h_
#define _KeccakSponge_h_
#define KeccakPermutationSize 1600
#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
#define KeccakMaximumRate 1536
#define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/* Complete state of one sponge instance; ALIGN requests 32-byte alignment
   where the compiler supports it (see the ALIGN definitions above). */
ALIGN typedef struct spongeStateStruct {
/* The 1600-bit permutation state (200 bytes). */
ALIGN unsigned char state[KeccakPermutationSizeInBytes];
/* Block buffer: pending input while absorbing, extracted output while squeezing. */
ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
/* Rate r in bits, as passed to InitSponge(). */
unsigned int rate;
/* Capacity c in bits, as passed to InitSponge(). */
unsigned int capacity;
/* Number of input bits currently waiting in dataQueue. */
unsigned int bitsInQueue;
/* Digest length in bits set by Keccak_Init(); InitSponge() resets it to 0. */
unsigned int fixedOutputLength;
/* Non-zero once the sponge has switched from absorbing to squeezing. */
int squeezing;
/* Number of not-yet-returned output bits left in dataQueue. */
unsigned int bitsAvailableForSqueezing;
} spongeState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The sponge function is set to the absorbing phase.
* @param state Pointer to the state of the sponge function to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation.
* @return Zero if successful, 1 otherwise.
*/
int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param data Pointer to the input data.
* When @a databitLen is not a multiple of 8, the last bits of data must be
* in the least significant bits of the last byte.
* @param databitLen The number of input bits provided in the input data.
* @pre In the previous call to Absorb(), databitLen was a multiple of 8.
* @pre The sponge function must be in the absorbing phase,
* i.e., Squeeze() must not have been called before.
* @return Zero if successful, 1 otherwise.
*/
int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen);
/**
* Function to squeeze output data from the sponge function.
* If the sponge function was in the absorbing phase, this function
* switches it to the squeezing phase.
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param output Pointer to the buffer where to store the output data.
* @param outputLength The number of output bits desired.
* It must be a multiple of 8.
* @return Zero if successful, 1 otherwise.
*/
int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength);
#endif

142
crypto/keccak/brg_endian.h Executable file
View file

@ -0,0 +1,142 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
/* Each stanza below handles one spelling of the endian symbols. When both */
/* the BIG and LITTLE symbols exist, the matching BYTE_ORDER symbol decides; */
/* when only one of them exists, its mere presence decides. */
/* Form 1: BIG_ENDIAN / LITTLE_ENDIAN / BYTE_ORDER */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 2: _BIG_ENDIAN / _LITTLE_ENDIAN / _BYTE_ORDER */
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 3: __BIG_ENDIAN / __LITTLE_ENDIAN / __BYTE_ORDER */
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 4: __BIG_ENDIAN__ / __LITTLE_ENDIAN__ / __BYTE_ORDER__ */
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
/* Fallback, used only when none of the endian-symbol forms above produced */
/* a result: decide from well-known compiler and machine macros. */
#if !defined(PLATFORM_BYTE_ORDER)
/* Machines and compilers known to be little-endian. */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
/* Machines and compilers known to be big-endian. */
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
/* ARM can run in either order; big-endian only if __BIG_ENDIAN is defined. */
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# else
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
/* Hard-coded default: the "#elif 1" below is always taken, so any target */
/* not matched above (the lists contain no __x86_64__ entry, for example) */
/* is treated as little-endian. The two branches after it are unreachable */
/* unless the constants are edited by hand. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
#endif
#endif
#endif

692
crypto/keccak/genKAT.c Executable file
View file

@ -0,0 +1,692 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include "KeccakDuplex.h"
#include "KeccakNISTInterface.h"
#include "KeccakSponge.h"
/* Size of the sliding window FindMarker() uses; markers longer than
 * MAX_MARKER_LEN-1 characters are truncated to that length. */
#define MAX_MARKER_LEN 50
/* Size of the buffers that hold a header line copied from an input file. */
#define SUBMITTER_INFO_LEN 128
/* Result codes returned by every generator in this file. */
typedef enum { KAT_SUCCESS = 0, KAT_FILE_OPEN_ERROR = 1, KAT_HEADER_ERROR = 2, KAT_DATA_ERROR = 3, KAT_HASH_ERROR = 4 } STATUS_CODES;
/* When defined, the sponge/duplex generators and the hashbitlen == 0
 * (squeezing) code paths are compiled in. */
#define AllowExtendedFunctions
/* When defined, genKAT_main() does not call genExtremelyLongMsg(). */
#define ExcludeExtremelyLong
#ifdef AllowExtendedFunctions
/* Number of output BITS squeezed in the extended tests; buffers are sized
 * SqueezingOutputLength/8 bytes. */
#define SqueezingOutputLength 4096
#endif
/* Generators: each reads a fixed input file from the current directory and
 * writes a response file next to it. */
STATUS_CODES genShortMsg(int hashbitlen);
STATUS_CODES genLongMsg(int hashbitlen);
STATUS_CODES genExtremelyLongMsg(int hashbitlen);
STATUS_CODES genMonteCarlo(int hashbitlen);
#ifdef AllowExtendedFunctions
STATUS_CODES genMonteCarloSqueezing(int hashbitlen);
STATUS_CODES genShortMsgSponge(unsigned int rate, unsigned int capacity, int outputLength, const char *fileName);
STATUS_CODES genDuplexKAT(unsigned int rate, unsigned int capacity, const char *fileName);
#endif
/* Parsing and formatting helpers shared by the generators. */
int FindMarker(FILE *infile, const char *marker);
int ReadHex(FILE *infile, BitSequence *A, int Length, char *str);
void fprintBstr(FILE *fp, char *S, BitSequence *A, int L);
/*
 * Run every enabled generator in a fixed order.
 *
 * The first generator that fails ends the run and its status is returned.
 * The trailing "other case" sponge and duplex runs are best-effort: their
 * status is not inspected.
 *
 * Returns KAT_SUCCESS when all checked generators succeeded.
 */
STATUS_CODES
genKAT_main()
{
    static const int digest_bits[] = { 224, 256, 384, 512 };
    const size_t n_digests = sizeof (digest_bits) / sizeof (digest_bits[0]);
    STATUS_CODES rc;
    size_t k;

#ifdef AllowExtendedFunctions
    /* Variable-output-length (hashbitlen == 0) runs come first. */
    rc = genShortMsgSponge(1024, 576, 4096, "ShortMsgKAT_0.txt");
    if (rc != KAT_SUCCESS)
        return rc;

    rc = genLongMsg(0);
    if (rc != KAT_SUCCESS)
        return rc;

#ifndef ExcludeExtremelyLong
    rc = genExtremelyLongMsg(0);
    if (rc != KAT_SUCCESS)
        return rc;
#endif

    rc = genMonteCarloSqueezing(0);
    if (rc != KAT_SUCCESS)
        return rc;
#endif

    /* Fixed-output-length runs, one pass per digest size. */
    for (k = 0; k < n_digests; k++) {
        const int bits = digest_bits[k];

        rc = genShortMsg(bits);
        if (rc != KAT_SUCCESS)
            return rc;

        rc = genLongMsg(bits);
        if (rc != KAT_SUCCESS)
            return rc;

#ifndef ExcludeExtremelyLong
        rc = genExtremelyLongMsg(bits);
        if (rc != KAT_SUCCESS)
            return rc;
#endif

        rc = genMonteCarlo(bits);
        if (rc != KAT_SUCCESS)
            return rc;
    }

#ifdef AllowExtendedFunctions
    /* Other case examples */
    genShortMsgSponge(1344, 256, 4096, "ShortMsgKAT_r1344c256.txt");

    /*
     * Duplexing. Only rates 1026 and 1027 are exercised; the runs for
     * r=1024, 1025 and 1028..1032 (capacity = 1600 - r) are disabled.
     */
    genDuplexKAT(1026, 574, "DuplexKAT_r1026c574.txt");
    genDuplexKAT(1027, 573, "DuplexKAT_r1027c573.txt");
#endif

    return KAT_SUCCESS;
}
/*
 * Hash every message listed in <ShortMsgKAT.txt> with the given digest size
 * and write the results to <ShortMsgKAT_{hashbitlen}.txt>.
 *
 * hashbitlen  digest size in bits, passed straight to Hash().
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR when either file cannot be
 * opened, KAT_HEADER_ERROR when a header line is missing, or KAT_DATA_ERROR
 * when a 'Len' or 'Msg' entry is missing or does not fit the message buffer.
 * Both files are closed on every path.
 */
STATUS_CODES
genShortMsg(int hashbitlen)
{
    char fn[32], line[SUBMITTER_INFO_LEN];
    int msglen, msgbytelen;
    BitSequence Msg[256], MD[64];
    FILE *fp_in, *fp_out;
    STATUS_CODES status = KAT_SUCCESS;

    if ( (fp_in = fopen("ShortMsgKAT.txt", "r")) == NULL ) {
        printf("Couldn't open <ShortMsgKAT.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    snprintf(fn, sizeof (fn), "ShortMsgKAT_%d.txt", hashbitlen);
    if ( (fp_out = fopen(fn, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fn);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genShortMsg: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genShortMsg: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n", line);

    /* One iteration per "Len = " entry; stops at the first missing marker. */
    while ( FindMarker(fp_in, "Len = ") ) {
        /* The length comes from the file: reject anything Msg cannot hold. */
        if ( fscanf(fp_in, "%d", &msglen) != 1 ||
             msglen < 0 || msglen > (int)(8 * sizeof (Msg)) ) {
            printf("ERROR: unable to read 'Len' from <ShortMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
        msgbytelen = (msglen+7)/8;

        if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) {
            printf("ERROR: unable to read 'Msg' from <ShortMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
        Hash(hashbitlen, Msg, msglen, MD);
        fprintf(fp_out, "\nLen = %d\n", msglen);
        fprintBstr(fp_out, "Msg = ", Msg, msgbytelen);
        fprintBstr(fp_out, "MD = ", MD, hashbitlen/8);
    }
    printf("finished ShortMsgKAT for <%d>\n", hashbitlen);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
#ifdef AllowExtendedFunctions
/*
 * Absorb every message listed in <ShortMsgKAT.txt> into a fresh sponge with
 * the given rate/capacity, squeeze outputLength bits, and write the results
 * to fileName.
 *
 * rate, capacity  sponge parameters, passed straight to InitSponge().
 * outputLength    number of bits to squeeze; must be between 0 and
 *                 SqueezingOutputLength.
 * fileName        path of the response file to create.
 *
 * Returns KAT_SUCCESS, KAT_HASH_ERROR for an unsupported outputLength,
 * KAT_FILE_OPEN_ERROR, KAT_HEADER_ERROR or KAT_DATA_ERROR. Both files are
 * closed on every path.
 */
STATUS_CODES
genShortMsgSponge(unsigned int rate, unsigned int capacity, int outputLength, const char *fileName)
{
    char line[SUBMITTER_INFO_LEN];
    int msglen, msgbytelen;
    BitSequence Msg[256];
    BitSequence Squeezed[SqueezingOutputLength/8];
    spongeState state;
    FILE *fp_in, *fp_out;
    STATUS_CODES status = KAT_SUCCESS;

    /* A negative length would otherwise slip past the upper-bound check. */
    if (outputLength < 0 || outputLength > SqueezingOutputLength) {
        printf("Requested output length too long.\n");
        return KAT_HASH_ERROR;
    }
    if ( (fp_in = fopen("ShortMsgKAT.txt", "r")) == NULL ) {
        printf("Couldn't open <ShortMsgKAT.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    if ( (fp_out = fopen(fileName, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fileName);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fileName);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genShortMsg: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genShortMsg: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n", line);

    /* One iteration per "Len = " entry; stops at the first missing marker. */
    while ( FindMarker(fp_in, "Len = ") ) {
        /* The length comes from the file: reject anything Msg cannot hold. */
        if ( fscanf(fp_in, "%d", &msglen) != 1 ||
             msglen < 0 || msglen > (int)(8 * sizeof (Msg)) ) {
            printf("ERROR: unable to read 'Len' from <ShortMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
        msgbytelen = (msglen+7)/8;

        if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) {
            printf("ERROR: unable to read 'Msg' from <ShortMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
        /* The message is echoed before its last byte is realigned below. */
        fprintf(fp_out, "\nLen = %d\n", msglen);
        fprintBstr(fp_out, "Msg = ", Msg, msgbytelen);

        InitSponge(&state, rate, capacity);
        if ((msglen % 8 ) != 0)
            // From NIST convention to internal convention for last byte
            Msg[msgbytelen - 1] >>= 8 - (msglen % 8);
        Absorb(&state, Msg, msglen);
        Squeeze(&state, Squeezed, outputLength);
        /* Print only the bytes that were squeezed, not the whole buffer. */
        fprintBstr(fp_out, "Squeezed = ", Squeezed, outputLength/8);
    }
    printf("finished ShortMsgKAT for <%s>\n", fileName);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
#endif
/*
 * Process every message listed in <LongMsgKAT.txt> and write the results to
 * <LongMsgKAT_{hashbitlen}.txt>.
 *
 * hashbitlen  digest size in bits. With AllowExtendedFunctions defined, a
 *             value of 0 (or less) selects the squeezing path, which emits
 *             SqueezingOutputLength bits per message instead of a digest.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, KAT_HEADER_ERROR, or
 * KAT_DATA_ERROR when a 'Len' or 'Msg' entry is missing or does not fit the
 * message buffer. Both files are closed on every path.
 */
STATUS_CODES
genLongMsg(int hashbitlen)
{
    char fn[32], line[SUBMITTER_INFO_LEN];
    int msglen, msgbytelen;
    BitSequence Msg[4288], MD[64];
#ifdef AllowExtendedFunctions
    BitSequence Squeezed[SqueezingOutputLength/8];
    hashState state;
#endif
    FILE *fp_in, *fp_out;
    STATUS_CODES status = KAT_SUCCESS;

    if ( (fp_in = fopen("LongMsgKAT.txt", "r")) == NULL ) {
        printf("Couldn't open <LongMsgKAT.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    snprintf(fn, sizeof (fn), "LongMsgKAT_%d.txt", hashbitlen);
    if ( (fp_out = fopen(fn, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fn);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genLongMsg: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genLongMsg: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n\n", line);

    /* One iteration per "Len = " entry; stops at the first missing marker. */
    while ( FindMarker(fp_in, "Len = ") ) {
        /* The length comes from the file: reject anything Msg cannot hold. */
        if ( fscanf(fp_in, "%d", &msglen) != 1 ||
             msglen < 0 || msglen > (int)(8 * sizeof (Msg)) ) {
            printf("ERROR: unable to read 'Len' from <LongMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
        msgbytelen = (msglen+7)/8;

        if ( !ReadHex(fp_in, Msg, msgbytelen, "Msg = ") ) {
            printf("ERROR: unable to read 'Msg' from <LongMsgKAT.txt>\n");
            status = KAT_DATA_ERROR;
            goto out;
        }
#ifdef AllowExtendedFunctions
        if (hashbitlen > 0)
            Hash(hashbitlen, Msg, msglen, MD);
        else {
            /* Squeezing path: no digest is requested from Final(). */
            Init(&state, hashbitlen);
            Update(&state, Msg, msglen);
            Final(&state, 0);
            Squeeze(&state, Squeezed, SqueezingOutputLength);
        }
#else
        Hash(hashbitlen, Msg, msglen, MD);
#endif
        fprintf(fp_out, "Len = %d\n", msglen);
        fprintBstr(fp_out, "Msg = ", Msg, msgbytelen);
#ifdef AllowExtendedFunctions
        if (hashbitlen > 0)
            fprintBstr(fp_out, "MD = ", MD, hashbitlen/8);
        else
            fprintBstr(fp_out, "Squeezed = ", Squeezed, SqueezingOutputLength/8);
#else
        fprintBstr(fp_out, "MD = ", MD, hashbitlen/8);
#endif
    }
    printf("finished LongMsgKAT for <%d>\n", hashbitlen);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
/*
 * Hash the 64-byte text from <ExtremelyLongMsgKAT.txt> 'Repeat' times in a
 * row and write the result to <ExtremelyLongMsgKAT_{hashbitlen}.txt>.
 *
 * hashbitlen  digest size in bits. With AllowExtendedFunctions defined, a
 *             value of 0 additionally squeezes SqueezingOutputLength bits
 *             and prints those instead of a digest.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, KAT_HEADER_ERROR, KAT_DATA_ERROR
 * when 'Repeat' or 'Text' cannot be read, or KAT_HASH_ERROR when Init,
 * Update or Final reports a failure. Both files are closed on every path.
 */
STATUS_CODES
genExtremelyLongMsg(int hashbitlen)
{
    char fn[32], line[SUBMITTER_INFO_LEN];
    BitSequence Text[65], MD[64];
#ifdef AllowExtendedFunctions
    BitSequence Squeezed[SqueezingOutputLength/8];
#endif
    int i, repeat;
    FILE *fp_in, *fp_out;
    hashState state;
    HashReturn retval;
    STATUS_CODES status = KAT_SUCCESS;

    if ( (fp_in = fopen("ExtremelyLongMsgKAT.txt", "r")) == NULL ) {
        printf("Couldn't open <ExtremelyLongMsgKAT.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    snprintf(fn, sizeof (fn), "ExtremelyLongMsgKAT_%d.txt", hashbitlen);
    if ( (fp_out = fopen(fn, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fn);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genExtremelyLongMsg: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genExtremelyLongMsg: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n\n", line);

    if ( !FindMarker(fp_in, "Repeat = ") ||
         fscanf(fp_in, "%d", &repeat) != 1 ) {
        printf("ERROR: unable to read 'Repeat' from <ExtremelyLongMsgKAT.txt>\n");
        status = KAT_DATA_ERROR;
        goto out;
    }

    /*
     * Text holds 64 characters plus the terminator, so the field width is
     * 64. The buffer is zeroed first because Update() below always consumes
     * 512 bits, even when the text in the file is shorter.
     */
    memset(Text, 0, sizeof (Text));
    if ( !FindMarker(fp_in, "Text = ") ||
         fscanf(fp_in, "%64s", (char *)Text) != 1 ) {
        printf("ERROR: unable to read 'Text' from <ExtremelyLongMsgKAT.txt>\n");
        status = KAT_DATA_ERROR;
        goto out;
    }

    if ( (retval = Init(&state, hashbitlen)) != KAT_SUCCESS ) {
        printf("Init returned <%d> in genExtremelyLongMsg\n", retval);
        status = KAT_HASH_ERROR;
        goto out;
    }
    for ( i=0; i<repeat; i++ )
        if ( (retval = Update(&state, Text, 512)) != KAT_SUCCESS ) {
            printf("Update returned <%d> in genExtremelyLongMsg\n", retval);
            status = KAT_HASH_ERROR;
            goto out;
        }
    if ( (retval = Final(&state, MD)) != KAT_SUCCESS ) {
        printf("Final returned <%d> in genExtremelyLongMsg\n", retval);
        status = KAT_HASH_ERROR;
        goto out;
    }
#ifdef AllowExtendedFunctions
    if (hashbitlen == 0)
        Squeeze(&state, Squeezed, SqueezingOutputLength);
#endif
    fprintf(fp_out, "Repeat = %d\n", repeat);
    fprintf(fp_out, "Text = %s\n", (char *)Text);
#ifdef AllowExtendedFunctions
    if (hashbitlen > 0)
        fprintBstr(fp_out, "MD = ", MD, hashbitlen/8);
    else
        fprintBstr(fp_out, "Squeezed = ", Squeezed, SqueezingOutputLength/8);
#else
    fprintBstr(fp_out, "MD = ", MD, hashbitlen/8);
#endif
    printf("finished ExtremelyLongMsgKAT for <%d>\n", hashbitlen);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
/*
 * Run the Monte Carlo chain seeded from <MonteCarlo.txt> and write the
 * checkpoints to <MonteCarlo_{hashbitlen}.txt>.
 *
 * The 128-byte message starts as the seed. Each of the 100 x 1000 steps
 * hashes the message, then rebuilds it as the new digest followed by the
 * leading bytes of the previous message. The digest is printed after every
 * 1000 steps.
 *
 * hashbitlen  digest size in bits, passed straight to Hash().
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, KAT_HEADER_ERROR, or
 * KAT_DATA_ERROR when the seed cannot be read. Both files are closed on
 * every path.
 */
STATUS_CODES
genMonteCarlo(int hashbitlen)
{
    char fn[32], line[SUBMITTER_INFO_LEN];
    BitSequence Seed[128], Msg[128], MD[64], Temp[128];
    int i, j, bytelen;
    FILE *fp_in, *fp_out;
    STATUS_CODES status = KAT_SUCCESS;

    if ( (fp_in = fopen("MonteCarlo.txt", "r")) == NULL ) {
        printf("Couldn't open <MonteCarlo.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    snprintf(fn, sizeof (fn), "MonteCarlo_%d.txt", hashbitlen);
    if ( (fp_out = fopen(fn, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fn);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genMonteCarlo: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genMonteCarlo: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n\n", line);

    if ( !ReadHex(fp_in, Seed, 128, "Seed = ") ) {
        printf("ERROR: unable to read 'Seed' from <MonteCarlo.txt>\n");
        status = KAT_DATA_ERROR;
        goto out;
    }

    bytelen = hashbitlen / 8;
    memcpy(Msg, Seed, 128);
    fprintBstr(fp_out, "Seed = ", Seed, 128);
    for ( j=0; j<100; j++ ) {
        for ( i=0; i<1000; i++ ) {
            Hash(hashbitlen, Msg, 1024, MD);
            /* Msg := MD || first (128 - bytelen) bytes of the old Msg. */
            memcpy(Temp, Msg, 128-bytelen);
            memcpy(Msg, MD, bytelen);
            memcpy(Msg+bytelen, Temp, 128-bytelen);
        }
        fprintf(fp_out, "\nj = %d\n", j);
        fprintBstr(fp_out, "MD = ", MD, bytelen);
    }
    printf("finished MonteCarloKAT for <%d>\n", hashbitlen);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
#ifdef AllowExtendedFunctions
/*
 * Squeezing variant of the Monte Carlo test: absorb the 128-byte seed from
 * <MonteCarlo.txt> once, then squeeze 64 bytes 100 x 1000 times, printing
 * the most recent output after every 1000 squeezes to
 * <MonteCarlo_{hashbitlen}.txt>.
 *
 * hashbitlen  passed to Init() and used in the output file name.
 *
 * Returns KAT_SUCCESS, KAT_FILE_OPEN_ERROR, KAT_HEADER_ERROR, KAT_DATA_ERROR
 * when the seed cannot be read, or KAT_HASH_ERROR when Init, Update, Final
 * or Squeeze reports a failure. Both files are closed on every path.
 */
STATUS_CODES
genMonteCarloSqueezing(int hashbitlen)
{
    char fn[32], line[SUBMITTER_INFO_LEN];
    BitSequence Seed[128], MD[64];
    int i, j, bytelen;
    FILE *fp_in, *fp_out;
    hashState state;
    HashReturn retval;
    STATUS_CODES status = KAT_SUCCESS;

    if ( (fp_in = fopen("MonteCarlo.txt", "r")) == NULL ) {
        printf("Couldn't open <MonteCarlo.txt> for read\n");
        return KAT_FILE_OPEN_ERROR;
    }
    snprintf(fn, sizeof (fn), "MonteCarlo_%d.txt", hashbitlen);
    if ( (fp_out = fopen(fn, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn);
        fclose(fp_in);
        return KAT_FILE_OPEN_ERROR;
    }
    fprintf(fp_out, "# %s\n", fn);

    /* Copy the two header lines; fgets() bounds the read to the buffer. */
    if ( !FindMarker(fp_in, "# Algorithm Name:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genMonteCarlo: Couldn't read Algorithm Name\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Algorithm Name:%s\n", line);

    if ( !FindMarker(fp_in, "# Principal Submitter:") ||
         fgets(line, sizeof (line), fp_in) == NULL ) {
        printf("genMonteCarlo: Couldn't read Principal Submitter\n");
        status = KAT_HEADER_ERROR;
        goto out;
    }
    line[strcspn(line, "\n")] = '\0';
    fprintf(fp_out, "# Principal Submitter:%s\n\n", line);

    if ( !ReadHex(fp_in, Seed, 128, "Seed = ") ) {
        printf("ERROR: unable to read 'Seed' from <MonteCarlo.txt>\n");
        status = KAT_DATA_ERROR;
        goto out;
    }
    fprintBstr(fp_out, "Seed = ", Seed, 128);

    if ( (retval = Init(&state, hashbitlen)) != KAT_SUCCESS ) {
        printf("Init returned <%d> in genMonteCarloSqueezing\n", retval);
        status = KAT_HASH_ERROR;
        goto out;
    }
    if ( (retval = Update(&state, Seed, 128*8)) != KAT_SUCCESS ) {
        printf("Update returned <%d> in genMonteCarloSqueezing\n", retval);
        status = KAT_HASH_ERROR;
        goto out;
    }
    if ( (retval = Final(&state, 0)) != KAT_SUCCESS ) {
        printf("Final returned <%d> in genMonteCarloSqueezing\n", retval);
        status = KAT_HASH_ERROR;
        goto out;
    }

    bytelen = 64;
    for ( j=0; j<100; j++ ) {
        for ( i=0; i<1000; i++ ) {
            if ( (retval = Squeeze(&state, MD, bytelen*8)) != KAT_SUCCESS ) {
                printf("Squeeze returned <%d> in genMonteCarloSqueezing\n", retval);
                status = KAT_HASH_ERROR;
                goto out;
            }
        }
        fprintf(fp_out, "\nj = %d\n", j);
        fprintBstr(fp_out, "MD = ", MD, bytelen);
    }
    printf("finished MonteCarloKAT for <%d>\n", hashbitlen);

out:
    fclose(fp_in);
    fclose(fp_out);
    return status;
}
STATUS_CODES
genDuplexKAT(unsigned int rate, unsigned int capacity, const char *fileName)
{
int inLen, inByteLen, outLen, outByteLen, done;
BitSequence in[256];
BitSequence out[256];
FILE *fp_in, *fp_out;
duplexState state;
if ( (fp_in = fopen("DuplexKAT.txt", "r")) == NULL ) {
printf("Couldn't open <DuplexKAT.txt> for read\n");
return KAT_FILE_OPEN_ERROR;
}
if ( (fp_out = fopen(fileName, "w")) == NULL ) {
printf("Couldn't open <%s> for write\n", fileName);
return KAT_FILE_OPEN_ERROR;
}
fprintf(fp_out, "# %s\n", fileName);
fprintf(fp_out, "# Algorithm: Duplex[f=Keccak-f[1600], pad=pad10*1, r=%d, c=%d, \xCF\x81max=%d]\n", rate, capacity, rate-2);
InitDuplex(&state, rate, capacity);
done = 0;
outLen = rate;
outByteLen = (outLen+7)/8;
do {
if ( FindMarker(fp_in, "InLen = ") )
fscanf(fp_in, "%d", &inLen);
else {
done = 1;
break;
}
inByteLen = (inLen+7)/8;
if ( !ReadHex(fp_in, in, inByteLen, "In = ") ) {
printf("ERROR: unable to read 'In' from <DuplexKAT.txt>\n");
return KAT_DATA_ERROR;
}
if (inLen <= rate-2) {
fprintf(fp_out, "\nInLen = %d\n", inLen);
fprintBstr(fp_out, "In = ", in, inByteLen);
Duplexing(&state, in, inLen, out, outLen);
fprintf(fp_out, "OutLen = %d\n", outLen);
fprintBstr(fp_out, "Out = ", out, outByteLen);
}
} while ( !done );
printf("finished DuplexKAT for <%s>\n", fileName);
fclose(fp_in);
fclose(fp_out);
return KAT_SUCCESS;
}
#endif
//
// ADVANCE THE STREAM TO JUST PAST THE NEXT OCCURRENCE OF A MARKER STRING
//
// Reads infile one character at a time through a sliding window as wide as
// the marker (markers longer than MAX_MARKER_LEN-1 characters are matched on
// their first MAX_MARKER_LEN-1 characters only).
//
// Returns 1 with the stream positioned right after the marker, or 0 when
// end-of-file is reached first.
//
int
FindMarker(FILE *infile, const char *marker)
{
    char window[MAX_MARKER_LEN];
    int i, len, ch;

    len = (int)strlen(marker);
    if ( len > MAX_MARKER_LEN-1 )
        len = MAX_MARKER_LEN-1;

    // fgetc() must be tested as an int: once narrowed to char, a 0xFF data
    // byte can compare equal to EOF, or EOF can become undetectable.
    for ( i=0; i<len; i++ ) {
        if ( (ch = fgetc(infile)) == EOF )
            return 0;
        window[i] = (char)ch;
    }
    window[len] = '\0';

    for ( ;; ) {
        if ( !strncmp(window, marker, len) )
            return 1;
        // No match (so len >= 1): drop the oldest character, append the next.
        memmove(window, window+1, (size_t)(len-1));
        if ( (ch = fgetc(infile)) == EOF )
            return 0;
        window[len-1] = (char)ch;
        window[len] = '\0';
    }
}
//
// READ THE HEXADECIMAL VALUE THAT FOLLOWS A MARKER INTO A BYTE ARRAY
//
// A is cleared, then every hex digit read is shifted in from the right, so
// the value ends up right-aligned in the Length bytes of A. Non-hex
// characters before the first digit are skipped; a newline there, or any
// non-hex character after the first digit, ends the value.
//
// Returns 0 when the marker str is not found, 1 otherwise. A Length of 0
// stores a single zero byte and reads nothing.
//
int
ReadHex(FILE *infile, BitSequence *A, int Length, char *str)
{
    int c, pos;
    int seen_digit = 0;
    BitSequence nibble;

    if ( Length == 0 ) {
        A[0] = 0x00;
        return 1;
    }

    memset(A, 0x00, Length);
    if ( !FindMarker(infile, str) )
        return 0;

    for ( c = fgetc(infile); c != EOF; c = fgetc(infile) ) {
        if ( !isxdigit(c) ) {
            if ( seen_digit || c == '\n' )
                break;
            continue;
        }
        seen_digit = 1;

        if ( isdigit(c) )
            nibble = c - '0';
        else if ( isupper(c) )
            nibble = c - 'A' + 10;
        else
            nibble = c - 'a' + 10;

        // Shift the whole array left by four bits and append the new nibble.
        for ( pos = 0; pos + 1 < Length; pos++ )
            A[pos] = (A[pos] << 4) | (A[pos+1] >> 4);
        A[Length-1] = (A[Length-1] << 4) | nibble;
    }
    return 1;
}
/*
 * Write the label S followed by the L bytes of A as upper-case hex and a
 * newline. An empty array (L == 0) is printed as "00".
 */
void
fprintBstr(FILE *fp, char *S, BitSequence *A, int L)
{
    int k = 0;

    fputs(S, fp);
    while ( k < L ) {
        fprintf(fp, "%02X", A[k]);
        k++;
    }
    if ( !L )
        fputs("00", fp);
    fputc('\n', fp);
}

7
main.c
View file

@ -812,7 +812,7 @@ start_decompress(const char *filename, const char *to_filename)
d1 = htons(version);
hmac_update(&hdr_mac, (uchar_t *)&d1, sizeof (version));
d1 = htons(flags);
hmac_update(&hdr_mac, (uchar_t *)&d1, sizeof (version));
hmac_update(&hdr_mac, (uchar_t *)&d1, sizeof (flags));
nonce = htonll(chunksize);
hmac_update(&hdr_mac, (uchar_t *)&nonce, sizeof (nonce));
d2 = htonl(level);
@ -1430,7 +1430,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
pw_len = get_pw_string(pw,
"Please enter encryption password", 1);
if (pw_len == -1) {
err_exit(1, "Failed to get password.\n");
err_exit(0, "Failed to get password.\n");
}
} else {
int fd, len;
@ -1459,7 +1459,6 @@ start_compress(const char *filename, uint64_t chunksize, int level)
}
}
if (pw_len == -1) {
perror(" ");
err_exit(1, "Failed to get password.\n");
}
close(fd);
@ -1467,7 +1466,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
if (init_crypto(&crypto_ctx, pw, pw_len, encrypt_type, NULL,
0, 0, ENCRYPT_FLAG) == -1) {
memset(pw, 0, MAX_PW_LEN);
err_exit(1, "Failed to initialize crypto\n");
err_exit(0, "Failed to initialize crypto\n");
}
memset(pw, 0, MAX_PW_LEN);
}

View file

@ -23,7 +23,7 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression failed."
exit 1
fi
cmd="../../pcompress -d ${tf}.pz ${tf}.1"
@ -31,13 +31,13 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression failed."
exit 1
fi
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -9,14 +9,14 @@ for algo in zlib ppmd
do
for tf in bin.dat share.dat inc.dat
do
for cksum in CRC64 SHA256 SHA512 SKEIN256 SKEIN512
for cksum in CRC64 SHA256 SHA512 SKEIN256 SKEIN512 KECCAK256 KECCAK512
do
cmd="../../pcompress -c ${algo} -l 6 -s 1m -S ${cksum} ${tf}"
echo "Running $cmd"
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression failed."
exit 1
fi
cmd="../../pcompress -d ${tf}.pz ${tf}.1"
@ -24,14 +24,14 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression failed."
exit 1
fi
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -29,7 +29,7 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: ${cmd} errored."
exit 1
fi
mv ${tf}.pz ${tf}.${algo}
@ -62,13 +62,13 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression failed."
exit 1
fi
diff ${tf}.${algo} ${tf}.${algo}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.${algo}.pz ${tf}.${algo}.1

View file

@ -33,7 +33,7 @@ do
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -9,7 +9,7 @@ for algo in lzfx adapt2
do
for tf in comb_d.dat
do
for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64"
for feat in "-e" "-e -L -S SHA256" "-D -e -S SHA512" "-D -EE -L -e -S SKEIN512" "-e -S CRC64" "-e -P" "-e -P -S KECCAK256" "-D -e -L -S KECCAK512"
do
for seg in 2m 100m
do
@ -26,7 +26,7 @@ do
pw=`cat /tmp/pwf`
if [ "$pw" = "sillypassword" ]
then
echo "ERROR: Password file /tmp/pwf not zeroed!"
echo "FATAL: Password file /tmp/pwf not zeroed!"
exit 1
fi
@ -50,7 +50,21 @@ do
pw=`cat /tmp/pwf`
if [ "$pw" = "sillypassword" ]
then
echo "ERROR: Password file /tmp/pwf not zeroed!"
echo "FATAL: Password file /tmp/pwf not zeroed!"
exit 1
fi
#
# Now try decompression with invalid password. It should
# fail.
#
rm -f ${tf}.1
cmd="../../pcompress -d -w /tmp/pwf ${tf}.pz ${tf}.1"
echo "Running $cmd"
eval $cmd
if [ $? -eq 0 ]
then
echo "FATAL: Decompression did not fail where expected."
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -21,7 +21,7 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression errored."
exit 1
fi
cmd="../../pcompress -d ${tf}.pz ${tf}.1"
@ -29,13 +29,13 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression errored."
exit 1
fi
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -19,14 +19,14 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression errored."
exit 1
fi
pw=`cat /tmp/pwf`
if [ "$pw" = "sillypassword" ]
then
echo "ERROR: Password file /tmp/pwf not zeroed!"
echo "FATAL: Password file /tmp/pwf not zeroed!"
exit 1
fi
@ -36,21 +36,21 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression errored."
exit 1
fi
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
pw=`cat /tmp/pwf`
if [ "$pw" = "sillypassword" ]
then
echo "ERROR: Password file /tmp/pwf not zeroed!"
echo "FATAL: Password file /tmp/pwf not zeroed!"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -26,14 +26,14 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Decompression errored."
exit 1
fi
diff ${tf} ${tf}.1 > /dev/null
if [ $? -ne 0 ]
then
echo "${cmd}: Decompression was not correct"
echo "FATAL: Decompression was not correct"
exit 1
fi
rm -f ${tf}.pz ${tf}.1

View file

@ -12,7 +12,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected"
echo "FATAL: Compression DID NOT ERROR where expected"
exit 1
fi
done
@ -26,14 +26,14 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected"
echo "FATAL: Compression DID NOT ERROR where expected"
rm -f combined.dat.pz
exit 1
fi
done
done
for feat in "-S CRC64" "-S SKEIN256" "-S SKEIN512" "-S SHA256" "-S SHA512"
for feat in "-S CRC64" "-S SKEIN256" "-S SKEIN512" "-S SHA256" "-S SHA512" "-S KECCAK256" "-S KECCAK512"
do
rm -f combined.dat.1.pz
rm -f combined.dat.pz
@ -44,7 +44,7 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression errored."
rm -f combined.dat.pz
exit 1
fi
@ -55,7 +55,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected."
echo "FATAL: Decompression DID NOT ERROR where expected."
rm -f combined.dat.pz
rm -f combined.dat.1
exit 1
@ -69,7 +69,7 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression errored."
rm -f combined.dat.pz
exit 1
fi
@ -81,7 +81,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected."
echo "FATAL: Decompression DID NOT ERROR where expected."
rm -f combined.dat.pz
rm -f combined.dat.1
rm -f combined.dat.1.pz
@ -96,7 +96,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected."
echo "FATAL: Decompression DID NOT ERROR where expected."
rm -f combined.dat.pz
rm -f combined.dat.1
rm -f combined.dat.1.pz
@ -110,14 +110,14 @@ do
eval $cmd
if [ $? -ne 0 ]
then
echo "${cmd} errored."
echo "FATAL: Compression errored."
rm -f combined.dat.pz
exit 1
fi
pw=`cat /tmp/pwf`
if [ "$pw" = "plainpasswd" ]
then
echo "ERROR: Password file was not zeroed"
echo "FATAL: Password file was not zeroed"
rm -f /tmp/pwf combined.dat.pz
exit 1
fi
@ -130,7 +130,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected."
echo "FATAL: Decompression DID NOT ERROR where expected."
rm -f combined.dat.pz
rm -f combined.dat.1
rm -f combined.dat.1.pz
@ -146,7 +146,7 @@ do
eval $cmd
if [ $? -eq 0 ]
then
echo "${cmd} DID NOT ERROR where expected."
echo "FATAL: Decompression DID NOT ERROR where expected."
rm -f combined.dat.pz
rm -f combined.dat.1
rm -f combined.dat.1.pz