pcompress/crypto/keccak/KeccakF-1600-simd64.macros
Moinak Ghosh 75c81b5e9c Add support for 64-bit Keccak implementation.
Sanitize error message from tests.
Add more tests.
Improve platform detection in config script.
2012-12-08 14:19:01 +05:30

517 lines
18 KiB
Text
Executable file

/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#define declareABCDE \
V64 Aba, Abe, Abi, Abo, Abu; \
V64 Aga, Age, Agi, Ago, Agu; \
V64 Aka, Ake, Aki, Ako, Aku; \
V64 Ama, Ame, Ami, Amo, Amu; \
V64 Asa, Ase, Asi, Aso, Asu; \
V64 Bba, Bbe, Bbi, Bbo, Bbu; \
V64 Bga, Bge, Bgi, Bgo, Bgu; \
V64 Bka, Bke, Bki, Bko, Bku; \
V64 Bma, Bme, Bmi, Bmo, Bmu; \
V64 Bsa, Bse, Bsi, Bso, Bsu; \
V64 Ca, Ce, Ci, Co, Cu; \
V64 Da, De, Di, Do, Du; \
V64 Eba, Ebe, Ebi, Ebo, Ebu; \
V64 Ega, Ege, Egi, Ego, Egu; \
V64 Eka, Eke, Eki, Eko, Eku; \
V64 Ema, Eme, Emi, Emo, Emu; \
V64 Esa, Ese, Esi, Eso, Esu; \
#define prepareTheta \
Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \
Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \
Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \
Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \
Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \
// --- Code for round, with prepare-theta
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = XOR64(Cu, ROL64(Ce, 1)); \
De = XOR64(Ca, ROL64(Ci, 1)); \
Di = XOR64(Ce, ROL64(Co, 1)); \
Do = XOR64(Ci, ROL64(Cu, 1)); \
Du = XOR64(Co, ROL64(Ca, 1)); \
\
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
Ca = E##ba; \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
Ce = E##be; \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
Ci = E##bi; \
E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
Co = E##bo; \
E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
Cu = E##bu; \
\
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
XOReq64(Ca, E##ga); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
XOReq64(Ce, E##ge); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
XOReq64(Ci, E##gi); \
E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
XOReq64(Co, E##go); \
E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
XOReq64(Cu, E##gu); \
\
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
XOReq64(Ca, E##ka); \
XOReq64(A##mu, Du); \
Bko = ROL64(A##mu, 8); \
E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
XOReq64(Ce, E##ke); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
XOReq64(Ci, E##ki); \
E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
XOReq64(Co, E##ko); \
E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
XOReq64(Cu, E##ku); \
\
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
XOReq64(Ca, E##ma); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
XOReq64(Ce, E##me); \
XOReq64(A##so, Do); \
Bmu = ROL64(A##so, 56); \
E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
XOReq64(Ci, E##mi); \
E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
XOReq64(Co, E##mo); \
E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
XOReq64(Cu, E##mu); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
XOReq64(Ca, E##sa); \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
XOReq64(Ce, E##se); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
XOReq64(Ci, E##si); \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
XOReq64(Co, E##so); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
XOReq64(Cu, E##su); \
\
// --- Code for round
// --- 64-bit lanes mapped to 64-bit words
#define thetaRhoPiChiIota(i, A, E) \
Da = XOR64(Cu, ROL64(Ce, 1)); \
De = XOR64(Ca, ROL64(Ci, 1)); \
Di = XOR64(Ce, ROL64(Co, 1)); \
Do = XOR64(Ci, ROL64(Cu, 1)); \
Du = XOR64(Co, ROL64(Ca, 1)); \
\
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
\
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
\
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
XOReq64(A##mu, Du); \
Bko = ROL64(A##mu, 8); \
E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
\
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
XOReq64(A##so, Do); \
Bmu = ROL64(A##so, 56); \
E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
\
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
#define copyFromStateAndXor576bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = LOAD64(state[ 9]); \
X##ka = LOAD64(state[10]); \
X##ke = LOAD64(state[11]); \
X##ki = LOAD64(state[12]); \
X##ko = LOAD64(state[13]); \
X##ku = LOAD64(state[14]); \
X##ma = LOAD64(state[15]); \
X##me = LOAD64(state[16]); \
X##mi = LOAD64(state[17]); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromStateAndXor832bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
X##ko = LOAD64(state[13]); \
X##ku = LOAD64(state[14]); \
X##ma = LOAD64(state[15]); \
X##me = LOAD64(state[16]); \
X##mi = LOAD64(state[17]); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromStateAndXor1024bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
X##me = LOAD64(state[16]); \
X##mi = LOAD64(state[17]); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromStateAndXor1088bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
X##mi = LOAD64(state[17]); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromStateAndXor1152bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromStateAndXor1344bits(X, state, input) \
X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \
X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyFromState(X, state) \
X##ba = LOAD64(state[ 0]); \
X##be = LOAD64(state[ 1]); \
X##bi = LOAD64(state[ 2]); \
X##bo = LOAD64(state[ 3]); \
X##bu = LOAD64(state[ 4]); \
X##ga = LOAD64(state[ 5]); \
X##ge = LOAD64(state[ 6]); \
X##gi = LOAD64(state[ 7]); \
X##go = LOAD64(state[ 8]); \
X##gu = LOAD64(state[ 9]); \
X##ka = LOAD64(state[10]); \
X##ke = LOAD64(state[11]); \
X##ki = LOAD64(state[12]); \
X##ko = LOAD64(state[13]); \
X##ku = LOAD64(state[14]); \
X##ma = LOAD64(state[15]); \
X##me = LOAD64(state[16]); \
X##mi = LOAD64(state[17]); \
X##mo = LOAD64(state[18]); \
X##mu = LOAD64(state[19]); \
X##sa = LOAD64(state[20]); \
X##se = LOAD64(state[21]); \
X##si = LOAD64(state[22]); \
X##so = LOAD64(state[23]); \
X##su = LOAD64(state[24]); \
#define copyToState(state, X) \
STORE64(state[ 0], X##ba); \
STORE64(state[ 1], X##be); \
STORE64(state[ 2], X##bi); \
STORE64(state[ 3], X##bo); \
STORE64(state[ 4], X##bu); \
STORE64(state[ 5], X##ga); \
STORE64(state[ 6], X##ge); \
STORE64(state[ 7], X##gi); \
STORE64(state[ 8], X##go); \
STORE64(state[ 9], X##gu); \
STORE64(state[10], X##ka); \
STORE64(state[11], X##ke); \
STORE64(state[12], X##ki); \
STORE64(state[13], X##ko); \
STORE64(state[14], X##ku); \
STORE64(state[15], X##ma); \
STORE64(state[16], X##me); \
STORE64(state[17], X##mi); \
STORE64(state[18], X##mo); \
STORE64(state[19], X##mu); \
STORE64(state[20], X##sa); \
STORE64(state[21], X##se); \
STORE64(state[22], X##si); \
STORE64(state[23], X##so); \
STORE64(state[24], X##su); \
#define copyStateVariables(X, Y) \
X##ba = Y##ba; \
X##be = Y##be; \
X##bi = Y##bi; \
X##bo = Y##bo; \
X##bu = Y##bu; \
X##ga = Y##ga; \
X##ge = Y##ge; \
X##gi = Y##gi; \
X##go = Y##go; \
X##gu = Y##gu; \
X##ka = Y##ka; \
X##ke = Y##ke; \
X##ki = Y##ki; \
X##ko = Y##ko; \
X##ku = Y##ku; \
X##ma = Y##ma; \
X##me = Y##me; \
X##mi = Y##mi; \
X##mo = Y##mo; \
X##mu = Y##mu; \
X##sa = Y##sa; \
X##se = Y##se; \
X##si = Y##si; \
X##so = Y##so; \
X##su = Y##su; \