More portability tweaks.

This commit is contained in:
Moinak Ghosh 2014-05-04 11:54:33 +05:30
parent f2da433188
commit 4b037f0ed7
9 changed files with 757 additions and 13 deletions

View file

@ -184,6 +184,9 @@ KECCAK_SRC_COMMON = crypto/keccak/genKAT.c crypto/keccak/KeccakDuplex.c \
KECCAK_SRC_OPT64 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-opt64.c
KECCAK_SRC_OPT64_ASM1 = $(KECCAK_SRC_COMMON) crypto/keccak/KeccakF-1600-x86-64-asm.c
KECCAK_SRC_OPT64_ASM2 = crypto/keccak/KeccakF-1600-x86-64-gas.s
KECCAK_SRC_OPT64_ASM3 = crypto/keccak/KeccakF-1600-x86-64-gas_yasm.s
KECCAK_OBJS_OPT64_ASM2 = $(KECCAK_SRC_OPT64_ASM2:.s=.o)
KECCAK_OBJS_OPT64_ASM3 = $(KECCAK_SRC_OPT64_ASM3:.s=.o)
KECCAK_HDRS_COMMON = crypto/keccak/KeccakDuplex.h crypto/keccak/KeccakNISTInterface.h \
crypto/keccak/KeccakSponge.h crypto/keccak/KeccakF-1600-interface.h
@ -200,6 +203,10 @@ KECCAK_HDRS = @KECCAK_HDRS@
KECCAK_OBJS = $(KECCAK_SRCS:.c=.o)
KECCAK_OBJS_ASM = $(KECCAK_SRCS_ASM:.s=.o)
CRYPTO_ASM_COMPILE1 = $(COMPILE) $(GEN_OPT) $(CRYPTO_CPPFLAGS) $(CPPFLAGS)
CRYPTO_ASM_COMPILE2 = $(YASM)
CRYPTO_ASM_COMPILE = @CRYPTO_ASM_COMPILE@
BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ filters/lzp/*~ utils/*~ crypto/sha2/*~ \
crypto/sha2/intel/*~ crypto/aes/*~ crypto/scrypt/*~ crypto/*~ rabin/global/*~ \
delta2/*~ crypto/keccak/*~ transpose/*~ crypto/skein/*~ crypto/keccak/*.o \
@ -323,9 +330,14 @@ $(SHA2ASM_OBJS): $(SHA2ASM_SRCS)
$(KECCAK_OBJS): $(KECCAK_SRCS) $(KECCAK_HDRS)
$(COMPILE) $(KECCAK_FLAGS) $(@:.o=.c) -o $@
$(KECCAK_OBJS_ASM): $(KECCAK_SRCS_ASM) $(KECCAK_HDRS)
$(KECCAK_OBJS_OPT64_ASM2): $(KECCAK_SRC_OPT64_ASM2)
$(COMPILE) $(KECCAK_FLAGS) $(@:.o=.s) -o $@
$(KECCAK_OBJS_OPT64_ASM3): $(KECCAK_SRC_OPT64_ASM3)
$(YASM) $(@:.o=.s) -o $@
$(KECCAK_OBJS_ASM): $(KECCAK_SRCS_ASM) $(KECCAK_HDRS)
$(LIBBSCWRAPOBJ): $(LIBBSCWRAP)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
@ -336,7 +348,7 @@ $(CRYPTO_OBJS): $(CRYPTO_SRCS) $(CRYPTO_HDRS) $(CRYPTO_ASM_OBJS)
$(COMPILE) $(GEN_OPT) $(CRYPTO_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(CRYPTO_ASM_OBJS): $(CRYPTO_ASM_SRCS) $(CRYPTO_ASM_HDRS)
$(COMPILE) $(GEN_OPT) $(CRYPTO_CPPFLAGS) $(CPPFLAGS) -o $@ $(@:.o=.s)
$(CRYPTO_ASM_COMPILE) -o $@ $(@:.o=.s)
$(CRYPTO_COMPAT_OBJS): $(CRYPTO_COMPAT_SRCS) $(CRYPTO_COMPAT_HDRS)
$(COMPILE) $(GEN_OPT) $(CRYPTO_CPPFLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@

12
config
View file

@ -91,6 +91,7 @@ salsa20_stream_c=
salsa20_stream_asm='\$\(XSALSA20_STREAM_ASM\)'
salsa20_debug=
so_suffix=so
crypto_asm_compile='\$\(CRYPTO_ASM_COMPILE1\)'
if [ -x /bin/echo ]
then
@ -312,9 +313,12 @@ then
if [ "$OS" = "Linux" ]
then
yasm="${yasm} ${yasm_params_linux}"
elif [ "$OS" = "Darwin" ]
then
yasm="${yasm} ${yasm_params_osx}"
crypto_asm_compile='\$\(CRYPTO_ASM_COMPILE2\)'
elif [ "$OS" = "SunOS" ]
then
yasm="${yasm} ${yasm_params_linux}"
@ -326,7 +330,12 @@ then
keccak_hdrs='\$\(KECCAK_HDRS_OPT64\)'
else
keccak_srcs='\$\(KECCAK_SRC_OPT64_ASM1\)'
keccak_srcs_asm='\$\(KECCAK_SRC_OPT64_ASM2\)'
if [ "$OS" != "Darwin" ]
then
keccak_srcs_asm='\$\(KECCAK_SRC_OPT64_ASM2\)'
else
keccak_srcs_asm='\$\(KECCAK_SRC_OPT64_ASM3\)'
fi
keccak_hdrs='\$\(KECCAK_HDRS_OPT64_ASM\)'
fi
else
@ -654,6 +663,7 @@ debugstatscppflagsval=
cat Makefile.in | sed "
s#@GCC@#${GCC}#g
s#@GPP@#${GPP}#g
s#@CRYPTO_ASM_COMPILE@#${crypto_asm_compile}#g
s#@SO_SUFFIX@#${so_suffix}#g
s#@${linkvar}@#\\\$\\(${typ}_${linkvar}\\)#g
s#@${compilevar}@#\\\$\\(${typ}_${compilevar}\\)#g

View file

@ -78,7 +78,7 @@ setkey_func_ptr enc_setkey;
encrypt_func_ptr enc_encrypt;
void
aes_module_init(processor_info_t *pc)
aes_module_init(processor_cap_t *pc)
{
enc_setkey = AES_set_encrypt_key;
enc_encrypt = AES_encrypt;

View file

@ -52,7 +52,7 @@ int aes_decrypt(aes_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_
uchar_t *aes_nonce(aes_ctx_t *ctx);
void aes_clean_pkey(aes_ctx_t *ctx);
void aes_cleanup(aes_ctx_t *ctx);
void aes_module_init(processor_info_t *pc);
void aes_module_init(processor_cap_t *pc);
#ifdef __cplusplus
}

View file

@ -64,7 +64,7 @@ extern "C" {
blake2bp_funcptr blake2bp;
};
static void blake2_module_init(struct blake2_dispatch *dsp, processor_info_t *pc)
static void blake2_module_init(struct blake2_dispatch *dsp, processor_cap_t *pc)
{
dsp->blake2b_init = blake2b_init_sse2;
dsp->blake2b_init_key = blake2b_init_key_sse2;

View file

@ -0,0 +1,718 @@
#
# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
# Michaël Peeters and Gilles Van Assche. For more information, feedback or
# questions, please refer to our website: http://keccak.noekeon.org/
#
# Implementation by Ronny Van Keer,
# hereby denoted as "the implementer".
#
# To the extent possible under law, the implementer has waived all copyright
# and related or neighboring rights to the source code in this file.
# http://creativecommons.org/publicdomain/zero/1.0/
#
.text
#// --- defines
.equ UseSIMD, 1
.equ _ba, 0*8
.equ _be, 1*8
.equ _bi, 2*8
.equ _bo, 3*8
.equ _bu, 4*8
.equ _ga, 5*8
.equ _ge, 6*8
.equ _gi, 7*8
.equ _go, 8*8
.equ _gu, 9*8
.equ _ka, 10*8
.equ _ke, 11*8
.equ _ki, 12*8
.equ _ko, 13*8
.equ _ku, 14*8
.equ _ma, 15*8
.equ _me, 16*8
.equ _mi, 17*8
.equ _mo, 18*8
.equ _mu, 19*8
.equ _sa, 20*8
.equ _se, 21*8
.equ _si, 22*8
.equ _so, 23*8
.equ _su, 24*8
# round vars
.equ %r15, %r15
.macro mKeccakRound iState, oState, rc, lastRound
movq %rbp, %rbx
rolq %rbx
movq _bi(\iState), %r12
xorq _gi(\iState), %rdx
xorq %r15, %rbx
xorq _ki(\iState), %r12
xorq _mi(\iState), %rdx
xorq %rdx, %r12
movq %r12, %rcx
rolq %rcx
movq _bo(\iState), %r13
xorq _go(\iState), %r8
xorq %rsi, %rcx
xorq _ko(\iState), %r13
xorq _mo(\iState), %r8
xorq %r8, %r13
movq %r13, %rdx
rolq %rdx
movq %r15, %r8
xorq %rbp, %rdx
rolq %r8
movq %rsi, %r9
xorq %r12, %r8
rolq %r9
movq _ba(\iState), %r10
movq _ge(\iState), %r11
xorq %r13, %r9
movq _ki(\iState), %r12
movq _mo(\iState), %r13
movq _su(\iState), %r14
xorq %rcx, %r11
rolq $44, %r11
xorq %rdx, %r12
xorq %rbx, %r10
rolq $43, %r12
movq %r11, %rsi
movq $\rc, %rax
orq %r12, %rsi
xorq %r10, %rax
xorq %rax, %rsi
movq %rsi, _ba(\oState)
xorq %r9, %r14
rolq $14, %r14
movq %r10, %r15
andq %r11, %r15
xorq %r14, %r15
movq %r15, _bu(\oState)
xorq %r8, %r13
rolq $21, %r13
movq %r13, %rax
andq %r14, %rax
xorq %r12, %rax
movq %rax, _bi(\oState)
notq %r12
orq %r10, %r14
orq %r13, %r12
xorq %r13, %r14
xorq %r11, %r12
movq %r14, _bo(\oState)
movq %r12, _be(\oState)
.if \lastRound == 0
movq %r12, %rbp
.endif
movq _gu(\iState), %r11
xorq %r9, %r11
movq _ka(\iState), %r12
rolq $20, %r11
xorq %rbx, %r12
rolq $3, %r12
movq _bo(\iState), %r10
movq %r11, %rax
orq %r12, %rax
xorq %r8, %r10
movq _me(\iState), %r13
movq _si(\iState), %r14
rolq $28, %r10
xorq %r10, %rax
movq %rax, _ga(\oState)
.if \lastRound == 0
xor %rax, %rsi
.endif
xorq %rcx, %r13
rolq $45, %r13
movq %r12, %rax
andq %r13, %rax
xorq %r11, %rax
movq %rax, _ge(\oState)
.if \lastRound == 0
xorq %rax, %rbp
.endif
xorq %rdx, %r14
rolq $61, %r14
movq %r14, %rax
orq %r10, %rax
xorq %r13, %rax
movq %rax, _go(\oState)
andq %r11, %r10
xorq %r14, %r10
movq %r10, _gu(\oState)
notq %r14
.if \lastRound == 0
xorq %r10, %r15
.endif
orq %r14, %r13
xorq %r12, %r13
movq %r13, _gi(\oState)
movq _be(\iState), %r10
movq _gi(\iState), %r11
movq _ko(\iState), %r12
movq _mu(\iState), %r13
movq _sa(\iState), %r14
xorq %rdx, %r11
rolq $6, %r11
xorq %r8, %r12
rolq $25, %r12
movq %r11, %rax
orq %r12, %rax
xorq %rcx, %r10
rolq $1, %r10
xorq %r10, %rax
movq %rax, _ka(\oState)
.if \lastRound == 0
xor %rax, %rsi
.endif
xorq %r9, %r13
rolq $8, %r13
movq %r12, %rax
andq %r13, %rax
xorq %r11, %rax
movq %rax, _ke(\oState)
.if \lastRound == 0
xorq %rax, %rbp
.endif
xorq %rbx, %r14
rolq $18, %r14
notq %r13
movq %r13, %rax
andq %r14, %rax
xorq %r12, %rax
movq %rax, _ki(\oState)
movq %r14, %rax
orq %r10, %rax
xorq %r13, %rax
movq %rax, _ko(\oState)
andq %r11, %r10
xorq %r14, %r10
movq %r10, _ku(\oState)
.if \lastRound == 0
xorq %r10, %r15
.endif
movq _ga(\iState), %r11
xorq %rbx, %r11
movq _ke(\iState), %r12
rolq $36, %r11
xorq %rcx, %r12
movq _bu(\iState), %r10
rolq $10, %r12
movq %r11, %rax
movq _mi(\iState), %r13
andq %r12, %rax
xorq %r9, %r10
movq _so(\iState), %r14
rolq $27, %r10
xorq %r10, %rax
movq %rax, _ma(\oState)
.if \lastRound == 0
xor %rax, %rsi
.endif
xorq %rdx, %r13
rolq $15, %r13
movq %r12, %rax
orq %r13, %rax
xorq %r11, %rax
movq %rax, _me(\oState)
.if \lastRound == 0
xorq %rax, %rbp
.endif
xorq %r8, %r14
rolq $56, %r14
notq %r13
movq %r13, %rax
orq %r14, %rax
xorq %r12, %rax
movq %rax, _mi(\oState)
orq %r10, %r11
xorq %r14, %r11
movq %r11, _mu(\oState)
andq %r10, %r14
xorq %r13, %r14
movq %r14, _mo(\oState)
.if \lastRound == 0
xorq %r11, %r15
.endif
movq _bi(\iState), %r10
movq _go(\iState), %r11
movq _ku(\iState), %r12
xorq %rdx, %r10
movq _ma(\iState), %r13
rolq $62, %r10
xorq %r8, %r11
movq _se(\iState), %r14
rolq $55, %r11
xorq %r9, %r12
movq %r10, %r9
xorq %rcx, %r14
rolq $2, %r14
andq %r11, %r9
xorq %r14, %r9
movq %r9, _su(\oState)
rolq $39, %r12
.if \lastRound == 0
xorq %r9, %r15
.endif
notq %r11
xorq %rbx, %r13
movq %r11, %rbx
andq %r12, %rbx
xorq %r10, %rbx
movq %rbx, _sa(\oState)
.if \lastRound == 0
xor %rbx, %rsi
.endif
rolq $41, %r13
movq %r12, %rcx
orq %r13, %rcx
xorq %r11, %rcx
movq %rcx, _se(\oState)
.if \lastRound == 0
xorq %rcx, %rbp
.endif
movq %r13, %rdx
movq %r14, %r8
andq %r14, %rdx
orq %r10, %r8
xorq %r12, %rdx
xorq %r13, %r8
movq %rdx, _si(\oState)
movq %r8, _so(\oState)
.endm
.macro mKeccakPermutation
subq $8*25, %rsp
movq _ba(%rdi), %rsi
movq _be(%rdi), %rbp
movq _bu(%rdi), %r15
xorq _ga(%rdi), %rsi
xorq _ge(%rdi), %rbp
xorq _gu(%rdi), %r15
xorq _ka(%rdi), %rsi
xorq _ke(%rdi), %rbp
xorq _ku(%rdi), %r15
xorq _ma(%rdi), %rsi
xorq _me(%rdi), %rbp
xorq _mu(%rdi), %r15
xorq _sa(%rdi), %rsi
xorq _se(%rdi), %rbp
movq _si(%rdi), %rdx
movq _so(%rdi), %r8
xorq _su(%rdi), %r15
mKeccakRound %rdi, %rsp, 0x0000000000000001, 0
mKeccakRound %rsp, %rdi, 0x0000000000008082, 0
mKeccakRound %rdi, %rsp, 0x800000000000808a, 0
mKeccakRound %rsp, %rdi, 0x8000000080008000, 0
mKeccakRound %rdi, %rsp, 0x000000000000808b, 0
mKeccakRound %rsp, %rdi, 0x0000000080000001, 0
mKeccakRound %rdi, %rsp, 0x8000000080008081, 0
mKeccakRound %rsp, %rdi, 0x8000000000008009, 0
mKeccakRound %rdi, %rsp, 0x000000000000008a, 0
mKeccakRound %rsp, %rdi, 0x0000000000000088, 0
mKeccakRound %rdi, %rsp, 0x0000000080008009, 0
mKeccakRound %rsp, %rdi, 0x000000008000000a, 0
mKeccakRound %rdi, %rsp, 0x000000008000808b, 0
mKeccakRound %rsp, %rdi, 0x800000000000008b, 0
mKeccakRound %rdi, %rsp, 0x8000000000008089, 0
mKeccakRound %rsp, %rdi, 0x8000000000008003, 0
mKeccakRound %rdi, %rsp, 0x8000000000008002, 0
mKeccakRound %rsp, %rdi, 0x8000000000000080, 0
mKeccakRound %rdi, %rsp, 0x000000000000800a, 0
mKeccakRound %rsp, %rdi, 0x800000008000000a, 0
mKeccakRound %rdi, %rsp, 0x8000000080008081, 0
mKeccakRound %rsp, %rdi, 0x8000000000008080, 0
mKeccakRound %rdi, %rsp, 0x0000000080000001, 0
mKeccakRound %rsp, %rdi, 0x8000000080008008, 1
addq $8*25, %rsp
.endm
.macro mPushRegs
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.endm
.macro mPopRegs
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
.macro mXorState128 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %rcx
xorq %rax, \offset(\state)
xorq %rcx, \offset+8(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu %xmm0, \offset(\state)
.endif
.endm
.macro mXorState256 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %r10
movq \offset+16(\input), %rcx
movq \offset+24(\input), %r8
xorq %rax, \offset(\state)
xorq %r10, \offset+8(\state)
xorq %rcx, \offset+16(\state)
xorq %r8, \offset+24(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu %xmm1, \offset+16(\state)
.endif
.endm
.macro mXorState512 input, state, offset
.if UseSIMD == 0
mXorState256 \input, \state, \offset
mXorState256 \input, \state, \offset+32
.else
movdqu \offset(\input), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset(\state), %xmm0
movdqu \offset+32(\input), %xmm2
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu \offset+48(\input), %xmm3
pxor \offset+32(\state), %xmm2
movdqu %xmm1, \offset+16(\state)
pxor \offset+48(\state), %xmm3
movdqu %xmm2, \offset+32(\state)
movdqu %xmm3, \offset+48(\state)
.endif
.endm
# -------------------------------------------------------------------------
.align 2
.global KeccakPermutation, @function
KeccakPermutation:
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb576bits, @function
KeccakAbsorb576bits:
mXorState512 %rsi, %rdi, 0
movq 64(%rsi), %rax
xorq %rax, 64(%rdi)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb832bits, @function
KeccakAbsorb832bits:
mXorState512 %rsi, %rdi, 0
mXorState256 %rsi, %rdi, 64
movq 96(%rsi), %rax
xorq %rax, 96(%rdi)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb1024bits, @function
KeccakAbsorb1024bits:
mXorState512 %rsi, %rdi, 0
mXorState512 %rsi, %rdi, 64
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb1088bits, @function
KeccakAbsorb1088bits:
mXorState512 %rsi, %rdi, 0
mXorState512 %rsi, %rdi, 64
movq 128(%rsi), %rax
xorq %rax, 128(%rdi)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb1152bits, @function
KeccakAbsorb1152bits:
mXorState512 %rsi, %rdi, 0
mXorState512 %rsi, %rdi, 64
mXorState128 %rsi, %rdi, 128
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb1344bits, @function
KeccakAbsorb1344bits:
mXorState512 %rsi, %rdi, 0
mXorState512 %rsi, %rdi, 64
mXorState256 %rsi, %rdi, 128
movq 160(%rsi), %rax
xorq %rax, 160(%rdi)
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakAbsorb, @function
KeccakAbsorb:
movq %rdi, %r9
test $16, %rdx
jz xorInputToState8
mXorState512 %rsi, %r9, 0
mXorState512 %rsi, %r9, 64
addq $128, %rsi
addq $128, %r9
xorInputToState8:
test $8, %rdx
jz xorInputToState4
mXorState512 %rsi, %r9, 0
addq $64, %rsi
addq $64, %r9
xorInputToState4:
test $4, %rdx
jz xorInputToState2
mXorState256 %rsi, %r9, 0
addq $32, %rsi
addq $32, %r9
xorInputToState2:
test $2, %rdx
jz xorInputToState1
mXorState128 %rsi, %r9, 0
addq $16, %rsi
addq $16, %r9
xorInputToState1:
test $1, %rdx
jz xorInputToStateDone
movq (%rsi), %rax
xorq %rax, (%r9)
xorInputToStateDone:
mPushRegs
mKeccakPermutation
mPopRegs
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakInitializeState, @function
KeccakInitializeState:
xorq %rax, %rax
xorq %rcx, %rcx
notq %rcx
.if UseSIMD == 0
movq %rax, 0*8(%rdi)
movq %rcx, 1*8(%rdi)
movq %rcx, 2*8(%rdi)
movq %rax, 3*8(%rdi)
movq %rax, 4*8(%rdi)
movq %rax, 5*8(%rdi)
movq %rax, 6*8(%rdi)
movq %rax, 7*8(%rdi)
movq %rcx, 8*8(%rdi)
movq %rax, 9*8(%rdi)
movq %rax, 10*8(%rdi)
movq %rax, 11*8(%rdi)
movq %rcx, 12*8(%rdi)
movq %rax, 13*8(%rdi)
movq %rax, 14*8(%rdi)
movq %rax, 15*8(%rdi)
movq %rax, 16*8(%rdi)
movq %rcx, 17*8(%rdi)
movq %rax, 18*8(%rdi)
movq %rax, 19*8(%rdi)
movq %rcx, 20*8(%rdi)
movq %rax, 21*8(%rdi)
movq %rax, 22*8(%rdi)
movq %rax, 23*8(%rdi)
movq %rax, 24*8(%rdi)
.else
pxor %xmm0, %xmm0
movq %rax, 0*8(%rdi)
movq %rcx, 1*8(%rdi)
movq %rcx, 2*8(%rdi)
movq %rax, 3*8(%rdi)
movdqu %xmm0, 4*8(%rdi)
movdqu %xmm0, 6*8(%rdi)
movq %rcx, 8*8(%rdi)
movq %rax, 9*8(%rdi)
movdqu %xmm0, 10*8(%rdi)
movq %rcx, 12*8(%rdi)
movq %rax, 13*8(%rdi)
movdqu %xmm0, 14*8(%rdi)
movq %rax, 16*8(%rdi)
movq %rcx, 17*8(%rdi)
movdqu %xmm0, 18*8(%rdi)
movq %rcx, 20*8(%rdi)
movq %rax, 21*8(%rdi)
movdqu %xmm0, 22*8(%rdi)
movq %rax, 24*8(%rdi)
.endif
ret
# -------------------------------------------------------------------------
.align 2
.global KeccakExtract1024bits, @function
KeccakExtract1024bits:
movq 0*8(%rdi), %rax
movq 1*8(%rdi), %rcx
movq 2*8(%rdi), %rdx
movq 3*8(%rdi), %r8
notq %rcx
notq %rdx
movq %rax, 0*8(%rsi)
movq %rcx, 1*8(%rsi)
movq %rdx, 2*8(%rsi)
movq %r8, 3*8(%rsi)
movq 4*8(%rdi), %rax
movq 5*8(%rdi), %rcx
movq 6*8(%rdi), %rdx
movq 7*8(%rdi), %r8
movq %rax, 4*8(%rsi)
movq %rcx, 5*8(%rsi)
movq %rdx, 6*8(%rsi)
movq %r8, 7*8(%rsi)
movq 8*8(%rdi), %rax
movq 9*8(%rdi), %rcx
movq 10*8(%rdi), %rdx
movq 11*8(%rdi), %r8
notq %rax
movq %rax, 8*8(%rsi)
movq %rcx, 9*8(%rsi)
movq %rdx, 10*8(%rsi)
movq %r8, 11*8(%rsi)
movq 12*8(%rdi), %rax
movq 13*8(%rdi), %rcx
movq 14*8(%rdi), %rdx
movq 15*8(%rdi), %r8
notq %rax
movq %rax, 12*8(%rsi)
movq %rcx, 13*8(%rsi)
movq %rdx, 14*8(%rsi)
movq %r8, 15*8(%rsi)
ret

View file

@ -45,16 +45,16 @@ http://creativecommons.org/publicdomain/zero/1.0/
#define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#define KECCAK_ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#define KECCAK_ALIGN __declspec(align(32))
#else
#define ALIGN
#define KECCAK_ALIGN
#endif
ALIGN typedef struct spongeStateStruct {
ALIGN unsigned char state[KeccakPermutationSizeInBytes];
ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
KECCAK_ALIGN typedef struct spongeStateStruct {
KECCAK_ALIGN unsigned char state[KeccakPermutationSizeInBytes];
KECCAK_ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
unsigned int rate;
unsigned int capacity;
unsigned int bitsInQueue;

View file

@ -26,7 +26,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/time.h>
#include <fcntl.h>
#include <time.h>
#include <libgen.h>

View file

@ -39,6 +39,7 @@
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include <sys/time.h>
#include <cpuid.h>
#if defined(sun) || defined(__sun)
#include <sys/byteorder.h>
@ -402,6 +403,10 @@ void rm_fname(char *fn);
*/
int is_incompressible(int type);
#ifdef __APPLE__
int clock_gettime(int clk_id, struct timespec *ts);
#endif
/*
* Roundup v to the nearest power of 2. From Bit Twiddling Hacks:
* http://graphics.stanford.edu/~seander/bithacks.html