From 20250aa5dc55adfbd04a1583fdb825c47b6361ae Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Mon, 4 Mar 2013 21:56:07 +0530 Subject: [PATCH] Add XSalsa20 encryption algorithm from the NaCl library. Include 128-bit key support based on the Salsa20 eSTREAM submission. Allow variable-length nonces. Use random bytes for the initial nonce value. Increase PBE hash rounds to 50000. --- Makefile.in | 22 +- config | 12 + crypto/aes/crypto_aes.c | 28 +- crypto/aes/crypto_aes.h | 4 +- crypto/crypto_utils.c | 143 +- crypto/crypto_utils.h | 17 +- crypto/scrypt/crypto_scrypt.h | 2 + crypto/xsalsa20/crypto_core_hsalsa20.h | 16 + crypto/xsalsa20/crypto_stream_salsa20.h | 27 + crypto/xsalsa20/crypto_xsalsa20.h | 32 + crypto/xsalsa20/hsalsa_core.c | 108 + crypto/xsalsa20/stream.c | 216 + crypto/xsalsa20/stream.s | 4823 +++++++++++++++++++++++ crypto/xsalsa20/xsalsa20_xor.c | 140 + main.c | 52 +- 15 files changed, 5568 insertions(+), 74 deletions(-) create mode 100644 crypto/xsalsa20/crypto_core_hsalsa20.h create mode 100644 crypto/xsalsa20/crypto_stream_salsa20.h create mode 100644 crypto/xsalsa20/crypto_xsalsa20.h create mode 100644 crypto/xsalsa20/hsalsa_core.c create mode 100644 crypto/xsalsa20/stream.c create mode 100644 crypto/xsalsa20/stream.s create mode 100644 crypto/xsalsa20/xsalsa20_xor.c diff --git a/Makefile.in b/Makefile.in index 365e403..f4ace6d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,15 +29,22 @@ MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h utils/cpuid.h utils/xxhash.h MAINOBJS = $(MAINSRCS:.c=.o) +XSALSA20_STREAM_C = crypto/xsalsa20/stream.c +XSALSA20_STREAM_ASM = crypto/xsalsa20/stream.s +XSALSA20_DEBUG = -DSALSA20_DEBUG + CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \ crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c \ - crypto/sha2_utils.c crypto/sha3_utils.c + crypto/sha2_utils.c crypto/sha3_utils.c crypto/xsalsa20/xsalsa20_xor.c \ + crypto/xsalsa20/hsalsa_core.c @XSALSA20_STREAM_C@ CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \ crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \ - crypto/sha2_utils.h crypto/sha3_utils.h $(MAINHDRS) -CRYPTO_ASM_SRCS = crypto/aes/vpaes-x86_64.s crypto/aes/aesni-x86_64.s -CRYPTO_ASM_OBJS = crypto/aes/vpaes-x86_64.o crypto/aes/aesni-x86_64.o -CRYPTO_ASM_HDRS = crypto/aes/crypto_aes.h + crypto/sha2_utils.h crypto/sha3_utils.h crypto/xsalsa20/crypto_core_hsalsa20.h \ + crypto/xsalsa20/crypto_stream_salsa20.h crypto/xsalsa20/crypto_xsalsa20.h \ + $(MAINHDRS) +CRYPTO_ASM_SRCS = crypto/aes/vpaes-x86_64.s crypto/aes/aesni-x86_64.s @XSALSA20_STREAM_ASM@ +CRYPTO_ASM_OBJS = $(CRYPTO_ASM_SRCS:.s=.o) +CRYPTO_ASM_HDRS = crypto/aes/crypto_aes.h crypto/xsalsa20/crypto_stream_salsa20.h CRYPTO_COMPAT_SRCS = crypto/old/sha2_utils_old.c crypto/old/sha3_utils_old.c CRYPTO_COMPAT_HDRS = crypto/old/sha2_utils_old.h crypto/old/sha3_utils_old.h CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o) @@ -175,9 +182,10 @@ COMMON_CPPFLAGS = -I.
-I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT -DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \ -I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I./crypto/sha2 \ -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ -I./rabin/global \ - -I./crypto/keccak -I./transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) -pedantic -Wall -std=gnu99 \ + -I./crypto/keccak -I./transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) \ + -I./crypto/xsalsa20 -pedantic -Wall -std=gnu99 \ -fno-strict-aliasing -Wno-unused-but-set-variable -Wno-enum-compare \ - @COMPAT_CPPFLAGS@ + @COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ COMMON_VEC_FLAGS = -ftree-vectorize COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \ diff --git a/config b/config index b048d43..97609ff 100755 --- a/config +++ b/config @@ -63,6 +63,9 @@ sse_detect=1 sse_opt_flags="-msse2" crypto_compat_objs='\$\(CRYPTO_COMPAT_OBJS\)' crypto_compat_flags="-D__HASH_COMPATIBILITY_" +salsa20_stream_c= +salsa20_stream_asm='\$\(XSALSA20_STREAM_ASM\)' +salsa20_debug= rm -rf ./buildtmp mkdir ./buildtmp @@ -168,6 +171,9 @@ done if [ $debug -eq 1 ] then typ="DEBUG" + salsa20_stream_c='\$\(XSALSA20_STREAM_C\)' + salsa20_stream_asm= + salsa20_debug='\$\(XSALSA20_DEBUG\)' else typ="RELEASE" fi @@ -523,6 +529,9 @@ keccak_srcs_asm_var="KECCAK_SRCS_ASM" crypto_compat_objs_var="CRYPTO_COMPAT_OBJS" crypto_compat_flags_var="COMPAT_CPPFLAGS" +salsa20_stream_c_var="XSALSA20_STREAM_C" +salsa20_stream_asm_var="XSALSA20_STREAM_ASM" +salsa20_debug_var="XSALSA20_DEBUG" noslabcppflagsval= debugstatscppflagsval= @@ -567,5 +576,8 @@ s#@${extra_opt_flags_var}@#${extra_opt_flags}#g s#@${sse_opt_flags_var}@#${sse_opt_flags}#g s#@${crypto_compat_objs_var}@#${crypto_compat_objs}#g s#@${crypto_compat_flags_var}@#${crypto_compat_flags}#g +s#@${salsa20_stream_c_var}@#${salsa20_stream_c}#g +s#@${salsa20_stream_asm_var}@#${salsa20_stream_asm}#g +s#@${salsa20_debug_var}@#${salsa20_debug}#g " > Makefile diff --git a/crypto/aes/crypto_aes.c b/crypto/aes/crypto_aes.c index 6c4aaa6..264982c 100644 --- a/crypto/aes/crypto_aes.c +++ b/crypto/aes/crypto_aes.c @@ -58,12 +58,14 @@ #include #include #include +#include #include #include #include #include #include "crypto_aes.h" +extern int geturandom_bytes(uchar_t *rbytes, int nbytes); extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); extern int vpaes_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); extern void vpaes_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key); @@ -123,16 +125,20 @@ aes_init(aes_ctx_t *ctx, uchar_t *salt, int saltlen, uchar_t *pwd, int pwd_len, if (enc) { enc_setkey(key, (ctx->keylen << 3), &(ctx->key)); - // Derive nonce from salt - if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1) { - time((time_t *)&tv); - } else { - tv = tp.tv_sec * 1000UL + tp.tv_nsec; + // Derive 64-bit nonce + if (RAND_status() != 1 || RAND_bytes((uchar_t *)&(ctx->nonce), 8) != 1) { + if (geturandom_bytes((uchar_t *)&(ctx->nonce), 8) != 0) { + if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1) { + time((time_t *)&tv); + } else { + tv = tp.tv_sec * 1000UL + tp.tv_nsec; + } + sprintf((char *)num, "%" PRIu64, tv); + PKCS5_PBKDF2_HMAC((const char *)num, strlen((char *)num), salt, + saltlen, PBE_ROUNDS, EVP_sha256(), 32, IV); + ctx->nonce = lzma_crc64(IV, 32, 0); + } } - sprintf((char *)num, "%" PRIu64, tv); - PKCS5_PBKDF2_HMAC((const char *)num, strlen((char *)num), 
salt, - saltlen, PBE_ROUNDS, EVP_sha256(), 32, IV); - ctx->nonce = lzma_crc64(IV, 32, 0) & 0xffffffff00000000ULL; // Nullify stack components memset(num, 0, 25); memset(IV, 0, 32); @@ -193,10 +199,10 @@ aes_decrypt(aes_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t le return (0); } -uint64_t +uchar_t * aes_nonce(aes_ctx_t *ctx) { - return (ctx->nonce); + return ((uchar_t *)&(ctx->nonce)); } void diff --git a/crypto/aes/crypto_aes.h b/crypto/aes/crypto_aes.h index 3da6e97..f5417f6 100644 --- a/crypto/aes/crypto_aes.h +++ b/crypto/aes/crypto_aes.h @@ -36,8 +36,6 @@ extern "C" { #endif -#define PBE_ROUNDS 1000 - typedef struct { uint64_t nonce; AES_KEY key; @@ -49,7 +47,7 @@ int aes_init(aes_ctx_t *ctx, uchar_t *salt, int saltlen, uchar_t *pwd, int pwd_l uint64_t nonce, int enc); int aes_encrypt(aes_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uint64_t len, uint64_t id); int aes_decrypt(aes_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t len, uint64_t id); -uint64_t aes_nonce(aes_ctx_t *ctx); +uchar_t *aes_nonce(aes_ctx_t *ctx); void aes_clean_pkey(aes_ctx_t *ctx); void aes_cleanup(aes_ctx_t *ctx); void aes_module_init(processor_info_t *pc); diff --git a/crypto/crypto_utils.c b/crypto/crypto_utils.c index cf53dcd..40a750d 100644 --- a/crypto/crypto_utils.c +++ b/crypto/crypto_utils.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "crypto_utils.h" #include "sha2_utils.h" @@ -56,7 +57,6 @@ static void init_sha512(void); static void init_blake2(void); -static int geturandom_bytes(uchar_t rbytes[32]); static struct blake2_dispatch bdsp; /* @@ -173,6 +173,25 @@ PKCS5_PBKDF2_HMAC(const char *pass, int passlen, } #endif +int +get_crypto_alg(char *name) +{ + if (name[0] == 0 || name[1] == 0 || name[2] == 0) { + return (0); + } + if (strncmp(name, "AES", 3) == 0) { + return (CRYPTO_ALG_AES); + } else { + if (name[3] == 0 || name[4] == 0 || name[5] == 0 || name[6] == 0) { + return (0); + } + if (strncmp(name, "SALSA20", 7) == 0) { + return (CRYPTO_ALG_SALSA20); + } + } + return (0); +} + /* * Compute a digest of the given data segment. The parameter mt indicates whether * to use the parallel(OpenMP) versions.
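(As a usage sketch: the hypothetical caller below shows the mapping get_crypto_alg() above is meant to provide; the CRYPTO_ALG_* codes are the ones defined in crypto_utils.h, and algo_name stands in for whatever string the command line supplies.)

	int alg = get_crypto_alg(algo_name);	/* "AES" gives CRYPTO_ALG_AES, "SALSA20" gives CRYPTO_ALG_SALSA20 */
	if (alg == 0) {
		fprintf(stderr, "Unrecognized crypto algorithm: %s\n", algo_name);
		return (1);
	}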
Parallel versions are only used when @@ -401,13 +420,12 @@ deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes) int hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) { - aes_ctx_t *actx = (aes_ctx_t *)(cctx->crypto_ctx); mctx->mac_cksum = cksum; if (cksum == CKSUM_BLAKE256) { blake2b_state *ctx = (blake2b_state *)malloc(sizeof (blake2b_state)); if (!ctx) return (-1); - if (bdsp.blake2b_init_key(ctx, 32, actx->pkey, cctx->keylen) != 0) + if (bdsp.blake2b_init_key(ctx, 32, cctx->pkey, cctx->keylen) != 0) return (-1); mctx->mac_ctx = ctx; ctx = (blake2b_state *)malloc(sizeof (blake2b_state)); @@ -421,7 +439,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) } else if (cksum == CKSUM_BLAKE512) { blake2b_state *ctx = (blake2b_state *)malloc(sizeof (blake2b_state)); if (!ctx) return (-1); - if (bdsp.blake2b_init_key(ctx, 64, actx->pkey, cctx->keylen) != 0) + if (bdsp.blake2b_init_key(ctx, 64, cctx->pkey, cctx->keylen) != 0) return (-1); mctx->mac_ctx = ctx; ctx = (blake2b_state *)malloc(sizeof (blake2b_state)); @@ -436,7 +454,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)malloc(sizeof (Skein_512_Ctxt_t)); if (!ctx) return (-1); Skein_512_InitExt(ctx, 256, SKEIN_CFG_TREE_INFO_SEQUENTIAL, - actx->pkey, cctx->keylen); + cctx->pkey, cctx->keylen); mctx->mac_ctx = ctx; ctx = (Skein_512_Ctxt_t *)malloc(sizeof (Skein_512_Ctxt_t)); if (!ctx) { @@ -450,7 +468,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)malloc(sizeof (Skein_512_Ctxt_t)); if (!ctx) return (-1); Skein_512_InitExt(ctx, 512, SKEIN_CFG_TREE_INFO_SEQUENTIAL, - actx->pkey, cctx->keylen); + cctx->pkey, cctx->keylen); mctx->mac_ctx = ctx; ctx = (Skein_512_Ctxt_t *)malloc(sizeof (Skein_512_Ctxt_t)); if (!ctx) { @@ -465,7 +483,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) HMAC_CTX *ctx = (HMAC_CTX *)malloc(sizeof (HMAC_CTX)); if (!ctx) return (-1); HMAC_CTX_init(ctx); - HMAC_Init_ex(ctx, actx->pkey, cctx->keylen, EVP_sha256(), NULL); + HMAC_Init_ex(ctx, cctx->pkey, cctx->keylen, EVP_sha256(), NULL); mctx->mac_ctx = ctx; ctx = (HMAC_CTX *)malloc(sizeof (HMAC_CTX)); @@ -482,7 +500,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) } else { HMAC_SHA512_Context *ctx = (HMAC_SHA512_Context *)malloc(sizeof (HMAC_SHA512_Context)); if (!ctx) return (-1); - opt_HMAC_SHA512t256_Init(ctx, actx->pkey, cctx->keylen); + opt_HMAC_SHA512t256_Init(ctx, cctx->pkey, cctx->keylen); mctx->mac_ctx = ctx; ctx = (HMAC_SHA512_Context *)malloc(sizeof (HMAC_SHA512_Context)); @@ -498,7 +516,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) HMAC_CTX *ctx = (HMAC_CTX *)malloc(sizeof (HMAC_CTX)); if (!ctx) return (-1); HMAC_CTX_init(ctx); - HMAC_Init_ex(ctx, actx->pkey, cctx->keylen, EVP_sha512(), NULL); + HMAC_Init_ex(ctx, cctx->pkey, cctx->keylen, EVP_sha512(), NULL); mctx->mac_ctx = ctx; ctx = (HMAC_CTX *)malloc(sizeof (HMAC_CTX)); @@ -515,7 +533,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) } else { HMAC_SHA512_Context *ctx = (HMAC_SHA512_Context *)malloc(sizeof (HMAC_SHA512_Context)); if (!ctx) return (-1); - opt_HMAC_SHA512_Init(ctx, actx->pkey, cctx->keylen); + opt_HMAC_SHA512_Init(ctx, cctx->pkey, cctx->keylen); mctx->mac_ctx = ctx; ctx = (HMAC_SHA512_Context *)malloc(sizeof (HMAC_SHA512_Context)); @@ -538,7 +556,7 @@ hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) if (Keccak_Init(ctx, 512) != 0) return (-1); } - if (Keccak_Update(ctx, 
actx->pkey, cctx->keylen << 3) != 0) + if (Keccak_Update(ctx, cctx->pkey, cctx->keylen << 3) != 0) return (-1); mctx->mac_ctx = ctx; @@ -719,13 +737,27 @@ hmac_cleanup(mac_ctx_t *mctx) */ int init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, - uchar_t *salt, int saltlen, int keylen, uint64_t nonce, int enc_dec) + uchar_t *salt, int saltlen, int keylen, uchar_t *nonce, int enc_dec) { - if (crypto_alg == CRYPTO_ALG_AES) { - aes_ctx_t *actx = (aes_ctx_t *)malloc(sizeof (aes_ctx_t)); - aes_module_init(&proc_info); + if (crypto_alg == CRYPTO_ALG_AES || crypto_alg == CRYPTO_ALG_SALSA20) { + aes_ctx_t *actx; + salsa20_ctx_t *sctx; + + /* Silence compiler warnings */ + actx = NULL; + sctx = NULL; + + if (crypto_alg == CRYPTO_ALG_AES) { + actx = (aes_ctx_t *)malloc(sizeof (aes_ctx_t)); + actx->keylen = keylen; + cctx->pkey = actx->pkey; + aes_module_init(&proc_info); + } else { + sctx = (salsa20_ctx_t *)malloc(sizeof (salsa20_ctx_t)); + sctx->keylen = keylen; + cctx->pkey = sctx->pkey; + } cctx->keylen = keylen; - actx->keylen = keylen; if (enc_dec) { /* @@ -735,7 +767,7 @@ init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, salt = cctx->salt; cctx->saltlen = 32; if (RAND_status() != 1 || RAND_bytes(salt, 32) != 1) { - if (geturandom_bytes(salt) != 0) { + if (geturandom_bytes(salt, 32) != 0) { uchar_t sb[64]; int b; struct timespec tp; @@ -765,9 +797,16 @@ init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, /* * Zero nonce (arg #6) since it will be generated. */ - if (aes_init(actx, salt, 32, pwd, pwd_len, 0, enc_dec) != 0) { - fprintf(stderr, "Failed to initialize AES context\n"); - return (-1); + if (crypto_alg == CRYPTO_ALG_AES) { + if (aes_init(actx, salt, 32, pwd, pwd_len, 0, enc_dec) != 0) { + fprintf(stderr, "Failed to initialize AES context\n"); + return (-1); + } + } else { + if (salsa20_init(sctx, salt, 32, pwd, pwd_len, 0, enc_dec) != 0) { + fprintf(stderr, "Failed to initialize SALSA20 context\n"); + return (-1); + } } } else { /* @@ -783,13 +822,24 @@ init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, cctx->salt = (uchar_t *)malloc(saltlen); memcpy(cctx->salt, salt, saltlen); - if (aes_init(actx, cctx->salt, saltlen, pwd, pwd_len, nonce, - enc_dec) != 0) { - fprintf(stderr, "Failed to initialize AES context\n"); - return (-1); + if (crypto_alg == CRYPTO_ALG_AES) { + if (aes_init(actx, cctx->salt, saltlen, pwd, pwd_len, *((uint64_t *)nonce), + enc_dec) != 0) { + fprintf(stderr, "Failed to initialize AES context\n"); + return (-1); + } + } else { + if (salsa20_init(sctx, salt, 32, pwd, pwd_len, nonce, enc_dec) != 0) { + fprintf(stderr, "Failed to initialize SALSA20 context\n"); + return (-1); + } } } - cctx->crypto_ctx = actx; + if (crypto_alg == CRYPTO_ALG_AES) { + cctx->crypto_ctx = actx; + } else { + cctx->crypto_ctx = sctx; + } cctx->crypto_alg = crypto_alg; cctx->enc_dec = enc_dec; } else { @@ -808,6 +858,12 @@ crypto_buf(crypto_ctx_t *cctx, uchar_t *from, uchar_t *to, uint64_t bytes, uint6 } else { return (aes_decrypt((aes_ctx_t *)(cctx->crypto_ctx), from, to, bytes, id)); } + } else if (cctx->crypto_alg == CRYPTO_ALG_SALSA20) { + if (cctx->enc_dec == ENCRYPT_FLAG) { + return (salsa20_encrypt((salsa20_ctx_t *)(cctx->crypto_ctx), from, to, bytes, id)); + } else { + return (salsa20_decrypt((salsa20_ctx_t *)(cctx->crypto_ctx), from, to, bytes, id)); + } } else { fprintf(stderr, "Unrecognized algorithm code: %d\n", cctx->crypto_alg); return (-1); @@ -815,37 +871,56 @@ crypto_buf(crypto_ctx_t 
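(Caller-side sketch: with the dual dispatch in init_crypto() above, encryption setup is algorithm-agnostic. The fragment below is hypothetical and not taken from main.c; on the encrypt side the salt and nonce arguments are ignored and generated internally, per the code above.)

	crypto_ctx_t *cctx = (crypto_ctx_t *)malloc(sizeof (crypto_ctx_t));
	/* Salt and nonce args NULL on the encrypt side: both are generated. */
	if (init_crypto(cctx, pwd, pwd_len, CRYPTO_ALG_SALSA20, NULL, 0,
	    DEFAULT_KEYLEN, NULL, ENCRYPT_FLAG) == -1)
		return (1);
	if (crypto_buf(cctx, from, to, bytes, id) == -1)
		return (1);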
*cctx, uchar_t *from, uchar_t *to, uint64_t bytes, uint6 return (0); } -uint64_t +uchar_t * crypto_nonce(crypto_ctx_t *cctx) { - return (aes_nonce((aes_ctx_t *)(cctx->crypto_ctx))); + if (cctx->crypto_alg == CRYPTO_ALG_AES) { + return (aes_nonce((aes_ctx_t *)(cctx->crypto_ctx))); + } + return (salsa20_nonce((salsa20_ctx_t *)(cctx->crypto_ctx))); } void crypto_clean_pkey(crypto_ctx_t *cctx) { - aes_clean_pkey((aes_ctx_t *)(cctx->crypto_ctx)); + if (cctx->crypto_alg == CRYPTO_ALG_AES) { + aes_clean_pkey((aes_ctx_t *)(cctx->crypto_ctx)); + } else { + salsa20_clean_pkey((salsa20_ctx_t *)(cctx->crypto_ctx)); + } + cctx->pkey = NULL; } void cleanup_crypto(crypto_ctx_t *cctx) { - aes_cleanup((aes_ctx_t *)(cctx->crypto_ctx)); + if (cctx->crypto_alg == CRYPTO_ALG_AES) { + aes_cleanup((aes_ctx_t *)(cctx->crypto_ctx)); + } else { + salsa20_cleanup((salsa20_ctx_t *)(cctx->crypto_ctx)); + } memset(cctx->salt, 0, 32); free(cctx->salt); free(cctx); } -static int -geturandom_bytes(uchar_t rbytes[32]) +int +geturandom_bytes(uchar_t *rbytes, int buflen) { int fd; int64_t lenread; uchar_t * buf = rbytes; - uint64_t buflen = 32; - /* Open /dev/urandom. */ - if ((fd = open("/dev/urandom", O_RDONLY)) == -1) + /* Open /dev/urandom. Up to 10 retries. */ + fd = -1; + lenread = 1; + while (fd == -1 && lenread < 10) { + if ((fd = open("/dev/urandom", O_RDONLY)) != -1) + break; + lenread++; + sleep(1); + } + if (fd == -1) goto err0; /* Read bytes until we have filled the buffer. */ diff --git a/crypto/crypto_utils.h b/crypto/crypto_utils.h index 867765f..409df79 100644 --- a/crypto/crypto_utils.h +++ b/crypto/crypto_utils.h @@ -47,10 +47,12 @@ extern "C" { #define MAX_KEYLEN DEFAULT_KEYLEN #endif -#define ENCRYPT_FLAG 1 -#define DECRYPT_FLAG 0 -#define CRYPTO_ALG_AES 0x10 -#define MAX_SALTLEN 64 +#define ENCRYPT_FLAG 1 +#define DECRYPT_FLAG 0 +#define CRYPTO_ALG_AES 0x10 +#define CRYPTO_ALG_SALSA20 0x20 +#define MAX_SALTLEN 64 +#define MAX_NONCE 32 #define KECCAK_MAX_SEG (2305843009213693950ULL) /* @@ -80,6 +82,7 @@ typedef struct { int crypto_alg; int enc_dec; uchar_t *salt; + uchar_t *pkey; int saltlen; int keylen; } crypto_ctx_t; @@ -104,12 +107,14 @@ void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes); /* * Encryption related functions. */ int init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, - uchar_t *salt, int saltlen, int keylen, uint64_t nonce, int enc_dec); + uchar_t *salt, int saltlen, int keylen, uchar_t *nonce, int enc_dec); int crypto_buf(crypto_ctx_t *cctx, uchar_t *from, uchar_t *to, uint64_t bytes, uint64_t id); -uint64_t crypto_nonce(crypto_ctx_t *cctx); +uchar_t *crypto_nonce(crypto_ctx_t *cctx); void crypto_clean_pkey(crypto_ctx_t *cctx); void cleanup_crypto(crypto_ctx_t *cctx); int get_pw_string(uchar_t pw[MAX_PW_LEN], const char *prompt, int twice); +int get_crypto_alg(char *name); +int geturandom_bytes(uchar_t *rbytes, int nbytes); /* * HMAC functions. diff --git a/crypto/scrypt/crypto_scrypt.h b/crypto/scrypt/crypto_scrypt.h index 5a37aac..42b55c8 100644 --- a/crypto/scrypt/crypto_scrypt.h +++ b/crypto/scrypt/crypto_scrypt.h @@ -32,6 +32,8 @@ #include #include +#define PBE_ROUNDS 50000 + /** * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 ..
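(With PBE_ROUNDS now defined here at 50000, up from the old 1000 in crypto_aes.h, the AES and XSalsa20 setup paths share one iteration count. A minimal sketch of a derivation call in the same pattern as the aes_init() fallback earlier in this patch, using OpenSSL's PKCS5_PBKDF2_HMAC; the pwd/salt variables are placeholders:)

	uchar_t key[32];
	/* 50000 HMAC-SHA256 iterations; returns 1 on success, 0 on failure. */
	if (PKCS5_PBKDF2_HMAC((const char *)pwd, pwd_len, salt, saltlen,
	    PBE_ROUNDS, EVP_sha256(), 32, key) == 0)
		return (-1);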
saltlen - 1], N, r, diff --git a/crypto/xsalsa20/crypto_core_hsalsa20.h b/crypto/xsalsa20/crypto_core_hsalsa20.h new file mode 100644 index 0000000..eee2703 --- /dev/null +++ b/crypto/xsalsa20/crypto_core_hsalsa20.h @@ -0,0 +1,16 @@ +#ifndef crypto_core_hsalsa20_H +#define crypto_core_hsalsa20_H + +#define HSALSA_CRYPTO_OUTPUTBYTES 32 +#define HSALSA_CRYPTO_INPUTBYTES 16 +#define HSALSA_CRYPTO_CONSTBYTES 16 + +#ifdef __cplusplus +extern "C" { +#endif +int crypto_core_hsalsa20(unsigned char *out, const unsigned char *in, const unsigned char *k, const unsigned char *c); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypto/xsalsa20/crypto_stream_salsa20.h b/crypto/xsalsa20/crypto_stream_salsa20.h new file mode 100644 index 0000000..22a8e72 --- /dev/null +++ b/crypto/xsalsa20/crypto_stream_salsa20.h @@ -0,0 +1,27 @@ +#ifndef crypto_stream_salsa20_H +#define crypto_stream_salsa20_H + +#define crypto_stream_salsa20_amd64_xmm6_KEYBYTES 32 +#define crypto_stream_salsa20_amd64_xmm6_NONCEBYTES 8 +#ifdef __cplusplus +extern "C" { +#endif +extern int crypto_stream_salsa20_amd64_xmm6(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_salsa20_amd64_xmm6_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_salsa20_ref(unsigned char *c,unsigned long long clen, const unsigned char *n, const unsigned char *k); +extern int crypto_stream_salsa20_ref_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +#ifdef __cplusplus +} +#endif + +#ifndef SALSA20_DEBUG +#define crypto_stream_salsa20 crypto_stream_salsa20_amd64_xmm6 +#define crypto_stream_salsa20_xor crypto_stream_salsa20_amd64_xmm6_xor +#else +#define crypto_stream_salsa20 crypto_stream_salsa20_ref +#define crypto_stream_salsa20_xor crypto_stream_salsa20_ref_xor +#endif +#define crypto_stream_salsa20_KEYBYTES crypto_stream_salsa20_amd64_xmm6_KEYBYTES +#define crypto_stream_salsa20_NONCEBYTES crypto_stream_salsa20_amd64_xmm6_NONCEBYTES + +#endif diff --git a/crypto/xsalsa20/crypto_xsalsa20.h b/crypto/xsalsa20/crypto_xsalsa20.h new file mode 100644 index 0000000..1e65b37 --- /dev/null +++ b/crypto/xsalsa20/crypto_xsalsa20.h @@ -0,0 +1,32 @@ +#ifndef crypto_xsalsa20_H +#define crypto_xsalsa20_H + +#include +#include + +#define XSALSA20_CRYPTO_KEYBYTES 32 +#define XSALSA20_CRYPTO_NONCEBYTES 24 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + unsigned char nonce[XSALSA20_CRYPTO_NONCEBYTES]; + uchar_t key[XSALSA20_CRYPTO_KEYBYTES]; + int keylen; + uchar_t pkey[XSALSA20_CRYPTO_KEYBYTES]; +} salsa20_ctx_t; + +int salsa20_init(salsa20_ctx_t *ctx, uchar_t *salt, int saltlen, uchar_t *pwd, int pwd_len, uchar_t *nonce, int enc); +int salsa20_encrypt(salsa20_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uint64_t len, uint64_t id); +int salsa20_decrypt(salsa20_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t len, uint64_t id); +uchar_t *salsa20_nonce(salsa20_ctx_t *ctx); +void salsa20_clean_pkey(salsa20_ctx_t *ctx); +void salsa20_cleanup(salsa20_ctx_t *ctx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypto/xsalsa20/hsalsa_core.c b/crypto/xsalsa20/hsalsa_core.c new file mode 100644 index 0000000..ca41adf --- /dev/null +++ b/crypto/xsalsa20/hsalsa_core.c @@ -0,0 +1,108 @@ +/* +version 20080912 +D. J. Bernstein +Public domain. 
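XSalsa20 as declared in the headers above is the composition of two primitives: crypto_core_hsalsa20() folds the 256-bit key and nonce bytes 0..15 into a one-off subkey, and the Salsa20 stream then runs under that subkey with nonce bytes 16..23. That is how the 8-byte Salsa20 nonce stretches to XSALSA20_CRYPTO_NONCEBYTES (24). A minimal sketch of the composition follows; the patch's actual wiring lives in crypto/xsalsa20/xsalsa20_xor.c (not shown here), so treat this as illustrative:

static const unsigned char sigma[16] = "expand 32-byte k";

int
crypto_stream_xsalsa20_xor(unsigned char *c, const unsigned char *m,
    unsigned long long mlen, const unsigned char *n, const unsigned char *k)
{
	unsigned char subkey[HSALSA_CRYPTO_OUTPUTBYTES];

	/* Stage 1: derive a fresh subkey from the key and nonce[0..15]. */
	crypto_core_hsalsa20(subkey, n, k, sigma);
	/* Stage 2: stream-XOR under the subkey with nonce[16..23]. */
	return (crypto_stream_salsa20_xor(c, m, mlen, n + 16, subkey));
}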
+*/ +#include +#include + +#define ROUNDS 20 + +static uint32_t +rotate(uint32_t u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32_t +load_littleendian(const unsigned char *x) +{ + return + (uint32_t) (x[0]) \ + | (((uint32_t) (x[1])) << 8) \ + | (((uint32_t) (x[2])) << 16) \ + | (((uint32_t) (x[3])) << 24) + ; +} + +static void +store_littleendian(unsigned char *x, uint32_t u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int +crypto_core_hsalsa20(unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c) +{ + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + int i; + + x0 = load_littleendian(c + 0); + x1 = load_littleendian(k + 0); + x2 = load_littleendian(k + 4); + x3 = load_littleendian(k + 8); + x4 = load_littleendian(k + 12); + x5 = load_littleendian(c + 4); + x6 = load_littleendian(in + 0); + x7 = load_littleendian(in + 4); + x8 = load_littleendian(in + 8); + x9 = load_littleendian(in + 12); + x10 = load_littleendian(c + 8); + x11 = load_littleendian(k + 16); + x12 = load_littleendian(k + 20); + x13 = load_littleendian(k + 24); + x14 = load_littleendian(k + 28); + x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x5); + store_littleendian(out + 8,x10); + store_littleendian(out + 12,x15); + store_littleendian(out + 16,x6); + store_littleendian(out + 20,x7); + store_littleendian(out + 24,x8); + store_littleendian(out + 28,x9); + + return 0; +} diff --git a/crypto/xsalsa20/stream.c b/crypto/xsalsa20/stream.c new file mode 100644 index 0000000..4a91047 --- /dev/null +++ b/crypto/xsalsa20/stream.c @@ -0,0 +1,216 @@ +/* +version 20080912 +D. J. Bernstein +Public domain. 
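For orientation, crypto_core() below loads the standard Salsa20 initial
state; row-major, with c = the sigma constant, k = key, n = nonce and
b = block-counter words:

	c0 k0 k1 k2
	k3 c1 n0 n1
	b0 b1 c2 k4
	k5 k6 k7 c3

in[0..7] carries the 8-byte nonce and in[8..15] the little-endian 64-bit
block counter, which the _xor loop increments once per 64-byte block.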
+*/ +#include +#include + +#define ROUNDS 20 +static const unsigned char sigma[16] = "expand 32-byte k"; + +static uint32_t +rotate(uint32_t u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32_t +load_littleendian(const unsigned char *x) +{ + return + (uint32_t) (x[0]) \ + | (((uint32_t) (x[1])) << 8) \ + | (((uint32_t) (x[2])) << 16) \ + | (((uint32_t) (x[3])) << 24) + ; +} + +static void +store_littleendian(unsigned char *x, uint32_t u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +static int +crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; + + j0 = x0 = load_littleendian(c + 0); + j1 = x1 = load_littleendian(k + 0); + j2 = x2 = load_littleendian(k + 4); + j3 = x3 = load_littleendian(k + 8); + j4 = x4 = load_littleendian(k + 12); + j5 = x5 = load_littleendian(c + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = load_littleendian(c + 8); + j11 = x11 = load_littleendian(k + 16); + j12 = x12 = load_littleendian(k + 20); + j13 = x13 = load_littleendian(k + 24); + j14 = x14 = load_littleendian(k + 28); + j15 = x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x1); + store_littleendian(out + 8,x2); + store_littleendian(out + 12,x3); + store_littleendian(out + 16,x4); + store_littleendian(out + 20,x5); + store_littleendian(out + 24,x6); + store_littleendian(out + 28,x7); + store_littleendian(out + 32,x8); + store_littleendian(out + 36,x9); + store_littleendian(out + 40,x10); + store_littleendian(out + 44,x11); + store_littleendian(out + 48,x12); + store_littleendian(out + 52,x13); + store_littleendian(out + 56,x14); + store_littleendian(out + 60,x15); + + return 0; +} + +int +crypto_stream_salsa20_ref_xor( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char in[16]; + unsigned char block[64]; + int i; + unsigned int u; + + if (!mlen) return 0; + + for (i = 0;i < 8;++i) in[i] = n[i]; + for (i = 8;i < 16;++i) in[i] 
= 0; + + while (mlen >= 64) { + crypto_core(block,in,k,sigma); + for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i]; + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + mlen -= 64; + c += 64; + m += 64; + } + + if (mlen) { + crypto_core(block,in,k,sigma); + for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i]; + } + return 0; +} + +int +crypto_stream_salsa20_ref( + unsigned char *c,unsigned long long clen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char in[16]; + unsigned char block[64]; + int i; + unsigned int u; + + if (!clen) return 0; + + for (i = 0;i < 8;++i) in[i] = n[i]; + for (i = 8;i < 16;++i) in[i] = 0; + + while (clen >= 64) { + crypto_core(c,in,k,sigma); + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + clen -= 64; + c += 64; + } + + if (clen) { + crypto_core(block,in,k,sigma); + for (i = 0;i < clen;++i) c[i] = block[i]; + } + return 0; +} diff --git a/crypto/xsalsa20/stream.s b/crypto/xsalsa20/stream.s new file mode 100644 index 0000000..82a897f --- /dev/null +++ b/crypto/xsalsa20/stream.s @@ -0,0 +1,4823 @@ + +# qhasm: int64 r11_caller + +# qhasm: int64 r12_caller + +# qhasm: int64 r13_caller + +# qhasm: int64 r14_caller + +# qhasm: int64 r15_caller + +# qhasm: int64 rbx_caller + +# qhasm: int64 rbp_caller + +# qhasm: caller r11_caller + +# qhasm: caller r12_caller + +# qhasm: caller r13_caller + +# qhasm: caller r14_caller + +# qhasm: caller r15_caller + +# qhasm: caller rbx_caller + +# qhasm: caller rbp_caller + +# qhasm: stack64 r11_stack + +# qhasm: stack64 r12_stack + +# qhasm: stack64 r13_stack + +# qhasm: stack64 r14_stack + +# qhasm: stack64 r15_stack + +# qhasm: stack64 rbx_stack + +# qhasm: stack64 rbp_stack + +# qhasm: int64 a + +# qhasm: int64 arg1 + +# qhasm: int64 arg2 + +# qhasm: int64 arg3 + +# qhasm: int64 arg4 + +# qhasm: int64 arg5 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: int64 k + +# qhasm: int64 kbits + +# qhasm: int64 iv + +# qhasm: int64 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int64 m + +# qhasm: int64 out + +# qhasm: int64 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: int6464 z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 
z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: int6464 y0 + +# qhasm: int6464 y1 + +# qhasm: int6464 y2 + +# qhasm: int6464 y3 + +# qhasm: int6464 y4 + +# qhasm: int6464 y5 + +# qhasm: int6464 y6 + +# qhasm: int6464 y7 + +# qhasm: int6464 y8 + +# qhasm: int6464 y9 + +# qhasm: int6464 y10 + +# qhasm: int6464 y11 + +# qhasm: int6464 y12 + +# qhasm: int6464 y13 + +# qhasm: int6464 y14 + +# qhasm: int6464 y15 + +# qhasm: int6464 r0 + +# qhasm: int6464 r1 + +# qhasm: int6464 r2 + +# qhasm: int6464 r3 + +# qhasm: int6464 r4 + +# qhasm: int6464 r5 + +# qhasm: int6464 r6 + +# qhasm: int6464 r7 + +# qhasm: int6464 r8 + +# qhasm: int6464 r9 + +# qhasm: int6464 r10 + +# qhasm: int6464 r11 + +# qhasm: int6464 r12 + +# qhasm: int6464 r13 + +# qhasm: int6464 r14 + +# qhasm: int6464 r15 + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int64 in0 + +# qhasm: int64 in1 + +# qhasm: int64 in2 + +# qhasm: int64 in3 + +# qhasm: int64 in4 + +# qhasm: int64 in5 + +# qhasm: int64 in6 + +# qhasm: int64 in7 + +# qhasm: int64 in8 + +# qhasm: int64 in9 + +# qhasm: int64 in10 + +# qhasm: int64 in11 + +# qhasm: int64 in12 + +# qhasm: int64 in13 + +# qhasm: int64 in14 + +# qhasm: int64 in15 + +# qhasm: stack512 tmp + +# qhasm: int64 ctarget + +# qhasm: stack64 bytes_backup + +# qhasm: enter crypto_stream_salsa20_amd64_xmm6 +.text +.p2align 5 +.globl _crypto_stream_salsa20_amd64_xmm6 +.globl crypto_stream_salsa20_amd64_xmm6 +_crypto_stream_salsa20_amd64_xmm6: +crypto_stream_salsa20_amd64_xmm6: +mov %rsp,%r11 +and $31,%r11 +add $480,%r11 +sub %r11,%rsp + +# qhasm: r11_stack = r11_caller +# asm 1: movq r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov 
$857760878,>in5=int64#4 +# asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) +movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd 
$0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 20 +# asm 1: mov $20,>i=int64#3 +# asm 2: mov $20,>i=%rdx +mov $20,%rdx + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa 
z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: movdqa y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa y5=int6464#3 +# asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: 
uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: movdqa y3=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld 
[The remainder of crypto/xsalsa20/stream.s is elided here: the qhasm-generated
x86_64 SSE2 Salsa20 stream implementation (amd64-xmm6) shipped with NaCl,
4,823 lines per the diffstat. It keeps the interleaved "# qhasm:" / "# asm"
comment pairs seen above and consists of a four-block vectorized main loop
(._mainloop1, 256 bytes of keystream per iteration), a one-block fallback loop
(._mainloop2, 64 bytes per iteration), partial-block handling through a stack
buffer copied with rep movsb, and callee-saved register restore around ._done.
The listing was garbled in this copy of the patch and is not reproduced.]
diff --git a/crypto/xsalsa20/xsalsa20_xor.c b/crypto/xsalsa20/xsalsa20_xor.c
new file mode 100644
--- /dev/null
+++ b/crypto/xsalsa20/xsalsa20_xor.c
@@ -0,0 +1,140 @@
+#include <stdio.h>
+#include <inttypes.h>
+#include <time.h>
+#include <string.h>
+#include <utils.h>
+#include <openssl/rand.h>
+#include <openssl/evp.h>
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "crypto_xsalsa20.h"
+
+extern int geturandom_bytes(uchar_t *rbytes, int nbytes);
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+static const unsigned char tau[16] = "expand 16-byte k";
+
+/*
+ * XSalsa20 construction: crypto_core_hsalsa20() mixes the 256-bit key with
+ * the first 16 bytes of the 24-byte nonce to derive a 256-bit subkey; the
+ * Salsa20 stream cipher is then run with that subkey and the remaining 8
+ * nonce bytes. The tau constant marks a key that was originally 128 bits
+ * (and has been repeated up to 256 bits), sigma a native 256-bit key.
+ */
+static int
+crypto_salsa20(unsigned char *c, const unsigned char *m, unsigned long long mlen,
+	const unsigned char *n, const unsigned char *k, int klen)
+{
+	unsigned char subkey[32];
+
+	if (klen < XSALSA20_CRYPTO_KEYBYTES)
+		crypto_core_hsalsa20(subkey, n, k, tau);
+	else
+		crypto_core_hsalsa20(subkey, n, k, sigma);
+	return crypto_stream_salsa20_xor(c, m, mlen, n + 16, subkey);
+}
+
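+/*
+ * Initialize the XSalsa20 context. The user password is stretched into the
+ * cipher key with scrypt by default, or with PBKDF2/HMAC-SHA256 when
+ * _USE_PBK is defined. When encrypting, a fresh 192-bit nonce is drawn from
+ * the first usable source in this order: OpenSSL RAND_bytes(), the
+ * geturandom_bytes() helper, and as a last resort a PBKDF2 hash of the
+ * current time.
+ */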
+int
+salsa20_init(salsa20_ctx_t *ctx, uchar_t *salt, int saltlen, uchar_t *pwd, int pwd_len,
+	     uchar_t *nonce, int enc)
+{
+	struct timespec tp;
+	uint64_t tv;
+	uchar_t num[25];
+	uchar_t IV[32];
+	uchar_t *key = ctx->pkey;
+
+#ifndef _USE_PBK
+	int logN;
+	uint32_t r, p;
+	uint64_t N;
+
+	pickparams(&logN, &r, &p);
+	N = (uint64_t)(1) << logN;
+	if (crypto_scrypt(pwd, pwd_len, salt, saltlen, N, r, p, key, ctx->keylen)) {
+		fprintf(stderr, "Scrypt failed\n");
+		return (-1);
+	}
+#else
+	int rv;
+
+	rv = PKCS5_PBKDF2_HMAC((const char *)pwd, pwd_len, salt, saltlen, PBE_ROUNDS,
+	    EVP_sha256(), ctx->keylen, key);
+	if (rv != 1) {
+		fprintf(stderr, "PBKDF2 failed to derive a %d-byte key\n", ctx->keylen);
+		return (-1);
+	}
+#endif
+
+	/*
+	 * Copy the key. The XSalsa20 core cipher always uses a 256-bit key. If
+	 * we are using a 128-bit key then the key value is repeated to form a
+	 * 256-bit value: a 16-byte key K yields the 32-byte key K||K. This
+	 * approach is based on the Salsa20 code submitted to eSTREAM; see the
+	 * function ECRYPT_keysetup() in the Salsa20 submission:
+	 * http://www.ecrypt.eu.org/stream/svn/viewcvs.cgi/ecrypt/trunk/submissions/salsa20/full/ref/salsa20.c?rev=161&view=auto
+	 *
+	 * The input words corresponding to a 256-bit key then contain repeated
+	 * values when the key length is 128 bits.
+	 */
+	memcpy(ctx->key, key, ctx->keylen);
+	if (ctx->keylen < XSALSA20_CRYPTO_KEYBYTES) {
+		uchar_t *k;
+
+		k = ctx->key + ctx->keylen;
+		memcpy(k, key, XSALSA20_CRYPTO_KEYBYTES - ctx->keylen);
+	}
+
+	if (enc) {
+		// Derive 192-bit nonce
+		if (RAND_status() != 1 || RAND_bytes(IV, XSALSA20_CRYPTO_NONCEBYTES) != 1) {
+			if (geturandom_bytes(IV, XSALSA20_CRYPTO_NONCEBYTES) != 0) {
+				if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1) {
+					time((time_t *)&tv);
+				} else {
+					tv = tp.tv_sec * 1000UL + tp.tv_nsec;
+				}
+				sprintf((char *)num, "%" PRIu64, tv);
+				PKCS5_PBKDF2_HMAC((const char *)num, strlen((char *)num), salt,
+				    saltlen, PBE_ROUNDS, EVP_sha256(), 32, IV);
+			}
+		}
+		memcpy(ctx->nonce, IV, XSALSA20_CRYPTO_NONCEBYTES);
+
+		// Nullify stack components
+		memset(num, 0, 25);
+		memset(IV, 0, 32);
+		memset(&tp, 0, sizeof (tp));
+		tv = 0;
+	} else {
+		memcpy(ctx->nonce, nonce, XSALSA20_CRYPTO_NONCEBYTES);
+		memset(nonce, 0, XSALSA20_CRYPTO_NONCEBYTES);
+	}
+	return (0);
+}
+
+int
+salsa20_encrypt(salsa20_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uint64_t len, uint64_t id)
+{
+	uchar_t nonce[XSALSA20_CRYPTO_NONCEBYTES];
+
+	/*
+	 * Form a per-chunk nonce by adding the chunk id into the leading
+	 * 64 bits, mirroring the nonce + id counter of the AES-CTR path.
+	 */
+	memcpy(nonce, ctx->nonce, XSALSA20_CRYPTO_NONCEBYTES);
+	*((uint64_t *)nonce) += id;
+	return (crypto_salsa20(ciphertext, plaintext, len, nonce, ctx->key, ctx->keylen));
+}
+
+int
+salsa20_decrypt(salsa20_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t len, uint64_t id)
+{
+	uchar_t nonce[XSALSA20_CRYPTO_NONCEBYTES];
+
+	memcpy(nonce, ctx->nonce, XSALSA20_CRYPTO_NONCEBYTES);
+	*((uint64_t *)nonce) += id;
+	return (crypto_salsa20(plaintext, ciphertext, len, nonce, ctx->key, ctx->keylen));
+}
+
+uchar_t *
+salsa20_nonce(salsa20_ctx_t *ctx)
+{
+	return (ctx->nonce);
+}
+
+void
+salsa20_clean_pkey(salsa20_ctx_t *ctx)
+{
+	memset(ctx->pkey, 0, ctx->keylen);
+}
+
+void
+salsa20_cleanup(salsa20_ctx_t *ctx)
+{
+	memset((void *)(&ctx->key), 0, sizeof (ctx->key));
+	memset(ctx->nonce, 0, XSALSA20_CRYPTO_NONCEBYTES);
+	free(ctx);
+}
diff --git a/main.c b/main.c
index 701dcb4..1e597f8 100644
--- a/main.c
+++ b/main.c
@@ -50,6 +50,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <crypto_xsalsa20.h>
 #include <...>
 
 /*
@@ -797,9 +798,9 @@ start_decompress(const char *filename, const char *to_filename)
 	 * If encryption is enabled initialize crypto.
 	 */
 	if (flags & MASK_CRYPTO_ALG) {
-		int saltlen;
+		int saltlen, noncelen;
 		uchar_t *salt1, *salt2;
-		uint64_t nonce, d3;
+		uchar_t nonce[MAX_NONCE], n1[MAX_NONCE];
 		uchar_t pw[MAX_PW_LEN];
 		int pw_len;
 		mac_ctx_t hdr_mac;
@@ -807,6 +808,7 @@ start_decompress(const char *filename, const char *to_filename)
 		unsigned int hlen;
 		unsigned short d1;
 		unsigned int d2;
+		uint64_t d3;
 
 		/*
 		 * In encrypted files we do not have a normal digest. The HMAC
@@ -816,6 +818,15 @@ start_decompress(const char *filename, const char *to_filename)
 		pw_len = -1;
 		compressed_chunksize += mac_bytes;
 		encrypt_type = flags & MASK_CRYPTO_ALG;
+
+		if (encrypt_type == CRYPTO_ALG_AES) {
+			noncelen = 8;
+		} else if (encrypt_type == CRYPTO_ALG_SALSA20) {
+			noncelen = XSALSA20_CRYPTO_NONCEBYTES;
+		} else {
+			fprintf(stderr, "Invalid encryption algorithm code: %d. File corrupt?\n", encrypt_type);
+			UNCOMP_BAIL;
+		}
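+
+		/*
+		 * The encrypted-file header that follows is laid out as: salt
+		 * length, serialized salt, nonce (8 bytes for AES-CTR, 24
+		 * bytes for XSalsa20) and, for version 7 onwards, the key
+		 * length. All of these fields are covered by the header HMAC
+		 * verified below.
+		 */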
 		if (Read(compfd, &saltlen, sizeof (saltlen)) < sizeof (saltlen)) {
 			perror("Read: ");
 			UNCOMP_BAIL;
 		}
@@ -830,7 +841,7 @@ start_decompress(const char *filename, const char *to_filename)
 		}
 		deserialize_checksum(salt2, salt1, saltlen);
 
-		if (Read(compfd, &nonce, sizeof (nonce)) < sizeof (nonce)) {
+		if (Read(compfd, n1, noncelen) < noncelen) {
 			memset(salt2, 0, saltlen);
 			free(salt2);
 			memset(salt1, 0, saltlen);
@@ -838,7 +849,13 @@ start_decompress(const char *filename, const char *to_filename)
 			perror("Read: ");
 			UNCOMP_BAIL;
 		}
-		nonce = ntohll(nonce);
+
+		if (encrypt_type == CRYPTO_ALG_AES) {
+			*((uint64_t *)nonce) = ntohll(*((uint64_t *)n1));
+
+		} else if (encrypt_type == CRYPTO_ALG_SALSA20) {
+			deserialize_checksum(nonce, n1, noncelen);
+		}
 
 		if (version > 6) {
 			if (Read(compfd, &keylen, sizeof (keylen)) < sizeof (keylen)) {
@@ -923,6 +940,7 @@ start_decompress(const char *filename, const char *to_filename)
 		memset(salt2, 0, saltlen);
 		free(salt2);
 		memset(pw, 0, MAX_PW_LEN);
+		memset(nonce, 0, noncelen);
 
 		/*
 		 * Verify file header HMAC.
@@ -936,16 +954,15 @@ start_decompress(const char *filename, const char *to_filename)
 		hmac_update(&hdr_mac, (uchar_t *)&d1, sizeof (version));
 		d1 = htons(flags);
 		hmac_update(&hdr_mac, (uchar_t *)&d1, sizeof (flags));
-		d3 = htonll(chunksize);
-		hmac_update(&hdr_mac, (uchar_t *)&d3, sizeof (nonce));
+		d3 = htonll(chunksize);
+		hmac_update(&hdr_mac, (uchar_t *)&d3, sizeof (chunksize));
 		d2 = htonl(level);
 		hmac_update(&hdr_mac, (uchar_t *)&d2, sizeof (level));
 		if (version > 6) {
 			d2 = htonl(saltlen);
 			hmac_update(&hdr_mac, (uchar_t *)&d2, sizeof (saltlen));
 			hmac_update(&hdr_mac, salt1, saltlen);
-			nonce = htonll(nonce);
-			hmac_update(&hdr_mac, (uchar_t *)&nonce, sizeof (nonce));
+			hmac_update(&hdr_mac, n1, noncelen);
 			d2 = htonl(keylen);
 			hmac_update(&hdr_mac, (uchar_t *)&d2, sizeof (keylen));
 		}
@@ -953,7 +970,7 @@ start_decompress(const char *filename, const char *to_filename)
 		hmac_cleanup(&hdr_mac);
 		memset(salt1, 0, saltlen);
 		free(salt1);
-		nonce = 0;
+		memset(n1, 0, noncelen);
 		if (memcmp(hdr_hash2, hdr_hash1, mac_bytes) != 0) {
 			close(uncompfd); unlink(to_filename);
 			err_exit(0, "Header verification failed! File tampered or wrong password.\n");
@@ -1844,8 +1861,14 @@ start_compress(const char *filename, uint64_t chunksize, int level)
 		pos += sizeof (int);
 		serialize_checksum(crypto_ctx.salt, pos, crypto_ctx.saltlen);
 		pos += crypto_ctx.saltlen;
-		*((uint64_t *)pos) = htonll(crypto_nonce(&crypto_ctx));
-		pos += 8;
+		if (encrypt_type == CRYPTO_ALG_AES) {
+			*((uint64_t *)pos) = htonll(*((uint64_t *)crypto_nonce(&crypto_ctx)));
+			pos += 8;
+
+		} else if (encrypt_type == CRYPTO_ALG_SALSA20) {
+			serialize_checksum(crypto_nonce(&crypto_ctx), pos, XSALSA20_CRYPTO_NONCEBYTES);
+			pos += XSALSA20_CRYPTO_NONCEBYTES;
+		}
 		*((int *)pos) = htonl(keylen);
 		pos += sizeof (int);
 	}
@@ -2251,7 +2274,7 @@ main(int argc, char *argv[])
 	slab_init();
 	init_pcompress();
 
-	while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEew:rLPS:B:Fk:")) != -1) {
+	while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDEe:w:rLPS:B:Fk:")) != -1) {
 		int ovr;
 
 		switch (opt) {
@@ -2325,7 +2348,10 @@ main(int argc, char *argv[])
 			break;
 
 		case 'e':
-			encrypt_type = CRYPTO_ALG_AES;
+			encrypt_type = get_crypto_alg(optarg);
+			if (encrypt_type == 0) {
+				err_exit(0, "Invalid encryption algorithm '%s'. Should be AES or SALSA20.\n", optarg);
+			}
 			break;
 
 		case 'w':