diff --git a/crypto/aes/crypto_aes.c b/crypto/aes/crypto_aes.c
index 5357ec1..937cf02 100644
--- a/crypto/aes/crypto_aes.c
+++ b/crypto/aes/crypto_aes.c
@@ -180,6 +180,9 @@ aes_encrypt(aes_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uint64_t le
 	}
 	crypto_aesctr_stream(strm, plaintext, ciphertext, len);
 	crypto_aesctr_free(strm);
+	strm = NULL;
+	k1 = NULL;
+	k2 = NULL;
 	memset(&key, 0, sizeof (key));
 	return (0);
 }
@@ -204,6 +207,9 @@ aes_decrypt(aes_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t le
 	}
 	crypto_aesctr_stream(strm, ciphertext, plaintext, len);
 	crypto_aesctr_free(strm);
+	strm = NULL;
+	k1 = NULL;
+	k2 = NULL;
 	memset(&key, 0, sizeof (key));
 	return (0);
 }
diff --git a/crypto/crypto_utils.c b/crypto/crypto_utils.c
index 40a750d..b409555 100644
--- a/crypto/crypto_utils.c
+++ b/crypto/crypto_utils.c
@@ -842,6 +842,8 @@ init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg,
 		}
 		cctx->crypto_alg = crypto_alg;
 		cctx->enc_dec = enc_dec;
+		actx = NULL;
+		sctx = NULL;
 	} else {
 		fprintf(stderr, "Unrecognized algorithm code: %d\n", crypto_alg);
 		return (-1);
diff --git a/crypto/scrypt/crypto_aesctr.c b/crypto/scrypt/crypto_aesctr.c
index fa29576..4776282 100644
--- a/crypto/scrypt/crypto_aesctr.c
+++ b/crypto/scrypt/crypto_aesctr.c
@@ -49,6 +49,7 @@
 
 #include 
 #include 
+#include <string.h>
 #include 
 #ifdef __USE_SSE_INTRIN__
 #include <emmintrin.h>
@@ -135,7 +136,10 @@ do_last:
 		stream->bytectr += 1;
 	}
 #ifdef __USE_SSE_INTRIN__
-	if (last) return;
+	if (last) {
+		memset(pblk, 0, 16);
+		return;
+	}
 
 	for (; pos < buflen-15; pos += 16) {
 		__m128i cblk, dat, odat;
diff --git a/crypto/xsalsa20/stream.c b/crypto/xsalsa20/stream.c
index 4a91047..e05880c 100644
--- a/crypto/xsalsa20/stream.c
+++ b/crypto/xsalsa20/stream.c
@@ -15,6 +15,10 @@ rotate(uint32_t u,int c)
   return (u << c) | (u >> (32 - c));
 }
 
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define load_littleendian(x) *((uint32_t *)(x))
+#define store_littleendian(x, u) *((uint32_t *)(x)) = (u);
+#else
 static uint32_t
 load_littleendian(const unsigned char *x)
 {
@@ -34,6 +38,7 @@ store_littleendian(unsigned char *x, uint32_t u)
   x[2] = u; u >>= 8;
   x[3] = u;
 }
+#endif
 
 static int
 crypto_core(
diff --git a/crypto/xsalsa20/xsalsa20_xor.c b/crypto/xsalsa20/xsalsa20_xor.c
index cb7312c..c956653 100644
--- a/crypto/xsalsa20/xsalsa20_xor.c
+++ b/crypto/xsalsa20/xsalsa20_xor.c
@@ -10,6 +10,7 @@ Public domain.
 #include 
 #include 
 #include 
+#include <assert.h>
 #include 
 #include "crypto_core_hsalsa20.h"
 #include "crypto_stream_salsa20.h"
@@ -21,10 +22,12 @@ static const unsigned char sigma[16] = "expand 32-byte k";
 static const unsigned char tau[16] = "expand 16-byte k";
 
 static int
-crypto_salsa20(unsigned char *c, const unsigned char *m, unsigned long long mlen,
+crypto_xsalsa20(unsigned char *c, const unsigned char *m, unsigned long long mlen,
 	const unsigned char *n, const unsigned char *k, int klen)
 {
 	unsigned char subkey[32];
+
+	assert(klen == 32 || klen == 16);
 	if (klen < XSALSA20_CRYPTO_KEYBYTES)
 		crypto_core_hsalsa20(subkey,n,k,tau);
 	else
@@ -124,7 +127,7 @@ int
 salsa20_encrypt(salsa20_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uint64_t len, uint64_t id)
 {
 	uchar_t nonce[XSALSA20_CRYPTO_NONCEBYTES];
-	int i;
+	int i, rv;
 	uint64_t *n, carry;
 
 	for (i = 0; i < XSALSA20_CRYPTO_NONCEBYTES; i++) nonce[i] = ctx->nonce[i];
@@ -139,21 +142,28 @@ salsa20_encrypt(salsa20_ctx_t *ctx, uchar_t *plaintext, uchar_t *ciphertext, uin
 			carry = 0;
 			break;
 		}
-		n++;
+		++n;
 	}
 	if (carry) {
 		n = (uint64_t *)nonce;
 		*n += carry;
+		carry = 0;
 	}
 
-	return (crypto_salsa20(ciphertext, plaintext, len, nonce, ctx->key, ctx->keylen));
+	rv = crypto_xsalsa20(ciphertext, plaintext, len, nonce, ctx->key, ctx->keylen);
+	n = (uint64_t *)nonce;
+	for (i = 0; i < XSALSA20_CRYPTO_NONCEBYTES/8; i++) {
+		*n = 0;
+		++n;
+	}
+	return (rv);
 }
 
 int
 salsa20_decrypt(salsa20_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uint64_t len, uint64_t id)
 {
 	uchar_t nonce[XSALSA20_CRYPTO_NONCEBYTES];
-	int i;
+	int i, rv;
 	uint64_t *n, carry;
 
 	for (i = 0; i < XSALSA20_CRYPTO_NONCEBYTES; i++) nonce[i] = ctx->nonce[i];
@@ -168,14 +178,21 @@ salsa20_decrypt(salsa20_ctx_t *ctx, uchar_t *ciphertext, uchar_t *plaintext, uin
 			carry = 0;
 			break;
 		}
-		n++;
+		++n;
 	}
 	if (carry) {
 		n = (uint64_t *)nonce;
 		*n += carry;
+		carry = 0;
 	}
 
-	return (crypto_salsa20(plaintext, ciphertext, len, nonce, ctx->key, ctx->keylen));
+	rv = crypto_xsalsa20(plaintext, ciphertext, len, nonce, ctx->key, ctx->keylen);
+	n = (uint64_t *)nonce;
+	for (i = 0; i < XSALSA20_CRYPTO_NONCEBYTES/8; i++) {
+		*n = 0;
+		++n;
+	}
+	return (rv);
 }
 
 uchar_t *
diff --git a/utils/xxhash.c b/utils/xxhash.c
index 76f3576..7decdb2 100644
--- a/utils/xxhash.c
+++ b/utils/xxhash.c
@@ -175,7 +175,7 @@ unsigned int CPUCAP_NM(XXH32)(const void* input, int len, unsigned int seed)
 
 /*
  * 4-way SIMD calculations with 4 ints in two blocks for 2 accumulators will
- * interleave to some extent on a hyperthreaded processor providing 10% - 14%
+ * interleave to some extent on a superscalar processor providing 10% - 14%
  * speedup over original xxhash depending on processor. We could have used
  * aligned loads but we actually want the unaligned penalty. It helps to
  * interleave better for a slight benefit over aligned loads here!
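
The stream.c hunk above swaps the byte-by-byte little-endian load/store for a direct
pointer cast when the host is little-endian. A minimal standalone sketch of that
equivalence (hypothetical file, not part of the patch; assumes a glibc-style
<endian.h> defining BYTE_ORDER and an aligned buffer):

#include <stdio.h>
#include <stdint.h>
#include <endian.h>	/* assumed glibc-style header providing BYTE_ORDER */

/* Portable byte-by-byte load, equivalent to the #else branch in stream.c */
static uint32_t
load_le_bytes(const unsigned char *x)
{
	return ((uint32_t)x[0]) | ((uint32_t)x[1] << 8) |
	    ((uint32_t)x[2] << 16) | ((uint32_t)x[3] << 24);
}

int
main(void)
{
	/* Union gives the buffer uint32_t alignment, so the direct load
	 * in the little-endian fast path is well-defined here. */
	union { uint32_t w; unsigned char b[4]; } buf;
	uint32_t v;

	buf.b[0] = 0x01; buf.b[1] = 0x02; buf.b[2] = 0x03; buf.b[3] = 0x04;
#if BYTE_ORDER == LITTLE_ENDIAN
	v = *((uint32_t *)buf.b);	/* fast path, as in the patched macro */
#else
	v = load_le_bytes(buf.b);	/* portable fallback */
#endif
	/* On a little-endian host both forms print 04030201. */
	printf("%08x %08x\n", v, load_le_bytes(buf.b));
	return (0);
}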