Use SSE2 intrinsics to speed up AES in CTR mode.

Add debug output reporting encryption, decryption, and HMAC throughput.
Fix the error message printed for an invalid algorithm option.
This commit is contained in:
Moinak Ghosh 2013-01-16 19:52:46 +05:30
parent 39dbc4be43
commit 49ec3a054d
2 changed files with 56 additions and 10 deletions

View file

@ -29,10 +29,11 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <openssl/aes.h> #include <openssl/aes.h>
#ifdef __USE_SSE_INTRIN__
#include "sysendian.h" #include <emmintrin.h>
#endif
#include <utils.h>
#include "crypto_aesctr.h" #include "crypto_aesctr.h"
@ -40,7 +41,7 @@ struct crypto_aesctr {
AES_KEY * key; AES_KEY * key;
uint64_t nonce; uint64_t nonce;
uint64_t bytectr; uint64_t bytectr;
uint8_t buf[16]; uint8_t buf[16] __attribute__((aligned(16)));
}; };
/** /**
@ -83,17 +84,25 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
{ {
uint8_t pblk[16]; uint8_t pblk[16];
size_t pos; size_t pos;
int bytemod; int bytemod, last;
for (pos = 0; pos < buflen; pos++) { last = 0;
pos = 0;
*((uint64_t *)pblk) = htonll(stream->nonce);
do_last:
for (; pos < buflen; pos++) {
/* How far through the buffer are we? */ /* How far through the buffer are we? */
bytemod = stream->bytectr % 16; bytemod = stream->bytectr & (16 - 1);
/* Generate a block of cipherstream if needed. */ /* Generate a block of cipherstream if needed. */
if (bytemod == 0) { if (bytemod == 0) {
be64enc(pblk, stream->nonce); *((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
be64enc(pblk + 8, stream->bytectr / 16);
AES_encrypt(pblk, stream->buf, stream->key); AES_encrypt(pblk, stream->buf, stream->key);
#ifdef __USE_SSE_INTRIN__
if (!last)
break;
#endif
} }
/* Encrypt a byte. */ /* Encrypt a byte. */
@ -102,6 +111,24 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
/* Move to the next byte of cipherstream. */ /* Move to the next byte of cipherstream. */
stream->bytectr += 1; stream->bytectr += 1;
} }
#ifdef __USE_SSE_INTRIN__
if (last) return;
for (; pos < buflen-15; pos += 16) {
__m128i cblk, dat, odat;
__builtin_prefetch(outbuf+pos, 1, 0);
__builtin_prefetch(inbuf+pos, 0, 0);
cblk = _mm_load_si128((__m128i *)(stream->buf));
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
odat = _mm_xor_si128(cblk, dat);
_mm_storeu_si128((__m128i *)(outbuf+pos), odat);
stream->bytectr += 16;
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
AES_encrypt(pblk, stream->buf, stream->key);
}
last = 1;
goto do_last;
#endif
} }
/** /**

21
main.c
View file

@ -378,7 +378,9 @@ redo:
*/ */
if (encrypt_type) { if (encrypt_type) {
unsigned int len; unsigned int len;
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
len = mac_bytes; len = mac_bytes;
deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes); deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes);
memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes); memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes);
@ -402,11 +404,15 @@ redo:
sem_post(&tdat->cmp_done_sem); sem_post(&tdat->cmp_done_sem);
return (NULL); return (NULL);
} }
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "HMAC Verification speed %.3f MB/s\n",
get_mb_s(tdat->rbytes + sizeof (tdat->len_cmp_be), strt, en)));
/* /*
* Encryption algorithm should not change the size and * Encryption algorithm should not change the size and
* encryption is in-place. * encryption is in-place.
*/ */
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id); rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id);
if (rv == -1) { if (rv == -1) {
/* /*
@ -417,6 +423,9 @@ redo:
sem_post(&tdat->cmp_done_sem); sem_post(&tdat->cmp_done_sem);
return (NULL); return (NULL);
} }
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Decryption speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
} else if (mac_bytes > 0) { } else if (mac_bytes > 0) {
/* /*
* Verify header CRC32 in non-crypto mode. * Verify header CRC32 in non-crypto mode.
@ -1301,11 +1310,13 @@ plain_index:
*/ */
if (encrypt_type) { if (encrypt_type) {
int ret; int ret;
DEBUG_STAT_EN(double strt, en);
/* /*
* Encryption algorithm must not change the size and * Encryption algorithm must not change the size and
* encryption is in-place. * encryption is in-place.
*/ */
DEBUG_STAT_EN(strt = get_wtime_millis());
ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk, ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk,
tdat->len_cmp, tdat->id); tdat->len_cmp, tdat->id);
if (ret == -1) { if (ret == -1) {
@ -1318,6 +1329,9 @@ plain_index:
sem_post(&tdat->cmp_done_sem); sem_post(&tdat->cmp_done_sem);
return (0); return (0);
} }
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Encryption speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
} }
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) { if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
@ -1364,14 +1378,19 @@ plain_index:
uchar_t *mac_ptr; uchar_t *mac_ptr;
unsigned int hlen; unsigned int hlen;
uchar_t chash[mac_bytes]; uchar_t chash[mac_bytes];
DEBUG_STAT_EN(double strt, en);
/* Clean out mac_bytes to 0 for stable HMAC. */ /* Clean out mac_bytes to 0 for stable HMAC. */
DEBUG_STAT_EN(strt = get_wtime_millis());
mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes; mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes;
memset(mac_ptr, 0, mac_bytes); memset(mac_ptr, 0, mac_bytes);
hmac_reinit(&tdat->chunk_hmac); hmac_reinit(&tdat->chunk_hmac);
hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp); hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp);
hmac_final(&tdat->chunk_hmac, chash, &hlen); hmac_final(&tdat->chunk_hmac, chash, &hlen);
serialize_checksum(chash, mac_ptr, hlen); serialize_checksum(chash, mac_ptr, hlen);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "HMAC Computation speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
} else { } else {
/* /*
* Compute header CRC32 in non-crypto mode. * Compute header CRC32 in non-crypto mode.
@ -2142,7 +2161,7 @@ main(int argc, char *argv[])
do_compress = 1; do_compress = 1;
algo = optarg; algo = optarg;
if (init_algo(algo, 1) != 0) { if (init_algo(algo, 1) != 0) {
err_exit(1, "Invalid algorithm %s\n", optarg); err_exit(0, "Invalid algorithm %s\n", optarg);
} }
break; break;