Add SSE2 improvements to CTR mode AES.

Add debug print of encryption and HMAC throughput.
Fix error message for invalid algorithm option.
This commit is contained in:
Moinak Ghosh 2013-01-16 19:52:46 +05:30
parent 39dbc4be43
commit 49ec3a054d
2 changed files with 56 additions and 10 deletions

View file

@ -29,10 +29,11 @@
#include <stdint.h>
#include <stdlib.h>
#include <openssl/aes.h>
#include "sysendian.h"
#ifdef __USE_SSE_INTRIN__
#include <emmintrin.h>
#endif
#include <utils.h>
#include "crypto_aesctr.h"
@ -40,7 +41,7 @@ struct crypto_aesctr {
AES_KEY * key;
uint64_t nonce;
uint64_t bytectr;
uint8_t buf[16];
uint8_t buf[16] __attribute__((aligned(16)));
};
/**
@ -83,17 +84,25 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
{
uint8_t pblk[16];
size_t pos;
int bytemod;
int bytemod, last;
for (pos = 0; pos < buflen; pos++) {
last = 0;
pos = 0;
*((uint64_t *)pblk) = htonll(stream->nonce);
do_last:
for (; pos < buflen; pos++) {
/* How far through the buffer are we? */
bytemod = stream->bytectr % 16;
bytemod = stream->bytectr & (16 - 1);
/* Generate a block of cipherstream if needed. */
if (bytemod == 0) {
be64enc(pblk, stream->nonce);
be64enc(pblk + 8, stream->bytectr / 16);
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
AES_encrypt(pblk, stream->buf, stream->key);
#ifdef __USE_SSE_INTRIN__
if (!last)
break;
#endif
}
/* Encrypt a byte. */
@ -102,6 +111,24 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
/* Move to the next byte of cipherstream. */
stream->bytectr += 1;
}
#ifdef __USE_SSE_INTRIN__
if (last) return;
for (; pos < buflen-15; pos += 16) {
__m128i cblk, dat, odat;
__builtin_prefetch(outbuf+pos, 1, 0);
__builtin_prefetch(inbuf+pos, 0, 0);
cblk = _mm_load_si128((__m128i *)(stream->buf));
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
odat = _mm_xor_si128(cblk, dat);
_mm_storeu_si128((__m128i *)(outbuf+pos), odat);
stream->bytectr += 16;
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
AES_encrypt(pblk, stream->buf, stream->key);
}
last = 1;
goto do_last;
#endif
}
/**

21
main.c
View file

@ -378,7 +378,9 @@ redo:
*/
if (encrypt_type) {
unsigned int len;
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
len = mac_bytes;
deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes);
memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes);
@ -402,11 +404,15 @@ redo:
sem_post(&tdat->cmp_done_sem);
return (NULL);
}
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "HMAC Verification speed %.3f MB/s\n",
get_mb_s(tdat->rbytes + sizeof (tdat->len_cmp_be), strt, en)));
/*
* Encryption algorithm should not change the size and
* encryption is in-place.
*/
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id);
if (rv == -1) {
/*
@ -417,6 +423,9 @@ redo:
sem_post(&tdat->cmp_done_sem);
return (NULL);
}
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Decryption speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
} else if (mac_bytes > 0) {
/*
* Verify header CRC32 in non-crypto mode.
@ -1301,11 +1310,13 @@ plain_index:
*/
if (encrypt_type) {
int ret;
DEBUG_STAT_EN(double strt, en);
/*
* Encryption algorithm must not change the size and
* encryption is in-place.
*/
DEBUG_STAT_EN(strt = get_wtime_millis());
ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk,
tdat->len_cmp, tdat->id);
if (ret == -1) {
@ -1318,6 +1329,9 @@ plain_index:
sem_post(&tdat->cmp_done_sem);
return (0);
}
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Encryption speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
}
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
@ -1364,14 +1378,19 @@ plain_index:
uchar_t *mac_ptr;
unsigned int hlen;
uchar_t chash[mac_bytes];
DEBUG_STAT_EN(double strt, en);
/* Clean out mac_bytes to 0 for stable HMAC. */
DEBUG_STAT_EN(strt = get_wtime_millis());
mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes;
memset(mac_ptr, 0, mac_bytes);
hmac_reinit(&tdat->chunk_hmac);
hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp);
hmac_final(&tdat->chunk_hmac, chash, &hlen);
serialize_checksum(chash, mac_ptr, hlen);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "HMAC Computation speed %.3f MB/s\n",
get_mb_s(tdat->len_cmp, strt, en)));
} else {
/*
* Compute header CRC32 in non-crypto mode.
@ -2142,7 +2161,7 @@ main(int argc, char *argv[])
do_compress = 1;
algo = optarg;
if (init_algo(algo, 1) != 0) {
err_exit(1, "Invalid algorithm %s\n", optarg);
err_exit(0, "Invalid algorithm %s\n", optarg);
}
break;