Add an SSE2-accelerated 16-byte block path to AES CTR-mode stream encryption.
Add debug prints of encryption, decryption, and HMAC throughput. Fix the error message reported for an invalid algorithm option.
This commit is contained in:
parent
39dbc4be43
commit
49ec3a054d
2 changed files with 56 additions and 10 deletions
|
@ -29,10 +29,11 @@
|
|||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <openssl/aes.h>
|
||||
|
||||
#include "sysendian.h"
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <utils.h>
|
||||
|
||||
#include "crypto_aesctr.h"
|
||||
|
||||
|
@ -40,7 +41,7 @@ struct crypto_aesctr {
|
|||
AES_KEY * key;
|
||||
uint64_t nonce;
|
||||
uint64_t bytectr;
|
||||
uint8_t buf[16];
|
||||
uint8_t buf[16] __attribute__((aligned(16)));
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -83,17 +84,25 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
|
|||
{
|
||||
uint8_t pblk[16];
|
||||
size_t pos;
|
||||
int bytemod;
|
||||
int bytemod, last;
|
||||
|
||||
for (pos = 0; pos < buflen; pos++) {
|
||||
last = 0;
|
||||
pos = 0;
|
||||
*((uint64_t *)pblk) = htonll(stream->nonce);
|
||||
|
||||
do_last:
|
||||
for (; pos < buflen; pos++) {
|
||||
/* How far through the buffer are we? */
|
||||
bytemod = stream->bytectr % 16;
|
||||
bytemod = stream->bytectr & (16 - 1);
|
||||
|
||||
/* Generate a block of cipherstream if needed. */
|
||||
if (bytemod == 0) {
|
||||
be64enc(pblk, stream->nonce);
|
||||
be64enc(pblk + 8, stream->bytectr / 16);
|
||||
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
|
||||
AES_encrypt(pblk, stream->buf, stream->key);
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
if (!last)
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Encrypt a byte. */
|
||||
|
@ -102,6 +111,24 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
|
|||
/* Move to the next byte of cipherstream. */
|
||||
stream->bytectr += 1;
|
||||
}
|
||||
#ifdef __USE_SSE_INTRIN__
|
||||
if (last) return;
|
||||
for (; pos < buflen-15; pos += 16) {
|
||||
__m128i cblk, dat, odat;
|
||||
|
||||
__builtin_prefetch(outbuf+pos, 1, 0);
|
||||
__builtin_prefetch(inbuf+pos, 0, 0);
|
||||
cblk = _mm_load_si128((__m128i *)(stream->buf));
|
||||
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
|
||||
odat = _mm_xor_si128(cblk, dat);
|
||||
_mm_storeu_si128((__m128i *)(outbuf+pos), odat);
|
||||
stream->bytectr += 16;
|
||||
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
|
||||
AES_encrypt(pblk, stream->buf, stream->key);
|
||||
}
|
||||
last = 1;
|
||||
goto do_last;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
21
main.c
21
main.c
|
@ -378,7 +378,9 @@ redo:
|
|||
*/
|
||||
if (encrypt_type) {
|
||||
unsigned int len;
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
len = mac_bytes;
|
||||
deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes);
|
||||
memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes);
|
||||
|
@ -402,11 +404,15 @@ redo:
|
|||
sem_post(&tdat->cmp_done_sem);
|
||||
return (NULL);
|
||||
}
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "HMAC Verification speed %.3f MB/s\n",
|
||||
get_mb_s(tdat->rbytes + sizeof (tdat->len_cmp_be), strt, en)));
|
||||
|
||||
/*
|
||||
* Encryption algorithm should not change the size and
|
||||
* encryption is in-place.
|
||||
*/
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id);
|
||||
if (rv == -1) {
|
||||
/*
|
||||
|
@ -417,6 +423,9 @@ redo:
|
|||
sem_post(&tdat->cmp_done_sem);
|
||||
return (NULL);
|
||||
}
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Decryption speed %.3f MB/s\n",
|
||||
get_mb_s(tdat->len_cmp, strt, en)));
|
||||
} else if (mac_bytes > 0) {
|
||||
/*
|
||||
* Verify header CRC32 in non-crypto mode.
|
||||
|
@ -1301,11 +1310,13 @@ plain_index:
|
|||
*/
|
||||
if (encrypt_type) {
|
||||
int ret;
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
/*
|
||||
* Encryption algorithm must not change the size and
|
||||
* encryption is in-place.
|
||||
*/
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk,
|
||||
tdat->len_cmp, tdat->id);
|
||||
if (ret == -1) {
|
||||
|
@ -1318,6 +1329,9 @@ plain_index:
|
|||
sem_post(&tdat->cmp_done_sem);
|
||||
return (0);
|
||||
}
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Encryption speed %.3f MB/s\n",
|
||||
get_mb_s(tdat->len_cmp, strt, en)));
|
||||
}
|
||||
|
||||
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
|
||||
|
@ -1364,14 +1378,19 @@ plain_index:
|
|||
uchar_t *mac_ptr;
|
||||
unsigned int hlen;
|
||||
uchar_t chash[mac_bytes];
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
/* Clean out mac_bytes to 0 for stable HMAC. */
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes;
|
||||
memset(mac_ptr, 0, mac_bytes);
|
||||
hmac_reinit(&tdat->chunk_hmac);
|
||||
hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp);
|
||||
hmac_final(&tdat->chunk_hmac, chash, &hlen);
|
||||
serialize_checksum(chash, mac_ptr, hlen);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "HMAC Computation speed %.3f MB/s\n",
|
||||
get_mb_s(tdat->len_cmp, strt, en)));
|
||||
} else {
|
||||
/*
|
||||
* Compute header CRC32 in non-crypto mode.
|
||||
|
@ -2142,7 +2161,7 @@ main(int argc, char *argv[])
|
|||
do_compress = 1;
|
||||
algo = optarg;
|
||||
if (init_algo(algo, 1) != 0) {
|
||||
err_exit(1, "Invalid algorithm %s\n", optarg);
|
||||
err_exit(0, "Invalid algorithm %s\n", optarg);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
Loading…
Reference in a new issue