Add SSE2 improvements to CTR mode AES.
Add debug print of encryption and HMAC throughput. Fix error message for invalid option.
This commit is contained in:
parent
39dbc4be43
commit
49ec3a054d
2 changed files with 56 additions and 10 deletions
|
@ -29,10 +29,11 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include <openssl/aes.h>
|
#include <openssl/aes.h>
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
#include "sysendian.h"
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
#include <utils.h>
|
||||||
|
|
||||||
#include "crypto_aesctr.h"
|
#include "crypto_aesctr.h"
|
||||||
|
|
||||||
|
@ -40,7 +41,7 @@ struct crypto_aesctr {
|
||||||
AES_KEY * key;
|
AES_KEY * key;
|
||||||
uint64_t nonce;
|
uint64_t nonce;
|
||||||
uint64_t bytectr;
|
uint64_t bytectr;
|
||||||
uint8_t buf[16];
|
uint8_t buf[16] __attribute__((aligned(16)));
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -83,17 +84,25 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
|
||||||
{
|
{
|
||||||
uint8_t pblk[16];
|
uint8_t pblk[16];
|
||||||
size_t pos;
|
size_t pos;
|
||||||
int bytemod;
|
int bytemod, last;
|
||||||
|
|
||||||
for (pos = 0; pos < buflen; pos++) {
|
last = 0;
|
||||||
|
pos = 0;
|
||||||
|
*((uint64_t *)pblk) = htonll(stream->nonce);
|
||||||
|
|
||||||
|
do_last:
|
||||||
|
for (; pos < buflen; pos++) {
|
||||||
/* How far through the buffer are we? */
|
/* How far through the buffer are we? */
|
||||||
bytemod = stream->bytectr % 16;
|
bytemod = stream->bytectr & (16 - 1);
|
||||||
|
|
||||||
/* Generate a block of cipherstream if needed. */
|
/* Generate a block of cipherstream if needed. */
|
||||||
if (bytemod == 0) {
|
if (bytemod == 0) {
|
||||||
be64enc(pblk, stream->nonce);
|
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
|
||||||
be64enc(pblk + 8, stream->bytectr / 16);
|
|
||||||
AES_encrypt(pblk, stream->buf, stream->key);
|
AES_encrypt(pblk, stream->buf, stream->key);
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
|
if (!last)
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encrypt a byte. */
|
/* Encrypt a byte. */
|
||||||
|
@ -102,6 +111,24 @@ crypto_aesctr_stream(struct crypto_aesctr * stream, const uint8_t * inbuf,
|
||||||
/* Move to the next byte of cipherstream. */
|
/* Move to the next byte of cipherstream. */
|
||||||
stream->bytectr += 1;
|
stream->bytectr += 1;
|
||||||
}
|
}
|
||||||
|
#ifdef __USE_SSE_INTRIN__
|
||||||
|
if (last) return;
|
||||||
|
for (; pos < buflen-15; pos += 16) {
|
||||||
|
__m128i cblk, dat, odat;
|
||||||
|
|
||||||
|
__builtin_prefetch(outbuf+pos, 1, 0);
|
||||||
|
__builtin_prefetch(inbuf+pos, 0, 0);
|
||||||
|
cblk = _mm_load_si128((__m128i *)(stream->buf));
|
||||||
|
dat = _mm_loadu_si128((__m128i *)(inbuf+pos));
|
||||||
|
odat = _mm_xor_si128(cblk, dat);
|
||||||
|
_mm_storeu_si128((__m128i *)(outbuf+pos), odat);
|
||||||
|
stream->bytectr += 16;
|
||||||
|
*((uint64_t *)(pblk + 8)) = htonll(stream->bytectr / 16);
|
||||||
|
AES_encrypt(pblk, stream->buf, stream->key);
|
||||||
|
}
|
||||||
|
last = 1;
|
||||||
|
goto do_last;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
21
main.c
21
main.c
|
@ -378,7 +378,9 @@ redo:
|
||||||
*/
|
*/
|
||||||
if (encrypt_type) {
|
if (encrypt_type) {
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
|
DEBUG_STAT_EN(double strt, en);
|
||||||
|
|
||||||
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
len = mac_bytes;
|
len = mac_bytes;
|
||||||
deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes);
|
deserialize_checksum(checksum, tdat->compressed_chunk + cksum_bytes, mac_bytes);
|
||||||
memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes);
|
memset(tdat->compressed_chunk + cksum_bytes, 0, mac_bytes);
|
||||||
|
@ -402,11 +404,15 @@ redo:
|
||||||
sem_post(&tdat->cmp_done_sem);
|
sem_post(&tdat->cmp_done_sem);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
DEBUG_STAT_EN(fprintf(stderr, "HMAC Verification speed %.3f MB/s\n",
|
||||||
|
get_mb_s(tdat->rbytes + sizeof (tdat->len_cmp_be), strt, en)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encryption algorithm should not change the size and
|
* Encryption algorithm should not change the size and
|
||||||
* encryption is in-place.
|
* encryption is in-place.
|
||||||
*/
|
*/
|
||||||
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id);
|
rv = crypto_buf(&crypto_ctx, cseg, cseg, tdat->len_cmp, tdat->id);
|
||||||
if (rv == -1) {
|
if (rv == -1) {
|
||||||
/*
|
/*
|
||||||
|
@ -417,6 +423,9 @@ redo:
|
||||||
sem_post(&tdat->cmp_done_sem);
|
sem_post(&tdat->cmp_done_sem);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
DEBUG_STAT_EN(fprintf(stderr, "Decryption speed %.3f MB/s\n",
|
||||||
|
get_mb_s(tdat->len_cmp, strt, en)));
|
||||||
} else if (mac_bytes > 0) {
|
} else if (mac_bytes > 0) {
|
||||||
/*
|
/*
|
||||||
* Verify header CRC32 in non-crypto mode.
|
* Verify header CRC32 in non-crypto mode.
|
||||||
|
@ -1301,11 +1310,13 @@ plain_index:
|
||||||
*/
|
*/
|
||||||
if (encrypt_type) {
|
if (encrypt_type) {
|
||||||
int ret;
|
int ret;
|
||||||
|
DEBUG_STAT_EN(double strt, en);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encryption algorithm must not change the size and
|
* Encryption algorithm must not change the size and
|
||||||
* encryption is in-place.
|
* encryption is in-place.
|
||||||
*/
|
*/
|
||||||
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk,
|
ret = crypto_buf(&crypto_ctx, compressed_chunk, compressed_chunk,
|
||||||
tdat->len_cmp, tdat->id);
|
tdat->len_cmp, tdat->id);
|
||||||
if (ret == -1) {
|
if (ret == -1) {
|
||||||
|
@ -1318,6 +1329,9 @@ plain_index:
|
||||||
sem_post(&tdat->cmp_done_sem);
|
sem_post(&tdat->cmp_done_sem);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
DEBUG_STAT_EN(fprintf(stderr, "Encryption speed %.3f MB/s\n",
|
||||||
|
get_mb_s(tdat->len_cmp, strt, en)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
|
if ((enable_rabin_scan || enable_fixed_scan) && tdat->rctx->valid) {
|
||||||
|
@ -1364,14 +1378,19 @@ plain_index:
|
||||||
uchar_t *mac_ptr;
|
uchar_t *mac_ptr;
|
||||||
unsigned int hlen;
|
unsigned int hlen;
|
||||||
uchar_t chash[mac_bytes];
|
uchar_t chash[mac_bytes];
|
||||||
|
DEBUG_STAT_EN(double strt, en);
|
||||||
|
|
||||||
/* Clean out mac_bytes to 0 for stable HMAC. */
|
/* Clean out mac_bytes to 0 for stable HMAC. */
|
||||||
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes;
|
mac_ptr = tdat->cmp_seg + sizeof (tdat->len_cmp) + cksum_bytes;
|
||||||
memset(mac_ptr, 0, mac_bytes);
|
memset(mac_ptr, 0, mac_bytes);
|
||||||
hmac_reinit(&tdat->chunk_hmac);
|
hmac_reinit(&tdat->chunk_hmac);
|
||||||
hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp);
|
hmac_update(&tdat->chunk_hmac, tdat->cmp_seg, tdat->len_cmp);
|
||||||
hmac_final(&tdat->chunk_hmac, chash, &hlen);
|
hmac_final(&tdat->chunk_hmac, chash, &hlen);
|
||||||
serialize_checksum(chash, mac_ptr, hlen);
|
serialize_checksum(chash, mac_ptr, hlen);
|
||||||
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
DEBUG_STAT_EN(fprintf(stderr, "HMAC Computation speed %.3f MB/s\n",
|
||||||
|
get_mb_s(tdat->len_cmp, strt, en)));
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Compute header CRC32 in non-crypto mode.
|
* Compute header CRC32 in non-crypto mode.
|
||||||
|
@ -2142,7 +2161,7 @@ main(int argc, char *argv[])
|
||||||
do_compress = 1;
|
do_compress = 1;
|
||||||
algo = optarg;
|
algo = optarg;
|
||||||
if (init_algo(algo, 1) != 0) {
|
if (init_algo(algo, 1) != 0) {
|
||||||
err_exit(1, "Invalid algorithm %s\n", optarg);
|
err_exit(0, "Invalid algorithm %s\n", optarg);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue