Add parallel versions of various checksums for single-segment, single-thread compression.
This commit is contained in:
parent
2da0d0950b
commit
468044d816
9 changed files with 461 additions and 44 deletions
|
@ -30,10 +30,11 @@ MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h
|
||||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||||
|
|
||||||
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
|
||||||
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c
|
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c \
|
||||||
|
crypto/sha2_utils.c crypto/sha3_utils.c
|
||||||
CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
|
CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
|
||||||
crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \
|
crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \
|
||||||
$(MAINHDRS)
|
crypto/sha2_utils.h crypto/sha3_utils.h $(MAINHDRS)
|
||||||
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
|
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
|
||||||
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
|
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,8 @@
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
|
|
||||||
#include "crypto_utils.h"
|
#include "crypto_utils.h"
|
||||||
|
#include "sha2_utils.h"
|
||||||
|
#include "sha3_utils.h"
|
||||||
|
|
||||||
#define PROVIDER_OPENSSL 0
|
#define PROVIDER_OPENSSL 0
|
||||||
#define PROVIDER_X64_OPT 1
|
#define PROVIDER_X64_OPT 1
|
||||||
|
@ -91,6 +93,9 @@ extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, uint64_t size,
|
||||||
uint64_t crc, uint64_t *cnt);
|
uint64_t crc, uint64_t *cnt);
|
||||||
|
|
||||||
#ifdef __OSSL_OLD__
|
#ifdef __OSSL_OLD__
|
||||||
|
/*
|
||||||
|
* The two functions below fill missing functionality in older versions of OpenSSL.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx)
|
HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx)
|
||||||
{
|
{
|
||||||
|
@ -163,6 +168,14 @@ PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute a digest of the given data segment. The parameter mt indicates whether
|
||||||
|
* to use the parallel(OpenMP) versions. Parallel versions are only used when
|
||||||
|
* a single segment is used to hold the entire file - essentially a single-threaded
|
||||||
|
* compression.
|
||||||
|
* In other cases segments are handled in separate threads anyway and we do not
|
||||||
|
* need or want another level of parallelism to cause contention.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, int mt)
|
compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, int mt)
|
||||||
{
|
{
|
||||||
|
@ -181,7 +194,6 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
|
||||||
if (bdsp.blake2bp(cksum_buf, buf, NULL, 32, bytes, 0) != 0)
|
if (bdsp.blake2bp(cksum_buf, buf, NULL, 32, bytes, 0) != 0)
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (cksum == CKSUM_BLAKE512) {
|
} else if (cksum == CKSUM_BLAKE512) {
|
||||||
if (!mt) {
|
if (!mt) {
|
||||||
if (bdsp.blake2b(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
|
if (bdsp.blake2b(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
|
||||||
|
@ -190,7 +202,10 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
|
||||||
if (bdsp.blake2bp(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
|
if (bdsp.blake2bp(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* No parallelism for SKEIN. It is deprecated and retained here only for
|
||||||
|
* backwards compatibility.
|
||||||
|
*/
|
||||||
} else if (cksum == CKSUM_SKEIN256) {
|
} else if (cksum == CKSUM_SKEIN256) {
|
||||||
Skein_512_Ctxt_t ctx;
|
Skein_512_Ctxt_t ctx;
|
||||||
|
|
||||||
|
@ -207,40 +222,44 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
|
||||||
|
|
||||||
} else if (cksum == CKSUM_SHA256) {
|
} else if (cksum == CKSUM_SHA256) {
|
||||||
if (cksum_provider == PROVIDER_OPENSSL) {
|
if (cksum_provider == PROVIDER_OPENSSL) {
|
||||||
SHA256_CTX ctx;
|
if (!mt)
|
||||||
|
ossl_SHA256(cksum_buf, buf, bytes);
|
||||||
SHA256_Init(&ctx);
|
else
|
||||||
SHA256_Update(&ctx, buf, bytes);
|
ossl_SHA256_par(cksum_buf, buf, bytes);
|
||||||
SHA256_Final(cksum_buf, &ctx);
|
|
||||||
} else {
|
} else {
|
||||||
SHA512_Context ctx;
|
if (!mt)
|
||||||
|
opt_SHA512t256(cksum_buf, buf, bytes);
|
||||||
opt_SHA512t256_Init(&ctx);
|
else
|
||||||
opt_SHA512t256_Update(&ctx, buf, bytes);
|
opt_SHA512t256_par(cksum_buf, buf, bytes);
|
||||||
opt_SHA512t256_Final(&ctx, cksum_buf);
|
|
||||||
}
|
}
|
||||||
} else if (cksum == CKSUM_SHA512) {
|
} else if (cksum == CKSUM_SHA512) {
|
||||||
if (cksum_provider == PROVIDER_OPENSSL) {
|
if (cksum_provider == PROVIDER_OPENSSL) {
|
||||||
SHA512_CTX ctx;
|
if (!mt)
|
||||||
|
ossl_SHA512(cksum_buf, buf, bytes);
|
||||||
SHA512_Init(&ctx);
|
else
|
||||||
SHA512_Update(&ctx, buf, bytes);
|
ossl_SHA512_par(cksum_buf, buf, bytes);
|
||||||
SHA512_Final(cksum_buf, &ctx);
|
|
||||||
} else {
|
} else {
|
||||||
SHA512_Context ctx;
|
if (!mt)
|
||||||
|
opt_SHA512(cksum_buf, buf, bytes);
|
||||||
opt_SHA512_Init(&ctx);
|
else
|
||||||
opt_SHA512_Update(&ctx, buf, bytes);
|
opt_SHA512_par(cksum_buf, buf, bytes);
|
||||||
opt_SHA512_Final(&ctx, cksum_buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (cksum == CKSUM_KECCAK256) {
|
} else if (cksum == CKSUM_KECCAK256) {
|
||||||
if (Keccak_Hash(256, buf, bytes * 8, cksum_buf) != 0)
|
if (!mt) {
|
||||||
|
if (Keccak256(cksum_buf, buf, bytes) != 0)
|
||||||
return (-1);
|
return (-1);
|
||||||
|
} else {
|
||||||
|
if (Keccak256_par(cksum_buf, buf, bytes) != 0)
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
} else if (cksum == CKSUM_KECCAK512) {
|
} else if (cksum == CKSUM_KECCAK512) {
|
||||||
if (Keccak_Hash(512, buf, bytes * 8, cksum_buf) != 0)
|
if (!mt) {
|
||||||
|
if (Keccak512(cksum_buf, buf, bytes) != 0)
|
||||||
return (-1);
|
return (-1);
|
||||||
|
} else {
|
||||||
|
if (Keccak512_par(cksum_buf, buf, bytes) != 0)
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
@ -339,8 +358,8 @@ deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform keyed hashing. With Skein, HMAC is not used, rather Skein's
|
* Perform keyed hashing. With Skein/Blake/Keccak, HMAC is not used, rather
|
||||||
* native MAC is used which is more optimal than HMAC.
|
* their native MAC features are used which are more optimal than HMAC.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
|
hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
|
||||||
|
@ -658,6 +677,9 @@ hmac_cleanup(mac_ctx_t *mctx)
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Encryption related functions.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg,
|
init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg,
|
||||||
uchar_t *salt, int saltlen, uint64_t nonce, int enc_dec)
|
uchar_t *salt, int saltlen, uint64_t nonce, int enc_dec)
|
||||||
|
@ -814,6 +836,9 @@ err0:
|
||||||
return (4);
|
return (4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Input password string from terminal without echoing.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
get_pw_string(uchar_t pw[MAX_PW_LEN], const char *prompt, int twice)
|
get_pw_string(uchar_t pw[MAX_PW_LEN], const char *prompt, int twice)
|
||||||
{
|
{
|
||||||
|
|
|
@ -61,7 +61,7 @@
|
||||||
|
|
||||||
typedef void (*update_func_ptr)(const void *input_data, void *digest, uint64_t num_blks);
|
typedef void (*update_func_ptr)(const void *input_data, void *digest, uint64_t num_blks);
|
||||||
|
|
||||||
static const uint8_t padding[128] = {
|
static const uint8_t padding[SHA512_BLOCK_SIZE] = {
|
||||||
0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
@ -157,7 +157,7 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len)
|
||||||
|
|
||||||
if (sc->bufferLength) {
|
if (sc->bufferLength) {
|
||||||
do {
|
do {
|
||||||
bufferBytesLeft = 128L - sc->bufferLength;
|
bufferBytesLeft = SHA512_BLOCK_SIZE - sc->bufferLength;
|
||||||
bytesToCopy = bufferBytesLeft;
|
bytesToCopy = bufferBytesLeft;
|
||||||
if (bytesToCopy > len)
|
if (bytesToCopy > len)
|
||||||
bytesToCopy = len;
|
bytesToCopy = len;
|
||||||
|
@ -172,14 +172,14 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len)
|
||||||
data += bytesToCopy;
|
data += bytesToCopy;
|
||||||
len -= bytesToCopy;
|
len -= bytesToCopy;
|
||||||
|
|
||||||
if (sc->bufferLength == 128L) {
|
if (sc->bufferLength == SHA512_BLOCK_SIZE) {
|
||||||
sc->blocks = 1;
|
sc->blocks = 1;
|
||||||
sha512_update_func(sc->buffer.words, sc->hash, sc->blocks);
|
sha512_update_func(sc->buffer.words, sc->hash, sc->blocks);
|
||||||
sc->bufferLength = 0L;
|
sc->bufferLength = 0L;
|
||||||
} else {
|
} else {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} while (len > 0 && len <= 128L);
|
} while (len > 0 && len <= SHA512_BLOCK_SIZE);
|
||||||
if (!len) return;
|
if (!len) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,8 +218,8 @@ _final (SHA512_Context *sc, uint8_t *hash, int hashWords, int halfWord)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
bytesToPad = 240L - sc->bufferLength;
|
bytesToPad = 240L - sc->bufferLength;
|
||||||
if (bytesToPad > 128L)
|
if (bytesToPad > SHA512_BLOCK_SIZE)
|
||||||
bytesToPad -= 128L;
|
bytesToPad -= SHA512_BLOCK_SIZE;
|
||||||
|
|
||||||
lengthPad[0] = BYTESWAP64(sc->totalLength[0]);
|
lengthPad[0] = BYTESWAP64(sc->totalLength[0]);
|
||||||
lengthPad[1] = BYTESWAP64(sc->totalLength[1]);
|
lengthPad[1] = BYTESWAP64(sc->totalLength[1]);
|
||||||
|
@ -258,7 +258,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS
|
||||||
#define HASH_UPDATE APS_NAMESPACE(SHA512_Update)
|
#define HASH_UPDATE APS_NAMESPACE(SHA512_Update)
|
||||||
#define HASH_FINAL APS_NAMESPACE(SHA512_Final)
|
#define HASH_FINAL APS_NAMESPACE(SHA512_Final)
|
||||||
#define HASH_SIZE SHA512_HASH_SIZE
|
#define HASH_SIZE SHA512_HASH_SIZE
|
||||||
#define HASH_BLOCK_SIZE 128
|
#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE
|
||||||
|
|
||||||
#define HMAC_CONTEXT HMAC_SHA512_Context
|
#define HMAC_CONTEXT HMAC_SHA512_Context
|
||||||
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512_Init)
|
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512_Init)
|
||||||
|
@ -283,7 +283,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS
|
||||||
#define HASH_UPDATE APS_NAMESPACE(SHA512t256_Update)
|
#define HASH_UPDATE APS_NAMESPACE(SHA512t256_Update)
|
||||||
#define HASH_FINAL APS_NAMESPACE(SHA512t256_Final)
|
#define HASH_FINAL APS_NAMESPACE(SHA512t256_Final)
|
||||||
#define HASH_SIZE SHA512t256_HASH_SIZE
|
#define HASH_SIZE SHA512t256_HASH_SIZE
|
||||||
#define HASH_BLOCK_SIZE 128
|
#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE
|
||||||
|
|
||||||
#define HMAC_CONTEXT HMAC_SHA512_Context
|
#define HMAC_CONTEXT HMAC_SHA512_Context
|
||||||
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512t256_Init)
|
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512t256_Init)
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
|
|
||||||
#define SHA512_HASH_SIZE 64
|
#define SHA512_HASH_SIZE 64
|
||||||
#define SHA512t256_HASH_SIZE 32
|
#define SHA512t256_HASH_SIZE 32
|
||||||
|
#define SHA512_BLOCK_SIZE 128L
|
||||||
|
|
||||||
/* Hash size in 64-bit words */
|
/* Hash size in 64-bit words */
|
||||||
#define SHA512_HASH_WORDS 8
|
#define SHA512_HASH_WORDS 8
|
||||||
|
@ -50,8 +51,8 @@ typedef struct _SHA512_Context {
|
||||||
uint64_t hash[SHA512_HASH_WORDS];
|
uint64_t hash[SHA512_HASH_WORDS];
|
||||||
uint32_t bufferLength;
|
uint32_t bufferLength;
|
||||||
union {
|
union {
|
||||||
uint64_t words[16];
|
uint64_t words[SHA512_BLOCK_SIZE/8];
|
||||||
uint8_t bytes[128];
|
uint8_t bytes[SHA512_BLOCK_SIZE];
|
||||||
} buffer;
|
} buffer;
|
||||||
} SHA512_Context;
|
} SHA512_Context;
|
||||||
|
|
||||||
|
|
245
crypto/sha2_utils.c
Normal file
245
crypto/sha2_utils.c
Normal file
|
@ -0,0 +1,245 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
|
* algorithm lossless compression and decompression program.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
#include <sha512.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#include <utils.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper functions for single-call SHA2 hashing. Both serial and
|
||||||
|
* parallel versions are provided.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
SHA256_CTX ctx;
|
||||||
|
|
||||||
|
SHA256_Init(&ctx);
|
||||||
|
SHA256_Update(&ctx, buf, bytes);
|
||||||
|
SHA256_Final(cksum_buf, &ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][32];
|
||||||
|
int i;
|
||||||
|
SHA256_CTX *mctx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple serial hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
|
||||||
|
mctx = (SHA256_CTX *)malloc(sizeof (SHA256_CTX));
|
||||||
|
SHA256_Init(mctx);
|
||||||
|
SHA256_Update(mctx, buf, bytes);
|
||||||
|
SHA256_Final(cksum_buf, mctx);
|
||||||
|
free(mctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
buf += bytes/2;
|
||||||
|
pos[1] = buf;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
SHA256_CTX ctx;
|
||||||
|
SHA256_Init(&ctx);
|
||||||
|
SHA256_Update(&ctx, pos[i], len[i]);
|
||||||
|
SHA256_Final(cksum[i], &ctx);
|
||||||
|
}
|
||||||
|
mctx = (SHA256_CTX *)malloc(sizeof (SHA256_CTX));
|
||||||
|
SHA256_Init(mctx);
|
||||||
|
SHA256_Update(mctx, cksum, 2 * 32);
|
||||||
|
SHA256_Final(cksum_buf, mctx);
|
||||||
|
free(mctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
SHA512_CTX ctx;
|
||||||
|
|
||||||
|
SHA512_Init(&ctx);
|
||||||
|
SHA512_Update(&ctx, buf, bytes);
|
||||||
|
SHA512_Final(cksum_buf, &ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][64];
|
||||||
|
int i;
|
||||||
|
SHA512_CTX *mctx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
|
||||||
|
mctx = (SHA512_CTX *)malloc(sizeof (SHA512_CTX));
|
||||||
|
SHA512_Init(mctx);
|
||||||
|
SHA512_Update(mctx, buf, bytes);
|
||||||
|
SHA512_Final(cksum_buf, mctx);
|
||||||
|
free(mctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
pos[1] = buf + bytes/2;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
SHA512_CTX ctx;
|
||||||
|
SHA512_Init(&ctx);
|
||||||
|
SHA512_Update(&ctx, pos[i], len[i]);
|
||||||
|
SHA512_Final(cksum[i], &ctx);
|
||||||
|
}
|
||||||
|
mctx = (SHA512_CTX *)malloc(sizeof (SHA512_CTX));
|
||||||
|
SHA512_Init(mctx);
|
||||||
|
SHA512_Update(mctx, cksum, 2 * 64);
|
||||||
|
SHA512_Final(cksum_buf, mctx);
|
||||||
|
free(mctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
SHA512_Context ctx;
|
||||||
|
|
||||||
|
opt_SHA512t256_Init(&ctx);
|
||||||
|
opt_SHA512t256_Update(&ctx, buf, bytes);
|
||||||
|
opt_SHA512t256_Final(&ctx, cksum_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][32];
|
||||||
|
int i;
|
||||||
|
SHA512_Context *mctx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple serial hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
|
||||||
|
mctx = (SHA512_Context *)malloc(sizeof (SHA512_Context));
|
||||||
|
opt_SHA512t256_Init(mctx);
|
||||||
|
opt_SHA512t256_Update(mctx, buf, bytes);
|
||||||
|
opt_SHA512t256_Final(mctx, cksum_buf);
|
||||||
|
free(mctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
pos[1] = buf + bytes/2;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
SHA512_Context ctx;
|
||||||
|
opt_SHA512t256_Init(&ctx);
|
||||||
|
opt_SHA512t256_Update(&ctx, pos[i], len[i]);
|
||||||
|
opt_SHA512t256_Final(&ctx, cksum[i]);
|
||||||
|
}
|
||||||
|
mctx = (SHA512_Context *)malloc(sizeof (SHA512_Context));
|
||||||
|
opt_SHA512t256_Init(mctx);
|
||||||
|
opt_SHA512t256_Update(mctx, cksum, 2 * 32);
|
||||||
|
opt_SHA512t256_Final(mctx, cksum_buf);
|
||||||
|
free(mctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
SHA512_Context ctx;
|
||||||
|
|
||||||
|
opt_SHA512_Init(&ctx);
|
||||||
|
opt_SHA512_Update(&ctx, buf, bytes);
|
||||||
|
opt_SHA512_Final(&ctx, cksum_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][64];
|
||||||
|
int i;
|
||||||
|
SHA512_Context *mctx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple serial hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
|
||||||
|
mctx = (SHA512_Context *)malloc(sizeof (SHA512_Context));
|
||||||
|
opt_SHA512_Init(mctx);
|
||||||
|
opt_SHA512_Update(mctx, buf, bytes);
|
||||||
|
opt_SHA512_Final(mctx, cksum_buf);
|
||||||
|
free(mctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
pos[1] = buf + bytes/2;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
SHA512_Context ctx;
|
||||||
|
opt_SHA512_Init(&ctx);
|
||||||
|
opt_SHA512_Update(&ctx, pos[i], len[i]);
|
||||||
|
opt_SHA512_Final(&ctx, cksum[i]);
|
||||||
|
}
|
||||||
|
mctx = (SHA512_Context *)malloc(sizeof (SHA512_Context));
|
||||||
|
opt_SHA512_Init(mctx);
|
||||||
|
opt_SHA512_Update(mctx, cksum, 2 * 64);
|
||||||
|
opt_SHA512_Final(mctx, cksum_buf);
|
||||||
|
free(mctx);
|
||||||
|
}
|
||||||
|
|
15
crypto/sha2_utils.h
Normal file
15
crypto/sha2_utils.h
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
#ifndef _SHA2_UTILS_H_
|
||||||
|
#define _SHA2_UTILS_H_
|
||||||
|
|
||||||
|
void ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
|
||||||
|
void ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
void opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
113
crypto/sha3_utils.c
Normal file
113
crypto/sha3_utils.c
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
|
* algorithm lossless compression and decompression program.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <KeccakNISTInterface.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#include <utils.h>
|
||||||
|
|
||||||
|
#define KECCAK_BLOCK_SIZE 1024
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper functions for single-call SHA3 (Keccak) hashing. Both serial
|
||||||
|
* and parallel versions are provided.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
return (Keccak_Hash(256, buf, bytes * 8, cksum_buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][32];
|
||||||
|
int i, rv[2];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple serial hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2) {
|
||||||
|
Keccak_Hash(256, buf, bytes * 8, cksum_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
pos[1] = buf + bytes/2;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
rv[i] = Keccak_Hash(256, pos[i], len[i] * 8, cksum[i]);
|
||||||
|
}
|
||||||
|
if (rv[0] != 0 || rv[1] != 0)
|
||||||
|
return (-1);
|
||||||
|
return (Keccak_Hash(256, (const BitSequence *)cksum, 2 * 32 * 8, cksum_buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
return (Keccak_Hash(512, buf, bytes * 8, cksum_buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
|
||||||
|
{
|
||||||
|
uchar_t *pos[2];
|
||||||
|
uint64_t len[2];
|
||||||
|
uchar_t cksum[2][64];
|
||||||
|
int i, rv[2];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is it worth doing the overhead of parallelism ? Buffer large enough ?
|
||||||
|
* If not then just do a simple serial hashing.
|
||||||
|
*/
|
||||||
|
if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2) {
|
||||||
|
Keccak_Hash(512, buf, bytes * 8, cksum_buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pos[0] = buf;
|
||||||
|
len[0] = bytes/2;
|
||||||
|
pos[1] = buf + bytes/2;
|
||||||
|
len[1] = bytes - bytes/2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
# pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(i = 0; i < 2; ++i)
|
||||||
|
{
|
||||||
|
rv[i] = Keccak_Hash(512, pos[i], len[i] * 8, cksum[i]);
|
||||||
|
}
|
||||||
|
if (rv[0] != 0 || rv[1] != 0)
|
||||||
|
return (-1);
|
||||||
|
return (Keccak_Hash(512, (const BitSequence *)cksum, 2 * 64 * 8, cksum_buf));
|
||||||
|
}
|
11
crypto/sha3_utils.h
Normal file
11
crypto/sha3_utils.h
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
#ifndef _SHA3_UTILS_H_
|
||||||
|
#define _SHA3_UTILS_H_
|
||||||
|
|
||||||
|
int Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
int Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
|
||||||
|
int Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
int Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
6
main.c
6
main.c
|
@ -528,8 +528,14 @@ redo:
|
||||||
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
|
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
|
||||||
tdat->props);
|
tdat->props);
|
||||||
} else {
|
} else {
|
||||||
|
DEBUG_STAT_EN(double strt, en);
|
||||||
|
|
||||||
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||||
&_chunksize, tdat->level, HDR, tdat->data);
|
&_chunksize, tdat->level, HDR, tdat->data);
|
||||||
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
|
||||||
|
get_mb_s(_chunksize, strt, en)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
memcpy(tdat->uncompressed_chunk, cseg, _chunksize);
|
memcpy(tdat->uncompressed_chunk, cseg, _chunksize);
|
||||||
|
|
Loading…
Reference in a new issue