diff --git a/Makefile.in b/Makefile.in index 5574a43..e3f70cc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -30,10 +30,11 @@ MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h MAINOBJS = $(MAINSRCS:.c=.o) CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \ - crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c + crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c \ + crypto/sha2_utils.c crypto/sha3_utils.c CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \ crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \ - $(MAINHDRS) + crypto/sha2_utils.h crypto/sha3_utils.h $(MAINHDRS) CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o) CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@ diff --git a/crypto/crypto_utils.c b/crypto/crypto_utils.c index 3dcaa03..ac48299 100644 --- a/crypto/crypto_utils.c +++ b/crypto/crypto_utils.c @@ -43,6 +43,8 @@ #include #include "crypto_utils.h" +#include "sha2_utils.h" +#include "sha3_utils.h" #define PROVIDER_OPENSSL 0 #define PROVIDER_X64_OPT 1 @@ -91,6 +93,9 @@ extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, uint64_t size, uint64_t crc, uint64_t *cnt); #ifdef __OSSL_OLD__ +/* + * The two functions below fill missing functionality in older versions of OpenSSL. + */ int HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx) { @@ -163,6 +168,14 @@ PKCS5_PBKDF2_HMAC(const char *pass, int passlen, } #endif +/* + * Compute a digest of the given data segment. The parameter mt indicates whether + * to use the parallel(OpenMP) versions. Parallel versions are only used when + * a single segment is used to hold the entire file - essentially a single-threaded + * compression. + * In other cases segments are handled in separate threads any way and we do not + * need or want another level of parallelism to cause contention. 
+ */ int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, int mt) { @@ -181,7 +194,6 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in if (bdsp.blake2bp(cksum_buf, buf, NULL, 32, bytes, 0) != 0) return (-1); } - } else if (cksum == CKSUM_BLAKE512) { if (!mt) { if (bdsp.blake2b(cksum_buf, buf, NULL, 64, bytes, 0) != 0) @@ -190,7 +202,10 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in if (bdsp.blake2bp(cksum_buf, buf, NULL, 64, bytes, 0) != 0) return (-1); } - + /* + * No parallelism for SKEIN. It is deprecated and retained here only for + * backwards compatibility. + */ } else if (cksum == CKSUM_SKEIN256) { Skein_512_Ctxt_t ctx; @@ -207,40 +222,44 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in } else if (cksum == CKSUM_SHA256) { if (cksum_provider == PROVIDER_OPENSSL) { - SHA256_CTX ctx; - - SHA256_Init(&ctx); - SHA256_Update(&ctx, buf, bytes); - SHA256_Final(cksum_buf, &ctx); + if (!mt) + ossl_SHA256(cksum_buf, buf, bytes); + else + ossl_SHA256_par(cksum_buf, buf, bytes); } else { - SHA512_Context ctx; - - opt_SHA512t256_Init(&ctx); - opt_SHA512t256_Update(&ctx, buf, bytes); - opt_SHA512t256_Final(&ctx, cksum_buf); + if (!mt) + opt_SHA512t256(cksum_buf, buf, bytes); + else + opt_SHA512t256_par(cksum_buf, buf, bytes); } } else if (cksum == CKSUM_SHA512) { if (cksum_provider == PROVIDER_OPENSSL) { - SHA512_CTX ctx; - - SHA512_Init(&ctx); - SHA512_Update(&ctx, buf, bytes); - SHA512_Final(cksum_buf, &ctx); + if (!mt) + ossl_SHA512(cksum_buf, buf, bytes); + else + ossl_SHA512_par(cksum_buf, buf, bytes); } else { - SHA512_Context ctx; - - opt_SHA512_Init(&ctx); - opt_SHA512_Update(&ctx, buf, bytes); - opt_SHA512_Final(&ctx, cksum_buf); + if (!mt) + opt_SHA512(cksum_buf, buf, bytes); + else + opt_SHA512_par(cksum_buf, buf, bytes); } - } else if (cksum == CKSUM_KECCAK256) { - if (Keccak_Hash(256, buf, bytes * 8, cksum_buf) != 0) - return 
(-1); - + if (!mt) { + if (Keccak256(cksum_buf, buf, bytes) != 0) + return (-1); + } else { + if (Keccak256_par(cksum_buf, buf, bytes) != 0) + return (-1); + } } else if (cksum == CKSUM_KECCAK512) { - if (Keccak_Hash(512, buf, bytes * 8, cksum_buf) != 0) - return (-1); + if (!mt) { + if (Keccak512(cksum_buf, buf, bytes) != 0) + return (-1); + } else { + if (Keccak512_par(cksum_buf, buf, bytes) != 0) + return (-1); + } } else { return (-1); } @@ -339,8 +358,8 @@ deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes) } /* - * Perform keyed hashing. With Skein, HMAC is not used, rather Skein's - * native MAC is used which is more optimal than HMAC. + * Perform keyed hashing. With Skein/Blake/Keccak, HMAC is not used, rather + * their native MAC features are used which are more optimal than HMAC. */ int hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx) @@ -658,6 +677,9 @@ hmac_cleanup(mac_ctx_t *mctx) return (0); } +/* + * Encryption related functions. + */ int init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg, uchar_t *salt, int saltlen, uint64_t nonce, int enc_dec) @@ -814,6 +836,9 @@ err0: return (4); } +/* + * Input password string from terminal without echoing. 
+ */ int get_pw_string(uchar_t pw[MAX_PW_LEN], const char *prompt, int twice) { diff --git a/crypto/sha2/sha512.c b/crypto/sha2/sha512.c index b00a285..c7c56d3 100644 --- a/crypto/sha2/sha512.c +++ b/crypto/sha2/sha512.c @@ -61,7 +61,7 @@ typedef void (*update_func_ptr)(const void *input_data, void *digest, uint64_t num_blks); -static const uint8_t padding[128] = { +static const uint8_t padding[SHA512_BLOCK_SIZE] = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -157,7 +157,7 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len) if (sc->bufferLength) { do { - bufferBytesLeft = 128L - sc->bufferLength; + bufferBytesLeft = SHA512_BLOCK_SIZE - sc->bufferLength; bytesToCopy = bufferBytesLeft; if (bytesToCopy > len) bytesToCopy = len; @@ -172,14 +172,14 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len) data += bytesToCopy; len -= bytesToCopy; - if (sc->bufferLength == 128L) { + if (sc->bufferLength == SHA512_BLOCK_SIZE) { sc->blocks = 1; sha512_update_func(sc->buffer.words, sc->hash, sc->blocks); sc->bufferLength = 0L; } else { return; } - } while (len > 0 && len <= 128L); + } while (len > 0 && len <= SHA512_BLOCK_SIZE); if (!len) return; } @@ -218,8 +218,8 @@ _final (SHA512_Context *sc, uint8_t *hash, int hashWords, int halfWord) int i; bytesToPad = 240L - sc->bufferLength; - if (bytesToPad > 128L) - bytesToPad -= 128L; + if (bytesToPad > SHA512_BLOCK_SIZE) + bytesToPad -= SHA512_BLOCK_SIZE; lengthPad[0] = BYTESWAP64(sc->totalLength[0]); lengthPad[1] = BYTESWAP64(sc->totalLength[1]); @@ -258,7 +258,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS #define HASH_UPDATE APS_NAMESPACE(SHA512_Update) #define HASH_FINAL APS_NAMESPACE(SHA512_Final) #define HASH_SIZE SHA512_HASH_SIZE -#define HASH_BLOCK_SIZE 128 +#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE #define HMAC_CONTEXT 
HMAC_SHA512_Context #define HMAC_INIT APS_NAMESPACE(HMAC_SHA512_Init) @@ -283,7 +283,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS #define HASH_UPDATE APS_NAMESPACE(SHA512t256_Update) #define HASH_FINAL APS_NAMESPACE(SHA512t256_Final) #define HASH_SIZE SHA512t256_HASH_SIZE -#define HASH_BLOCK_SIZE 128 +#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE #define HMAC_CONTEXT HMAC_SHA512_Context #define HMAC_INIT APS_NAMESPACE(HMAC_SHA512t256_Init) diff --git a/crypto/sha2/sha512.h b/crypto/sha2/sha512.h index 501c0b0..92729df 100644 --- a/crypto/sha2/sha512.h +++ b/crypto/sha2/sha512.h @@ -38,8 +38,9 @@ #include -#define SHA512_HASH_SIZE 64 -#define SHA512t256_HASH_SIZE 32 +#define SHA512_HASH_SIZE 64 +#define SHA512t256_HASH_SIZE 32 +#define SHA512_BLOCK_SIZE 128L /* Hash size in 64-bit words */ #define SHA512_HASH_WORDS 8 @@ -50,8 +51,8 @@ typedef struct _SHA512_Context { uint64_t hash[SHA512_HASH_WORDS]; uint32_t bufferLength; union { - uint64_t words[16]; - uint8_t bytes[128]; + uint64_t words[SHA512_BLOCK_SIZE/8]; + uint8_t bytes[SHA512_BLOCK_SIZE]; } buffer; } SHA512_Context; diff --git a/crypto/sha2_utils.c b/crypto/sha2_utils.c new file mode 100644 index 0000000..b746f1b --- /dev/null +++ b/crypto/sha2_utils.c @@ -0,0 +1,245 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#include +#include +#include +#include +#include +#include + +#if defined(_OPENMP) +#include +#endif +#include + +/* + * Helper functions for single-call SHA2 hashing. Both serial and + * parallel versions are provided. + */ +void +ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + SHA256_CTX ctx; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, buf, bytes); + SHA256_Final(cksum_buf, &ctx); +} + +/* + * SHA256 of buf; large buffers are hashed as two halves (in parallel under + * OpenMP) giving SHA256(SHA256(half1) || SHA256(half2)), not SHA256(buf). + */ +void +ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][32]; + int i; + SHA256_CTX mctx; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing. + */ + if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) { + SHA256_Init(&mctx); + SHA256_Update(&mctx, buf, bytes); + SHA256_Final(cksum_buf, &mctx); + return; + } + pos[0] = buf; + len[0] = bytes/2; + buf += bytes/2; + pos[1] = buf; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + SHA256_CTX ctx; + SHA256_Init(&ctx); + SHA256_Update(&ctx, pos[i], len[i]); + SHA256_Final(cksum[i], &ctx); + } + SHA256_Init(&mctx); + SHA256_Update(&mctx, cksum, 2 * 32); + SHA256_Final(cksum_buf, &mctx); +} + +void +ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + SHA512_CTX ctx; + + SHA512_Init(&ctx); + SHA512_Update(&ctx, buf, bytes); + SHA512_Final(cksum_buf, &ctx); +} + +/* + * SHA512 of buf; large buffers are hashed as two halves (in parallel under + * OpenMP) giving SHA512(SHA512(half1) || SHA512(half2)), not SHA512(buf). + */ +void +ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][64]; + int i; + SHA512_CTX mctx; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing. 
+ */ + if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) { + SHA512_Init(&mctx); + SHA512_Update(&mctx, buf, bytes); + SHA512_Final(cksum_buf, &mctx); + return; + } + pos[0] = buf; + len[0] = bytes/2; + pos[1] = buf + bytes/2; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + SHA512_CTX ctx; + SHA512_Init(&ctx); + SHA512_Update(&ctx, pos[i], len[i]); + SHA512_Final(cksum[i], &ctx); + } + SHA512_Init(&mctx); + SHA512_Update(&mctx, cksum, 2 * 64); + SHA512_Final(cksum_buf, &mctx); +} + +void +opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + SHA512_Context ctx; + + opt_SHA512t256_Init(&ctx); + opt_SHA512t256_Update(&ctx, buf, bytes); + opt_SHA512t256_Final(&ctx, cksum_buf); +} + +/* + * SHA512/256 of buf; large buffers are hashed as two halves (in parallel + * under OpenMP) giving H(H(half1) || H(half2)) with H = SHA512/256, not H(buf). + */ +void +opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][32]; + int i; + SHA512_Context mctx; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing. 
+ */ + if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) { + opt_SHA512t256_Init(&mctx); + opt_SHA512t256_Update(&mctx, buf, bytes); + opt_SHA512t256_Final(&mctx, cksum_buf); + return; + } + pos[0] = buf; + len[0] = bytes/2; + pos[1] = buf + bytes/2; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + SHA512_Context ctx; + opt_SHA512t256_Init(&ctx); + opt_SHA512t256_Update(&ctx, pos[i], len[i]); + opt_SHA512t256_Final(&ctx, cksum[i]); + } + opt_SHA512t256_Init(&mctx); + opt_SHA512t256_Update(&mctx, cksum, 2 * 32); + opt_SHA512t256_Final(&mctx, cksum_buf); +} + +void +opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + SHA512_Context ctx; + + opt_SHA512_Init(&ctx); + opt_SHA512_Update(&ctx, buf, bytes); + opt_SHA512_Final(&ctx, cksum_buf); +} + +/* + * SHA512 of buf; large buffers are hashed as two halves (in parallel under + * OpenMP) giving SHA512(SHA512(half1) || SHA512(half2)), not SHA512(buf). + */ +void +opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][64]; + int i; + SHA512_Context mctx; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing. 
+ */ + if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) { + opt_SHA512_Init(&mctx); + opt_SHA512_Update(&mctx, buf, bytes); + opt_SHA512_Final(&mctx, cksum_buf); + return; + } + pos[0] = buf; + len[0] = bytes/2; + pos[1] = buf + bytes/2; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + SHA512_Context ctx; + opt_SHA512_Init(&ctx); + opt_SHA512_Update(&ctx, pos[i], len[i]); + opt_SHA512_Final(&ctx, cksum[i]); + } + opt_SHA512_Init(&mctx); + opt_SHA512_Update(&mctx, cksum, 2 * 64); + opt_SHA512_Final(&mctx, cksum_buf); +} + diff --git a/crypto/sha2_utils.h b/crypto/sha2_utils.h new file mode 100644 index 0000000..61e6187 --- /dev/null +++ b/crypto/sha2_utils.h @@ -0,0 +1,15 @@ +#ifndef _SHA2_UTILS_H_ +#define _SHA2_UTILS_H_ + +void ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); + +void ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +void opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); + +#endif + diff --git a/crypto/sha3_utils.c b/crypto/sha3_utils.c new file mode 100644 index 0000000..c967c69 --- /dev/null +++ b/crypto/sha3_utils.c @@ -0,0 +1,113 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#include +#include +#include +#include +#include + +#if defined(_OPENMP) +#include +#endif +#include + +#define KECCAK_BLOCK_SIZE 1024 + +/* + * Single-call SHA3 (Keccak) helpers, serial and parallel. Each returns 0 on + * success and non-zero on failure, following the Keccak_Hash status. + */ + +int +Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + return (Keccak_Hash(256, buf, bytes * 8, cksum_buf)); +} + +int +Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][32]; + int i, rv[2]; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing and return its status. 
+ */ + if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2) { + /* Callers test the result, so the status must be passed back. */ + return (Keccak_Hash(256, buf, bytes * 8, cksum_buf)); + } + pos[0] = buf; + len[0] = bytes/2; + pos[1] = buf + bytes/2; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + rv[i] = Keccak_Hash(256, pos[i], len[i] * 8, cksum[i]); + } + if (rv[0] != 0 || rv[1] != 0) + return (-1); + return (Keccak_Hash(256, (const BitSequence *)cksum, 2 * 32 * 8, cksum_buf)); +} + +int +Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + return (Keccak_Hash(512, buf, bytes * 8, cksum_buf)); +} + +int +Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) +{ + uchar_t *pos[2]; + uint64_t len[2]; + uchar_t cksum[2][64]; + int i, rv[2]; + + /* + * Is it worth doing the overhead of parallelism ? Buffer large enough ? + * If not then just do a simple serial hashing and return its status. + */ + if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2) { + /* Callers test the result, so the status must be passed back. */ + return (Keccak_Hash(512, buf, bytes * 8, cksum_buf)); + } + pos[0] = buf; + len[0] = bytes/2; + pos[1] = buf + bytes/2; + len[1] = bytes - bytes/2; +#if defined(_OPENMP) +# pragma omp parallel for +#endif + for(i = 0; i < 2; ++i) + { + rv[i] = Keccak_Hash(512, pos[i], len[i] * 8, cksum[i]); + } + if (rv[0] != 0 || rv[1] != 0) + return (-1); + return (Keccak_Hash(512, (const BitSequence *)cksum, 2 * 64 * 8, cksum_buf)); +} diff --git a/crypto/sha3_utils.h b/crypto/sha3_utils.h new file mode 100644 index 0000000..123b51b --- /dev/null +++ b/crypto/sha3_utils.h @@ -0,0 +1,11 @@ +#ifndef _SHA3_UTILS_H_ +#define _SHA3_UTILS_H_ + +int Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +int Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); + +int Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); +int Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes); + +#endif + diff --git a/main.c b/main.c index a2e48fc..374d44f 100644 --- a/main.c +++ b/main.c @@ -528,8 +528,14 @@ redo: tdat->uncompressed_chunk, 
&_chunksize, tdat->level, HDR, tdat->data, tdat->props); } else { + DEBUG_STAT_EN(double strt, en); + + DEBUG_STAT_EN(strt = get_wtime_millis()); rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data); + DEBUG_STAT_EN(en = get_wtime_millis()); + DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n", + get_mb_s(_chunksize, strt, en))); } } else { memcpy(tdat->uncompressed_chunk, cseg, _chunksize);