Add parallel versions of various checksums for single-segment, single-thread compression.

This commit is contained in:
Moinak Ghosh 2013-01-27 23:47:55 +05:30
parent 2da0d0950b
commit 468044d816
9 changed files with 461 additions and 44 deletions

View file

@ -30,10 +30,11 @@ MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h
MAINOBJS = $(MAINSRCS:.c=.o)
CRYPTO_SRCS = crypto/aes/crypto_aes.c crypto/scrypt/crypto_scrypt-nosse.c \
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c
crypto/scrypt/sha256.c crypto/scrypt/crypto_aesctr.c crypto/crypto_utils.c \
crypto/sha2_utils.c crypto/sha3_utils.c
CRYPTO_HDRS = crypto/crypto_utils.h crypto/scrypt/crypto_scrypt.h \
crypto/scrypt/sha256.h crypto/scrypt/crypto_aesctr.h crypto/aes/crypto_aes.h \
$(MAINHDRS)
crypto/sha2_utils.h crypto/sha3_utils.h $(MAINHDRS)
CRYPTO_OBJS = $(CRYPTO_SRCS:.c=.o)
CRYPTO_CPPFLAGS=-I@OPENSSL_INCDIR@

View file

@ -43,6 +43,8 @@
#include <utils.h>
#include "crypto_utils.h"
#include "sha2_utils.h"
#include "sha3_utils.h"
#define PROVIDER_OPENSSL 0
#define PROVIDER_X64_OPT 1
@ -91,6 +93,9 @@ extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, uint64_t size,
uint64_t crc, uint64_t *cnt);
#ifdef __OSSL_OLD__
/*
* The two functions below fill missing functionality in older versions of OpenSSL.
*/
int
HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx)
{
@ -163,6 +168,14 @@ PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
}
#endif
/*
* Compute a digest of the given data segment. The parameter mt indicates whether
* to use the parallel(OpenMP) versions. Parallel versions are only used when
* a single segment is used to hold the entire file - essentially a single-threaded
* compression.
* In other cases segments are handled in separate threads anyway and we do not
* need or want another level of parallelism to cause contention.
*/
int
compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, int mt)
{
@ -181,7 +194,6 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
if (bdsp.blake2bp(cksum_buf, buf, NULL, 32, bytes, 0) != 0)
return (-1);
}
} else if (cksum == CKSUM_BLAKE512) {
if (!mt) {
if (bdsp.blake2b(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
@ -190,7 +202,10 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
if (bdsp.blake2bp(cksum_buf, buf, NULL, 64, bytes, 0) != 0)
return (-1);
}
/*
* No parallelism for SKEIN. It is deprecated and retained here only for
* backwards compatibility.
*/
} else if (cksum == CKSUM_SKEIN256) {
Skein_512_Ctxt_t ctx;
@ -207,40 +222,44 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, uint64_t bytes, in
} else if (cksum == CKSUM_SHA256) {
if (cksum_provider == PROVIDER_OPENSSL) {
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, buf, bytes);
SHA256_Final(cksum_buf, &ctx);
if (!mt)
ossl_SHA256(cksum_buf, buf, bytes);
else
ossl_SHA256_par(cksum_buf, buf, bytes);
} else {
SHA512_Context ctx;
opt_SHA512t256_Init(&ctx);
opt_SHA512t256_Update(&ctx, buf, bytes);
opt_SHA512t256_Final(&ctx, cksum_buf);
if (!mt)
opt_SHA512t256(cksum_buf, buf, bytes);
else
opt_SHA512t256_par(cksum_buf, buf, bytes);
}
} else if (cksum == CKSUM_SHA512) {
if (cksum_provider == PROVIDER_OPENSSL) {
SHA512_CTX ctx;
SHA512_Init(&ctx);
SHA512_Update(&ctx, buf, bytes);
SHA512_Final(cksum_buf, &ctx);
if (!mt)
ossl_SHA512(cksum_buf, buf, bytes);
else
ossl_SHA512_par(cksum_buf, buf, bytes);
} else {
SHA512_Context ctx;
opt_SHA512_Init(&ctx);
opt_SHA512_Update(&ctx, buf, bytes);
opt_SHA512_Final(&ctx, cksum_buf);
if (!mt)
opt_SHA512(cksum_buf, buf, bytes);
else
opt_SHA512_par(cksum_buf, buf, bytes);
}
} else if (cksum == CKSUM_KECCAK256) {
if (Keccak_Hash(256, buf, bytes * 8, cksum_buf) != 0)
return (-1);
if (!mt) {
if (Keccak256(cksum_buf, buf, bytes) != 0)
return (-1);
} else {
if (Keccak256_par(cksum_buf, buf, bytes) != 0)
return (-1);
}
} else if (cksum == CKSUM_KECCAK512) {
if (Keccak_Hash(512, buf, bytes * 8, cksum_buf) != 0)
return (-1);
if (!mt) {
if (Keccak512(cksum_buf, buf, bytes) != 0)
return (-1);
} else {
if (Keccak512_par(cksum_buf, buf, bytes) != 0)
return (-1);
}
} else {
return (-1);
}
@ -339,8 +358,8 @@ deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
}
/*
* Perform keyed hashing. With Skein, HMAC is not used, rather Skein's
* native MAC is used which is more optimal than HMAC.
* Perform keyed hashing. With Skein/Blake/Keccak, HMAC is not used, rather
* their native MAC features are used which are more optimal than HMAC.
*/
int
hmac_init(mac_ctx_t *mctx, int cksum, crypto_ctx_t *cctx)
@ -658,6 +677,9 @@ hmac_cleanup(mac_ctx_t *mctx)
return (0);
}
/*
* Encryption related functions.
*/
int
init_crypto(crypto_ctx_t *cctx, uchar_t *pwd, int pwd_len, int crypto_alg,
uchar_t *salt, int saltlen, uint64_t nonce, int enc_dec)
@ -814,6 +836,9 @@ err0:
return (4);
}
/*
* Input password string from terminal without echoing.
*/
int
get_pw_string(uchar_t pw[MAX_PW_LEN], const char *prompt, int twice)
{

View file

@ -61,7 +61,7 @@
typedef void (*update_func_ptr)(const void *input_data, void *digest, uint64_t num_blks);
static const uint8_t padding[128] = {
static const uint8_t padding[SHA512_BLOCK_SIZE] = {
0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -157,7 +157,7 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len)
if (sc->bufferLength) {
do {
bufferBytesLeft = 128L - sc->bufferLength;
bufferBytesLeft = SHA512_BLOCK_SIZE - sc->bufferLength;
bytesToCopy = bufferBytesLeft;
if (bytesToCopy > len)
bytesToCopy = len;
@ -172,14 +172,14 @@ APS_NAMESPACE(SHA512_Update) (SHA512_Context *sc, const void *vdata, size_t len)
data += bytesToCopy;
len -= bytesToCopy;
if (sc->bufferLength == 128L) {
if (sc->bufferLength == SHA512_BLOCK_SIZE) {
sc->blocks = 1;
sha512_update_func(sc->buffer.words, sc->hash, sc->blocks);
sc->bufferLength = 0L;
} else {
return;
}
} while (len > 0 && len <= 128L);
} while (len > 0 && len <= SHA512_BLOCK_SIZE);
if (!len) return;
}
@ -218,8 +218,8 @@ _final (SHA512_Context *sc, uint8_t *hash, int hashWords, int halfWord)
int i;
bytesToPad = 240L - sc->bufferLength;
if (bytesToPad > 128L)
bytesToPad -= 128L;
if (bytesToPad > SHA512_BLOCK_SIZE)
bytesToPad -= SHA512_BLOCK_SIZE;
lengthPad[0] = BYTESWAP64(sc->totalLength[0]);
lengthPad[1] = BYTESWAP64(sc->totalLength[1]);
@ -258,7 +258,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS
#define HASH_UPDATE APS_NAMESPACE(SHA512_Update)
#define HASH_FINAL APS_NAMESPACE(SHA512_Final)
#define HASH_SIZE SHA512_HASH_SIZE
#define HASH_BLOCK_SIZE 128
#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE
#define HMAC_CONTEXT HMAC_SHA512_Context
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512_Init)
@ -283,7 +283,7 @@ APS_NAMESPACE(SHA512t256_Final) (SHA512_Context *sc, uint8_t hash[SHA512t256_HAS
#define HASH_UPDATE APS_NAMESPACE(SHA512t256_Update)
#define HASH_FINAL APS_NAMESPACE(SHA512t256_Final)
#define HASH_SIZE SHA512t256_HASH_SIZE
#define HASH_BLOCK_SIZE 128
#define HASH_BLOCK_SIZE SHA512_BLOCK_SIZE
#define HMAC_CONTEXT HMAC_SHA512_Context
#define HMAC_INIT APS_NAMESPACE(HMAC_SHA512t256_Init)

View file

@ -38,8 +38,9 @@
#include <utils.h>
#define SHA512_HASH_SIZE 64
#define SHA512t256_HASH_SIZE 32
#define SHA512_HASH_SIZE 64
#define SHA512t256_HASH_SIZE 32
#define SHA512_BLOCK_SIZE 128L
/* Hash size in 64-bit words */
#define SHA512_HASH_WORDS 8
@ -50,8 +51,8 @@ typedef struct _SHA512_Context {
uint64_t hash[SHA512_HASH_WORDS];
uint32_t bufferLength;
union {
uint64_t words[16];
uint8_t bytes[128];
uint64_t words[SHA512_BLOCK_SIZE/8];
uint8_t bytes[SHA512_BLOCK_SIZE];
} buffer;
} SHA512_Context;

245
crypto/sha2_utils.c Normal file
View file

@ -0,0 +1,245 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#include <sys/types.h>
#include <stdlib.h>
#include <openssl/sha.h>
#include <sha512.h>
#include <stdio.h>
#include <string.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include <utils.h>
/*
* Helper functions for single-call SHA2 hashing. Both serial and
* parallel versions are provided.
*/
/*
 * Serial, single-call SHA-256 using OpenSSL.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 */
void
ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	SHA256_CTX sha_state;

	/* Classic init / absorb / finalize sequence over the whole buffer. */
	SHA256_Init(&sha_state);
	SHA256_Update(&sha_state, buf, bytes);
	SHA256_Final(cksum_buf, &sha_state);
}
/*
 * Two-way parallel SHA-256 (OpenSSL) of buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= SHA512_BLOCK_SIZE * 4 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 32-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes, not the plain SHA-256 of the data, so the size threshold decides
 * which value is produced and must stay as it is.
 *
 * cksum_buf - receives the 32-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Every hashing context lives on the stack (inside ossl_SHA256()); no heap
 * allocation is performed, so there is no allocation failure to handle.
 */
void
ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][32];
	int i;

	/* Buffer too small to benefit from parallelism: plain serial hash. */
	if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
		ossl_SHA256(cksum_buf, buf, bytes);
		return;
	}

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		ossl_SHA256(cksum[i], pos[i], len[i]);
	}

	/* Combine step: hash the two half-digests laid out back to back. */
	ossl_SHA256(cksum_buf, (uchar_t *)cksum, 2 * 32);
}
/*
 * Serial, single-call SHA-512 using OpenSSL.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 */
void
ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	SHA512_CTX sha_state;

	/* Classic init / absorb / finalize sequence over the whole buffer. */
	SHA512_Init(&sha_state);
	SHA512_Update(&sha_state, buf, bytes);
	SHA512_Final(cksum_buf, &sha_state);
}
/*
 * Two-way parallel SHA-512 (OpenSSL) of buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= SHA512_BLOCK_SIZE * 4 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 64-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes, not the plain SHA-512 of the data, so the size threshold decides
 * which value is produced and must stay as it is.
 *
 * cksum_buf - receives the 64-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Every hashing context lives on the stack (inside ossl_SHA512()); no heap
 * allocation is performed, so there is no allocation failure to handle.
 */
void
ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][64];
	int i;

	/* Buffer too small to benefit from parallelism: plain serial hash. */
	if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
		ossl_SHA512(cksum_buf, buf, bytes);
		return;
	}

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		ossl_SHA512(cksum[i], pos[i], len[i]);
	}

	/* Combine step: hash the two half-digests laid out back to back. */
	ossl_SHA512(cksum_buf, (uchar_t *)cksum, 2 * 64);
}
/*
 * Serial, single-call SHA-512/256 using the bundled opt_SHA512t256_*
 * routines.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 */
void
opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	SHA512_Context sha_state;

	/* Note the argument order of Final: context first, output second. */
	opt_SHA512t256_Init(&sha_state);
	opt_SHA512t256_Update(&sha_state, buf, bytes);
	opt_SHA512t256_Final(&sha_state, cksum_buf);
}
/*
 * Two-way parallel SHA-512/256 (bundled opt_SHA512t256_* routines) of
 * buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= SHA512_BLOCK_SIZE * 4 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 32-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes, not the plain digest of the data, so the size threshold decides
 * which value is produced and must stay as it is.
 *
 * cksum_buf - receives the 32-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Every hashing context lives on the stack (inside opt_SHA512t256()); no
 * heap allocation is performed, so there is no allocation failure to handle.
 */
void
opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][32];
	int i;

	/* Buffer too small to benefit from parallelism: plain serial hash. */
	if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
		opt_SHA512t256(cksum_buf, buf, bytes);
		return;
	}

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		opt_SHA512t256(cksum[i], pos[i], len[i]);
	}

	/* Combine step: hash the two half-digests laid out back to back. */
	opt_SHA512t256(cksum_buf, (uchar_t *)cksum, 2 * 32);
}
/*
 * Serial, single-call SHA-512 using the bundled opt_SHA512_* routines.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 */
void
opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	SHA512_Context sha_state;

	/* Note the argument order of Final: context first, output second. */
	opt_SHA512_Init(&sha_state);
	opt_SHA512_Update(&sha_state, buf, bytes);
	opt_SHA512_Final(&sha_state, cksum_buf);
}
/*
 * Two-way parallel SHA-512 (bundled opt_SHA512_* routines) of
 * buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= SHA512_BLOCK_SIZE * 4 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 64-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes, not the plain SHA-512 of the data, so the size threshold decides
 * which value is produced and must stay as it is.
 *
 * cksum_buf - receives the 64-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Every hashing context lives on the stack (inside opt_SHA512()); no heap
 * allocation is performed, so there is no allocation failure to handle.
 */
void
opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][64];
	int i;

	/* Buffer too small to benefit from parallelism: plain serial hash. */
	if (bytes / 2 <= SHA512_BLOCK_SIZE * 4) {
		opt_SHA512(cksum_buf, buf, bytes);
		return;
	}

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		opt_SHA512(cksum[i], pos[i], len[i]);
	}

	/* Combine step: hash the two half-digests laid out back to back. */
	opt_SHA512(cksum_buf, (uchar_t *)cksum, 2 * 64);
}

15
crypto/sha2_utils.h Normal file
View file

@ -0,0 +1,15 @@
#ifndef _SHA2_UTILS_H_
#define _SHA2_UTILS_H_
/*
 * Single-call SHA-2 hashing helpers. Each takes the output digest buffer,
 * the input buffer and the input length in bytes.
 *
 * ossl_* variants use the OpenSSL SHA256 and SHA512 routines; opt_* variants
 * use the bundled opt_SHA512 and opt_SHA512t256 routines. The *_par variants
 * split a sufficiently large buffer into two halves, hash each half
 * (in parallel when built with OpenMP) and then hash the two digests
 * together; small buffers fall back to a plain serial hash.
 *
 * NOTE(review): uchar_t and uint64_t are not declared by this header; it
 * presumably relies on utils.h being included first -- confirm.
 */
/* SHA-256 via OpenSSL. */
void ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
void ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
/* SHA-512/256 via the bundled implementation. */
void opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
void opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
/* SHA-512 via OpenSSL. */
void ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
void ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
/* SHA-512 via the bundled implementation. */
void opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
void opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
#endif

113
crypto/sha3_utils.c Normal file
View file

@ -0,0 +1,113 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#include <sys/types.h>
#include <KeccakNISTInterface.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include <utils.h>
#define KECCAK_BLOCK_SIZE 1024
/*
* Helper functions for single-call SHA3 (Keccak) hashing. Both serial
* and parallel versions are provided.
*/
/*
 * Serial, single-call Keccak with a 256-bit digest.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Returns whatever Keccak_Hash() returns.
 */
int
Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	int status;

	/* Keccak_Hash() takes the input length in bits, not bytes. */
	status = Keccak_Hash(256, buf, bytes * 8, cksum_buf);
	return (status);
}
/*
 * Two-way parallel Keccak with a 256-bit digest over buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= KECCAK_BLOCK_SIZE * 2 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 32-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes rather than the plain digest of the data.
 *
 * cksum_buf - receives the 32-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Returns the Keccak_Hash() status of the serial hash or of the final
 * combine step, or -1 if hashing either half reported a non-zero status.
 */
int
Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][32];
	int i, rv[2];

	/*
	 * Buffer too small to benefit from parallelism: plain serial hash.
	 * The status must be propagated; callers test the return value.
	 */
	if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2)
		return (Keccak_Hash(256, buf, bytes * 8, cksum_buf));

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		/* Keccak_Hash() takes the input length in bits. */
		rv[i] = Keccak_Hash(256, pos[i], len[i] * 8, cksum[i]);
	}
	if (rv[0] != 0 || rv[1] != 0)
		return (-1);

	/* Combine step: hash the two half-digests laid out back to back. */
	return (Keccak_Hash(256, (const BitSequence *)cksum, 2 * 32 * 8, cksum_buf));
}
/*
 * Serial, single-call Keccak with a 512-bit digest.
 *
 * cksum_buf - receives the digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Returns whatever Keccak_Hash() returns.
 */
int
Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	int status;

	/* Keccak_Hash() takes the input length in bits, not bytes. */
	status = Keccak_Hash(512, buf, bytes * 8, cksum_buf);
	return (status);
}
/*
 * Two-way parallel Keccak with a 512-bit digest over buf[0 .. bytes).
 *
 * Inputs with bytes / 2 <= KECCAK_BLOCK_SIZE * 2 are not worth the
 * threading overhead and are hashed serially. Anything larger is cut into
 * two halves which are hashed independently (concurrently when compiled
 * with OpenMP); the two 64-byte digests are then hashed together to give
 * the final value. For large inputs the result is therefore a hash of
 * hashes rather than the plain digest of the data.
 *
 * cksum_buf - receives the 64-byte digest.
 * buf       - start of the data to hash.
 * bytes     - number of bytes at buf to hash.
 *
 * Returns the Keccak_Hash() status of the serial hash or of the final
 * combine step, or -1 if hashing either half reported a non-zero status.
 */
int
Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes)
{
	uchar_t *pos[2];
	uint64_t len[2];
	uchar_t cksum[2][64];
	int i, rv[2];

	/*
	 * Buffer too small to benefit from parallelism: plain serial hash.
	 * The status must be propagated; callers test the return value.
	 */
	if (bytes / 2 <= KECCAK_BLOCK_SIZE * 2)
		return (Keccak_Hash(512, buf, bytes * 8, cksum_buf));

	/* The second half absorbs the odd byte when bytes is not even. */
	pos[0] = buf;
	len[0] = bytes / 2;
	pos[1] = buf + bytes / 2;
	len[1] = bytes - bytes / 2;

#if defined(_OPENMP)
#	pragma omp parallel for
#endif
	for (i = 0; i < 2; ++i) {
		/* Keccak_Hash() takes the input length in bits. */
		rv[i] = Keccak_Hash(512, pos[i], len[i] * 8, cksum[i]);
	}
	if (rv[0] != 0 || rv[1] != 0)
		return (-1);

	/* Combine step: hash the two half-digests laid out back to back. */
	return (Keccak_Hash(512, (const BitSequence *)cksum, 2 * 64 * 8, cksum_buf));
}

11
crypto/sha3_utils.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef _SHA3_UTILS_H_
#define _SHA3_UTILS_H_
/*
 * Single-call Keccak hashing helpers built on Keccak_Hash(). Each takes the
 * output digest buffer, the input buffer and the input length in bytes, and
 * returns an int status (callers treat non-zero as failure).
 *
 * The *_par variants split a sufficiently large buffer into two halves,
 * hash each half (in parallel when built with OpenMP) and then hash the two
 * digests together; small buffers fall back to a plain serial hash.
 *
 * NOTE(review): uchar_t and uint64_t are not declared by this header; it
 * presumably relies on utils.h being included first -- confirm.
 */
/* 256-bit digest. */
int Keccak256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
int Keccak256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
/* 512-bit digest. */
int Keccak512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
int Keccak512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes);
#endif

6
main.c
View file

@ -528,8 +528,14 @@ redo:
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
tdat->props);
} else {
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
&_chunksize, tdat->level, HDR, tdat->data);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
get_mb_s(_chunksize, strt, en)));
}
} else {
memcpy(tdat->uncompressed_chunk, cseg, _chunksize);