/* * This file is a part of Pcompress, a chunked parallel multi- * algorithm lossless compression and decompression program. * * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. * Use is subject to license terms. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program. * If not, see . * * moinakg@belenix.org, http://moinakg.wordpress.com/ */ #include #include #include #include #include #include #if defined(_OPENMP) #include #endif #include #define BLKSZ (2048) /* * Helper functions for single-call SHA2 hashing. Both serial and * parallel versions are provided. Parallel versions use 2-stage * Merkle Tree hashing. * * At the leaf level data is split into BLKSZ blocks and 4 threads * compute 4 hashes of interleaved block streams. At 2nd level two * new hashes are generated from hashing the 2 pairs of hash values. * In the final stage the 2 hash values are hashed to the final digest. * * References: * http://eprint.iacr.org/2012/476.pdf * http://gva.noekeon.org/papers/bdpv09tree.html */ void ossl_SHA256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { SHA256_CTX ctx; SHA256_Init(&ctx); SHA256_Update(&ctx, buf, bytes); SHA256_Final(cksum_buf, &ctx); } void ossl_SHA256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { uchar_t cksum[6][32]; SHA256_CTX ctx[4]; int i, rem; uint64_t _bytes; /* * Is it worth doing the overhead of parallelism ? Buffer large enough ? * If not then just do a simple serial hashing. */ if (bytes <= BLKSZ * 2) { SHA256_Init(&ctx[0]); SHA256_Update(&ctx[0], buf, bytes); SHA256_Final(cksum_buf, &ctx[0]); return; } /* * Do first level hashes in parallel. */ _bytes = (bytes / BLKSZ) * BLKSZ; rem = bytes - _bytes; #if defined(_OPENMP) # pragma omp parallel for #endif for(i = 0; i < 4; ++i) { uint64_t byt; byt = i * BLKSZ; SHA256_Init(&ctx[i]); while (byt < _bytes) { SHA256_Update(&ctx[i], buf + byt, BLKSZ); byt += 4 * BLKSZ; } if (i>0) SHA256_Final(cksum[i], &ctx[i]); } if (rem > 0) { SHA256_Update(&ctx[0], buf + bytes - rem, rem); } SHA256_Final(cksum[0], &ctx[0]); /* * Second level hashes. */ SHA256_Init(&ctx[0]); SHA256_Init(&ctx[1]); SHA256_Update(&ctx[0], &cksum[0], 2 * 32); SHA256_Update(&ctx[1], &cksum[1], 2 * 32); SHA256_Final(cksum[4], &ctx[0]); SHA256_Final(cksum[5], &ctx[1]); /* * Final hash. */ SHA256_Init(&ctx[0]); SHA256_Update(&ctx[0], &cksum[4], 2 * 32); SHA256_Final(cksum_buf, &ctx[0]); } void ossl_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { SHA512_CTX ctx; SHA512_Init(&ctx); SHA512_Update(&ctx, buf, bytes); SHA512_Final(cksum_buf, &ctx); } void ossl_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { uchar_t cksum[6][32]; SHA512_CTX ctx[4]; int i, rem; uint64_t _bytes; /* * Is it worth doing the overhead of parallelism ? Buffer large enough ? * If not then just do a simple serial hashing. */ if (bytes <= BLKSZ * 2) { SHA512_Init(&ctx[0]); SHA512_Update(&ctx[0], buf, bytes); SHA512_Final(cksum_buf, &ctx[0]); return; } /* * Do first level hashes in parallel. */ _bytes = (bytes / BLKSZ) * BLKSZ; rem = bytes - _bytes; #if defined(_OPENMP) # pragma omp parallel for #endif for(i = 0; i < 4; ++i) { uint64_t byt; byt = i * BLKSZ; SHA512_Init(&ctx[i]); while (byt < _bytes) { SHA512_Update(&ctx[i], buf + byt, BLKSZ); byt += 4 * BLKSZ; } if (i>0) SHA512_Final(cksum[i], &ctx[i]); } if (rem > 0) { SHA512_Update(&ctx[0], buf + bytes - rem, rem); } SHA512_Final(cksum[0], &ctx[0]); /* * Second level hashes. */ SHA512_Init(&ctx[0]); SHA512_Init(&ctx[1]); SHA512_Update(&ctx[0], &cksum[0], 2 * 32); SHA512_Update(&ctx[1], &cksum[1], 2 * 32); SHA512_Final(cksum[4], &ctx[0]); SHA512_Final(cksum[5], &ctx[1]); /* * Final hash. */ SHA512_Init(&ctx[0]); SHA512_Update(&ctx[0], &cksum[4], 2 * 32); SHA512_Final(cksum_buf, &ctx[0]); } void opt_SHA512t256(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { SHA512_Context ctx; opt_SHA512t256_Init(&ctx); opt_SHA512t256_Update(&ctx, buf, bytes); opt_SHA512t256_Final(&ctx, cksum_buf); } void opt_SHA512t256_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { uchar_t cksum[6][32]; SHA512_Context ctx[4]; int i, rem; uint64_t _bytes; /* * Is it worth doing the overhead of parallelism ? Buffer large enough ? * If not then just do a simple serial hashing. */ if (bytes <= BLKSZ * 2) { opt_SHA512t256_Init(&ctx[0]); opt_SHA512t256_Update(&ctx[0], buf, bytes); opt_SHA512t256_Final(&ctx[0], cksum_buf); return; } /* * Do first level hashes in parallel. */ _bytes = (bytes / BLKSZ) * BLKSZ; rem = bytes - _bytes; #if defined(_OPENMP) # pragma omp parallel for #endif for(i = 0; i < 4; ++i) { uint64_t byt; byt = i * BLKSZ; opt_SHA512t256_Init(&ctx[i]); while (byt < _bytes) { opt_SHA512t256_Update(&ctx[i], buf + byt, BLKSZ); byt += 4 * BLKSZ; } if (i>0) opt_SHA512t256_Final(&ctx[i], cksum[i]); } if (rem > 0) { opt_SHA512t256_Update(&ctx[0], buf + bytes - rem, rem); } opt_SHA512t256_Final(&ctx[0], cksum[0]); /* * Second level hashes. */ opt_SHA512t256_Init(&ctx[0]); opt_SHA512t256_Init(&ctx[1]); opt_SHA512t256_Update(&ctx[0], &cksum[0], 2 * 32); opt_SHA512t256_Update(&ctx[1], &cksum[1], 2 * 32); opt_SHA512t256_Final(&ctx[0], cksum[4]); opt_SHA512t256_Final(&ctx[1], cksum[5]); /* * Final hash. */ opt_SHA512t256_Init(&ctx[0]); opt_SHA512t256_Update(&ctx[0], &cksum[4], 2 * 32); opt_SHA512t256_Final(&ctx[0], cksum_buf); } void opt_SHA512(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { SHA512_Context ctx; opt_SHA512_Init(&ctx); opt_SHA512_Update(&ctx, buf, bytes); opt_SHA512_Final(&ctx, cksum_buf); } void opt_SHA512_par(uchar_t *cksum_buf, uchar_t *buf, uint64_t bytes) { uchar_t cksum[6][64]; SHA512_Context ctx[4]; int i, rem; uint64_t _bytes; /* * Is it worth doing the overhead of parallelism ? Buffer large enough ? * If not then just do a simple serial hashing. */ if (bytes <= BLKSZ * 2) { opt_SHA512_Init(&ctx[0]); opt_SHA512_Update(&ctx[0], buf, bytes); opt_SHA512_Final(&ctx[0], cksum_buf); return; } /* * Do first level hashes in parallel. */ _bytes = (bytes / BLKSZ) * BLKSZ; rem = bytes - _bytes; #if defined(_OPENMP) # pragma omp parallel for #endif for(i = 0; i < 4; ++i) { uint64_t byt; byt = i * BLKSZ; opt_SHA512_Init(&ctx[i]); while (byt < _bytes) { opt_SHA512_Update(&ctx[i], buf + byt, BLKSZ); byt += 4 * BLKSZ; } if (i>0) opt_SHA512_Final(&ctx[i], cksum[i]); } if (rem > 0) { opt_SHA512_Update(&ctx[0], buf + bytes - rem, rem); } opt_SHA512_Final(&ctx[0], cksum[0]); /* * Second level hashes. */ opt_SHA512_Init(&ctx[0]); opt_SHA512_Init(&ctx[1]); opt_SHA512_Update(&ctx[0], &cksum[0], 2 * 64); opt_SHA512_Update(&ctx[1], &cksum[1], 2 * 64); opt_SHA512_Final(&ctx[0], cksum[4]); opt_SHA512_Final(&ctx[1], cksum[5]); /* * Final hash. */ opt_SHA512_Init(&ctx[0]); opt_SHA512_Update(&ctx[0], &cksum[4], 2 * 64); opt_SHA512_Final(&ctx[0], cksum_buf); }