/* * This file is a part of Pcompress, a chunked parallel multi- * algorithm lossless compression and decompression program. * * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. * Use is subject to license terms. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program. * If not, see . * * moinakg@belenix.org, http://moinakg.wordpress.com/ * */ /*-----------------------------------------------------------*/ /* Block Sorting, Lossless Data Compression Library. */ /* Lempel Ziv Prediction */ /*-----------------------------------------------------------*/ /*-- This file is a part of bsc and/or libbsc, a program and a library for lossless, block-sorting data compression. Copyright (c) 2009-2012 Ilya Grebnov Copyright (c) 2012 Moinak Ghosh See file AUTHORS for a full list of contributors. The bsc and libbsc is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The bsc and libbsc is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the bsc and libbsc. If not, see http://www.gnu.org/licenses/. Please see the files COPYING and COPYING.LIB for full copyright information. See also the bsc and libbsc web site: http://libbsc.com/ for more information. --*/ /* * TODO: Port the parallel implementation. */ #undef LZP_OPENMP #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif #include #include #include #include #include #include #include #include "lzp.h" #define LZP_MATCH_FLAG 0xf2 static inline int bsc_lzp_num_blocks(int64_t n) { int nb; if (n < 256LL * 1024LL) return 1; if (n < 4LL * 1024LL * 1024LL) return 2; if (n < 16LL * 1024LL * 1024LL) return 4; if (n < LZP_MAX_BLOCK) return 8; nb = n / LZP_MAX_BLOCK; if (n % LZP_MAX_BLOCK) nb++; return (nb); } static int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inputEnd, unsigned char * output, unsigned char * outputEnd, int hashSize, int minLen) { int *lookup, i; if (inputEnd - input < 16) { return LZP_NOT_COMPRESSIBLE; } lookup = (int *)slab_calloc(NULL, (int)(1 << hashSize), sizeof(int)); if (lookup) { unsigned int mask = (int)(1 << hashSize) - 1; const unsigned char * inputStart = input; const unsigned char * outputStart = output; const unsigned char * outputEOB = outputEnd - 4; unsigned int context = 0; for (i = 0; i < 4; ++i) { context = (context << 8) | (*output++ = *input++); } const unsigned char * heuristic = input; const unsigned char * inputMinLenEnd = inputEnd - minLen - 8; while ((input < inputMinLenEnd) && (output < outputEOB)) { unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; int value = lookup[index]; lookup[index] = (int)(input - inputStart); if (value > 0) { const unsigned char * reference = inputStart + value; if ((*(unsigned int *)(input + minLen - 4) == *(unsigned int *)(reference + minLen - 4)) && (*(unsigned int *)(input) == *(unsigned int *)(reference))) { if ((heuristic > input) && (*(unsigned int *)heuristic != *(unsigned int *)(reference + (heuristic - input)))) { goto LZP_MATCH_NOT_FOUND; } int len = 4; for (; input + len < inputMinLenEnd; len += 4) { if (*(unsigned int *)(input + len) != *(unsigned int *)(reference + len)) break; } if (len < minLen) { if (heuristic < input + len) heuristic = input + len; goto LZP_MATCH_NOT_FOUND; } if (input[len] == reference[len]) len++; if (input[len] == reference[len]) len++; if (input[len] == reference[len]) len++; input += len; context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); *output++ = LZP_MATCH_FLAG; len -= minLen; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; } *output++ = (unsigned char)(len); } else { unsigned char next; LZP_MATCH_NOT_FOUND: next = *output++ = *input++; context = (context << 8) | next; if (next == LZP_MATCH_FLAG) *output++ = 255; } } else { context = (context << 8) | (*output++ = *input++); } } while ((input < inputEnd) && (output < outputEOB)) { unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; int value = lookup[index]; lookup[index] = (int)(input - inputStart); if (value > 0) { unsigned char next = *output++ = *input++; context = (context << 8) | next; if (next == LZP_MATCH_FLAG) *output++ = 255; } else { context = (context << 8) | (*output++ = *input++); } } slab_free(NULL, lookup); return (output >= outputEOB) ? LZP_NOT_COMPRESSIBLE : (int)(output - outputStart); } return LZP_NOT_ENOUGH_MEMORY; } static int bsc_lzp_decode_block(const unsigned char * input, const unsigned char * inputEnd, unsigned char * output, int hashSize, int minLen) { int *lookup, i; if (inputEnd - input < 4) { return LZP_UNEXPECTED_EOB; } lookup = (int *)slab_calloc(NULL, (int)(1 << hashSize), sizeof(int)); if (lookup) { unsigned int mask = (int)(1 << hashSize) - 1; const unsigned char * outputStart = output; unsigned int context = 0; for (i = 0; i < 4; ++i) { context = (context << 8) | (*output++ = *input++); } while (input < inputEnd) { unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; int value = lookup[index]; lookup[index] = (int)(output - outputStart); if (*input == LZP_MATCH_FLAG && value > 0) { input++; if (*input != 255) { int len = minLen; while (1) { len += *input; if (*input++ != 254) break; } const unsigned char * reference = outputStart + value; unsigned char * outputEnd = output + len; if (output - reference < 4) { int offset[4] = {0, 3, 2, 3}; *output++ = *reference++; *output++ = *reference++; *output++ = *reference++; *output++ = *reference++; reference -= offset[output - reference]; } while (output < outputEnd) { *(unsigned int *)output = *(unsigned int*)reference; output += 4; reference += 4; } output = outputEnd; context = output[-1] | (output[-2] << 8) | (output[-3] << 16) | (output[-4] << 24); } else { input++; context = (context << 8) | (*output++ = LZP_MATCH_FLAG); } } else { context = (context << 8) | (*output++ = *input++); } } slab_free(NULL, lookup); return (int)(output - outputStart); } return LZP_NOT_ENOUGH_MEMORY; } static int64_t bsc_lzp_compress_serial(const unsigned char * input, unsigned char * output, int64_t n, int hashSize, int minLen) { if (bsc_lzp_num_blocks(n) == 1) { int result = bsc_lzp_encode_block(input, input + n, output + 1, output + n - 1, hashSize, minLen); if (result >= LZP_NO_ERROR) result = (output[0] = 1, result + 1); return result; } int nBlocks = bsc_lzp_num_blocks(n); int chunkSize; int blockId; int64_t outputPtr = 1 + 8 * nBlocks; DEBUG_STAT_EN(double strt, en); DEBUG_STAT_EN(strt = get_wtime_millis()); if (n > LZP_MAX_BLOCK) chunkSize = LZP_MAX_BLOCK; else chunkSize = n / nBlocks; output[0] = nBlocks; for (blockId = 0; blockId < nBlocks; ++blockId) { int64_t inputStart = blockId * chunkSize; int inputSize = blockId != nBlocks - 1 ? chunkSize : n - inputStart; int outputSize = inputSize; if (outputSize > n - outputPtr) outputSize = n - outputPtr; int result = bsc_lzp_encode_block(input + inputStart, input + inputStart + inputSize, output + outputPtr, output + outputPtr + outputSize, hashSize, minLen); if (result < LZP_NO_ERROR) { if (outputPtr + inputSize >= n) return LZP_NOT_COMPRESSIBLE; result = inputSize; memcpy(output + outputPtr, input + inputStart, inputSize); } *(int *)(output + 1 + 8 * blockId + 0) = inputSize; *(int *)(output + 1 + 8 * blockId + 4) = result; outputPtr += result; } DEBUG_STAT_EN(en = get_wtime_millis()); DEBUG_STAT_EN(fprintf(stderr, "LZP: Insize: %" PRId64 ", Outsize: %" PRId64 "\n", n, outputPtr)); DEBUG_STAT_EN(fprintf(stderr, "LZP: Processed at %.3f MB/s\n", get_mb_s(n, strt, en))); return outputPtr; } #ifdef LZP_OPENMP static int bsc_lzp_compress_parallel(const unsigned char * input, unsigned char * output, int64_t n, int hashSize, int minLen) { if (unsigned char * buffer = (unsigned char *)bsc_malloc(n * sizeof(unsigned char))) { int compressionResult[ALPHABET_SIZE]; int nBlocks = bsc_lzp_num_blocks(n); int result = LZP_NO_ERROR; int chunkSize = n / nBlocks; int numThreads = omp_get_max_threads(); if (numThreads > nBlocks) numThreads = nBlocks; output[0] = nBlocks; #pragma omp parallel num_threads(numThreads) if(numThreads > 1) { if (omp_get_num_threads() == 1) { result = bsc_lzp_compress_serial(input, output, n, hashSize, minLen); } else { #pragma omp for schedule(dynamic) for (int blockId = 0; blockId < nBlocks; ++blockId) { int blockStart = blockId * chunkSize; int blockSize = blockId != nBlocks - 1 ? chunkSize : n - blockStart; compressionResult[blockId] = bsc_lzp_encode_block(input + blockStart, input + blockStart + blockSize, buffer + blockStart, buffer + blockStart + blockSize, hashSize, minLen); if (compressionResult[blockId] < LZP_NO_ERROR) compressionResult[blockId] = blockSize; *(int *)(output + 1 + 8 * blockId + 0) = blockSize; *(int *)(output + 1 + 8 * blockId + 4) = compressionResult[blockId]; } #pragma omp single { result = 1 + 8 * nBlocks; for (int blockId = 0; blockId < nBlocks; ++blockId) { result += compressionResult[blockId]; } if (result >= n) result = LZP_NOT_COMPRESSIBLE; } if (result >= LZP_NO_ERROR) { #pragma omp for schedule(dynamic) for (int blockId = 0; blockId < nBlocks; ++blockId) { int blockStart = blockId * chunkSize; int blockSize = blockId != nBlocks - 1 ? chunkSize : n - blockStart; int outputPtr = 1 + 8 * nBlocks; for (int p = 0; p < blockId; ++p) outputPtr += compressionResult[p]; if (compressionResult[blockId] != blockSize) { memcpy(output + outputPtr, buffer + blockStart, compressionResult[blockId]); } else { memcpy(output + outputPtr, input + blockStart, compressionResult[blockId]); } } } } } bsc_free(buffer); return result; } return LZP_NOT_ENOUGH_MEMORY; } #endif int64_t lzp_compress(const unsigned char * input, unsigned char * output, int64_t n, int hashSize, int minLen, int features) { #ifdef LZP_OPENMP if ((bsc_lzp_num_blocks(n) != 1) && (features & LZP_FEATURE_MULTITHREADING)) { return bsc_lzp_compress_parallel(input, output, n, hashSize, minLen); } #endif return bsc_lzp_compress_serial(input, output, n, hashSize, minLen); } int64_t lzp_decompress(const unsigned char * input, unsigned char * output, int64_t n, int hashSize, int minLen, int features) { int nBlocks = input[0]; if (nBlocks == 1) { return bsc_lzp_decode_block(input + 1, input + n, output, hashSize, minLen); } int decompressionResult[ALPHABET_SIZE]; #ifdef LZP_OPENMP if (features & LZP_FEATURE_MULTITHREADING) { #pragma omp parallel for schedule(dynamic) for (int blockId = 0; blockId < nBlocks; ++blockId) { int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); inputPtr += 1 + 8 * nBlocks; int inputSize = *(int *)(input + 1 + 8 * blockId + 4); int outputSize = *(int *)(input + 1 + 8 * blockId + 0); if (inputSize != outputSize) { decompressionResult[blockId] = bsc_lzp_decode_block(input + inputPtr, input + inputPtr + inputSize, output + outputPtr, hashSize, minLen); } else { decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize); } } } else #endif { int blockId, p; for (blockId = 0; blockId < nBlocks; ++blockId) { int64_t inputPtr = 0; for (p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); int64_t outputPtr = 0; for (p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); inputPtr += 1 + 8 * nBlocks; int inputSize = *(int *)(input + 1 + 8 * blockId + 4); int outputSize = *(int *)(input + 1 + 8 * blockId + 0); if (inputSize != outputSize) { decompressionResult[blockId] = bsc_lzp_decode_block(input + inputPtr, input + inputPtr + inputSize, output + outputPtr, hashSize, minLen); } else { decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize); } } } int64_t dataSize = 0; int result = LZP_NO_ERROR; int blockId; for (blockId = 0; blockId < nBlocks; ++blockId) { if (decompressionResult[blockId] < LZP_NO_ERROR) result = decompressionResult[blockId]; dataSize += decompressionResult[blockId]; } return (result == LZP_NO_ERROR) ? dataSize : result; } /* * Counter-intuitively we use a larger hash (with better LZP compression) for lower global * compression levels. So that LZP preprocessing plays along nicely with the primary * compression algorithm being used and actually provides a benefit. */ int lzp_hash_size(int level) { if (level > 7) { return (LZP_DEFAULT_LZPHASHSIZE + 2); } else if (level > 5) { return (LZP_DEFAULT_LZPHASHSIZE + 3); } else if (level > 3) { return (LZP_DEFAULT_LZPHASHSIZE + 4); } else { return (LZP_DEFAULT_LZPHASHSIZE + 5); } } /*-----------------------------------------------------------*/ /* End lzp.cpp */ /*-----------------------------------------------------------*/