pcompress/bsc/libbsc/coder/coder.cpp
Moinak Ghosh fb25e53b4f Add forked and optimized copy of LGPL version of Libbsc.
Strip out Sort Transform from Libbsc copy.
Reduce Libbsc memory use.
Avoid redundant adler32 of data block in Libbsc.
2013-11-30 22:13:33 +05:30

346 lines
11 KiB
C++

/*-----------------------------------------------------------*/
/* Block Sorting, Lossless Data Compression Library. */
/* Second stage encoding functions */
/*-----------------------------------------------------------*/
/*--
This file is a part of bsc and/or libbsc, a program and a library for
lossless, block-sorting data compression.
Copyright (c) 2009-2012 Ilya Grebnov <ilya.grebnov@gmail.com>
See file AUTHORS for a full list of contributors.
The bsc and libbsc is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The bsc and libbsc is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the bsc and libbsc. If not, see http://www.gnu.org/licenses/.
Please see the files COPYING and COPYING.LIB for full copyright information.
See also the bsc and libbsc web site:
http://libbsc.com/ for more information.
--*/
#include <stdlib.h>
#include <memory.h>
#include "coder.h"
#include "../libbsc.h"
#include "../platform/platform.h"
#include "qlfc/qlfc.h"
int bsc_coder_init(int features)
{
int result = LIBBSC_NO_ERROR;
if (result == LIBBSC_NO_ERROR) result = bsc_qlfc_init(features);
return result;
}
static INLINE int bsc_coder_num_blocks(int n)
{
if (n < 256 * 1024) return 1;
if (n < 4 * 1024 * 1024) return 2;
if (n < 16 * 1024 * 1024) return 4;
return 8;
}
int bsc_coder_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize, int coder)
{
if (coder == LIBBSC_CODER_QLFC_STATIC) return bsc_qlfc_static_encode_block (input, output, inputSize, outputSize);
if (coder == LIBBSC_CODER_QLFC_ADAPTIVE) return bsc_qlfc_adaptive_encode_block(input, output, inputSize, outputSize);
return LIBBSC_BAD_PARAMETER;
}
void bsc_coder_split_blocks(const unsigned char * input, int n, int nBlocks, int * blockStart, int * blockSize)
{
int rankSize = 0;
for (int i = 1; i < n; i += 32)
{
if (input[i] != input[i - 1]) rankSize++;
}
if (rankSize > nBlocks)
{
int blockRankSize = rankSize / nBlocks;
blockStart[0] = 0; rankSize = 0;
for (int id = 0, i = 1; i < n; i += 32)
{
if (input[i] != input[i - 1])
{
rankSize++;
if (rankSize == blockRankSize)
{
rankSize = 0;
blockSize[id] = i - blockStart[id];
id++; blockStart[id] = i;
if (id == nBlocks - 1) break;
}
}
}
blockSize[nBlocks - 1] = n - blockStart[nBlocks - 1];
}
else
{
for (int p = 0; p < nBlocks; ++p)
{
blockStart[p] = (n / nBlocks) * p;
blockSize[p] = (p != nBlocks - 1) ? n / nBlocks : n - (n / nBlocks) * (nBlocks - 1);
}
}
}
int bsc_coder_compress_serial(const unsigned char * input, unsigned char * output, int n, int coder)
{
if (bsc_coder_num_blocks(n) == 1)
{
int result = bsc_coder_encode_block(input, output + 1, n, n - 1, coder);
if (result >= LIBBSC_NO_ERROR) result = (output[0] = 1, result + 1);
return result;
}
int compressedStart[ALPHABET_SIZE];
int compressedSize[ALPHABET_SIZE];
int nBlocks = bsc_coder_num_blocks(n);
int outputPtr = 1 + 8 * nBlocks;
bsc_coder_split_blocks(input, n, nBlocks, compressedStart, compressedSize);
output[0] = nBlocks;
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int inputStart = compressedStart[blockId];
int inputSize = compressedSize[blockId];
int outputSize = inputSize; if (outputSize > n - outputPtr) outputSize = n - outputPtr;
int result = bsc_coder_encode_block(input + inputStart, output + outputPtr, inputSize, outputSize, coder);
if (result < LIBBSC_NO_ERROR)
{
if (outputPtr + inputSize >= n) return LIBBSC_NOT_COMPRESSIBLE;
result = inputSize; memcpy(output + outputPtr, input + inputStart, inputSize);
}
*(int *)(output + 1 + 8 * blockId + 0) = inputSize;
*(int *)(output + 1 + 8 * blockId + 4) = result;
outputPtr += result;
}
return outputPtr;
}
#ifdef LIBBSC_OPENMP
int bsc_coder_compress_parallel(const unsigned char * input, unsigned char * output, int n, int coder)
{
if (unsigned char * buffer = (unsigned char *)bsc_malloc(n * sizeof(unsigned char)))
{
int compressionResult[ALPHABET_SIZE];
int compressedStart[ALPHABET_SIZE];
int compressedSize[ALPHABET_SIZE];
int nBlocks = bsc_coder_num_blocks(n);
int result = LIBBSC_NO_ERROR;
int numThreads = omp_get_max_threads();
if (numThreads > nBlocks) numThreads = nBlocks;
output[0] = nBlocks;
#pragma omp parallel num_threads(numThreads) if(numThreads > 1)
{
if (omp_get_num_threads() == 1)
{
result = bsc_coder_compress_serial(input, output, n, coder);
}
else
{
#pragma omp single
{
bsc_coder_split_blocks(input, n, nBlocks, compressedStart, compressedSize);
}
#pragma omp for schedule(dynamic)
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int blockStart = compressedStart[blockId];
int blockSize = compressedSize[blockId];
compressionResult[blockId] = bsc_coder_encode_block(input + blockStart, buffer + blockStart, blockSize, blockSize, coder);
if (compressionResult[blockId] < LIBBSC_NO_ERROR) compressionResult[blockId] = blockSize;
*(int *)(output + 1 + 8 * blockId + 0) = blockSize;
*(int *)(output + 1 + 8 * blockId + 4) = compressionResult[blockId];
}
#pragma omp single
{
result = 1 + 8 * nBlocks;
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
result += compressionResult[blockId];
}
if (result >= n) result = LIBBSC_NOT_COMPRESSIBLE;
}
if (result >= LIBBSC_NO_ERROR)
{
#pragma omp for schedule(dynamic)
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int blockStart = compressedStart[blockId];
int blockSize = compressedSize[blockId];
int outputPtr = 1 + 8 * nBlocks;
for (int p = 0; p < blockId; ++p) outputPtr += compressionResult[p];
if (compressionResult[blockId] != blockSize)
{
memcpy(output + outputPtr, buffer + blockStart, compressionResult[blockId]);
}
else
{
memcpy(output + outputPtr, input + blockStart, compressionResult[blockId]);
}
}
}
}
}
bsc_free(buffer);
return result;
}
return LIBBSC_NOT_ENOUGH_MEMORY;
}
#endif
int bsc_coder_compress(const unsigned char * input, unsigned char * output, int n, int coder, int features)
{
if ((coder != LIBBSC_CODER_QLFC_STATIC) && (coder != LIBBSC_CODER_QLFC_ADAPTIVE))
{
return LIBBSC_BAD_PARAMETER;
}
#ifdef LIBBSC_OPENMP
if ((bsc_coder_num_blocks(n) != 1) && (features & LIBBSC_FEATURE_MULTITHREADING))
{
return bsc_coder_compress_parallel(input, output, n, coder);
}
#endif
return bsc_coder_compress_serial(input, output, n, coder);
}
int bsc_coder_decode_block(const unsigned char * input, unsigned char * output, int coder)
{
if (coder == LIBBSC_CODER_QLFC_STATIC) return bsc_qlfc_static_decode_block (input, output);
if (coder == LIBBSC_CODER_QLFC_ADAPTIVE) return bsc_qlfc_adaptive_decode_block(input, output);
return LIBBSC_BAD_PARAMETER;
}
int bsc_coder_decompress(const unsigned char * input, unsigned char * output, int coder, int features)
{
if ((coder != LIBBSC_CODER_QLFC_STATIC) && (coder != LIBBSC_CODER_QLFC_ADAPTIVE))
{
return LIBBSC_BAD_PARAMETER;
}
int nBlocks = input[0];
if (nBlocks == 1)
{
return bsc_coder_decode_block(input + 1, output, coder);
}
int decompressionResult[ALPHABET_SIZE];
#ifdef LIBBSC_OPENMP
if (features & LIBBSC_FEATURE_MULTITHREADING)
{
#pragma omp parallel for schedule(dynamic)
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4);
int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0);
inputPtr += 1 + 8 * nBlocks;
int inputSize = *(int *)(input + 1 + 8 * blockId + 4);
int outputSize = *(int *)(input + 1 + 8 * blockId + 0);
if (inputSize != outputSize)
{
decompressionResult[blockId] = bsc_coder_decode_block(input + inputPtr, output + outputPtr, coder);
}
else
{
decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize);
}
}
}
else
#endif
{
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4);
int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0);
inputPtr += 1 + 8 * nBlocks;
int inputSize = *(int *)(input + 1 + 8 * blockId + 4);
int outputSize = *(int *)(input + 1 + 8 * blockId + 0);
if (inputSize != outputSize)
{
decompressionResult[blockId] = bsc_coder_decode_block(input + inputPtr, output + outputPtr, coder);
}
else
{
decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize);
}
}
}
int dataSize = 0, result = LIBBSC_NO_ERROR;
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
if (decompressionResult[blockId] < LIBBSC_NO_ERROR) result = decompressionResult[blockId];
dataSize += decompressionResult[blockId];
}
return (result == LIBBSC_NO_ERROR) ? dataSize : result;
}
/*-----------------------------------------------------------*/
/* End coder.cpp */
/*-----------------------------------------------------------*/