pcompress/bsc/libbsc/libbsc/libbsc.cpp

633 lines
19 KiB
C++
Raw Normal View History

/*-----------------------------------------------------------*/
/* Block Sorting, Lossless Data Compression Library. */
/* Compression/decompression functions */
/*-----------------------------------------------------------*/
/*--
This file is a part of bsc and/or libbsc, a program and a library for
lossless, block-sorting data compression.
Copyright (c) 2009-2012 Ilya Grebnov <ilya.grebnov@gmail.com>
See file AUTHORS for a full list of contributors.
The bsc and libbsc is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The bsc and libbsc is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the bsc and libbsc. If not, see http://www.gnu.org/licenses/.
Please see the files COPYING and COPYING.LIB for full copyright information.
See also the bsc and libbsc web site:
http://libbsc.com/ for more information.
--*/
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <stdio.h>
#include "../platform/platform.h"
#include "../libbsc.h"
#include "../adler32/adler32.h"
#include "../bwt/bwt.h"
#include "../lzp/lzp.h"
#include "../coder/coder.h"
int bsc_init(int features)
{
int result = LIBBSC_NO_ERROR;
if (result == LIBBSC_NO_ERROR) result = bsc_platform_init(features);
if (result == LIBBSC_NO_ERROR) result = bsc_coder_init(features);
return result;
}
int bsc_store(const unsigned char * input, unsigned char * output, int n, int features)
{
unsigned int adler32_data = bsc_adler32(input, n, features);
memmove(output + LIBBSC_HEADER_SIZE, input, n);
*(int *)(output + 0) = n + LIBBSC_HEADER_SIZE;
*(int *)(output + 4) = n;
*(int *)(output + 8) = 0;
*(int *)(output + 12) = 0;
*(int *)(output + 16) = adler32_data;
*(int *)(output + 20) = adler32_data;
*(int *)(output + 24) = bsc_adler32(output, 24, features);
return n + LIBBSC_HEADER_SIZE;
}
int bsc_compress_inplace(unsigned char * data, int n, int lzpHashSize, int lzpMinLen, int blockSorter, int coder, int features)
{
int indexes[256];
unsigned char num_indexes;
int mode = 0;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : mode = LIBBSC_BLOCKSORTER_BWT; break;
default : return LIBBSC_BAD_PARAMETER;
}
switch (coder)
{
case LIBBSC_CODER_QLFC_STATIC: mode += (LIBBSC_CODER_QLFC_STATIC << 5); break;
case LIBBSC_CODER_QLFC_ADAPTIVE: mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break;
default : return LIBBSC_BAD_PARAMETER;
}
if (lzpMinLen != 0 || lzpHashSize != 0)
{
if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_BAD_PARAMETER;
if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_BAD_PARAMETER;
mode += (lzpMinLen << 8);
mode += (lzpHashSize << 16);
}
if (n < 0 || n > 1073741824) return LIBBSC_BAD_PARAMETER;
if (n <= LIBBSC_HEADER_SIZE)
{
return bsc_store(data, data, n, features);
}
unsigned int adler32_data = bsc_adler32(data, n, features);
int lzSize = n;
if (mode != (mode & 0xff))
{
unsigned char * buffer = (unsigned char *)bsc_malloc(n);
if (buffer == NULL) return LIBBSC_NOT_ENOUGH_MEMORY;
lzSize = bsc_lzp_compress(data, buffer, n, lzpHashSize, lzpMinLen, features);
if (lzSize < LIBBSC_NO_ERROR)
{
lzSize = n; mode &= 0xff;
}
else
{
memcpy(data, buffer, lzSize);
}
bsc_free(buffer);
}
if (lzSize <= LIBBSC_HEADER_SIZE)
{
blockSorter = LIBBSC_BLOCKSORTER_BWT;
mode = (mode & 0xffffffe0) | LIBBSC_BLOCKSORTER_BWT;
}
int index = LIBBSC_BAD_PARAMETER; num_indexes = 0;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : index = bsc_bwt_encode(data, lzSize, &num_indexes, indexes, features); break;
default : return LIBBSC_BAD_PARAMETER;
}
if (n < 64 * 1024) num_indexes = 0;
if (index < LIBBSC_NO_ERROR)
{
return index;
}
if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize + 4096))
{
int result = bsc_coder_compress(data, buffer, lzSize, coder, features);
if (result >= LIBBSC_NO_ERROR) memcpy(data + LIBBSC_HEADER_SIZE, buffer, result);
bsc_free(buffer);
if ((result < LIBBSC_NO_ERROR) || (result + 1 + 4 * num_indexes >= n))
{
return LIBBSC_NOT_COMPRESSIBLE;
}
{
if (num_indexes > 0)
{
memcpy(data + LIBBSC_HEADER_SIZE + result, indexes, 4 * num_indexes);
}
data[LIBBSC_HEADER_SIZE + result + 4 * num_indexes] = num_indexes;
result += 1 + 4 * num_indexes;
}
*(int *)(data + 0) = result + LIBBSC_HEADER_SIZE;
*(int *)(data + 4) = n;
*(int *)(data + 8) = mode;
*(int *)(data + 12) = index;
*(int *)(data + 16) = adler32_data;
*(int *)(data + 20) = bsc_adler32(data + LIBBSC_HEADER_SIZE, result, features);
*(int *)(data + 24) = bsc_adler32(data, 24, features);
return result + LIBBSC_HEADER_SIZE;
}
return LIBBSC_NOT_ENOUGH_MEMORY;
}
int bsc_compress(unsigned char * input, unsigned char * output, int n, int lzpHashSize, int lzpMinLen, int blockSorter, int coder, int features)
{
if (input == output)
{
return bsc_compress_inplace(output, n, lzpHashSize, lzpMinLen, blockSorter, coder, features);
}
int indexes[256];
unsigned char num_indexes;
int mode = 0;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : mode = LIBBSC_BLOCKSORTER_BWT; break;
default : return LIBBSC_BAD_PARAMETER;
}
switch (coder)
{
case LIBBSC_CODER_QLFC_STATIC: mode += (LIBBSC_CODER_QLFC_STATIC << 5); break;
case LIBBSC_CODER_QLFC_ADAPTIVE: mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break;
default : return LIBBSC_BAD_PARAMETER;
}
if (lzpMinLen != 0 || lzpHashSize != 0)
{
if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_BAD_PARAMETER;
if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_BAD_PARAMETER;
mode += (lzpMinLen << 8);
mode += (lzpHashSize << 16);
}
if (n < 0 || n > 1073741824) return LIBBSC_BAD_PARAMETER;
if (n <= LIBBSC_HEADER_SIZE)
{
return (LIBBSC_SKIP_DATA);
}
int lzSize = 0;
if (mode != (mode & 0xff))
{
lzSize = bsc_lzp_compress(input, output, n, lzpHashSize, lzpMinLen, features);
if (lzSize < LIBBSC_NO_ERROR || n - lzSize <= LIBBSC_HEADER_SIZE)
{
mode &= 0xff;
}
}
if (mode == (mode & 0xff))
{
lzSize = n; memcpy(output, input, n);
}
if (lzSize <= LIBBSC_HEADER_SIZE)
{
blockSorter = LIBBSC_BLOCKSORTER_BWT;
mode = (mode & 0xffffffe0) | LIBBSC_BLOCKSORTER_BWT;
}
int index = LIBBSC_BAD_PARAMETER; num_indexes = 0;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : index = bsc_bwt_encode(output, lzSize, &num_indexes, indexes, features); break;
default : return LIBBSC_BAD_PARAMETER;
}
if (n < 64 * 1024) num_indexes = 0;
if (index < LIBBSC_NO_ERROR)
{
return index;
}
int coded = 1;
int result = bsc_coder_compress(output, input, lzSize, coder, features);
if ((result < LIBBSC_NO_ERROR) || (result + 1 + 4 * num_indexes >= n)) {
coded = 0;
result = lzSize;
/*
* Original input has been destroyed so we have to decode BWT.
*/
bsc_bwt_decode(output, lzSize, index, num_indexes, indexes, features);
coded = 0;
if (mode == (mode & 0xff)) {
/*
* No coding, no LZP. No compression whatsoever.
*/
memcpy(input, output, n);
return (LIBBSC_SKIP_DATA);
}
/*
* At least LZP is successful.
*/
memmove(output + LIBBSC_HEADER_SIZE, output, result);
} else {
memcpy(output + LIBBSC_HEADER_SIZE, input, result);
}
if (coded > 0) {
if (num_indexes > 0)
{
memcpy(output + LIBBSC_HEADER_SIZE + result, indexes, 4 * num_indexes);
}
output[LIBBSC_HEADER_SIZE + result + 4 * num_indexes] = num_indexes;
result += 1 + 4 * num_indexes;
}
*(int *)(output + 0) = result + LIBBSC_HEADER_SIZE;
*(int *)(output + 4) = n;
*(int *)(output + 8) = mode;
*(int *)(output + 12) = index;
*(int *)(output + 16) = features;
*(int *)(output + 20) = coded;
*(int *)(output + 24) = bsc_adler32(output, 24, features);
return result + LIBBSC_HEADER_SIZE;
return LIBBSC_NOT_ENOUGH_MEMORY;
}
int bsc_block_info(unsigned char * blockHeader, int headerSize, int * pBlockSize, int * pDataSize, int features)
{
if (headerSize < LIBBSC_HEADER_SIZE)
{
return LIBBSC_UNEXPECTED_EOB;
}
if (*(unsigned int *)(blockHeader + 24) != bsc_adler32(blockHeader, 24, features))
{
return LIBBSC_DATA_CORRUPT;
}
int blockSize = *(int *)(blockHeader + 0);
int dataSize = *(int *)(blockHeader + 4);
int mode = *(int *)(blockHeader + 8);
int index = *(int *)(blockHeader + 12);
int lzpHashSize = (mode >> 16) & 0xff;
int lzpMinLen = (mode >> 8) & 0xff;
int coder = (mode >> 5) & 0x7;
int blockSorter = (mode >> 0) & 0x1f;
int test_mode = 0;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : test_mode = LIBBSC_BLOCKSORTER_BWT; break;
default : if (blockSorter > 0) return LIBBSC_DATA_CORRUPT;
}
switch (coder)
{
case LIBBSC_CODER_QLFC_STATIC: test_mode += (LIBBSC_CODER_QLFC_STATIC << 5); break;
case LIBBSC_CODER_QLFC_ADAPTIVE: test_mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break;
default : return LIBBSC_BAD_PARAMETER;
}
if (lzpMinLen != 0 || lzpHashSize != 0)
{
if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_DATA_CORRUPT;
if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_DATA_CORRUPT;
test_mode += (lzpMinLen << 8);
test_mode += (lzpHashSize << 16);
}
if (test_mode != mode)
{
return LIBBSC_DATA_CORRUPT;
}
if (blockSize < LIBBSC_HEADER_SIZE || blockSize > LIBBSC_HEADER_SIZE + dataSize)
{
return LIBBSC_DATA_CORRUPT;
}
if (index < 0 || index > dataSize)
{
return LIBBSC_DATA_CORRUPT;
}
if (pBlockSize != NULL) *pBlockSize = blockSize;
if (pDataSize != NULL) *pDataSize = dataSize;
return LIBBSC_NO_ERROR;
}
int bsc_decompress_inplace(unsigned char * data, int inputSize, int outputSize, int features)
{
int indexes[256];
unsigned char num_indexes;
int blockSize = 0, dataSize = 0;
int info = bsc_block_info(data, inputSize, &blockSize, &dataSize, features);
if (info != LIBBSC_NO_ERROR)
{
return info;
}
if (inputSize < blockSize || outputSize < dataSize)
{
return LIBBSC_UNEXPECTED_EOB;
}
if (*(unsigned int *)(data + 20) != bsc_adler32(data + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, features))
{
return LIBBSC_DATA_CORRUPT;
}
int mode = *(int *)(data + 8);
if (mode == 0)
{
memmove(data, data + LIBBSC_HEADER_SIZE, dataSize);
return LIBBSC_NO_ERROR;
}
int index = *(int *)(data + 12);
unsigned int adler32_data = *(int *)(data + 16);
num_indexes = data[blockSize - 1];
if (num_indexes > 0)
{
memcpy(indexes, data + blockSize - 1 - 4 * num_indexes, 4 * num_indexes);
}
int lzpHashSize = (mode >> 16) & 0xff;
int lzpMinLen = (mode >> 8) & 0xff;
int coder = (mode >> 5) & 0x7;
int blockSorter = (mode >> 0) & 0x1f;
int lzSize = LIBBSC_NO_ERROR;
{
unsigned char * buffer = (unsigned char *)bsc_malloc(blockSize);
if (buffer == NULL) return LIBBSC_NOT_ENOUGH_MEMORY;
memcpy(buffer, data, blockSize);
lzSize = bsc_coder_decompress(buffer + LIBBSC_HEADER_SIZE, data, coder, features);
bsc_free(buffer);
}
if (lzSize < LIBBSC_NO_ERROR)
{
return lzSize;
}
int result;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : result = bsc_bwt_decode(data, lzSize, index, num_indexes, indexes, features); break;
default : return LIBBSC_DATA_CORRUPT;
}
if (result < LIBBSC_NO_ERROR)
{
return result;
}
if (mode != (mode & 0xff))
{
if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize))
{
memcpy(buffer, data, lzSize);
result = bsc_lzp_decompress(buffer, data, lzSize, lzpHashSize, lzpMinLen, features);
bsc_free(buffer);
if (result < LIBBSC_NO_ERROR)
{
return result;
}
return result == dataSize ? (adler32_data == bsc_adler32(data, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT;
}
return LIBBSC_NOT_ENOUGH_MEMORY;
}
return lzSize == dataSize ? (adler32_data == bsc_adler32(data, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT;
}
int bsc_decompress(unsigned char * input, int inputSize, unsigned char * output, int outputSize, int features)
{
int indexes[256];
unsigned char num_indexes;
if (input == output)
{
return bsc_decompress_inplace(output, inputSize, outputSize, features);
}
int blockSize = 0, dataSize = 0;
int info = bsc_block_info(input, inputSize, &blockSize, &dataSize, features);
if (info != LIBBSC_NO_ERROR)
{
return info;
}
if (inputSize < blockSize || outputSize < dataSize)
{
return LIBBSC_UNEXPECTED_EOB;
}
int coded = *(int *)(input + 20);
int mode = *(int *)(input + 8);
if (mode == 0)
{
memcpy(output, input + LIBBSC_HEADER_SIZE, dataSize);
return LIBBSC_NO_ERROR;
}
int index = *(int *)(input + 12);
int features_stored = *(int *)(input + 16);
if (features_stored != features) return LIBBSC_DATA_CORRUPT;
num_indexes = input[blockSize - 1];
if (num_indexes > 0)
{
memcpy(indexes, input + blockSize - 1 - 4 * num_indexes, 4 * num_indexes);
}
int lzpHashSize = (mode >> 16) & 0xff;
int lzpMinLen = (mode >> 8) & 0xff;
int coder = (mode >> 5) & 0x7;
int blockSorter = (mode >> 0) & 0x1f;
int result, lzSize;
if (coded) {
lzSize = bsc_coder_decompress(input + LIBBSC_HEADER_SIZE, output, coder, features);
if (lzSize < LIBBSC_NO_ERROR)
{
return lzSize;
}
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : result = bsc_bwt_decode(output, lzSize, index, num_indexes, indexes, features); break;
default : return LIBBSC_DATA_CORRUPT;
}
if (result < LIBBSC_NO_ERROR)
{
return result;
}
} else {
memcpy(output, input + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE);
lzSize = blockSize - LIBBSC_HEADER_SIZE;
}
result = lzSize;
if (mode != (mode & 0xff))
{
memcpy(input, output, lzSize);
result = bsc_lzp_decompress(input, output, lzSize, lzpHashSize, lzpMinLen, features);
if (result < LIBBSC_NO_ERROR)
{
return result;
}
return result == dataSize ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT;
}
return result == dataSize ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT;
}
/*
* Compatibility to decompress older archive versions.
*/
int bsc_decompress_old(const unsigned char * input, int inputSize, unsigned char * output, int outputSize, int features)
{
int indexes[256];
unsigned char num_indexes;
if (input == output)
{
return bsc_decompress_inplace(output, inputSize, outputSize, features);
}
int blockSize = 0, dataSize = 0;
int info = bsc_block_info((unsigned char *)input, inputSize, &blockSize, &dataSize, features);
if (info != LIBBSC_NO_ERROR)
{
return info;
}
if (inputSize < blockSize || outputSize < dataSize)
{
return LIBBSC_UNEXPECTED_EOB;
}
if (*(unsigned int *)(input + 20) != bsc_adler32(input + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, features))
{
return LIBBSC_DATA_CORRUPT;
}
int mode = *(int *)(input + 8);
if (mode == 0)
{
memcpy(output, input + LIBBSC_HEADER_SIZE, dataSize);
return LIBBSC_NO_ERROR;
}
int index = *(int *)(input + 12);
unsigned int adler32_data = *(int *)(input + 16);
num_indexes = input[blockSize - 1];
if (num_indexes > 0)
{
memcpy(indexes, input + blockSize - 1 - 4 * num_indexes, 4 * num_indexes);
}
int lzpHashSize = (mode >> 16) & 0xff;
int lzpMinLen = (mode >> 8) & 0xff;
int coder = (mode >> 5) & 0x7;
int blockSorter = (mode >> 0) & 0x1f;
int lzSize = bsc_coder_decompress(input + LIBBSC_HEADER_SIZE, output, coder, features);
if (lzSize < LIBBSC_NO_ERROR)
{
return lzSize;
}
int result;
switch (blockSorter)
{
case LIBBSC_BLOCKSORTER_BWT : result = bsc_bwt_decode(output, lzSize, index, num_indexes, indexes, features); break;
default : return LIBBSC_DATA_CORRUPT;
}
if (result < LIBBSC_NO_ERROR)
{
return result;
}
if (mode != (mode & 0xff))
{
if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize))
{
memcpy(buffer, output, lzSize);
result = bsc_lzp_decompress(buffer, output, lzSize, lzpHashSize, lzpMinLen, features);
bsc_free(buffer);
if (result < LIBBSC_NO_ERROR)
{
return result;
}
return result == dataSize ? (adler32_data == bsc_adler32(output, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT;
}
return LIBBSC_NOT_ENOUGH_MEMORY;
}
return lzSize == dataSize ? (adler32_data == bsc_adler32(output, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT;
}
/*-------------------------------------------------*/
/* End libbsc.cpp */
/*-------------------------------------------------*/