pcompress/bsc/libbsc/bwt/bwt.cpp
Moinak Ghosh fb25e53b4f Add forked and optimized copy of LGPL version of Libbsc.
Strip out Sort Transform from Libbsc copy.
Reduce Libbsc memory use.
Avoid redundant adler32 of data block in Libbsc.
2013-11-30 22:13:33 +05:30

404 lines
13 KiB
C++

/*-----------------------------------------------------------*/
/* Block Sorting, Lossless Data Compression Library. */
/* Burrows Wheeler Transform */
/*-----------------------------------------------------------*/
/*--
This file is a part of bsc and/or libbsc, a program and a library for
lossless, block-sorting data compression.
Copyright (c) 2009-2012 Ilya Grebnov <ilya.grebnov@gmail.com>
See file AUTHORS for a full list of contributors.
The bsc and libbsc is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The bsc and libbsc is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the bsc and libbsc. If not, see http://www.gnu.org/licenses/.
Please see the files COPYING and COPYING.LIB for full copyright information.
See also the bsc and libbsc web site:
http://libbsc.com/ for more information.
--*/
#include <stdlib.h>
#include <memory.h>
#include "bwt.h"
#include "../platform/platform.h"
#include "../libbsc.h"
#include "divsufsort/divsufsort.h"
int bsc_bwt_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)
{
int index = divbwt(T, T, NULL, n, num_indexes, indexes, features & LIBBSC_FEATURE_MULTITHREADING);
switch (index)
{
case -1 : return LIBBSC_BAD_PARAMETER;
case -2 : return LIBBSC_NOT_ENOUGH_MEMORY;
}
return index;
}
static int bsc_unbwt_mergedTL_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int n, int index)
{
unsigned int bucket[ALPHABET_SIZE];
memset(bucket, 0, ALPHABET_SIZE * sizeof(unsigned int));
for (int i = 0; i < index; ++i)
{
unsigned char c = T[i];
P[i] = ((bucket[c]++) << 8) | c;
}
for (int i = index; i < n; ++i)
{
unsigned char c = T[i];
P[i + 1] = ((bucket[c]++) << 8) | c;
}
for (int sum = 1, i = 0; i < ALPHABET_SIZE; ++i)
{
int tmp = sum; sum += bucket[i]; bucket[i] = tmp;
}
for (int p = 0, i = n - 1; i >= 0; --i)
{
unsigned int u = P[p];
unsigned char c = u & 0xff;
T[i] = c; p = (u >> 8) + bucket[c];
}
return LIBBSC_NO_ERROR;
}
#define BWT_NUM_FASTBITS (17)
static int bsc_unbwt_biPSI_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int n, int index)
{
if (int * RESTRICT bucket = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int)))
{
if (unsigned short * RESTRICT fastbits = (unsigned short *)bsc_malloc((1 + (1 << BWT_NUM_FASTBITS)) * sizeof(unsigned short)))
{
int count[ALPHABET_SIZE]; memset(count, 0, ALPHABET_SIZE * sizeof(int));
int shift = 0; while ((n >> shift) > (1 << BWT_NUM_FASTBITS)) shift++;
for (int i = 0; i < n; ++i) count[T[i]]++;
for (int sum = 1, c = 0; c < ALPHABET_SIZE; ++c)
{
int tmp = sum; sum += count[c]; count[c] = tmp;
if (count[c] != sum)
{
int * RESTRICT bucket_p = &bucket[c << 8];
int hi = index; if (sum < hi) hi = sum;
for (int i = count[c]; i < hi; ++i) bucket_p[T[i]]++;
int lo = index + 1; if (count[c] > lo) lo = count[c];
for (int i = lo; i < sum; ++i) bucket_p[T[i - 1]]++;
}
}
int lastc = T[0];
for (int v = 0, sum = 1, c = 0; c < ALPHABET_SIZE; ++c)
{
if (c == lastc) sum++;
int * RESTRICT bucket_p = &bucket[c];
for (int d = 0; d < ALPHABET_SIZE; ++d)
{
int tmp = sum; sum += bucket_p[d << 8]; bucket_p[d << 8] = tmp;
if (bucket_p[d << 8] != sum)
{
for (; v <= ((sum - 1) >> shift); ++v) fastbits[v] = (c << 8) | d;
}
}
}
for (int i = 0; i < index; ++i)
{
unsigned char c = T[i];
int p = count[c]++;
if (p < index) P[bucket[(c << 8) | T[p ]]++] = i; else
if (p > index) P[bucket[(c << 8) | T[p - 1]]++] = i;
}
for (int i = index; i < n; ++i)
{
unsigned char c = T[i];
int p = count[c]++;
if (p < index) P[bucket[(c << 8) | T[p ]]++] = i + 1; else
if (p > index) P[bucket[(c << 8) | T[p - 1]]++] = i + 1;
}
for (int c = 0; c < ALPHABET_SIZE; ++c)
{
for (int d = 0; d < c; ++d)
{
int tmp = bucket[(d << 8) | c]; bucket[(d << 8) | c] = bucket[(c << 8) | d]; bucket[(c << 8) | d] = tmp;
}
}
for (int p = index, i = 1; i < n; i += 2)
{
int c = fastbits[p >> shift]; while (bucket[c] <= p) c++;
T[i - 1] = (unsigned char)(c >> 8); T[i] = (unsigned char)(c & 0xff);
p = P[p];
}
T[n - 1] = (unsigned char)lastc;
bsc_free(fastbits); bsc_free(bucket);
return LIBBSC_NO_ERROR;
}
bsc_free(bucket);
};
return LIBBSC_NOT_ENOUGH_MEMORY;
}
static int bsc_unbwt_reconstruct_serial(unsigned char * T, unsigned int * P, int n, int index)
{
if (n < 3 * 1024 * 1024) return bsc_unbwt_mergedTL_serial(T, P, n, index);
return bsc_unbwt_biPSI_serial(T, P, n, index);
}
#ifdef LIBBSC_OPENMP
static int bsc_unbwt_mergedTL_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int n, int index, int * RESTRICT indexes)
{
unsigned int bucket[ALPHABET_SIZE];
memset(bucket, 0, ALPHABET_SIZE * sizeof(unsigned int));
for (int i = 0; i < index; ++i)
{
unsigned char c = T[i];
P[i] = ((bucket[c]++) << 8) | c;
}
for (int i = index; i < n; ++i)
{
unsigned char c = T[i];
P[i + 1] = ((bucket[c]++) << 8) | c;
}
for (int sum = 1, i = 0; i < ALPHABET_SIZE; ++i)
{
int tmp = sum; sum += bucket[i]; bucket[i] = tmp;
}
int mod = n / 8;
{
mod |= mod >> 1; mod |= mod >> 2;
mod |= mod >> 4; mod |= mod >> 8;
mod |= mod >> 16; mod >>= 1; mod++;
}
int nBlocks = 1 + (n - 1) / mod;
#pragma omp parallel for schedule(dynamic)
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int p = (blockId < nBlocks - 1) ? indexes[blockId] + 1 : 0;
int blockStart = (blockId < nBlocks - 1) ? mod * blockId + mod - 1 : n - 1;
int blockEnd = mod * blockId;
for (int i = blockStart; i >= blockEnd; --i)
{
unsigned int u = P[p];
unsigned char c = u & 0xff;
T[i] = c; p = (u >> 8) + bucket[c];
}
}
return LIBBSC_NO_ERROR;
}
static int bsc_unbwt_biPSI_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int n, int index, int * RESTRICT indexes)
{
if (int * RESTRICT bucket = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int)))
{
if (unsigned short * RESTRICT fastbits = (unsigned short *)bsc_malloc((1 + (1 << BWT_NUM_FASTBITS)) * sizeof(unsigned short)))
{
int count[ALPHABET_SIZE]; memset(count, 0, ALPHABET_SIZE * sizeof(int));
int shift = 0; while ((n >> shift) > (1 << BWT_NUM_FASTBITS)) shift++;
#pragma omp parallel
{
unsigned int count_local[ALPHABET_SIZE];
memset(count_local, 0, ALPHABET_SIZE * sizeof(unsigned int));
#pragma omp for schedule(static) nowait
for (int i = 0; i < n; ++i) count_local[T[i]]++;
#pragma omp critical
for (int c = 0; c < ALPHABET_SIZE; ++c) count[c] += count_local[c];
}
for (int sum = 1, c = 0; c < ALPHABET_SIZE; ++c)
{
int tmp = sum; sum += count[c]; count[c] = tmp;
}
#pragma omp parallel for schedule(static, 1)
for (int c = 0; c < ALPHABET_SIZE; ++c)
{
int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? count[c + 1] : n + 1;
if (start != end)
{
int * RESTRICT bucket_p = &bucket[c << 8];
int hi = index; if (end < hi) hi = end;
for (int i = start; i < hi; ++i) bucket_p[T[i]]++;
int lo = index + 1; if (start > lo) lo = start;
for (int i = lo; i < end; ++i) bucket_p[T[i - 1]]++;
}
}
int lastc = T[0];
for (int v = 0, sum = 1, c = 0; c < ALPHABET_SIZE; ++c)
{
if (c == lastc) sum++;
int * RESTRICT bucket_p = &bucket[c];
for (int d = 0; d < ALPHABET_SIZE; ++d)
{
int tmp = sum; sum += bucket_p[d << 8]; bucket_p[d << 8] = tmp;
if (bucket_p[d << 8] != sum)
{
for (; v <= ((sum - 1) >> shift); ++v) fastbits[v] = (c << 8) | d;
}
}
}
for (int i = 0; i < index; ++i)
{
unsigned char c = T[i];
int p = count[c]++;
if (p < index) P[bucket[(c << 8) | T[p ]]++] = i; else
if (p > index) P[bucket[(c << 8) | T[p - 1]]++] = i;
}
for (int i = index; i < n; ++i)
{
unsigned char c = T[i];
int p = count[c]++;
if (p < index) P[bucket[(c << 8) | T[p ]]++] = i + 1; else
if (p > index) P[bucket[(c << 8) | T[p - 1]]++] = i + 1;
}
for (int c = 0; c < ALPHABET_SIZE; ++c)
{
for (int d = 0; d < c; ++d)
{
int tmp = bucket[(d << 8) | c]; bucket[(d << 8) | c] = bucket[(c << 8) | d]; bucket[(c << 8) | d] = tmp;
}
}
int mod = n / 8;
{
mod |= mod >> 1; mod |= mod >> 2;
mod |= mod >> 4; mod |= mod >> 8;
mod |= mod >> 16; mod >>= 1; mod++;
}
int nBlocks = 1 + (n - 1) / mod;
#pragma omp parallel for schedule(dynamic)
for (int blockId = 0; blockId < nBlocks; ++blockId)
{
int p = (blockId > 0 ) ? indexes[blockId - 1] + 1 : index;
int blockEnd = (blockId < nBlocks - 1) ? mod * blockId + mod : n;
int blockStart = mod * blockId;
if (blockEnd != n) blockEnd++; else T[n - 1] = (unsigned char)lastc;
for (int i = blockStart + 1; i < blockEnd; i += 2)
{
int c = fastbits[p >> shift]; while (bucket[c] <= p) c++;
T[i - 1] = (unsigned char)(c >> 8); T[i] = (unsigned char)(c & 0xff);
p = P[p];
}
}
bsc_free(fastbits); bsc_free(bucket);
return LIBBSC_NO_ERROR;
}
bsc_free(bucket);
};
return LIBBSC_NOT_ENOUGH_MEMORY;
}
static int bsc_unbwt_reconstruct_parallel(unsigned char * T, unsigned int * P, int n, int index, int * indexes)
{
if (n < 3 * 1024 * 1024) return bsc_unbwt_mergedTL_parallel(T, P, n, index, indexes);
return bsc_unbwt_biPSI_parallel(T, P, n, index, indexes);
}
#endif
int bsc_bwt_decode(unsigned char * T, int n, int index, unsigned char num_indexes, int * indexes, int features)
{
if ((T == NULL) || (n < 0) || (index <= 0) || (index > n))
{
return LIBBSC_BAD_PARAMETER;
}
if (n <= 1)
{
return LIBBSC_NO_ERROR;
}
if (unsigned int * P = (unsigned int *)bsc_malloc((n + 1) * sizeof(unsigned int)))
{
int result = LIBBSC_NO_ERROR;
#ifdef LIBBSC_OPENMP
int mod = n / 8;
{
mod |= mod >> 1; mod |= mod >> 2;
mod |= mod >> 4; mod |= mod >> 8;
mod |= mod >> 16; mod >>= 1;
}
if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 64 * 1024) && (num_indexes == (unsigned char)((n - 1) / (mod + 1))) && (indexes != NULL))
{
result = bsc_unbwt_reconstruct_parallel(T, P, n, index, indexes);
}
else
#endif
{
result = bsc_unbwt_reconstruct_serial(T, P, n, index);
}
bsc_free(P);
return result;
};
return LIBBSC_NOT_ENOUGH_MEMORY;
}
/*-----------------------------------------------------------*/
/* End bwt.cpp */
/*-----------------------------------------------------------*/