2012-05-28 14:49:29 +00:00
|
|
|
/*
|
|
|
|
* This file is a part of Pcompress, a chunked parallel multi-
|
|
|
|
* algorithm lossless compression and decompression program.
|
|
|
|
*
|
2013-03-07 14:56:48 +00:00
|
|
|
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
2012-05-28 14:49:29 +00:00
|
|
|
* Use is subject to license terms.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
2012-07-07 16:48:29 +00:00
|
|
|
* version 3 of the License, or (at your option) any later version.
|
2012-05-28 14:49:29 +00:00
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
2013-03-07 14:56:48 +00:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this program.
|
|
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
2012-05-28 14:49:29 +00:00
|
|
|
*
|
2013-03-07 14:56:48 +00:00
|
|
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
2012-05-28 14:49:29 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <strings.h>
|
2012-05-31 12:31:34 +00:00
|
|
|
#if defined(sun) || defined(__sun)
|
|
|
|
#include <sys/byteorder.h>
|
|
|
|
#else
|
2012-05-28 14:49:29 +00:00
|
|
|
#include <byteswap.h>
|
2012-05-31 12:31:34 +00:00
|
|
|
#endif
|
2012-05-28 14:49:29 +00:00
|
|
|
#include <utils.h>
|
|
|
|
#include <pcompress.h>
|
|
|
|
#include <allocator.h>
|
|
|
|
|
2012-09-27 16:59:08 +00:00
|
|
|
/*
 * Integer percentage helpers. The argument is divided before it is
 * multiplied, so the result is truncated to a multiple of the factor
 * (e.g. FIFTY_PCT(19) is 5, not 9).
 */
#define	ONE_PCT(x)	((x) / 100)
#define	FORTY_PCT(x)	(((x) / 10) * 4)
#define	FIFTY_PCT(x)	(((x) / 10) * 5)
|
2012-09-27 16:59:08 +00:00
|
|
|
|
2012-05-31 16:06:33 +00:00
|
|
|
/*
 * Per-algorithm chunk counters for the adaptive modes. They are bumped
 * in adapt_compress(), reported by adapt_stats() and reset by the init
 * routines. Objects with static storage duration start out as zero.
 */
static unsigned int lzma_count, bzip2_count, bsc_count, ppmd_count;
|
|
|
|
|
2012-12-09 04:45:06 +00:00
|
|
|
extern int lzma_compress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int bzip2_compress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int ppmd_compress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int libbsc_compress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2012-12-09 04:45:06 +00:00
|
|
|
extern int lzma_decompress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int bzip2_decompress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int ppmd_decompress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
|
|
|
extern int libbsc_decompress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2012-12-27 17:36:48 +00:00
|
|
|
extern int lzma_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
2012-11-22 15:32:50 +00:00
|
|
|
int file_version, compress_op_t op);
|
2012-05-28 14:49:29 +00:00
|
|
|
extern int lzma_deinit(void **data);
|
2012-12-27 17:36:48 +00:00
|
|
|
extern int ppmd_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
2012-11-22 15:32:50 +00:00
|
|
|
int file_version, compress_op_t op);
|
2012-05-28 14:49:29 +00:00
|
|
|
extern int ppmd_deinit(void **data);
|
2012-12-27 17:36:48 +00:00
|
|
|
extern int libbsc_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
2012-11-22 15:32:50 +00:00
|
|
|
int file_version, compress_op_t op);
|
2012-11-04 15:43:26 +00:00
|
|
|
extern int libbsc_deinit(void **data);
|
2012-05-28 14:49:29 +00:00
|
|
|
|
|
|
|
/*
 * State shared by the adaptive compression modes: one opaque context
 * per sub-algorithm plus the mode selector.
 */
struct adapt_data {
	void *lzma_data;	/* context handed to the lzma_* routines */
	void *ppmd_data;	/* context handed to the ppmd_* routines */
	void *bsc_data;		/* context handed to the libbsc_* routines */
	int adapt_mode;		/* 1 from adapt_init(), 2 from adapt2_init() */
};
|
|
|
|
|
2012-05-31 16:06:33 +00:00
|
|
|
void
|
|
|
|
adapt_stats(int show)
|
|
|
|
{
|
|
|
|
if (show) {
|
|
|
|
fprintf(stderr, "Adaptive mode stats:\n");
|
|
|
|
fprintf(stderr, " BZIP2 chunk count: %u\n", bzip2_count);
|
2012-11-04 15:43:26 +00:00
|
|
|
fprintf(stderr, " LIBBSC chunk count: %u\n", bsc_count);
|
2012-05-31 16:06:33 +00:00
|
|
|
fprintf(stderr, " PPMd chunk count: %u\n", ppmd_count);
|
|
|
|
fprintf(stderr, " LZMA chunk count: %u\n\n", lzma_count);
|
|
|
|
}
|
|
|
|
lzma_count = 0;
|
|
|
|
bzip2_count = 0;
|
2012-11-04 15:43:26 +00:00
|
|
|
bsc_count = 0;
|
2012-05-31 16:06:33 +00:00
|
|
|
ppmd_count = 0;
|
|
|
|
}
|
|
|
|
|
2012-12-04 18:39:47 +00:00
|
|
|
void
|
2012-12-27 17:36:48 +00:00
|
|
|
adapt_props(algo_props_t *data, int level, uint64_t chunksize)
|
2012-12-04 18:39:47 +00:00
|
|
|
{
|
2012-12-13 15:48:16 +00:00
|
|
|
data->delta2_span = 200;
|
2013-01-14 07:50:07 +00:00
|
|
|
data->deltac_min_distance = EIGHTM;
|
2012-12-04 18:39:47 +00:00
|
|
|
}
|
|
|
|
|
2012-05-28 14:49:29 +00:00
|
|
|
int
|
2012-12-27 17:36:48 +00:00
|
|
|
adapt_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
2012-11-22 15:32:50 +00:00
|
|
|
int file_version, compress_op_t op)
|
2012-05-28 14:49:29 +00:00
|
|
|
{
|
|
|
|
struct adapt_data *adat = (struct adapt_data *)(*data);
|
2012-12-27 17:36:48 +00:00
|
|
|
int rv = 0;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
|
|
|
if (!adat) {
|
|
|
|
adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data));
|
|
|
|
adat->adapt_mode = 1;
|
2012-11-22 15:32:50 +00:00
|
|
|
rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize, file_version, op);
|
2012-08-10 04:45:20 +00:00
|
|
|
adat->lzma_data = NULL;
|
2012-11-04 15:43:26 +00:00
|
|
|
adat->bsc_data = NULL;
|
2012-05-28 14:49:29 +00:00
|
|
|
*data = adat;
|
|
|
|
if (*level > 9) *level = 9;
|
|
|
|
}
|
2012-05-31 16:06:33 +00:00
|
|
|
lzma_count = 0;
|
|
|
|
bzip2_count = 0;
|
|
|
|
ppmd_count = 0;
|
2012-11-04 15:43:26 +00:00
|
|
|
bsc_count = 0;
|
2012-05-28 14:49:29 +00:00
|
|
|
return (rv);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2012-12-27 17:36:48 +00:00
|
|
|
adapt2_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
2012-11-22 15:32:50 +00:00
|
|
|
int file_version, compress_op_t op)
|
2012-05-28 14:49:29 +00:00
|
|
|
{
|
|
|
|
struct adapt_data *adat = (struct adapt_data *)(*data);
|
2012-12-27 17:36:48 +00:00
|
|
|
int rv = 0, lv;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
|
|
|
if (!adat) {
|
|
|
|
adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data));
|
|
|
|
adat->adapt_mode = 2;
|
|
|
|
adat->ppmd_data = NULL;
|
2012-11-26 14:51:03 +00:00
|
|
|
adat->bsc_data = NULL;
|
2012-05-28 14:49:29 +00:00
|
|
|
lv = *level;
|
2012-11-22 15:32:50 +00:00
|
|
|
rv = ppmd_init(&(adat->ppmd_data), &lv, nthreads, chunksize, file_version, op);
|
2012-05-28 14:49:29 +00:00
|
|
|
lv = *level;
|
|
|
|
if (rv == 0)
|
2012-11-22 15:32:50 +00:00
|
|
|
rv = lzma_init(&(adat->lzma_data), &lv, nthreads, chunksize, file_version, op);
|
2012-11-04 15:43:26 +00:00
|
|
|
lv = *level;
|
2012-11-26 14:51:03 +00:00
|
|
|
#ifdef ENABLE_PC_LIBBSC
|
2012-11-04 15:43:26 +00:00
|
|
|
if (rv == 0)
|
2012-11-22 15:32:50 +00:00
|
|
|
rv = libbsc_init(&(adat->bsc_data), &lv, nthreads, chunksize, file_version, op);
|
2012-11-26 14:51:03 +00:00
|
|
|
#endif
|
2012-05-28 14:49:29 +00:00
|
|
|
*data = adat;
|
|
|
|
if (*level > 9) *level = 9;
|
|
|
|
}
|
2012-11-04 15:43:26 +00:00
|
|
|
lzma_count = 0;
|
|
|
|
bzip2_count = 0;
|
|
|
|
ppmd_count = 0;
|
|
|
|
bsc_count = 0;
|
2012-05-28 14:49:29 +00:00
|
|
|
return (rv);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
adapt_deinit(void **data)
|
|
|
|
{
|
|
|
|
struct adapt_data *adat = (struct adapt_data *)(*data);
|
2012-12-27 17:36:48 +00:00
|
|
|
int rv = 0;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
|
|
|
if (adat) {
|
2012-08-10 04:45:20 +00:00
|
|
|
rv = ppmd_deinit(&(adat->ppmd_data));
|
|
|
|
if (adat->lzma_data)
|
|
|
|
rv += lzma_deinit(&(adat->lzma_data));
|
2012-05-28 14:49:29 +00:00
|
|
|
slab_free(NULL, adat);
|
|
|
|
*data = NULL;
|
|
|
|
}
|
|
|
|
return (rv);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2012-12-09 04:45:06 +00:00
|
|
|
adapt_compress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
2012-05-28 14:49:29 +00:00
|
|
|
{
|
|
|
|
struct adapt_data *adat = (struct adapt_data *)(data);
|
2012-09-26 14:17:32 +00:00
|
|
|
uchar_t *src1 = (uchar_t *)src;
|
2013-02-17 16:06:20 +00:00
|
|
|
uint64_t i, tot8b, tag1, tag2, tag3;
|
|
|
|
int rv = 0;
|
|
|
|
double tagcnt, pct_tag;
|
|
|
|
uchar_t cur_byte, prev_byte;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2012-09-26 14:17:32 +00:00
|
|
|
/*
|
2012-11-04 15:43:26 +00:00
|
|
|
* Count number of 8-bit binary bytes and XML tags in source.
|
2012-09-26 14:17:32 +00:00
|
|
|
*/
|
2012-09-27 16:59:08 +00:00
|
|
|
tot8b = 0;
|
2013-02-17 16:06:20 +00:00
|
|
|
tag1 = 0;
|
|
|
|
tag2 = 0;
|
|
|
|
tag3 = 0;
|
|
|
|
prev_byte = cur_byte = 0;
|
2012-11-04 15:43:26 +00:00
|
|
|
for (i = 0; i < srclen; i++) {
|
2013-02-17 16:06:20 +00:00
|
|
|
cur_byte = src1[i];
|
|
|
|
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
|
|
|
|
tag1 += (cur_byte == '<');
|
|
|
|
tag2 += (cur_byte == '>');
|
|
|
|
tag3 += ((prev_byte == '<') & (cur_byte == '/'));
|
|
|
|
tag3 += ((prev_byte == '/') & (cur_byte == '>'));
|
|
|
|
if (cur_byte != ' ')
|
|
|
|
prev_byte = cur_byte;
|
2012-11-04 15:43:26 +00:00
|
|
|
}
|
2012-09-26 14:17:32 +00:00
|
|
|
|
2013-01-23 15:28:39 +00:00
|
|
|
tot8b /= 0x80;
|
2013-02-17 16:06:20 +00:00
|
|
|
tagcnt = tag1 + tag2 + tag3;
|
|
|
|
pct_tag = tagcnt / (double)srclen;
|
|
|
|
|
2012-09-26 14:17:32 +00:00
|
|
|
/*
|
2012-09-27 16:59:08 +00:00
|
|
|
* Use PPMd if some percentage of source is 7-bit textual bytes, otherwise
|
2012-09-26 14:17:32 +00:00
|
|
|
* use Bzip2 or LZMA.
|
|
|
|
*/
|
2012-11-04 15:43:26 +00:00
|
|
|
if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
|
2012-09-27 16:59:08 +00:00
|
|
|
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data);
|
|
|
|
if (rv < 0)
|
|
|
|
return (rv);
|
2013-02-17 15:35:40 +00:00
|
|
|
rv = ADAPT_COMPRESS_LZMA;
|
2012-09-27 16:59:08 +00:00
|
|
|
lzma_count++;
|
|
|
|
|
|
|
|
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
|
|
|
|
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, NULL);
|
|
|
|
if (rv < 0)
|
|
|
|
return (rv);
|
2013-02-17 15:35:40 +00:00
|
|
|
rv = ADAPT_COMPRESS_BZIP2;
|
2012-09-27 16:59:08 +00:00
|
|
|
bzip2_count++;
|
|
|
|
|
2012-05-28 14:49:29 +00:00
|
|
|
} else {
|
2013-02-17 16:31:29 +00:00
|
|
|
#ifdef ENABLE_PC_LIBBSC
|
2013-02-17 16:06:20 +00:00
|
|
|
if (adat->bsc_data && tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
|
|
|
|
tagcnt > (double)srclen * 0.001) {
|
2012-11-04 15:43:26 +00:00
|
|
|
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data);
|
|
|
|
if (rv < 0)
|
|
|
|
return (rv);
|
2013-02-17 15:35:40 +00:00
|
|
|
rv = ADAPT_COMPRESS_BSC;
|
2012-11-04 15:43:26 +00:00
|
|
|
bsc_count++;
|
|
|
|
} else {
|
2013-02-17 16:31:29 +00:00
|
|
|
#endif
|
2012-11-04 15:43:26 +00:00
|
|
|
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data);
|
|
|
|
if (rv < 0)
|
|
|
|
return (rv);
|
2013-02-17 15:35:40 +00:00
|
|
|
rv = ADAPT_COMPRESS_PPMD;
|
2012-11-04 15:43:26 +00:00
|
|
|
ppmd_count++;
|
2013-02-17 16:31:29 +00:00
|
|
|
#ifdef ENABLE_PC_LIBBSC
|
2012-11-04 15:43:26 +00:00
|
|
|
}
|
2013-02-17 16:31:29 +00:00
|
|
|
#endif
|
2012-05-28 14:49:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (rv);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2012-12-09 04:45:06 +00:00
|
|
|
adapt_decompress(void *src, uint64_t srclen, void *dst,
|
|
|
|
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
2012-05-28 14:49:29 +00:00
|
|
|
{
|
|
|
|
struct adapt_data *adat = (struct adapt_data *)(data);
|
2012-08-10 05:17:11 +00:00
|
|
|
uchar_t cmp_flags;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2012-08-10 05:17:11 +00:00
|
|
|
cmp_flags = (chdr>>4) & CHDR_ALGO_MASK;
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2013-02-17 15:35:40 +00:00
|
|
|
if (cmp_flags == ADAPT_COMPRESS_LZMA) {
|
2012-07-01 16:14:02 +00:00
|
|
|
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data));
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2013-02-17 15:35:40 +00:00
|
|
|
} else if (cmp_flags == ADAPT_COMPRESS_BZIP2) {
|
2012-07-01 16:14:02 +00:00
|
|
|
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, NULL));
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2013-02-17 15:35:40 +00:00
|
|
|
} else if (cmp_flags == ADAPT_COMPRESS_PPMD) {
|
2012-07-01 16:14:02 +00:00
|
|
|
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data));
|
2012-05-28 14:49:29 +00:00
|
|
|
|
2013-02-17 15:35:40 +00:00
|
|
|
} else if (cmp_flags == ADAPT_COMPRESS_BSC) {
|
2012-11-26 14:51:03 +00:00
|
|
|
#ifdef ENABLE_PC_LIBBSC
|
2012-11-04 15:43:26 +00:00
|
|
|
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data));
|
2012-11-26 14:51:03 +00:00
|
|
|
#else
|
|
|
|
fprintf(stderr, "Cannot decompress chunk. Libbsc support not present.\n");
|
|
|
|
return (-1);
|
|
|
|
#endif
|
2012-11-04 15:43:26 +00:00
|
|
|
|
2012-05-28 14:49:29 +00:00
|
|
|
} else {
|
2012-11-04 16:16:04 +00:00
|
|
|
fprintf(stderr, "Unrecognized compression mode: %d, file corrupt.\n", cmp_flags);
|
2012-05-28 14:49:29 +00:00
|
|
|
}
|
|
|
|
return (-1);
|
|
|
|
}
|