Leverage file type detection (archiver) to improve compression performance.

Use the file/data type detected by the archiver for Adaptive compression modes.
Update type flags and add more extensions.
This commit is contained in:
Moinak Ghosh 2013-11-08 23:50:28 +05:30
parent b7facc929e
commit cae9de9b2e
19 changed files with 340 additions and 239 deletions

View file

@ -35,6 +35,7 @@
#include <utils.h>
#include <pcompress.h>
#include <allocator.h>
#include <pc_archive.h>
/*
 * Approximate 50% and 40% of x. The argument is divided by 10 first and then
 * multiplied, so with integer operands the result is truncated to a multiple
 * of 5 (or 4) and the intermediate value cannot exceed x.
 */
#define FIFTY_PCT(x) (((x)/10) * 5)
#define FORTY_PCT(x) (((x)/10) * 4)
@ -46,22 +47,22 @@ static unsigned int bsc_count = 0;
static unsigned int ppmd_count = 0;
extern int lzma_compress(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
extern int bzip2_compress(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
extern int ppmd_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int libbsc_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lzma_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int bzip2_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int ppmd_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int libbsc_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lzma_init(void **data, int *level, int nthreads, uint64_t chunksize,
int file_version, compress_op_t op);
@ -180,51 +181,63 @@ adapt_deinit(void **data)
int
adapt_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
struct adapt_data *adat = (struct adapt_data *)(data);
uchar_t *src1 = (uchar_t *)src;
uint64_t i, tot8b, tag1, tag2, tag3;
int rv = 0;
double tagcnt, pct_tag;
uchar_t cur_byte, prev_byte;
/*
* Count number of 8-bit binary bytes and XML tags in source.
*/
tot8b = 0;
tag1 = 0;
tag2 = 0;
tag3 = 0;
prev_byte = cur_byte = 0;
for (i = 0; i < srclen; i++) {
cur_byte = src1[i];
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
tag1 += (cur_byte == '<');
tag2 += (cur_byte == '>');
tag3 += ((prev_byte == '<') & (cur_byte == '/'));
tag3 += ((prev_byte == '/') & (cur_byte == '>'));
if (cur_byte != ' ')
prev_byte = cur_byte;
if (btype == TYPE_UNKNOWN) {
uint64_t i, tot8b, tag1, tag2, tag3;
double tagcnt, pct_tag;
uchar_t cur_byte, prev_byte;
/*
* Count number of 8-bit binary bytes and XML tags in source.
*/
tot8b = 0;
tag1 = 0;
tag2 = 0;
tag3 = 0;
prev_byte = cur_byte = 0;
for (i = 0; i < srclen; i++) {
cur_byte = src1[i];
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
tag1 += (cur_byte == '<');
tag2 += (cur_byte == '>');
tag3 += ((prev_byte == '<') & (cur_byte == '/'));
tag3 += ((prev_byte == '/') & (cur_byte == '>'));
if (cur_byte != ' ')
prev_byte = cur_byte;
}
tot8b /= 0x80;
tagcnt = tag1 + tag2 + tag3;
pct_tag = tagcnt / (double)srclen;
if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
btype = TYPE_BINARY;
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
btype = TYPE_BINARY;
} else {
btype = TYPE_TEXT;
if (tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
tagcnt > (double)srclen * 0.001)
btype |= TYPE_MARKUP;
}
}
tot8b /= 0x80;
tagcnt = tag1 + tag2 + tag3;
pct_tag = tagcnt / (double)srclen;
/*
* Use PPMd if some percentage of source is 7-bit textual bytes, otherwise
* use Bzip2 or LZMA.
*/
if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data);
if (adat->adapt_mode == 2 && (btype & TYPE_BINARY)) {
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
if (rv < 0)
return (rv);
rv = ADAPT_COMPRESS_LZMA;
lzma_count++;
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, NULL);
} else if (adat->adapt_mode == 1 && (btype & TYPE_BINARY)) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
if (rv < 0)
return (rv);
rv = ADAPT_COMPRESS_BZIP2;
@ -232,16 +245,15 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
} else {
#ifdef ENABLE_PC_LIBBSC
if (adat->bsc_data && tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
tagcnt > (double)srclen * 0.001) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data);
if (adat->bsc_data && (btype & TYPE_MARKUP)) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
if (rv < 0)
return (rv);
rv = ADAPT_COMPRESS_BSC;
bsc_count++;
} else {
#endif
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data);
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->ppmd_data);
if (rv < 0)
return (rv);
rv = ADAPT_COMPRESS_PPMD;
@ -256,7 +268,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
int
adapt_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
struct adapt_data *adat = (struct adapt_data *)(data);
uchar_t cmp_flags;
@ -264,17 +276,17 @@ adapt_decompress(void *src, uint64_t srclen, void *dst,
cmp_flags = (chdr>>4) & CHDR_ALGO_MASK;
if (cmp_flags == ADAPT_COMPRESS_LZMA) {
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data));
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data));
} else if (cmp_flags == ADAPT_COMPRESS_BZIP2) {
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, NULL));
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, btype, NULL));
} else if (cmp_flags == ADAPT_COMPRESS_PPMD) {
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data));
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->ppmd_data));
} else if (cmp_flags == ADAPT_COMPRESS_BSC) {
#ifdef ENABLE_PC_LIBBSC
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data));
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data));
#else
log_msg(LOG_ERR, 0, "Cannot decompress chunk. Libbsc support not present.\n");
return (-1);

View file

@ -229,8 +229,6 @@ archiver_read(void *ctx, void *buf, uint64_t count)
sem_post(&(pctx->write_sem));
sem_wait(&(pctx->read_sem));
pctx->arc_buf = NULL;
if (pctx->btype == TYPE_UNKNOWN)
pctx->btype = TYPE_GENERIC;
return (pctx->arc_buf_pos);
}
@ -1166,9 +1164,9 @@ init_archive_mod() {
if (!inited) {
int i, j;
exthtab = malloc(NUM_EXT * sizeof (struct ext_hash_entry));
exthtab = malloc(PHASHNKEYS * sizeof (struct ext_hash_entry));
if (exthtab != NULL) {
for (i = 0; i < NUM_EXT; i++) {
for (i = 0; i < PHASHNKEYS; i++) {
uint64_t extnum;
ub4 slot = phash(extlist[i].ext, extlist[i].len);
extnum = 0;
@ -1211,7 +1209,7 @@ detect_type_by_ext(char *path, int pathlen)
if (len == 0) goto out; // If extension is empty give up
ext = &path[i+1];
slot = phash(ext, len);
if (slot > NUM_EXT) goto out; // Extension maps outside hash table range, give up
if (slot > PHASHNKEYS) goto out; // Extension maps outside hash table range, give up
extnum = 0;
/*
@ -1244,15 +1242,15 @@ detect_type_by_data(uchar_t *buf, size_t len)
if (len < 16) return (TYPE_UNKNOWN);
if (U32_P(buf) == ELFSHORT)
return (TYPE_EXE); // Regular ELF
return (TYPE_BINARY|TYPE_EXE); // Regular ELF
if ((buf[0] == 'M' || buf[0] == 'L') && buf[1] == 'Z')
return (TYPE_EXE); // MSDOS Exe
return (TYPE_BINARY|TYPE_EXE); // MSDOS Exe
if (buf[0] == 0xe9)
return (TYPE_EXE); // MSDOS COM
return (TYPE_BINARY|TYPE_EXE); // MSDOS COM
if (U32_P(buf) == TZSHORT)
return (TYPE_BINARY); // Timezone data
return (TYPE_BINARY|TYPE_BINARY); // Timezone data
if (U32_P(buf) == PPMSHORT)
return (TYPE_COMPRESSED); // PPM Compressed archive
return (TYPE_BINARY|TYPE_COMPRESSED); // PPM Compressed archive
return (TYPE_UNKNOWN);
}

View file

@ -26,6 +26,9 @@
#ifndef _ARCHIVE_H
#define _ARCHIVE_H
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <pcompress.h>
#ifdef __cplusplus
@ -38,16 +41,6 @@ typedef struct {
size_t size;
} archive_list_entry_t;
typedef enum {
TYPE_UNKNOWN = 0,
TYPE_GENERIC,
TYPE_COMPRESSED,
TYPE_EXE,
TYPE_TEXT,
TYPE_BINARY,
TYPE_JPEG
} data_type_t;
/*
* Archiving related functions.
*/

View file

@ -95,7 +95,7 @@ bzerr(int err)
int
bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
bz_stream bzs;
int ret, ending;
@ -164,7 +164,7 @@ bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
bzip2_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
bz_stream bzs;
int ret;
@ -174,6 +174,15 @@ bzip2_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
char *dst1 = (char *)dst;
char *src1 = (char *)src;
if (btype & TYPE_COMPRESSED) {
if ((btype & TYPE_COMPRESSED_LZW) != TYPE_COMPRESSED_LZW &&
(btype & TYPE_COMPRESSED_GZ) != TYPE_COMPRESSED_GZ &&
(btype & TYPE_COMPRESSED_LZ) != TYPE_COMPRESSED_LZ &&
(btype & TYPE_COMPRESSED_LZO) != TYPE_COMPRESSED_LZO)
{
return (-1);
}
}
bzs.bzalloc = slab_alloc_i;
bzs.bzfree = slab_free;
bzs.opaque = NULL;

View file

@ -148,11 +148,15 @@ libbsc_deinit(void **data)
int
libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
struct libbsc_params *bscdat = (struct libbsc_params *)data;
if ((btype & TYPE_COMPRESSED_BZ2) == TYPE_COMPRESSED_BZ2 ||
(btype & TYPE_COMPRESSED_LZMA) == TYPE_COMPRESSED_LZMA)
return (-1);
rv = bsc_compress(src, dst, srclen, bscdat->lzpHashSize, bscdat->lzpMinLen,
LIBBSC_BLOCKSORTER_BWT, bscdat->bscCoder, bscdat->features);
if (rv < 0) {
@ -165,7 +169,7 @@ libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
libbsc_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
struct libbsc_params *bscdat = (struct libbsc_params *)data;

View file

@ -99,7 +99,7 @@ lz4_deinit(void **data)
int
lz4_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
struct lz4_params *lzdat = (struct lz4_params *)data;
@ -135,7 +135,7 @@ lz4_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
lz4_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
struct lz4_params *lzdat = (struct lz4_params *)data;

View file

@ -104,7 +104,7 @@ lz_fx_err(int err)
int
lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
struct lzfx_params *lzdat = (struct lzfx_params *)data;
@ -124,7 +124,7 @@ lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
lz_fx_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int rv;
unsigned int _srclen = srclen;

View file

@ -199,7 +199,7 @@ lzerr(int err, int cmp)
*/
int
lzma_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
uint64_t props_len = LZMA_PROPS_SIZE;
SRes res;
@ -210,6 +210,9 @@ lzma_compress(void *src, uint64_t srclen, void *dst,
lzerr(SZ_ERROR_DESTLEN, 1);
return (-1);
}
if ((btype & TYPE_COMPRESSED_LZMA) == TYPE_COMPRESSED_LZMA)
return (-1);
props->level = level;
_dst = (Byte *)dst;
@ -228,7 +231,7 @@ lzma_compress(void *src, uint64_t srclen, void *dst,
int
lzma_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
uint64_t _srclen;
const uchar_t *_src;

View file

@ -61,7 +61,7 @@ none_deinit(void **data)
int
none_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
memcpy(dst, src, srclen);
return (0);
@ -69,7 +69,7 @@ none_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
none_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
memcpy(dst, src, srclen);
return (0);

View file

@ -201,7 +201,7 @@ show_compression_stats(pc_ctx_t *pctx)
*/
static int
preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
{
uchar_t *dest = (uchar_t *)dst, type = 0;
int64_t result;
@ -247,7 +247,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
U64_P(dest + 1) = htonll(srclen);
_dstlen = srclen;
DEBUG_STAT_EN(strt = get_wtime_millis());
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr, data);
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr, btype, data);
DEBUG_STAT_EN(en = get_wtime_millis());
if (result > -1 && _dstlen < srclen) {
@ -273,7 +273,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
static int
preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
{
uchar_t *sorc = (uchar_t *)src, type;
int64_t result;
@ -288,7 +288,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
sorc += 8;
srclen -= 8;
DEBUG_STAT_EN(strt = get_wtime_millis());
result = dec_func(sorc, srclen, dst, dstlen, level, chdr, data);
result = dec_func(sorc, srclen, dst, dstlen, level, chdr, btype, data);
DEBUG_STAT_EN(en = get_wtime_millis());
if (result < 0) return (result);
@ -488,13 +488,13 @@ redo:
if (HDR & COMPRESSED) {
if (HDR & CHUNK_FLAG_PREPROC) {
rv = preproc_decompress(pctx, tdat->decompress, cmpbuf, dedupe_data_sz_cmp,
ubuf, &_chunksize, tdat->level, HDR, tdat->data, tdat->props);
ubuf, &_chunksize, tdat->level, HDR, pctx->btype, tdat->data, tdat->props);
} else {
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = tdat->decompress(cmpbuf, dedupe_data_sz_cmp, ubuf, &_chunksize,
tdat->level, HDR, tdat->data);
tdat->level, HDR, pctx->btype, tdat->data);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunk %d decompression speed %.3f MB/s\n",
tdat->id, get_mb_s(_chunksize, strt, en)));
@ -516,7 +516,7 @@ redo:
if (dedupe_index_sz >= 90 && dedupe_index_sz > dedupe_index_sz_cmp) {
/* Index should be at least 90 bytes to have been compressed. */
rv = lzma_decompress(cmpbuf, dedupe_index_sz_cmp, ubuf,
&dedupe_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
&dedupe_index_sz, tdat->rctx->level, 0, TYPE_BINARY, tdat->rctx->lzma_data);
} else {
memcpy(ubuf, cmpbuf, dedupe_index_sz);
}
@ -531,14 +531,14 @@ redo:
if (HDR & COMPRESSED) {
if (HDR & CHUNK_FLAG_PREPROC) {
rv = preproc_decompress(pctx, tdat->decompress, cseg, tdat->len_cmp,
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
tdat->props);
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, pctx->btype,
tdat->data, tdat->props);
} else {
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
&_chunksize, tdat->level, HDR, tdat->data);
&_chunksize, tdat->level, HDR, pctx->btype, tdat->data);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
get_mb_s(_chunksize, strt, en)));
@ -1520,7 +1520,8 @@ redo:
/* Compress index if it is at least 90 bytes. */
rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE,
dedupe_index_sz, compressed_chunk + RABIN_HDR_SIZE,
&index_size_cmp, tdat->rctx->level, 255, tdat->rctx->lzma_data);
&index_size_cmp, tdat->rctx->level, 255, TYPE_BINARY,
tdat->rctx->lzma_data);
/*
* If index compression fails or does not produce a smaller result
@ -1546,14 +1547,15 @@ plain_index:
if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0) {
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, tdat->data, tdat->props);
tdat->level, 0, pctx->btype, tdat->data, tdat->props);
} else if (_chunksize > 0) {
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, _chunksize,
compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, tdat->data);
compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, pctx->btype,
tdat->data);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
get_mb_s(_chunksize, strt, en)));
@ -1576,14 +1578,14 @@ plain_index:
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
rv = preproc_compress(pctx, tdat->compress,
tdat->uncompressed_chunk, tdat->rbytes,
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data,
compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data,
tdat->props);
} else {
DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis());
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data);
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
get_mb_s(_chunksize, strt, en)));
@ -2292,7 +2294,10 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
rctx = create_dedupe_context(chunksize, 0, pctx->rab_blk_size, pctx->algo, &props,
pctx->enable_delta_encode, pctx->enable_fixed_scan, VERSION, COMPRESS, 0, NULL,
pctx->pipe_mode, nprocs);
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
if (pctx->archive_mode)
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
else
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, NULL);
} else {
if (pctx->archive_mode)
rbytes = archiver_read(pctx, cread_buf, chunksize);
@ -2405,7 +2410,12 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
* buffer is in progress.
*/
if (pctx->enable_rabin_split) {
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
if (pctx->archive_mode)
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize,
&rabin_count, rctx, pctx);
else
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize,
&rabin_count, rctx, NULL);
} else {
if (pctx->archive_mode)
rbytes = archiver_read(pctx, cread_buf, chunksize);

View file

@ -84,38 +84,38 @@ extern uint32_t zlib_buf_extra(uint64_t buflen);
extern int lz4_buf_extra(uint64_t buflen);
extern int zlib_compress(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
extern int lzma_compress(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
extern int bzip2_compress(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
extern int adapt_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int ppmd_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lz_fx_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lz4_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int none_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int zlib_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lzma_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int bzip2_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int adapt_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int ppmd_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lz_fx_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int lz4_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int none_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int adapt_init(void **data, int *level, int nthreads, uint64_t chunksize,
int file_version, compress_op_t op);
@ -165,9 +165,9 @@ extern void none_stats(int show);
#ifdef ENABLE_PC_LIBBSC
extern int libbsc_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int libbsc_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data);
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
extern int libbsc_init(void **data, int *level, int nthreads, uint64_t chunksize,
int file_version, compress_op_t op);
extern void libbsc_props(algo_props_t *data, int level, uint64_t chunksize);

View file

@ -109,11 +109,13 @@ ppmd_deinit(void **data)
int
ppmd_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
CPpmd8 *_ppmd = (CPpmd8 *)data;
uchar_t *_src = (uchar_t *)src;
if (btype & TYPE_COMPRESSED)
return (-1);
Ppmd8_RangeEnc_Init(_ppmd);
Ppmd8_Init(_ppmd, _ppmd->Order, PPMD8_RESTORE_METHOD_RESTART);
_ppmd->buf = (Byte *)dst;
@ -132,7 +134,7 @@ ppmd_compress(void *src, uint64_t srclen, void *dst,
int
ppmd_decompress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, void *data)
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
CPpmd8 *_ppmd = (CPpmd8 *)data;
Byte *_src = (Byte *)src;

View file

@ -18,9 +18,9 @@ struct ext_entry {
{"c++" , TYPE_TEXT, 3},
{"hpp" , TYPE_TEXT, 3},
{"txt" , TYPE_TEXT, 3},
{"html" , TYPE_TEXT, 4},
{"htm" , TYPE_TEXT, 3},
{"xml" , TYPE_TEXT, 3},
{"html" , TYPE_TEXT|TYPE_MARKUP, 4},
{"htm" , TYPE_TEXT|TYPE_MARKUP, 3},
{"xml" , TYPE_TEXT|TYPE_MARKUP, 3},
{"info" , TYPE_TEXT, 4},
{"ppm" , TYPE_TEXT, 3},
{"svg" , TYPE_TEXT, 3},
@ -44,18 +44,18 @@ struct ext_entry {
{"java" , TYPE_TEXT, 4},
{"m4" , TYPE_TEXT, 2},
{"vb" , TYPE_TEXT, 2},
{"xslt" , TYPE_TEXT, 4},
{"xsl" , TYPE_TEXT, 3},
{"xslt" , TYPE_TEXT|TYPE_MARKUP, 4},
{"xsl" , TYPE_TEXT|TYPE_MARKUP, 3},
{"yacc" , TYPE_TEXT, 4},
{"lex" , TYPE_TEXT, 3},
{"csv" , TYPE_TEXT, 3},
{"shtml" , TYPE_TEXT, 5},
{"xhtml" , TYPE_TEXT, 5},
{"xht" , TYPE_TEXT, 3},
{"shtml" , TYPE_TEXT|TYPE_MARKUP, 5},
{"xhtml" , TYPE_TEXT|TYPE_MARKUP, 5},
{"xht" , TYPE_TEXT|TYPE_MARKUP, 3},
{"asp" , TYPE_TEXT, 3},
{"aspx" , TYPE_TEXT, 4},
{"rss" , TYPE_TEXT, 3},
{"atom" , TYPE_TEXT, 4},
{"rss" , TYPE_TEXT|TYPE_MARKUP, 3},
{"atom" , TYPE_TEXT|TYPE_MARKUP, 4},
{"cgi" , TYPE_TEXT, 3},
{"c#" , TYPE_TEXT, 2},
{"cob" , TYPE_TEXT, 3},
@ -67,8 +67,18 @@ struct ext_entry {
{"ps" , TYPE_TEXT, 2},
{"bib" , TYPE_TEXT, 3},
{"lua" , TYPE_TEXT, 3},
{"qml" , TYPE_TEXT, 3},
{"qml" , TYPE_TEXT|TYPE_MARKUP, 3},
{"fa" , TYPE_TEXT, 2},
{"faa" , TYPE_TEXT, 3},
{"asn" , TYPE_TEXT|TYPE_MARKUP, 3},
{"ffn" , TYPE_TEXT, 3},
{"fna" , TYPE_TEXT, 3},
{"frn" , TYPE_TEXT, 3},
{"gbk" , TYPE_TEXT, 3},
{"gff" , TYPE_TEXT, 3},
{"ptt" , TYPE_TEXT, 3},
{"rnt" , TYPE_TEXT, 3},
{"val" , TYPE_BINARY, 3},
{"tcc" , TYPE_TEXT, 3},
{"css" , TYPE_TEXT, 3},
{"pod" , TYPE_TEXT, 3},
@ -78,55 +88,61 @@ struct ext_entry {
{"upp" , TYPE_TEXT, 3},
{"mom" , TYPE_TEXT, 3},
{"tmac" , TYPE_TEXT, 4},
{"exe" , TYPE_EXE, 3},
{"dll" , TYPE_EXE, 3},
{"bin" , TYPE_EXE, 3},
{"o" , TYPE_EXE, 1},
{"a" , TYPE_EXE, 1},
{"obj" , TYPE_EXE, 3},
{"so" , TYPE_EXE, 2},
{"com" , TYPE_EXE, 3},
{"xpi" , TYPE_EXE, 3},
{"off" , TYPE_EXE, 3},
{"pdf" , TYPE_COMPRESSED, 3},
{"jpg" , TYPE_JPEG, 3},
{"jpeg" , TYPE_JPEG, 4},
{"png" , TYPE_COMPRESSED, 3},
{"mp3" , TYPE_COMPRESSED, 3},
{"wma" , TYPE_COMPRESSED, 3},
{"divx" , TYPE_COMPRESSED, 4},
{"mp4" , TYPE_COMPRESSED, 3},
{"aac" , TYPE_COMPRESSED, 3},
{"m4a" , TYPE_COMPRESSED, 3},
{"m4p" , TYPE_COMPRESSED, 3},
{"ofs" , TYPE_COMPRESSED, 3},
{"ofr" , TYPE_COMPRESSED, 3},
{"flac" , TYPE_COMPRESSED, 4},
{"pac" , TYPE_COMPRESSED, 3},
{"gif" , TYPE_COMPRESSED, 3},
{"jp2" , TYPE_JPEG, 3},
{"gz" , TYPE_COMPRESSED, 2},
{"bz2" , TYPE_COMPRESSED, 3},
{"zip" , TYPE_COMPRESSED, 3},
{"arj" , TYPE_COMPRESSED, 3},
{"arc" , TYPE_COMPRESSED, 3},
{"jar" , TYPE_COMPRESSED, 3},
{"lz" , TYPE_COMPRESSED, 2},
{"lzh" , TYPE_COMPRESSED, 3},
{"lzma" , TYPE_COMPRESSED, 4},
{"lzo" , TYPE_COMPRESSED, 3},
{"dmg" , TYPE_COMPRESSED, 3},
{"7z" , TYPE_COMPRESSED, 2},
{"uha" , TYPE_COMPRESSED, 3},
{"alz" , TYPE_COMPRESSED, 3},
{"ace" , TYPE_COMPRESSED, 3},
{"rar" , TYPE_COMPRESSED, 3},
{"xz" , TYPE_COMPRESSED, 2},
{"exe" , TYPE_BINARY|TYPE_EXE, 3},
{"dll" , TYPE_BINARY|TYPE_EXE, 3},
{"bin" , TYPE_BINARY|TYPE_EXE, 3},
{"o" , TYPE_BINARY|TYPE_EXE, 1},
{"a" , TYPE_BINARY|TYPE_EXE, 1},
{"obj" , TYPE_BINARY|TYPE_EXE, 3},
{"so" , TYPE_BINARY|TYPE_EXE, 2},
{"com" , TYPE_BINARY|TYPE_EXE, 3},
{"xpi" , TYPE_BINARY|TYPE_EXE, 3},
{"off" , TYPE_BINARY|TYPE_EXE, 3},
{"pdf" , TYPE_BINARY, 3},
{"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
{"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4},
{"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
{"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"divx" , TYPE_BINARY|TYPE_COMPRESSED, 4},
{"mp4" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"aac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"m4a" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4},
{"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3},
{"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
{"gz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 2},
{"tgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
{"bz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 3},
{"tbz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 4},
{"zip" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP, 3},
{"arj" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARJ, 3},
{"arc" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC, 3},
{"jar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
{"lz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ, 2},
{"lzh" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3},
{"lha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3},
{"lzma" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 4},
{"lzo" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO, 3},
{"dmg" , TYPE_BINARY, 3},
{"7z" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
{"uha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC, 3},
{"alz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ, 3},
{"ace" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE, 3},
{"rar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR, 3},
{"xz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
{"txz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 3},
{"pmd" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD, 3},
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
{"xcf" , TYPE_BINARY, 3},
{"mo" , TYPE_BINARY, 2},
{"bmp" , TYPE_BINARY, 3},
{"pyo" , TYPE_BINARY, 3},
{"pyc" , TYPE_BINARY, 3},
{"wav" , TYPE_BINARY, 3},
};
#define NUM_EXT (116)
#endif

View file

@ -5,9 +5,9 @@ cpp,TYPE_TEXT
c++,TYPE_TEXT
hpp,TYPE_TEXT
txt,TYPE_TEXT
html,TYPE_TEXT
htm,TYPE_TEXT
xml,TYPE_TEXT
html,TYPE_TEXT|TYPE_MARKUP
htm,TYPE_TEXT|TYPE_MARKUP
xml,TYPE_TEXT|TYPE_MARKUP
info,TYPE_TEXT
ppm,TYPE_TEXT
svg,TYPE_TEXT
@ -31,18 +31,18 @@ go,TYPE_TEXT
java,TYPE_TEXT
m4,TYPE_TEXT
vb,TYPE_TEXT
xslt,TYPE_TEXT
xsl,TYPE_TEXT
xslt,TYPE_TEXT|TYPE_MARKUP
xsl,TYPE_TEXT|TYPE_MARKUP
yacc,TYPE_TEXT
lex,TYPE_TEXT
csv,TYPE_TEXT
shtml,TYPE_TEXT
xhtml,TYPE_TEXT
xht,TYPE_TEXT
shtml,TYPE_TEXT|TYPE_MARKUP
xhtml,TYPE_TEXT|TYPE_MARKUP
xht,TYPE_TEXT|TYPE_MARKUP
asp,TYPE_TEXT
aspx,TYPE_TEXT
rss,TYPE_TEXT
atom,TYPE_TEXT
rss,TYPE_TEXT|TYPE_MARKUP
atom,TYPE_TEXT|TYPE_MARKUP
cgi,TYPE_TEXT
c#,TYPE_TEXT
cob,TYPE_TEXT
@ -54,8 +54,21 @@ s,TYPE_TEXT
ps,TYPE_TEXT
bib,TYPE_TEXT
lua,TYPE_TEXT
qml,TYPE_TEXT
qml,TYPE_TEXT|TYPE_MARKUP
# These are all genomic data file extensions
fa,TYPE_TEXT
faa,TYPE_TEXT
asn,TYPE_TEXT|TYPE_MARKUP
ffn,TYPE_TEXT
fna,TYPE_TEXT
frn,TYPE_TEXT
gbk,TYPE_TEXT
gff,TYPE_TEXT
ptt,TYPE_TEXT
rnt,TYPE_TEXT
val,TYPE_BINARY
tcc,TYPE_TEXT
css,TYPE_TEXT
pod,TYPE_TEXT
@ -65,52 +78,59 @@ am,TYPE_TEXT
upp,TYPE_TEXT
mom,TYPE_TEXT
tmac,TYPE_TEXT
exe,TYPE_EXE
dll,TYPE_EXE
bin,TYPE_EXE
o,TYPE_EXE
a,TYPE_EXE
obj,TYPE_EXE
so,TYPE_EXE
com,TYPE_EXE
xpi,TYPE_EXE
off,TYPE_EXE
pdf,TYPE_COMPRESSED
jpg,TYPE_JPEG
jpeg,TYPE_JPEG
png,TYPE_COMPRESSED
mp3,TYPE_COMPRESSED
wma,TYPE_COMPRESSED
divx,TYPE_COMPRESSED
mp4,TYPE_COMPRESSED
aac,TYPE_COMPRESSED
m4a,TYPE_COMPRESSED
m4p,TYPE_COMPRESSED
ofs,TYPE_COMPRESSED
ofr,TYPE_COMPRESSED
flac,TYPE_COMPRESSED
pac,TYPE_COMPRESSED
gif,TYPE_COMPRESSED
jp2,TYPE_JPEG
gz,TYPE_COMPRESSED
bz2,TYPE_COMPRESSED
zip,TYPE_COMPRESSED
arj,TYPE_COMPRESSED
arc,TYPE_COMPRESSED
jar,TYPE_COMPRESSED
lz,TYPE_COMPRESSED
lzh,TYPE_COMPRESSED
lzma,TYPE_COMPRESSED
lzo,TYPE_COMPRESSED
dmg,TYPE_COMPRESSED
7z,TYPE_COMPRESSED
uha,TYPE_COMPRESSED
alz,TYPE_COMPRESSED
ace,TYPE_COMPRESSED
rar,TYPE_COMPRESSED
xz,TYPE_COMPRESSED
exe,TYPE_BINARY|TYPE_EXE
dll,TYPE_BINARY|TYPE_EXE
bin,TYPE_BINARY|TYPE_EXE
o,TYPE_BINARY|TYPE_EXE
a,TYPE_BINARY|TYPE_EXE
obj,TYPE_BINARY|TYPE_EXE
so,TYPE_BINARY|TYPE_EXE
com,TYPE_BINARY|TYPE_EXE
xpi,TYPE_BINARY|TYPE_EXE
off,TYPE_BINARY|TYPE_EXE
pdf,TYPE_BINARY
jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
mp3,TYPE_BINARY|TYPE_COMPRESSED
wma,TYPE_BINARY|TYPE_COMPRESSED
divx,TYPE_BINARY|TYPE_COMPRESSED
mp4,TYPE_BINARY|TYPE_COMPRESSED
aac,TYPE_BINARY|TYPE_COMPRESSED
m4a,TYPE_BINARY|TYPE_COMPRESSED
m4p,TYPE_BINARY|TYPE_COMPRESSED
ofs,TYPE_BINARY|TYPE_COMPRESSED
ofr,TYPE_BINARY|TYPE_COMPRESSED
flac,TYPE_BINARY|TYPE_COMPRESSED
pac,TYPE_BINARY|TYPE_COMPRESSED
gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW
jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
gz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
tgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
bz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2
tbz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2
zip,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP
arj,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARJ
arc,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC
jar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
lz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ
lzh,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH
lha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH
lzma,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
lzo,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO
dmg,TYPE_BINARY
7z,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
uha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC
alz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ
ace,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE
rar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR
xz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
txz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
xcf,TYPE_BINARY
mo,TYPE_BINARY
bmp,TYPE_BINARY
pyo,TYPE_BINARY
pyc,TYPE_BINARY
wav,TYPE_BINARY

View file

@ -1,6 +1,5 @@
#!/bin/sh
count=`cat extensions.txt | wc -l`
echo '
/* Generated File. DO NOT EDIT. */
/*
@ -18,6 +17,9 @@ struct ext_entry {
rm -f extlist
cat extensions.txt | while read line
do
[ "x$line" = "x" ] && continue
echo "$line" | egrep "^#" > /dev/null
[ $? -eq 0 ] && continue
_OIFS="$IFS"
IFS=","
set -- $line
@ -30,7 +32,6 @@ do
done
echo '};' >> extensions.h
echo "#define NUM_EXT (${count})" >> extensions.h
echo "#endif" >> extensions.h
./perfect -nm < extlist
rm -f extlist

View file

@ -12,17 +12,21 @@
/* small adjustments to _a_ to make values distinct */
ub1 tab[] = {
10,76,0,76,70,42,0,1,0,0,119,1,61,1,70,79,
0,0,0,4,70,1,0,122,0,119,47,76,76,34,110,101,
0,76,70,70,42,28,0,66,0,108,0,109,28,4,28,4,
70,0,1,20,4,123,123,0,79,75,34,76,69,77,0,69,
125,0,0,82,113,0,125,85,113,0,0,7,0,0,125,0,
0,0,7,87,0,0,82,0,0,88,0,7,0,85,125,85,
0,113,0,0,85,0,0,113,0,113,124,125,0,125,0,0,
113,0,11,113,125,0,0,0,0,85,113,85,22,0,0,125,
0,113,0,0,113,0,82,0,125,111,87,88,69,125,113,0,
124,0,7,22,113,22,0,235,0,120,120,125,113,0,74,120,
0,124,87,7,0,127,0,0,11,85,85,146,115,11,183,146,
0,0,88,0,0,85,42,0,171,0,0,0,0,83,0,0,
};
/*
 * The hash function.
 *
 * Computes a hash of `key` (`len` bytes) via lookup(), passing 0x9e3779b9 as
 * its third argument, then XORs the upper bits of that hash with the tab[]
 * entry selected by its lower bits and returns the result.
 *
 * NOTE(review): two `rsl =` assignments appear back to back below. The second
 * overwrites the first, so only the (val>>25, val&0x7f) form influences the
 * return value. The first presumably is the earlier variant for a 64-entry
 * table that this view shows alongside the new line -- confirm against the
 * actual generated file.
 */
ub4 phash(char *key, int len)
{
ub4 rsl, val = lookup(key, len, 0x9e3779b9);
rsl = ((val>>26)^tab[val&0x3f]);
/* Index tab[] with the low 7 bits; fold in the bits above bit 24. */
rsl = ((val>>25)^tab[val&0x7f]);
return rsl;
}

View file

@ -7,9 +7,9 @@
#define PHASH
extern ub1 tab[];
#define PHASHLEN 0x40 /* length of hash mapping table */
#define PHASHNKEYS 116 /* How many keys were hashed */
#define PHASHRANGE 128 /* Range any input might map to */
#define PHASHLEN 0x80 /* length of hash mapping table */
#define PHASHNKEYS 133 /* How many keys were hashed */
#define PHASHRANGE 256 /* Range any input might map to */
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */
ub4 phash();

View file

@ -228,6 +228,35 @@ struct fn_list {
struct fn_list *next;
};
/*
 * Enumerated type constants for file type identification in pc_archive.
 *
 * A type value is composed by OR-ing together:
 *   - base flags, which occupy the low three bits
 *     (TYPE_TEXT, TYPE_BINARY, TYPE_COMPRESSED), and
 *   - at most one sub-type, which is a small ordinal shifted left by 3.
 *
 * Sub-types must remain multiples of 8 so that they never share a bit with
 * the base flags. With a step of 4, every other sub-type carries the
 * TYPE_COMPRESSED bit, and OR-ing TYPE_COMPRESSED into a sub-type makes
 * neighbouring sub-types indistinguishable (for example
 * TYPE_COMPRESSED|TYPE_COMPRESSED_LZW would equal
 * TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2).
 */
typedef enum {
	TYPE_UNKNOWN          = 0,

	/* Base flags: low three bits. */
	TYPE_TEXT             = 1,
	TYPE_BINARY           = 2,
	TYPE_COMPRESSED       = 4,

	/* Sub-types: ordinal << 3, disjoint from the base flag bits. */
	TYPE_EXE              = (1 << 3),
	TYPE_JPEG             = (2 << 3),
	TYPE_MARKUP           = (3 << 3),
	TYPE_COMPRESSED_GZ    = (4 << 3),
	TYPE_COMPRESSED_LZW   = (5 << 3),
	TYPE_COMPRESSED_BZ2   = (6 << 3),
	TYPE_COMPRESSED_ZIP   = (7 << 3),
	TYPE_COMPRESSED_ARJ   = (8 << 3),
	TYPE_COMPRESSED_ARC   = (9 << 3),
	TYPE_COMPRESSED_LH    = (10 << 3),
	TYPE_COMPRESSED_LZMA  = (11 << 3),
	TYPE_COMPRESSED_LZO   = (12 << 3),
	TYPE_COMPRESSED_UHARC = (13 << 3),
	TYPE_COMPRESSED_ALZ   = (14 << 3),
	TYPE_COMPRESSED_ACE   = (15 << 3),
	TYPE_COMPRESSED_RAR   = (16 << 3),
	TYPE_COMPRESSED_LZ    = (17 << 3),
	TYPE_COMPRESSED_PPMD  = (18 << 3),
	TYPE_COMPRESSED_ZPAQ  = (19 << 3)
} data_type_t;
#ifndef _IN_UTILS_
extern processor_info_t proc_info;
#endif
@ -254,7 +283,7 @@ extern char *get_temp_dir();
/* Pointer type for compress and decompress functions. */
typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst,
uint64_t *destlen, int level, uchar_t chdr, void *data);
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
typedef enum {
COMPRESS,

View file

@ -142,7 +142,7 @@ void zerr(int ret, int cmp)
int
zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int ret, ending;
unsigned int slen, dlen;
@ -205,7 +205,7 @@ zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int
zlib_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
int level, uchar_t chdr, void *data)
int level, uchar_t chdr, int btype, void *data)
{
int err;
unsigned int slen, dlen;