Leverage file type detection (archiver) to improve compression performance.
Use the detected file/data type (from the archiver) for adaptive compression modes. Update type flags and add more file extensions.
This commit is contained in:
parent
b7facc929e
commit
cae9de9b2e
19 changed files with 340 additions and 239 deletions
|
@ -35,6 +35,7 @@
|
|||
#include <utils.h>
|
||||
#include <pcompress.h>
|
||||
#include <allocator.h>
|
||||
#include <pc_archive.h>
|
||||
|
||||
#define FIFTY_PCT(x) (((x)/10) * 5)
|
||||
#define FORTY_PCT(x) (((x)/10) * 4)
|
||||
|
@ -46,22 +47,22 @@ static unsigned int bsc_count = 0;
|
|||
static unsigned int ppmd_count = 0;
|
||||
|
||||
extern int lzma_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int bzip2_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int ppmd_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int libbsc_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
|
||||
extern int lzma_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int bzip2_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int ppmd_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int libbsc_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
|
||||
extern int lzma_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
||||
int file_version, compress_op_t op);
|
||||
|
@ -180,51 +181,63 @@ adapt_deinit(void **data)
|
|||
|
||||
int
|
||||
adapt_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
struct adapt_data *adat = (struct adapt_data *)(data);
|
||||
uchar_t *src1 = (uchar_t *)src;
|
||||
uint64_t i, tot8b, tag1, tag2, tag3;
|
||||
int rv = 0;
|
||||
double tagcnt, pct_tag;
|
||||
uchar_t cur_byte, prev_byte;
|
||||
|
||||
/*
|
||||
* Count number of 8-bit binary bytes and XML tags in source.
|
||||
*/
|
||||
tot8b = 0;
|
||||
tag1 = 0;
|
||||
tag2 = 0;
|
||||
tag3 = 0;
|
||||
prev_byte = cur_byte = 0;
|
||||
for (i = 0; i < srclen; i++) {
|
||||
cur_byte = src1[i];
|
||||
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
|
||||
tag1 += (cur_byte == '<');
|
||||
tag2 += (cur_byte == '>');
|
||||
tag3 += ((prev_byte == '<') & (cur_byte == '/'));
|
||||
tag3 += ((prev_byte == '/') & (cur_byte == '>'));
|
||||
if (cur_byte != ' ')
|
||||
prev_byte = cur_byte;
|
||||
if (btype == TYPE_UNKNOWN) {
|
||||
uint64_t i, tot8b, tag1, tag2, tag3;
|
||||
double tagcnt, pct_tag;
|
||||
uchar_t cur_byte, prev_byte;
|
||||
/*
|
||||
* Count number of 8-bit binary bytes and XML tags in source.
|
||||
*/
|
||||
tot8b = 0;
|
||||
tag1 = 0;
|
||||
tag2 = 0;
|
||||
tag3 = 0;
|
||||
prev_byte = cur_byte = 0;
|
||||
for (i = 0; i < srclen; i++) {
|
||||
cur_byte = src1[i];
|
||||
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
|
||||
tag1 += (cur_byte == '<');
|
||||
tag2 += (cur_byte == '>');
|
||||
tag3 += ((prev_byte == '<') & (cur_byte == '/'));
|
||||
tag3 += ((prev_byte == '/') & (cur_byte == '>'));
|
||||
if (cur_byte != ' ')
|
||||
prev_byte = cur_byte;
|
||||
}
|
||||
|
||||
tot8b /= 0x80;
|
||||
tagcnt = tag1 + tag2 + tag3;
|
||||
pct_tag = tagcnt / (double)srclen;
|
||||
if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
|
||||
btype = TYPE_BINARY;
|
||||
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
|
||||
btype = TYPE_BINARY;
|
||||
} else {
|
||||
btype = TYPE_TEXT;
|
||||
if (tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
|
||||
tagcnt > (double)srclen * 0.001)
|
||||
btype |= TYPE_MARKUP;
|
||||
}
|
||||
}
|
||||
|
||||
tot8b /= 0x80;
|
||||
tagcnt = tag1 + tag2 + tag3;
|
||||
pct_tag = tagcnt / (double)srclen;
|
||||
|
||||
/*
|
||||
* Use PPMd if some percentage of source is 7-bit textual bytes, otherwise
|
||||
* use Bzip2 or LZMA.
|
||||
*/
|
||||
if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
|
||||
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data);
|
||||
if (adat->adapt_mode == 2 && (btype & TYPE_BINARY)) {
|
||||
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_LZMA;
|
||||
lzma_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
|
||||
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, NULL);
|
||||
} else if (adat->adapt_mode == 1 && (btype & TYPE_BINARY)) {
|
||||
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_BZIP2;
|
||||
|
@ -232,16 +245,15 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
} else {
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
if (adat->bsc_data && tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
|
||||
tagcnt > (double)srclen * 0.001) {
|
||||
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data);
|
||||
if (adat->bsc_data && (btype & TYPE_MARKUP)) {
|
||||
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_BSC;
|
||||
bsc_count++;
|
||||
} else {
|
||||
#endif
|
||||
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data);
|
||||
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->ppmd_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_PPMD;
|
||||
|
@ -256,7 +268,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
int
|
||||
adapt_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
struct adapt_data *adat = (struct adapt_data *)(data);
|
||||
uchar_t cmp_flags;
|
||||
|
@ -264,17 +276,17 @@ adapt_decompress(void *src, uint64_t srclen, void *dst,
|
|||
cmp_flags = (chdr>>4) & CHDR_ALGO_MASK;
|
||||
|
||||
if (cmp_flags == ADAPT_COMPRESS_LZMA) {
|
||||
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data));
|
||||
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data));
|
||||
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_BZIP2) {
|
||||
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, NULL));
|
||||
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, btype, NULL));
|
||||
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_PPMD) {
|
||||
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data));
|
||||
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->ppmd_data));
|
||||
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_BSC) {
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data));
|
||||
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data));
|
||||
#else
|
||||
log_msg(LOG_ERR, 0, "Cannot decompress chunk. Libbsc support not present.\n");
|
||||
return (-1);
|
||||
|
|
|
@ -229,8 +229,6 @@ archiver_read(void *ctx, void *buf, uint64_t count)
|
|||
sem_post(&(pctx->write_sem));
|
||||
sem_wait(&(pctx->read_sem));
|
||||
pctx->arc_buf = NULL;
|
||||
if (pctx->btype == TYPE_UNKNOWN)
|
||||
pctx->btype = TYPE_GENERIC;
|
||||
return (pctx->arc_buf_pos);
|
||||
}
|
||||
|
||||
|
@ -1166,9 +1164,9 @@ init_archive_mod() {
|
|||
if (!inited) {
|
||||
int i, j;
|
||||
|
||||
exthtab = malloc(NUM_EXT * sizeof (struct ext_hash_entry));
|
||||
exthtab = malloc(PHASHNKEYS * sizeof (struct ext_hash_entry));
|
||||
if (exthtab != NULL) {
|
||||
for (i = 0; i < NUM_EXT; i++) {
|
||||
for (i = 0; i < PHASHNKEYS; i++) {
|
||||
uint64_t extnum;
|
||||
ub4 slot = phash(extlist[i].ext, extlist[i].len);
|
||||
extnum = 0;
|
||||
|
@ -1211,7 +1209,7 @@ detect_type_by_ext(char *path, int pathlen)
|
|||
if (len == 0) goto out; // If extension is empty give up
|
||||
ext = &path[i+1];
|
||||
slot = phash(ext, len);
|
||||
if (slot > NUM_EXT) goto out; // Extension maps outside hash table range, give up
|
||||
if (slot > PHASHNKEYS) goto out; // Extension maps outside hash table range, give up
|
||||
extnum = 0;
|
||||
|
||||
/*
|
||||
|
@ -1244,15 +1242,15 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
|||
if (len < 16) return (TYPE_UNKNOWN);
|
||||
|
||||
if (U32_P(buf) == ELFSHORT)
|
||||
return (TYPE_EXE); // Regular ELF
|
||||
return (TYPE_BINARY|TYPE_EXE); // Regular ELF
|
||||
if ((buf[0] == 'M' || buf[0] == 'L') && buf[1] == 'Z')
|
||||
return (TYPE_EXE); // MSDOS Exe
|
||||
return (TYPE_BINARY|TYPE_EXE); // MSDOS Exe
|
||||
if (buf[0] == 0xe9)
|
||||
return (TYPE_EXE); // MSDOS COM
|
||||
return (TYPE_BINARY|TYPE_EXE); // MSDOS COM
|
||||
if (U32_P(buf) == TZSHORT)
|
||||
return (TYPE_BINARY); // Timezone data
|
||||
return (TYPE_BINARY|TYPE_BINARY); // Timezone data
|
||||
if (U32_P(buf) == PPMSHORT)
|
||||
return (TYPE_COMPRESSED); // PPM Compressed archive
|
||||
return (TYPE_BINARY|TYPE_COMPRESSED); // PPM Compressed archive
|
||||
|
||||
return (TYPE_UNKNOWN);
|
||||
}
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
#ifndef _ARCHIVE_H
|
||||
#define _ARCHIVE_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <pcompress.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -38,16 +41,6 @@ typedef struct {
|
|||
size_t size;
|
||||
} archive_list_entry_t;
|
||||
|
||||
typedef enum {
|
||||
TYPE_UNKNOWN = 0,
|
||||
TYPE_GENERIC,
|
||||
TYPE_COMPRESSED,
|
||||
TYPE_EXE,
|
||||
TYPE_TEXT,
|
||||
TYPE_BINARY,
|
||||
TYPE_JPEG
|
||||
} data_type_t;
|
||||
|
||||
/*
|
||||
* Archiving related functions.
|
||||
*/
|
||||
|
|
|
@ -95,7 +95,7 @@ bzerr(int err)
|
|||
|
||||
int
|
||||
bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
bz_stream bzs;
|
||||
int ret, ending;
|
||||
|
@ -164,7 +164,7 @@ bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
bzip2_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
bz_stream bzs;
|
||||
int ret;
|
||||
|
@ -174,6 +174,15 @@ bzip2_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
char *dst1 = (char *)dst;
|
||||
char *src1 = (char *)src;
|
||||
|
||||
if (btype & TYPE_COMPRESSED) {
|
||||
if ((btype & TYPE_COMPRESSED_LZW) != TYPE_COMPRESSED_LZW &&
|
||||
(btype & TYPE_COMPRESSED_GZ) != TYPE_COMPRESSED_GZ &&
|
||||
(btype & TYPE_COMPRESSED_LZ) != TYPE_COMPRESSED_LZ &&
|
||||
(btype & TYPE_COMPRESSED_LZO) != TYPE_COMPRESSED_LZO)
|
||||
{
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
bzs.bzalloc = slab_alloc_i;
|
||||
bzs.bzfree = slab_free;
|
||||
bzs.opaque = NULL;
|
||||
|
|
|
@ -148,11 +148,15 @@ libbsc_deinit(void **data)
|
|||
|
||||
int
|
||||
libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
struct libbsc_params *bscdat = (struct libbsc_params *)data;
|
||||
|
||||
if ((btype & TYPE_COMPRESSED_BZ2) == TYPE_COMPRESSED_BZ2 ||
|
||||
(btype & TYPE_COMPRESSED_LZMA) == TYPE_COMPRESSED_LZMA)
|
||||
return (-1);
|
||||
|
||||
rv = bsc_compress(src, dst, srclen, bscdat->lzpHashSize, bscdat->lzpMinLen,
|
||||
LIBBSC_BLOCKSORTER_BWT, bscdat->bscCoder, bscdat->features);
|
||||
if (rv < 0) {
|
||||
|
@ -165,7 +169,7 @@ libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
libbsc_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
struct libbsc_params *bscdat = (struct libbsc_params *)data;
|
||||
|
|
|
@ -99,7 +99,7 @@ lz4_deinit(void **data)
|
|||
|
||||
int
|
||||
lz4_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
struct lz4_params *lzdat = (struct lz4_params *)data;
|
||||
|
@ -135,7 +135,7 @@ lz4_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
lz4_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
struct lz4_params *lzdat = (struct lz4_params *)data;
|
||||
|
|
|
@ -104,7 +104,7 @@ lz_fx_err(int err)
|
|||
|
||||
int
|
||||
lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
struct lzfx_params *lzdat = (struct lzfx_params *)data;
|
||||
|
@ -124,7 +124,7 @@ lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
lz_fx_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int rv;
|
||||
unsigned int _srclen = srclen;
|
||||
|
|
|
@ -199,7 +199,7 @@ lzerr(int err, int cmp)
|
|||
*/
|
||||
int
|
||||
lzma_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
uint64_t props_len = LZMA_PROPS_SIZE;
|
||||
SRes res;
|
||||
|
@ -210,6 +210,9 @@ lzma_compress(void *src, uint64_t srclen, void *dst,
|
|||
lzerr(SZ_ERROR_DESTLEN, 1);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if ((btype & TYPE_COMPRESSED_LZMA) == TYPE_COMPRESSED_LZMA)
|
||||
return (-1);
|
||||
props->level = level;
|
||||
|
||||
_dst = (Byte *)dst;
|
||||
|
@ -228,7 +231,7 @@ lzma_compress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
int
|
||||
lzma_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
uint64_t _srclen;
|
||||
const uchar_t *_src;
|
||||
|
|
|
@ -61,7 +61,7 @@ none_deinit(void **data)
|
|||
|
||||
int
|
||||
none_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
memcpy(dst, src, srclen);
|
||||
return (0);
|
||||
|
@ -69,7 +69,7 @@ none_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
none_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
memcpy(dst, src, srclen);
|
||||
return (0);
|
||||
|
|
44
pcompress.c
44
pcompress.c
|
@ -201,7 +201,7 @@ show_compression_stats(pc_ctx_t *pctx)
|
|||
*/
|
||||
static int
|
||||
preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen,
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
|
||||
{
|
||||
uchar_t *dest = (uchar_t *)dst, type = 0;
|
||||
int64_t result;
|
||||
|
@ -247,7 +247,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
U64_P(dest + 1) = htonll(srclen);
|
||||
_dstlen = srclen;
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr, data);
|
||||
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr, btype, data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
|
||||
if (result > -1 && _dstlen < srclen) {
|
||||
|
@ -273,7 +273,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
|
||||
static int
|
||||
preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen,
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, void *data, algo_props_t *props)
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
|
||||
{
|
||||
uchar_t *sorc = (uchar_t *)src, type;
|
||||
int64_t result;
|
||||
|
@ -288,7 +288,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
|||
sorc += 8;
|
||||
srclen -= 8;
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
result = dec_func(sorc, srclen, dst, dstlen, level, chdr, data);
|
||||
result = dec_func(sorc, srclen, dst, dstlen, level, chdr, btype, data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
|
||||
if (result < 0) return (result);
|
||||
|
@ -488,13 +488,13 @@ redo:
|
|||
if (HDR & COMPRESSED) {
|
||||
if (HDR & CHUNK_FLAG_PREPROC) {
|
||||
rv = preproc_decompress(pctx, tdat->decompress, cmpbuf, dedupe_data_sz_cmp,
|
||||
ubuf, &_chunksize, tdat->level, HDR, tdat->data, tdat->props);
|
||||
ubuf, &_chunksize, tdat->level, HDR, pctx->btype, tdat->data, tdat->props);
|
||||
} else {
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
rv = tdat->decompress(cmpbuf, dedupe_data_sz_cmp, ubuf, &_chunksize,
|
||||
tdat->level, HDR, tdat->data);
|
||||
tdat->level, HDR, pctx->btype, tdat->data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk %d decompression speed %.3f MB/s\n",
|
||||
tdat->id, get_mb_s(_chunksize, strt, en)));
|
||||
|
@ -516,7 +516,7 @@ redo:
|
|||
if (dedupe_index_sz >= 90 && dedupe_index_sz > dedupe_index_sz_cmp) {
|
||||
/* Index should be at least 90 bytes to have been compressed. */
|
||||
rv = lzma_decompress(cmpbuf, dedupe_index_sz_cmp, ubuf,
|
||||
&dedupe_index_sz, tdat->rctx->level, 0, tdat->rctx->lzma_data);
|
||||
&dedupe_index_sz, tdat->rctx->level, 0, TYPE_BINARY, tdat->rctx->lzma_data);
|
||||
} else {
|
||||
memcpy(ubuf, cmpbuf, dedupe_index_sz);
|
||||
}
|
||||
|
@ -531,14 +531,14 @@ redo:
|
|||
if (HDR & COMPRESSED) {
|
||||
if (HDR & CHUNK_FLAG_PREPROC) {
|
||||
rv = preproc_decompress(pctx, tdat->decompress, cseg, tdat->len_cmp,
|
||||
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, tdat->data,
|
||||
tdat->props);
|
||||
tdat->uncompressed_chunk, &_chunksize, tdat->level, HDR, pctx->btype,
|
||||
tdat->data, tdat->props);
|
||||
} else {
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
rv = tdat->decompress(cseg, tdat->len_cmp, tdat->uncompressed_chunk,
|
||||
&_chunksize, tdat->level, HDR, tdat->data);
|
||||
&_chunksize, tdat->level, HDR, pctx->btype, tdat->data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
|
||||
get_mb_s(_chunksize, strt, en)));
|
||||
|
@ -1520,7 +1520,8 @@ redo:
|
|||
/* Compress index if it is at least 90 bytes. */
|
||||
rv = lzma_compress(tdat->uncompressed_chunk + RABIN_HDR_SIZE,
|
||||
dedupe_index_sz, compressed_chunk + RABIN_HDR_SIZE,
|
||||
&index_size_cmp, tdat->rctx->level, 255, tdat->rctx->lzma_data);
|
||||
&index_size_cmp, tdat->rctx->level, 255, TYPE_BINARY,
|
||||
tdat->rctx->lzma_data);
|
||||
|
||||
/*
|
||||
* If index compression fails or does not produce a smaller result
|
||||
|
@ -1546,14 +1547,15 @@ plain_index:
|
|||
if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0) {
|
||||
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
|
||||
_chunksize, compressed_chunk + index_size_cmp, &_chunksize,
|
||||
tdat->level, 0, tdat->data, tdat->props);
|
||||
tdat->level, 0, pctx->btype, tdat->data, tdat->props);
|
||||
|
||||
} else if (_chunksize > 0) {
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
rv = tdat->compress(tdat->uncompressed_chunk + dedupe_index_sz, _chunksize,
|
||||
compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, tdat->data);
|
||||
compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, pctx->btype,
|
||||
tdat->data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
|
||||
get_mb_s(_chunksize, strt, en)));
|
||||
|
@ -1576,14 +1578,14 @@ plain_index:
|
|||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
||||
rv = preproc_compress(pctx, tdat->compress,
|
||||
tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data,
|
||||
tdat->props);
|
||||
} else {
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
rv = tdat->compress(tdat->uncompressed_chunk, tdat->rbytes,
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, tdat->data);
|
||||
compressed_chunk, &_chunksize, tdat->level, 0, pctx->btype, tdat->data);
|
||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
|
||||
get_mb_s(_chunksize, strt, en)));
|
||||
|
@ -2292,7 +2294,10 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
|
|||
rctx = create_dedupe_context(chunksize, 0, pctx->rab_blk_size, pctx->algo, &props,
|
||||
pctx->enable_delta_encode, pctx->enable_fixed_scan, VERSION, COMPRESS, 0, NULL,
|
||||
pctx->pipe_mode, nprocs);
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
|
||||
if (pctx->archive_mode)
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
|
||||
else
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, NULL);
|
||||
} else {
|
||||
if (pctx->archive_mode)
|
||||
rbytes = archiver_read(pctx, cread_buf, chunksize);
|
||||
|
@ -2405,7 +2410,12 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
|
|||
* buffer is in progress.
|
||||
*/
|
||||
if (pctx->enable_rabin_split) {
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize, &rabin_count, rctx, pctx);
|
||||
if (pctx->archive_mode)
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize,
|
||||
&rabin_count, rctx, pctx);
|
||||
else
|
||||
rbytes = Read_Adjusted(uncompfd, cread_buf, chunksize,
|
||||
&rabin_count, rctx, NULL);
|
||||
} else {
|
||||
if (pctx->archive_mode)
|
||||
rbytes = archiver_read(pctx, cread_buf, chunksize);
|
||||
|
|
36
pcompress.h
36
pcompress.h
|
@ -84,38 +84,38 @@ extern uint32_t zlib_buf_extra(uint64_t buflen);
|
|||
extern int lz4_buf_extra(uint64_t buflen);
|
||||
|
||||
extern int zlib_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lzma_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int bzip2_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int adapt_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int ppmd_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lz_fx_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lz4_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int none_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
|
||||
extern int zlib_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lzma_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int bzip2_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int adapt_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int ppmd_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lz_fx_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int lz4_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int none_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
|
||||
extern int adapt_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
||||
int file_version, compress_op_t op);
|
||||
|
@ -165,9 +165,9 @@ extern void none_stats(int show);
|
|||
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
extern int libbsc_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int libbsc_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data);
|
||||
extern int libbsc_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
||||
int file_version, compress_op_t op);
|
||||
extern void libbsc_props(algo_props_t *data, int level, uint64_t chunksize);
|
||||
|
|
|
@ -109,11 +109,13 @@ ppmd_deinit(void **data)
|
|||
|
||||
int
|
||||
ppmd_compress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
CPpmd8 *_ppmd = (CPpmd8 *)data;
|
||||
uchar_t *_src = (uchar_t *)src;
|
||||
|
||||
if (btype & TYPE_COMPRESSED)
|
||||
return (-1);
|
||||
Ppmd8_RangeEnc_Init(_ppmd);
|
||||
Ppmd8_Init(_ppmd, _ppmd->Order, PPMD8_RESTORE_METHOD_RESTART);
|
||||
_ppmd->buf = (Byte *)dst;
|
||||
|
@ -132,7 +134,7 @@ ppmd_compress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
int
|
||||
ppmd_decompress(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *dstlen, int level, uchar_t chdr, void *data)
|
||||
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
CPpmd8 *_ppmd = (CPpmd8 *)data;
|
||||
Byte *_src = (Byte *)src;
|
||||
|
|
|
@ -18,9 +18,9 @@ struct ext_entry {
|
|||
{"c++" , TYPE_TEXT, 3},
|
||||
{"hpp" , TYPE_TEXT, 3},
|
||||
{"txt" , TYPE_TEXT, 3},
|
||||
{"html" , TYPE_TEXT, 4},
|
||||
{"htm" , TYPE_TEXT, 3},
|
||||
{"xml" , TYPE_TEXT, 3},
|
||||
{"html" , TYPE_TEXT|TYPE_MARKUP, 4},
|
||||
{"htm" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"xml" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"info" , TYPE_TEXT, 4},
|
||||
{"ppm" , TYPE_TEXT, 3},
|
||||
{"svg" , TYPE_TEXT, 3},
|
||||
|
@ -44,18 +44,18 @@ struct ext_entry {
|
|||
{"java" , TYPE_TEXT, 4},
|
||||
{"m4" , TYPE_TEXT, 2},
|
||||
{"vb" , TYPE_TEXT, 2},
|
||||
{"xslt" , TYPE_TEXT, 4},
|
||||
{"xsl" , TYPE_TEXT, 3},
|
||||
{"xslt" , TYPE_TEXT|TYPE_MARKUP, 4},
|
||||
{"xsl" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"yacc" , TYPE_TEXT, 4},
|
||||
{"lex" , TYPE_TEXT, 3},
|
||||
{"csv" , TYPE_TEXT, 3},
|
||||
{"shtml" , TYPE_TEXT, 5},
|
||||
{"xhtml" , TYPE_TEXT, 5},
|
||||
{"xht" , TYPE_TEXT, 3},
|
||||
{"shtml" , TYPE_TEXT|TYPE_MARKUP, 5},
|
||||
{"xhtml" , TYPE_TEXT|TYPE_MARKUP, 5},
|
||||
{"xht" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"asp" , TYPE_TEXT, 3},
|
||||
{"aspx" , TYPE_TEXT, 4},
|
||||
{"rss" , TYPE_TEXT, 3},
|
||||
{"atom" , TYPE_TEXT, 4},
|
||||
{"rss" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"atom" , TYPE_TEXT|TYPE_MARKUP, 4},
|
||||
{"cgi" , TYPE_TEXT, 3},
|
||||
{"c#" , TYPE_TEXT, 2},
|
||||
{"cob" , TYPE_TEXT, 3},
|
||||
|
@ -67,8 +67,18 @@ struct ext_entry {
|
|||
{"ps" , TYPE_TEXT, 2},
|
||||
{"bib" , TYPE_TEXT, 3},
|
||||
{"lua" , TYPE_TEXT, 3},
|
||||
{"qml" , TYPE_TEXT, 3},
|
||||
{"qml" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"fa" , TYPE_TEXT, 2},
|
||||
{"faa" , TYPE_TEXT, 3},
|
||||
{"asn" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"ffn" , TYPE_TEXT, 3},
|
||||
{"fna" , TYPE_TEXT, 3},
|
||||
{"frn" , TYPE_TEXT, 3},
|
||||
{"gbk" , TYPE_TEXT, 3},
|
||||
{"gff" , TYPE_TEXT, 3},
|
||||
{"ptt" , TYPE_TEXT, 3},
|
||||
{"rnt" , TYPE_TEXT, 3},
|
||||
{"val" , TYPE_BINARY, 3},
|
||||
{"tcc" , TYPE_TEXT, 3},
|
||||
{"css" , TYPE_TEXT, 3},
|
||||
{"pod" , TYPE_TEXT, 3},
|
||||
|
@ -78,55 +88,61 @@ struct ext_entry {
|
|||
{"upp" , TYPE_TEXT, 3},
|
||||
{"mom" , TYPE_TEXT, 3},
|
||||
{"tmac" , TYPE_TEXT, 4},
|
||||
{"exe" , TYPE_EXE, 3},
|
||||
{"dll" , TYPE_EXE, 3},
|
||||
{"bin" , TYPE_EXE, 3},
|
||||
{"o" , TYPE_EXE, 1},
|
||||
{"a" , TYPE_EXE, 1},
|
||||
{"obj" , TYPE_EXE, 3},
|
||||
{"so" , TYPE_EXE, 2},
|
||||
{"com" , TYPE_EXE, 3},
|
||||
{"xpi" , TYPE_EXE, 3},
|
||||
{"off" , TYPE_EXE, 3},
|
||||
{"pdf" , TYPE_COMPRESSED, 3},
|
||||
{"jpg" , TYPE_JPEG, 3},
|
||||
{"jpeg" , TYPE_JPEG, 4},
|
||||
{"png" , TYPE_COMPRESSED, 3},
|
||||
{"mp3" , TYPE_COMPRESSED, 3},
|
||||
{"wma" , TYPE_COMPRESSED, 3},
|
||||
{"divx" , TYPE_COMPRESSED, 4},
|
||||
{"mp4" , TYPE_COMPRESSED, 3},
|
||||
{"aac" , TYPE_COMPRESSED, 3},
|
||||
{"m4a" , TYPE_COMPRESSED, 3},
|
||||
{"m4p" , TYPE_COMPRESSED, 3},
|
||||
{"ofs" , TYPE_COMPRESSED, 3},
|
||||
{"ofr" , TYPE_COMPRESSED, 3},
|
||||
{"flac" , TYPE_COMPRESSED, 4},
|
||||
{"pac" , TYPE_COMPRESSED, 3},
|
||||
{"gif" , TYPE_COMPRESSED, 3},
|
||||
{"jp2" , TYPE_JPEG, 3},
|
||||
{"gz" , TYPE_COMPRESSED, 2},
|
||||
{"bz2" , TYPE_COMPRESSED, 3},
|
||||
{"zip" , TYPE_COMPRESSED, 3},
|
||||
{"arj" , TYPE_COMPRESSED, 3},
|
||||
{"arc" , TYPE_COMPRESSED, 3},
|
||||
{"jar" , TYPE_COMPRESSED, 3},
|
||||
{"lz" , TYPE_COMPRESSED, 2},
|
||||
{"lzh" , TYPE_COMPRESSED, 3},
|
||||
{"lzma" , TYPE_COMPRESSED, 4},
|
||||
{"lzo" , TYPE_COMPRESSED, 3},
|
||||
{"dmg" , TYPE_COMPRESSED, 3},
|
||||
{"7z" , TYPE_COMPRESSED, 2},
|
||||
{"uha" , TYPE_COMPRESSED, 3},
|
||||
{"alz" , TYPE_COMPRESSED, 3},
|
||||
{"ace" , TYPE_COMPRESSED, 3},
|
||||
{"rar" , TYPE_COMPRESSED, 3},
|
||||
{"xz" , TYPE_COMPRESSED, 2},
|
||||
{"exe" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"dll" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"bin" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"o" , TYPE_BINARY|TYPE_EXE, 1},
|
||||
{"a" , TYPE_BINARY|TYPE_EXE, 1},
|
||||
{"obj" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"so" , TYPE_BINARY|TYPE_EXE, 2},
|
||||
{"com" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"xpi" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"off" , TYPE_BINARY|TYPE_EXE, 3},
|
||||
{"pdf" , TYPE_BINARY, 3},
|
||||
{"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
|
||||
{"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4},
|
||||
{"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
|
||||
{"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"divx" , TYPE_BINARY|TYPE_COMPRESSED, 4},
|
||||
{"mp4" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"aac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"m4a" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4},
|
||||
{"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3},
|
||||
{"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
|
||||
{"gz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 2},
|
||||
{"tgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
|
||||
{"bz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 3},
|
||||
{"tbz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 4},
|
||||
{"zip" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP, 3},
|
||||
{"arj" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARJ, 3},
|
||||
{"arc" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC, 3},
|
||||
{"jar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
|
||||
{"lz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ, 2},
|
||||
{"lzh" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3},
|
||||
{"lha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3},
|
||||
{"lzma" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 4},
|
||||
{"lzo" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO, 3},
|
||||
{"dmg" , TYPE_BINARY, 3},
|
||||
{"7z" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
|
||||
{"uha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC, 3},
|
||||
{"alz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ, 3},
|
||||
{"ace" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE, 3},
|
||||
{"rar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR, 3},
|
||||
{"xz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
|
||||
{"txz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 3},
|
||||
{"pmd" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD, 3},
|
||||
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
|
||||
{"xcf" , TYPE_BINARY, 3},
|
||||
{"mo" , TYPE_BINARY, 2},
|
||||
{"bmp" , TYPE_BINARY, 3},
|
||||
{"pyo" , TYPE_BINARY, 3},
|
||||
{"pyc" , TYPE_BINARY, 3},
|
||||
{"wav" , TYPE_BINARY, 3},
|
||||
};
|
||||
#define NUM_EXT (116)
|
||||
#endif
|
||||
|
|
|
@ -5,9 +5,9 @@ cpp,TYPE_TEXT
|
|||
c++,TYPE_TEXT
|
||||
hpp,TYPE_TEXT
|
||||
txt,TYPE_TEXT
|
||||
html,TYPE_TEXT
|
||||
htm,TYPE_TEXT
|
||||
xml,TYPE_TEXT
|
||||
html,TYPE_TEXT|TYPE_MARKUP
|
||||
htm,TYPE_TEXT|TYPE_MARKUP
|
||||
xml,TYPE_TEXT|TYPE_MARKUP
|
||||
info,TYPE_TEXT
|
||||
ppm,TYPE_TEXT
|
||||
svg,TYPE_TEXT
|
||||
|
@ -31,18 +31,18 @@ go,TYPE_TEXT
|
|||
java,TYPE_TEXT
|
||||
m4,TYPE_TEXT
|
||||
vb,TYPE_TEXT
|
||||
xslt,TYPE_TEXT
|
||||
xsl,TYPE_TEXT
|
||||
xslt,TYPE_TEXT|TYPE_MARKUP
|
||||
xsl,TYPE_TEXT|TYPE_MARKUP
|
||||
yacc,TYPE_TEXT
|
||||
lex,TYPE_TEXT
|
||||
csv,TYPE_TEXT
|
||||
shtml,TYPE_TEXT
|
||||
xhtml,TYPE_TEXT
|
||||
xht,TYPE_TEXT
|
||||
shtml,TYPE_TEXT|TYPE_MARKUP
|
||||
xhtml,TYPE_TEXT|TYPE_MARKUP
|
||||
xht,TYPE_TEXT|TYPE_MARKUP
|
||||
asp,TYPE_TEXT
|
||||
aspx,TYPE_TEXT
|
||||
rss,TYPE_TEXT
|
||||
atom,TYPE_TEXT
|
||||
rss,TYPE_TEXT|TYPE_MARKUP
|
||||
atom,TYPE_TEXT|TYPE_MARKUP
|
||||
cgi,TYPE_TEXT
|
||||
c#,TYPE_TEXT
|
||||
cob,TYPE_TEXT
|
||||
|
@ -54,8 +54,21 @@ s,TYPE_TEXT
|
|||
ps,TYPE_TEXT
|
||||
bib,TYPE_TEXT
|
||||
lua,TYPE_TEXT
|
||||
qml,TYPE_TEXT
|
||||
qml,TYPE_TEXT|TYPE_MARKUP
|
||||
|
||||
# These are all genomic data file extensions
|
||||
fa,TYPE_TEXT
|
||||
faa,TYPE_TEXT
|
||||
asn,TYPE_TEXT|TYPE_MARKUP
|
||||
ffn,TYPE_TEXT
|
||||
fna,TYPE_TEXT
|
||||
frn,TYPE_TEXT
|
||||
gbk,TYPE_TEXT
|
||||
gff,TYPE_TEXT
|
||||
ptt,TYPE_TEXT
|
||||
rnt,TYPE_TEXT
|
||||
val,TYPE_BINARY
|
||||
|
||||
tcc,TYPE_TEXT
|
||||
css,TYPE_TEXT
|
||||
pod,TYPE_TEXT
|
||||
|
@ -65,52 +78,59 @@ am,TYPE_TEXT
|
|||
upp,TYPE_TEXT
|
||||
mom,TYPE_TEXT
|
||||
tmac,TYPE_TEXT
|
||||
exe,TYPE_EXE
|
||||
dll,TYPE_EXE
|
||||
bin,TYPE_EXE
|
||||
o,TYPE_EXE
|
||||
a,TYPE_EXE
|
||||
obj,TYPE_EXE
|
||||
so,TYPE_EXE
|
||||
com,TYPE_EXE
|
||||
xpi,TYPE_EXE
|
||||
off,TYPE_EXE
|
||||
pdf,TYPE_COMPRESSED
|
||||
jpg,TYPE_JPEG
|
||||
jpeg,TYPE_JPEG
|
||||
png,TYPE_COMPRESSED
|
||||
mp3,TYPE_COMPRESSED
|
||||
wma,TYPE_COMPRESSED
|
||||
divx,TYPE_COMPRESSED
|
||||
mp4,TYPE_COMPRESSED
|
||||
aac,TYPE_COMPRESSED
|
||||
m4a,TYPE_COMPRESSED
|
||||
m4p,TYPE_COMPRESSED
|
||||
ofs,TYPE_COMPRESSED
|
||||
ofr,TYPE_COMPRESSED
|
||||
flac,TYPE_COMPRESSED
|
||||
pac,TYPE_COMPRESSED
|
||||
gif,TYPE_COMPRESSED
|
||||
jp2,TYPE_JPEG
|
||||
gz,TYPE_COMPRESSED
|
||||
bz2,TYPE_COMPRESSED
|
||||
zip,TYPE_COMPRESSED
|
||||
arj,TYPE_COMPRESSED
|
||||
arc,TYPE_COMPRESSED
|
||||
jar,TYPE_COMPRESSED
|
||||
lz,TYPE_COMPRESSED
|
||||
lzh,TYPE_COMPRESSED
|
||||
lzma,TYPE_COMPRESSED
|
||||
lzo,TYPE_COMPRESSED
|
||||
dmg,TYPE_COMPRESSED
|
||||
7z,TYPE_COMPRESSED
|
||||
uha,TYPE_COMPRESSED
|
||||
alz,TYPE_COMPRESSED
|
||||
ace,TYPE_COMPRESSED
|
||||
rar,TYPE_COMPRESSED
|
||||
xz,TYPE_COMPRESSED
|
||||
exe,TYPE_BINARY|TYPE_EXE
|
||||
dll,TYPE_BINARY|TYPE_EXE
|
||||
bin,TYPE_BINARY|TYPE_EXE
|
||||
o,TYPE_BINARY|TYPE_EXE
|
||||
a,TYPE_BINARY|TYPE_EXE
|
||||
obj,TYPE_BINARY|TYPE_EXE
|
||||
so,TYPE_BINARY|TYPE_EXE
|
||||
com,TYPE_BINARY|TYPE_EXE
|
||||
xpi,TYPE_BINARY|TYPE_EXE
|
||||
off,TYPE_BINARY|TYPE_EXE
|
||||
pdf,TYPE_BINARY
|
||||
jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
|
||||
jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
|
||||
png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
mp3,TYPE_BINARY|TYPE_COMPRESSED
|
||||
wma,TYPE_BINARY|TYPE_COMPRESSED
|
||||
divx,TYPE_BINARY|TYPE_COMPRESSED
|
||||
mp4,TYPE_BINARY|TYPE_COMPRESSED
|
||||
aac,TYPE_BINARY|TYPE_COMPRESSED
|
||||
m4a,TYPE_BINARY|TYPE_COMPRESSED
|
||||
m4p,TYPE_BINARY|TYPE_COMPRESSED
|
||||
ofs,TYPE_BINARY|TYPE_COMPRESSED
|
||||
ofr,TYPE_BINARY|TYPE_COMPRESSED
|
||||
flac,TYPE_BINARY|TYPE_COMPRESSED
|
||||
pac,TYPE_BINARY|TYPE_COMPRESSED
|
||||
gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW
|
||||
jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
|
||||
gz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
tgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
bz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2
|
||||
tbz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2
|
||||
zip,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP
|
||||
arj,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARJ
|
||||
arc,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC
|
||||
jar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
lz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ
|
||||
lzh,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH
|
||||
lha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH
|
||||
lzma,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
|
||||
lzo,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO
|
||||
dmg,TYPE_BINARY
|
||||
7z,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
|
||||
uha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC
|
||||
alz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ
|
||||
ace,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE
|
||||
rar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR
|
||||
xz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
|
||||
txz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
|
||||
pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
|
||||
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
|
||||
xcf,TYPE_BINARY
|
||||
mo,TYPE_BINARY
|
||||
bmp,TYPE_BINARY
|
||||
pyo,TYPE_BINARY
|
||||
pyc,TYPE_BINARY
|
||||
wav,TYPE_BINARY
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
count=`cat extensions.txt | wc -l`
|
||||
echo '
|
||||
/* Generated File. DO NOT EDIT. */
|
||||
/*
|
||||
|
@ -18,6 +17,9 @@ struct ext_entry {
|
|||
rm -f extlist
|
||||
cat extensions.txt | while read line
|
||||
do
|
||||
[ "x$line" = "x" ] && continue
|
||||
echo "$line" | egrep "^#" > /dev/null
|
||||
[ $? -eq 0 ] && continue
|
||||
_OIFS="$IFS"
|
||||
IFS=","
|
||||
set -- $line
|
||||
|
@ -30,7 +32,6 @@ do
|
|||
done
|
||||
|
||||
echo '};' >> extensions.h
|
||||
echo "#define NUM_EXT (${count})" >> extensions.h
|
||||
echo "#endif" >> extensions.h
|
||||
./perfect -nm < extlist
|
||||
rm -f extlist
|
||||
|
|
|
@ -12,17 +12,21 @@
|
|||
|
||||
/* small adjustments to _a_ to make values distinct */
|
||||
ub1 tab[] = {
|
||||
10,76,0,76,70,42,0,1,0,0,119,1,61,1,70,79,
|
||||
0,0,0,4,70,1,0,122,0,119,47,76,76,34,110,101,
|
||||
0,76,70,70,42,28,0,66,0,108,0,109,28,4,28,4,
|
||||
70,0,1,20,4,123,123,0,79,75,34,76,69,77,0,69,
|
||||
125,0,0,82,113,0,125,85,113,0,0,7,0,0,125,0,
|
||||
0,0,7,87,0,0,82,0,0,88,0,7,0,85,125,85,
|
||||
0,113,0,0,85,0,0,113,0,113,124,125,0,125,0,0,
|
||||
113,0,11,113,125,0,0,0,0,85,113,85,22,0,0,125,
|
||||
0,113,0,0,113,0,82,0,125,111,87,88,69,125,113,0,
|
||||
124,0,7,22,113,22,0,235,0,120,120,125,113,0,74,120,
|
||||
0,124,87,7,0,127,0,0,11,85,85,146,115,11,183,146,
|
||||
0,0,88,0,0,85,42,0,171,0,0,0,0,83,0,0,
|
||||
};
|
||||
|
||||
/*
 * The hash function.
 *
 * Mixes the len-byte key through lookup() (defined elsewhere) using the
 * salt 0x9e3779b9, then combines the upper bits of that value with a tab[]
 * entry selected by its lower bits.
 *
 * Returns the resulting hash value.
 */
|
||||
ub4 phash(char *key, int len)
|
||||
{
|
||||
ub4 rsl, val = lookup(key, len, 0x9e3779b9);
|
||||
/*
 * NOTE(review): this assignment is immediately overwritten by the next
 * one, so it has no effect on the result. It looks like the pre-change
 * line (64-entry table, mask 0x3f) left over from the commit diff;
 * confirm and drop it.
 */
rsl = ((val>>26)^tab[val&0x3f]);
|
||||
/*
 * Effective computation: val's low 7 bits (mask 0x7f) pick one of 128
 * tab[] entries, XOR-ed with val shifted right by 25 (the top 7 bits,
 * assuming ub4 is 32 bits wide; TODO confirm).
 */
rsl = ((val>>25)^tab[val&0x7f]);
|
||||
return rsl;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
#define PHASH
|
||||
|
||||
extern ub1 tab[];
|
||||
#define PHASHLEN 0x40 /* length of hash mapping table */
|
||||
#define PHASHNKEYS 116 /* How many keys were hashed */
|
||||
#define PHASHRANGE 128 /* Range any input might map to */
|
||||
#define PHASHLEN 0x80 /* length of hash mapping table */
|
||||
#define PHASHNKEYS 133 /* How many keys were hashed */
|
||||
#define PHASHRANGE 256 /* Range any input might map to */
|
||||
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */
|
||||
|
||||
ub4 phash();
|
||||
|
|
|
@ -228,6 +228,35 @@ struct fn_list {
|
|||
struct fn_list *next;
|
||||
};
|
||||
|
||||
/*
|
||||
* Enumerated type constants for file type identification in pc_archive.
|
||||
*/
|
||||
/*
 * TYPE_TEXT, TYPE_BINARY and TYPE_COMPRESSED are single-bit values (1, 2, 4).
 * Every constant from TYPE_EXE onward is a multiple of 4 (8, 12, ... 80),
 * i.e. an enumerated code rather than a distinct bit.
 *
 * NOTE(review): the extension table combines these constants with bitwise
 * OR, e.g. TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW. Because the
 * codes from 8 upward are spaced by 4, OR-ing in TYPE_COMPRESSED (4) makes
 * adjacent codes indistinguishable: 24|4 == 28, so the gif entry (LZW) and
 * the bz2 entry (BZ2) both evaluate to 30. The same happens for ZIP/ARJ
 * (38), ARC/LH (46), LZMA/LZO (54), UHARC/ALZ (62), ACE/RAR (70) and
 * LZ/PPMD (78). TYPE_JPEG (12) also equals TYPE_EXE|TYPE_COMPRESSED, so a
 * plain "& TYPE_EXE" test matches JPEG data. Confirm how consumers decode
 * the combined value; spacing the codes by 8 would keep them clear of the
 * three low flag bits.
 */
typedef enum {
|
||||
TYPE_UNKNOWN = 0,
|
||||
/* Single-bit values. */
TYPE_TEXT = 1,
|
||||
TYPE_BINARY = 2,
|
||||
TYPE_COMPRESSED = 4,
|
||||
/* Codes from here on increase in steps of 4. */
TYPE_EXE = 8,
|
||||
TYPE_JPEG = 12,
|
||||
TYPE_MARKUP = 16,
|
||||
TYPE_COMPRESSED_GZ = 20,
|
||||
TYPE_COMPRESSED_LZW = 24,
|
||||
TYPE_COMPRESSED_BZ2 = 28,
|
||||
TYPE_COMPRESSED_ZIP = 32,
|
||||
TYPE_COMPRESSED_ARJ = 36,
|
||||
TYPE_COMPRESSED_ARC = 40,
|
||||
TYPE_COMPRESSED_LH = 44,
|
||||
TYPE_COMPRESSED_LZMA = 48,
|
||||
TYPE_COMPRESSED_LZO = 52,
|
||||
TYPE_COMPRESSED_UHARC = 56,
|
||||
TYPE_COMPRESSED_ALZ = 60,
|
||||
TYPE_COMPRESSED_ACE = 64,
|
||||
TYPE_COMPRESSED_RAR = 68,
|
||||
TYPE_COMPRESSED_LZ = 72,
|
||||
TYPE_COMPRESSED_PPMD = 76,
|
||||
TYPE_COMPRESSED_ZPAQ = 80
|
||||
} data_type_t;
|
||||
|
||||
#ifndef _IN_UTILS_
|
||||
extern processor_info_t proc_info;
|
||||
#endif
|
||||
|
@ -254,7 +283,7 @@ extern char *get_temp_dir();
|
|||
|
||||
/* Pointer type for compress and decompress functions. */
|
||||
typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst,
|
||||
uint64_t *destlen, int level, uchar_t chdr, void *data);
|
||||
uint64_t *destlen, int level, uchar_t chdr, int btype, void *data);
|
||||
|
||||
typedef enum {
|
||||
COMPRESS,
|
||||
|
|
|
@ -142,7 +142,7 @@ void zerr(int ret, int cmp)
|
|||
|
||||
int
|
||||
zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int ret, ending;
|
||||
unsigned int slen, dlen;
|
||||
|
@ -205,7 +205,7 @@ zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
|
||||
int
|
||||
zlib_decompress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
||||
int level, uchar_t chdr, void *data)
|
||||
int level, uchar_t chdr, int btype, void *data)
|
||||
{
|
||||
int err;
|
||||
unsigned int slen, dlen;
|
||||
|
|
Loading…
Reference in a new issue