Optimize preprocessed compression and avoid a bunch of memory copies.
Fix a crash. Add a few more file types. More comments.
This commit is contained in:
parent
664c8ef75b
commit
1e2c3e479a
11 changed files with 110 additions and 50 deletions
|
@ -130,8 +130,14 @@ adapt_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
|||
adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data));
|
||||
adat->adapt_mode = 1;
|
||||
rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize, file_version, op);
|
||||
|
||||
/*
|
||||
* LZ4 is used to tackle some embedded archive headers and/or zero paddings in
|
||||
* otherwise incompressible data. So we always use it at the lowest and fastest
|
||||
* compression level.
|
||||
*/
|
||||
if (rv == 0)
|
||||
rv = lz4_init(&(adat->lz4_data), level, nthreads, chunksize, file_version, op);
|
||||
rv = lz4_init(&(adat->lz4_data), 1, nthreads, chunksize, file_version, op);
|
||||
adat->lzma_data = NULL;
|
||||
adat->bsc_data = NULL;
|
||||
*data = adat;
|
||||
|
@ -167,8 +173,13 @@ adapt2_init(void **data, int *level, int nthreads, uint64_t chunksize,
|
|||
if (rv == 0)
|
||||
rv = libbsc_init(&(adat->bsc_data), &lv, nthreads, chunksize, file_version, op);
|
||||
#endif
|
||||
/*
|
||||
* LZ4 is used to tackle some embedded archive headers and/or zero paddings in
|
||||
* otherwise incompressible data. So we always use it at the lowest and fastest
|
||||
* compression level.
|
||||
*/
|
||||
if (rv == 0)
|
||||
rv = lz4_init(&(adat->lz4_data), level, nthreads, chunksize, file_version, op);
|
||||
rv = lz4_init(&(adat->lz4_data), 1, nthreads, chunksize, file_version, op);
|
||||
*data = adat;
|
||||
if (*level > 9) *level = 9;
|
||||
}
|
||||
|
@ -304,7 +315,7 @@ adapt_decompress(void *src, uint64_t srclen, void *dst,
|
|||
cmp_flags = CHDR_ALGO(chdr);
|
||||
|
||||
if (cmp_flags == ADAPT_COMPRESS_LZ4) {
|
||||
return (lz4_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data));
|
||||
return (lz4_decompress(src, srclen, dst, dstlen, 1, chdr, btype, adat->lz4_data));
|
||||
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_LZMA) {
|
||||
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data));
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
#include "pc_archive.h"
|
||||
|
||||
#define PACKJPG_DEF_BUFSIZ (512 * 1024)
|
||||
#define JPG_SIZE_LIMIT (50 * 1024 * 1024)
|
||||
#define JPG_SIZE_LIMIT (25 * 1024 * 1024)
|
||||
|
||||
struct packjpg_filter_data {
|
||||
uchar_t *buff, *in_buff;
|
||||
|
@ -136,7 +136,7 @@ packjpg_filter(struct filter_info *fi, void *filter_private)
|
|||
{
|
||||
struct packjpg_filter_data *pjdat = (struct packjpg_filter_data *)filter_private;
|
||||
uchar_t *mapbuf, *out;
|
||||
size_t len, in_size = 0, len1;
|
||||
uint64_t len, in_size = 0, len1;
|
||||
|
||||
len = archive_entry_size(fi->entry);
|
||||
len1 = len;
|
||||
|
@ -157,9 +157,7 @@ packjpg_filter(struct filter_info *fi, void *filter_private)
|
|||
munmap(mapbuf, len);
|
||||
return (FILTER_RETURN_SKIP);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
* Allocate input buffer and read archive data stream for the entry
|
||||
* into this buffer.
|
||||
|
|
|
@ -1422,6 +1422,12 @@ out:
|
|||
|
||||
/* PPMZ packed into 32-bit integer. */
|
||||
# define PPMINT (0x50504d5aU)
|
||||
|
||||
/* wvpk packed into 32-bit integer. */
|
||||
# define WVPK (0x7776706b)
|
||||
|
||||
/* TTA1 packed into 32-bit integer. */
|
||||
# define TTA1 (0x54544131)
|
||||
#else
|
||||
/* 0x7fELF packed into 32-bit integer. */
|
||||
# define ELFINT (0x464c457fU)
|
||||
|
@ -1431,6 +1437,12 @@ out:
|
|||
|
||||
/* PPMZ packed into 32-bit integer. */
|
||||
# define PPMINT (0x5a4d5050U)
|
||||
|
||||
/* wvpk packed into 32-bit integer. */
|
||||
# define WVPK (0x6b707677)
|
||||
|
||||
/* TTA1 packed into 32-bit integer. */
|
||||
# define TTA1 (0x31415454)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -1452,6 +1464,8 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
|||
return (TYPE_BINARY); // Timezone data
|
||||
if (U32_P(buf) == PPMINT)
|
||||
return (TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD); // PPM Compressed archive
|
||||
if (U32_P(buf) == WVPK || U32_P(buf) == TTA1)
|
||||
return (TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED);
|
||||
|
||||
return (TYPE_UNKNOWN);
|
||||
}
|
||||
|
|
66
pcompress.c
66
pcompress.c
|
@ -205,44 +205,57 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
{
|
||||
uchar_t *dest = (uchar_t *)dst, type = 0;
|
||||
int64_t result;
|
||||
uint64_t _dstlen;
|
||||
uint64_t _dstlen, fromlen;
|
||||
uchar_t *from, *to;
|
||||
DEBUG_STAT_EN(double strt, en);
|
||||
|
||||
_dstlen = *dstlen;
|
||||
from = src;
|
||||
to = dst;
|
||||
fromlen = srclen;
|
||||
result = 0;
|
||||
|
||||
if (pctx->lzp_preprocess) {
|
||||
int hashsize;
|
||||
|
||||
hashsize = lzp_hash_size(level);
|
||||
result = lzp_compress((const uchar_t *)src, (uchar_t *)dst, srclen,
|
||||
result = lzp_compress((const uchar_t *)from, to, fromlen,
|
||||
hashsize, LZP_DEFAULT_LZPMINLEN, 0);
|
||||
if (result < 0 || result == srclen) {
|
||||
if (!pctx->enable_delta2_encode)
|
||||
return (-1);
|
||||
} else {
|
||||
if (result >= 0 && result < srclen) {
|
||||
uchar_t *tmp;
|
||||
tmp = from;
|
||||
from = to;
|
||||
to = tmp;
|
||||
fromlen = result;
|
||||
type |= PREPROC_TYPE_LZP;
|
||||
srclen = result;
|
||||
memcpy(src, dst, srclen);
|
||||
}
|
||||
|
||||
} else if (!pctx->enable_delta2_encode) {
|
||||
/*
|
||||
* Execution won't come here but just in case ...
|
||||
*/
|
||||
log_msg(LOG_ERR, 0, "Invalid preprocessing mode");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (pctx->enable_delta2_encode && props->delta2_span > 0) {
|
||||
_dstlen = srclen;
|
||||
result = delta2_encode((uchar_t *)src, srclen, (uchar_t *)dst,
|
||||
_dstlen = fromlen;
|
||||
result = delta2_encode((uchar_t *)from, fromlen, to,
|
||||
&_dstlen, props->delta2_span);
|
||||
if (result != -1) {
|
||||
memcpy(src, dst, _dstlen);
|
||||
srclen = _dstlen;
|
||||
uchar_t *tmp;
|
||||
tmp = from;
|
||||
from = to;
|
||||
to = tmp;
|
||||
fromlen = _dstlen;
|
||||
type |= PREPROC_TYPE_DELTA2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check which is the resulting buffer. If Encoded data is already sitting
|
||||
* in src buffer then a memcpy() is not needed.
|
||||
* Note that from,to ptrs are swapped after every encoding stage. So if
|
||||
* from == dst, it means that encoded data is in dst.
|
||||
*/
|
||||
if (from == dst) {
|
||||
memcpy(src, dst, fromlen);
|
||||
}
|
||||
srclen = fromlen;
|
||||
|
||||
*dest = type;
|
||||
U64_P(dest + 1) = htonll(srclen);
|
||||
_dstlen = srclen;
|
||||
|
@ -305,7 +318,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
|||
memcpy(src, dst, _dstlen);
|
||||
srclen = _dstlen;
|
||||
*dstlen = _dstlen;
|
||||
} else {
|
||||
} else {
|
||||
return (result);
|
||||
}
|
||||
}
|
||||
|
@ -819,9 +832,9 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
|||
err = 1;
|
||||
goto uncomp_done;
|
||||
}
|
||||
to_filename = (char *)origf;
|
||||
}
|
||||
|
||||
|
||||
compressed_chunksize = chunksize + CHUNK_HDR_SZ + zlib_buf_extra(chunksize);
|
||||
|
||||
if (pctx->_props_func) {
|
||||
|
@ -1546,7 +1559,7 @@ plain_index:
|
|||
/* Compress data chunk. */
|
||||
if (_chunksize == 0) {
|
||||
rv = -1;
|
||||
} else if ((pctx->lzp_preprocess || pctx->enable_delta2_encode)) {
|
||||
} else if (pctx->preprocess_mode) {
|
||||
rv = preproc_compress(pctx, tdat->compress,
|
||||
tdat->uncompressed_chunk + dedupe_index_sz, _chunksize,
|
||||
compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0,
|
||||
|
@ -1575,7 +1588,7 @@ plain_index:
|
|||
_chunksize += index_size_cmp;
|
||||
} else {
|
||||
_chunksize = tdat->rbytes;
|
||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
||||
if (pctx->preprocess_mode) {
|
||||
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk,
|
||||
tdat->rbytes, compressed_chunk, &_chunksize, tdat->level, 0,
|
||||
tdat->btype, tdat->data, tdat->props);
|
||||
|
@ -1639,7 +1652,7 @@ plain_index:
|
|||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan) && tdat->rctx->valid) {
|
||||
type |= CHUNK_FLAG_DEDUP;
|
||||
}
|
||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
||||
if (pctx->preprocess_mode) {
|
||||
type |= CHUNK_FLAG_PREPROC;
|
||||
}
|
||||
|
||||
|
@ -2689,6 +2702,7 @@ create_pc_context(void)
|
|||
ctx->rab_blk_size = -1;
|
||||
ctx->archive_temp_fd = -1;
|
||||
ctx->pagesize = sysconf(_SC_PAGE_SIZE);
|
||||
ctx->btype = TYPE_UNKNOWN;
|
||||
|
||||
return (ctx);
|
||||
}
|
||||
|
@ -3140,7 +3154,9 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
init_filters(&ff);
|
||||
pctx->enable_packjpg = ff.enable_packjpg;
|
||||
}
|
||||
|
||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode) {
|
||||
pctx->preprocess_mode = 1;
|
||||
}
|
||||
} else if (pctx->do_uncompress) {
|
||||
struct filter_flags ff;
|
||||
/*
|
||||
|
|
|
@ -58,7 +58,7 @@ extern "C" {
|
|||
#define CHUNK_FLAG_PREPROC 4
|
||||
#define COMP_EXTN ".pz"
|
||||
|
||||
#define PREPROC_TYPE_LZP 1
|
||||
#define PREPROC_TYPE_LZP 1
|
||||
#define PREPROC_TYPE_DELTA2 2
|
||||
#define PREPROC_COMPRESSED 128
|
||||
|
||||
|
@ -203,6 +203,7 @@ typedef struct pc_ctx {
|
|||
int enable_delta2_encode;
|
||||
int enable_rabin_split;
|
||||
int enable_fixed_scan;
|
||||
int preprocess_mode;
|
||||
int lzp_preprocess;
|
||||
int encrypt_type;
|
||||
int archive_mode;
|
||||
|
|
|
@ -68,11 +68,11 @@ struct ext_entry {
|
|||
{"bib" , TYPE_TEXT, 3},
|
||||
{"lua" , TYPE_TEXT, 3},
|
||||
{"qml" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"fa" , TYPE_TEXT, 2},
|
||||
{"fa" , TYPE_TEXT|TYPE_DNA_SEQ, 2},
|
||||
{"faa" , TYPE_TEXT, 3},
|
||||
{"asn" , TYPE_TEXT|TYPE_MARKUP, 3},
|
||||
{"ffn" , TYPE_TEXT, 3},
|
||||
{"fna" , TYPE_TEXT, 3},
|
||||
{"ffn" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
|
||||
{"fna" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
|
||||
{"frn" , TYPE_TEXT, 3},
|
||||
{"gbk" , TYPE_TEXT, 3},
|
||||
{"gff" , TYPE_TEXT, 3},
|
||||
|
@ -101,6 +101,7 @@ struct ext_entry {
|
|||
{"pdf" , TYPE_BINARY, 3},
|
||||
{"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
|
||||
{"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4},
|
||||
{"mjpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_MJPEG, 5},
|
||||
{"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
|
||||
{"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
{"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3},
|
||||
|
@ -145,5 +146,11 @@ struct ext_entry {
|
|||
{"pyo" , TYPE_BINARY, 3},
|
||||
{"pyc" , TYPE_BINARY, 3},
|
||||
{"wav" , TYPE_BINARY, 3},
|
||||
{"tta" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED, 3},
|
||||
{"wv" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED, 2},
|
||||
{"swf" , TYPE_BINARY, 3},
|
||||
{"SVGZ" , TYPE_BINARY, 4},
|
||||
{"ODT" , TYPE_BINARY, 3},
|
||||
{"3DM" , TYPE_BINARY, 3},
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -57,11 +57,11 @@ lua,TYPE_TEXT
|
|||
qml,TYPE_TEXT|TYPE_MARKUP
|
||||
|
||||
# These are all genomic data file extensions
|
||||
fa,TYPE_TEXT
|
||||
fa,TYPE_TEXT|TYPE_DNA_SEQ
|
||||
faa,TYPE_TEXT
|
||||
asn,TYPE_TEXT|TYPE_MARKUP
|
||||
ffn,TYPE_TEXT
|
||||
fna,TYPE_TEXT
|
||||
ffn,TYPE_TEXT|TYPE_DNA_SEQ
|
||||
fna,TYPE_TEXT|TYPE_DNA_SEQ
|
||||
frn,TYPE_TEXT
|
||||
gbk,TYPE_TEXT
|
||||
gff,TYPE_TEXT
|
||||
|
@ -91,6 +91,7 @@ off,TYPE_BINARY|TYPE_EXE
|
|||
pdf,TYPE_BINARY
|
||||
jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
|
||||
jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
|
||||
mjpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_MJPEG
|
||||
png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
mp3,TYPE_BINARY|TYPE_COMPRESSED
|
||||
wma,TYPE_BINARY|TYPE_COMPRESSED
|
||||
|
@ -135,3 +136,9 @@ bmp,TYPE_BINARY
|
|||
pyo,TYPE_BINARY
|
||||
pyc,TYPE_BINARY
|
||||
wav,TYPE_BINARY
|
||||
tta,TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED
|
||||
wv,TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED
|
||||
swf,TYPE_BINARY
|
||||
SVGZ,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
|
||||
ODT,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP
|
||||
3DM,TYPE_BINARY
|
||||
|
|
|
@ -12,14 +12,14 @@
|
|||
|
||||
/* small adjustments to _a_ to make values distinct */
|
||||
ub1 tab[] = {
|
||||
125,0,0,82,113,0,125,85,113,0,0,7,0,0,125,0,
|
||||
0,0,7,87,0,0,82,0,0,125,0,7,0,85,125,85,
|
||||
0,113,0,0,85,0,0,113,0,113,124,0,0,125,0,0,
|
||||
113,0,11,113,125,7,0,0,0,85,113,85,22,0,0,125,
|
||||
0,113,0,0,113,0,82,0,125,111,87,124,69,183,113,0,
|
||||
124,0,7,22,120,22,0,127,0,120,51,125,0,0,74,120,
|
||||
0,124,87,113,0,127,0,0,11,85,85,146,69,11,183,146,
|
||||
0,0,88,0,0,85,42,0,171,0,0,0,0,83,0,0,
|
||||
125,0,0,220,235,125,82,0,113,0,0,7,0,0,82,0,
|
||||
0,0,7,124,0,0,82,0,0,125,0,7,0,220,125,120,
|
||||
0,0,0,0,22,0,0,113,0,113,113,0,0,125,85,0,
|
||||
113,0,11,113,125,7,0,0,0,40,0,113,85,0,0,125,
|
||||
0,113,0,0,113,0,125,183,40,27,7,15,58,183,113,0,
|
||||
124,0,0,22,125,220,0,40,0,87,87,125,113,0,183,125,
|
||||
0,125,87,7,0,85,0,0,59,229,85,7,135,116,0,146,
|
||||
0,0,82,0,0,0,200,0,56,125,0,0,61,202,0,0,
|
||||
};
|
||||
|
||||
/* The hash function */
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
extern ub1 tab[];
|
||||
#define PHASHLEN 0x80 /* length of hash mapping table */
|
||||
#define PHASHNKEYS 134 /* How many keys were hashed */
|
||||
#define PHASHNKEYS 141 /* How many keys were hashed */
|
||||
#define PHASHRANGE 256 /* Range any input might map to */
|
||||
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */
|
||||
|
||||
|
|
|
@ -572,6 +572,6 @@ is_incompressible(int type)
|
|||
int ic = 0;
|
||||
int st = PC_SUBTYPE(type);
|
||||
|
||||
ic = (st == TYPE_JPEG) | (st == TYPE_PACKJPG);
|
||||
ic = (st == TYPE_JPEG) | (st == TYPE_PACKJPG) | (st == TYPE_AUDIO_COMPRESSED);
|
||||
return (ic);
|
||||
}
|
||||
|
|
|
@ -90,9 +90,11 @@ typedef int32_t bsize_t;
|
|||
# if !defined(sun) && !defined (__sun)
|
||||
# define LE64(x) __bswap_64(x)
|
||||
# define LE32(x) __bswap_32(x)
|
||||
# define LE16(x) __bswap_16(x)
|
||||
# else
|
||||
# define LE64(x) BSWAP_64(x)
|
||||
# define LE32(x) BSWAP_32(x)
|
||||
# define LE16(x) BSWAP_16(x)
|
||||
# endif
|
||||
#else
|
||||
# if !defined(sun) && !defined (__sun)
|
||||
|
@ -105,6 +107,7 @@ typedef int32_t bsize_t;
|
|||
# endif
|
||||
# define LE64(x) (x)
|
||||
# define LE32(x) (x)
|
||||
# define LE16(x) (x)
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -262,7 +265,10 @@ typedef enum {
|
|||
TYPE_COMPRESSED_LZ = 136,
|
||||
TYPE_COMPRESSED_PPMD = 144,
|
||||
TYPE_COMPRESSED_ZPAQ = 152,
|
||||
TYPE_PACKJPG = 160
|
||||
TYPE_PACKJPG = 160,
|
||||
TYPE_DNA_SEQ = 168,
|
||||
TYPE_MJPEG = 176,
|
||||
TYPE_AUDIO_COMPRESSED = 184
|
||||
} data_type_t;
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue