Some fixes in the Dictionary preprocessor.
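Fix checking of data type flags: test them with a bitwise AND (`&`) instead of an equality comparison (`==`). Allow file-level filters to change the output data type via a new per-filter `result_type` field. Tweak the analyzer threshold for the markup type (tag-count factor lowered from 0.1 to 0.06).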
This commit is contained in:
parent
077da83d5d
commit
d5e1d2cdef
11 changed files with 23 additions and 14 deletions
|
@ -247,7 +247,7 @@ is_bsc_type(int btype)
|
|||
return ((stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) |
|
||||
(stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI) |
|
||||
(stype == TYPE_DICOM) | (stype == TYPE_MEDIA_BSC) |
|
||||
(mtype == TYPE_TEXT && stype != TYPE_MARKUP));
|
||||
(mtype & TYPE_TEXT && stype != TYPE_MARKUP));
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -259,7 +259,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
int stype = PC_SUBTYPE(btype);
|
||||
analyzer_ctx_t actx;
|
||||
|
||||
if (btype == TYPE_UNKNOWN || PC_TYPE(btype) == TYPE_TEXT ||
|
||||
if (btype == TYPE_UNKNOWN || PC_TYPE(btype) & TYPE_TEXT ||
|
||||
stype == TYPE_ARCHIVE_TAR || stype == TYPE_PDF) {
|
||||
if (adat->actx == NULL) {
|
||||
analyze_buffer(src, srclen, &actx);
|
||||
|
@ -292,14 +292,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
rv = ADAPT_COMPRESS_LZ4;
|
||||
lz4_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) {
|
||||
} else if (adat->adapt_mode == 2 && PC_TYPE(btype) & TYPE_BINARY && !bsc_type) {
|
||||
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_LZMA;
|
||||
lzma_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) {
|
||||
} else if (adat->adapt_mode == 1 && PC_TYPE(btype) & TYPE_BINARY && !bsc_type) {
|
||||
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
|
|
|
@ -81,16 +81,19 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff)
|
|||
typetab[slot].filter_private = sdat;
|
||||
typetab[slot].filter_func = packjpg_filter;
|
||||
typetab[slot].filter_name = "packJPG";
|
||||
typetab[slot].result_type = TYPE_BINARY;
|
||||
|
||||
slot = TYPE_BMP >> 3;
|
||||
typetab[slot].filter_private = sdat;
|
||||
typetab[slot].filter_func = packpnm_filter;
|
||||
typetab[slot].filter_name = "packPNM";
|
||||
typetab[slot].result_type = TYPE_BINARY;
|
||||
|
||||
slot = TYPE_PNM >> 3;
|
||||
typetab[slot].filter_private = sdat;
|
||||
typetab[slot].filter_func = packpnm_filter;
|
||||
typetab[slot].filter_name = "packPNM";
|
||||
typetab[slot].result_type = TYPE_BINARY;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -104,6 +107,7 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff)
|
|||
typetab[slot].filter_private = sdat;
|
||||
typetab[slot].filter_func = dispack_filter;
|
||||
typetab[slot].filter_name = "Dispack";
|
||||
typetab[slot].result_type = 0;
|
||||
}
|
||||
|
||||
#ifdef _ENABLE_WAVPACK_
|
||||
|
@ -118,6 +122,7 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff)
|
|||
typetab[slot].filter_private = sdat;
|
||||
typetab[slot].filter_func = wavpack_filter;
|
||||
typetab[slot].filter_name = "WavPack";
|
||||
typetab[slot].result_type = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -100,6 +100,7 @@ struct type_data {
|
|||
void *filter_private;
|
||||
filter_func_ptr filter_func;
|
||||
char *filter_name;
|
||||
int result_type;
|
||||
};
|
||||
|
||||
void add_filters_by_type(struct type_data *typetab, struct filter_flags *ff);
|
||||
|
|
|
@ -1042,6 +1042,9 @@ process_by_filter(int fd, int *typ, struct archive *target_arc,
|
|||
if (wrtn == FILTER_RETURN_ERROR) {
|
||||
log_msg(LOG_ERR, 0, "Warning: Error invoking filter: %s (skipping)",
|
||||
typetab[(*typ >> 3)].filter_name);
|
||||
} else if (wrtn != FILTER_RETURN_SKIP) {
|
||||
if (typetab[(*typ >> 3)].result_type != 0)
|
||||
*typ = typetab[(*typ >> 3)].result_type;
|
||||
}
|
||||
return (wrtn);
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
* can be attempted to be compressed again for a possible gain. For others it is
|
||||
* a waste of time.
|
||||
*/
|
||||
if (PC_TYPE(btype) == TYPE_COMPRESSED && level < 7) {
|
||||
if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) {
|
||||
int subtype = PC_SUBTYPE(btype);
|
||||
|
||||
if (subtype != TYPE_COMPRESSED_LZW && subtype != TYPE_COMPRESSED_GZ &&
|
||||
|
|
|
@ -88,7 +88,7 @@ analyze_buffer(void *src, uint64_t srclen, analyzer_ctx_t *actx)
|
|||
|
||||
markup = 0;
|
||||
if (tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 &&
|
||||
tagcnt > (double)spc * 0.1)
|
||||
tagcnt > (double)spc * 0.06)
|
||||
markup = 1;
|
||||
|
||||
if (markup) {
|
||||
|
|
|
@ -165,7 +165,7 @@ libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
int rv;
|
||||
struct libbsc_params *bscdat = (struct libbsc_params *)data;
|
||||
|
||||
if (PC_TYPE(btype) == TYPE_COMPRESSED) {
|
||||
if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) {
|
||||
int subtype = PC_SUBTYPE(btype);
|
||||
if (subtype == TYPE_COMPRESSED_BZ2 || subtype == TYPE_COMPRESSED_LZMA)
|
||||
return (-1);
|
||||
|
|
|
@ -114,7 +114,7 @@ lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
/*
|
||||
* Ignore compressed data in fast modes.
|
||||
*/
|
||||
if (level < 7 && PC_TYPE(btype) == TYPE_COMPRESSED)
|
||||
if (level < 7 && PC_TYPE(btype) & TYPE_COMPRESSED)
|
||||
return (-1);
|
||||
|
||||
rv = lzfx_compress(src, _srclen, dst, &_dstlen, lzdat->htab_bits);
|
||||
|
|
|
@ -223,7 +223,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
stype = PC_SUBTYPE(btype);
|
||||
analyzed = 0;
|
||||
if (btype == TYPE_UNKNOWN || stype == TYPE_ARCHIVE_TAR || stype == TYPE_PDF ||
|
||||
PC_TYPE(btype) == TYPE_TEXT || interesting) {
|
||||
PC_TYPE(btype) & TYPE_TEXT || interesting) {
|
||||
analyze_buffer(src, srclen, &actx);
|
||||
analyzed = 1;
|
||||
if (pctx->adapt_mode)
|
||||
|
@ -264,7 +264,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
else
|
||||
b_type = analyze_buffer_simple(from, fromlen);
|
||||
|
||||
if (PC_TYPE(b_type) == TYPE_TEXT) {
|
||||
if (PC_TYPE(b_type) & TYPE_TEXT) {
|
||||
_dstlen = fromlen;
|
||||
result = dict_encode(from, fromlen, to, &_dstlen);
|
||||
if (result != -1) {
|
||||
|
@ -287,7 +287,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
if (analyzed)
|
||||
b_type = actx.forty_pct.btype;
|
||||
|
||||
if (PC_TYPE(b_type) != TYPE_BINARY) {
|
||||
if (!(PC_TYPE(b_type) & TYPE_BINARY)) {
|
||||
hashsize = lzp_hash_size(level);
|
||||
result = lzp_compress((const uchar_t *)from, to, fromlen,
|
||||
hashsize, LZP_DEFAULT_LZPMINLEN, 0);
|
||||
|
@ -312,7 +312,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
if (analyzed)
|
||||
b_type = actx.one_pct.btype;
|
||||
|
||||
if (PC_TYPE(b_type) != TYPE_TEXT) {
|
||||
if (!(PC_TYPE(b_type) & TYPE_TEXT)) {
|
||||
_dstlen = fromlen;
|
||||
result = delta2_encode((uchar_t *)from, fromlen, to,
|
||||
&_dstlen, props->delta2_span,
|
||||
|
|
|
@ -148,7 +148,7 @@ ppmd_compress(void *src, uint64_t srclen, void *dst,
|
|||
CPpmd8 *_ppmd = (CPpmd8 *)data;
|
||||
uchar_t *_src = (uchar_t *)src;
|
||||
|
||||
if (PC_TYPE(btype) == TYPE_COMPRESSED)
|
||||
if (PC_TYPE(btype) & TYPE_COMPRESSED)
|
||||
return (-1);
|
||||
|
||||
Ppmd8_RangeEnc_Init(_ppmd);
|
||||
|
|
|
@ -157,7 +157,7 @@ zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen,
|
|||
* can be attempted to be compressed again for a possible gain. For others it is
|
||||
* a waste of time.
|
||||
*/
|
||||
if (PC_TYPE(btype) == TYPE_COMPRESSED && level < 7) {
|
||||
if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) {
|
||||
int subtype = PC_SUBTYPE(btype);
|
||||
|
||||
if (subtype != TYPE_COMPRESSED_LZW &&
|
||||
|
|
Loading…
Reference in a new issue