From d5e1d2cdef7cef6f48f1cd7e192e7660281fd861 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Tue, 13 Jan 2015 19:59:09 +0530 Subject: [PATCH] Some fixes in the Dictionary preprocessor. Fix checking of data type flags. Allow file-level filters to change output data type. Tweak analyzer threshold for markup type. --- adaptive_compress.c | 8 ++++---- archive/pc_arc_filter.c | 5 +++++ archive/pc_arc_filter.h | 1 + archive/pc_archive.c | 3 +++ bzip2_compress.c | 2 +- filters/analyzer/analyzer.c | 2 +- libbsc_compress.c | 2 +- lzfx_compress.c | 2 +- pcompress.c | 8 ++++---- ppmd_compress.c | 2 +- zlib_compress.c | 2 +- 11 files changed, 23 insertions(+), 14 deletions(-) diff --git a/adaptive_compress.c b/adaptive_compress.c index ddb2631..3688de6 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -247,7 +247,7 @@ is_bsc_type(int btype) return ((stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) | (stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI) | (stype == TYPE_DICOM) | (stype == TYPE_MEDIA_BSC) | - (mtype == TYPE_TEXT && stype != TYPE_MARKUP)); + (mtype & TYPE_TEXT && stype != TYPE_MARKUP)); } int @@ -259,7 +259,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst, int stype = PC_SUBTYPE(btype); analyzer_ctx_t actx; - if (btype == TYPE_UNKNOWN || PC_TYPE(btype) == TYPE_TEXT || + if (btype == TYPE_UNKNOWN || PC_TYPE(btype) & TYPE_TEXT || stype == TYPE_ARCHIVE_TAR || stype == TYPE_PDF) { if (adat->actx == NULL) { analyze_buffer(src, srclen, &actx); @@ -292,14 +292,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst, rv = ADAPT_COMPRESS_LZ4; lz4_count++; - } else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) { + } else if (adat->adapt_mode == 2 && PC_TYPE(btype) & TYPE_BINARY && !bsc_type) { rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data); if (rv < 0) return (rv); rv = ADAPT_COMPRESS_LZMA; lzma_count++; - } else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) { + } else if (adat->adapt_mode == 1 && PC_TYPE(btype) & TYPE_BINARY && !bsc_type) { rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL); if (rv < 0) return (rv); diff --git a/archive/pc_arc_filter.c b/archive/pc_arc_filter.c index 0b3982e..bc4b859 100644 --- a/archive/pc_arc_filter.c +++ b/archive/pc_arc_filter.c @@ -81,16 +81,19 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff) typetab[slot].filter_private = sdat; typetab[slot].filter_func = packjpg_filter; typetab[slot].filter_name = "packJPG"; + typetab[slot].result_type = TYPE_BINARY; slot = TYPE_BMP >> 3; typetab[slot].filter_private = sdat; typetab[slot].filter_func = packpnm_filter; typetab[slot].filter_name = "packPNM"; + typetab[slot].result_type = TYPE_BINARY; slot = TYPE_PNM >> 3; typetab[slot].filter_private = sdat; typetab[slot].filter_func = packpnm_filter; typetab[slot].filter_name = "packPNM"; + typetab[slot].result_type = TYPE_BINARY; } #endif @@ -104,6 +107,7 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff) typetab[slot].filter_private = sdat; typetab[slot].filter_func = dispack_filter; typetab[slot].filter_name = "Dispack"; + typetab[slot].result_type = 0; } #ifdef _ENABLE_WAVPACK_ @@ -118,6 +122,7 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff) typetab[slot].filter_private = sdat; typetab[slot].filter_func = wavpack_filter; typetab[slot].filter_name = "WavPack"; + typetab[slot].result_type = 0; } #endif } diff --git a/archive/pc_arc_filter.h b/archive/pc_arc_filter.h index db4bea7..5a5527c 100644 --- a/archive/pc_arc_filter.h +++ b/archive/pc_arc_filter.h @@ -100,6 +100,7 @@ struct type_data { void *filter_private; filter_func_ptr filter_func; char *filter_name; + int result_type; }; void add_filters_by_type(struct type_data *typetab, struct filter_flags *ff); diff --git a/archive/pc_archive.c b/archive/pc_archive.c index be3680f..7bd85d7 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -1042,6 +1042,9 @@ process_by_filter(int fd, int *typ, struct archive *target_arc, if (wrtn == FILTER_RETURN_ERROR) { log_msg(LOG_ERR, 0, "Warning: Error invoking filter: %s (skipping)", typetab[(*typ >> 3)].filter_name); + } else if (wrtn != FILTER_RETURN_SKIP) { + if (typetab[(*typ >> 3)].result_type != 0) + *typ = typetab[(*typ >> 3)].result_type; } return (wrtn); } diff --git a/bzip2_compress.c b/bzip2_compress.c index 3152f00..33d43e3 100644 --- a/bzip2_compress.c +++ b/bzip2_compress.c @@ -110,7 +110,7 @@ bzip2_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, * can be attempted to be compressed again for a possible gain. For others it is * a waste of time. */ - if (PC_TYPE(btype) == TYPE_COMPRESSED && level < 7) { + if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) { int subtype = PC_SUBTYPE(btype); if (subtype != TYPE_COMPRESSED_LZW && subtype != TYPE_COMPRESSED_GZ && diff --git a/filters/analyzer/analyzer.c b/filters/analyzer/analyzer.c index a6c2878..0a13d31 100644 --- a/filters/analyzer/analyzer.c +++ b/filters/analyzer/analyzer.c @@ -88,7 +88,7 @@ analyze_buffer(void *src, uint64_t srclen, analyzer_ctx_t *actx) markup = 0; if (tag1 > tag2 - 4 && tag1 < tag2 + 4 && tag3 > (double)tag1 * 0.40 && - tagcnt > (double)spc * 0.1) + tagcnt > (double)spc * 0.06) markup = 1; if (markup) { diff --git a/libbsc_compress.c b/libbsc_compress.c index 9658924..5049035 100644 --- a/libbsc_compress.c +++ b/libbsc_compress.c @@ -165,7 +165,7 @@ libbsc_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int rv; struct libbsc_params *bscdat = (struct libbsc_params *)data; - if (PC_TYPE(btype) == TYPE_COMPRESSED) { + if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) { int subtype = PC_SUBTYPE(btype); if (subtype == TYPE_COMPRESSED_BZ2 || subtype == TYPE_COMPRESSED_LZMA) return (-1); diff --git a/lzfx_compress.c b/lzfx_compress.c index df6e25e..72db6fc 100644 --- a/lzfx_compress.c +++ b/lzfx_compress.c @@ -114,7 +114,7 @@ lz_fx_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, /* * Ignore compressed data in fast modes. */ - if (level < 7 && PC_TYPE(btype) == TYPE_COMPRESSED) + if (level < 7 && PC_TYPE(btype) & TYPE_COMPRESSED) return (-1); rv = lzfx_compress(src, _srclen, dst, &_dstlen, lzdat->htab_bits); diff --git a/pcompress.c b/pcompress.c index 332581b..d487c6a 100644 --- a/pcompress.c +++ b/pcompress.c @@ -223,7 +223,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t stype = PC_SUBTYPE(btype); analyzed = 0; if (btype == TYPE_UNKNOWN || stype == TYPE_ARCHIVE_TAR || stype == TYPE_PDF || - PC_TYPE(btype) == TYPE_TEXT || interesting) { + PC_TYPE(btype) & TYPE_TEXT || interesting) { analyze_buffer(src, srclen, &actx); analyzed = 1; if (pctx->adapt_mode) @@ -264,7 +264,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t else b_type = analyze_buffer_simple(from, fromlen); - if (PC_TYPE(b_type) == TYPE_TEXT) { + if (PC_TYPE(b_type) & TYPE_TEXT) { _dstlen = fromlen; result = dict_encode(from, fromlen, to, &_dstlen); if (result != -1) { @@ -287,7 +287,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t if (analyzed) b_type = actx.forty_pct.btype; - if (PC_TYPE(b_type) != TYPE_BINARY) { + if (!(PC_TYPE(b_type) & TYPE_BINARY)) { hashsize = lzp_hash_size(level); result = lzp_compress((const uchar_t *)from, to, fromlen, hashsize, LZP_DEFAULT_LZPMINLEN, 0); @@ -312,7 +312,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t if (analyzed) b_type = actx.one_pct.btype; - if (PC_TYPE(b_type) != TYPE_TEXT) { + if (!(PC_TYPE(b_type) & TYPE_TEXT)) { _dstlen = fromlen; result = delta2_encode((uchar_t *)from, fromlen, to, &_dstlen, props->delta2_span, diff --git a/ppmd_compress.c b/ppmd_compress.c index 73892d3..2b05db2 100644 --- a/ppmd_compress.c +++ b/ppmd_compress.c @@ -148,7 +148,7 @@ ppmd_compress(void *src, uint64_t srclen, void *dst, CPpmd8 *_ppmd = (CPpmd8 *)data; uchar_t *_src = (uchar_t *)src; - if (PC_TYPE(btype) == TYPE_COMPRESSED) + if (PC_TYPE(btype) & TYPE_COMPRESSED) return (-1); Ppmd8_RangeEnc_Init(_ppmd); diff --git a/zlib_compress.c b/zlib_compress.c index 91b7896..90a7791 100644 --- a/zlib_compress.c +++ b/zlib_compress.c @@ -157,7 +157,7 @@ zlib_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, * can be attempted to be compressed again for a possible gain. For others it is * a waste of time. */ - if (PC_TYPE(btype) == TYPE_COMPRESSED && level < 7) { + if (PC_TYPE(btype) & TYPE_COMPRESSED && level < 7) { int subtype = PC_SUBTYPE(btype); if (subtype != TYPE_COMPRESSED_LZW &&