diff --git a/adaptive_compress.c b/adaptive_compress.c index 9dd94c2..e72806a 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -210,13 +210,21 @@ adapt_deinit(void **data) return (rv); } +int +is_bsc_type(int btype) +{ + int stype = PC_SUBTYPE(btype); + return ((stype == TYPE_MARKUP) | (stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) | + (stype == TYPE_TIFF) | (stype == TYPE_MP4) | (stype == TYPE_FLAC)); +} + int adapt_compress(void *src, uint64_t srclen, void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data) { struct adapt_data *adat = (struct adapt_data *)(data); uchar_t *src1 = (uchar_t *)src; - int rv = 0; + int rv = 0, bsc_type = 0; if (btype == TYPE_UNKNOWN) { uint64_t i, tot8b, tag1, tag2, tag3; @@ -262,6 +270,9 @@ adapt_compress(void *src, uint64_t srclen, void *dst, * is no point trying to compress such data, like Jpegs. However some archive headers * and zero paddings can exist which LZ4 can easily take care of very fast. */ +#ifdef ENABLE_PC_LIBBSC + bsc_type = is_bsc_type(btype); +#endif if (is_incompressible(btype)) { rv = lz4_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data); if (rv < 0) @@ -269,16 +280,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst, rv = ADAPT_COMPRESS_LZ4; lz4_count++; - } else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && - PC_SUBTYPE(btype) != TYPE_BMP && PC_SUBTYPE(btype) != TYPE_TIFF) { + } else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) { rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data); if (rv < 0) return (rv); rv = ADAPT_COMPRESS_LZMA; lzma_count++; - } else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && - PC_SUBTYPE(btype) != TYPE_BMP && PC_SUBTYPE(btype) != TYPE_TIFF) { + } else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) { rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL); if (rv < 0) return (rv); @@ -287,9 +296,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst, } else { #ifdef ENABLE_PC_LIBBSC - if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP || - PC_SUBTYPE(btype) == TYPE_BMP || PC_SUBTYPE(btype) == TYPE_DNA_SEQ || - PC_SUBTYPE(btype) == TYPE_TIFF)) { + if (adat->bsc_data && bsc_type) { rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data); if (rv < 0) return (rv); diff --git a/utils/phash/extensions.h b/utils/phash/extensions.h index e29394e..12d5c39 100644 --- a/utils/phash/extensions.h +++ b/utils/phash/extensions.h @@ -18,6 +18,7 @@ struct ext_entry { {"c++" , TYPE_TEXT, 3}, {"hpp" , TYPE_TEXT, 3}, {"txt" , TYPE_TEXT, 3}, + {"text" , TYPE_TEXT, 4}, {"html" , TYPE_TEXT|TYPE_MARKUP, 4}, {"htm" , TYPE_TEXT|TYPE_MARKUP, 3}, {"xml" , TYPE_TEXT|TYPE_MARKUP, 3}, @@ -98,13 +99,13 @@ struct ext_entry { {"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"divx" , TYPE_BINARY|TYPE_COMPRESSED, 4}, - {"mp4" , TYPE_BINARY|TYPE_COMPRESSED, 3}, + {"mp4" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_MP4, 3}, {"aac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"m4a" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3}, - {"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4}, + {"flac" , TYPE_BINARY|TYPE_FLAC, 4}, {"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3}, {"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3}, @@ -125,8 +126,8 @@ struct ext_entry { {"dmg" , TYPE_BINARY, 3}, {"7z" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2}, {"uha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC, 3}, - {"alz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ, 3}, - {"ace" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE, 3}, + {"alz" , TYPE_BINARY|TYPE_COMPRESSED, 3}, + {"ace" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"rar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR, 3}, {"xz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2}, {"txz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 3}, diff --git a/utils/phash/extensions.txt b/utils/phash/extensions.txt index b227fb4..884ac56 100644 --- a/utils/phash/extensions.txt +++ b/utils/phash/extensions.txt @@ -5,6 +5,7 @@ cpp,TYPE_TEXT c++,TYPE_TEXT hpp,TYPE_TEXT txt,TYPE_TEXT +text,TYPE_TEXT html,TYPE_TEXT|TYPE_MARKUP htm,TYPE_TEXT|TYPE_MARKUP xml,TYPE_TEXT|TYPE_MARKUP @@ -88,13 +89,13 @@ png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ mp3,TYPE_BINARY|TYPE_COMPRESSED wma,TYPE_BINARY|TYPE_COMPRESSED divx,TYPE_BINARY|TYPE_COMPRESSED -mp4,TYPE_BINARY|TYPE_COMPRESSED +mp4,TYPE_BINARY|TYPE_COMPRESSED|TYPE_MP4 aac,TYPE_BINARY|TYPE_COMPRESSED m4a,TYPE_BINARY|TYPE_COMPRESSED m4p,TYPE_BINARY|TYPE_COMPRESSED ofs,TYPE_BINARY|TYPE_COMPRESSED ofr,TYPE_BINARY|TYPE_COMPRESSED -flac,TYPE_BINARY|TYPE_COMPRESSED +flac,TYPE_BINARY|TYPE_FLAC pac,TYPE_BINARY|TYPE_COMPRESSED gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG @@ -115,8 +116,8 @@ lzo,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO dmg,TYPE_BINARY 7z,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA uha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC -alz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ -ace,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE +alz,TYPE_BINARY|TYPE_COMPRESSED +ace,TYPE_BINARY|TYPE_COMPRESSED rar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR xz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA txz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA diff --git a/utils/phash/phash.c b/utils/phash/phash.c index bea3bcc..57453cd 100644 --- a/utils/phash/phash.c +++ b/utils/phash/phash.c @@ -12,14 +12,14 @@ /* small adjustments to _a_ to make values distinct */ ub1 tab[] = { -125,0,0,82,125,113,82,87,113,0,0,113,0,0,113,125, -0,0,7,113,0,113,0,0,0,125,0,131,0,85,125,22, -0,113,0,0,85,0,0,113,0,113,125,113,0,7,22,0, -82,0,0,113,125,125,0,0,0,0,113,7,74,0,0,125, -0,87,0,0,113,0,125,183,82,0,124,40,40,183,0,0, -124,0,55,85,0,125,0,32,0,82,125,74,0,125,0,0, -0,0,113,113,0,40,0,0,42,27,0,0,0,40,183,4, -0,0,0,0,0,24,0,0,114,124,0,0,103,200,0,0, +125,0,0,82,125,113,82,87,113,0,0,7,0,0,113,125, +0,0,7,87,0,113,0,0,0,125,0,131,0,7,125,22, +0,0,0,0,85,0,0,0,0,113,85,113,0,7,22,0, +82,0,124,113,125,125,0,0,0,0,113,7,85,0,0,85, +0,82,0,0,113,0,125,183,82,55,124,88,58,183,0,0, +124,0,113,85,0,125,0,116,0,82,125,74,0,125,0,32, +0,0,113,124,0,85,0,0,42,61,0,87,0,40,183,4, +0,0,0,0,0,24,0,0,169,11,0,0,127,200,0,0, }; /* The hash function */ diff --git a/utils/phash/phash.h b/utils/phash/phash.h index e422e47..53c0431 100644 --- a/utils/phash/phash.h +++ b/utils/phash/phash.h @@ -8,7 +8,7 @@ extern ub1 tab[]; #define PHASHLEN 0x80 /* length of hash mapping table */ -#define PHASHNKEYS 134 /* How many keys were hashed */ +#define PHASHNKEYS 135 /* How many keys were hashed */ #define PHASHRANGE 256 /* Range any input might map to */ #define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */ diff --git a/utils/utils.h b/utils/utils.h index ca5c7b7..64ba780 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -260,8 +260,8 @@ typedef enum { TYPE_COMPRESSED_LZMA = 88, TYPE_COMPRESSED_LZO = 96, TYPE_COMPRESSED_UHARC = 104, - TYPE_COMPRESSED_ALZ = 112, - TYPE_COMPRESSED_ACE = 120, + TYPE_MP4 = 112, + TYPE_FLAC = 120, TYPE_COMPRESSED_RAR = 128, TYPE_COMPRESSED_LZ = 136, TYPE_COMPRESSED_PPMD = 144,