Use Libbsc for MP4 and FLAC files.

Reuse the type codes of two rarely seen archive formats (ALZ and ACE) for the more common MP4 and FLAC types.
This commit is contained in:
Moinak Ghosh 2013-12-03 21:56:07 +05:30
parent 958bdf7edc
commit 3f62cdf7d5
6 changed files with 36 additions and 27 deletions

View file

@ -210,13 +210,21 @@ adapt_deinit(void **data)
return (rv); return (rv);
} }
/*
 * Report whether a buffer's sub-type is one of those handed to Libbsc.
 *
 * btype: buffer type code; only its PC_SUBTYPE() portion is examined.
 *
 * Returns 1 when the sub-type is markup, BMP, DNA sequence, TIFF, MP4 or
 * FLAC, and 0 for every other sub-type.
 */
int
is_bsc_type(int btype)
{
	const int subtype = PC_SUBTYPE(btype);

	/* Text-like and uncompressed image/sequence data. */
	if (subtype == TYPE_MARKUP || subtype == TYPE_BMP || subtype == TYPE_DNA_SEQ)
		return (1);

	/* Remaining image and media container sub-types. */
	if (subtype == TYPE_TIFF || subtype == TYPE_MP4 || subtype == TYPE_FLAC)
		return (1);

	return (0);
}
int int
adapt_compress(void *src, uint64_t srclen, void *dst, adapt_compress(void *src, uint64_t srclen, void *dst,
uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data) uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{ {
struct adapt_data *adat = (struct adapt_data *)(data); struct adapt_data *adat = (struct adapt_data *)(data);
uchar_t *src1 = (uchar_t *)src; uchar_t *src1 = (uchar_t *)src;
int rv = 0; int rv = 0, bsc_type = 0;
if (btype == TYPE_UNKNOWN) { if (btype == TYPE_UNKNOWN) {
uint64_t i, tot8b, tag1, tag2, tag3; uint64_t i, tot8b, tag1, tag2, tag3;
@ -262,6 +270,9 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
* is no point trying to compress such data, like Jpegs. However some archive headers * is no point trying to compress such data, like Jpegs. However some archive headers
* and zero paddings can exist which LZ4 can easily take care of very fast. * and zero paddings can exist which LZ4 can easily take care of very fast.
*/ */
#ifdef ENABLE_PC_LIBBSC
bsc_type = is_bsc_type(btype);
#endif
if (is_incompressible(btype)) { if (is_incompressible(btype)) {
rv = lz4_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data); rv = lz4_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lz4_data);
if (rv < 0) if (rv < 0)
@ -269,16 +280,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
rv = ADAPT_COMPRESS_LZ4; rv = ADAPT_COMPRESS_LZ4;
lz4_count++; lz4_count++;
} else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && } else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) {
PC_SUBTYPE(btype) != TYPE_BMP && PC_SUBTYPE(btype) != TYPE_TIFF) {
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data); rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
if (rv < 0) if (rv < 0)
return (rv); return (rv);
rv = ADAPT_COMPRESS_LZMA; rv = ADAPT_COMPRESS_LZMA;
lzma_count++; lzma_count++;
} else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && } else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY && !bsc_type) {
PC_SUBTYPE(btype) != TYPE_BMP && PC_SUBTYPE(btype) != TYPE_TIFF) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL); rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
if (rv < 0) if (rv < 0)
return (rv); return (rv);
@ -287,9 +296,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
} else { } else {
#ifdef ENABLE_PC_LIBBSC #ifdef ENABLE_PC_LIBBSC
if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP || if (adat->bsc_data && bsc_type) {
PC_SUBTYPE(btype) == TYPE_BMP || PC_SUBTYPE(btype) == TYPE_DNA_SEQ ||
PC_SUBTYPE(btype) == TYPE_TIFF)) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data); rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
if (rv < 0) if (rv < 0)
return (rv); return (rv);

View file

@ -18,6 +18,7 @@ struct ext_entry {
{"c++" , TYPE_TEXT, 3}, {"c++" , TYPE_TEXT, 3},
{"hpp" , TYPE_TEXT, 3}, {"hpp" , TYPE_TEXT, 3},
{"txt" , TYPE_TEXT, 3}, {"txt" , TYPE_TEXT, 3},
{"text" , TYPE_TEXT, 4},
{"html" , TYPE_TEXT|TYPE_MARKUP, 4}, {"html" , TYPE_TEXT|TYPE_MARKUP, 4},
{"htm" , TYPE_TEXT|TYPE_MARKUP, 3}, {"htm" , TYPE_TEXT|TYPE_MARKUP, 3},
{"xml" , TYPE_TEXT|TYPE_MARKUP, 3}, {"xml" , TYPE_TEXT|TYPE_MARKUP, 3},
@ -98,13 +99,13 @@ struct ext_entry {
{"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"divx" , TYPE_BINARY|TYPE_COMPRESSED, 4}, {"divx" , TYPE_BINARY|TYPE_COMPRESSED, 4},
{"mp4" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"mp4" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_MP4, 3},
{"aac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"aac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"m4a" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"m4a" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4}, {"flac" , TYPE_BINARY|TYPE_FLAC, 4},
{"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3}, {"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3},
{"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3}, {"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
@ -125,8 +126,8 @@ struct ext_entry {
{"dmg" , TYPE_BINARY, 3}, {"dmg" , TYPE_BINARY, 3},
{"7z" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2}, {"7z" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
{"uha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC, 3}, {"uha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC, 3},
{"alz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ, 3}, {"alz" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ace" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE, 3}, {"ace" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"rar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR, 3}, {"rar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR, 3},
{"xz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2}, {"xz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 2},
{"txz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 3}, {"txz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 3},

View file

@ -5,6 +5,7 @@ cpp,TYPE_TEXT
c++,TYPE_TEXT c++,TYPE_TEXT
hpp,TYPE_TEXT hpp,TYPE_TEXT
txt,TYPE_TEXT txt,TYPE_TEXT
text,TYPE_TEXT
html,TYPE_TEXT|TYPE_MARKUP html,TYPE_TEXT|TYPE_MARKUP
htm,TYPE_TEXT|TYPE_MARKUP htm,TYPE_TEXT|TYPE_MARKUP
xml,TYPE_TEXT|TYPE_MARKUP xml,TYPE_TEXT|TYPE_MARKUP
@ -88,13 +89,13 @@ png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
mp3,TYPE_BINARY|TYPE_COMPRESSED mp3,TYPE_BINARY|TYPE_COMPRESSED
wma,TYPE_BINARY|TYPE_COMPRESSED wma,TYPE_BINARY|TYPE_COMPRESSED
divx,TYPE_BINARY|TYPE_COMPRESSED divx,TYPE_BINARY|TYPE_COMPRESSED
mp4,TYPE_BINARY|TYPE_COMPRESSED mp4,TYPE_BINARY|TYPE_COMPRESSED|TYPE_MP4
aac,TYPE_BINARY|TYPE_COMPRESSED aac,TYPE_BINARY|TYPE_COMPRESSED
m4a,TYPE_BINARY|TYPE_COMPRESSED m4a,TYPE_BINARY|TYPE_COMPRESSED
m4p,TYPE_BINARY|TYPE_COMPRESSED m4p,TYPE_BINARY|TYPE_COMPRESSED
ofs,TYPE_BINARY|TYPE_COMPRESSED ofs,TYPE_BINARY|TYPE_COMPRESSED
ofr,TYPE_BINARY|TYPE_COMPRESSED ofr,TYPE_BINARY|TYPE_COMPRESSED
flac,TYPE_BINARY|TYPE_COMPRESSED flac,TYPE_BINARY|TYPE_FLAC
pac,TYPE_BINARY|TYPE_COMPRESSED pac,TYPE_BINARY|TYPE_COMPRESSED
gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW
jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
@ -115,8 +116,8 @@ lzo,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO
dmg,TYPE_BINARY dmg,TYPE_BINARY
7z,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA 7z,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
uha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC uha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_UHARC
alz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ALZ alz,TYPE_BINARY|TYPE_COMPRESSED
ace,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ACE ace,TYPE_BINARY|TYPE_COMPRESSED
rar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR rar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_RAR
xz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA xz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA
txz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA txz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA

View file

@ -12,14 +12,14 @@
/* small adjustments to _a_ to make values distinct */ /* small adjustments to _a_ to make values distinct */
ub1 tab[] = { ub1 tab[] = {
125,0,0,82,125,113,82,87,113,0,0,113,0,0,113,125, 125,0,0,82,125,113,82,87,113,0,0,7,0,0,113,125,
0,0,7,113,0,113,0,0,0,125,0,131,0,85,125,22, 0,0,7,87,0,113,0,0,0,125,0,131,0,7,125,22,
0,113,0,0,85,0,0,113,0,113,125,113,0,7,22,0, 0,0,0,0,85,0,0,0,0,113,85,113,0,7,22,0,
82,0,0,113,125,125,0,0,0,0,113,7,74,0,0,125, 82,0,124,113,125,125,0,0,0,0,113,7,85,0,0,85,
0,87,0,0,113,0,125,183,82,0,124,40,40,183,0,0, 0,82,0,0,113,0,125,183,82,55,124,88,58,183,0,0,
124,0,55,85,0,125,0,32,0,82,125,74,0,125,0,0, 124,0,113,85,0,125,0,116,0,82,125,74,0,125,0,32,
0,0,113,113,0,40,0,0,42,27,0,0,0,40,183,4, 0,0,113,124,0,85,0,0,42,61,0,87,0,40,183,4,
0,0,0,0,0,24,0,0,114,124,0,0,103,200,0,0, 0,0,0,0,0,24,0,0,169,11,0,0,127,200,0,0,
}; };
/* The hash function */ /* The hash function */

View file

@ -8,7 +8,7 @@
extern ub1 tab[]; extern ub1 tab[];
#define PHASHLEN 0x80 /* length of hash mapping table */ #define PHASHLEN 0x80 /* length of hash mapping table */
#define PHASHNKEYS 134 /* How many keys were hashed */ #define PHASHNKEYS 135 /* How many keys were hashed */
#define PHASHRANGE 256 /* Range any input might map to */ #define PHASHRANGE 256 /* Range any input might map to */
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */ #define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */

View file

@ -260,8 +260,8 @@ typedef enum {
TYPE_COMPRESSED_LZMA = 88, TYPE_COMPRESSED_LZMA = 88,
TYPE_COMPRESSED_LZO = 96, TYPE_COMPRESSED_LZO = 96,
TYPE_COMPRESSED_UHARC = 104, TYPE_COMPRESSED_UHARC = 104,
TYPE_COMPRESSED_ALZ = 112, TYPE_MP4 = 112,
TYPE_COMPRESSED_ACE = 120, TYPE_FLAC = 120,
TYPE_COMPRESSED_RAR = 128, TYPE_COMPRESSED_RAR = 128,
TYPE_COMPRESSED_LZ = 136, TYPE_COMPRESSED_LZ = 136,
TYPE_COMPRESSED_PPMD = 144, TYPE_COMPRESSED_PPMD = 144,