Improve file sorting algorithm.

Add more file extension names.
Fix data type mask size.
This commit is contained in:
Moinak Ghosh 2014-10-27 19:23:03 +05:30
parent cc68550670
commit b7804a0caa
6 changed files with 242 additions and 35 deletions

View file

@ -106,6 +106,7 @@ static struct arc_list_state {
pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER;
static int detect_type_by_ext(const char *path, int pathlen); static int detect_type_by_ext(const char *path, int pathlen);
static int detect_type_from_ext(const char *ext, int len);
static int detect_type_by_data(uchar_t *buf, size_t len); static int detect_type_by_data(uchar_t *buf, size_t len);
/* /*
@ -208,8 +209,7 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len
Sem_Wait(&(pctx->write_sem)); Sem_Wait(&(pctx->write_sem));
tbuf = pctx->arc_buf + pctx->arc_buf_pos; tbuf = pctx->arc_buf + pctx->arc_buf_pos;
pctx->arc_writing = 1; pctx->arc_writing = 1;
if (remaining > 0) pctx->btype = pctx->ctype;
pctx->btype = pctx->ctype;
} }
} }
} }
@ -373,6 +373,18 @@ compare_members(const void *a, const void *b) {
int rv, i; int rv, i;
member_entry_t *mem1 = (member_entry_t *)a; member_entry_t *mem1 = (member_entry_t *)a;
member_entry_t *mem2 = (member_entry_t *)b; member_entry_t *mem2 = (member_entry_t *)b;
uint64_t sz1, sz2;
/*
* First compare MSB of size. That separates extension and non-extension
* files.
*/
sz1 = mem1->size & 0x8000000000000000;
sz2 = mem2->size & 0x8000000000000000;
if (sz1 > sz2)
return (1);
else if (sz1 < sz2)
return (-1);
rv = 0; rv = 0;
for (i = 0; i < NAMELEN; i++) { for (i = 0; i < NAMELEN; i++) {
@ -380,9 +392,15 @@ compare_members(const void *a, const void *b) {
if (rv != 0) if (rv != 0)
return (rv); return (rv);
} }
if (mem1->size > mem2->size)
/*
* Clear high bits of size. They are just flags.
*/
sz1 = mem1->size & 0x7FFFFFFFFFFFFFFF;
sz2 = mem2->size & 0x7FFFFFFFFFFFFFFF;
if (sz1 > sz2)
return (1); return (1);
else if (mem1->size < mem2->size) else if (sz1 < sz2)
return (-1); return (-1);
return (0); return (0);
} }
@ -394,6 +412,16 @@ compare_members(const void *a, const void *b) {
static int static int
compare_members_lt(member_entry_t *mem1, member_entry_t *mem2) { compare_members_lt(member_entry_t *mem1, member_entry_t *mem2) {
int rv, i; int rv, i;
uint64_t sz1, sz2;
/*
* First compare MSB of size. That separates extension and non-extension
* files.
*/
sz1 = mem1->size & 0x8000000000000000;
sz2 = mem2->size & 0x8000000000000000;
if (sz1 < sz2)
return (1);
rv = 0; rv = 0;
for (i = 0; i < NAMELEN; i++) { for (i = 0; i < NAMELEN; i++) {
@ -403,7 +431,13 @@ compare_members_lt(member_entry_t *mem1, member_entry_t *mem2) {
else if (rv > 0) else if (rv > 0)
return (0); return (0);
} }
if (mem1->size < mem2->size)
/*
* Clear high bits of size. They are just flags.
*/
sz1 = mem1->size & 0x7FFFFFFFFFFFFFFF;
sz2 = mem2->size & 0x7FFFFFFFFFFFFFFF;
if (sz1 < sz2)
return (1); return (1);
return (0); return (0);
} }
@ -662,6 +696,11 @@ add_pathname(const char *fpath, const struct stat *sb,
* If not a directory then we store upto first 4 chars of * If not a directory then we store upto first 4 chars of
* the extension, if present, or first 4 chars of the * the extension, if present, or first 4 chars of the
* filename. * filename.
*
* NOTE: In order to separate files with and without extensions
* we set the MSB of the size parameter to 1 for extension
* and 0 for no extension. This limits the noted size of the
* file to INT64_MAX, but I think that is more than enough!
*/ */
for (i = 0; i < NAMELEN; i++) member->name[i] = 0; for (i = 0; i < NAMELEN; i++) member->name[i] = 0;
@ -670,11 +709,14 @@ add_pathname(const char *fpath, const struct stat *sb,
while (basename[i] != '\0' && i < NAMELEN) { while (basename[i] != '\0' && i < NAMELEN) {
member->name[i] = basename[i]; i++; member->name[i] = basename[i]; i++;
} }
// Clear 64-bit MSB
member->size &= 0x7FFFFFFFFFFFFFFF;
} else { } else {
dot++; dot++;
while (dot[i] != '\0' && i < NAMELEN) { while (dot[i] != '\0' && i < NAMELEN) {
member->name[i] = dot[i]; i++; member->name[i] = dot[i]; i++;
} }
member->size |= 0x8000000000000000;
} }
} else { } else {
/* /*
@ -690,6 +732,11 @@ add_pathname(const char *fpath, const struct stat *sb,
*/ */
for (i = 0; i < NAMELEN; i++) member->name[i] = 255; for (i = 0; i < NAMELEN; i++) member->name[i] = 255;
member->size = INT64_MAX - ftwbuf->level; member->size = INT64_MAX - ftwbuf->level;
/*
* Set 64-bit MSB to force directories to be bunched at the end.
*/
member->size |= 0x8000000000000000;
} }
} }
cont: cont:
@ -1629,22 +1676,17 @@ disable_all_filters()
* outside the hash table range then the function returns unknown type. * outside the hash table range then the function returns unknown type.
*/ */
static int static int
detect_type_by_ext(const char *path, int pathlen) detect_type_from_ext(const char *ext, int len)
{ {
const char *ext = NULL; int i;
ub4 slot; ub4 slot;
int i, len;
uint64_t extnum;
char extl[8]; char extl[8];
uint64_t extnum;
for (i = pathlen-1; i > 0 && path[i] != '.' && path[i] != PATHSEP_CHAR; i--); if (len == 0 || len > 8) goto ret; // If extension is empty give up
if (i == 0 || path[i] != '.') goto out; // If extension not found give up
len = pathlen - i - 1;
if (len == 0 || len > 8) goto out; // If extension is empty give up
ext = &path[i+1];
for (i = 0; i < len; i++) extl[i] = tolower(ext[i]); for (i = 0; i < len; i++) extl[i] = tolower(ext[i]);
slot = phash(extl, len); slot = phash(extl, len);
if (slot >= PHASHNKEYS) goto out; // Extension maps outside hash table range, give up if (slot >= PHASHNKEYS) goto ret; // Extension maps outside hash table range, give up
extnum = 0; extnum = 0;
/* /*
@ -1654,6 +1696,21 @@ detect_type_by_ext(const char *path, int pathlen)
extnum = (extnum << 8) | tolower(ext[i]); extnum = (extnum << 8) | tolower(ext[i]);
if (exthtab[slot].extnum == extnum) if (exthtab[slot].extnum == extnum)
return (exthtab[slot].type); return (exthtab[slot].type);
ret:
return (TYPE_UNKNOWN);
}
static int
detect_type_by_ext(const char *path, int pathlen)
{
const char *ext = NULL;
int i, len;
for (i = pathlen-1; i > 0 && path[i] != '.' && path[i] != PATHSEP_CHAR; i--);
if (i == 0 || path[i] != '.') goto out; // If extension not found give up
len = pathlen - i - 1;
ext = &path[i+1];
return (detect_type_from_ext(ext, len));
out: out:
return (TYPE_UNKNOWN); return (TYPE_UNKNOWN);
} }
@ -1703,7 +1760,7 @@ static int
detect_type_by_data(uchar_t *buf, size_t len) detect_type_by_data(uchar_t *buf, size_t len)
{ {
// At least a few bytes. // At least a few bytes.
if (len < 512) return (TYPE_UNKNOWN); if (len < 10) return (TYPE_UNKNOWN);
// WAV files. // WAV files.
if (identify_wav_type(buf, len)) if (identify_wav_type(buf, len))
@ -1718,10 +1775,10 @@ detect_type_by_data(uchar_t *buf, size_t len)
// Try to detect DICOM medical image file. BSC compresses these better. // Try to detect DICOM medical image file. BSC compresses these better.
if (len > 127) { if (len > 127) {
size_t i; int i;
// DICOM files should have either DICM or ISO_IR within the first 128 bytes // DICOM files should have either DICM or ISO_IR within the first 128 bytes
for (i = 0; i < 128; i++) { for (i = 0; i < 128-4; i++) {
if (buf[i] == 'D') if (buf[i] == 'D')
if (memcmp(&buf[i], "DICM", 4) == 0) if (memcmp(&buf[i], "DICM", 4) == 0)
return (TYPE_BINARY|TYPE_DICOM); return (TYPE_BINARY|TYPE_DICOM);

View file

@ -16,24 +16,37 @@ struct ext_entry {
{"cc" , TYPE_TEXT, 2}, {"cc" , TYPE_TEXT, 2},
{"cpp" , TYPE_TEXT, 3}, {"cpp" , TYPE_TEXT, 3},
{"c++" , TYPE_TEXT, 3}, {"c++" , TYPE_TEXT, 3},
{"h++" , TYPE_TEXT, 3},
{"hpp" , TYPE_TEXT, 3}, {"hpp" , TYPE_TEXT, 3},
{"hxx" , TYPE_TEXT, 3},
{"hh" , TYPE_TEXT, 2},
{"txt" , TYPE_TEXT, 3}, {"txt" , TYPE_TEXT, 3},
{"text" , TYPE_TEXT, 4}, {"text" , TYPE_TEXT, 4},
{"html" , TYPE_TEXT|TYPE_MARKUP, 4}, {"html" , TYPE_TEXT|TYPE_MARKUP, 4},
{"htm" , TYPE_TEXT|TYPE_MARKUP, 3}, {"htm" , TYPE_TEXT|TYPE_MARKUP, 3},
{"xml" , TYPE_TEXT|TYPE_MARKUP, 3}, {"xml" , TYPE_TEXT|TYPE_MARKUP, 3},
{"sgml" , TYPE_TEXT|TYPE_MARKUP, 4},
{"info" , TYPE_TEXT, 4}, {"info" , TYPE_TEXT, 4},
{"svg" , TYPE_TEXT, 3}, {"svg" , TYPE_TEXT, 3},
{"conf" , TYPE_TEXT, 4}, {"conf" , TYPE_TEXT, 4},
{"cfg" , TYPE_TEXT, 3},
{"py" , TYPE_TEXT, 2}, {"py" , TYPE_TEXT, 2},
{"rb" , TYPE_TEXT, 2}, {"rb" , TYPE_TEXT, 2},
{"ru" , TYPE_TEXT, 2},
{"rbw" , TYPE_TEXT, 3},
{"xpm" , TYPE_TEXT, 3}, {"xpm" , TYPE_TEXT, 3},
{"js" , TYPE_TEXT, 2}, {"js" , TYPE_TEXT, 2},
{"jsp" , TYPE_TEXT, 3}, {"jsp" , TYPE_TEXT, 3},
{"pl" , TYPE_TEXT, 2}, {"pl" , TYPE_TEXT, 2},
{"t" , TYPE_TEXT, 1},
{"tcl" , TYPE_TEXT, 3}, {"tcl" , TYPE_TEXT, 3},
{"sh" , TYPE_TEXT, 2}, {"sh" , TYPE_TEXT, 2},
{"ksh" , TYPE_TEXT, 3},
{"csh" , TYPE_TEXT, 3},
{"php" , TYPE_TEXT, 3}, {"php" , TYPE_TEXT, 3},
{"php3" , TYPE_TEXT, 4},
{"php4" , TYPE_TEXT, 4},
{"php5" , TYPE_TEXT, 4},
{"bat" , TYPE_TEXT, 3}, {"bat" , TYPE_TEXT, 3},
{"pm" , TYPE_TEXT, 2}, {"pm" , TYPE_TEXT, 2},
{"r" , TYPE_TEXT, 1}, {"r" , TYPE_TEXT, 1},
@ -44,14 +57,19 @@ struct ext_entry {
{"java" , TYPE_TEXT, 4}, {"java" , TYPE_TEXT, 4},
{"m4" , TYPE_TEXT, 2}, {"m4" , TYPE_TEXT, 2},
{"vb" , TYPE_TEXT, 2}, {"vb" , TYPE_TEXT, 2},
{"vba" , TYPE_TEXT, 3},
{"vbs" , TYPE_TEXT, 3},
{"xslt" , TYPE_TEXT|TYPE_MARKUP, 4}, {"xslt" , TYPE_TEXT|TYPE_MARKUP, 4},
{"xsl" , TYPE_TEXT|TYPE_MARKUP, 3}, {"xsl" , TYPE_TEXT|TYPE_MARKUP, 3},
{"xsd" , TYPE_TEXT|TYPE_MARKUP, 3},
{"xs" , TYPE_TEXT, 2},
{"yacc" , TYPE_TEXT, 4}, {"yacc" , TYPE_TEXT, 4},
{"lex" , TYPE_TEXT, 3}, {"lex" , TYPE_TEXT, 3},
{"csv" , TYPE_TEXT, 3}, {"csv" , TYPE_TEXT, 3},
{"shtml" , TYPE_TEXT|TYPE_MARKUP, 5}, {"shtml" , TYPE_TEXT|TYPE_MARKUP, 5},
{"xhtml" , TYPE_TEXT|TYPE_MARKUP, 5}, {"xhtml" , TYPE_TEXT|TYPE_MARKUP, 5},
{"xht" , TYPE_TEXT|TYPE_MARKUP, 3}, {"xht" , TYPE_TEXT|TYPE_MARKUP, 3},
{"tpl" , TYPE_TEXT|TYPE_MARKUP, 3},
{"asp" , TYPE_TEXT, 3}, {"asp" , TYPE_TEXT, 3},
{"aspx" , TYPE_TEXT, 4}, {"aspx" , TYPE_TEXT, 4},
{"rss" , TYPE_TEXT|TYPE_MARKUP, 3}, {"rss" , TYPE_TEXT|TYPE_MARKUP, 3},
@ -67,6 +85,7 @@ struct ext_entry {
{"ps" , TYPE_TEXT, 2}, {"ps" , TYPE_TEXT, 2},
{"bib" , TYPE_TEXT, 3}, {"bib" , TYPE_TEXT, 3},
{"lua" , TYPE_TEXT, 3}, {"lua" , TYPE_TEXT, 3},
{"nse" , TYPE_TEXT, 3},
{"dtd" , TYPE_TEXT, 3}, {"dtd" , TYPE_TEXT, 3},
{"qml" , TYPE_TEXT|TYPE_MARKUP, 3}, {"qml" , TYPE_TEXT|TYPE_MARKUP, 3},
{"fa" , TYPE_TEXT|TYPE_DNA_SEQ, 2}, {"fa" , TYPE_TEXT|TYPE_DNA_SEQ, 2},
@ -105,6 +124,7 @@ struct ext_entry {
{"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"m4p" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofs" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"ofr" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"ogg" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"flac" , TYPE_BINARY|TYPE_FLAC, 4}, {"flac" , TYPE_BINARY|TYPE_FLAC, 4},
{"avi" , TYPE_BINARY|TYPE_AVI, 3}, {"avi" , TYPE_BINARY|TYPE_AVI, 3},
{"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
@ -136,21 +156,76 @@ struct ext_entry {
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4}, {"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
{"xcf" , TYPE_BINARY, 3}, {"xcf" , TYPE_BINARY, 3},
{"mo" , TYPE_BINARY, 2}, {"mo" , TYPE_BINARY, 2},
{"gmo" , TYPE_BINARY, 3},
{"pyo" , TYPE_BINARY, 3}, {"pyo" , TYPE_BINARY, 3},
{"pyc" , TYPE_BINARY, 3}, {"pyc" , TYPE_BINARY, 3},
{"wav" , TYPE_BINARY|TYPE_WAV, 3}, {"wav" , TYPE_BINARY|TYPE_WAV, 3},
{"tta" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED, 3}, {"tta" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED, 3},
{"wv" , TYPE_BINARY|TYPE_COMPRESSED, 2}, {"wv" , TYPE_BINARY|TYPE_COMPRESSED, 2},
{"swf" , TYPE_BINARY, 3}, {"swf" , TYPE_BINARY, 3},
{"SVGZ" , TYPE_BINARY, 4}, {"svgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 4},
{"ODT" , TYPE_BINARY, 3}, {"odt" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP, 3},
{"3DM" , TYPE_BINARY, 3}, {"3dm" , TYPE_BINARY, 3},
{"chm" , TYPE_BINARY, 3}, {"chm" , TYPE_BINARY, 3},
{"CHM" , TYPE_BINARY, 3}, {"CHM" , TYPE_BINARY, 3},
{"svn" , TYPE_BINARY, 3},
{"ppm" , TYPE_BINARY|TYPE_PNM, 3}, {"ppm" , TYPE_BINARY|TYPE_PNM, 3},
{"pbm" , TYPE_BINARY|TYPE_PNM, 3}, {"pbm" , TYPE_BINARY|TYPE_PNM, 3},
{"pgm" , TYPE_BINARY|TYPE_PNM, 3}, {"pgm" , TYPE_BINARY|TYPE_PNM, 3},
{"pnm" , TYPE_BINARY|TYPE_PNM, 3}, {"pnm" , TYPE_BINARY|TYPE_PNM, 3},
{"ppn" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKPNM, 3}, {"ppn" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKPNM, 3},
{"mk" , TYPE_TEXT, 2},
{"diff" , TYPE_TEXT, 4},
{"po" , TYPE_TEXT, 2},
{"pot" , TYPE_TEXT, 3},
{"in" , TYPE_TEXT, 2},
{"ac" , TYPE_TEXT, 2},
{"guess" , TYPE_TEXT, 5},
{"sub" , TYPE_TEXT, 3},
{"rpath" , TYPE_TEXT, 5},
{"texi" , TYPE_TEXT, 4},
{"valgrind" , TYPE_TEXT, 8},
{"gperf" , TYPE_TEXT, 5},
{"latex" , TYPE_TEXT, 5},
{"f77" , TYPE_TEXT, 3},
{"f90" , TYPE_TEXT, 3},
{"f95" , TYPE_TEXT, 3},
{"groovy" , TYPE_TEXT, 6},
{"ebuild" , TYPE_TEXT, 6},
{"rex" , TYPE_TEXT, 3},
{"rexx" , TYPE_TEXT, 4},
{"scala" , TYPE_TEXT, 5},
{"xaml" , TYPE_TEXT|TYPE_MARKUP, 4},
{"yaml" , TYPE_TEXT|TYPE_MARKUP, 4},
{"tex" , TYPE_TEXT, 3},
{"rebol" , TYPE_TEXT, 5},
{"reb" , TYPE_TEXT, 3},
{"perl" , TYPE_TEXT, 4},
{"pas" , TYPE_TEXT, 3},
{"p6" , TYPE_TEXT, 2},
{"z80" , TYPE_TEXT, 3},
{"scm" , TYPE_TEXT, 3},
{"ss" , TYPE_TEXT, 2},
{"ml" , TYPE_TEXT, 2},
{"ml4" , TYPE_TEXT, 3},
{"mli" , TYPE_TEXT, 3},
{"mm" , TYPE_TEXT, 2},
{"m3" , TYPE_TEXT, 2},
{"lisp" , TYPE_TEXT, 4},
{"kdebuild-1" , TYPE_TEXT, 10},
{"hs" , TYPE_TEXT, 2},
{"gemspec" , TYPE_TEXT, 7},
{"fs" , TYPE_TEXT, 2},
{"coffee" , TYPE_TEXT, 6},
{"e" , TYPE_TEXT, 1},
{"cu" , TYPE_TEXT, 2},
{"awk" , TYPE_TEXT, 3},
{"xls" , TYPE_BINARY, 3},
{"xlw" , TYPE_BINARY, 3},
{"qt" , TYPE_BINARY, 2},
{"charset" , TYPE_TEXT, 7},
{"sed" , TYPE_TEXT, 3},
{"mailmap" , TYPE_TEXT, 7},
{"sin" , TYPE_BINARY, 3},
}; };
#endif #endif

View file

@ -3,24 +3,37 @@ h,TYPE_TEXT
cc,TYPE_TEXT cc,TYPE_TEXT
cpp,TYPE_TEXT cpp,TYPE_TEXT
c++,TYPE_TEXT c++,TYPE_TEXT
h++,TYPE_TEXT
hpp,TYPE_TEXT hpp,TYPE_TEXT
hxx,TYPE_TEXT
hh,TYPE_TEXT
txt,TYPE_TEXT txt,TYPE_TEXT
text,TYPE_TEXT text,TYPE_TEXT
html,TYPE_TEXT|TYPE_MARKUP html,TYPE_TEXT|TYPE_MARKUP
htm,TYPE_TEXT|TYPE_MARKUP htm,TYPE_TEXT|TYPE_MARKUP
xml,TYPE_TEXT|TYPE_MARKUP xml,TYPE_TEXT|TYPE_MARKUP
sgml,TYPE_TEXT|TYPE_MARKUP
info,TYPE_TEXT info,TYPE_TEXT
svg,TYPE_TEXT svg,TYPE_TEXT
conf,TYPE_TEXT conf,TYPE_TEXT
cfg,TYPE_TEXT
py,TYPE_TEXT py,TYPE_TEXT
rb,TYPE_TEXT rb,TYPE_TEXT
ru,TYPE_TEXT
rbw,TYPE_TEXT
xpm,TYPE_TEXT xpm,TYPE_TEXT
js,TYPE_TEXT js,TYPE_TEXT
jsp,TYPE_TEXT jsp,TYPE_TEXT
pl,TYPE_TEXT pl,TYPE_TEXT
t,TYPE_TEXT
tcl,TYPE_TEXT tcl,TYPE_TEXT
sh,TYPE_TEXT sh,TYPE_TEXT
ksh,TYPE_TEXT
csh,TYPE_TEXT
php,TYPE_TEXT php,TYPE_TEXT
php3,TYPE_TEXT
php4,TYPE_TEXT
php5,TYPE_TEXT
bat,TYPE_TEXT bat,TYPE_TEXT
pm,TYPE_TEXT pm,TYPE_TEXT
r,TYPE_TEXT r,TYPE_TEXT
@ -31,14 +44,19 @@ go,TYPE_TEXT
java,TYPE_TEXT java,TYPE_TEXT
m4,TYPE_TEXT m4,TYPE_TEXT
vb,TYPE_TEXT vb,TYPE_TEXT
vba,TYPE_TEXT
vbs,TYPE_TEXT
xslt,TYPE_TEXT|TYPE_MARKUP xslt,TYPE_TEXT|TYPE_MARKUP
xsl,TYPE_TEXT|TYPE_MARKUP xsl,TYPE_TEXT|TYPE_MARKUP
xsd,TYPE_TEXT|TYPE_MARKUP
xs,TYPE_TEXT
yacc,TYPE_TEXT yacc,TYPE_TEXT
lex,TYPE_TEXT lex,TYPE_TEXT
csv,TYPE_TEXT csv,TYPE_TEXT
shtml,TYPE_TEXT|TYPE_MARKUP shtml,TYPE_TEXT|TYPE_MARKUP
xhtml,TYPE_TEXT|TYPE_MARKUP xhtml,TYPE_TEXT|TYPE_MARKUP
xht,TYPE_TEXT|TYPE_MARKUP xht,TYPE_TEXT|TYPE_MARKUP
tpl,TYPE_TEXT|TYPE_MARKUP
asp,TYPE_TEXT asp,TYPE_TEXT
aspx,TYPE_TEXT aspx,TYPE_TEXT
rss,TYPE_TEXT|TYPE_MARKUP rss,TYPE_TEXT|TYPE_MARKUP
@ -54,6 +72,7 @@ s,TYPE_TEXT
ps,TYPE_TEXT ps,TYPE_TEXT
bib,TYPE_TEXT bib,TYPE_TEXT
lua,TYPE_TEXT lua,TYPE_TEXT
nse,TYPE_TEXT
dtd,TYPE_TEXT dtd,TYPE_TEXT
qml,TYPE_TEXT|TYPE_MARKUP qml,TYPE_TEXT|TYPE_MARKUP
@ -95,6 +114,7 @@ m4a,TYPE_BINARY|TYPE_COMPRESSED
m4p,TYPE_BINARY|TYPE_COMPRESSED m4p,TYPE_BINARY|TYPE_COMPRESSED
ofs,TYPE_BINARY|TYPE_COMPRESSED ofs,TYPE_BINARY|TYPE_COMPRESSED
ofr,TYPE_BINARY|TYPE_COMPRESSED ofr,TYPE_BINARY|TYPE_COMPRESSED
ogg,TYPE_BINARY|TYPE_COMPRESSED
flac,TYPE_BINARY|TYPE_FLAC flac,TYPE_BINARY|TYPE_FLAC
avi,TYPE_BINARY|TYPE_AVI avi,TYPE_BINARY|TYPE_AVI
pac,TYPE_BINARY|TYPE_COMPRESSED pac,TYPE_BINARY|TYPE_COMPRESSED
@ -126,19 +146,74 @@ pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
xcf,TYPE_BINARY xcf,TYPE_BINARY
mo,TYPE_BINARY mo,TYPE_BINARY
gmo,TYPE_BINARY
pyo,TYPE_BINARY pyo,TYPE_BINARY
pyc,TYPE_BINARY pyc,TYPE_BINARY
wav,TYPE_BINARY|TYPE_WAV wav,TYPE_BINARY|TYPE_WAV
tta,TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED tta,TYPE_BINARY|TYPE_COMPRESSED|TYPE_AUDIO_COMPRESSED
wv,TYPE_BINARY|TYPE_COMPRESSED wv,TYPE_BINARY|TYPE_COMPRESSED
swf,TYPE_BINARY swf,TYPE_BINARY
SVGZ,TYPE_BINARY,TYPE_COMPRESSED|TYPE_COMPRESSED_GZ svgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
ODT,TYPE_BINARY,TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP odt,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP
3DM,TYPE_BINARY 3dm,TYPE_BINARY
chm,TYPE_BINARY chm,TYPE_BINARY
CHM,TYPE_BINARY CHM,TYPE_BINARY
svn,TYPE_BINARY
ppm,TYPE_BINARY|TYPE_PNM ppm,TYPE_BINARY|TYPE_PNM
pbm,TYPE_BINARY|TYPE_PNM pbm,TYPE_BINARY|TYPE_PNM
pgm,TYPE_BINARY|TYPE_PNM pgm,TYPE_BINARY|TYPE_PNM
pnm,TYPE_BINARY|TYPE_PNM pnm,TYPE_BINARY|TYPE_PNM
ppn,TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKPNM ppn,TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKPNM
mk,TYPE_TEXT
diff,TYPE_TEXT
po,TYPE_TEXT
pot,TYPE_TEXT
in,TYPE_TEXT
ac,TYPE_TEXT
guess,TYPE_TEXT
sub,TYPE_TEXT
rpath,TYPE_TEXT
texi,TYPE_TEXT
valgrind,TYPE_TEXT
gperf,TYPE_TEXT
latex,TYPE_TEXT
f77,TYPE_TEXT
f90,TYPE_TEXT
f95,TYPE_TEXT
groovy,TYPE_TEXT
ebuild,TYPE_TEXT
rex,TYPE_TEXT
rexx,TYPE_TEXT
scala,TYPE_TEXT
xaml,TYPE_TEXT|TYPE_MARKUP
yaml,TYPE_TEXT|TYPE_MARKUP
tex,TYPE_TEXT
rebol,TYPE_TEXT
reb,TYPE_TEXT
perl,TYPE_TEXT
pas,TYPE_TEXT
p6,TYPE_TEXT
z80,TYPE_TEXT
scm,TYPE_TEXT
ss,TYPE_TEXT
ml,TYPE_TEXT
ml4,TYPE_TEXT
mli,TYPE_TEXT
mm,TYPE_TEXT
m3,TYPE_TEXT
lisp,TYPE_TEXT
kdebuild-1,TYPE_TEXT
hs,TYPE_TEXT
gemspec,TYPE_TEXT
fs,TYPE_TEXT
coffee,TYPE_TEXT
e,TYPE_TEXT
cu,TYPE_TEXT
awk,TYPE_TEXT
xls,TYPE_BINARY
xlw,TYPE_BINARY
qt,TYPE_BINARY
charset,TYPE_TEXT
sed,TYPE_TEXT
mailmap,TYPE_TEXT
sin,TYPE_BINARY

View file

@ -12,14 +12,14 @@
/* small adjustments to _a_ to make values distinct */ /* small adjustments to _a_ to make values distinct */
ub1 tab[] = { ub1 tab[] = {
125,0,0,220,85,0,82,87,113,0,0,113,0,0,82,125, 0,0,87,120,113,125,22,125,0,0,0,220,125,0,131,7,
0,0,7,87,0,113,82,0,0,183,0,131,0,7,0,253, 0,0,183,125,82,113,0,131,146,87,125,183,0,7,146,183,
0,0,0,0,85,0,113,0,0,113,125,113,0,7,22,0, 0,0,0,253,183,0,131,113,253,168,0,220,0,7,0,113,
82,0,7,113,125,125,0,0,0,113,113,131,220,0,0,85, 82,0,7,131,145,7,0,0,120,113,0,183,220,183,220,22,
0,87,0,0,113,0,85,183,82,88,7,88,58,113,0,0, 0,183,0,183,113,0,183,120,22,27,125,125,233,124,125,235,
124,0,168,125,0,125,0,116,0,82,125,55,0,22,116,12, 253,131,146,235,15,220,0,235,0,235,212,220,220,220,183,132,
0,125,113,113,0,40,0,0,42,232,0,124,0,92,183,61, 87,125,113,82,220,32,229,235,131,27,0,220,237,113,4,132,
0,0,221,0,0,234,0,0,97,11,0,0,164,91,0,0, 0,0,145,0,148,195,0,253,142,88,66,232,137,135,167,0,
}; };
/* The hash function */ /* The hash function */

View file

@ -8,7 +8,7 @@
extern ub1 tab[]; extern ub1 tab[];
#define PHASHLEN 0x80 /* length of hash mapping table */ #define PHASHLEN 0x80 /* length of hash mapping table */
#define PHASHNKEYS 141 /* How many keys were hashed */ #define PHASHNKEYS 216 /* How many keys were hashed */
#define PHASHRANGE 256 /* Range any input might map to */ #define PHASHRANGE 256 /* Range any input might map to */
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */ #define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */

View file

@ -322,10 +322,10 @@ typedef enum {
* | | | | * | | | |
* .---------------------------------------. * .---------------------------------------.
* | | | | | | | | | | | * | | | | | | | | | | |
* Bit 10 Bit 0 * Bit 15 Bit 0
*/ */
#define PC_TYPE_MASK 0x7 #define PC_TYPE_MASK 0x7
#define PC_SUBTYPE_MASK 0x7f8 #define PC_SUBTYPE_MASK 0xfff8
#define PC_SUBTYPE(x) ((x) & PC_SUBTYPE_MASK) #define PC_SUBTYPE(x) ((x) & PC_SUBTYPE_MASK)
#define PC_TYPE(x) ((x) & PC_TYPE_MASK) #define PC_TYPE(x) ((x) & PC_TYPE_MASK)