From 991482403bd0646a8f04414c18fa5adc7493ad64 Mon Sep 17 00:00:00 2001
From: Moinak Ghosh <moinakg@gmail.com>
Date: Thu, 7 Nov 2013 21:48:54 +0530
Subject: [PATCH] Add extension based file type detection and setting segment
 data type. Use Bob Jenkins Minimal Perfect Hash to check for known
 extensions. Use semaphore signaling and direct buffer copy for extraction.
 Miscellaneous fixes.

---
 Makefile.in                |    6 +-
 archive/pc_archive.c       |  125 +++-
 archive/pc_archive.h       |   11 +
 pcompress.c                |   13 +-
 pcompress.h                |    3 +
 utils/phash/Makefile       |   23 +
 utils/phash/Makefile.test  |   19 +
 utils/phash/extensions.h   |  115 +++
 utils/phash/extensions.txt |   99 +++
 utils/phash/genhash.sh     |   36 +
 utils/phash/lookupa.c      |  264 +++++++
 utils/phash/lookupa.h      |   49 ++
 utils/phash/perfect.c      | 1387 ++++++++++++++++++++++++++++++++++++
 utils/phash/perfect.h      |  157 ++++
 utils/phash/perfhex.c      | 1319 ++++++++++++++++++++++++++++++++++
 utils/phash/phash.c        |   28 +
 utils/phash/phash.h        |   18 +
 utils/phash/recycle.c      |  115 +++
 utils/phash/recycle.h      |   90 +++
 utils/phash/standard.h     |   82 +++
 utils/phash/testperf.c     |  231 ++++++
 utils/utils.h              |    1 +
 22 files changed, 4177 insertions(+), 14 deletions(-)
 create mode 100644 utils/phash/Makefile
 create mode 100644 utils/phash/Makefile.test
 create mode 100644 utils/phash/extensions.h
 create mode 100644 utils/phash/extensions.txt
 create mode 100644 utils/phash/genhash.sh
 create mode 100644 utils/phash/lookupa.c
 create mode 100644 utils/phash/lookupa.h
 create mode 100644 utils/phash/perfect.c
 create mode 100644 utils/phash/perfect.h
 create mode 100644 utils/phash/perfhex.c
 create mode 100644 utils/phash/phash.c
 create mode 100644 utils/phash/phash.h
 create mode 100644 utils/phash/recycle.c
 create mode 100644 utils/phash/recycle.h
 create mode 100644 utils/phash/standard.h
 create mode 100644 utils/phash/testperf.c

diff --git a/Makefile.in b/Makefile.in
index fbd2eda..ca44a62 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -28,9 +28,11 @@ LINKLIB=pcompress
 LIBVER=1
 MAINSRCS = utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
 	adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
-	utils/xxhash_base.c utils/heap.c utils/cpuid.c archive/pc_archive.c pcompress.c
+	utils/xxhash_base.c utils/heap.c utils/cpuid.c archive/pc_archive.c \
+	utils/phash/phash.c utils/phash/lookupa.c utils/phash/recycle.c pcompress.c
 MAINHDRS = allocator.h  pcompress.h  utils/utils.h utils/xxhash.h utils/heap.h \
-	utils/cpuid.h utils/xxhash.h archive/pc_archive.h 
+	utils/cpuid.h utils/xxhash.h archive/pc_archive.h utils/phash/standard.h \
+	utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h
 MAINOBJS = $(MAINSRCS:.c=.o)
 
 PROGSRCS = main.c
diff --git a/archive/pc_archive.c b/archive/pc_archive.c
index cbbf3b2..493fbce 100644
--- a/archive/pc_archive.c
+++ b/archive/pc_archive.c
@@ -45,12 +45,22 @@
 #include <archive.h>
 #include <archive_entry.h>
 #include "pc_archive.h"
+#include <phash/phash.h>
+#include <phash/extensions.h>
+#include <phash/standard.h>
 
 #undef _FEATURES_H
 #define _XOPEN_SOURCE 700
 #include <ftw.h>
 #include <stdint.h>
 
+static int inited = 0;	/* becomes 1 once init_archive_mod() has filled exthtab */
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;	/* held while exthtab is built */
+static struct ext_hash_entry {
+	uint64_t extnum;	/* shift-or signature of the extension's characters */
+	int type;		/* TYPE_* value copied from extlist[] */
+} *exthtab = NULL;		/* NUM_EXT entries, indexed by phash() of the extension */
+
 /*
 AE_IFREG   Regular file
 AE_IFLNK   Symbolic link
@@ -91,6 +101,8 @@ static struct arc_list_state {
 
 pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER;
 
+static int detect_type_by_ext(char *path, int pathlen);
+
 /*
  * Archive writer callback routines for archive creation operation.
  */
@@ -148,6 +160,28 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len
 		uchar_t *tbuf;
 
 		tbuf = pctx->arc_buf + pctx->arc_buf_pos;
+		if (pctx->btype != pctx->ctype) {
+			if (pctx->btype == TYPE_UNKNOWN || pctx->arc_buf_pos == 0) {
+				pctx->btype = pctx->ctype;
+			} else {
+				if (pctx->arc_buf_pos < pctx->min_chunk) {
+					uint32_t diff = pctx->min_chunk - pctx->arc_buf_pos;
+					if (len > diff)
+						pctx->btype = pctx->ctype;
+					else
+						pctx->ctype = pctx->btype;
+				} else {
+					pctx->arc_writing = 0;
+					sem_post(&(pctx->read_sem));
+					sem_wait(&(pctx->write_sem));
+					tbuf = pctx->arc_buf + pctx->arc_buf_pos;
+					pctx->arc_writing = 1;
+					if (remaining > 0)
+						pctx->btype = pctx->ctype;
+				}
+			}
+		}
+
 		if (remaining > pctx->arc_buf_size - pctx->arc_buf_pos) {
 			size_t nlen = pctx->arc_buf_size - pctx->arc_buf_pos;
 			memcpy(tbuf, buff, nlen);
@@ -189,9 +223,12 @@ archiver_read(void *ctx, void *buf, uint64_t count)
 	pctx->arc_buf = buf;
 	pctx->arc_buf_size = count;
 	pctx->arc_buf_pos = 0;
+	pctx->btype = TYPE_UNKNOWN;
 	sem_post(&(pctx->write_sem));
 	sem_wait(&(pctx->read_sem));
 	pctx->arc_buf = NULL;
+	if (pctx->btype == TYPE_UNKNOWN)
+		pctx->btype = TYPE_GENERIC;
 	return (pctx->arc_buf_pos);
 }
 
@@ -229,8 +266,9 @@ extract_read_callback(struct archive *arc, void *ctx, const void **buf)
 
 	if (pctx->arc_closed) {
 		pctx->arc_buf_size = 0;
+		log_msg(LOG_WARN, 0, "End of file.");
 		archive_set_error(arc, ARCHIVE_EOF, "End of file.");
-		return (0);
+		return (-1);
 	}
 
 	if (!pctx->arc_writing) {
@@ -242,8 +280,9 @@ extract_read_callback(struct archive *arc, void *ctx, const void **buf)
 
 	if (pctx->arc_buf == NULL || pctx->arc_buf_size == 0) {
 		pctx->arc_buf_size = 0;
+		log_msg(LOG_ERR, 0, "End of file when extracting archive.");
 		archive_set_error(arc, ARCHIVE_EOF, "End of file when extracting archive.");
-		return (0);
+		return (-1);
 	}
 	pctx->arc_writing = 1;
 	*buf = pctx->arc_buf;
@@ -256,8 +295,10 @@ archiver_write(void *ctx, void *buf, uint64_t count)
 {
 	pc_ctx_t *pctx = (pc_ctx_t *)ctx;
 
-	if (pctx->arc_closed)
+	if (pctx->arc_closed) {
+		log_msg(LOG_WARN, 0, "Archive extractor closed unexpectedly");
 		return (0);
+	}
 
 	if (pctx->arc_buf != NULL) {
 		log_msg(LOG_ERR, 0, "Incorrect sequencing of archiver_read() call.");
@@ -321,7 +362,7 @@ compare_members_lt(member_entry_t *mem1, member_entry_t *mem2) {
  * fetches the next entry in ascending order of the predetermined sort keys.
  */
 static int
-read_next_path(pc_ctx_t *pctx, char *fpath, char **namechars)
+read_next_path(pc_ctx_t *pctx, char *fpath, char **namechars, int *fpathlen)
 {
 	short namelen;
 	ssize_t rbytes;
@@ -434,6 +475,7 @@ do_mmap:
 		buf = pctx->temp_mmap_buf + (pctx->temp_file_pos - pctx->temp_mmap_pos);
 		memcpy(fpath, buf, namelen);
 		fpath[namelen] = '\0';
+		*fpathlen = namelen;
 
 		n = namelen-1;
 		while (fpath[n] == '/' && n > 0) n--;
@@ -761,6 +803,7 @@ setup_extractor(pc_ctx_t *pctx)
 	}
 	archive_read_support_format_all(arc);
 	pctx->archive_ctx = arc;
+	pctx->arc_writing = 0;
 
 	return (0);
 }
@@ -771,7 +814,7 @@ setup_extractor(pc_ctx_t *pctx)
  */
 static int
 copy_file_data(pc_ctx_t *pctx, struct archive *arc,
-	       struct archive *in_arc, struct archive_entry *entry)
+	       struct archive *in_arc, struct archive_entry *entry, int typ)
 {
 	size_t sz, offset, len;
 	ssize_t bytes_to_write;
@@ -804,6 +847,9 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc,
 		src = mapbuf;
 		wlen = len;
 
+/*		if (typ == TYPE_UNKNOWN)
+			pctx->ctype = detect_type_by_data(src, len);*/
+
 		/*
 		 * Write the entire mmap-ed buffer. Since we are writing to the compressor
 		 * stage pipe there is no need for blocking.
@@ -825,7 +871,7 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc,
 
 static int
 write_entry(pc_ctx_t *pctx, struct archive *arc, struct archive *in_arc,
-	    struct archive_entry *entry)
+	    struct archive_entry *entry, int typ)
 {
 	int rv;
 
@@ -842,7 +888,7 @@ write_entry(pc_ctx_t *pctx, struct archive *arc, struct archive *in_arc,
 	}
 
 	if (archive_entry_size(entry) > 0) {
-		return (copy_file_data(pctx, arc, in_arc, entry));
+		return (copy_file_data(pctx, arc, in_arc, entry, typ));
 	}
 
 	return (0);
@@ -856,7 +902,7 @@ static void *
 archiver_thread_func(void *dat) {
 	pc_ctx_t *pctx = (pc_ctx_t *)dat;
 	char fpath[PATH_MAX], *name, *bnchars = NULL; // Silence compiler
-	int warn, rbytes;
+	int warn, rbytes, fpathlen = 0; // Silence compiler
 	uint32_t ctr;
 	struct archive_entry *entry, *spare_entry, *ent;
 	struct archive *arc, *ard;
@@ -885,7 +931,9 @@ archiver_thread_func(void *dat) {
 	/*
 	 * Read next path entry from list file. read_next_path() also handles sorted reading.
 	 */
-	while ((rbytes = read_next_path(pctx, fpath, &bnchars)) != 0) {
+	while ((rbytes = read_next_path(pctx, fpath, &bnchars, &fpathlen)) != 0) {
+		int typ;
+
 		if (rbytes == -1) break;
 		archive_entry_copy_sourcepath(entry, fpath);
 		if (archive_read_disk_entry_from_file(ard, entry, -1, NULL) != ARCHIVE_OK) {
@@ -894,6 +942,11 @@ archiver_thread_func(void *dat) {
 			continue;
 		}
 
+		if (archive_entry_filetype(entry) == AE_IFREG) {
+			if ((typ = detect_type_by_ext(fpath, fpathlen)) != TYPE_UNKNOWN)
+				pctx->ctype = typ;
+		}
+
 		/*
 		 * Strip leading '/' or '../' or '/../' from member name.
 		 */
@@ -945,7 +998,7 @@ archiver_thread_func(void *dat) {
 		archive_entry_linkify(resolver, &entry, &spare_entry);
 		ent = entry;
 		while (ent != NULL) {
-			if (write_entry(pctx, arc, ard, ent) != 0) {
+			if (write_entry(pctx, arc, ard, ent, typ) != 0) {
 				goto done;
 			}
 			ent = spare_entry;
@@ -1094,3 +1147,55 @@ int
 start_extractor(pc_ctx_t *pctx) {
 	return (pthread_create(&(pctx->archive_thread), NULL, extractor_thread_func, (void *)pctx));
 }
+
+/* Build the extension table once under init_mutex; returns 0 if ready, 1 on malloc failure. */
+int
+init_archive_mod() {
+	int status = 0;
+
+	pthread_mutex_lock(&init_mutex);
+	if (!inited) {
+		exthtab = malloc(NUM_EXT * sizeof (struct ext_hash_entry));
+		if (exthtab == NULL) {
+			status = 1;
+		} else {
+			int n, k;
+			for (n = 0; n < NUM_EXT; n++) {
+				struct ext_entry *e = &extlist[n];
+				ub4 slot = phash(e->ext, e->len);
+				uint64_t sig = 0;
+				for (k = 0; k < e->len; k++)
+					sig = (sig << 1) | e->ext[k];
+				exthtab[slot].extnum = sig;
+				exthtab[slot].type = e->type;
+			}
+			inited = 1;
+		}
+	}
+	pthread_mutex_unlock(&init_mutex);
+	return (status);
+}
+
+/* Return the TYPE_* value registered for path's extension, or TYPE_UNKNOWN if none matches. */
+static int
+detect_type_by_ext(char *path, int pathlen)
+{
+	char *ext;
+	ub4 slot;
+	int i, len;
+	uint64_t extnum = 0;
+
+	for (i = pathlen-1; i > 0 && path[i] != '.' && path[i] != PATHSEP_CHAR; i--);
+	if (i <= 0 || path[i] != '.') goto out; /* i < 0: empty path; i == 0: no extension */
+	len = pathlen - i - 1;
+	if (len == 0) goto out;
+	ext = &path[i+1];
+	slot = phash(ext, len);
+	if (exthtab == NULL || slot >= NUM_EXT) goto out; /* table missing or slot out of range */
+	for (i = 0; i < len; i++)
+		extnum = (extnum << 1) | ext[i];
+	if (exthtab[slot].extnum == extnum)
+		return (exthtab[slot].type);
+out:
+	return (TYPE_UNKNOWN);
+}
diff --git a/archive/pc_archive.h b/archive/pc_archive.h
index 6e8eb7c..3d6bb36 100644
--- a/archive/pc_archive.h
+++ b/archive/pc_archive.h
@@ -38,6 +38,16 @@ typedef struct {
 	size_t size;
 } archive_list_entry_t;
 
+typedef enum {
+	TYPE_UNKNOWN = 0,	/* no type determined; returned when an extension is not recognised */
+	TYPE_GENERIC,		/* archiver_read() substitutes this when a buffer is still TYPE_UNKNOWN */
+	TYPE_COMPRESSED,	/* extensions.txt: archives, media and similar (zip, gz, mp3, png, pdf, ...) */
+	TYPE_EXE,		/* extensions.txt: exe, dll, bin, o, a, obj, so, com, xpi, off */
+	TYPE_TEXT,		/* extensions.txt: source code, markup and other text extensions */
+	TYPE_BINARY,		/* extensions.txt: currently only xcf */
+	TYPE_JPEG		/* extensions.txt: jpg, jpeg, jp2 */
+} data_type_t;
+
 /*
  * Archiving related functions.
  */
@@ -48,6 +58,7 @@ int start_extractor(pc_ctx_t *pctx);
 int64_t archiver_read(void *ctx, void *buf, uint64_t count);
 int64_t archiver_write(void *ctx, void *buf, uint64_t count);
 int archiver_close(void *ctx);
+int init_archive_mod();
 
 #ifdef	__cplusplus
 }
diff --git a/pcompress.c b/pcompress.c
index b45f654..82d01bb 100644
--- a/pcompress.c
+++ b/pcompress.c
@@ -1795,6 +1795,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
 	props.cksum = pctx->cksum;
 	props.buf_extra = 0;
 	cread_buf = NULL;
+	pctx->btype = TYPE_UNKNOWN;
 	flags = 0;
 	sbuf.st_size = 0;
 	err = 0;
@@ -2355,6 +2356,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
 			 */
 			tdat->id = pctx->chunk_num;
 			tdat->rbytes = rbytes;
+			tdat->btype = pctx->btype; // Have to copy btype for this buffer as pctx->btype will change
 			if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan || pctx->enable_rabin_global)) {
 				tmp = tdat->cmp_seg;
 				tdat->cmp_seg = cread_buf;
@@ -2665,6 +2667,7 @@ create_pc_context(void)
 
 	slab_init();
 	init_pcompress();
+	init_archive_mod();
 
 	memset(ctx, 0, sizeof (pc_ctx_t));
 	ctx->exec_name = (char *)malloc(NAME_MAX);
@@ -2686,8 +2689,8 @@ destroy_pc_context(pc_ctx_t *pctx)
 	if (pctx->pwd_file)
 		free(pctx->pwd_file);
 	free((void *)(pctx->exec_name));
-	free(pctx);
 	slab_cleanup(pctx->hide_mem_stats);
+	free(pctx);
 }
 
 int DLL_EXPORT
@@ -2904,7 +2907,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
 	 * Sorting of members when archiving is enabled for compression levels >6 (>2 for lz4),
 	 * unless it is explicitly disabled via '-n'.
 	 */
-	if (pctx->enable_archive_sort != -1) {
+	if (pctx->enable_archive_sort != -1 && pctx->do_compress) {
 		if ((memcmp(pctx->algo, "lz4", 3) == 0 && pctx->level > 2) || pctx->level > 6)
 			pctx->enable_archive_sort = 1;
 	} else {
@@ -2918,6 +2921,12 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
 			pctx->rab_blk_size = RAB_BLK_DEFAULT;
 	}
 
+	pctx->min_chunk = MIN_CHUNK;
+	if (pctx->enable_rabin_scan)
+		pctx->min_chunk = RAB_MIN_CHUNK_SIZE;
+	if (pctx->enable_rabin_global)
+		pctx->min_chunk = RAB_MIN_CHUNK_SIZE_GLOBAL;
+
 	/*
 	 * Remaining mandatory arguments are the filenames.
 	 */
diff --git a/pcompress.h b/pcompress.h
index 52968fb..de6b288 100644
--- a/pcompress.h
+++ b/pcompress.h
@@ -224,6 +224,8 @@ typedef struct pc_ctx {
 	uchar_t *arc_buf;
 	uint64_t arc_buf_size, arc_buf_pos;
 	int arc_closed, arc_writing;
+	uchar_t btype, ctype;
+	int min_chunk;
 
 	unsigned int chunk_num;
 	uint64_t largest_chunk, smallest_chunk, avg_chunk;
@@ -268,6 +270,7 @@ struct cmp_data {
 	mac_ctx_t chunk_hmac;
 	algo_props_t *props;
 	int decompressing;
+	uchar_t btype;
 	pc_ctx_t *pctx;
 };
 
diff --git a/utils/phash/Makefile b/utils/phash/Makefile
new file mode 100644
index 0000000..47a79c9
--- /dev/null
+++ b/utils/phash/Makefile
@@ -0,0 +1,23 @@
+CFLAGS = -O
+# Suffix rule for the .c sources listed below (the old .cc.o rule never matched them).
+.c.o:
+	gcc $(CFLAGS) -c $<
+
+O = lookupa.o recycle.o perfhex.o perfect.o
+# Default target: link the "perfect" program, then run genhash.sh.
+all : $(O)
+	gcc -o perfect $(O) -lm
+	sh genhash.sh
+
+clean:
+	rm -f perfect foo *.o
+
+# DEPENDENCIES
+
+lookupa.o : lookupa.c standard.h lookupa.h
+
+recycle.o : recycle.c standard.h recycle.h
+
+perfhex.o : perfhex.c standard.h lookupa.h recycle.h perfect.h
+
+perfect.o : perfect.c standard.h lookupa.h recycle.h perfect.h
diff --git a/utils/phash/Makefile.test b/utils/phash/Makefile.test
new file mode 100644
index 0000000..6110834
--- /dev/null
+++ b/utils/phash/Makefile.test
@@ -0,0 +1,19 @@
+CFLAGS = -O
+# Suffix rule for the .c sources listed below (the old .cc.o rule never matched them).
+.c.o:
+	gcc $(CFLAGS) -c $<
+
+O = lookupa.o recycle.o phash.o testperf.o
+# Links the objects above into the "foo" test program.
+foo : $(O)
+	gcc -o foo $(O) -lm
+
+# DEPENDENCIES
+
+lookupa.o  : lookupa.c standard.h lookupa.h
+
+recycle.o  : recycle.c standard.h recycle.h
+
+phash.o    : phash.c standard.h phash.h lookupa.h
+
+testperf.o : testperf.c standard.h recycle.h phash.h
diff --git a/utils/phash/extensions.h b/utils/phash/extensions.h
new file mode 100644
index 0000000..c3498b3
--- /dev/null
+++ b/utils/phash/extensions.h
@@ -0,0 +1,115 @@
+
+/* Generated File. DO NOT EDIT. */
+/*
+ * List of extensions and their types.
+ */
+
+#ifndef __EXT_H__
+#define __EXT_H__
+struct ext_entry {
+	char *ext;
+	int type;
+	int len;
+} extlist[] = {
+	{"c"	, TYPE_TEXT, 1},
+	{"h"	, TYPE_TEXT, 1},
+	{"cc"	, TYPE_TEXT, 2},
+	{"cpp"	, TYPE_TEXT, 3},
+	{"c++"	, TYPE_TEXT, 3},
+	{"hpp"	, TYPE_TEXT, 3},
+	{"txt"	, TYPE_TEXT, 3},
+	{"html"	, TYPE_TEXT, 4},
+	{"htm"	, TYPE_TEXT, 3},
+	{"xml"	, TYPE_TEXT, 3},
+	{"info"	, TYPE_TEXT, 4},
+	{"ppm"	, TYPE_TEXT, 3},
+	{"svg"	, TYPE_TEXT, 3},
+	{"conf"	, TYPE_TEXT, 4},
+	{"py"	, TYPE_TEXT, 2},
+	{"rb"	, TYPE_TEXT, 2},
+	{"xpm"	, TYPE_TEXT, 3},
+	{"js"	, TYPE_TEXT, 2},
+	{"jsp"	, TYPE_TEXT, 3},
+	{"pl"	, TYPE_TEXT, 2},
+	{"tcl"	, TYPE_TEXT, 3},
+	{"sh"	, TYPE_TEXT, 2},
+	{"php"	, TYPE_TEXT, 3},
+	{"bat"	, TYPE_TEXT, 3},
+	{"pm"	, TYPE_TEXT, 2},
+	{"r"	, TYPE_TEXT, 1},
+	{"d"	, TYPE_TEXT, 1},
+	{"bas"	, TYPE_TEXT, 3},
+	{"asm"	, TYPE_TEXT, 3},
+	{"go"	, TYPE_TEXT, 2},
+	{"java"	, TYPE_TEXT, 4},
+	{"m4"	, TYPE_TEXT, 2},
+	{"vb"	, TYPE_TEXT, 2},
+	{"xslt"	, TYPE_TEXT, 4},
+	{"yacc"	, TYPE_TEXT, 4},
+	{"lex"	, TYPE_TEXT, 3},
+	{"csv"	, TYPE_TEXT, 3},
+	{"shtml"	, TYPE_TEXT, 5},
+	{"xhtml"	, TYPE_TEXT, 5},
+	{"xht"	, TYPE_TEXT, 3},
+	{"asp"	, TYPE_TEXT, 3},
+	{"aspx"	, TYPE_TEXT, 4},
+	{"rss"	, TYPE_TEXT, 3},
+	{"atom"	, TYPE_TEXT, 4},
+	{"cgi"	, TYPE_TEXT, 3},
+	{"c#"	, TYPE_TEXT, 2},
+	{"cob"	, TYPE_TEXT, 3},
+	{"ada"	, TYPE_TEXT, 3},
+	{"ini"	, TYPE_TEXT, 3},
+	{"y"	, TYPE_TEXT, 1},
+	{"swg"	, TYPE_TEXT, 3},
+	{"s"	, TYPE_TEXT, 1},
+	{"ps"	, TYPE_TEXT, 2},
+	{"bib"	, TYPE_TEXT, 3},
+	{"lua"	, TYPE_TEXT, 3},
+	{"qml"	, TYPE_TEXT, 3},
+	{"exe"	, TYPE_EXE, 3},
+	{"dll"	, TYPE_EXE, 3},
+	{"bin"	, TYPE_EXE, 3},
+	{"o"	, TYPE_EXE, 1},
+	{"a"	, TYPE_EXE, 1},
+	{"obj"	, TYPE_EXE, 3},
+	{"so"	, TYPE_EXE, 2},
+	{"com"	, TYPE_EXE, 3},
+	{"xpi"	, TYPE_EXE, 3},
+	{"off"	, TYPE_EXE, 3},
+	{"pdf"	, TYPE_COMPRESSED, 3},
+	{"jpg"	, TYPE_JPEG, 3},
+	{"jpeg"	, TYPE_JPEG, 4},
+	{"png"	, TYPE_COMPRESSED, 3},
+	{"mp3"	, TYPE_COMPRESSED, 3},
+	{"wma"	, TYPE_COMPRESSED, 3},
+	{"divx"	, TYPE_COMPRESSED, 4},
+	{"mp4"	, TYPE_COMPRESSED, 3},
+	{"aac"	, TYPE_COMPRESSED, 3},
+	{"m4a"	, TYPE_COMPRESSED, 3},
+	{"m4p"	, TYPE_COMPRESSED, 3},
+	{"ofs"	, TYPE_COMPRESSED, 3},
+	{"ofr"	, TYPE_COMPRESSED, 3},
+	{"flac"	, TYPE_COMPRESSED, 4},
+	{"pac"	, TYPE_COMPRESSED, 3},
+	{"gif"	, TYPE_COMPRESSED, 3},
+	{"jp2"	, TYPE_JPEG, 3},
+	{"gz"	, TYPE_COMPRESSED, 2},
+	{"bz2"	, TYPE_COMPRESSED, 3},
+	{"zip"	, TYPE_COMPRESSED, 3},
+	{"arj"	, TYPE_COMPRESSED, 3},
+	{"arc"	, TYPE_COMPRESSED, 3},
+	{"jar"	, TYPE_COMPRESSED, 3},
+	{"lz"	, TYPE_COMPRESSED, 2},
+	{"lzh"	, TYPE_COMPRESSED, 3},
+	{"lzma"	, TYPE_COMPRESSED, 4},
+	{"lzo"	, TYPE_COMPRESSED, 3},
+	{"dmg"	, TYPE_COMPRESSED, 3},
+	{"7z"	, TYPE_COMPRESSED, 2},
+	{"uha"	, TYPE_COMPRESSED, 3},
+	{"alz"	, TYPE_COMPRESSED, 3},
+	{"ace"	, TYPE_COMPRESSED, 3},
+	{"xcf"	, TYPE_BINARY, 3},
+};
+#define	NUM_EXT	(99)
+#endif
diff --git a/utils/phash/extensions.txt b/utils/phash/extensions.txt
new file mode 100644
index 0000000..838997d
--- /dev/null
+++ b/utils/phash/extensions.txt
@@ -0,0 +1,99 @@
+c,TYPE_TEXT
+h,TYPE_TEXT
+cc,TYPE_TEXT
+cpp,TYPE_TEXT
+c++,TYPE_TEXT
+hpp,TYPE_TEXT
+txt,TYPE_TEXT
+html,TYPE_TEXT
+htm,TYPE_TEXT
+xml,TYPE_TEXT
+info,TYPE_TEXT
+ppm,TYPE_TEXT
+svg,TYPE_TEXT
+conf,TYPE_TEXT
+py,TYPE_TEXT
+rb,TYPE_TEXT
+xpm,TYPE_TEXT
+js,TYPE_TEXT
+jsp,TYPE_TEXT
+pl,TYPE_TEXT
+tcl,TYPE_TEXT
+sh,TYPE_TEXT
+php,TYPE_TEXT
+bat,TYPE_TEXT
+pm,TYPE_TEXT
+r,TYPE_TEXT
+d,TYPE_TEXT
+bas,TYPE_TEXT
+asm,TYPE_TEXT
+go,TYPE_TEXT
+java,TYPE_TEXT
+m4,TYPE_TEXT
+vb,TYPE_TEXT
+xslt,TYPE_TEXT
+yacc,TYPE_TEXT
+lex,TYPE_TEXT
+csv,TYPE_TEXT
+shtml,TYPE_TEXT
+xhtml,TYPE_TEXT
+xht,TYPE_TEXT
+asp,TYPE_TEXT
+aspx,TYPE_TEXT
+rss,TYPE_TEXT
+atom,TYPE_TEXT
+cgi,TYPE_TEXT
+c#,TYPE_TEXT
+cob,TYPE_TEXT
+ada,TYPE_TEXT
+ini,TYPE_TEXT
+y,TYPE_TEXT
+swg,TYPE_TEXT
+s,TYPE_TEXT
+ps,TYPE_TEXT
+bib,TYPE_TEXT
+lua,TYPE_TEXT
+qml,TYPE_TEXT
+exe,TYPE_EXE
+dll,TYPE_EXE
+bin,TYPE_EXE
+o,TYPE_EXE
+a,TYPE_EXE
+obj,TYPE_EXE
+so,TYPE_EXE
+com,TYPE_EXE
+xpi,TYPE_EXE
+off,TYPE_EXE
+pdf,TYPE_COMPRESSED
+jpg,TYPE_JPEG
+jpeg,TYPE_JPEG
+png,TYPE_COMPRESSED
+mp3,TYPE_COMPRESSED
+wma,TYPE_COMPRESSED
+divx,TYPE_COMPRESSED
+mp4,TYPE_COMPRESSED
+aac,TYPE_COMPRESSED
+m4a,TYPE_COMPRESSED
+m4p,TYPE_COMPRESSED
+ofs,TYPE_COMPRESSED
+ofr,TYPE_COMPRESSED
+flac,TYPE_COMPRESSED
+pac,TYPE_COMPRESSED
+gif,TYPE_COMPRESSED
+jp2,TYPE_JPEG
+gz,TYPE_COMPRESSED
+bz2,TYPE_COMPRESSED
+zip,TYPE_COMPRESSED
+arj,TYPE_COMPRESSED
+arc,TYPE_COMPRESSED
+jar,TYPE_COMPRESSED
+lz,TYPE_COMPRESSED
+lzh,TYPE_COMPRESSED
+lzma,TYPE_COMPRESSED
+lzo,TYPE_COMPRESSED
+dmg,TYPE_COMPRESSED
+7z,TYPE_COMPRESSED
+uha,TYPE_COMPRESSED
+alz,TYPE_COMPRESSED
+ace,TYPE_COMPRESSED
+xcf,TYPE_BINARY
diff --git a/utils/phash/genhash.sh b/utils/phash/genhash.sh
new file mode 100644
index 0000000..4d7b6dc
--- /dev/null
+++ b/utils/phash/genhash.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Regenerates extensions.h from extensions.txt, then feeds the extension names to ./perfect -nm.
+count=`cat extensions.txt | wc -l`
+echo '
+/* Generated File. DO NOT EDIT. */
+/*
+ * List of extensions and their types.
+ */
+
+#ifndef __EXT_H__
+#define __EXT_H__
+struct ext_entry {
+	char *ext;
+	int type;
+	int len;
+} extlist[] = {' > extensions.h
+
+rm -f extlist
+cat extensions.txt | while read -r line	# each line is "<extension>,<TYPE_* name>"
+do
+	_OIFS="$IFS"
+	IFS=","
+	set -- $line
+	IFS="$_OIFS"
+	ext=$1
+	type=$2
+	len=`printf '%s' "$ext" | wc -c`	# never use the extension itself as the printf format
+	printf '%s\n' "$ext" >> extlist
+	echo "	{\"${ext}\"	, $type, $len}," >> extensions.h
+done
+
+echo '};' >> extensions.h
+echo "#define	NUM_EXT	(${count})" >> extensions.h
+echo "#endif" >> extensions.h
+./perfect -nm < extlist
+rm -f extlist
diff --git a/utils/phash/lookupa.c b/utils/phash/lookupa.c
new file mode 100644
index 0000000..d3f4a21
--- /dev/null
+++ b/utils/phash/lookupa.c
@@ -0,0 +1,264 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+--------------------------------------------------------------------
+lookupa.c, by Bob Jenkins, December 1996.  Same as lookup2.c
+Use this code however you wish.  Public Domain.  No warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.c
+--------------------------------------------------------------------
+*/
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bit set, and the deltas of all three
+  high bits or all three low bits, whether the original value of a,b,c
+  is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+  have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a 
+  structure that could supported 2x parallelism, like so:
+      a -= b; 
+      a -= c; x = (c>>13);
+      b -= c; a ^= x;
+      b -= a; x = (a<<8);
+      c -= a; b ^= x;
+      c -= b; x = (b>>13);
+      ...
+  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+  of that parallelism.  They've also turned some of those single-cycle
+  latency instructions into multi-cycle latency instructions.  Still,
+  this is the fastest good hash I could find.  There were about 2^^68
+  to choose from.  I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+--------------------------------------------------------------------
+lookup() -- hash a variable-length key into a 32-bit value
+  k     : the key (the unaligned variable-length array of bytes)
+  len   : the length of the key, counting by bytes
+  level : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = lookup( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+/* k:      the key */
+/* length: the length of the key */
+/* level:  the previous hash, or an arbitrary value */
+ub4
+lookup(ub1 *k, ub4 length, ub4 level)
+{
+   register ub4 a,b,c,len;
+   /* Hashes the length bytes at k, seeded with level, and returns the final value of c. */
+   /* Set up the internal state */
+   len = length;        /* bytes still to consume; length itself is added to c below */
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = level;           /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)    /* each pass packs 12 key bytes into a, b, c (low byte first) */
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;             /* the total key length is folded into c */
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((ub4)k[10]<<24);
+   case 10: c+=((ub4)k[9]<<16);
+   case 9 : c+=((ub4)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
+
+/*
+--------------------------------------------------------------------
+mixc -- mixc 8 4-bit values as quickly and thoroughly as possible.
+Repeating mix() three times achieves avalanche.
+Repeating mix() four times eliminates all funnels and all
+  characteristics stronger than 2^{-11}.
+--------------------------------------------------------------------
+*/
+#define mixc(a,b,c,d,e,f,g,h) \
+{ \
+   a^=b<<11; d+=a; b+=c; \
+   b^=c>>2;  e+=b; c+=d; \
+   c^=d<<8;  f+=c; d+=e; \
+   d^=e>>16; g+=d; e+=f; \
+   e^=f<<10; h+=e; f+=g; \
+   f^=g>>4;  a+=f; g+=h; \
+   g^=h<<8;  b+=g; h+=a; \
+   h^=a>>9;  c+=h; a+=b; \
+}
+
+/*
+--------------------------------------------------------------------
+checksum() -- hash a variable-length key into a 256-bit value
+  k     : the key (the unaligned variable-length array of bytes)
+  len   : the length of the key, counting by bytes
+  state : an array of CHECKSTATE 4-byte values (256 bits)
+The state is the checksum.  Every bit of the key affects every bit of
+the state.  There are no funnels.  About 112+6.875len instructions.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0; i<8; ++i) state[i] = 0x9e3779b9;
+  for (i=0, h=0; i<n; ++i) checksum( k[i], len[i], state);
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use to detect changes between revisions of documents, assuming nobody
+is trying to cause collisions.  Do NOT use for cryptography.
+--------------------------------------------------------------------
+*/
+void
+checksum(ub1 *k, ub4 len, ub4 *state)
+{
+   register ub4 a,b,c,d,e,f,g,h,length;
+   /* NOTE(review): k[n]<<24 is evaluated as int here, unlike lookup()'s (ub4) casts -- confirm intended. */
+   /* Load the caller's eight state words; keep the original length for the tail step. */
+   length = len;
+   a=state[0]; b=state[1]; c=state[2]; d=state[3];
+   e=state[4]; f=state[5]; g=state[6]; h=state[7];
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 32)    /* each pass packs 32 key bytes into a..h, then mixes four times */
+   {
+      a += (k[0] +(k[1]<<8) +(k[2]<<16) +(k[3]<<24));
+      b += (k[4] +(k[5]<<8) +(k[6]<<16) +(k[7]<<24));
+      c += (k[8] +(k[9]<<8) +(k[10]<<16)+(k[11]<<24));
+      d += (k[12]+(k[13]<<8)+(k[14]<<16)+(k[15]<<24));
+      e += (k[16]+(k[17]<<8)+(k[18]<<16)+(k[19]<<24));
+      f += (k[20]+(k[21]<<8)+(k[22]<<16)+(k[23]<<24));
+      g += (k[24]+(k[25]<<8)+(k[26]<<16)+(k[27]<<24));
+      h += (k[28]+(k[29]<<8)+(k[30]<<16)+(k[31]<<24));
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      k += 32; len -= 32;
+   }
+
+   /*------------------------------------- handle the last 31 bytes */
+   h += length;         /* the total key length is folded into h */
+   switch(len)          /* no breaks: every case falls through to the ones below it */
+   {
+   case 31: h+=(k[30]<<24);
+   case 30: h+=(k[29]<<16);
+   case 29: h+=(k[28]<<8);
+   case 28: g+=(k[27]<<24);
+   case 27: g+=(k[26]<<16);
+   case 26: g+=(k[25]<<8);
+   case 25: g+=k[24];
+   case 24: f+=(k[23]<<24);
+   case 23: f+=(k[22]<<16);
+   case 22: f+=(k[21]<<8);
+   case 21: f+=k[20];
+   case 20: e+=(k[19]<<24);
+   case 19: e+=(k[18]<<16);
+   case 18: e+=(k[17]<<8);
+   case 17: e+=k[16];
+   case 16: d+=(k[15]<<24);
+   case 15: d+=(k[14]<<16);
+   case 14: d+=(k[13]<<8);
+   case 13: d+=k[12];
+   case 12: c+=(k[11]<<24);
+   case 11: c+=(k[10]<<16);
+   case 10: c+=(k[9]<<8);
+   case 9 : c+=k[8];
+   case 8 : b+=(k[7]<<24);
+   case 7 : b+=(k[6]<<16);
+   case 6 : b+=(k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=(k[3]<<24);
+   case 3 : a+=(k[2]<<16);
+   case 2 : a+=(k[1]<<8);
+   case 1 : a+=k[0];
+   }
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+
+   /*-------------------------------------------- report the result */
+   state[0]=a; state[1]=b; state[2]=c; state[3]=d;
+   state[4]=e; state[5]=f; state[6]=g; state[7]=h;
+}
diff --git a/utils/phash/lookupa.h b/utils/phash/lookupa.h
new file mode 100644
index 0000000..ba6d7c1
--- /dev/null
+++ b/utils/phash/lookupa.h
@@ -0,0 +1,49 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+------------------------------------------------------------------------------
+By Bob Jenkins, September 1996.
+lookupa.h, a hash function for table lookup, same function as lookup.c.
+Use this code in any way you wish.  Public Domain.  It has no warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef LOOKUPA
+#define LOOKUPA
+
+#define CHECKSTATE 8
+#define hashsize(n) ((ub4)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+ub4  lookup(/*_ ub1 *k, ub4 length, ub4 level _*/);
+void checksum(/*_ ub1 *k, ub4 length, ub4 *state _*/);
+
+#endif /* LOOKUPA */
diff --git a/utils/phash/perfect.c b/utils/phash/perfect.c
new file mode 100644
index 0000000..6f4339a
--- /dev/null
+++ b/utils/phash/perfect.c
@@ -0,0 +1,1387 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+------------------------------------------------------------------------------
+perfect.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996, December 1999
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.c
+
+This generates a minimal perfect hash function.  That means, given a
+set of n keys, this determines a hash function that maps each of
+those keys into a value in 0..n-1 with no collisions.
+
+The perfect hash function first uses a normal hash function on the key
+to determine (a,b) such that the pair (a,b) is distinct for all
+keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
+tab[] is an array of 1-byte values and scramble[] is a 256-term array of 
+2-byte or 4-byte values.  If there are n keys, the length of tab[] is a 
+power of two between n/3 and n.
+
+I found the idea of computing distinct (a,b) values in "Practical minimal 
+perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, 
+Communications of the ACM, January 1992.  They found the idea in Cichelli
+(CACM Jan 1980).  Beyond that, our methods differ.
+
+The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
+0..*blen*-1.  A fast hash function determines both a and b
+simultaneously.  Any decent hash function is likely to produce
+hashes so that (a,b) is distinct for all pairs.  I try the hash
+using different values of *salt* until all pairs are distinct.
+
+The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
+predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
+array that we fill in in such a way as to make the hash perfect.
+
+First we fill in all values of *tab* that are used by more than one
+key.  We try all possible values for each position until one works.
+
+This leaves m unmapped keys and m values that something could hash to.
+If you treat unmapped keys as lefthand nodes and unused hash values
+as righthand nodes, and draw a line connecting each key to each hash
+value it could map to, you get a bipartite graph.  We attempt to
+find a perfect matching in this graph.  If we succeed, we have
+determined a perfect hash for the whole set of keys.
+
+*scramble* is used because (a^tab[i]) clusters keys around *a*.
+------------------------------------------------------------------------------
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+#ifndef RECYCLE
+#include "recycle.h"
+#endif
+#ifndef PERFECT
+#include "perfect.h"
+#endif
+
+/*
+------------------------------------------------------------------------------
+Find the mapping that will produce a perfect hash
+------------------------------------------------------------------------------
+*/
+
+/* return the ceiling of the log (base 2) of val; 0 for val <= 1 */
+ub4
+mylog2(ub4 val)
+{
+  ub4 i;                  /* stop at the type width: a wider shift is undefined */
+  for (i=0; i < 8*sizeof(ub4) && ((ub4)1<<i) < val; ++i)
+    ;
+  return i;
+}
+
+/* Map x through p, where p is a permutation of 0..(1<<nbits)-1. */
+/* p(0) is 0 by construction; other code in this file relies on that. */
+/* x: a value in some range */
+/* nbits: number of bits in range */
+static ub4
+permute(ub4 x, ub4 nbits)
+{
+  int lowbits = ((ub4)1<<nbits)-1;                         /* nbits one-bits */
+  int up_a    = 1+nbits/2;                             /* left shift amounts */
+  int down_a  = 1+nbits/3;                            /* right shift amounts */
+  int up_b    = 1+nbits/4;
+  int down_b  = 1+nbits/5;
+  int rounds  = 20;
+  while (rounds-- > 0)
+  {
+    x += x<<up_a;    x &= lowbits;
+    x ^= x>>down_a;
+    x += x<<up_b;    x &= lowbits;
+    x ^= x>>down_b;
+  }
+  return x;
+}
+
+/* set scramble[i] = permute(i, mylog2(smax)) for every i below SCRAMBLE_LEN */
+/* scramble: hash is a^scramble[tab[b]] */
+/* smax: scramble values should be in 0..smax-1 */
+static void
+scrambleinit(ub4 *scramble, ub4 smax)
+{
+  ub4 nbits = mylog2(smax);          /* identical for every slot: compute once */
+  ub4 slot;
+
+  for (slot=0; slot<SCRAMBLE_LEN; ++slot)
+  {
+    scramble[slot] = permute(slot, nbits);
+  }
+}
+
+/*
+ * Print a message and exit with EXIT_FAILURE if key1 and key2 are the same
+ * key.  The caller already checked that their (a,b) are the same.
+ */
+static void
+checkdup(key *key1, key *key2, hashform *form)
+{
+  switch(form->hashtype)
+  {
+  case STRING_HT:
+    if ((key1->len_k == key2->len_k) &&
+	!memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k))
+    {
+      fprintf(stderr, "perfect.c: Duplicate keys!  %.*s\n",
+	      (int)key1->len_k, (char *)key1->name_k);  /* '*' takes an int */
+      exit(EXIT_FAILURE);
+    }
+    break;
+  case INT_HT:
+    if (key1->hash_k == key2->hash_k)
+    {
+      fprintf(stderr, "perfect.c: Duplicate keys!  %.8x\n", key1->hash_k);
+      exit(EXIT_FAILURE);
+    }
+    break;
+  case AB_HT:            /* no further test here: equal (a,b) is always fatal */
+    fprintf(stderr, "perfect.c: Duplicate keys!  %.8x %.8x\n",
+	    key1->a_k, key1->b_k);
+    exit(EXIT_FAILURE);
+    break;
+  default:
+    fprintf(stderr, "perfect.c: Illegal hash type %u\n", (ub4)form->hashtype);
+    exit(EXIT_FAILURE);
+    break;
+  }
+}
+
+
+/*
+ * Reset tabb[] and thread every key onto the list tabb[key->b_k].list_b,
+ * reporting whether any two keys share the same (a,b) pair.
+ */
+/* tabb: output, list of keys with b for (a,b) */
+/* blen: length of tabb */
+/* keys: list of keys already hashed */
+/* form: user directives */
+/* complete: TRUE means to complete init despite collisions */
+static int
+inittab(bstuff *tabb, ub4 blen, key *keys, hashform *form, int complete)
+{
+  key *cur = keys;
+  int  distinct = TRUE;
+
+  memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
+
+  /* A key collides when an earlier key on the same b list has its a */
+  while (cur)
+  {
+    bstuff *bucket = &tabb[cur->b_k];
+    key    *peer   = bucket->list_b;
+
+    for (; peer; peer=peer->nextb_k)
+    {
+      if (cur->a_k != peer->a_k)
+        continue;
+      distinct = FALSE;
+      checkdup(cur, peer, form);
+      if (!complete)
+        return FALSE;
+    }
+    /* push cur on the front of its b list */
+    ++bucket->listlen_b;
+    cur->nextb_k = bucket->list_b;
+    bucket->list_b = cur;
+    cur = cur->next_k;
+  }
+
+  /* TRUE only if no two keys have the same (a,b) pair */
+  return distinct;
+}
+
+
+/* Normal mode initial hash: give every key its (a,b) via lookup() or checksum() */
+/* keys: list of all keys */
+/* alen: (a,b) has a in 0..alen-1, a power of 2 */
+/* blen: (a,b) has b in 0..blen-1, a power of 2 */
+/* smax: maximum range of computable hash values */
+/* salt: used to initialize the hash function */
+/* final: output, code for the final hash */
+static void
+initnorm(key *keys, ub4 alen, ub4 blen, ub4 smax, ub4 salt, gencode *final)
+{
+  key *k;
+  ub4  seed  = salt*0x9e3779b9;     /* the golden ratio; an arbitrary value */
+  ub4  abits = mylog2(alen);                           /* log base 2 of alen */
+  ub4  amask = alen-1;
+  ub4  bmask = blen-1;
+
+  if (abits+mylog2(blen) > UB4BITS)
+  {
+    /* a and b together need more than UB4BITS bits: take them from checksum() */
+    for (k=keys; k; k=k->next_k)
+    {
+      ub4 w, state[CHECKSTATE];
+      for (w=0; w<CHECKSTATE; ++w) state[w] = seed;
+      checksum( k->name_k, k->len_k, state);
+      k->a_k = state[0]&amask;
+      k->b_k = state[1]&bmask;
+    }
+    final->used = 4;
+    sprintf(final->line[0], "  ub4 i,state[CHECKSTATE],rsl;\n");
+    sprintf(final->line[1],
+	    "  for (i=0; i<CHECKSTATE; ++i) state[i]=0x%x;\n", seed);
+    sprintf(final->line[2], "  checksum(key, len, state);\n");
+    sprintf(final->line[3],
+	    "  rsl = ((state[0]&0x%x)^scramble[tab[state[1]&0x%x]]);\n",
+	    amask, bmask);
+    return;
+  }
+
+  /* one lookup() value is enough: a is its top bits, b its low bits */
+  for (k=keys; k; k=k->next_k)
+  {
+    ub4 hash = lookup(k->name_k, k->len_k, seed);
+    k->a_k = (abits > 0) ? hash>>(UB4BITS-abits) : 0;
+    k->b_k = (blen > 1) ? hash&bmask : 0;
+  }
+
+  final->used = 2;
+  sprintf(final->line[0],
+	  "  ub4 rsl, val = lookup(key, len, 0x%x);\n", seed);
+  if (smax <= 1)
+  {
+    sprintf(final->line[1], "  rsl = 0;\n");
+  }
+  else if (abits == 0)
+  {
+    sprintf(final->line[1], "  rsl = tab[val&0x%x];\n", bmask);
+  }
+  else if (blen < USE_SCRAMBLE)
+  {
+    sprintf(final->line[1], "  rsl = ((val>>%u)^tab[val&0x%x]);\n",
+	    UB4BITS-abits, bmask);
+  }
+  else
+  {
+    sprintf(final->line[1], "  rsl = ((val>>%u)^scramble[tab[val&0x%x]]);\n",
+	    UB4BITS-abits, bmask);
+  }
+}
+
+
+
+/* Inline mode initial hash: hash each key byte by byte, then split it into (a,b) */
+/* keys: list of all keys */
+/* alen: (a,b) has a in 0..alen-1, a power of 2 */
+/* blen: (a,b) has b in 0..blen-1, a power of 2 */
+/* smax: range of computable hash values */
+/* salt: used to initialize the hash function */
+/* final: generated code for final hash */
+static void
+initinl(key *keys, ub4 alen, ub4 blen, ub4 smax, ub4 salt, gencode *final)
+{
+  ub4  seed   = salt*0x9e3779b9;    /* the golden ratio; an arbitrary value */
+  ub4  bshift = UB4BITS-mylog2(blen);
+  ub4  amask  = alen-1;
+  key *k;
+  /* a is taken from the low bits of the hash, b from its top bits */
+  for (k = keys;  k != (key *)0;  k = k->next_k)
+  {
+    ub4 hash = seed;
+    ub4 pos;
+    for (pos=0; pos<k->len_k; ++pos)
+    {
+      ub4 mix = (hash<<(UB4BITS-6))+(hash>>6);
+      hash = (k->name_k[pos] ^ hash) + mix;
+    }
+    k->hash_k = hash;
+    k->a_k = (alen > 1) ? (hash & amask) : 0;
+    k->b_k = (blen > 1) ? (hash >> bshift) : 0;
+  }
+
+  /* write the one-line text of the final hash into final->line[0] */
+  final->used = 1;
+  if (smax <= 1)
+  {
+    sprintf(final->line[0], "  ub4 rsl = 0;\n");
+    return;
+  }
+
+  if (blen < USE_SCRAMBLE)
+    sprintf(final->line[0], "  ub4 rsl = ((val & 0x%x) ^ tab[val >> %u]);\n",
+	    amask, bshift);
+  else
+    sprintf(final->line[0], "  ub4 rsl = ((val & 0x%x) ^ scramble[tab[val >> %u]]);\n",
+	    amask, bshift);
+}
+
+
+/*
+ * Hash every key into (a,b) with the routine selected by form->mode.
+ * Returns:
+ *   0: didn't find distinct (a,b) for all keys
+ *   1: found distinct (a,b) for all keys, put keys in tabb[]
+ *   2: found a perfect hash, no need to do any more work
+ */
+/* keys:  list of all keys */
+/* nkeys: total number of keys */
+/* tabb:  stuff indexed by b */
+/* alen:  (a,b) has a in 0..alen-1, a power of 2 */
+/* blen:  (a,b) has b in 0..blen-1, a power of 2 */
+/* smax:  range of computable hash values */
+/* salt:  used to initialize the hash function */
+/* form:  user directives */
+/* final: code for final hash */
+static ub4
+initkey(key *keys, ub4 nkeys, bstuff *tabb, ub4 alen, ub4 blen, ub4 smax,
+	ub4 salt, hashform *form, gencode *final)
+{
+  /* Do the initial hash of the keys with the routine for this mode */
+  if (form->mode == NORMAL_HM)
+  {
+    initnorm(keys, alen, blen, smax, salt, final);
+  }
+  else if (form->mode == INLINE_HM)
+  {
+    initinl(keys, alen, blen, smax, salt, final);
+  }
+  else if (form->mode == HEX_HM || form->mode == DECIMAL_HM)
+  {
+    ub4 done = inithex(keys, nkeys, alen, blen, smax, salt, final, form);
+    if (done) return 2;
+  }
+  else
+  {
+    fprintf(stderr, "fatal error: illegal mode\n");
+    exit(1);
+  }
+
+  /* with at most one key the generated hash is the constant 0 */
+  if (nkeys <= 1)
+  {
+    final->used = 1;
+    sprintf(final->line[0], "  ub4 rsl = 0;\n");
+    return 2;
+  }
+
+  return inittab(tabb, blen, keys, form, FALSE);
+}
+
+/* Rebuild tabb[] from all keys, then run checkdup() on every pair sharing a b */
+/* tabb: array of lists of keys with the same b */
+/* blen: length of tabb, a power of 2 */
+/* form: user directives */
+static void
+duplicates(bstuff *tabb, ub4 blen, key *keys, hashform *form)
+{
+  ub4 b = 0;
+
+  (void)inittab(tabb, blen, keys, form, TRUE);
+  while (b < blen)
+  {
+    key *first, *second;
+    for (first=tabb[b].list_b; first; first=first->nextb_k)
+      for (second=first->nextb_k; second; second=second->nextb_k)
+	checkdup(first, second, form);
+    ++b;
+  }
+}
+
+
+/* Try to apply an augmenting list; returns TRUE, or FALSE after undoing a collision */
+/* rollback: FALSE applies augmenting path, TRUE rolls back */
+static int
+apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
+{
+  ub4     hash;                    /* a_k^stabb: index into tabh for one key */
+  key    *mykey;
+  bstuff *pb;
+  ub4     child;
+  ub4     parent;
+  ub4     stabb;                                         /* scramble[tab[b]] */
+
+  /* walk from child to parent */
+  for (child=tail-1; child; child=parent)       /* stops at queue entry 0 */
+  {
+    parent = tabq[child].parent_q;                    /* find child's parent */
+    pb     = tabq[parent].b_q;             /* find parent's list of siblings */
+
+    /* erase old hash values */
+    stabb = scramble[pb->val_b];
+    for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+    {
+      hash = mykey->a_k^stabb;
+      if (mykey == tabh[hash].key_h)
+      {                            /* erase hash for all of child's siblings */
+	tabh[hash].key_h = (key *)0;
+      }
+    }
+
+    /* change pb->val_b, which will change the hashes of all parent siblings */
+    pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
+
+    /* set new hash values */
+    stabb = scramble[pb->val_b];
+    for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+    {
+      hash = mykey->a_k^stabb;
+      if (rollback)
+      {
+	if (parent == 0) continue;                  /* root never had a hash */
+      }
+      else if (tabh[hash].key_h)
+      {
+	/* very rare: roll back any changes */
+	(void)apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
+	return FALSE;                                  /* failure, collision */
+      }
+      tabh[hash].key_h = mykey;
+    }
+  }
+  return TRUE;
+}
+
+
+/*
+-------------------------------------------------------------------------------
+augment(): Add item to the mapping.  Returns TRUE on success, FALSE otherwise.
+
+Construct a spanning tree of *b*s with *item* as root, where each
+parent can have all its hashes changed (by some new val_b) with 
+at most one collision, and each child is the b of that collision.
+
+I got this from Tarjan's "Data Structures and Network Algorithms".  The
+path from *item* to a *b* that can be remapped with no collision is 
+an "augmenting path".  Change values of tab[b] along the path so that 
+the unmapped key gets mapped and the unused hash value gets used.
+
+Assuming 1 key per b, if m out of n hash values are still unused, 
+you should expect the transitive closure to cover n/m nodes before 
+an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
+this approach to take about nlogn time to map all single-key b's.
+-------------------------------------------------------------------------------
+*/
+/* tabb:      stuff indexed by b */
+/* tabh:      which key is associated with which hash, indexed by hash */
+/* tabq:      queue of *b* values, this is the spanning tree */
+/* blen:      length of tabb */
+/* scramble:  final hash is a^scramble[tab[b]] */
+/* smax:      bound on val_b when blen < USE_SCRAMBLE, and on non-minimal hashes */
+/* item:      &tabb[b] for the b to be mapped */
+/* nkeys:     final hash must be in 0..nkeys-1 */
+/* highwater: a value higher than any now in tabb[].water_b */
+/* form:      user directives; the perfect and speed settings are read here */
+static int
+augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble,
+	ub4 smax, bstuff *item, ub4 nkeys, ub4 highwater, hashform *form)
+{
+  ub4  q;                      /* current position walking through the queue */
+  ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
+  ub4  limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1);  /* val_b candidates */
+  ub4  highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax); /* hash bound */
+  int  trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP);
+
+  /* initialize the root of the spanning tree */
+  tabq[0].b_q = item;
+  tail = 1;
+
+  /* construct the spanning tree by walking the queue, add children to tail */
+  for (q=0; q<tail; ++q)
+  {
+    bstuff *myb = tabq[q].b_q;                        /* the b for this node */
+    ub4     i;                              /* possible value for myb->val_b */
+
+    if (!trans && (q == 1)) 
+      break;                                  /* don't do transitive closure */
+
+    for (i=0; i<limit; ++i)
+    {
+      bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
+      key    *mykey;                       /* for walking through myb's keys */
+
+      for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
+      {
+	key    *childkey;
+	ub4 hash = mykey->a_k^scramble[i];
+
+	if (hash >= highhash) break;                        /* out of bounds */
+	childkey = tabh[hash].key_h;
+
+	if (childkey)
+	{
+	  bstuff *hitb = &tabb[childkey->b_k];
+
+	  if (childb)
+	  {
+	    if (childb != hitb) break;            /* hit at most one child b */
+	  }
+	  else
+	  {
+	    childb = hitb;                        /* remember this as childb */
+	    if (childb->water_b == highwater) break;     /* already explored */
+	  }
+	}
+      }
+      if (mykey) continue;             /* myb with i has multiple collisions */
+
+      /* add childb to the queue of reachable things */
+      if (childb) childb->water_b = highwater;
+      tabq[tail].b_q      = childb;
+      tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
+      tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
+      tabq[tail].parent_q = q;
+      ++tail;
+
+      if (!childb)
+      {                                  /* found an *i* with no collisions? */
+	/* try to apply the augmenting path */
+	if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
+	  return TRUE;        /* success, item was added to the perfect hash */
+
+	--tail;                    /* don't know how to handle such a child! */
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+/* Give every b a val_b, largest key groups first; TRUE if all could be mapped */
+static int
+perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen,
+	ub4 smax, ub4 *scramble, ub4 nkeys, hashform *form)
+{
+  ub4 nhash = (form->perfect == MINIMAL_HP) ? nkeys : smax;  /* tabh entries */
+  ub4 biggest = 0;                       /* maximum number of keys for any b */
+  ub4 b, size;
+
+  /* forget whatever earlier attempts left behind */
+  memset((void *)tabh, 0, (size_t)(sizeof(hstuff)*nhash));
+  memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1)));
+
+  /* find the longest key list hanging off any b */
+  for (b=0; b<blen; ++b)
+    if (tabb[b].listlen_b > biggest)
+      biggest = tabb[b].listlen_b;
+
+  /* In descending order by number of keys, map all *b*s */
+  for (size=biggest; size>0; --size)
+  {
+    for (b=0; b<blen; ++b)
+    {
+      if (tabb[b].listlen_b != size) continue;
+      if (augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[b], nkeys,
+		  b+1, form)) continue;
+      printf("fail to map group of size %u for tab size %u\n", size, blen);
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
+/*
+ * Simple case: user gave (a,b) in (key->a_k, key->b_k).  No more mixing, no
+ * guessing alen or blen.  Exits with EXIT_FAILURE if no perfect hash is found.
+ */
+/* tabb:     output, tab[] of the perfect hash, length *blen */
+/* alen:     output, 0..alen-1 is range for a of (a,b) */
+/* blen:     output, 0..blen-1 is range for b of (a,b) */
+/* salt:     output, initializes initial hash */
+/* final:    code for final hash */
+/* scramble: input, hash = a^scramble[tab[b]] */
+/* smax:     input, scramble[i] in 0..smax-1 */
+/* keys:     input, keys to hash */
+/* nkeys:    input, number of keys being hashed */
+/* form:     user directives */
+static void
+hash_ab(bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt, gencode *final, 
+	ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, hashform *form)
+{
+  hstuff *tabh;
+  qstuff *tabq;
+  key    *mykey;
+  ub4     i;
+  int     used_tab;
+
+  /* initially make smax the first power of two bigger than nkeys */
+  *smax = ((ub4)1<<mylog2(nkeys));
+  scrambleinit(scramble, *smax);
+
+  /* set *alen and *blen based on max A and B from user */
+  *alen = 1;
+  *blen = 1;
+  for (mykey = keys;  mykey != (key *)0;  mykey = mykey->next_k)
+  {
+    while (*alen <= mykey->a_k) *alen *= 2;
+    while (*blen <= mykey->b_k) *blen *= 2;
+  }
+  if (*alen > 2**smax)
+  {
+    fprintf(stderr,
+      "perfect.c: Can't deal with (A,B) having A bigger than twice \n");
+    fprintf(stderr,
+      "  the smallest power of two greater or equal to any legal hash.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  /* allocate working memory; remalloc is used for all three, as in findhash */
+  *tabb = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+  tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+					     nkeys : *smax),
+			     "perfect.c, tabh");
+
+  /* check that (a,b) are distinct and put them in tabb indexed by b */
+  (void)inittab(*tabb, *blen, keys, form, FALSE);
+
+  /* try with smax */
+  if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+  {
+    if (form->perfect == MINIMAL_HP)
+    {
+      printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+      exit(EXIT_FAILURE);
+    }
+    else
+    {
+      /* try with 2*smax */
+      free((void *)tabh);
+      *smax = *smax * 2;
+      scrambleinit(scramble, *smax);
+      tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
+						nkeys : *smax),
+				"perfect.c, tabh");
+      if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+      {
+	printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+	exit(EXIT_FAILURE);
+      }
+    }
+  }
+
+  /* check if tab[] was really needed */
+  for (i=0; i<*blen; ++i)
+  {
+    if ((*tabb)[i].val_b != 0) break;            /* assumes permute(0) == 0 */
+  }
+  used_tab = (i < *blen);
+
+  /* write the code for the perfect hash */
+  *salt = 1;
+  final->used = 1;
+  if (!used_tab)
+  {
+    sprintf(final->line[0], "  ub4 rsl = a;\n");
+  }
+  else if (*blen < USE_SCRAMBLE)
+  {
+    sprintf(final->line[0], "  ub4 rsl = (a ^ tab[b]);\n");
+  }
+  else
+  {
+    sprintf(final->line[0], "  ub4 rsl = (a ^ scramble[tab[b]]);\n");
+  }
+
+  printf("success, found a perfect hash\n");
+
+  free((void *)tabq);
+  free((void *)tabh);
+}
+
+
+/* guess initial values for alen and blen */
+/* alen:  output, initial alen */
+/* blen:  output, initial blen (never read before it is assigned here) */
+/* smax:  in/out, power of two >= max hash value; may be doubled for NORMAL_HP */
+/* nkeys: number of keys being hashed */
+/* form:  user directives */
+static void
+initalen(ub4 *alen, ub4 *blen, ub4 *smax, ub4 nkeys, hashform *form)
+{
+  /*
+   * Find initial *alen, *blen
+   * Initial alen and blen values were found empirically.  Some factors:
+   *
+   * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
+   *
+   * alen and blen must be powers of 2 because the values in 0..alen-1 and
+   * 0..blen-1 are produced by applying a bitmask to the initial hash function.
+   *
+   * alen must be less than smax, in fact less than nkeys, because otherwise
+   * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
+   * all the *a*s associated with a given *b*, so there would be no legal
+   * value to assign to tab[b].  This only matters when we're doing a minimal
+   * perfect hash.
+   *
+   * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
+   * and alen*blen = smax*smax/32.
+   *
+   * Values of blen less than smax/4 never work, and smax/2 always works.
+   *
+   * We want blen as small as possible because it is the number of bytes in
+   * the huge array we must create for the perfect hash.
+   *
+   * When nkey <= smax*(5/8), blen=smax/4 works much more often with 
+   * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
+   * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
+   * has something to do with 5/8 = 1/8 * 5.  For example examine 80000, 
+   * 85000, and 90000 keys with different values of alen.  This only matters
+   * if we're doing a minimal perfect hash.
+   *
+   * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
+   * Bigger than that it must produce two integers, which increases the
+   * cost of the hash per character hashed.
+   */
+  if (form->perfect == NORMAL_HP)
+  {
+    if ((form->speed == FAST_HS) && (nkeys > *smax*0.8))
+    {
+      *smax = *smax * 2;
+    }
+
+    if ((form->hashtype == INT_HT) && *smax < 32)
+      *blen = *smax;                      /* go for function speed not space */
+    else if (*smax/4 <= (1<<14))
+      *blen = ((nkeys <= *smax*0.56) ? *smax/32 :
+	       (nkeys <= *smax*0.74) ? *smax/16 : *smax/8);
+    else
+      *blen = ((nkeys <= *smax*0.6) ? *smax/16 : 
+	       (nkeys <= *smax*0.8) ? *smax/8 : *smax/4);
+
+    if ((form->speed == FAST_HS) && (*blen < *smax/8))
+      *blen = *smax/8;
+    /* alen is derived from blen, so it is chosen only once blen is final */
+    *alen = ((form->hashtype==INT_HT) && *smax>131072) ? 
+      ((ub4)1<<(UB4BITS-mylog2(*blen))) :   /* distinct keys => distinct (A,B) */
+      *smax;                         /* no reason to restrict alen to smax/2 */
+    if (*alen < 1) *alen = 1;
+    if (*blen < 1) *blen = 1;
+  }
+  else
+  {
+    switch(mylog2(*smax))
+    {
+    case 0:                  /* smax is 1; do not fall into the smax/2 case */
+      *alen = *blen = 1;
+      break;
+    case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
+      *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2;
+      *blen = *smax/2;
+      break;
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+    case 16:
+    case 17:
+      if (form->speed == FAST_HS)
+      {
+	*alen = *smax/2;
+	*blen = *smax/4;
+      }
+      else if (*smax/4 < USE_SCRAMBLE)
+      {
+	*alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+	*blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+      }
+      else
+      {
+	*alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 : 
+		 (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2);
+	*blen = *smax/4;                /* always give the small size a shot */
+      }
+      break;
+    case 18:
+      if (form->speed == FAST_HS)
+      {
+	*alen = *smax/2;
+	*blen = *smax/2;
+      }
+      else
+      {
+	*alen = *smax/8;                 /* never require the multiword hash */
+	*blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+      }
+      break;
+    case 19:
+    case 20:
+      *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2;
+      *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+      break;
+    default:
+      *alen = *smax/2;              /* just find a hash as quick as possible */
+      *blen = *smax/2;     /* we'll be thrashing virtual memory at this size */
+      break;
+    }
+  }
+}
+
+/*
+** Try to find a perfect hash function.
+** The successful initializer for the initial hash is stored in *salt.
+** If no perfect hash can be found the program exits with EXIT_FAILURE.
+*/
+/* tabb:     output, tab[] of the perfect hash, length *blen */
+/* alen:     output, 0..alen-1 is range for a of (a,b) */
+/* blen:     output, 0..blen-1 is range for b of (a,b) */
+/* salt:     output, initializes initial hash */
+/* final:    code for final hash */
+/* scramble: input, hash = a^scramble[tab[b]] */
+/* smax:     input, scramble[i] in 0..smax-1 */
+/* keys:     input, keys to hash */
+/* nkeys:    input, number of keys being hashed */
+/* form:     user directives */
+void
+findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt, gencode *final, 
+	ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, hashform *form)
+{
+  ub4 bad_initkey;                       /* how many times did initkey fail? */
+  ub4 bad_perfect;                       /* how many times did perfect fail? */
+  ub4 trysalt;                        /* trial initializer for initial hash */
+  ub4 maxalen;
+  hstuff *tabh;                       /* table of keys indexed by hash value */
+  qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
+
+  /* The case of (A,B) supplied by the user is a special case */
+  if (form->hashtype == AB_HT)
+  {
+    hash_ab(tabb, alen, blen, salt, final, 
+	    scramble, smax, keys, nkeys, form);
+    return;
+  }
+
+  /* guess initial values for smax, alen and blen */
+  *smax = ((ub4)1<<mylog2(nkeys));
+  initalen(alen, blen, smax, nkeys, form);
+
+  scrambleinit(scramble, *smax);
+
+  maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax;
+
+  /* allocate working memory */
+  *tabb = (bstuff *)remalloc((size_t)(sizeof(bstuff)*(*blen)), 
+			     "perfect.c, tabb");
+  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+  tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+					     nkeys : *smax),
+			     "perfect.c, tabh");
+
+  /* Actually find the perfect hash */
+  *salt = 0;
+  bad_initkey = 0;
+  bad_perfect = 0;
+  for (trysalt=1; ; ++trysalt)
+  {
+    ub4 rslinit;
+    /* Try to find distinct (A,B) for all keys */
+    
+    rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt,
+		      form, final);
+
+    if (rslinit == 2)
+    {      /* initkey actually found a perfect hash, not just distinct (a,b) */
+      *salt = 1;
+      *blen = 0;
+      break;
+    }
+    else if (rslinit == 0)
+    {
+      /* didn't find distinct (a,b) */
+      if (++bad_initkey >= RETRY_INITKEY)
+      {
+	/* Try to put more bits in (A,B) to make distinct (A,B) more likely */
+	if (*alen < maxalen)
+	{
+	  *alen *= 2;
+	} 
+	else if (*blen < *smax)
+	{
+	  *blen *= 2;          /* tabb and tabq are sized by blen: regrow both */
+	  free(tabq);
+	  free(*tabb);
+	  *tabb  = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+	  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+	}
+	else
+	{
+	  duplicates(*tabb, *blen, keys, form);      /* check for duplicates */
+	  printf("fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
+	  exit(EXIT_FAILURE);
+	}
+	bad_initkey = 0;
+	bad_perfect = 0;
+      }
+      continue;                             /* two keys have same (a,b) pair */
+    }
+
+    printf("found distinct (A,B) on attempt %u\n", trysalt);
+
+    /* Given distinct (A,B) for all keys, build a perfect hash */
+    if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+    {
+      if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) || 
+	  (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX))
+      {
+	if (*blen < *smax)
+	{
+	  *blen *= 2;
+	  free(*tabb);
+	  free(tabq);
+	  *tabb  = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+	  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+	  --trysalt;               /* we know this salt got distinct (A,B) */
+	}
+	else
+	{
+	  printf("fatal error: Cannot perfect hash: cannot build tab[]\n");
+	  exit(EXIT_FAILURE);
+	}
+	bad_perfect = 0;
+      }
+      continue;
+    }
+    
+    *salt = trysalt;
+    break;
+  }
+
+  printf("built perfect hash table of size %u\n", *blen);
+
+  /* free working memory */
+  free((void *)tabh);
+  free((void *)tabq);
+}
+
+/*
+------------------------------------------------------------------------------
+Input/output type routines
+------------------------------------------------------------------------------
+*/
+
+/* read one key per line from stdin and push each onto the front of *keys */
+/* keys:     list of all keys */
+/* nkeys:    number of keys */
+/* textroot: get space to store key text */
+/* keyroot:  get space for keys */
+/* form:     user directives */
+static void
+getkeys(key **keys, ub4 *nkeys, reroot *textroot, reroot *keyroot, hashform *form)
+{
+  key  *mykey;
+  char *mytext;
+  mytext = (char *)renew(textroot);
+  *keys = 0;
+  *nkeys = 0;
+  while (fgets(mytext, MAXKEYLEN, stdin))
+  {
+    mykey = (key *)renew(keyroot);
+    if (form->mode == AB_HM)
+    {
+      sscanf(mytext, "%x %x ", &mykey->a_k, &mykey->b_k);
+    }
+    else if (form->mode == ABDEC_HM)
+    {
+      sscanf(mytext, "%u %u ", &mykey->a_k, &mykey->b_k);
+    }
+    else if (form->mode == HEX_HM)
+    {
+      sscanf(mytext, "%x ", &mykey->hash_k);
+    }
+    else if (form->mode == DECIMAL_HM)
+    {
+      sscanf(mytext, "%u ", &mykey->hash_k);
+    }
+    else
+    {
+      mykey->name_k = (ub1 *)mytext;      /* the key keeps this text buffer */
+      mytext = (char *)renew(textroot);   /* fresh buffer for the next line */
+      mykey->len_k  = (ub4)strcspn((char *)mykey->name_k, "\n"); /* no newline */
+    }
+    mykey->next_k = *keys;
+    *keys = mykey;
+    ++*nkeys;
+  }
+  redel(textroot, mytext);
+}
+
+/* make the .h file: declare tab[]/scramble[] and define the PHASH* macros */
+static void
+make_h(ub4 blen, ub4 smax, ub4 nkeys, ub4 salt)
+{
+  FILE *f = fopen("phash.h", "w");
+  if (!f) { perror("fatal error: cannot write phash.h"); exit(1); }
+  fprintf(f, "/* Generated File, DO NOT EDIT */\n");
+  fprintf(f, "/* Perfect hash definitions */\n");
+  fprintf(f, "#ifndef STANDARD\n");
+  fprintf(f, "#include \"standard.h\"\n");
+  fprintf(f, "#endif /* STANDARD */\n");
+  fprintf(f, "#ifndef PHASH\n");
+  fprintf(f, "#define PHASH\n");
+  fprintf(f, "\n");
+  if (blen > 0)
+  {
+    /* element types must match the definitions make_c() writes to phash.c */
+    if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+      fprintf(f, "extern ub1 tab[];\n");
+    else
+      fprintf(f, "extern ub2 tab[];\n");
+    /* was nested in the else above, where blen < USE_SCRAMBLE: unreachable */
+    if (blen >= USE_SCRAMBLE)
+    {
+      if (smax <= UB2MAXVAL+1)
+        fprintf(f, "extern ub2 scramble[];\n");
+      else
+        fprintf(f, "extern ub4 scramble[];\n");
+    }
+    fprintf(f, "#define PHASHLEN 0x%x  /* length of hash mapping table */\n",
+            blen);
+  }
+  fprintf(f, "#define PHASHNKEYS %u  /* How many keys were hashed */\n",
+          nkeys);
+  fprintf(f, "#define PHASHRANGE %u  /* Range any input might map to */\n",
+          smax);
+  fprintf(f, "#define PHASHSALT 0x%.8x /* internal, initialize normal hash */\n",
+          salt*0x9e3779b9);
+  fprintf(f, "\n");
+  fprintf(f, "ub4 phash();\n");
+  fprintf(f, "\n");
+  fprintf(f, "#endif  /* PHASH */\n");
+  fprintf(f, "\n");
+  fclose(f);
+}
+
+/* make the .c file: scramble[] and tab[] definitions plus the phash() body */
+/* tab:      table indexed by b */
+/* smax:     range of scramble[] */
+/* blen:     b in 0..blen-1, power of 2 */
+/* scramble: used in final hash */
+/* final:    code for the final hash; line[0..used-1] become the body */
+/* form:     user directives; form->mode picks the phash() signature */
+static void
+make_c(bstuff *tab, ub4 smax, ub4 blen, ub4 *scramble, gencode *final, hashform *form)
+{
+  ub4   i;
+  FILE *f = fopen("phash.c", "w");
+  if (!f) { perror("fatal error: cannot write phash.c"); exit(1); }
+  fprintf(f, "/* Generated File, DO NOT EDIT */\n");
+  fprintf(f, "/* table for the mapping for the perfect hash */\n");
+  fprintf(f, "#ifndef STANDARD\n");
+  fprintf(f, "#include \"standard.h\"\n");
+  fprintf(f, "#endif /* STANDARD */\n");
+  fprintf(f, "#ifndef PHASH\n");
+  fprintf(f, "#include \"phash.h\"\n");
+  fprintf(f, "#endif /* PHASH */\n");
+  fprintf(f, "#ifndef LOOKUPA\n");
+  fprintf(f, "#include \"lookupa.h\"\n");
+  fprintf(f, "#endif /* LOOKUPA */\n");
+  fprintf(f, "\n");
+  if (blen >= USE_SCRAMBLE)
+  {
+    fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n");
+    if (smax > UB2MAXVAL+1)
+    {
+      fprintf(f, "ub4 scramble[] = {\n");
+      for (i=0; i<=UB1MAXVAL; i+=4)
+        fprintf(f, "0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x,\n",
+                scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
+    }
+    else
+    {
+      fprintf(f, "ub2 scramble[] = {\n");
+      for (i=0; i<=UB1MAXVAL; i+=8)
+        fprintf(f, 
+"0x%.4x, 0x%.4x, 0x%.4x, 0x%.4x, 0x%.4x, 0x%.4x, 0x%.4x, 0x%.4x,\n",
+                scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
+                scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
+    }
+    fprintf(f, "};\n");
+    fprintf(f, "\n");
+  }
+  if (blen > 0)
+  {
+    fprintf(f, "/* small adjustments to _a_ to make values distinct */\n");
+
+    if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+      fprintf(f, "ub1 tab[] = {\n");
+    else
+      fprintf(f, "ub2 tab[] = {\n");
+
+    if (blen < 16)                  /* too short for the unrolled rows below */
+    {
+      for (i=0; i<blen; ++i) fprintf(f, "%3u,", scramble[tab[i].val_b]);
+    }
+    else if (blen <= 1024)
+    {
+      for (i=0; i<blen; i+=16)
+        fprintf(f, "%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,\n",
+                scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
+                scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
+                scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
+                scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], 
+                scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], 
+                scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], 
+                scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], 
+                scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); 
+    }
+    else if (blen < USE_SCRAMBLE)
+    {
+      for (i=0; i<blen; i+=8)
+        fprintf(f, "%u,%u,%u,%u,%u,%u,%u,%u,\n",
+                scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
+                scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
+                scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
+                scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); 
+    }
+    else                /* scramble[] was emitted above: store raw val_b here */
+    {
+      for (i=0; i<blen; i+=16)
+        fprintf(f, "%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,\n",
+                tab[i+0].val_b, tab[i+1].val_b, 
+                tab[i+2].val_b, tab[i+3].val_b, 
+                tab[i+4].val_b, tab[i+5].val_b, 
+                tab[i+6].val_b, tab[i+7].val_b, 
+                tab[i+8].val_b, tab[i+9].val_b, 
+                tab[i+10].val_b, tab[i+11].val_b, 
+                tab[i+12].val_b, tab[i+13].val_b, 
+                tab[i+14].val_b, tab[i+15].val_b); 
+    }
+    fprintf(f, "};\n");
+    fprintf(f, "\n");
+  }
+  fprintf(f, "/* The hash function */\n");
+  switch(form->mode)
+  {
+  case NORMAL_HM:
+    fprintf(f, "ub4 phash(char *key, int len)\n");
+    break;
+  case INLINE_HM:
+  case HEX_HM:
+  case DECIMAL_HM:
+    fprintf(f, "ub4 phash(ub4 val)\n");
+    break;
+  case AB_HM:
+  case ABDEC_HM:
+    fprintf(f, "ub4 phash(ub4 a, ub4 b)\n");
+    break;
+  }
+  fprintf(f, "{\n");
+  for (i=0; i<final->used; ++i)
+    fputs(final->line[i], f);
+  fprintf(f, "  return rsl;\n");
+  fprintf(f, "}\n");
+  fprintf(f, "\n");
+  fclose(f);
+}
+
+/*
+------------------------------------------------------------------------------
+Read the keys from stdin, find the hash, and write phash.h and phash.c
+------------------------------------------------------------------------------
+*/
+/* form: user directives, as filled in by main() */
+static void driver(hashform *form)
+{
+  ub4       nkeys;                                         /* number of keys */
+  key      *keys;                                    /* head of list of keys */
+  bstuff   *tab;                                       /* table indexed by b */
+  ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
+  ub4       alen;                            /* a in 0..alen-1, a power of 2 */
+  ub4       blen;                            /* b in 0..blen-1, a power of 2 */
+  ub4       salt;                       /* a parameter to the hash function */
+  reroot   *textroot;                      /* MAXKEYLEN-character text lines */
+  reroot   *keyroot;                                       /* source of keys */
+  gencode   final;                                    /* code for final hash */
+  ub4       i;
+  ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
+  char      buf[10][80];                        /* buffer for generated code */
+  char     *buf2[10];                /* row pointers into buf, for final.line */
+
+  /* set up memory sources */
+  textroot = remkroot((size_t)MAXKEYLEN);
+  keyroot  = remkroot(sizeof(key));
+
+  /* set up code for final hash: 10 lines of 80 bytes each, none used yet */
+  final.line = buf2;
+  final.used = 0;
+  final.len  = 10;
+  for (i=0; i<10; ++i) final.line[i] = buf[i];
+
+  /* read in the list of keywords */
+  getkeys(&keys, &nkeys, textroot, keyroot, form);
+  printf("Read in %u keys\n",nkeys);
+
+  /* find the hash; tab is allocated by findhash() and freed below */
+  findhash(&tab, &alen, &blen, &salt, &final, 
+	   scramble, &smax, keys, nkeys, form);
+
+  /* generate the phash.h file */
+  make_h(blen, smax, nkeys, salt);
+  printf("Wrote phash.h\n");
+
+  /* generate the phash.c file */
+  make_c(tab, smax, blen, scramble, &final, form);
+  printf("Wrote phash.c\n");
+
+  /* clean up memory sources */
+  refree(textroot);
+  refree(keyroot);
+  free((void *)tab);
+  printf("Cleaned up\n");
+}
+
+
+/* Describe how to use this utility on stdout, then exit (never returns) */
+static void usage_error()
+{
+  printf("Usage: perfect [-{NnIiHhDdAaBb}{MmPp}{FfSs}] < key.txt \n");
+  printf("The input is a list of keys, one key per line.\n");
+  printf("Only one of NnIiHhDdAaBb, one of MmPp and one of FfSs may be specified.\n");
+  printf("  N,n: normal mode, key is any string (default).\n");
+  printf("  I,i: initial hash for ASCII char strings.\n");
+  printf("The initial hash must be\n");
+  printf("  hash = PHASHSALT;\n");
+  printf("  for (i=0; i<keylength; ++i) {\n");
+  printf("    hash = (hash ^ key[i]) + ((hash<<26)+(hash>>6));\n");
+  printf("  }\n");
+  printf("Note that this can be inlined in any user loop that walks\n");
+  printf("through the key anyways, eliminating the loop overhead.\n");
+  printf("  H,h: Keys are 4-byte integers in hex in this format:\n");
+  printf("ffffffff\n");
+  printf("This is good for optimizing switch statement compilation.\n");
+  printf("  D,d: Same as H,h, except in decimal not hexadecimal\n");
+  printf("  A,a: An (A,B) pair is supplied in hex in this format:\n");
+  printf("aaa bbb\n");
+  printf("  B,b: Same as A,a, except in decimal not hexadecimal\n");
+  printf("This mode does nothing but find the values of tab[].\n");
+  printf("*A* must be less than the total number of keys.\n");
+  printf("  M,m: Minimal perfect hash.  Hash will be in 0..nkeys-1 (default)\n");
+  printf("  P,p: Perfect hash.  Hash will be in 0..n-1, where n >= nkeys\n");
+  printf("and n is a power of 2.  Will probably use a smaller tab[].\n");
+  printf("  F,f: Fast mode.  Generate the perfect hash fast.\n");
+  printf("  S,s: Slow mode.  Spend time finding a good perfect hash.\n");
+
+  exit(SUCCESS);        /* file-wide convention: fatal paths exit(SUCCESS) */
+}
+
+
+/* Interpret arguments (at most one "-letters" word) and call the driver */
+/* See usage_error for the expected arguments; it is called on any misuse */
+int main(int argc, char **argv)
+{
+  int      mode_given = FALSE;
+  int      minimal_given = FALSE;
+  int      speed_given = FALSE;
+  hashform form;
+  char    *c;
+
+  /* default behavior */
+  form.mode = NORMAL_HM;
+  form.hashtype = STRING_HT;
+  form.perfect = MINIMAL_HP;
+  form.speed = SLOW_HS;
+
+  /* let the user override the default behavior */
+  switch (argc)
+  {
+  case 1:                               /* no option word: keep the defaults */
+    break;
+  case 2:
+    if (argv[1][0] != '-')
+    {
+      usage_error();
+      break;
+    }
+    /* walk the letters after '-'; each letter group may appear only once */
+    for (c = &argv[1][1]; *c != '\0'; ++c) switch(*c)
+    {
+    case 'n': case 'N':
+    case 'i': case 'I':
+    case 'h': case 'H':
+    case 'd': case 'D':
+    case 'a': case 'A':
+    case 'b': case 'B':
+      if (mode_given == TRUE) 
+	usage_error();
+      switch(*c)                     /* the mode also fixes the hashtype */
+      {
+      case 'n': case 'N':
+	form.mode = NORMAL_HM;  form.hashtype = STRING_HT; break;
+      case 'i': case 'I':
+	form.mode = INLINE_HM;  form.hashtype = STRING_HT; break;
+      case 'h': case 'H':
+	form.mode = HEX_HM;     form.hashtype = INT_HT; break;
+      case 'd': case 'D':
+	form.mode = DECIMAL_HM; form.hashtype = INT_HT; break;
+      case 'a': case 'A':
+	form.mode = AB_HM;      form.hashtype = AB_HT; break;
+      case 'b': case 'B':
+	form.mode = ABDEC_HM;   form.hashtype = AB_HT; break;
+      }
+      mode_given = TRUE;
+      break;
+    case 'm': case 'M':
+    case 'p': case 'P':
+      if (minimal_given == TRUE)
+	usage_error();
+      switch(*c)
+      {
+      case 'p': case 'P':
+	form.perfect = NORMAL_HP; break;
+      case 'm': case 'M':
+	form.perfect = MINIMAL_HP; break;
+      }
+      minimal_given = TRUE;
+      break;
+    case 'f': case 'F':
+    case 's': case 'S':
+      if (speed_given == TRUE)
+	usage_error();
+      switch(*c)
+      {
+      case 'f': case 'F':
+	form.speed = FAST_HS; break;
+      case 's': case 'S':
+	form.speed = SLOW_HS; break;
+      }
+      speed_given = TRUE;
+      break;
+    default:                                      /* unknown option letter */
+      usage_error();
+    }
+    break;
+  default:                                    /* more than one argument word */
+    usage_error();
+  }
+
+  /* Generate the [minimal] perfect hash */
+  driver(&form);
+
+  return SUCCESS;
+}
diff --git a/utils/phash/perfect.h b/utils/phash/perfect.h
new file mode 100644
index 0000000..fe2a412
--- /dev/null
+++ b/utils/phash/perfect.h
@@ -0,0 +1,157 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+------------------------------------------------------------------------------
+perfect.h: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef PERFECT
+#define PERFECT
+
+#define MAXKEYLEN 30      /* size of a key's line buffer, incl. '\n' and NUL */
+#define USE_SCRAMBLE  4096           /* use scramble if blen >= USE_SCRAMBLE */
+#define SCRAMBLE_LEN ((ub4)1<<16)                    /* length of *scramble* */
+#define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
+#define RETRY_PERFECT 1     /* number of times to try to make a perfect hash */
+#define RETRY_HEX     200               /* RETRY_PERFECT when hex keys given */
+
+/* generated text of the phash() body; assumes the initial hash is done */
+struct gencode
+{
+  char **line;                       /* array of text lines, 80 bytes apiece */
+  /*
+   * The code placed here must declare "ub4 rsl" 
+   * and assign it the value of the perfect hash using the function inputs.
+   * Later code will be tacked on which returns rsl or manipulates it according
+   * to the user directives.
+   *
+   * This code is at the top of the routine; it may and must declare any
+   * local variables it needs.
+   *
+   * Each way of filling in **line should be given a comment that is a unique
+   * tag.  A testcase named with that tag should also be found which tests
+   * the generated code.
+   */
+  ub4    len;                    /* number of lines available for final hash */
+  ub4    used;         /* number of lines used by final hash: line[0..used-1] */
+
+  ub4    lowbit;                          /* for HEX, lowest interesting bit */
+  ub4    highbit;                        /* for HEX, highest interesting bit */
+  ub4    diffbits;                         /* bits which differ for some key */
+  ub4    i,j,k,l,m,n,o;                      /* state machine used in hexn() */
+};
+typedef  struct gencode  gencode;
+
+/* user directives: perfect hash? minimal perfect hash? input is an int? */
+struct hashform
+{
+  enum {
+    NORMAL_HM,                                            /* key is a string */
+    INLINE_HM,    /* user will do initial hash, we must choose salt for them */
+    HEX_HM,              /* key to be hashed is a hexadecimal 4-byte integer */
+    DECIMAL_HM,              /* key to be hashed is a decimal 4-byte integer */
+    AB_HM,      /* key to be hashed is "A B", where A and B are (A,B) in hex */
+    ABDEC_HM                                   /* like AB_HM, but in decimal */
+  } mode;                                      /* how input lines are parsed */
+  enum {
+    STRING_HT,                                            /* key is a string */
+    INT_HT,                                             /* key is an integer */
+    AB_HT             /* dunno what key is, but input is distinct (A,B) pair */
+  } hashtype;                               /* set together with mode in main */
+  enum {
+    NORMAL_HP,                                   /* just find a perfect hash */
+    MINIMAL_HP                                /* find a minimal perfect hash */
+  } perfect;
+  enum {
+    FAST_HS,                                                    /* fast mode */
+    SLOW_HS                                                     /* slow mode */
+  } speed;
+};
+typedef  struct hashform  hashform;
+
+/* representation of a key */
+struct key
+{
+  ub1        *name_k;           /* the actual key text (set for string keys) */
+  ub4         len_k;                         /* the length of the actual key */
+  ub4         hash_k;    /* initial hash value; the key itself for int input */
+  struct key *next_k;                    /* next key in the list of all keys */
+/* beyond this point is mapping-dependent */
+  ub4         a_k;                            /* a, of the key maps to (a,b) */
+  ub4         b_k;                            /* b, of the key maps to (a,b) */
+  struct key *nextb_k;                               /* next key with this b */
+};
+typedef  struct key  key;
+
+/* things indexed by b of original (a,b) pair; findhash() allocates blen */
+struct bstuff
+{
+  ub2  val_b;                                        /* hash=a^tabb[b].val_b */
+  key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
+  ub4  listlen_b;                                        /* length of list_b */
+  ub4  water_b;           /* high watermark of who has visited this map node */
+};
+typedef  struct bstuff  bstuff;
+
+/* things indexed by final hash value (the element type of tabh) */
+struct hstuff
+{
+  key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
+};
+typedef  struct hstuff hstuff;
+
+/* things indexed by queue position; findhash() allocates blen+1 of these */
+struct qstuff
+{
+  bstuff *b_q;                        /* b that currently occupies this hash */
+  ub4     parent_q;     /* queue position of parent that could use this hash */
+  ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
+  ub2     oldval_q;                              /* original value of tab[b] */
+};
+typedef  struct qstuff  qstuff;
+
+/* return ceiling(log based 2 of x) */
+ub4 mylog2(/*_ ub4 x _*/);
+
+/* Given the keys, scramble[], and hash mode, find the perfect hash */
+void findhash(/*_ bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt,
+		gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, 
+		hashform *form _*/);
+
+/* private, but in a different file because it's excessively verbose */
+int inithex(/*_ key *keys, ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys, 
+	      ub4 salt, gencode *final, gencode *form _*/);
+
+#endif /* PERFECT */
diff --git a/utils/phash/perfhex.c b/utils/phash/perfhex.c
new file mode 100644
index 0000000..ee0d0ce
--- /dev/null
+++ b/utils/phash/perfhex.c
@@ -0,0 +1,1319 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+------------------------------------------------------------------------------
+perfhex.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, December 31 1999
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfhex.c
+
+The task of this file is to do the minimal amount of mixing needed to
+find distinct (a,b) for each key when each key is a distinct ub4.  That
+means trying all possible ways to mix starting with the fastest.  The
+output is those (a,b) pairs and code in the *final* structure for producing
+those pairs.
+------------------------------------------------------------------------------
+*/
+
+#include <stdlib.h>
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+#ifndef RECYCLE
+#include "recycle.h"
+#endif
+#ifndef PERFECT
+#include "perfect.h"
+#endif
+
+/*
+ * Perfect hash for a single key: the generated code needs no instructions.
+ * The lone key gets (a,b) = (0,0) and the emitted hash is the constant 0.
+ */
+static void
+hexone(key *keys, gencode *final)
+{
+  /* emit the one-line body of the hash */
+  sprintf(final->line[0], "  ub4 rsl = 0;\n");                    /* h1a: 37 */
+  final->used = 1;
+  /* 1 key: its (a,b) pair is trivially (0,0) */
+  keys->a_k = keys->b_k = 0;
+}
+
+
+
+/*
+ * Find a perfect hash when there are only two keys.  Max 2 instructions.
+ * There exists a bit that is different for the two keys.  Test it.
+ * Note that a perfect hash of 2 keys is automatically minimal.
+ * The expression is written to final->line[0]; duplicate keys are fatal.
+ */
+static void
+hextwo(key *keys, gencode *final)
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 i;
+  if (a == b)
+  {
+    printf("fatal error: duplicate keys\n");
+    exit(SUCCESS);
+  }
+
+  final->used = 1;
+  
+  /* one instruction */
+  if ((a&1) != (b&1))
+  {
+    sprintf(final->line[0], "  ub4 rsl = (val & 1);\n");         /* h2a: 3,4 */
+    return;
+  }
+
+  /* two instructions: find the lowest bit i in which the keys differ */
+  for (i=0; i<UB4BITS; ++i)
+  {
+    if ((a&((ub4)1<<i)) != (b&((ub4)1<<i))) break;
+  }
+  /* h2b: 4,6 -- shift bit i down to bit 0 (was "<<", always 0 for i >= 1) */
+  sprintf(final->line[0], "  ub4 rsl = ((val >> %u) & 1);\n", i);
+}
+
+
+
+/* Return k such that none of a^k, b^k, c^k equals 3, so the hole lands on 3.
+ * Requires (a,b,c) to be three distinct values in (0,1,2,3).
+ */
+static ub4
+find_adder(ub4 a, ub4 b, ub4 c)
+{
+  ub4 hole = a ^ b ^ c;           /* the value of 0..3 that a,b,c leave out */
+  return (hole ^ 3);
+}
+
+
+
+/*
+ * Find a perfect hash when there are only three keys.  Max 6 instructions.
+ * Writes the chosen expression to final->line[0] and sets final->used = 1;
+ * candidates are tried cheapest first and the first that separates a,b,c wins.
+ * keys a,b,c.  There exists bit i such that a[i] != b[i].
+ * Either c[i] != a[i] or c[i] != b[i], assume c[i] != a[i].
+ * There exists bit j such that b[j] != c[j].  Note i != j.
+ * Final hash should be no longer than val[i]^val[j].
+ *
+ * A minimal perfect hash needs to xor one of 0,1,2,3 afterwards to cause
+ * the hole to land on 3.  find_adder() finds that constant
+ */
+static void
+hexthree(key *keys, gencode *final, hashform *form)
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 c = keys->next_k->next_k->hash_k;
+  ub4 i,j,x,y,z;
+  
+  final->used = 1;
+
+  if (a == b || a == c || b == c)
+  {
+    printf("fatal error: duplicate keys\n");
+    exit(SUCCESS);
+  }
+  
+  /* one instruction: the low two bits of each key */
+  x = a&3; 
+  y = b&3;
+  z = c&3;
+  if (x != y && x != z && y != z)
+  {
+    if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+    {
+      /* h3a: 0,1,2 */
+      sprintf(final->line[0], "  ub4 rsl = (val & 3);\n");
+    }
+    else
+    {
+      /* h3b: 0,3,2 */
+      sprintf(final->line[0], "  ub4 rsl = ((val & 3) ^ %u);\n",
+	      find_adder(x,y,z));
+    }
+    return;
+  }
+
+  /* still one instruction: the top two bits of each key */
+  x = a>>(UB4BITS-2); 
+  y = b>>(UB4BITS-2); 
+  z = c>>(UB4BITS-2); 
+  if (x != y && x != z && y != z)
+  {
+    if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3)) 
+    {
+      /* h3c: 3fffffff, 7fffffff, bfffffff */
+      sprintf(final->line[0], "  ub4 rsl = (val >> %u);\n", (ub4)(UB4BITS-2));
+    }
+    else
+    {
+      /* h3d: 7fffffff, bfffffff, ffffffff */
+      sprintf(final->line[0], "  ub4 rsl = ((val >> %u) ^ %u);\n",
+	      (ub4)(UB4BITS-2), find_adder(x,y,z));
+    }
+    return;
+  }
+
+  /* two instructions: some adjacent pair of bits, shifted down and masked */
+  for (i=0; i<final->highbit; ++i)
+  {
+    x = (a>>i)&3;
+    y = (b>>i)&3;
+    z = (c>>i)&3;
+    if (x != y && x != z && y != z)
+    {
+      if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+      {
+	/* h3e: ffff3fff, ffff7fff, ffffbfff */
+	sprintf(final->line[0], "  ub4 rsl = ((val >> %u) & 3);\n", i);
+      }
+      else
+      {
+	/* h3f: ffff7fff, ffffbfff, ffffffff */
+	sprintf(final->line[0], "  ub4 rsl = (((val >> %u) & 3) ^ %u);\n", i,
+		find_adder(x,y,z));
+      }
+      return;
+    }
+  }
+
+  /* three instructions: add a shifted copy of the key to itself, then mask */
+  for (i=0; i<=final->highbit; ++i)
+  {
+    x = (a+(a>>i))&3;
+    y = (b+(b>>i))&3;
+    z = (c+(c>>i))&3;
+    if (x != y && x != z && y != z)
+    {
+      if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+      {
+	/* h3g: 0x000, 0x001, 0x100 */
+	sprintf(final->line[0], "  ub4 rsl = ((val+(val>>%u))&3);\n", i);
+      }
+      else
+      {
+	/* h3h: 0x001, 0x100, 0x101 */
+	sprintf(final->line[0], "  ub4 rsl = (((val+(val>>%u))&3)^%u);\n", i,
+		find_adder(x,y,z));
+      }
+      return;
+    }
+  }
+
+  /*
+   * Four instructions: I can prove this will always work.
+   *
+   * If the three values are distinct, there are two bits which 
+   * distinguish them.  Choose the two such bits that are closest together.
+   * If those bits are values 001 and 100 for those three values,
+   * then there either aren't any bits in between
+   * or the in-between bits aren't valued 001, 110, 100, 011, 010, or 101,
+   * because that would violate the closest-together assumption.
+   * So any in-between bits must be 000 or 111, and of 000 and 111 with
+   * the distinguishing bits won't cause them to stop being distinguishing.
+   */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i; j<=final->highbit; ++j)
+    {
+      x = ((a>>i)^(a>>j))&3;
+      y = ((b>>i)^(b>>j))&3;
+      z = ((c>>i)^(c>>j))&3;
+      if (x != y && x != z && y != z)
+      {
+	if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+	{
+	  /* h3i: 0x00, 0x04, 0x10 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val>>%u) ^ (val>>%u)) & 3);\n", i, j);
+	}
+	else
+	{
+	  /* h3j: 0x04, 0x10, 0x14 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((((val>>%u) ^ (val>>%u)) & 3) ^ %u);\n",
+		  i, j, find_adder(x,y,z));
+	}
+	return;
+      }
+    }
+  }
+
+  /* no candidate separated the keys: give up, as every fatal path here does */
+  printf("fatal error: hexthree\n");
+  exit(SUCCESS);
+}
+
+
+
+/*
+ * Return nonzero iff a,b,c,d are some permutation of 0,1,2,3.
+ * Assumes that a,b,c,d all have values less than 32.
+ */
+static int
+testfour(ub4 a, ub4 b, ub4 c, ub4 d)
+{
+  ub4 seen = (1<<a) | (1<<b) | (1<<c) | (1<<d);   /* one bit per value seen */
+  return (seen == 0xf);    /* four values covering bits 0..3 => all distinct */
+}
+
+
+
+/*
+ * Find a perfect hash when there are only four keys.  Max 10 instructions.
+ * Note that a perfect hash for 4 keys will automatically be minimal.
+ */
+static void
+hexfour(key *keys, gencode *final)
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 c = keys->next_k->next_k->hash_k;
+  ub4 d = keys->next_k->next_k->next_k->hash_k;
+  ub4 w,x,y,z;
+  ub4 i,j,k;
+
+  if (a==b || a==c || a==d || b==c || b==d || c==d)
+  {
+    printf("fatal error: Duplicate keys\n");
+    exit(SUCCESS);
+  }
+
+  final->used = 1;
+
+  /* one instruction */
+  if ((final->diffbits & 3) == 3)
+  {
+    w = a&3;
+    x = b&3;
+    y = c&3;
+    z = d&3;
+    if (testfour(w,x,y,z))
+    {
+      sprintf(final->line[0], "  ub4 rsl = (val & 3);\n");   /* h4a: 0,1,2,3 */
+      return;
+    }
+  }
+
+  if (((final->diffbits >> (UB4BITS-2)) & 3) == 3)
+  {
+    w = a>>(UB4BITS-2);
+    x = b>>(UB4BITS-2);
+    y = c>>(UB4BITS-2);
+    z = d>>(UB4BITS-2);
+    if (testfour(w,x,y,z))
+    {                         /* h4b: 0fffffff, 4fffffff, 8fffffff, cfffffff */
+      sprintf(final->line[0], "  ub4 rsl = (val >> %u);\n", (ub4)(UB4BITS-2));
+      return;
+    }
+  }
+
+  /* two instructions */
+  for (i=final->lowbit; i<final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 3) == 3)
+    {
+      w = (a>>i)&3;
+      x = (b>>i)&3;
+      y = (c>>i)&3;
+      z = (d>>i)&3;
+      if (testfour(w,x,y,z))
+      {                                                      /* h4c: 0,2,4,6 */
+	sprintf(final->line[0], "  ub4 rsl = ((val >> %u) & 3);\n", i);
+	return;
+      }
+    }
+  }
+
+  /* three instructions (linear with the number of diffbits) */
+  if ((final->diffbits & 3) != 0)
+  {
+    for (i=final->lowbit; i<=final->highbit; ++i)
+    {
+      if (((final->diffbits >> i) & 3) != 0)
+      {
+	w = (a+(a>>i))&3;
+	x = (b+(b>>i))&3;
+	y = (c+(c>>i))&3;
+	z = (d+(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4d: 0,1,2,4 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val + (val >> %u)) & 3);\n", i);
+	  return;
+	}
+
+	w = (a-(a>>i))&3;
+	x = (b-(b>>i))&3;
+	y = (c-(c>>i))&3;
+	z = (d-(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4e: 0,1,3,5 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val - (val >> %u)) & 3);\n", i);
+	  return;
+	}
+
+	/* h4f: ((val>>k)-val)&3: redundant with h4e */
+
+	w = (a^(a>>i))&3;
+	x = (b^(b>>i))&3;
+	y = (c^(c>>i))&3;
+	z = (d^(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4g: 3,4,5,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val ^ (val >> %u)) & 3);\n", i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /* four instructions (linear with the number of diffbits) */
+  if ((final->diffbits & 3) != 0)
+  {
+    for (i=final->lowbit; i<=final->highbit; ++i)
+    {
+      if ((((final->diffbits >> i) & 1) != 0) &&
+	  ((final->diffbits & 2) != 0))
+      {
+	w = (a&3)^((a>>i)&1);
+	x = (b&3)^((b>>i)&1);
+	y = (c&3)^((c>>i)&1);
+	z = (d&3)^((d>>i)&1);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4h: 1,2,6,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 3) ^ ((val >> %u) & 1));\n", i);
+	  return;
+	}
+
+	w = (a&2)^((a>>i)&1);
+	x = (b&2)^((b>>i)&1);
+	y = (c&2)^((c>>i)&1);
+	z = (d&2)^((d>>i)&1);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4i: 1,2,8,a */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 2) ^ ((val >> %u) & 1));\n", i);
+	  return;
+	}
+      }
+
+      if ((((final->diffbits >> i) & 2) != 0) &&
+	  ((final->diffbits & 1) != 0))
+      {
+	w = (a&3)^((a>>i)&2);
+	x = (b&3)^((b>>i)&2);
+	y = (c&3)^((c>>i)&2);
+	z = (d&3)^((d>>i)&2);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4j: 0,1,3,4 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 3) ^ ((val >> %u) & 2));\n", i);
+	  return;
+	}
+
+	w = (a&1)^((a>>i)&2);
+	x = (b&1)^((b>>i)&2);
+	y = (c&1)^((c>>i)&2);
+	z = (d&1)^((d>>i)&2);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4k: 1,4,7,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 1) ^ ((val >> %u) & 2));\n", i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /* four instructions (quadratic in the number of diffbits) */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 1) == 1)
+    {
+      for (j=final->lowbit; j<=final->highbit; ++j)
+      {
+	if (((final->diffbits >> j) & 3) != 0)
+	{
+	  /* test + */
+	  w = ((a>>i)+(a>>j))&3;
+	  x = ((b>>i)+(a>>j))&3;
+	  y = ((c>>i)+(a>>j))&3;
+	  z = ((d>>i)+(a>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4l: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) + (val >> %u)) & 3);\n", 
+		    i, j);
+	    return;
+	  }
+
+	  /* test - */
+	  w = ((a>>i)-(a>>j))&3;
+	  x = ((b>>i)-(a>>j))&3;
+	  y = ((c>>i)-(a>>j))&3;
+	  z = ((d>>i)-(a>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4m: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) - (val >> %u)) & 3);\n",
+		    i, j);
+	    return;
+	  }
+
+	  /* test ^ */
+	  w = ((a>>i)^(a>>j))&3;
+	  x = ((b>>i)^(a>>j))&3;
+	  y = ((c>>i)^(a>>j))&3;
+	  z = ((d>>i)^(a>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4n: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) ^ (val >> %u)) & 3);\n",
+		    i, j);
+	    return;
+	  }
+	}
+      }
+    }
+  }
+
+  /* five instructions (quadratic in the number of diffbits) */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 1) != 0)
+    {
+      for (j=final->lowbit; j<=final->highbit; ++j)
+      {
+	if (((final->diffbits >> j) & 3) != 0)
+	{
+	  w = ((a>>j)&3)^((a>>i)&1);
+	  x = ((b>>j)&3)^((b>>i)&1);
+	  y = ((c>>j)&3)^((c>>i)&1);
+	  z = ((d>>j)&3)^((d>>i)&1);
+	  if (testfour(w,x,y,z))
+	  {                                                  /* h4o: 0,4,8,a */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) & 3) ^ ((val >> %u) & 1));\n", 
+		    j, i);
+	    return;
+	  }
+	  
+	  w = ((a>>j)&2)^((a>>i)&1);
+	  x = ((b>>j)&2)^((b>>i)&1);
+	  y = ((c>>j)&2)^((c>>i)&1);
+	  z = ((d>>j)&2)^((d>>i)&1);
+	  if (testfour(w,x,y,z))
+	  {                                   /* h4p: 0x04, 0x08, 0x10, 0x14 */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) & 2) ^ ((val >> %u) & 1));\n", 
+		    j, i);
+	    return;
+	  }
+	}
+	
+	if (i==0)
+	{
+	  w = ((a>>j)^(a<<1))&3;
+	  x = ((b>>j)^(b<<1))&3;
+	  y = ((c>>j)^(c<<1))&3;
+	  z = ((d>>j)^(d<<1))&3;
+	}
+	else
+	{
+	  w = ((a>>j)&3)^((a>>(i-1))&2);
+	  x = ((b>>j)&3)^((b>>(i-1))&2);
+	  y = ((c>>j)&3)^((c>>(i-1))&2);
+	  z = ((d>>j)&3)^((d>>(i-1))&2);
+	}
+	if (testfour(w,x,y,z))
+	{
+	  if (i==0)                                          /* h4q: 0,4,5,8 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) ^ (val << 1)) & 3);\n",
+		    j);
+	  }
+	  else if (i==1)                         /* h4r: 0x01,0x09,0x0b,0x10 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) & 3) ^ (val & 2));\n",
+		    j);
+	  }
+	  else                                               /* h4s: 0,2,6,8 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %u) & 3) ^ ((val >> %u) & 2));\n",
+		    j, (i-1));
+	  }
+	  return;
+	}
+	  
+	w = ((a>>j)&1)^((a>>i)&2);
+	x = ((b>>j)&1)^((b>>i)&2);
+	y = ((c>>j)&1)^((c>>i)&2);
+	z = ((d>>j)&1)^((d>>i)&2);
+	if (testfour(w,x,y,z))                   /* h4t: 0x20,0x14,0x10,0x06 */
+	{                   
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %u) & 1) ^ ((val >> %u) & 2));\n",
+		  j, i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /*
+   * OK, bring out the big guns.
+   * There exist three bits i,j,k which distinguish a,b,c,d.
+   * i^(j<<1)^(k*q) is guaranteed to work for some q in {0,1,2,3},
+   *   proven by exhaustive search of all (8 choose 4) cases.
+   * Find three such bits and try the 4 cases.
+   * Linear with the number of diffbits.
+   * Some cases below may duplicate some cases above.  I did it that way
+   *   so that what is below is guaranteed to work, no matter what was
+   *   attempted above.
+   * The generated hash is at most 10 instructions.
+   */
+  for (i=final->lowbit; i<UB4BITS; ++i)
+  {
+    y = (c>>i)&1;
+    z = (d>>i)&1;
+    if (y != z)
+      break;
+  }
+
+  for (j=final->lowbit; j<UB4BITS; ++j)
+  {
+    x = ((b>>i)&1)^(((b>>j)&1)<<1);
+    y = ((c>>i)&1)^(((c>>j)&1)<<1);
+    z = ((d>>i)&1)^(((d>>j)&1)<<1);
+    if (x != y && x != z && y != z)
+      break;
+  }
+
+  for (k=final->lowbit; k<UB4BITS; ++k)
+  {
+    w = ((a>>i)&1)^(((a>>j)&1)<<1)^(((a>>k)&1)<<2);
+    x = ((b>>i)&1)^(((b>>j)&1)<<1)^(((b>>k)&1)<<2);
+    y = ((c>>i)&1)^(((c>>j)&1)<<1)^(((c>>k)&1)<<2);
+    z = ((d>>i)&1)^(((d>>j)&1)<<1)^(((d>>k)&1)<<2);
+    if (w != x && w != y && w != z && x != y && x != z && y != z)
+      break;
+  }
+
+  /* Assert: bits i,j,k were found which distinguish a,b,c,d */
+  if (i==UB4BITS || j==UB4BITS || k==UB4BITS)
+  {
+    printf("Fatal error: hexfour(), i %u j %u k %u\n", i,j,k);
+    exit(SUCCESS);
+  }
+
+  /* now try the four cases */
+  {
+    ub4 m,n,o,p;
+    
+    /* if any bit has two 1s and two 0s, make that bit o */
+    if (((a>>i)&1)+((b>>i)&1)+((c>>i)&1)+((d>>i)&1) != 2)
+      { m=j; n=k; o=i; }
+    else if (((a>>j)&1)+((b>>j)&1)+((c>>j)&1)+((d>>j)&1) != 2)
+      { m=i; n=k; o=j; }
+    else
+      { m=i; n=j; o=k; }
+    if (m > n) {p=m; m=n; n=p; }                          /* guarantee m < n */
+
+    /* printf("m %u n %u o %u  %u %u %u %u\n", m, n, o, w,x,y,z); */
+
+    /* seven instructions, multiply bit o by 1 */
+    w = (((a>>m)^(a>>o))&1)^((a>>(n-1))&2);
+    x = (((b>>m)^(b>>o))&1)^((b>>(n-1))&2);
+    y = (((c>>m)^(c>>o))&1)^((c>>(n-1))&2);
+    z = (((d>>m)^(d>>o))&1)^((d>>(n-1))&2);
+    if (testfour(w,x,y,z))
+    {
+      if (m>o) {p=m; m=o; o=p;}                 /* make sure m < o and m < n */
+
+      if (m==0)                                                   /* 0,2,8,9 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val^(val>>%u))&1)^((val>>%u)&2));\n", o, n-1);
+      }
+      else                                            /* 0x00,0x04,0x10,0x12 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = ((((val>>%u) ^ (val>>%u)) & 1) ^ ((val>>%u) & 2));\n",
+		m, o, n-1);
+      }
+      return;
+    }
+    
+    /* six to seven instructions, multiply bit o by 2 */
+    w = ((a>>m)&1)^((((a>>n)^(a>>o))&1)<<1);
+    x = ((b>>m)&1)^((((b>>n)^(b>>o))&1)<<1);
+    y = ((c>>m)&1)^((((c>>n)^(c>>o))&1)<<1);
+    z = ((d>>m)&1)^((((d>>n)^(d>>o))&1)<<1);
+    if (testfour(w,x,y,z))
+    {
+      if (m==o-1) {p=n; n=o; o=p;}                /* make m==n-1 if possible */
+
+      if (m==0)                                                   /* 0,1,5,8 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = ((val & 1) ^ (((val>>%u) ^ (val>>%u)) & 2));\n",
+		n-1, o-1);
+      }
+      else if (o==0)                                  /* 0x00,0x04,0x05,0x10 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val>>%u) & 2) ^ (((val>>%u) ^ val) & 1));\n",
+		m-1, n);
+      }
+      else                                            /* 0x00,0x02,0x0a,0x10 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val>>%u) & 1) ^ (((val>>%u) ^ (val>>%u)) & 2));\n",
+		m, n-1, o-1);
+      }
+      return;
+    }
+    
+    /* multiplying by 3 is a pain: seven or eight instructions */
+    w = (((a>>m)&1)^((a>>(n-1))&2))^((a>>o)&1)^(((a>>o)&1)<<1);
+    x = (((b>>m)&1)^((b>>(n-1))&2))^((b>>o)&1)^(((b>>o)&1)<<1);
+    y = (((c>>m)&1)^((c>>(n-1))&2))^((c>>o)&1)^(((c>>o)&1)<<1);
+    z = (((d>>m)&1)^((d>>(n-1))&2))^((d>>o)&1)^(((d>>o)&1)<<1);
+    if (testfour(w,x,y,z))
+    {
+      final->used = 2;
+      sprintf(final->line[0], "  ub4 b = (val >> %u) & 1;\n", o);
+      if (m==o-1 && m==0)                             /* 0x02,0x10,0x11,0x18 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 3) ^ ((val >> %u) & 2) ^ b);\n", n-1);
+      }
+      else if (m==o-1)                                            /* 0,4,6,c */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %u) & 3) ^ ((val >> %u) & 2) ^ b);\n",
+		m, n-1);
+      }
+      else if (m==n-1 && m==0)                                /* 02,0a,0b,18 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 3) ^ b ^ (b << 1));\n");
+      }
+      else if (m==n-1)                                            /* 0,2,4,8 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %u) & 3) ^ b ^ (b << 1));\n", m);
+      }
+      else if (o==n-1 && m==0)                          /* h4am: not reached */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 1) ^ ((val >> %u) & 3) ^ (b <<1 ));\n",
+		o);
+      }
+      else if (o==n-1)                                /* 0x00,0x02,0x08,0x10 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %u) & 1) ^ ((val >> %u) & 3) ^ (b << 1));\n",
+		m, o);
+      }
+      else if ((m != o-1) && (m != n-1) && (o != m-1) && (o != n-1))
+      {
+	final->used = 3;
+	sprintf(final->line[0], "  ub4 newval = val & 0x%x;\n", 
+		(((ub4)1<<m)^((ub4)1<<n)^((ub4)1<<o)));
+	if (o==0)                                     /* 0x00,0x01,0x04,0x10 */
+	{
+	  sprintf(final->line[1], "  ub4 b = -newval;\n");
+	}
+	else                                          /* 0x00,0x04,0x09,0x10 */
+	{
+	  sprintf(final->line[1], "  ub4 b = -(newval >> %u);\n", o);
+	}
+	if (m==0)                                     /* 0x00,0x04,0x09,0x10 */
+	{
+	  sprintf(final->line[2], 
+		  "  ub4 rsl = ((newval ^ (newval>>%u) ^ b) & 3);\n", n-1);
+	}
+	else                                          /* 0x00,0x03,0x04,0x10 */
+	{
+	  sprintf(final->line[2], 
+		  "  ub4 rsl = (((newval>>%u) ^ (newval>>%u) ^ b) & 3);\n",
+		  m, n-1);
+	}
+      }
+      else if (o == m-1)
+      {
+	if (o==0)                                     /* 0x02,0x03,0x0a,0x10 */
+	{
+	  sprintf(final->line[0], "  ub4 b = (val<<1) & 2;\n");
+	}
+	else if (o==1)                                /* 0x00,0x02,0x04,0x10 */
+	{
+	  sprintf(final->line[0], "  ub4 b = val & 2;\n");
+	}
+	else                                          /* 0x00,0x04,0x08,0x20 */
+	{
+	  sprintf(final->line[0], "  ub4 b = (val>>%u) & 2;\n", o-1);
+	}
+
+	if (o==0)                                     /* 0x02,0x03,0x0a,0x10 */
+	{
+	  sprintf(final->line[1],
+		  "  ub4 rsl = ((val & 3) ^ ((val>>%u) & 1) ^ b);\n",
+		  n);
+	}
+	else                                          /* 0x00,0x02,0x04,0x10 */
+	{
+	  sprintf(final->line[1],
+		  "  ub4 rsl = (((val>>%u) & 3) ^ ((val>>%u) & 1) ^ b);\n",
+		  o, n);
+	}
+      }
+      else                         /* h4ax: 10 instructions, but not reached */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val>>%u) & 1) ^ ((val>>%u) & 2) ^ b ^ (b<<1));\n",
+		m, n-1);
+      }
+
+      return;
+    }
+
+    /* five instructions, multiply bit o by 0, covered before the big guns */
+    w = ((a>>m)&1)^(a>>(n-1)&2);
+    x = ((b>>m)&1)^(b>>(n-1)&2);
+    y = ((c>>m)&1)^(c>>(n-1)&2);
+    z = ((d>>m)&1)^(d>>(n-1)&2);
+    if (testfour(w,x,y,z))
+    {                                                    /* h4v, not reached */
+      sprintf(final->line[0], 
+	      "  ub4 rsl = (((val>>%u) & 1) ^ ((val>>%u) & 2));\n", m, n-1);
+      return;
+    }
+  }
+
+  printf("fatal error: bug in hexfour!\n");
+  exit(SUCCESS);
+  return;
+}
+
+
+/* Return TRUE iff every key's a_k names a slot that is neither banned by */
+/* badmask nor already claimed by an earlier key in the list. */
+/* keys: list to check (a_k already set); badmask: bitmap of banned slots */
+static int
+testeight(key *keys, ub1 badmask)
+{
+  key *cur = keys;
+  ub1  taken = badmask;            /* banned slots start out marked as taken */
+  while (cur)
+  {
+    if (bit(taken, 1<<cur->a_k)) return FALSE;
+    bis(taken, 1<<cur->a_k);
+    cur = cur->next_k;
+  }
+  return TRUE;
+}
+
+
+
+/*
+ * Try to find a table-free perfect hash into 0..7 for five to eight keys.
+ * Candidates (val&7, a shifted field, +/^/- of two shifts, + of three) are
+ * stored in each key's a_k and checked with testeight(); no guarantee any
+ * works.  On success: code in final->line[0], used=1, return TRUE; else FALSE.
+ */
+static int
+hexeight(key *keys, ub4 nkeys, gencode *final, hashform *form)
+{
+  key *mykey;                                       /* walk through the keys */
+  ub4  i,j,k;
+  ub1  badmask;
+
+  printf("hexeight\n");
+
+  /* minimal hashes must land in 0..nkeys-1, so ban slots nkeys..7 */
+  badmask = 0;
+  if (form->perfect == MINIMAL_HP)
+  {
+    for (i=nkeys; i<8; ++i)
+      bis(badmask,(1<<i));
+  }
+
+  /* one instruction */
+  for (mykey=keys; mykey; mykey=mykey->next_k)
+    mykey->a_k = mykey->hash_k & 7;
+  if (testeight(keys, badmask))
+  {                                                                   /* h8a */
+    final->used = 1;
+    sprintf(final->line[0], "  ub4 rsl = (val & 7);\n");
+    return TRUE;
+  }
+
+  /* two instructions */
+  for (i=final->lowbit; i<=final->highbit-2; ++i) /* NOTE(review): assumes highbit >= 2; presumably wraps otherwise (ub4) */
+  {
+    for (mykey=keys; mykey; mykey=mykey->next_k)
+      mykey->a_k = (mykey->hash_k >> i) & 7;
+    if (testeight(keys, badmask))
+    {                                                                 /* h8b */
+      final->used = 1;
+      sprintf(final->line[0], "  ub4 rsl = ((val >> %u) & 7);\n", i);
+      return TRUE;
+    }
+  }
+
+  /* four instructions: +, ^ and - of two shifted copies, tried in that order */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i+1; j<=final->highbit; ++j)
+    {
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)+(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8c */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val + (val >> %u)) & 7);\n", j);
+	else                                                          /* h8d */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %u) + (val >> %u)) & 7);\n", i, j);
+	return TRUE;
+      }
+
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)^(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8e */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val ^ (val >> %u)) & 7);\n", j);
+	else                                                          /* h8f */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %u) ^ (val >> %u)) & 7);\n", i, j);
+
+	return TRUE;
+      }
+
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)-(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8g */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val - (val >> %u)) & 7);\n", j);
+	else                                                          /* h8h */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %u) - (val >> %u)) & 7);\n", i, j);
+
+	return TRUE;
+      }
+    }
+  }
+
+
+  /* six instructions: sum of three shifted copies */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i+1; j<=final->highbit; ++j)
+    {
+      for (k=j+1; k<=final->highbit; ++k)
+      {
+	for (mykey=keys; mykey; mykey=mykey->next_k)
+	  mykey->a_k  = ((mykey->hash_k >> i) +
+			 (mykey->hash_k >> j) +
+			 (mykey->hash_k >> k)) & 7;
+	if (testeight(keys, badmask))
+	{                                                             /* h8i */
+	  final->used = 1;
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %u) + (val >> %u) + (val >> %u)) & 7);\n", 
+		  i, j, k);
+	  return TRUE;
+	}
+      }
+    }
+  }
+
+
+  return FALSE;
+}
+
+
+
+/*
+ * Guns aren't enough.  Bring out the Bomb.  Use tab[].
+ * This finds the initial (a,b) when we need to use tab[].
+ *
+ * We need to produce a different (a,b) every time this is called.  Try all
+ * reasonable cases, fastest first.
+ *
+ * The initial mix (which this determines) can be filled into final starting
+ * at line[1].  val is set and a,b are declared.  The final hash (at line[7])
+ * is a^tab[b] or a^scramble[tab[b]].
+ *
+ * The code will probably look like this, minus some stuff:
+ *     val += CONSTANT;
+ *     val ^= (val<<16);
+ *     val += (val>>8);
+ *     val ^= (val<<4);
+ *     b = (val >> l) & 7;
+ *     a = (val + (val<<m)) >> 29;
+ *     return a^scramble[tab[b]];
+ * Note that *a* and tab[b] will be computed in parallel by most modern chips.
+ *
+ * final->i is the state (1..9) and final->j, final->k the simulated loop
+ * counters; each call sets a_k,b_k on all keys and the matching line[] text.
+ */
+static void
+hexn(key *keys, ub4 salt, ub4 alen, ub4 blen, gencode *final)
+{
+  key *mykey;
+  ub4  highbit = final->highbit;
+  ub4  lowbit = final->lowbit;
+  ub4  alog = mylog2(alen);      /* a is taken as x >> (UB4BITS-alog) below */
+  ub4  blog = mylog2(blen);      /* b is taken as x >> (UB4BITS-blog) below */
+
+  for (;;)
+  {
+    switch(final->i)  /* NOTE(review): no default case; an i outside 1..9 spins forever */
+    {
+    case 1:
+      /* state 1, e.g. a = val>>30; b=val&3 (a from high bits, b from low) */
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->a_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-alog);
+	mykey->b_k = (mykey->hash_k >> lowbit) & (blen-1);
+      }
+      if (lowbit == 0)                                                /* hna */
+	sprintf(final->line[5], "  b = (val & 0x%x);\n", 
+		blen-1);
+      else                                                            /* hnb */
+	sprintf(final->line[5], "  b = ((val >> %u) & 0x%x);\n", 
+		lowbit, blen-1);
+      if (highbit+1 == UB4BITS)                                       /* hnc */
+	sprintf(final->line[6], "  a = (val >> %u);\n",
+		UB4BITS-alog);
+      else                                                            /* hnd */
+	sprintf(final->line[6], "  a = ((val << %u ) >> %u);\n",
+		UB4BITS-(highbit+1), UB4BITS-alog);
+  
+      ++final->i;                       /* the next call resumes in state 2 */
+      return;
+
+    case 2:
+      /* state 2, the mirror image, e.g. a = val&3; b=val>>30 */
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->a_k = (mykey->hash_k >> lowbit) & (alen-1);
+	mykey->b_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-blog);
+      }
+      if (highbit+1 == UB4BITS)                                       /* hne */
+	sprintf(final->line[5], "  b = (val >> %u);\n",
+		UB4BITS-blog);
+      else                                                            /* hnf */
+	sprintf(final->line[5], "  b = ((val << %u ) >> %u);\n",
+		UB4BITS-(highbit+1), UB4BITS-blog);
+      if (lowbit == 0)                                                /* hng */
+	sprintf(final->line[6], "  a = (val & 0x%x);\n", 
+		alen-1);
+      else                                                            /* hnh */
+	sprintf(final->line[6], "  a = ((val >> %u) & 0x%x);\n", 
+		lowbit, alen-1);
+  
+      ++final->i;                       /* the next call resumes in state 3 */
+      return;
+
+    case 3:
+      /*
+       * cases 3,4,5:
+       * for (k=lowbit; k<=highbit; ++k)
+       *   for (j=lowbit; j<=highbit; ++j)
+       *     b = (val>>j)&3;
+       *     a = (val<<k)>>30;
+       */
+      final->k = lowbit;
+      final->j = lowbit;
+      ++final->i;
+      break;
+
+    case 4:
+      if (!(final->j < highbit))           /* j loop finished: go advance k */
+      {
+	++final->i;
+	break;
+      }
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->b_k = (mykey->hash_k >> (final->j)) & (blen-1);
+	mykey->a_k = (mykey->hash_k << (UB4BITS-final->k-1)) >> (UB4BITS-alog);
+      }
+      if (final->j == 0)                                              /* hni */
+	sprintf(final->line[5], "  b = val & 0x%x;\n",
+		blen-1);
+      else if (blog+final->j == UB4BITS)                             /* hnja */
+	sprintf(final->line[5], "  b = val >> %u;\n",
+		final->j);
+      else
+	sprintf(final->line[5], "  b = (val >> %u) & 0x%x;\n",      /* hnj */
+		final->j, blen-1);
+      if (UB4BITS-final->k-1 == 0)                                    /* hnk */
+	sprintf(final->line[6], "  a = (val >> %u);\n",
+		UB4BITS-alog);
+      else                                                            /* hnl */
+	sprintf(final->line[6], "  a = ((val << %u) >> %u);\n",
+		UB4BITS-final->k-1, UB4BITS-alog);
+      while (++final->j < highbit)   /* skip j whose diffbits window is <= 2 */
+      {
+	if (((final->diffbits>>(final->j)) & (blen-1)) > 2)
+	  break;
+      }
+      return;
+
+    case 5:
+      while (++final->k < highbit)  /* skip k whose diffbits window is empty */
+      {
+	if ((((final->diffbits<<(UB4BITS-final->k-1))>>alog) & (alen-1)) > 0)
+	  break;
+      }
+      if (!(final->k < highbit))
+      {
+	++final->i;
+	break;
+      }
+      final->j = lowbit;
+      final->i = 4;
+      break;
+
+
+    case 6:
+      /*
+       * cases 6,7,8:
+       * for (k=0; k<UB4BITS-alog; ++k)
+       *   for (j=0; j<UB4BITS-blog; ++j)
+       *     val = val+f(salt);
+       *     val ^= (val >> 16);
+       *     val += (val << 8);
+       *     val ^= (val >> 4);
+       *     b = (val >> j) & 3;
+       *     a = (val + (val << k)) >> 30;
+       */
+      final->k = 0;
+      final->j = 0;
+      ++final->i;
+      break;
+
+    case 7:
+      /* Just do something that will surely work */
+      {
+	ub4 addk = 0x9e3779b9*salt;     /* additive constant derived from salt */
+
+	if (!(final->j <= UB4BITS-blog))
+	{
+	  ++final->i;
+	  break;
+	}
+	for (mykey=keys; mykey; mykey=mykey->next_k)
+	{
+	  ub4 val = mykey->hash_k + addk;
+	  if (final->highbit+1 - final->lowbit > 16)
+	    val ^= (val >> 16);
+	  if (final->highbit+1 - final->lowbit > 8)
+	    val += (val << 8);
+	  val ^= (val >> 4);
+	  mykey->b_k = (val >> final->j) & (blen-1);
+	  if (final->k == 0)
+	    mykey->a_k = val >> (UB4BITS-alog);
+	  else
+	    mykey->a_k = (val + (val << final->k)) >> (UB4BITS-alog);
+	}
+	sprintf(final->line[1], "  val += 0x%x;\n", addk);
+	if (final->highbit+1 - final->lowbit > 16)                    /* hnm */
+	  sprintf(final->line[2], "  val ^= (val >> 16);\n");
+	if (final->highbit+1 - final->lowbit > 8)                     /* hnn */
+	  sprintf(final->line[3], "  val += (val << 8);\n");
+	sprintf(final->line[4], "  val ^= (val >> 4);\n");
+	if (final->j == 0)              /* hno: don't know how to reach this */
+	  sprintf(final->line[5], "  b = val & 0x%x;\n", blen-1);
+	else                                                          /* hnp */
+	  sprintf(final->line[5], "  b = (val >> %u) & 0x%x;\n",
+		  final->j, blen-1);
+	if (final->k == 0)                                            /* hnq */
+	  sprintf(final->line[6], "  a = val >> %u;\n", UB4BITS-alog);
+	else                                                          /* hnr */
+	  sprintf(final->line[6], "  a = (val + (val << %u)) >> %u;\n",
+		  final->k, UB4BITS-alog);
+
+	++final->j;
+	return;
+      }
+
+    case 8:
+      ++final->k;
+      if (!(final->k <= UB4BITS-alog))
+      {
+	++final->i;
+	break;
+      }
+      final->j = 0;
+      final->i = 7;
+      break;
+
+    case 9:
+      final->i = 6;  /* salted search used up: start it over (presumably the caller supplies a new salt) */
+      break;
+    }
+  }
+}
+
+
+
+/* Fill in final->diffbits with every bit in which some key's hash_k differs
+   from the first key's, then record the lowest and highest such bit in
+   final->lowbit and final->highbit. */
+static void
+setlow(key *keys, gencode *final)
+{
+  key *cur;
+  ub4  pos;
+  ub4  ref;                                        /* hash of the first key */
+
+  /* accumulate the bits where any key disagrees with the first one */
+  ref = keys ? keys->hash_k : (ub4)0;
+  final->diffbits = (ub4)0;
+  for (cur = keys; cur; cur = cur->next_k)
+    final->diffbits |= (ref ^ cur->hash_k);
+
+  /* walk up from bit 0 to the lowest set bit; UB4BITS if none is set */
+  pos = 0;
+  while (pos < UB4BITS && !(final->diffbits & (((ub4)1)<<pos)))
+    ++pos;
+  final->lowbit = pos;
+
+  /* walk down from the top bit to the highest set bit; 0 if none above it */
+  pos = UB4BITS-1;
+  while (pos != 0 && !(final->diffbits & (((ub4)1)<<pos)))
+    --pos;
+  final->highbit = pos;
+}
+
+/* 
+ * Initialize (a,b) when keys are integers.
+ *
+ * Normally there's an initial hash which produces a number.  That hash takes
+ * an initializer.  Changing the initializer causes the initial hash to 
+ * produce a different (uniformly distributed) number without any extra work.
+ *
+ * Well, here we start with a number.  There's no initial hash.  Any mixing
+ * costs extra work.  So we go through a lot of special cases to minimize the
+ * mixing needed to get distinct (a,b).  For small sets of keys, it's often
+ * fastest to skip the final hash and produce the perfect hash from the number
+ * directly.
+ *
+ * The target user for this is switch statement optimization.  The common case
+ * is 3 to 16 keys, and instruction counts matter.  The competition is a 
+ * binary tree of branches.
+ *
+ * Return TRUE if we found a perfect hash and no more work is needed.
+ * Return FALSE if we just did an initial hash and more work is needed.
+ */
+/* keys:  list of all keys */
+/* nkeys: number of keys to hash */
+/* alen:  (a,b) has a in 0..alen-1, a power of 2 */
+/* blen:  (a,b) has b in 0..blen-1, a power of 2 */
+/* smax:  maximum range of computable hash values */
+/* salt:  used to initialize the hash function */
+/* final: output, code for the final hash */
+/* form:  user directives */
+int
+inithex(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub4 salt, gencode *final, hashform *form)
+{
+  ub4 ln;                                 /* index into final->line[] */
+
+  setlow(keys, final);
+  /* one to four keys go to a dedicated routine and are reported as done */
+  if (nkeys == 1)
+  {
+    hexone(keys, final);
+    return TRUE;
+  }
+  if (nkeys == 2)
+  {
+    hextwo(keys, final);
+    return TRUE;
+  }
+  if (nkeys == 3)
+  {
+    hexthree(keys, final, form);
+    return TRUE;
+  }
+  if (nkeys == 4)
+  {
+    hexfour(keys, final);
+    return TRUE;
+  }
+
+  /* five to eight keys: on the first pass only, hope hexeight gets lucky */
+  if (nkeys >= 5 && nkeys <= 8 && salt == 1 &&
+      hexeight(keys, nkeys, final, form))
+    return TRUE;
+
+  /* otherwise tab[] is needed; lay out the code skeleton on the first pass */
+  if (salt == 1)
+  {
+    final->used = 8;
+    final->i = 1;
+    final->j = final->k = final->l = final->m = final->n = final->o = 0;
+    sprintf(final->line[0], "  ub4 a, b, rsl;\n");
+    for (ln = 1; ln <= 6; ++ln)            /* placeholders filled in by hexn */
+      sprintf(final->line[ln], "\n");
+    if (blen < USE_SCRAMBLE)                                          /* hns */
+      sprintf(final->line[7], "  rsl = (a^tab[b]);\n");
+    else                                                              /* hnt */
+      sprintf(final->line[7], "  rsl = (a^scramble[tab[b]]);\n");
+  }
+  hexn(keys, salt, alen, blen, final);
+  return FALSE;
+}
diff --git a/utils/phash/phash.c b/utils/phash/phash.c
new file mode 100644
index 0000000..a4183c3
--- /dev/null
+++ b/utils/phash/phash.c
@@ -0,0 +1,28 @@
+/* Generated File, DO NOT EDIT */
+/* table for the mapping for the perfect hash */
+#ifndef STANDARD
+#include "standard.h"
+#endif /* STANDARD */
+#ifndef PHASH
+#include "phash.h"
+#endif /* PHASH */
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif /* LOOKUPA */
+
+/* tab[b]: small adjustment XORed into _a_ by phash() to make values distinct */
+ub1 tab[] = {
+20,70,0,4,61,76,0,119,0,0,16,4,10,1,61,76,
+61,0,0,16,1,61,0,76,0,123,32,70,28,34,119,51,
+0,76,4,122,70,0,0,43,0,106,20,83,0,0,28,66,
+79,0,1,47,79,122,0,0,71,75,85,26,0,103,0,76,
+};
+
+/* The hash function: a = mixed>>26, adjusted by tab[b] with b = mixed&0x3f */
+ub4 phash(char *key, int len)
+{
+  ub4 mixed = lookup(key, len, 0x9e3779b9);    /* initial hash of the key */
+  ub4 slot = ((mixed>>26)^tab[mixed&0x3f]);
+  return slot;
+}
+
diff --git a/utils/phash/phash.h b/utils/phash/phash.h
new file mode 100644
index 0000000..a9396a6
--- /dev/null
+++ b/utils/phash/phash.h
@@ -0,0 +1,18 @@
+/* Generated File, DO NOT EDIT */
+/* Perfect hash definitions */
+#ifndef STANDARD
+#include "standard.h"
+#endif /* STANDARD */
+#ifndef PHASH
+#define PHASH
+
+extern ub1 tab[];
+#define PHASHLEN 0x40  /* length of hash mapping table */
+#define PHASHNKEYS 99  /* How many keys were hashed */
+#define PHASHRANGE 128  /* Range any input might map to */
+#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */
+
+ub4 phash(char *key, int len);  /* full prototype, same as the definition in phash.c, so calls are type-checked */
+
+#endif  /* PHASH */
+
diff --git a/utils/phash/recycle.c b/utils/phash/recycle.c
new file mode 100644
index 0000000..3497309
--- /dev/null
+++ b/utils/phash/recycle.c
@@ -0,0 +1,115 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.c
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef STANDARD
+# include "standard.h"
+#endif
+#ifndef RECYCLE
+# include "recycle.h"
+#endif
+
+reroot *
+remkroot(size_t size)
+{
+   reroot *root;          /* new root: no blocks, no trash, nothing left over */
+   root = (reroot *)remalloc(sizeof(reroot), "recycle.c, root");
+   root->size    = align(size);        /* item size after the align() macro */
+   root->logsize = RESTART;
+   root->numleft = 0;
+   root->trash   = root->list = (recycle *)0;
+   return root;
+}
+
+void
+refree(struct reroot *r)
+{
+   /* unlink and free each malloced block on r->list in turn, */
+   /* then free the root structure itself */
+   while (r->list != NULL)
+   {
+      recycle *doomed = r->list;
+
+      r->list = doomed->next;
+      free((char *)doomed);
+   }
+   free((char *)r);
+}
+
+/* Helper for the macro renew (call it only from there): hand out one item,
+   either reused from the trash list or taken from a freshly malloced block. */
+char *
+renewx(struct reroot *r)
+{
+   recycle *blk;
+
+   if (r->trash)
+   {  /* reuse a previously deleted item, zeroed before it is handed out */
+      recycle *item = r->trash;
+      r->trash = item->next;
+      (void)memset((void *)item, 0, r->size);
+      return (char *)item;
+   }
+
+   /* nothing to reuse: malloc size*(1<<logsize) bytes plus a list header */
+   r->numleft = r->size*((ub4)1<<r->logsize);
+   if (r->numleft < REMAX) ++r->logsize;       /* next block is twice as big */
+   blk = (recycle *)remalloc(sizeof(recycle) + r->numleft, "recycle.c, data");
+   blk->next = r->list;
+   r->list = blk;
+   r->numleft-=r->size;            /* the last item of the block is returned */
+   return (char *)(recycle *)((char *)(blk+1)+r->numleft);
+}
+
+/* malloc len bytes or die: on failure name purpose on stderr, exit nonzero */
+char *
+remalloc(size_t len, char *purpose)
+{
+  char *x = (char *)malloc(len);
+  if (!x)
+  {
+    fprintf(stderr, "malloc of %zu failed for %s\n", len, purpose);
+    exit(EXIT_FAILURE);  /* was exit(SUCCESS): a failure must not report success */
+  }
+  return x;
+}
+
diff --git a/utils/phash/recycle.h b/utils/phash/recycle.h
new file mode 100644
index 0000000..fd7465a
--- /dev/null
+++ b/utils/phash/recycle.h
@@ -0,0 +1,90 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.h
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing all structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef RECYCLE
+#define RECYCLE
+
+#define RESTART    0       /* initial logsize: the first block holds 2^0 items */
+#define REMAX      32000   /* block sizes stop doubling at this many bytes */
+
+struct recycle
+{
+   struct recycle *next;     /* link in a root's block list or trash list */
+};
+typedef  struct recycle  recycle;
+
+struct reroot
+{
+   struct recycle *list;     /* list of malloced blocks, newest block first */
+   struct recycle *trash;    /* list of deleted items awaiting reuse */
+   size_t          size;     /* size of an item in bytes, after align() */
+   size_t          logsize;  /* log_2 of number of items in the next block */
+   word            numleft;  /* bytes not yet handed out from the newest block */
+};
+typedef  struct reroot  reroot;
+
+/* make a new recycling root; mysize is the item size in bytes */
+reroot  *remkroot(size_t mysize);
+
+/* free a recycling root and all the items it has made */
+void     refree(struct reroot *r);
+
+/* get a new item from the root; only items reused from trash are zeroed */
+#define renew(r) ((r)->numleft ? \
+   (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r))
+
+char    *renewx(struct reroot *r);
+
+/* delete an item; let the root recycle it */
+/* void     redel(/o_ struct reroot *r, struct recycle *item _o/); */
+#define redel(root,item) { \
+   ((recycle *)(item))->next=(root)->trash; \
+   (root)->trash=(recycle *)(item); \
+}
+
+/* malloc, but complain to stderr and exit program if no joy */
+/* use plain free() to free memory allocated by remalloc() */
+char    *remalloc(size_t len, char *purpose);
+
+#endif  /* RECYCLE */
diff --git a/utils/phash/standard.h b/utils/phash/standard.h
new file mode 100644
index 0000000..cba1c43
--- /dev/null
+++ b/utils/phash/standard.h
@@ -0,0 +1,82 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+------------------------------------------------------------------------------
+Standard definitions and types, Bob Jenkins
+------------------------------------------------------------------------------
+*/
+#ifndef STANDARD
+# define STANDARD
+# ifndef STDIO
+#  include <stdio.h>
+#  define STDIO
+# endif
+# ifndef STDDEF
+#  include <stddef.h>
+#  define STDDEF
+# endif
+typedef  unsigned long long  ub8;   /* unsigned 8-byte quantities (assumes 64-bit long long) */
+#define UB8MAXVAL 0xffffffffffffffffLL
+#define UB8BITS 64
+typedef    signed long long  sb8;   /* signed 8-byte quantities (assumes 64-bit long long) */
+#define SB8MAXVAL 0x7fffffffffffffffLL
+typedef  unsigned int  ub4;   /* unsigned 4-byte quantities */
+#define UB4MAXVAL 0xffffffff
+typedef    signed int  sb4;   /* signed 4-byte quantities (assumes 32-bit int) */
+#define UB4BITS 32
+#define SB4MAXVAL 0x7fffffff
+typedef  unsigned short int  ub2;   /* unsigned 2-byte quantities (assumes 16-bit short) */
+#define UB2MAXVAL 0xffff
+#define UB2BITS 16
+typedef    signed short int  sb2;   /* signed 2-byte quantities (assumes 16-bit short) */
+#define SB2MAXVAL 0x7fff
+typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
+#define UB1MAXVAL 0xff
+#define UB1BITS 8
+typedef    signed       char sb1;   /* signed 1-byte quantities */
+#define SB1MAXVAL 0x7f
+typedef                 int  word;  /* fastest type available */
+
+#define bis(target,mask)  ((target) |=  (mask))   /* set the mask bits in target */
+#define bic(target,mask)  ((target) &= ~(mask))   /* clear the mask bits in target */
+#define bit(target,mask)  ((target) &   (mask))   /* nonzero if any mask bit is set */
+#ifndef min
+# define min(a,b) (((a)<(b)) ? (a) : (b))   /* evaluates one argument twice */
+#endif /* min */
+#ifndef max
+# define max(a,b) (((a)<(b)) ? (b) : (a))   /* evaluates one argument twice */
+#endif /* max */
+#ifndef align   /* round a up to the next multiple of sizeof(void *) */
+# define align(a) (((size_t)(a)+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
+#endif /* align */
+#ifndef abs   /* only defined when abs is not already a macro */
+# define abs(a)   (((a)>0) ? (a) : -(a))   /* evaluates a twice */
+#endif
+#define TRUE  1
+#define FALSE 0
+#define SUCCESS 0  /* value passed to exit() by this code; 1 on VAX */
+
+#endif /* STANDARD */
diff --git a/utils/phash/testperf.c b/utils/phash/testperf.c
new file mode 100644
index 0000000..fd17bc4
--- /dev/null
+++ b/utils/phash/testperf.c
@@ -0,0 +1,231 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *      
+ */
+
+/*
+----------------------------------------------------------------------------
+Test a perfect hash.
+By Bob Jenkins.  Public Domain.
+----------------------------------------------------------------------------
+*/
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef RECYCLE
+#include "recycle.h"
+#endif
+#ifndef PHASH
+#include "phash.h"
+#endif
+
+/* user directives: perfect hash? minimal perfect hash? input is an int? */
+struct hashform
+{
+  enum {
+    NORMAL_HM,                                            /* key is a string */
+    INLINE_HM,   /* user will do initial hash, we must choose salt for them */
+    HEX_HM,              /* key to be hashed is a hexadecimal 4-byte integer */
+    DECIMAL_HM,              /* key to be hashed is a decimal 4-byte integer */
+    AB_HM,      /* key to be hashed is "A B", where A and B are (A,B) in hex */
+    ABDEC_HM                                /* same as AB_HM, but in decimal */
+  } mode;
+};
+typedef  struct hashform  hashform;
+
+#define MAXKEYLEN  30   /* size in bytes of the fgets buffer for one key line */
+struct key
+{
+  char *kname;          /* text of the line the key was read from */
+  ub4   klen;           /* number of leading kname bytes that form the key */
+  struct key *knext;    /* next key in the list */
+};
+typedef  struct key  key;
+
+/* get the list of keys: one per stdin line, most recently read key first */
+static void getkeys(
+key    **keys,        /* list of all keys */
+ub4     *nkeys,       /* number of keys */
+reroot  *textroot,    /* get space to store key text */
+reroot  *keyroot)     /* get space for keys */
+{
+  key  *mykey;
+  char *mytext = (char *)renew(textroot);
+  *keys  = (key *)0;
+  *nkeys = (ub4)0;
+  while (fgets(mytext, MAXKEYLEN, stdin))
+  {
+    size_t len = strlen(mytext);
+    if (len > 0 && mytext[len-1] == '\n') --len;  /* newline may be absent */
+    mykey = (key *)renew(keyroot);
+    mykey->kname = mytext;            /* kname is char *, no ub1 cast needed */
+    mykey->klen  = (ub4)len;
+    mytext = (char *)renew(textroot);
+    mykey->knext = *keys;
+    *keys = mykey;
+    ++*nkeys;
+  }
+  redel(textroot, mytext);            /* the last buffer was never filled */
+}
+
+
+/*
+------------------------------------------------------------------------------
+Read in the keys from stdin and print each key's phash() value
+------------------------------------------------------------------------------
+*/
+/* form->mode selects how each key line is turned into phash() arguments */
+void driver(hashform *form)
+{
+  ub4     nkeys;      /* number of keys */
+  key    *keys;       /* head of list of keys */
+  key    *mykey;
+  reroot *textroot;   /* MAXKEYLEN-character text lines */
+  reroot *keyroot;    /* source of keys */
+
+  /* set up memory sources */
+  textroot = remkroot((size_t)MAXKEYLEN);
+  keyroot  = remkroot(sizeof(key));
+
+  /* read in the list of keywords */
+  getkeys(&keys, &nkeys, textroot, keyroot);
+  printf("Read in %u keys\n",nkeys);
+
+  for (mykey=keys; mykey; mykey=mykey->knext)
+  {
+    ub4 hash = 0;   /* stays defined even when sscanf matches nothing */
+    ub4 i;
+    ub4 a = 0;
+    ub4 b = 0;
+    switch(form->mode)
+    {
+    case NORMAL_HM:
+      hash = phash(mykey->kname, mykey->klen);
+      break;
+    case INLINE_HM:
+      hash = PHASHSALT;
+      for (i=0; i<mykey->klen; ++i)
+      {
+        hash = (mykey->kname[i] ^ hash) + ((hash<<26)+(hash>>6));
+      }
+      hash = phash(hash);
+      break;
+    case HEX_HM:
+      sscanf(mykey->kname, "%x ", &hash);
+      hash = phash(hash);
+      break;
+    case DECIMAL_HM:
+      sscanf(mykey->kname, "%u ", &hash);
+      hash = phash(hash);
+      break;
+    case AB_HM:
+      sscanf(mykey->kname, "%x %x ", &a, &b);
+      hash = phash(a,b);
+      break;
+    case ABDEC_HM:
+      sscanf(mykey->kname, "%u %u ", &a, &b);
+      hash = phash(a,b);
+      break;
+    }
+    printf("%8u  %.*s\n", hash, (int)mykey->klen, mykey->kname);
+  }
+
+  /* clean up memory sources */
+  refree(textroot);
+  refree(keyroot);
+}
+
+
+void usage_error(void)  /* bad arguments: complain on stderr, exit nonzero */
+{
+  fprintf(stderr, "usage is the same as perfect (which see)\n");
+  exit(EXIT_FAILURE);
+}
+
+/* Parse the optional "-<letters>" argument (same letters as perfect), then
+   hash every key read from stdin.  Returns 0 once driver() completes. */
+int main(int argc, char **argv)
+{
+  hashform  form;
+  char     *c;
+  int       mode_given = 0;
+
+  form.mode = NORMAL_HM;
+
+  /* let the user override the default behavior */
+  switch (argc)
+  {
+  case 1:
+    break;
+  case 2:
+    if (argv[1][0] != '-')
+    {
+      usage_error();
+      break;
+    }
+    for (c = &argv[1][1]; *c != '\0'; ++c) switch(*c)
+    {
+    case 'n': case 'N':
+    case 'i': case 'I':
+    case 'h': case 'H':
+    case 'd': case 'D':
+    case 'a': case 'A':
+    case 'b': case 'B':
+      if (mode_given == TRUE)          /* at most one input mode letter */
+        usage_error();
+      switch(*c)
+      {
+      case 'n': case 'N':
+        form.mode = NORMAL_HM; break;
+      case 'i': case 'I':
+        form.mode = INLINE_HM; break;
+      case 'h': case 'H':
+        form.mode = HEX_HM; break;
+      case 'd': case 'D':
+        form.mode = DECIMAL_HM; break;
+      case 'a': case 'A':
+        form.mode = AB_HM; break;
+      case 'b': case 'B':
+        form.mode = ABDEC_HM; break;
+      }
+      mode_given = TRUE;
+      break;
+    case 'm': case 'M':                /* accepted, but ignored here */
+    case 'p': case 'P':
+    case 'f': case 'F':
+    case 's': case 'S':
+      break;
+    default:
+      usage_error();
+    }
+    break;
+  default:
+    usage_error();
+  }
+
+  driver(&form);
+  return 0;   /* was 1, which reported a completed run as a failure */
+}
diff --git a/utils/utils.h b/utils/utils.h
index a506f83..416121d 100644
--- a/utils/utils.h
+++ b/utils/utils.h
@@ -151,6 +151,7 @@ typedef int32_t bsize_t;
 #define	DEBUG_STAT_EN(...)
 #endif
 
+#define	PATHSEP_CHAR	'/'
 #define	BYTES_TO_MB(x) ((x) / (1024 * 1024))
 #define	U64_P(x) *((uint64_t *)(x))
 #define	U32_P(x) *((uint32_t *)(x))