pcompress/utils/phash/perfect.h
Moinak Ghosh 991482403b Add extension based file type detection and setting segment data type.
Use Bob Jenkins Minimal Perfect Hash to check for known extensions.
Use semaphore signaling and direct buffer copy for extraction.
Miscellaneous fixes.
2013-11-07 21:48:54 +05:30

157 lines
6.5 KiB
C

/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/
/*
------------------------------------------------------------------------------
perfect.h: code to generate code for a hash for perfect hashing.
(c) Bob Jenkins, September 1996
You may use this code in any way you wish, and it is free. No warranty.
I hereby place this in the public domain.
Source is http://burtleburtle.net/bob/c/perfect.h
------------------------------------------------------------------------------
*/
#ifndef STANDARD
#include "standard.h"
#endif
#ifndef PERFECT
#define PERFECT
#define MAXKEYLEN 30 /* maximum length of a key */
#define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */
#define SCRAMBLE_LEN ((ub4)1<<16) /* length of *scramble* */
#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
#define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */
#define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */
/* the generated code for the final hash, assumes initial hash is done */
struct gencode
{
char **line; /* array of text lines, 80 bytes apiece */
/*
* The code placed here must declare "ub4 rsl"
* and assign it the value of the perfect hash using the function inputs.
* Later code will be tacked on which returns rsl or manipulates it according
* to the user directives.
*
* This code is at the top of the routine; it may and must declare any
* local variables it needs.
*
* Each way of filling in **line should be given a comment that is a unique
* tag. A testcase named with that tag should also be found which tests
* the generated code.
*/
ub4 len; /* number of lines available for final hash */
ub4 used; /* number of lines used by final hash */
ub4 lowbit; /* for HEX, lowest interesting bit */
ub4 highbit; /* for HEX, highest interesting bit */
ub4 diffbits; /* bits which differ for some key */
ub4 i,j,k,l,m,n,o; /* state machine used in hexn() */
};
typedef struct gencode gencode;
/* user directives: perfect hash? minimal perfect hash? input is an int? */
struct hashform
{
enum {
NORMAL_HM, /* key is a string */
INLINE_HM, /* user will do initial hash, we must choose salt for them */
HEX_HM, /* key to be hashed is a hexidecimal 4-byte integer */
DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */
AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */
ABDEC_HM /* like AB_HM, but in decimal */
} mode;
enum {
STRING_HT, /* key is a string */
INT_HT, /* key is an integer */
AB_HT /* dunno what key is, but input is distinct (A,B) pair */
} hashtype;
enum {
NORMAL_HP, /* just find a perfect hash */
MINIMAL_HP /* find a minimal perfect hash */
} perfect;
enum {
FAST_HS, /* fast mode */
SLOW_HS /* slow mode */
} speed;
};
typedef struct hashform hashform;
/* representation of a key */
struct key
{
ub1 *name_k; /* the actual key */
ub4 len_k; /* the length of the actual key */
ub4 hash_k; /* the initial hash value for this key */
struct key *next_k; /* next key */
/* beyond this point is mapping-dependent */
ub4 a_k; /* a, of the key maps to (a,b) */
ub4 b_k; /* b, of the key maps to (a,b) */
struct key *nextb_k; /* next key with this b */
};
typedef struct key key;
/* things indexed by b of original (a,b) pair */
struct bstuff
{
ub2 val_b; /* hash=a^tabb[b].val_b */
key *list_b; /* tabb[i].list_b is list of keys with b==i */
ub4 listlen_b; /* length of list_b */
ub4 water_b; /* high watermark of who has visited this map node */
};
typedef struct bstuff bstuff;
/* things indexed by final hash value */
struct hstuff
{
key *key_h; /* tabh[i].key_h is the key with a hash of i */
};
typedef struct hstuff hstuff;
/* things indexed by queue position */
struct qstuff
{
bstuff *b_q; /* b that currently occupies this hash */
ub4 parent_q; /* queue position of parent that could use this hash */
ub2 newval_q; /* what to change parent tab[b] to to use this hash */
ub2 oldval_q; /* original value of tab[b] */
};
typedef struct qstuff qstuff;
/* return ceiling(log based 2 of x) */
ub4 mylog2(/*_ ub4 x _*/);
/* Given the keys, scramble[], and hash mode, find the perfect hash */
void findhash(/*_ bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt,
gencode *final, ub4 *scramble, ub4 smax, key *keys, ub4 nkeys,
hashform *form _*/);
/* private, but in a different file because it's excessively verbose */
int inithex(/*_ key *keys, ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys,
ub4 salt, gencode *final, gencode *form _*/);
#endif /* PERFECT */