This commit is contained in:
Sears Russell 2006-05-24 02:19:04 +00:00
parent 4c168d8fb2
commit 2e06cafaa8
13 changed files with 146 additions and 67 deletions

View file

@ -3,8 +3,16 @@
#ifndef __HASH_H
#define __HASH_H
unsigned int max_bucket(unsigned char tableBits, unsigned long nextExtension);
unsigned int hash(const void * val, long val_length, unsigned char tableBits, unsigned long nextExtension);
unsigned int max_bucket(unsigned char tableBits, unsigned int nextExtension);
/**
This function maps the desired length of the bucket list to an appropriate
set of linear hash parameters (tableBits, nextExtension) that yields that size.
*/
void hashGetParamsForSize(unsigned int desiredSize, unsigned char *tableBits,
unsigned int* nextExtension);
unsigned int hash(const void * val, long val_length, unsigned char tableBits, unsigned int nextExtension);
#define twoToThe(x) (1 << (x))
/** @todo logBase2 should be able to handle 64 bit values, but
currently doesn't...*/
unsigned int logBase2(unsigned int value);
#endif /*__HASH_H */

View file

@ -3,7 +3,7 @@
lib_LIBRARIES=liblladd.a
#liblladd_a_LIBADD=logger/liblogger.a operations/liboperations.a
# removed: recovery.c transactional.c logger.c logger/logparser.c logger/logstreamer.c
liblladd_a_SOURCES=crc32.c common.c stats.c io.c bufferManager.c linkedlist.c operations.c \
liblladd_a_SOURCES=crc32.c lhtable.c common.c stats.c io.c bufferManager.c linkedlist.c operations.c \
pageFile.c pageCache.c page.c bufferPool.c blobManager.c recovery2.c truncation.c \
transactional2.c \
lockManager.c iterator.c consumer.c arrayCollection.c ringbuffer.c fifo.c multiplexer.c graph.c\

View file

@ -12,7 +12,7 @@
unsigned int crc32(const void *buffer, unsigned int count, unsigned int crc);
static int BuildCRCTable(void);
static unsigned int *CRCTable; // Table constructed for fast lookup.
static unsigned int CRCTable[256]; // Table constructed for fast lookup.
#define CRC32_POLYNOMIAL 0xEDB88320
@ -23,13 +23,6 @@ static int BuildCRCTable(void)
int i, j;
unsigned int crc;
CRCTable = malloc(256 * sizeof(unsigned int));
if (CRCTable == NULL)
{
fprintf(stderr, "Can't malloc space for CRC table in file %s\n", __FILE__);
return -1L;
}
for (i = 0; i <= 255; i++)
{
crc = i;
@ -51,8 +44,7 @@ unsigned int crc32(const void *buffer, unsigned int count, unsigned int crc)
if (firsttime)
{
if (BuildCRCTable())
return -1;
BuildCRCTable();
firsttime = 0;
}

View file

@ -1,29 +1,25 @@
#include <lladd/hash.h>
/*#include <math.h> */
/*static int thomasWangs32BitMixFunction(int key);
static unsigned long thomasWangs64BitMixFunction(unsigned long key);*/
#include <assert.h>
//#include <stdio.h>
/**
@todo Make hash.c 32/64bit little/big-endian clean...
*/
unsigned int max_bucket(unsigned char tableBits, unsigned long nextExtension) {
unsigned int oldTableLength = twoToThe(tableBits - 1);
return oldTableLength + nextExtension - 1;
#ifdef THOMAS_WANG_32
/**
   Integer bit-mixing step (named after Thomas Wang's 32-bit mix).

   Runs the key through a fixed sequence of shift/add/xor rounds and
   returns the scrambled value.  Only compiled when THOMAS_WANG_32 is
   defined.

   NOTE(review): every shift here operates on a signed int, so the
   result for large or negative intermediates depends on how the
   compiler treats signed shifts -- confirm before relying on specific
   output values.

   @param key the value to mix.
   @return the mixed value.
*/
static int thomasWangs32BitMixFunction(int key)
{
  key = key + ~(key << 15);
  key = key ^ (key >> 10);
  key = key + (key << 3);
  key = key ^ (key >> 6);
  key = key + ~(key << 11);
  key = key ^ (key >> 16);
  return key;
}
#else
#ifdef THOMAS_WANG_64
/** @todo replace powl in hash with something more efficient, if hash() becomes a bottleneck. */
unsigned int hash(const void * val, long val_length, unsigned char tableBits, unsigned long nextExtension) {
unsigned int oldTableLength = /*powl(2, tableBits - 1); */ twoToThe(tableBits - 1);
unsigned int unmixed = crc32(val, val_length, (unsigned int)-1);
unsigned int ret = unmixed & (oldTableLength - 1);
/* What would the low hash value be? */
if(ret < nextExtension) { /* Might be too low. */
unsigned int tableLength = /* powl(2, tableBits); */ twoToThe(tableBits);
ret = unmixed & (tableLength - 1);
}
return (int) ret;
}
/*static unsigned long thomasWangs64BitMixFunction(unsigned long key)
static unsigned long thomasWangs64BitMixFunction(unsigned long key)
{
key += ~(key << 32L);
key ^= (key >> 22L);
@ -36,15 +32,90 @@ unsigned int hash(const void * val, long val_length, unsigned char tableBits, un
return key;
}
static int thomasWangs32BitMixFunction(int key)
{
key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return key;
#endif
#endif
/**
   Computes 2^(tableBits-1) + nextExtension - 1.  Presumably this is
   the index of the last bucket in use for the given linear hash
   parameters -- verify against callers.

   @param tableBits     number of bits in the current (larger) table
                        size; must be in the range 1..32.
   @param nextExtension offset added to the old table length.
   @return 2^(tableBits-1) + nextExtension - 1, using unsigned
           (wrapping) arithmetic.
*/
unsigned int max_bucket(unsigned char tableBits, unsigned int nextExtension) {
  /* Shift an unsigned 1: the signed form (1 << 31) overflows int and is
     undefined behavior when tableBits == 32. */
  const unsigned int oldTableLength = 1u << (tableBits - 1);
  return oldTableLength + nextExtension - 1;
}
/**
   Derives linear hash parameters from a desired bucket count.

   Sets *tableBits to floor(log2(desiredSize)) + 1 and *nextExtension
   to the amount by which desiredSize exceeds 2^(*tableBits - 1).

   @param desiredSize   requested number of buckets.  A request of 0 is
                        treated as 1, since it would otherwise make
                        *nextExtension wrap around to a huge value.
   @param tableBits     out: number of table bits.
   @param nextExtension out: desiredSize - 2^(*tableBits - 1).
*/
void hashGetParamsForSize(unsigned int desiredSize,
			  unsigned char * tableBits, unsigned int* nextExtension) {
  if (desiredSize == 0) {
    desiredSize = 1;
  }
  *tableBits = logBase2(desiredSize) + 1;
  /* Unsigned shift: (1 << 31) on a signed int would be undefined when
     desiredSize >= 2^31. */
  *nextExtension = desiredSize - (1u << (*tableBits - 1));
}
/* Returns a mask selecting the low `bits` bits of a 32-bit hash value.
   Saturates to all ones for bits >= 32, where a plain shift would be
   undefined. */
static unsigned int hashLowBitsMask(unsigned int bits) {
  return bits >= 32 ? ~0u : (1u << bits) - 1u;
}

/**
   Linear hash function: maps a key to a bucket number.

   The key is run through crc32() and the result is truncated to the
   table size as it was before the current round of splitting
   (tableBits - 1 bits).  If that bucket number is below nextExtension
   (i.e. it lies before the point where this round has already split),
   the value is re-truncated using the full tableBits bits instead; the
   new value may equal the old one.

   @param val           pointer to the key bytes.
   @param val_length    number of key bytes passed to crc32().
   @param tableBits     number of bits in the larger (post-split) table
                        size.  0 is treated as a one-bucket table.
   @param nextExtension buckets with an index below this use the
                        larger table size.
   @return the bucket number.
*/
unsigned int hash(const void * val, long val_length,
		  unsigned char tableBits, unsigned int nextExtension) {
  const unsigned int unmixed = crc32(val, val_length, (unsigned int)-1);

  /* Bucket number as of the previous table size.  Masks are built with
     unsigned arithmetic: the signed (1 << 31) and (1 << 32) forms are
     undefined behavior for tableBits of 31 and 32. */
  const unsigned int oldMask =
    tableBits ? hashLowBitsMask(tableBits - 1u) : 0u;
  unsigned int ret = unmixed & oldMask;

  if (ret < nextExtension) {
    /* This bucket has already been split in the current round. */
    ret = unmixed & hashLowBitsMask(tableBits);
  }
  return ret;
}
/* LogTable256[b] is floor(log2(b)) for a byte value b in 1..255; the
   entry for 0 is 0. */
static const char LogTable256[] =
{
  0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};

/**
   Table-driven floor(log2(v)) for values that fit in 32 bits.

   Locates the most significant non-zero byte of v and looks that byte
   up in LogTable256, adding the byte's bit offset (0, 8, 16 or 24).
   Returns 0 for v == 0 as well as for v == 1.

   Taken from http://graphics.stanford.edu/~seander/bithacks.html

   @todo extend to handle unsigned long (this will mean 64bit on 64bit
         platforms; need compiler macro to test for sizeof(long), test
         harness to compare logBase2Slow's output with logBase2's
         output, etc...)

   @param v the value whose base-2 logarithm is wanted.
   @return the position of the highest set bit of v (0 if v is 0).
*/
unsigned int logBase2(unsigned int v) {
  const unsigned int upperHalf = v >> 16;

  if (upperHalf != 0) {
    const unsigned int topByte = v >> 24;
    if (topByte != 0) {
      return 24 + LogTable256[topByte];
    }
    return 16 + LogTable256[upperHalf & 0xFF];
  }

  /* v fits in 16 bits here, so (v >> 8) is already a single byte. */
  const unsigned int secondByte = v >> 8;
  if (secondByte != 0) {
    return 8 + LogTable256[secondByte];
  }
  return LogTable256[v];
}
/**
   Straightforward floor(log2(v)): counts how many times v can be
   shifted right by one bit before it becomes zero.  Returns 0 for both
   v == 0 and v == 1.

   @param v the value whose base-2 logarithm is wanted.
   @return the position of the highest set bit of v (0 if v is 0).
*/
unsigned long logBase2Slow(unsigned long v) {
  unsigned long bits = 0;
  for (v >>= 1; v != 0; v >>= 1) {
    bits++;
  }
  return bits;
}

View file

@ -370,7 +370,7 @@ static void recover_split(int xid, recordid hashRid, int i, int next_split, int
Tread(xid, *next, next);
}
if(foundDup) {
long new_hash = hash(next+1, keySize, i, ULONG_MAX) + 2;
long new_hash = hash(next+1, keySize, i, UINT_MAX) + 2;
if(new_hash == next_split) {
// set B->next = 0
*next = NULLRID;
@ -449,7 +449,7 @@ void instant_rehash(int xid, recordid hashRid, int next_split, int i, int keySiz
}
int old_hash;
int new_hash = hash(A_contents+1, keySize, i, ULONG_MAX) + 2;
int new_hash = hash(A_contents+1, keySize, i, UINT_MAX) + 2;
while(new_hash != next_split) {
// Move things into the new bucket until we find something that belongs in the first bucket...
@ -503,7 +503,7 @@ void instant_rehash(int xid, recordid hashRid, int next_split, int i, int keySiz
TinstantSet(xid, A, A_contents);
Tdealloc(xid, oldANext);
new_hash = hash(A_contents+1, keySize, i, ULONG_MAX) + 2;
new_hash = hash(A_contents+1, keySize, i, UINT_MAX) + 2;
}
/* printf("Got past loop 1\n");
fflush(NULL); */
@ -515,8 +515,8 @@ void instant_rehash(int xid, recordid hashRid, int next_split, int i, int keySiz
TreadUnlocked(xid, B, B_contents);
C = B_contents->next;
old_hash = hash(B_contents+1, keySize, i-1, ULONG_MAX) + 2;
new_hash = hash(B_contents+1, keySize, i, ULONG_MAX) + 2;
old_hash = hash(B_contents+1, keySize, i-1, UINT_MAX) + 2;
new_hash = hash(B_contents+1, keySize, i, UINT_MAX) + 2;
assert(next_split == old_hash);
assert(new_hash == old_hash || new_hash == old_hash + twoToThe(i-1));

View file

@ -172,8 +172,8 @@ void rehash(int xid, recordid hashRid, int next_split, int i, int keySize, int v
return;
}
int old_hash;
int new_hash = hash(A_contents+1, keySize, i, ULONG_MAX) + 2;
unsigned int old_hash;
unsigned int new_hash = hash(A_contents+1, keySize, i, UINT_MAX) + 2;
while(new_hash != next_split) {
// Need a record in A that belongs in the first bucket...
@ -226,7 +226,7 @@ void rehash(int xid, recordid hashRid, int next_split, int i, int keySize, int v
Tset(xid, A, A_contents);
Tdealloc(xid, oldANext);
new_hash = hash(A_contents+1, keySize, i, ULONG_MAX) + 2;
new_hash = hash(A_contents+1, keySize, i, UINT_MAX) + 2;
}
/* printf("Got past loop 1\n");
fflush(NULL); */
@ -238,8 +238,8 @@ void rehash(int xid, recordid hashRid, int next_split, int i, int keySize, int v
Tread(xid, B, B_contents);
C = B_contents->next;
old_hash = hash(B_contents+1, keySize, i-1, ULONG_MAX) + 2;
new_hash = hash(B_contents+1, keySize, i, ULONG_MAX) + 2;
old_hash = hash(B_contents+1, keySize, i-1, UINT_MAX) + 2;
new_hash = hash(B_contents+1, keySize, i, UINT_MAX) + 2;
assert(next_split == old_hash);
assert(new_hash == old_hash || new_hash == old_hash + twoToThe(i-1));

View file

@ -20,8 +20,9 @@ void dirtyPages_add(Page * p) {
pthread_mutex_lock(&dirtyPages_mutex);
if(!p->dirty) {
p->dirty = 1;
assert(!pblHtLookup(dirtyPages, &(p->id), sizeof(int)));
//assert(p->LSN);
void* ret = pblHtLookup(dirtyPages, &(p->id), sizeof(int));
assert(!ret);
pblHtInsert(dirtyPages, &(p->id), sizeof(int), (void*)p->LSN);
}
pthread_mutex_unlock(&dirtyPages_mutex);
@ -32,8 +33,10 @@ void dirtyPages_remove(Page * p) {
// printf("Removing page %d\n", p->id);
//assert(pblHtLookup(dirtyPages, &(p->id), sizeof(int)));
// printf("With lsn = %d\n", (lsn_t)pblHtCurrent(dirtyPages));
assert(!pblHtRemove(dirtyPages, &(p->id), sizeof(int)));
//assert(!pblHtLookup(dirtyPages, &(p->id), sizeof(int)));
int ret = pblHtRemove(dirtyPages, &(p->id), sizeof(int));
//assert(!ret); <--- Due to a bug in the PBL compatibility mode,
//there is no way to tell whether the value didn't exist, or if it
//was null.
pthread_mutex_unlock(&dirtyPages_mutex);
}

View file

@ -1,5 +1,5 @@
# TODO: Doesn't build the pbl tests..
lib_LIBRARIES=libpbl.a
libpbl_a_SOURCES=pbl.c pblhash.c pblkf.c pblisam.c
libpbl_a_SOURCES=#pblhash.c #pbl.c pblhash.c #pblkf.c pblisam.c
# jbhash.c
AM_CFLAGS= -g -Wall -pedantic -std=gnu99

View file

@ -24,6 +24,11 @@
please see: http://mission.base.com/.
$Log$
Revision 1.9 2006/05/24 02:19:04 sears
No more pblht (still need to delete pbl files from CVS, but want to run performance comparison first...)
Also, the linear hash table doesn't extend itself yet.
Revision 1.8 2006/04/11 02:20:21 sears
removed memcpy() calls from inMemoryLog; added "const" qualifier to many LogEntry pointers.
@ -161,7 +166,7 @@ typedef struct pbl_hashtable_s pbl_hashtable_t;
return ((unsigned int)(crc32((char*)key, keylen, -1))) & (PBL_HASHTABLE_SIZE-1); //% PBL_HASHTABLE_SIZE;
}*/
#define hash(x, y) (((keylen)==sizeof(int) ? \
#define hash(key, keylen) (((keylen)==sizeof(int) ? \
(*(unsigned int*)key) & (PBL_HASHTABLE_SIZE-1) :\
((unsigned int)(crc32((char*)(key), (keylen), -1))) & (PBL_HASHTABLE_SIZE-1)))

View file

@ -51,6 +51,6 @@ int main() {
srunner_run_all(sr, CK_NORMAL);
nf = srunner_ntests_failed(sr);
srunner_free(sr);
/* suite_free(s); */
/* suite_free(s); */
return (nf == 0) ? 0 : 1;
}

View file

@ -1,7 +1,7 @@
# INCLUDES = @CHECK_CFLAGS@
if HAVE_LIBCHECK
## Had to disable check_lht because lht needs to be rewritten.
TESTS = check_logEntry check_logWriter check_page check_operations check_transactional2 check_recovery check_blobRecovery check_bufferManager check_indirect check_pageOperations check_linearHash check_logicalLinearHash check_header check_linkedListNTA check_linearHashNTA check_pageOrientedList check_lockManager check_compensations check_errorHandling check_ringbuffer check_iterator check_multiplexer check_bTree
TESTS = check_lhtable check_logEntry check_logWriter check_page check_operations check_transactional2 check_recovery check_blobRecovery check_bufferManager check_indirect check_pageOperations check_linearHash check_logicalLinearHash check_header check_linkedListNTA check_linearHashNTA check_pageOrientedList check_lockManager check_compensations check_errorHandling check_ringbuffer check_iterator check_multiplexer check_bTree
#check_lladdhash
else
TESTS =

View file

@ -72,11 +72,11 @@ START_TEST(checkHashFcn) {
int j = (int) (100000.0*random()/(RAND_MAX+1.0)); /* int for CRC. */
int k = (int) 2+(30.0*random()/(RAND_MAX+1.0)); /* number of bits in result. */
unsigned long first = hash(&j, sizeof(int), k, ULONG_MAX);
unsigned long first = hash(&j, sizeof(int), k, UINT_MAX);
int boundary = first + 10;
unsigned long second = hash(&j, sizeof(int), k, boundary);
assert(first == second);
unsigned long third = hash(&j, sizeof(int), k+1, ULONG_MAX);
unsigned long third = hash(&j, sizeof(int), k+1, UINT_MAX);
assert((first == third) || (pow(2,k)+ first == third));
}
} END_TEST

View file

@ -74,11 +74,11 @@ START_TEST(checkHashFcn) {
int j = (int) (100000.0*random()/(RAND_MAX+1.0)); /* int for CRC. */
int k = (int) 2+(30.0*random()/(RAND_MAX+1.0)); /* number of bits in result. */
unsigned long first = hash(&j, sizeof(int), k, ULONG_MAX);
unsigned long first = hash(&j, sizeof(int), k, UINT_MAX);
int boundary = first + 10;
unsigned long second = hash(&j, sizeof(int), k, boundary);
assert(first == second);
unsigned long third = hash(&j, sizeof(int), k+1, ULONG_MAX);
unsigned long third = hash(&j, sizeof(int), k+1, UINT_MAX);
assert((first == third) || (pow(2,k)+ first == third));
}
} END_TEST