#include <lladd/operations/linearHash.h>
#include <lladd/hash.h>
#include <limits.h>
#include <assert.h>

/**
   A from-scratch implementation of linear hashing.  Uses the
   arrayList operations to implement its hashbuckets.
*/

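/* A minimal usage sketch for the hash table operations defined in this file.
 * It is wrapped in #if 0 so it is not compiled into the library.  The
 * transaction calls (Tinit, Tbegin, Tcommit, Tdeinit) are assumed to be the
 * usual LLADD entry points; the key and value sizes are fixed when the table
 * is allocated. */
#if 0
static void linearHashUsageExample() {
  Tinit();
  ThashInit();                 /* set up the openHashes table */

  int xid = Tbegin();
  recordid ht = ThashAlloc(xid, sizeof(int), sizeof(int));

  int key = 42, val = 7, out = 0;
  ThashInsert(xid, ht, &key, sizeof(int), &val, sizeof(int));
  if(ThashLookup(xid, ht, &key, sizeof(int), &out, sizeof(int))) {
    assert(out == val);
  }
  ThashDelete(xid, ht, &key, sizeof(int));

  ThashClose(xid, ht);
  Tcommit(xid);

  ThashDeinit();
  Tdeinit();
}
#endif
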
#define BUCKETS_OFFSET (2)

#define headerKeySize (headerRidA.page)
#define headerValSize (headerRidA.slot)

#define headerHashBits (headerRidB->page)
#define headerNextSplit (headerRidB->slot)

#include <math.h>
#include <malloc.h>
#include <string.h>
#include <pbl/pbl.h>

typedef struct {
  recordid next;
} hashEntry;

pblHashTable_t * openHashes = NULL;

void rehash(int xid, recordid hash, int next_split, int i, int keySize, int valSize);
void update_hash_header(int xid, recordid hash, int i, int next_split);
int deleteFromBucket(int xid, recordid hash, int bucket_number, recordid bucket_rid, void * key, int keySize, recordid * deletedEntry);
void insertIntoBucket(int xid, recordid hashRid, int bucket_number, recordid bucket_rid, hashEntry * e, int keySize, int valSize, recordid deletedEntry, int skipDelete);
int findInBucket(int xid, recordid hashRid, int bucket_number, const void * key, int keySize, void * val, int valSize);

int findInBucket(int xid, recordid hashRid, int bucket_number, const void * key, int keySize, void * val, int valSize) {
  hashEntry * e = malloc(sizeof(hashEntry) + keySize + valSize);

  recordid bucket = hashRid;
  bucket.slot = bucket_number;
  recordid nextEntry;
  Tread(xid, bucket, &nextEntry);
  if(nextEntry.size) {
    assert(nextEntry.size == sizeof(hashEntry) + keySize + valSize);
  }
  /* if(nextEntry.size) {
       e = malloc(nextEntry.size);
     } else {
       e = malloc(1);
     } */
  int found = 0;
  while(nextEntry.size > 0) {
    Tread(xid, nextEntry, e);
    if(!memcmp(key, e+1, keySize)) {
      memcpy(val, ((byte*)(e+1))+keySize, valSize);
      found = 1;
      break;
    }
    nextEntry = e->next;
  }
  free(e);
  return found;
}

void expand(int xid, recordid hash, int next_split, int i, int keySize, int valSize) {
  TarrayListExtend(xid, hash, 1);
  if(next_split >= powl(2,i-1)+2) {
    /* printf("\n\n%d %d (i++)\n\n", next_split, i); */
    i++;
    next_split = 2;
  }
  /* printf("-%d-", next_split); */
  /* printf("rehash(%d, %d + 2)\n", i, next_split - 2); */
  rehash(xid, hash, next_split, i, keySize, valSize);
  next_split++;
  update_hash_header(xid, hash, i, next_split);
}

void update_hash_header(int xid, recordid hash, int i, int next_split) {
  recordid * headerRidB = pblHtLookup(openHashes, &hash.page, sizeof(int));

  /* headerHashBits and headerNextSplit are #defined to refer to headerRidB. */
  headerHashBits = i;
  headerNextSplit = next_split;

  /* headerRidB lives in slot 1 of the arrayList (see ThashAlloc / ThashOpen). */
  hash.slot = 1;
  Tset(xid, hash, headerRidB);
}

void rehash(int xid, recordid hashRid, int next_split, int i, int keySize, int valSize) {
  recordid bucket = hashRid;
  bucket.slot = next_split;

  hashEntry * e = calloc(1, sizeof(hashEntry) + keySize + valSize);

  if(bucket.size) {
    Tread(xid, bucket, &bucket);
  }

  while(bucket.size > 0) {
    Tread(xid, bucket, e);

    /* printf("#%d", *(int*)(e+1)); */

    int old_hash = hash(e+1, keySize, i-1, ULONG_MAX) + 2;
    assert(next_split == old_hash);

    int new_hash = hash(e+1, keySize, i, ULONG_MAX) + 2;

    bucket = e->next;

    assert((!bucket.size) || bucket.size == sizeof(hashEntry) + keySize + valSize);

    if(new_hash != next_split) {

      assert(new_hash == next_split + powl(2, i-1));

      recordid oldEntry;

      /** @todo could be optimized.  Why deleteFromBucket, then
	  insertIntoBucket?  Causes us to traverse the bucket list an
	  extra time... */

      recordid next_split_contents, new_hash_contents;
      recordid tmp = hashRid;
      tmp.slot = next_split;
      Tread(xid, tmp, &next_split_contents);
      tmp.slot = new_hash;
      Tread(xid, tmp, &new_hash_contents);

      assert(deleteFromBucket(xid, hashRid, next_split, next_split_contents, e+1, keySize, &oldEntry));
      insertIntoBucket(xid, hashRid, new_hash, new_hash_contents, e, keySize, valSize, oldEntry, 1);
    }
  }
  free(e);
}

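/* A small illustration (not compiled; kept in #if 0) of the split invariant
 * that the asserts in rehash() rely on: when the bucket addressed by
 * next_split is split under i hash bits, each entry either stays where it is
 * or moves to bucket next_split + 2^(i-1).  The +2 accounts for the two
 * header slots at the front of the arrayList. */
#if 0
static void splitInvariantSketch(const void * key, int keySize, int i, int next_split) {
  int old_bucket = hash(key, keySize, i-1, ULONG_MAX) + 2;
  int new_bucket = hash(key, keySize, i,   ULONG_MAX) + 2;
  if(old_bucket == next_split) {
    /* An entry that hashed to next_split under i-1 bits lands in one of
       exactly two buckets under i bits. */
    assert(new_bucket == next_split || new_bucket == next_split + powl(2, i-1));
  }
}
#endif
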
void insertIntoBucket(int xid, recordid hashRid, int bucket_number, recordid bucket_contents, hashEntry * e, int keySize, int valSize, recordid newEntry, int skipDelete) {
  recordid deleteMe;
  if(!skipDelete) {
    if(deleteFromBucket(xid, hashRid, bucket_number, bucket_contents, e+1, keySize, &deleteMe)) {
      Tdealloc(xid, deleteMe);
      hashRid.slot = bucket_number;
      Tread(xid, hashRid, &bucket_contents);
      hashRid.slot = 0;
    }
  }

  /* @todo consider recovery for insertIntoBucket. */
  /* recordid newEntry = Talloc(xid, sizeof(hashEntry) + keySize + valSize); */
  recordid bucket = hashRid;
  bucket.slot = bucket_number;
  /* Tread(xid, bucket, &(e->next)); */
  e->next = bucket_contents;
  Tset(xid, newEntry, e);
  Tset(xid, bucket, &newEntry);
}

int deleteFromBucket(int xid, recordid hash, int bucket_number, recordid bucket_contents, void * key, int keySize, recordid * deletedEntry) {
  hashEntry * e;
  recordid bucket = hash;
  bucket.slot = bucket_number;
  recordid nextEntry;
  nextEntry = bucket_contents;
  /* Tread(xid, bucket, &nextEntry); */
  if(nextEntry.size) {
    e = calloc(1, nextEntry.size);
  } else {
    e = calloc(1, 1);
  }
  int first = 1;
  int found = 0;
  recordid lastEntry;
  while(nextEntry.size > 0) {
    Tread(xid, nextEntry, e);
    if(!memcmp(key, e+1, keySize)) {
      if(first) {
	assert(e->next.size < 1000);
	Tset(xid, bucket, &(e->next));
      } else {
	recordid next = e->next;
	Tread(xid, lastEntry, e);
	assert(next.size < 1000);
	e->next = next;
	Tset(xid, lastEntry, e);
      }
      *deletedEntry = nextEntry;
      /* Tdealloc(xid, nextEntry); */
      found = 1;
      break;
    }
    lastEntry = nextEntry;
    first = 0;
    nextEntry = e->next;
  }
  free(e);  /* e is only scratch space; the caller deallocates *deletedEntry. */
  return found;
}

recordid ThashAlloc(int xid, int keySize, int valSize) {
  /* Want 16 buckets + 2 header rids, doubling on overflow. */
  recordid rid = TarrayListAlloc(xid, 16 + 2, 2, sizeof(recordid));
  TarrayListExtend(xid, rid, 32+2);

  recordid headerRidA;
  recordid * headerRidB = malloc(sizeof(recordid));

  headerKeySize = keySize;
  headerValSize = valSize;

  headerNextSplit = INT_MAX;
  headerHashBits = 4;

  rid.slot = 0;
  Tset(xid, rid, &headerRidA);
  rid.slot = 1;
  Tset(xid, rid, headerRidB);

  pblHtInsert(openHashes, &rid.page, sizeof(int), headerRidB);

  rid.slot = 0;
  return rid;
}

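/* Layout recap (descriptive only): slot 0 of the arrayList holds headerRidA,
 * whose page/slot fields are reused (via the #defines above) as the key and
 * value sizes; slot 1 holds headerRidB, whose page/slot fields hold the
 * number of hash bits and the next bucket to split; buckets start at slot
 * BUCKETS_OFFSET (2).  ThashOpen() reads headerRidB back from slot 1 and
 * caches it in openHashes, keyed by the arrayList's page number. */
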
void ThashInit() {
  openHashes = pblHtCreate();
}

void ThashDeinit() {
  pblHtDelete(openHashes);
}

void ThashInsert(int xid, recordid hashRid,
		 void * key, int keySize,
		 void * val, int valSize) {

  /* recordid headerRidA; */
  recordid * headerRidB = pblHtLookup(openHashes, &hashRid.page, sizeof(int));

  /* recordid tmp = hashRid; */
  /* tmp.slot = 0;
     Tread(xid, tmp, &headerRidA);
     assert(headerKeySize == keySize);
     tmp.slot = 1; */
  /* Tread(xid, tmp, &headerRidB); */
  /* assert(headerValSize == valSize); */

  int bucket = hash(key, keySize, headerHashBits, headerNextSplit - 2) + 2;

  hashEntry * e = calloc(1, sizeof(hashEntry) + keySize + valSize);
  memcpy(e+1, key, keySize);
  memcpy(((byte*)(e+1)) + keySize, val, valSize);

  recordid newEntry = Talloc(xid, sizeof(hashEntry) + keySize + valSize);
  /* printf("%d -> %d\n", *(int*)(e+1), bucket); */
  recordid bucket_contents;
  hashRid.slot = bucket;
  Tread(xid, hashRid, &bucket_contents);
  hashRid.slot = 0;
  insertIntoBucket(xid, hashRid, bucket, bucket_contents, e, keySize, valSize, newEntry, 0);
  expand(xid, hashRid, headerNextSplit, headerHashBits, keySize, valSize);

  free(e);
}

/** @todo the hash table probably should track the number of items in it,
    so that expand can be called selectively. */
void ThashDelete(int xid, recordid hashRid,
		 void * key, int keySize) {
  recordid * headerRidB = pblHtLookup(openHashes, &hashRid.page, sizeof(int));
  recordid tmp = hashRid;
  tmp.slot = 1;
  /* Tread(xid, tmp, headerRidB); */
  int bucket_number = hash(key, keySize, headerHashBits, headerNextSplit - 2) + 2;
  recordid deleteMe;
  hashRid.slot = bucket_number;
  recordid bucket_contents;
  Tread(xid, hashRid, &bucket_contents);
  hashRid.slot = 0;
  if(deleteFromBucket(xid, hashRid, bucket_number, bucket_contents, key, keySize, &deleteMe)) {
    Tdealloc(xid, deleteMe);
  }
}

int ThashOpen(int xid, recordid hashRid) {
  recordid * headerRidB = malloc(sizeof(recordid));
  hashRid.slot = 1;
  Tread(xid, hashRid, headerRidB);

  pblHtInsert(openHashes, &hashRid.page, sizeof(int), headerRidB);

  return 0;
}

void ThashUpdate(int xid, recordid hashRid, void * key, int keySize, void * val, int valSize) {
  ThashDelete(xid, hashRid, key, keySize);
  ThashInsert(xid, hashRid, key, keySize, val, valSize);
}

int ThashClose(int xid, recordid hashRid) {
  recordid * freeMe = pblHtLookup(openHashes, &hashRid.page, sizeof(int));
  pblHtRemove(openHashes, &hashRid.page, sizeof(int));
  free(freeMe);
  return 0;
}

int ThashLookup(int xid, recordid hashRid, void * key, int keySize, void * buf, int valSize) {
  /* recordid headerRidB; */
  recordid * headerRidB = pblHtLookup(openHashes, &hashRid.page, sizeof(int));
  recordid tmp = hashRid;
  tmp.slot = 1;
  int bucket_number = hash(key, keySize, headerHashBits, headerNextSplit - 2) + 2;
  /* printf("look in %d\n", bucket_number); */
  int ret = findInBucket(xid, hashRid, bucket_number, key, keySize, buf, valSize);
  return ret;
}