Working, but slow and non-concurrent, variable-length key/value support for the linear hashtable.

This commit is contained in:
Sears Russell 2005-01-28 21:28:23 +00:00
parent 17f76264aa
commit 56d98adcbf
7 changed files with 234 additions and 51 deletions

View file

@ -163,8 +163,9 @@ typedef struct {
#include "operations/naiveLinearHash.h"
#include "operations/nestedTopActions.h"
#include "operations/linkedListNTA.h"
#include "operations/linearHashNTA.h"
#include "operations/pageOrientedListNTA.h"
#include "operations/linearHashNTA.h"
extern Operation operationsTable[]; /* [MAX_OPERATIONS]; memset somewhere */

View file

@ -22,6 +22,14 @@
#ifndef __LINEAR_HASH_NTA_H
#define __LINEAR_HASH_NTA_H
/** Hash kind tags.  Currently, only used in the type field of the
    iterators. */
#define FIXED_LENGTH_HASH 0
#define VARIABLE_LENGTH_HASH 1
/** Pass this as the keySize and/or valueSize argument of ThashCreate()
    (declared below) if the hashtable should support variable-length
    keys and/or values, respectively.  The hash implementation compares
    the stored keySize / valueSize against this value to decide whether a
    bucket is handled with the paged-list calls or the linked-list calls.
    NOTE(review): the expansion is a bare -1 with no parentheses. */
#define VARIABLE_LENGTH -1
typedef struct {
recordid hashHeader;
@ -30,6 +38,7 @@ typedef struct {
int keySize;
int valueSize;
lladd_linkedList_iterator * it;
lladd_pagedList_iterator * pit;
} lladd_hash_iterator;
recordid ThashCreate(int xid, int keySize, int valSize);

View file

@ -84,6 +84,7 @@ lladd_pagedList_iterator * TpagedListIterator(int xid, recordid list);
int TpagedListNext(int xid, lladd_pagedList_iterator * it, byte ** key, int * keySize, byte ** value, int * valueSize);
recordid TpagedListAlloc(int xid);
void TpagedListDelete(int xid, recordid list);
int TpagedListSpansPages(int xid, recordid list);
Operation getPagedListInsert();
Operation getPagedListRemove();
#endif

View file

@ -37,16 +37,28 @@ static void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * l
recordid ThashCreate(int xid, int keySize, int valueSize) {
recordid hashHeader = Talloc(xid, sizeof(lladd_hash_header));
lladd_hash_header lhh;
lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(lladd_linkedList_entry) + keySize + valueSize);
if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) {
lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(long));
} else {
lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(lladd_linkedList_entry) + keySize + valueSize);
}
TarrayListExtend(xid, lhh.buckets, HASH_INIT_ARRAY_LIST_COUNT);
int i;
byte * entry = calloc(1, lhh.buckets.size);
recordid bucket = lhh.buckets;
for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) {
bucket.slot = i;
Tset(xid, bucket, entry);
if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) {
for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) {
recordid rid = TpagedListAlloc(xid);
bucket.slot = i;
Tset(xid, bucket, &(rid.page));
}
} else {
byte * entry = calloc(1, lhh.buckets.size);
for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) {
bucket.slot = i;
Tset(xid, bucket, entry);
}
free (entry);
}
free (entry);
lhh.keySize = keySize;
lhh.valueSize = valueSize;
lhh.nextSplit = 0;
@ -139,16 +151,37 @@ static int __ThashInsert(int xid, recordid hashHeader, const byte* key, int keyS
Tread(xid, hashHeader, &lhh);
lhh.numEntries ++;
if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * HASH_FILL_FACTOR)) {
ThashSplitBucket(xid, hashHeader, &lhh);
if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) {
/* if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * (HASH_FILL_FACTOR * 40))) {
ThashSplitBucket(xid, hashHeader, &lhh);
} */
} else {
if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * HASH_FILL_FACTOR)) {
ThashSplitBucket(xid, hashHeader, &lhh);
}
}
assert(lhh.keySize == keySize); assert(lhh.valueSize == valueSize);
recordid bucket = lhh.buckets;
bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit);
int ret = TlinkedListInsert(xid, bucket, key, keySize, value, valueSize);
int ret;
if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) {
recordid bucketList;
Tread(xid, bucket, &(bucketList.page));
bucketList.slot = 0;
bucketList.size = 0;
// int before = TpagedListSpansPages(xid, bucketList);
ret = TpagedListInsert(xid, bucketList, key, keySize, value, valueSize);
int after = TpagedListSpansPages(xid, bucketList);
if(after) { // Page overflowed...
ThashSplitBucket(xid, hashHeader, &lhh);
}
} else {
assert(lhh.keySize == keySize); assert(lhh.valueSize == valueSize);
ret = TlinkedListInsert(xid, bucket, key, keySize, value, valueSize);
}
if(ret) { lhh.numEntries--; }
Tset(xid, hashHeader, &lhh);
@ -189,12 +222,21 @@ static int __ThashRemove(int xid, recordid hashHeader, const byte * key, int key
Tset(xid, hashHeader, &lhh);
assert(lhh.keySize == keySize);
recordid bucket = lhh.buckets;
bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit);
int ret = TlinkedListRemove(xid, bucket, key, keySize);
int ret;
if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) {
recordid bucketList;
Tread(xid, bucket, &(bucketList.page));
bucketList.slot = 0;
bucketList.size = 0;
ret = TpagedListRemove(xid, bucketList, key, keySize);
} else {
assert(lhh.keySize == keySize);
ret = TlinkedListRemove(xid, bucket, key, keySize);
}
return ret;
}
@ -203,16 +245,26 @@ int ThashLookup(int xid, recordid hashHeader, const byte * key, int keySize, byt
pthread_mutex_lock(&linear_hash_mutex);
Tread(xid, hashHeader, &lhh);
assert(lhh.keySize == keySize);
recordid bucket = lhh.buckets;
bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit);
int ret = TlinkedListFind(xid, bucket, key, keySize, value);
int ret;
if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) {
recordid bucketList;
Tread(xid, bucket, &(bucketList.page));
bucketList.slot = 0;
bucketList.size = 0;
ret = TpagedListFind(xid, bucketList, key, keySize, value);
} else {
assert(lhh.keySize == keySize);
ret = TlinkedListFind(xid, bucket, key, keySize, value);
}
pthread_mutex_unlock(&linear_hash_mutex);
return ret;
}
static void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * lhh) {
// if(1) { return; }
long old_bucket = lhh->nextSplit;
long new_bucket = old_bucket + twoToThe(lhh->bits-1);
recordid old_bucket_rid = lhh->buckets;
@ -221,27 +273,54 @@ static void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * l
new_bucket_rid.slot = new_bucket;
// void * handle = TbeginNestedTopAction(xid, OPERATION_NOOP, NULL, 0);
TarrayListExtend(xid, lhh->buckets, 1);
byte * entry = calloc(1, lhh->buckets.size);
Tset(xid, new_bucket_rid, entry);
free(entry);
recordid new_bucket_list; // will be uninitialized if we have fixed length entries.
if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) {
new_bucket_list = TpagedListAlloc(xid);
Tset(xid, new_bucket_rid, &(new_bucket_list.page));
} else {
byte * entry = calloc(1, lhh->buckets.size);
Tset(xid, new_bucket_rid, entry);
free(entry);
}
if(lhh->nextSplit < twoToThe(lhh->bits-1)-1) {
lhh->nextSplit++;
} else {
lhh->nextSplit = 0;
lhh->bits++;
}
lladd_linkedList_iterator * it = TlinkedListIterator(xid, old_bucket_rid, lhh->keySize, lhh->valueSize);
byte * key, *value;
int keySize, valueSize;
while(TlinkedListNext(xid, it, &key, &keySize, &value, &valueSize)) {
assert(valueSize == lhh->valueSize);
assert(keySize == lhh->keySize);
if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) {
TlinkedListRemove(xid, old_bucket_rid, key, keySize);
TlinkedListInsert(xid, new_bucket_rid, key, keySize, value, valueSize);
/** @todo linearHashNTA's split bucket should use the 'move' function call. */
if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) {
recordid old_bucket_list;
// recordid new_bucket_list;
Tread(xid, old_bucket_rid, &(old_bucket_list.page));
old_bucket_list.slot = 0;
old_bucket_list.size = 0;
// Tread(xid, new_bucket_rid, &(new_bucket_list.page)); // @todo could remember value from above.
lladd_pagedList_iterator * pit = TpagedListIterator(xid, old_bucket_list);
byte *key, *value;
int keySize, valueSize;
while(TpagedListNext(xid, pit, &key, &keySize, &value, &valueSize)) {
if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) {
TpagedListRemove(xid, old_bucket_list, key, keySize);
TpagedListInsert(xid, new_bucket_list, key, keySize, value, valueSize);
}
free(key);
free(value);
}
} else {
lladd_linkedList_iterator * it = TlinkedListIterator(xid, old_bucket_rid, lhh->keySize, lhh->valueSize);
byte * key, *value;
int keySize, valueSize;
while(TlinkedListNext(xid, it, &key, &keySize, &value, &valueSize)) {
assert(valueSize == lhh->valueSize);
assert(keySize == lhh->keySize);
if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) {
TlinkedListRemove(xid, old_bucket_rid, key, keySize);
TlinkedListInsert(xid, new_bucket_rid, key, keySize, value, valueSize);
}
free(key);
free(value);
}
free(key);
free(value);
}
// TendNestedTopAction(xid, handle);
return;
@ -256,19 +335,42 @@ lladd_hash_iterator * ThashIterator(int xid, recordid hashHeader, int keySize, i
it->bucket.slot = 0;
it->keySize = keySize;
it->valueSize = valueSize;
it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize);
if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) {
it->it = NULL;
it->pit= TpagedListIterator(xid, it->bucket);
} else {
it->pit = NULL;
it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize);
}
return it;
}
int ThashNext(int xid, lladd_hash_iterator * it, byte ** key, int * keySize, byte** value, int * valueSize) {
while(!TlinkedListNext(xid, it->it, key, keySize, value, valueSize)) {
it->bucket.slot++;
if(it->bucket.slot < it->numBuckets) {
it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize);
} else {
free(it);
return 0;
if(it->it) {
assert(!it->pit);
while(!TlinkedListNext(xid, it->it, key, keySize, value, valueSize)) {
it->bucket.slot++;
if(it->bucket.slot < it->numBuckets) {
it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize);
} else {
free(it);
return 0;
}
}
} else {
assert(it->pit);
while(!TpagedListNext(xid, it->pit, key, keySize, value, valueSize)) {
it->bucket.slot++;
if(it->bucket.slot < it->numBuckets) {
recordid bucketList;
Tread(xid, it->bucket, &(bucketList.page));
bucketList.slot =0;
bucketList.size =0;
it->pit = TpagedListIterator(xid, bucketList);
} else {
free(it);
return 0;
}
}
}
return 1;

View file

@ -33,17 +33,25 @@ recordid TpagedListAlloc(int xid) {
return list;
}
/**
 * Report whether a paged list has a successor page.
 *
 * Reads the long stored in slot 0 of the page named by @c list (the value
 * this function calls nextPage) and tests it against zero.
 *
 * NOTE(review): an earlier draft carried a commented-out
 * TpagedListCompact(xid, list) call at the top of this function; no
 * compaction is performed before the check.
 *
 * @param xid  Transaction id handed through to Tread().
 * @param list Record id of the list; only its page is used, since slot and
 *             size are overridden on a local copy.
 * @return Non-zero if the stored next-page value is non-zero, 0 otherwise.
 */
int TpagedListSpansPages(int xid, recordid list) {
  recordid header = list;      /* work on a copy; the argument is by value anyway */
  long successor;

  header.slot = 0;             /* slot 0 is where the next-page value is read from */
  header.size = sizeof(long);
  Tread(xid, header, &successor);

  return successor != 0;
}
int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, const byte * value, int valueSize) {
int ret = 0;
// if find in list, return 1
byte * val;
if(keySize == TpagedListFind(xid, list, key, keySize, &val)) {
if(-1 != TpagedListFind(xid, list, key, keySize, &val)) {
free(val);
ret = 1;
int removed = TpagedListRemove(xid, list, key, keySize);
assert(removed);
// delete from list
}
Page * p = loadPage(list.page);
int recordSize = (sizeof(short)+keySize+valueSize);
@ -64,13 +72,13 @@ int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, cons
nextPage = TpageAlloc(xid);
Tset(xid, list, &nextPage);
p = loadPage(nextPage);
// slottedPageInitialize(p);
//slottedPageInitialize(p);
// ** @todo shouldn't a log entry be generated here?? */
list.page = nextPage;
assert(slottedFreespace(p) >= recordSize);
long zero = 0;
TallocFromPage(xid, list.page, sizeof(long));
Tset(xid, list, &zero);
recordid rid = TallocFromPage(xid, list.page, sizeof(long));
Tset(xid, rid, &zero);
} else {
releasePage(p);
list.page = nextPage;
@ -83,7 +91,7 @@ int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, cons
}
releasePage(p);
// printf("recordsize = %d\n", recordSize);
recordid rid = TallocFromPage(xid, list.page, recordSize); // Allocates a record at a location given by the caller
short* record = malloc(recordSize);
*record = keySize;
@ -135,7 +143,7 @@ int TpagedListFind(int xid, recordid list, const byte * key, int keySize, byte *
list.page = nextPage;
}
return 0;
return -1;
}
int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) {
long nextPage = 1;
@ -158,7 +166,7 @@ int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) {
if(*dat == keySize && !memcmp(dat+1, key, keySize)) {
Tdealloc(xid, entry);
assert(-1 == TrecordSize(xid, entry));
free(dat);
return 1;
}
@ -179,7 +187,7 @@ int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) {
int TpagedListMove(int xid, recordid start_list, recordid end_list, const byte *key, int keySize) {
byte * value;
int valueSize = TpagedListFind(xid, start_list, key, keySize, &value);
if(valueSize) {
if(valueSize != -1) {
int ret = TpagedListRemove(xid, start_list, key, keySize);
assert(ret);
ret = TpagedListInsert(xid, end_list, key, keySize, value, valueSize);

View file

@ -322,7 +322,7 @@ int getRecordTypeUnlocked(int xid, Page * p, recordid rid) {
return UNINITIALIZED_RECORD;
} else if(page_type == SLOTTED_PAGE) {
if(*numslots_ptr(p) <= rid.slot || *slot_length_ptr(p, rid.slot) == INVALID_SLOT) {
if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, rid.slot) == INVALID_SLOT /*|| *slot_length_ptr(p, rid.slot) == INVALID_SLOT*/) {
return UNINITIALIZED_PAGE;
} else if(*slot_length_ptr(p, rid.slot) == BLOB_REC_SIZE) {
return BLOB_RECORD;

View file

@ -110,6 +110,68 @@ START_TEST(linearHashNTAtest)
Tcommit(xid);
Tdeinit();
} END_TEST
/** @test
Exercises a hash table created with VARIABLE_LENGTH for both key and
value size, in three transactions:
 1. insert NUM_ENTRIES int keys with recordid values, checking each key
    is absent before the insert and readable afterwards; commit.
 2. remove every tenth key, checking the removal took effect; abort.
 3. look up every key again and check its value, i.e. check that the
    aborted removals are no longer visible; commit.
NOTE(review): several calls with side effects (ThashRemove, ThashLookup)
sit inside assert(); compiling with NDEBUG would remove them.
*/
START_TEST(linearHashNTAVariableSizetest)
{
Tinit();
int xid = Tbegin();
recordid val;
/* Both key and value sizes are declared variable-length. */
recordid hashHeader = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH);
recordid * val2;
int i;
printf("\n"); fflush(stdout);
/* Phase 1: populate the table. */
for(i = 0; i < NUM_ENTRIES; i++) {
/* Progress marker, printed ten times over the loop. */
if(!(i % (NUM_ENTRIES/10))) {
printf("."); fflush(stdout);
}
/* The value is derived from the key so it can be re-checked later. */
val.page = i * NUM_ENTRIES;
val.slot = val.page * NUM_ENTRIES;
val.size = val.slot * NUM_ENTRIES;
/* The key must not be present yet; the test expects -1 for a miss. */
assert(-1 == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2));
ThashInsert(xid, hashHeader, (byte*)&i, sizeof(int), (byte*)&val, sizeof(recordid));
/* On a hit the test expects the stored value's size to be returned. */
assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2));
assert(val2->page == i * NUM_ENTRIES);
assert(val2->slot == val2->page * NUM_ENTRIES);
assert(val2->size == val2->slot * NUM_ENTRIES);
/* The looked-up value is released by the caller. */
free(val2);
}
Tcommit(xid);
printf("\n"); fflush(stdout);
/* Phase 2: remove every tenth key inside a transaction that is aborted. */
xid = Tbegin();
for(i = 0; i < NUM_ENTRIES; i+=10){
if(!(i % (NUM_ENTRIES/10))) {
printf("-"); fflush(stdout);
}
assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2));
free(val2);
/* First removal must report success ... */
assert(ThashRemove(xid, hashHeader, (byte*)&i, sizeof(int)));
/* ... after which the key is gone ... */
assert(-1==ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2));
/* ... and a second removal must report that nothing was removed. */
assert(!ThashRemove(xid, hashHeader, (byte*)&i, sizeof(int)));
}
printf("\nabort()\n"); fflush(stdout);
Tabort(xid);
/* Phase 3: every key, including those removed above, must be readable
   with its original value. */
xid = Tbegin();
for(i = 0; i < NUM_ENTRIES; i++) {
if(!(i % (NUM_ENTRIES/10))) {
printf("+"); fflush(stdout);
}
assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2));
assert(val2->page == i * NUM_ENTRIES);
assert(val2->slot == val2->page * NUM_ENTRIES);
assert(val2->size == val2->slot * NUM_ENTRIES);
free(val2);
}
Tcommit(xid);
Tdeinit();
} END_TEST
#define NUM_THREADS 100
#define NUM_T_ENTRIES 1000
typedef struct {
@ -235,7 +297,7 @@ Suite * check_suite(void) {
/* Sub tests are added, one per line, here */
tcase_add_test(tc, linearHashNTAVariableSizetest);
tcase_add_test(tc, linearHashNTAIteratortest);
tcase_add_test(tc, linearHashNTAtest);
tcase_add_test(tc, linearHashNTAThreadedTest);