From 56d98adcbff9c605496806bf67a6916ab02d152b Mon Sep 17 00:00:00 2001 From: Sears Russell Date: Fri, 28 Jan 2005 21:28:23 +0000 Subject: [PATCH] Working, but slow and non-concurrent variable length key/value support for linear hashtable. --- lladd/operations.h | 3 +- lladd/operations/linearHashNTA.h | 9 ++ lladd/operations/pageOrientedListNTA.h | 1 + src/lladd/operations/linearHashNTA.c | 178 ++++++++++++++++----- src/lladd/operations/pageOrientedListNTA.c | 28 ++-- src/lladd/page.c | 2 +- test/lladd/check_linearHashNTA.c | 64 +++++++- 7 files changed, 234 insertions(+), 51 deletions(-) diff --git a/lladd/operations.h b/lladd/operations.h index 93b433d..5ad282d 100644 --- a/lladd/operations.h +++ b/lladd/operations.h @@ -163,8 +163,9 @@ typedef struct { #include "operations/naiveLinearHash.h" #include "operations/nestedTopActions.h" #include "operations/linkedListNTA.h" -#include "operations/linearHashNTA.h" #include "operations/pageOrientedListNTA.h" +#include "operations/linearHashNTA.h" + extern Operation operationsTable[]; /* [MAX_OPERATIONS]; memset somewhere */ diff --git a/lladd/operations/linearHashNTA.h b/lladd/operations/linearHashNTA.h index c6a06ff..523f7c8 100644 --- a/lladd/operations/linearHashNTA.h +++ b/lladd/operations/linearHashNTA.h @@ -22,6 +22,14 @@ #ifndef __LINEAR_HASH_NTA_H #define __LINEAR_HASH_NTA_H +/** Currently, only used in the type field of the iterators. */ +#define FIXED_LENGTH_HASH 0 +#define VARIABLE_LENGTH_HASH 1 + +/** Pass this into the keySize and/or valueSize parameter of the + constructor below if the hashtable should support variable length + keys and/or values, respectively. 
*/ +#define VARIABLE_LENGTH -1 typedef struct { recordid hashHeader; @@ -30,6 +38,7 @@ typedef struct { int keySize; int valueSize; lladd_linkedList_iterator * it; + lladd_pagedList_iterator * pit; } lladd_hash_iterator; recordid ThashCreate(int xid, int keySize, int valSize); diff --git a/lladd/operations/pageOrientedListNTA.h b/lladd/operations/pageOrientedListNTA.h index acb29bd..5ff6530 100644 --- a/lladd/operations/pageOrientedListNTA.h +++ b/lladd/operations/pageOrientedListNTA.h @@ -84,6 +84,7 @@ lladd_pagedList_iterator * TpagedListIterator(int xid, recordid list); int TpagedListNext(int xid, lladd_pagedList_iterator * it, byte ** key, int * keySize, byte ** value, int * valueSize); recordid TpagedListAlloc(int xid); void TpagedListDelete(int xid, recordid list); +int TpagedListSpansPages(int xid, recordid list); Operation getPagedListInsert(); Operation getPagedListRemove(); #endif diff --git a/src/lladd/operations/linearHashNTA.c b/src/lladd/operations/linearHashNTA.c index 3631482..2f554f2 100644 --- a/src/lladd/operations/linearHashNTA.c +++ b/src/lladd/operations/linearHashNTA.c @@ -37,16 +37,28 @@ static void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * l recordid ThashCreate(int xid, int keySize, int valueSize) { recordid hashHeader = Talloc(xid, sizeof(lladd_hash_header)); lladd_hash_header lhh; - lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(lladd_linkedList_entry) + keySize + valueSize); + if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) { + lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(long)); + } else { + lhh.buckets = TarrayListAlloc(xid, HASH_INIT_ARRAY_LIST_COUNT, HASH_INIT_ARRAY_LIST_MULT, sizeof(lladd_linkedList_entry) + keySize + valueSize); + } TarrayListExtend(xid, lhh.buckets, HASH_INIT_ARRAY_LIST_COUNT); int i; - byte * entry = calloc(1, lhh.buckets.size); recordid bucket = lhh.buckets; - for(i = 0; 
i < HASH_INIT_ARRAY_LIST_COUNT; i++) { - bucket.slot = i; - Tset(xid, bucket, entry); + if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) { + for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) { + recordid rid = TpagedListAlloc(xid); + bucket.slot = i; + Tset(xid, bucket, &(rid.page)); + } + } else { + byte * entry = calloc(1, lhh.buckets.size); + for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) { + bucket.slot = i; + Tset(xid, bucket, entry); + } + free (entry); } - free (entry); lhh.keySize = keySize; lhh.valueSize = valueSize; lhh.nextSplit = 0; @@ -139,16 +151,37 @@ static int __ThashInsert(int xid, recordid hashHeader, const byte* key, int keyS Tread(xid, hashHeader, &lhh); lhh.numEntries ++; - - if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * HASH_FILL_FACTOR)) { - ThashSplitBucket(xid, hashHeader, &lhh); + if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) { + /* if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * (HASH_FILL_FACTOR * 40))) { + ThashSplitBucket(xid, hashHeader, &lhh); + } */ + } else { + if(lhh.numEntries > (int)((double)(lhh.nextSplit + twoToThe(lhh.bits-1)) * HASH_FILL_FACTOR)) { + ThashSplitBucket(xid, hashHeader, &lhh); + } } - assert(lhh.keySize == keySize); assert(lhh.valueSize == valueSize); recordid bucket = lhh.buckets; bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit); - int ret = TlinkedListInsert(xid, bucket, key, keySize, value, valueSize); + int ret; + if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) { + + recordid bucketList; + Tread(xid, bucket, &(bucketList.page)); + bucketList.slot = 0; + bucketList.size = 0; + // int before = TpagedListSpansPages(xid, bucketList); + ret = TpagedListInsert(xid, bucketList, key, keySize, value, valueSize); + int after = TpagedListSpansPages(xid, bucketList); + if(after) { // Page overflowed... 
+ ThashSplitBucket(xid, hashHeader, &lhh); + } + + } else { + assert(lhh.keySize == keySize); assert(lhh.valueSize == valueSize); + ret = TlinkedListInsert(xid, bucket, key, keySize, value, valueSize); + } if(ret) { lhh.numEntries--; } Tset(xid, hashHeader, &lhh); @@ -189,12 +222,21 @@ static int __ThashRemove(int xid, recordid hashHeader, const byte * key, int key Tset(xid, hashHeader, &lhh); - assert(lhh.keySize == keySize); recordid bucket = lhh.buckets; bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit); - int ret = TlinkedListRemove(xid, bucket, key, keySize); + int ret; + if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) { + recordid bucketList; + Tread(xid, bucket, &(bucketList.page)); + bucketList.slot = 0; + bucketList.size = 0; + ret = TpagedListRemove(xid, bucketList, key, keySize); + } else { + assert(lhh.keySize == keySize); + ret = TlinkedListRemove(xid, bucket, key, keySize); + } return ret; } @@ -203,16 +245,26 @@ int ThashLookup(int xid, recordid hashHeader, const byte * key, int keySize, byt pthread_mutex_lock(&linear_hash_mutex); Tread(xid, hashHeader, &lhh); - assert(lhh.keySize == keySize); recordid bucket = lhh.buckets; bucket.slot = hash(key, keySize, lhh.bits, lhh.nextSplit); - int ret = TlinkedListFind(xid, bucket, key, keySize, value); + int ret; + if(lhh.keySize == VARIABLE_LENGTH || lhh.valueSize == VARIABLE_LENGTH) { + recordid bucketList; + Tread(xid, bucket, &(bucketList.page)); + bucketList.slot = 0; + bucketList.size = 0; + ret = TpagedListFind(xid, bucketList, key, keySize, value); + } else { + assert(lhh.keySize == keySize); + ret = TlinkedListFind(xid, bucket, key, keySize, value); + } pthread_mutex_unlock(&linear_hash_mutex); return ret; } static void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * lhh) { + // if(1) { return; } long old_bucket = lhh->nextSplit; long new_bucket = old_bucket + twoToThe(lhh->bits-1); recordid old_bucket_rid = lhh->buckets; @@ -221,27 +273,54 @@ static 
void ThashSplitBucket(int xid, recordid hashHeader, lladd_hash_header * l new_bucket_rid.slot = new_bucket; // void * handle = TbeginNestedTopAction(xid, OPERATION_NOOP, NULL, 0); TarrayListExtend(xid, lhh->buckets, 1); - byte * entry = calloc(1, lhh->buckets.size); - Tset(xid, new_bucket_rid, entry); - free(entry); + recordid new_bucket_list; // will be uninitialized if we have fixed length entries. + if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) { + new_bucket_list = TpagedListAlloc(xid); + Tset(xid, new_bucket_rid, &(new_bucket_list.page)); + } else { + byte * entry = calloc(1, lhh->buckets.size); + Tset(xid, new_bucket_rid, entry); + free(entry); + } if(lhh->nextSplit < twoToThe(lhh->bits-1)-1) { lhh->nextSplit++; } else { lhh->nextSplit = 0; lhh->bits++; } - lladd_linkedList_iterator * it = TlinkedListIterator(xid, old_bucket_rid, lhh->keySize, lhh->valueSize); - byte * key, *value; - int keySize, valueSize; - while(TlinkedListNext(xid, it, &key, &keySize, &value, &valueSize)) { - assert(valueSize == lhh->valueSize); - assert(keySize == lhh->keySize); - if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) { - TlinkedListRemove(xid, old_bucket_rid, key, keySize); - TlinkedListInsert(xid, new_bucket_rid, key, keySize, value, valueSize); + /** @todo linearHashNTA's split bucket should use the 'move' function call. */ + if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) { + recordid old_bucket_list; + // recordid new_bucket_list; + Tread(xid, old_bucket_rid, &(old_bucket_list.page)); + old_bucket_list.slot = 0; + old_bucket_list.size = 0; + // Tread(xid, new_bucket_rid, &(new_bucket_list.page)); // @todo could remember value from above. 
+ lladd_pagedList_iterator * pit = TpagedListIterator(xid, old_bucket_list); + byte *key, *value; + int keySize, valueSize; + while(TpagedListNext(xid, pit, &key, &keySize, &value, &valueSize)) { + if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) { + TpagedListRemove(xid, old_bucket_list, key, keySize); + TpagedListInsert(xid, new_bucket_list, key, keySize, value, valueSize); + } + free(key); + free(value); + } + } else { + lladd_linkedList_iterator * it = TlinkedListIterator(xid, old_bucket_rid, lhh->keySize, lhh->valueSize); + byte * key, *value; + int keySize, valueSize; + while(TlinkedListNext(xid, it, &key, &keySize, &value, &valueSize)) { + assert(valueSize == lhh->valueSize); + assert(keySize == lhh->keySize); + if(hash(key, keySize, lhh->bits, lhh->nextSplit) != old_bucket) { + TlinkedListRemove(xid, old_bucket_rid, key, keySize); + TlinkedListInsert(xid, new_bucket_rid, key, keySize, value, valueSize); + } + free(key); + free(value); } - free(key); - free(value); } // TendNestedTopAction(xid, handle); return; @@ -256,19 +335,47 @@ lladd_hash_iterator * ThashIterator(int xid, recordid hashHeader, int keySize, i it->bucket.slot = 0; it->keySize = keySize; it->valueSize = valueSize; - it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize); - + if(keySize == VARIABLE_LENGTH || valueSize == VARIABLE_LENGTH) { + it->it = NULL; + /* The bucket slot holds only the page of its paged list; read it first, as ThashNext does. */ + recordid bucketList; + Tread(xid, it->bucket, &(bucketList.page)); + bucketList.slot = 0; + bucketList.size = 0; + it->pit = TpagedListIterator(xid, bucketList); + } else { + it->pit = NULL; + it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize); + } return it; } int ThashNext(int xid, lladd_hash_iterator * it, byte ** key, int * keySize, byte** value, int * valueSize) { - while(!TlinkedListNext(xid, it->it, key, keySize, value, valueSize)) { - it->bucket.slot++; - if(it->bucket.slot < it->numBuckets) { - it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize); - } else { - free(it); - return 0; + if(it->it) { + assert(!it->pit); + while(!TlinkedListNext(xid, it->it, key, 
keySize, value, valueSize)) { + it->bucket.slot++; + if(it->bucket.slot < it->numBuckets) { + it->it = TlinkedListIterator(xid, it->bucket, it->keySize, it->valueSize); + } else { + free(it); + return 0; + } + } + } else { + assert(it->pit); + while(!TpagedListNext(xid, it->pit, key, keySize, value, valueSize)) { + it->bucket.slot++; + if(it->bucket.slot < it->numBuckets) { + recordid bucketList; + Tread(xid, it->bucket, &(bucketList.page)); + bucketList.slot =0; + bucketList.size =0; + it->pit = TpagedListIterator(xid, bucketList); + } else { + free(it); + return 0; + } } } return 1; diff --git a/src/lladd/operations/pageOrientedListNTA.c b/src/lladd/operations/pageOrientedListNTA.c index c1050b0..f0eacff 100644 --- a/src/lladd/operations/pageOrientedListNTA.c +++ b/src/lladd/operations/pageOrientedListNTA.c @@ -33,17 +33,25 @@ recordid TpagedListAlloc(int xid) { return list; } +int TpagedListSpansPages(int xid, recordid list) { + // TpagedListCompact(int xid, recordid list); + + list.slot = 0; + list.size = sizeof(long); + long nextPage; + Tread(xid, list, &nextPage); + return nextPage != 0; +} + int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, const byte * value, int valueSize) { int ret = 0; // if find in list, return 1 byte * val; - if(keySize == TpagedListFind(xid, list, key, keySize, &val)) { - + if(-1 != TpagedListFind(xid, list, key, keySize, &val)) { free(val); ret = 1; int removed = TpagedListRemove(xid, list, key, keySize); assert(removed); - // delete from list } Page * p = loadPage(list.page); int recordSize = (sizeof(short)+keySize+valueSize); @@ -64,13 +72,13 @@ int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, cons nextPage = TpageAlloc(xid); Tset(xid, list, &nextPage); p = loadPage(nextPage); - // slottedPageInitialize(p); + //slottedPageInitialize(p); // ** @todo shouldn't a log entry be generated here?? 
*/ list.page = nextPage; assert(slottedFreespace(p) >= recordSize); long zero = 0; - TallocFromPage(xid, list.page, sizeof(long)); - Tset(xid, list, &zero); + recordid rid = TallocFromPage(xid, list.page, sizeof(long)); + Tset(xid, rid, &zero); } else { releasePage(p); list.page = nextPage; @@ -83,7 +91,7 @@ int TpagedListInsert(int xid, recordid list, const byte * key, int keySize, cons } releasePage(p); - + // printf("recordsize = %d\n", recordSize); recordid rid = TallocFromPage(xid, list.page, recordSize); // Allocates a record at a location given by the caller short* record = malloc(recordSize); *record = keySize; @@ -135,7 +143,7 @@ int TpagedListFind(int xid, recordid list, const byte * key, int keySize, byte * list.page = nextPage; } - return 0; + return -1; } int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) { long nextPage = 1; @@ -158,7 +166,7 @@ int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) { if(*dat == keySize && !memcmp(dat+1, key, keySize)) { Tdealloc(xid, entry); - + assert(-1 == TrecordSize(xid, entry)); free(dat); return 1; } @@ -179,7 +187,7 @@ int TpagedListRemove(int xid, recordid list, const byte * key, int keySize) { int TpagedListMove(int xid, recordid start_list, recordid end_list, const byte *key, int keySize) { byte * value; int valueSize = TpagedListFind(xid, start_list, key, keySize, &value); - if(valueSize) { + if(valueSize != -1) { int ret = TpagedListRemove(xid, start_list, key, keySize); assert(ret); ret = TpagedListInsert(xid, end_list, key, keySize, value, valueSize); diff --git a/src/lladd/page.c b/src/lladd/page.c index 3bc8ee4..c3c7363 100644 --- a/src/lladd/page.c +++ b/src/lladd/page.c @@ -322,7 +322,7 @@ int getRecordTypeUnlocked(int xid, Page * p, recordid rid) { return UNINITIALIZED_RECORD; } else if(page_type == SLOTTED_PAGE) { - if(*numslots_ptr(p) <= rid.slot || *slot_length_ptr(p, rid.slot) == INVALID_SLOT) { + if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, 
rid.slot) == INVALID_SLOT /*|| *slot_length_ptr(p, rid.slot) == INVALID_SLOT*/) { return UNINITIALIZED_PAGE; } else if(*slot_length_ptr(p, rid.slot) == BLOB_REC_SIZE) { return BLOB_RECORD; diff --git a/test/lladd/check_linearHashNTA.c b/test/lladd/check_linearHashNTA.c index b52230b..8e07fea 100644 --- a/test/lladd/check_linearHashNTA.c +++ b/test/lladd/check_linearHashNTA.c @@ -110,6 +110,68 @@ START_TEST(linearHashNTAtest) Tcommit(xid); Tdeinit(); } END_TEST + +/** @test +*/ +START_TEST(linearHashNTAVariableSizetest) +{ + Tinit(); + + int xid = Tbegin(); + recordid val; + recordid hashHeader = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH); + recordid * val2; + int i; + printf("\n"); fflush(stdout); + for(i = 0; i < NUM_ENTRIES; i++) { + if(!(i % (NUM_ENTRIES/10))) { + printf("."); fflush(stdout); + } + val.page = i * NUM_ENTRIES; + val.slot = val.page * NUM_ENTRIES; + val.size = val.slot * NUM_ENTRIES; + assert(-1 == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2)); + ThashInsert(xid, hashHeader, (byte*)&i, sizeof(int), (byte*)&val, sizeof(recordid)); + assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2)); + assert(val2->page == i * NUM_ENTRIES); + assert(val2->slot == val2->page * NUM_ENTRIES); + assert(val2->size == val2->slot * NUM_ENTRIES); + free(val2); + } + + Tcommit(xid); + printf("\n"); fflush(stdout); + + xid = Tbegin(); + for(i = 0; i < NUM_ENTRIES; i+=10){ + if(!(i % (NUM_ENTRIES/10))) { + printf("-"); fflush(stdout); + } + assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2)); + free(val2); + assert(ThashRemove(xid, hashHeader, (byte*)&i, sizeof(int))); + assert(-1==ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2)); + assert(!ThashRemove(xid, hashHeader, (byte*)&i, sizeof(int))); + } + printf("\nabort()\n"); fflush(stdout); + Tabort(xid); + xid = Tbegin(); + for(i = 0; i < NUM_ENTRIES; i++) { + if(!(i % (NUM_ENTRIES/10))) { 
+ printf("+"); fflush(stdout); + } + assert(sizeof(recordid) == ThashLookup(xid, hashHeader, (byte*)&i, sizeof(int), (byte**)&val2)); + assert(val2->page == i * NUM_ENTRIES); + assert(val2->slot == val2->page * NUM_ENTRIES); + assert(val2->size == val2->slot * NUM_ENTRIES); + free(val2); + } + Tcommit(xid); + Tdeinit(); +} END_TEST + + + #define NUM_THREADS 100 #define NUM_T_ENTRIES 1000 typedef struct { @@ -235,7 +297,7 @@ Suite * check_suite(void) { /* Sub tests are added, one per line, here */ - + tcase_add_test(tc, linearHashNTAVariableSizetest); tcase_add_test(tc, linearHashNTAIteratortest); tcase_add_test(tc, linearHashNTAtest); tcase_add_test(tc, linearHashNTAThreadedTest);