Amortize bucket-initialization costs for linear hash tables. The best-case improvement (~20%) is seen with hashes that use fixed-length key/value pairs under insert-only workloads.

This commit is contained in:
Sears Russell 2010-05-12 17:51:20 +00:00
parent dc1f799a37
commit 71292538e8
2 changed files with 34 additions and 30 deletions

View file

@ -7,35 +7,34 @@
int main(int argc, char** argv) { int main(int argc, char** argv) {
assert(argc == 3); assert(argc == 3 || argc == 4);
int xact_count = atoi(argv[1]); int xact_count = atoi(argv[1]);
int count = atoi(argv[2]); int count = atoi(argv[2]);
int k;
/* unlink("storefile.txt"); int fixed_len = (argc == 4);
unlink("logfile.txt");
unlink("blob0_file.txt");
unlink("blob1_file.txt"); */
Tinit(); Tinit();
int xid = Tbegin(); int xid = Tbegin();
//recordid hash = ThashCreate(xid, sizeof(int), sizeof(int)); recordid hash;
recordid hash = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH); if(fixed_len) {
hash = ThashCreate(xid, sizeof(int), sizeof(int));
} else {
hash = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH);
}
Tcommit(xid); Tcommit(xid);
int i = 0; int i = 0;
for(k = 0; k < xact_count; k++) { for(int k = 0; k < xact_count; k++) {
xid = Tbegin(); xid = Tbegin();
for(;i < count *(k+1) ; i++) { for(;i < count *(k+1) ; i++) {
ThashInsert(xid, hash, (byte*)&i, sizeof(int), (byte*)&i, sizeof(int)); ThashInsert(xid, hash, (byte*)&i, sizeof(int), (byte*)&i, sizeof(int));
} }
Tcommit(xid); Tcommit(xid);

View file

@ -93,6 +93,7 @@ compensated_function recordid ThashCreate(int xid, int keySize, int valueSize) {
} }
} else { } else {
#ifdef ARRAY_LIST_OLD_ALLOC
byte * entry = calloc(1, lhh.buckets.size); byte * entry = calloc(1, lhh.buckets.size);
for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) { for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) {
bucket.slot = i; bucket.slot = i;
@ -101,6 +102,7 @@ compensated_function recordid ThashCreate(int xid, int keySize, int valueSize) {
} end_action_ret(NULLRID); } end_action_ret(NULLRID);
} }
free (entry); free (entry);
#endif
} }
lhh.keySize = keySize; lhh.keySize = keySize;
lhh.valueSize = valueSize; lhh.valueSize = valueSize;
@ -360,16 +362,19 @@ compensated_function static void ThashSplitBucket(int xid, recordid hashHeader,
recordid new_bucket_rid = lhh->buckets; recordid new_bucket_rid = lhh->buckets;
old_bucket_rid.slot = old_bucket; old_bucket_rid.slot = old_bucket;
new_bucket_rid.slot = new_bucket; new_bucket_rid.slot = new_bucket;
if(!(new_bucket % HASH_INIT_ARRAY_LIST_COUNT)) {
TarrayListExtend(xid, lhh->buckets, 1); TarrayListExtend(xid, lhh->buckets, HASH_INIT_ARRAY_LIST_COUNT);
}
recordid new_bucket_list; // will be uninitialized if we have fixed length entries. recordid new_bucket_list; // will be uninitialized if we have fixed length entries.
if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) { if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) {
new_bucket_list = TpagedListAlloc(xid); new_bucket_list = TpagedListAlloc(xid);
Tset(xid, new_bucket_rid, &new_bucket_list); Tset(xid, new_bucket_rid, &new_bucket_list);
} else { } else {
#ifdef ARRAY_LIST_OLD_ALLOC
byte * entry = calloc(1, lhh->buckets.size); byte * entry = calloc(1, lhh->buckets.size);
Tset(xid, new_bucket_rid, entry); Tset(xid, new_bucket_rid, entry);
free(entry); free(entry);
#endif
} }
if(lhh->nextSplit < stasis_util_two_to_the(lhh->bits-1)-1) { if(lhh->nextSplit < stasis_util_two_to_the(lhh->bits-1)-1) {
lhh->nextSplit++; lhh->nextSplit++;