Amortize bucket initialization costs for linear hash tables. The best-case improvement (~20%) is seen with hash tables that use fixed-length key/value pairs under insert-only workloads.

This commit is contained in:
Sears Russell 2010-05-12 17:51:20 +00:00
parent dc1f799a37
commit 71292538e8
2 changed files with 34 additions and 30 deletions

View file

@ -7,41 +7,40 @@
// Benchmark driver: creates a hash with ThashCreate and fills it with integer
// key/value pairs via ThashInsert, spreading the inserts over several
// transactions.
//
// Command line: <prog> xact_count count [extra]
//   argv[1]  xact_count - number of transactions to run (parsed with atoi).
//   argv[2]  count      - number of inserts per transaction (parsed with atoi).
//   extra               - if a third argument is present (argc == 4), the hash
//                         is created with sizeof(int) key and value sizes
//                         instead of VARIABLE_LENGTH; its value is never read.
//
// NOTE(review): this listing looks like a diff rendered without its +/- markers,
// so removed and added lines of main appear side by side (both asserts, two
// `int xid = Tbegin();`, two insert loops, two Tdeinit() calls). Confirm against
// the repository before treating it as compilable source.
int main(int argc, char** argv) {
assert(argc == 3);
assert(argc == 3 || argc == 4);
int xact_count = atoi(argv[1]);
int count = atoi(argv[2]);
int k;
// Disabled: removal of four files by name before the run.
/* unlink("storefile.txt");
unlink("logfile.txt");
unlink("blob0_file.txt");
unlink("blob1_file.txt"); */
// Only the presence of a third argument matters, not its contents.
int fixed_len = (argc == 4);
Tinit();
int xid = Tbegin();
//recordid hash = ThashCreate(xid, sizeof(int), sizeof(int));
recordid hash = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH);
Tcommit(xid);
// `i` is deliberately not reset per transaction: transaction k inserts the
// keys count*k .. count*(k+1)-1, each mapped to itself as the value.
int i = 0;
for(k = 0; k < xact_count; k++) {
xid = Tbegin();
for(;i < count *(k+1) ; i++) {
ThashInsert(xid, hash, (byte*)&i, sizeof(int), (byte*)&i, sizeof(int));
}
Tcommit(xid);
}
int xid = Tbegin();
Tdeinit();
recordid hash;
// Pick fixed-size or variable-length key/value sizes based on the command line.
if(fixed_len) {
hash = ThashCreate(xid, sizeof(int), sizeof(int));
} else {
hash = ThashCreate(xid, VARIABLE_LENGTH, VARIABLE_LENGTH);
}
// The hash is created in its own transaction, committed before any insert.
Tcommit(xid);
// Same insert pattern as above: one transaction per k, `count` inserts each,
// with `i` carried across transactions so no key is inserted twice.
int i = 0;
for(int k = 0; k < xact_count; k++) {
xid = Tbegin();
for(;i < count *(k+1) ; i++) {
ThashInsert(xid, hash, (byte*)&i, sizeof(int), (byte*)&i, sizeof(int));
}
Tcommit(xid);
}
Tdeinit();
}

View file

@ -93,6 +93,7 @@ compensated_function recordid ThashCreate(int xid, int keySize, int valueSize) {
}
} else {
#ifdef ARRAY_LIST_OLD_ALLOC
byte * entry = calloc(1, lhh.buckets.size);
for(i = 0; i < HASH_INIT_ARRAY_LIST_COUNT; i++) {
bucket.slot = i;
@ -101,6 +102,7 @@ compensated_function recordid ThashCreate(int xid, int keySize, int valueSize) {
} end_action_ret(NULLRID);
}
free (entry);
#endif
}
lhh.keySize = keySize;
lhh.valueSize = valueSize;
@ -360,16 +362,19 @@ compensated_function static void ThashSplitBucket(int xid, recordid hashHeader,
recordid new_bucket_rid = lhh->buckets;
old_bucket_rid.slot = old_bucket;
new_bucket_rid.slot = new_bucket;
TarrayListExtend(xid, lhh->buckets, 1);
if(!(new_bucket % HASH_INIT_ARRAY_LIST_COUNT)) {
TarrayListExtend(xid, lhh->buckets, HASH_INIT_ARRAY_LIST_COUNT);
}
recordid new_bucket_list; // will be uninitialized if we have fixed length entries.
if(lhh->keySize == VARIABLE_LENGTH || lhh->valueSize == VARIABLE_LENGTH) {
new_bucket_list = TpagedListAlloc(xid);
Tset(xid, new_bucket_rid, &new_bucket_list);
} else {
#ifdef ARRAY_LIST_OLD_ALLOC
byte * entry = calloc(1, lhh->buckets.size);
Tset(xid, new_bucket_rid, entry);
free(entry);
#endif
}
if(lhh->nextSplit < stasis_util_two_to_the(lhh->bits-1)-1) {
lhh->nextSplit++;