fix latch ordering in the concurrent buffer manager. It no longer does a blocking lock(p->loadlatch) while holding a hashtable bucket latch. Instead, it trylocks, then compensates for in-progress changes if the trylock fails.
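
The pattern in outline: the old code could block on a page latch while already holding a bucket latch, inverting the order used elsewhere and risking deadlock; the new code only ever trylocks the page latch under the bucket latch and treats failure as "page busy". A minimal sketch of that shape, using plain pthread locks in place of Stasis latches (all names below are hypothetical, not the Stasis API):

    #include <pthread.h>
    #include <stddef.h>

    typedef struct page {
      pthread_rwlock_t loadlatch;  /* stands in for p->loadlatch */
    } page_t;

    /* Look up a pinned page while holding the bucket latch.  The rule: never
     * block on loadlatch while the bucket latch is held.  Trylock instead,
     * and let the caller compensate for a failure (here, by acting as if the
     * page were not cached at all). */
    static page_t * lookup_pinned(pthread_mutex_t * bucket_latch, page_t * p) {
      pthread_mutex_lock(bucket_latch);
      if(p && pthread_rwlock_tryrdlock(&p->loadlatch) != 0) {
        p = NULL;  /* latch is busy; do not wait while latching the bucket */
      }
      pthread_mutex_unlock(bucket_latch);
      return p;    /* non-NULL => caller holds a read lock on p->loadlatch */
    }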

Sears Russell 2010-04-23 23:51:12 +00:00
parent 04de939e42
commit 74803d354e


@@ -87,7 +87,7 @@ static void * writeBackWorker(void * bmp) {
   while(ch->running && (!needFlush(bm))) {
     DEBUG("Sleeping in write back worker (count = %lld)\n", stasis_dirty_page_table_dirty_count(bh->dpt));
     pthread_mutex_lock(&mut);
-    pthread_cond_wait(&ch->needFree, &mut);
+    pthread_cond_wait(&ch->needFree, &mut); // XXX Make sure it's OK to have many different mutexes waiting on the same cond.
     pthread_mutex_unlock(&mut);
     DEBUG("Woke write back worker (count = %lld)\n", stasis_dirty_page_table_dirty_count(bh->dpt));
   }
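
On the XXX above: POSIX leaves the effect undefined when concurrent pthread_cond_wait() calls on one condition variable use different mutexes, so if each worker allocates its own mut, this wait is not portable. A minimal sketch of the portable shape, one mutex shared by all waiters (names hypothetical, not the Stasis code):

    #include <pthread.h>

    static pthread_mutex_t shared_mut = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  need_free  = PTHREAD_COND_INITIALIZER;
    static int dirty_count = 0;  /* predicate guarded by shared_mut */

    static void wait_until_flush_needed(int threshold) {
      pthread_mutex_lock(&shared_mut);
      while(dirty_count < threshold) {  /* re-check: wakeups may be spurious */
        pthread_cond_wait(&need_free, &shared_mut);
      }
      pthread_mutex_unlock(&shared_mut);
    }

    static void note_page_dirtied(int threshold) {
      pthread_mutex_lock(&shared_mut);
      if(++dirty_count >= threshold) {
        pthread_cond_signal(&need_free);
      }
      pthread_mutex_unlock(&shared_mut);
    }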
@@ -104,8 +104,16 @@ static Page * chGetCachedPage(stasis_buffer_manager_t* bm, int xid, const pageid
   stasis_buffer_concurrent_hash_t * ch = bm->impl;
   hashtable_bucket_handle_t h;
   Page * p = hashtable_lookup_lock(ch->ht, pageid, &h);
-  readlock(p->loadlatch, 0);
-  ch->lru->remove(ch->lru, p);
+  if(p) {
+    int succ = tryreadlock(p->loadlatch, 0);
+    if(!succ) {
+      // Corner case: if the page is being written back during this call, return NULL instead of blocking.
+      // Either behavior is arguably correct.
+      p = NULL;
+    } else {
+      ch->lru->remove(ch->lru, p);
+    }
+  }
   hashtable_unlock(&h);
   return p;
 }
@@ -118,7 +126,7 @@ static void deinitTLS(void *tlsp) {
   while(hashtable_test_and_set(ch->ht,p->id, p)) {
     p->id --;
   }
-  ch->lru->insert(ch->lru, tls->p);
+  ch->lru->insert(ch->lru, tls->p); // TODO: put it into the LRU end instead of the MRU end, so the memory is treated as stale.
 }
 static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_manager_t* bm) {
   stasis_buffer_concurrent_hash_t *ch = bm->impl;
@@ -135,19 +143,29 @@ static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_manager_t* bm) {
       hashtable_bucket_handle_t h;
       tls->p = hashtable_remove_begin(ch->ht, tmp->id, &h);
       if(tls->p) {
-        // TODO: It would be nice to make this a trywritelock to avoid blocking here.
-        // However, this would cause subtle problems; page->id could change while we're in LRU. LRU partitions its latches on page->id, and references state
-        // stored with the page...
-        writelock(tls->p->loadlatch,0);
-        // The getStaleAndRemove was not atomic with the hashtable remove, which is OK (but we can't trust tmp anymore...)
-        tmp = 0;
-        if(tls->p->id >= 0) {
-          ch->page_handle->write(ch->page_handle, tls->p);
-        }
-        hashtable_remove_finish(ch->ht, &h); // need to hold the bucket lock until the page is flushed; otherwise, another thread could read stale data from the filehandle.
-        assert(!tls->p->dirty);
-        unlock(tls->p->loadlatch);
-        break;
+        // It used to be the case that we could get in trouble because page->id could change concurrently with us. That is no longer a problem,
+        // since getStaleAndRemove is atomic, and the only code that changes page->id does so with pages that are in TLS (and therefore went through getStaleAndRemove).
+        int succ =
+          trywritelock(tls->p->loadlatch,0); // if this fails, it is because someone else has pinned the page (it can't be due to eviction, because the lru is atomic)
+
+        if(succ) {
+          // The getStaleAndRemove was not atomic with the hashtable remove, which is OK (but we can't trust tmp anymore...)
+          tmp = 0;
+          if(tls->p->id >= 0) {
+            ch->page_handle->write(ch->page_handle, tls->p);
+          }
+          hashtable_remove_finish(ch->ht, &h); // need to hold the bucket lock until the page is flushed; otherwise, another thread could read stale data from the filehandle.
+          tls->p->id = INVALID_PAGE; // in case loadPage has a pointer to it and raced with us; when loadPage reacquires loadlatch, it will notice the discrepancy
+          assert(!tls->p->dirty);
+          unlock(tls->p->loadlatch);
+          break;
+        } else {
+          // Put the page back in LRU before making it accessible (again) via the hash.
+          // Otherwise, someone could try to pin it.
+          ch->lru->insert(ch->lru, tmp); // OK because lru now does refcounting, and we know tmp->id can't change (we're the ones that got it from LRU)
+          hashtable_remove_cancel(ch->ht, &h);
+          tls->p = NULL; // This iteration of the loop failed; clear tls->p so the loop runs again.
+        }
       } else {
         // page is not in hashtable, but it is in LRU. getStale and hashtable remove are not atomic.
         // However, we cannot observe this; the lru remove happens before the hashtable remove,
@@ -162,44 +180,57 @@ static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_manager_t* bm) {
   return tls;
 }

+static void chReleasePage(stasis_buffer_manager_t * bm, Page * p);
+
 static Page * chLoadPageImpl_helper(stasis_buffer_manager_t* bm, int xid, const pageid_t pageid, int uninitialized, pagetype_t type) {
   stasis_buffer_concurrent_hash_t *ch = bm->impl;
   stasis_buffer_concurrent_hash_tls_t *tls = populateTLS(bm);
   hashtable_bucket_handle_t h;
-  Page * p;
+  Page * p = 0;
   int first = 1;
-  while(NULL == (p = hashtable_test_and_set_lock(ch->ht, pageid, tls->p, &h))) {
-    first = 0;
-    // The page was not in the hash. Now it is up to us.
-    p = tls->p;
-    tls->p = NULL;
-    int succ = trywritelock(p->loadlatch, 0);
-    assert(succ);
-    // this has to happen before the page enters LRU; concurrentWrapper (and perhaps future implementations) hash on pageid.
-    p->id = pageid;
-    // this has to happen after the hashtable insertion succeeds; otherwise, above, we could get a page from lru that isn't in the cache.
-    ch->lru->insert(ch->lru, p);
-    hashtable_unlock(&h);
-    if(uninitialized) {
-      type = UNINITIALIZED_PAGE;
-      stasis_page_loaded(p, UNINITIALIZED_PAGE);
-    } else {
-      ch->page_handle->read(ch->page_handle, p, type);
-    }
-    unlock(p->loadlatch);
-    tls = populateTLS(bm);
-    if(needFlush(bm)) { pthread_cond_signal(&ch->needFree); }
-  }
-  readlock(p->loadlatch, 0);
-  ch->lru->remove(ch->lru, p);
-  hashtable_unlock(&h);
-  assert(p->id == pageid);
+  do {
+    if(p) { // the last time around pinned the wrong page!
+      chReleasePage(bm, p);
+      // ch->lru->insert(ch->lru, p);
+      // unlock(p->loadlatch);
+    }
+    while(NULL == (p = hashtable_test_and_set_lock(ch->ht, pageid, tls->p, &h))) {
+      first = 0;
+      // The page was not in the hash. Now it is up to us.
+      p = tls->p;
+      tls->p = NULL;
+      int succ = trywritelock(p->loadlatch, 0);
+      assert(succ);
+      // this has to happen before the page enters LRU; concurrentWrapper (and perhaps future implementations) hash on pageid.
+      p->id = pageid;
+      // this has to happen after the hashtable insertion succeeds; otherwise, above, we could get a page from lru that isn't in the cache.
+      ch->lru->insert(ch->lru, p);
+      hashtable_unlock(&h);
+      if(uninitialized) {
+        type = UNINITIALIZED_PAGE;
+        stasis_page_loaded(p, UNINITIALIZED_PAGE);
+      } else {
+        ch->page_handle->read(ch->page_handle, p, type);
+      }
+      unlock(p->loadlatch);
+      tls = populateTLS(bm);
+      if(needFlush(bm)) { pthread_cond_signal(&ch->needFree); }
+    }
+    ch->lru->remove(ch->lru, p); // this can happen before or after the hashtable unlock, since the invariant is "in lru -> in hashtable", and the page is in the hash
+    hashtable_unlock(&h);
+    // Spooky: the page is not in LRU, and it has not been load latched. This
+    // is OK, but it's possible that populateTLS has a pointer to the page
+    // already, regardless of which hash bucket it's in.
+    readlock(p->loadlatch, 0);
+    // Now we know that populateTLS won't evict the page, since it takes a writelock before doing so.
+  } while(p->id != pageid); // On the off chance that the page got evicted, try again.
   return p;
 }
 static Page * chLoadPageImpl(stasis_buffer_manager_t *bm, stasis_buffer_manager_handle_t *h, int xid, const pageid_t pageid, pagetype_t type) {
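
The retry loop added to chLoadPageImpl_helper pins a frame and then validates it: eviction (in populateTLS) changes p->id only while holding a write lock on loadlatch, so a reader that acquires the read lock and still sees the expected id knows the frame was not recycled underneath it. A reduced sketch of that pin/validate/retry shape, assuming a find_or_install() helper for the hash lookup (all names hypothetical):

    #include <pthread.h>
    #include <stdint.h>

    typedef int64_t pageid_t;
    typedef struct page { pthread_rwlock_t loadlatch; pageid_t id; } page_t;

    /* Provided elsewhere in this sketch: a hash lookup that installs a free
     * frame under the bucket latch when the page is absent, returning the
     * frame unlatched. */
    extern page_t * find_or_install(pageid_t pageid);

    page_t * load_page(pageid_t pageid) {
      page_t * p = NULL;
      do {
        if(p) pthread_rwlock_unlock(&p->loadlatch); /* last try pinned a recycled frame */
        p = find_or_install(pageid);
        pthread_rwlock_rdlock(&p->loadlatch);       /* pin */
        /* Eviction rewrites p->id under a write lock on loadlatch, so if the
         * id still matches while we hold the read lock, the frame is ours. */
      } while(p->id != pageid);
      return p;
    }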