fix latch ordering in the concurrent buffer manager. It no longer takes lock(p->loadlatch) while holding a hashtable bucket latch. Instead, it trylocks, then compensates for in-progress changes if the trylock fails.

Sears Russell 2010-04-23 23:51:12 +00:00
parent 04de939e42
commit 74803d354e


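The pattern behind the fix, in isolation: a thread that blocks on a page latch while holding a bucket latch can deadlock against a thread that holds the page latch and wants the bucket. Below is a minimal sketch of the trylock-and-compensate idiom in plain pthreads; the page_t/bucket_t types and the pin_cached_page name are illustrative stand-ins, not Stasis's actual API.

    #include <pthread.h>
    #include <stddef.h>

    typedef struct {
      pthread_rwlock_t loadlatch;
    } page_t;

    typedef struct {
      pthread_mutex_t latch;   /* the bucket latch */
      page_t *page;            /* page cached in this bucket, or NULL */
    } bucket_t;

    /* Pin a cached page without deadlocking. We hold the bucket latch, so we
     * must not block on loadlatch; instead, trylock and compensate (here, by
     * reporting a miss) when the page is latched for writeback or eviction. */
    page_t *pin_cached_page(bucket_t *b) {
      page_t *p = NULL;
      pthread_mutex_lock(&b->latch);
      if(b->page && pthread_rwlock_tryrdlock(&b->page->loadlatch) == 0) {
        p = b->page;           /* pinned; caller unlocks loadlatch when done */
      }
      pthread_mutex_unlock(&b->latch);
      return p;                /* NULL means miss; caller falls back to a full load */
    }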
@@ -87,7 +87,7 @@ static void * writeBackWorker(void * bmp) {
   while(ch->running && (!needFlush(bm))) {
     DEBUG("Sleeping in write back worker (count = %lld)\n", stasis_dirty_page_table_dirty_count(bh->dpt));
     pthread_mutex_lock(&mut);
-    pthread_cond_wait(&ch->needFree, &mut);
+    pthread_cond_wait(&ch->needFree, &mut); // XXX Make sure it's OK to have many different mutexes waiting on the same cond.
     pthread_mutex_unlock(&mut);
     DEBUG("Woke write back worker (count = %lld)\n", stasis_dirty_page_table_dirty_count(bh->dpt));
   }
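A note on the XXX above: POSIX leaves the behavior undefined when concurrent pthread_cond_wait() calls on the same condition variable use different mutexes, so this pattern is only safe if at most one thread at a time waits on ch->needFree.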
@@ -104,8 +104,16 @@ static Page * chGetCachedPage(stasis_buffer_manager_t* bm, int xid, const pageid
   stasis_buffer_concurrent_hash_t * ch = bm->impl;
   hashtable_bucket_handle_t h;
   Page * p = hashtable_lookup_lock(ch->ht, pageid, &h);
-  readlock(p->loadlatch, 0);
-  ch->lru->remove(ch->lru, p);
+  if(p) {
+    int succ = tryreadlock(p->loadlatch, 0);
+    if(!succ) {
+      // Weird corner case: if we're writing back the page during the call, this returns NULL instead of blocking.
+      // Could go either way on what the correct behavior is.
+      p = NULL;
+    } else {
+      ch->lru->remove(ch->lru, p);
+    }
+  }
   hashtable_unlock(&h);
   return p;
 }
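With this change, chGetCachedPage can return NULL not only for an uncached page but also for a cached page that is mid-writeback, so callers must treat NULL as a miss. A hypothetical caller is sketched below; the getCachedPage/loadPage member names follow the buffer-manager vtable visible in this diff, and passing a NULL handle to loadPage is an assumption.

    // Hypothetical wrapper: try the cheap cached path first, then fall back.
    Page * pin_page(stasis_buffer_manager_t *bm, int xid, pageid_t pageid, pagetype_t type) {
      Page * p = bm->getCachedPage(bm, xid, pageid);
      if(p == NULL) {
        // Miss, or the page was latched for writeback and tryreadlock failed;
        // either way, the blocking load path recovers.
        p = bm->loadPage(bm, NULL, xid, pageid, type);
      }
      return p;
    }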
@@ -118,7 +126,7 @@ static void deinitTLS(void *tlsp) {
   while(hashtable_test_and_set(ch->ht,p->id, p)) {
     p->id --;
   }
-  ch->lru->insert(ch->lru, tls->p);
+  ch->lru->insert(ch->lru, tls->p); // TODO: put it into the LRU end instead of the MRU end, so the memory is treated as stale.
 }
 static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_manager_t* bm) {
   stasis_buffer_concurrent_hash_t *ch = bm->impl;
@@ -135,19 +143,29 @@ static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_ma
     hashtable_bucket_handle_t h;
     tls->p = hashtable_remove_begin(ch->ht, tmp->id, &h);
     if(tls->p) {
-      // TODO: It would be nice to make this a trywritelock to avoid blocking here.
-      // However, this would cause subtle problems; page->id could change while we're in LRU. LRU partitions its latches on page->id, and references state
-      // stored with the page...
-      writelock(tls->p->loadlatch,0);
+      // It used to be the case that we could get in trouble because page->id could change concurrently with us. However, this is no longer a problem,
+      // since getStaleAndRemove is atomic, and the only code that changes page->id does so with pages that are in TLS (and therefore went through getStaleAndRemove).
+      int succ =
+        trywritelock(tls->p->loadlatch,0); // if this fails, it is because someone else has pinned the page (it can't be due to eviction, because the lru is atomic)
+      if(succ) {
       // The getStaleAndRemove was not atomic with the hashtable remove, which is OK (but we can't trust tmp anymore...)
       tmp = 0;
       if(tls->p->id >= 0) {
         ch->page_handle->write(ch->page_handle, tls->p);
       }
       hashtable_remove_finish(ch->ht, &h); // need to hold bucket lock until page is flushed. Otherwise, another thread could read stale data from the filehandle.
+      tls->p->id = INVALID_PAGE; // in case loadPage has a pointer to it, and we did this in a race with it; when loadPage reacquires loadlatch, it will notice the discrepancy
       assert(!tls->p->dirty);
       unlock(tls->p->loadlatch);
       break;
+      } else {
+        // Put the page back in LRU before making it accessible (again) via the hash.
+        // Otherwise, someone could try to pin it.
+        ch->lru->insert(ch->lru, tmp); // OK because lru now does refcounting, and we know that tmp->id can't change (because we're the ones that got it from LRU)
+        hashtable_remove_cancel(ch->ht, &h);
+        tls->p = NULL; // This iteration of the loop failed; set this so the loop runs again.
+      }
     } else {
       // page is not in hashtable, but it is in LRU. getStale and hashtable remove are not atomic.
       // However, we cannot observe this; the lru remove happens before the hashtable remove,
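The else branch added above is the compensation path from the commit message: when the victim's loadlatch can't be taken, the partial removal is undone rather than blocking under the bucket latch. Schematically, under assumed helper names that stand in for the calls in the diff:

    #include <pthread.h>
    #include <stddef.h>

    #define INVALID_PAGE (-1L)

    typedef struct { long id; int dirty; pthread_rwlock_t loadlatch; } page_t;
    typedef struct lru_t lru_t;                 /* hypothetical atomic LRU */
    typedef struct table_t table_t;             /* hypothetical latched hashtable */
    typedef struct bucket_handle_t bucket_handle_t;

    page_t * lru_get_stale_and_remove(lru_t * lru);
    void     lru_insert(lru_t * lru, page_t * p);
    page_t * ht_remove_begin(table_t * ht, long id, bucket_handle_t ** h);
    void     ht_remove_finish(table_t * ht, bucket_handle_t * h);
    void     ht_remove_cancel(table_t * ht, bucket_handle_t * h);
    void     write_back(page_t * p);

    /* Evict a victim without blocking under the bucket latch; compensate on failure. */
    page_t * get_free_frame(table_t * ht, lru_t * lru) {
      for(;;) {
        page_t * victim = lru_get_stale_and_remove(lru);   /* atomic */
        bucket_handle_t * h;
        page_t * p = ht_remove_begin(ht, victim->id, &h);
        if(p && 0 == pthread_rwlock_trywrlock(&p->loadlatch)) {
          if(p->dirty) { write_back(p); }  /* bucket latch still held, so no
                                              reader can see stale on-disk data */
          ht_remove_finish(ht, h);
          p->id = INVALID_PAGE;            /* lets a racing loadPage detect reuse */
          pthread_rwlock_unlock(&p->loadlatch);
          return p;                        /* a free frame for the caller */
        }
        /* Compensate: the page is pinned. Put it back in the LRU before it
         * becomes reachable through the hash again, then pick another victim. */
        if(p) {
          lru_insert(lru, victim);
          ht_remove_cancel(ht, h);
        }
      }
    }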
@@ -162,12 +180,21 @@ static inline stasis_buffer_concurrent_hash_tls_t * populateTLS(stasis_buffer_ma
   return tls;
 }
+static void chReleasePage(stasis_buffer_manager_t * bm, Page * p);
 static Page * chLoadPageImpl_helper(stasis_buffer_manager_t* bm, int xid, const pageid_t pageid, int uninitialized, pagetype_t type) {
   stasis_buffer_concurrent_hash_t *ch = bm->impl;
   stasis_buffer_concurrent_hash_tls_t *tls = populateTLS(bm);
   hashtable_bucket_handle_t h;
-  Page * p;
+  Page * p = 0;
   int first = 1;
+  do {
+    if(p) { // the last time around pinned the wrong page!
+      chReleasePage(bm, p);
+      // ch->lru->insert(ch->lru, p);
+      // unlock(p->loadlatch);
+    }
   while(NULL == (p = hashtable_test_and_set_lock(ch->ht, pageid, tls->p, &h))) {
     first = 0;
     // The page was not in the hash. Now it is up to us.
@@ -196,10 +223,14 @@ static Page * chLoadPageImpl_helper(stasis_buffer_manager_t* bm, int xid, const
       tls = populateTLS(bm);
       if(needFlush(bm)) { pthread_cond_signal(&ch->needFree); }
     }
-  readlock(p->loadlatch, 0);
-  ch->lru->remove(ch->lru, p);
+    ch->lru->remove(ch->lru, p); // this can happen before or after the hashtable unlock, since the invariant is "in lru -> in hashtable", and it's in the hash
   hashtable_unlock(&h);
-  assert(p->id == pageid);
+    // Spooky: it's not in LRU, and it's not been load latched. This
+    // is OK, but it's possible that populateTLS has a pointer to the page
+    // already, regardless of which hash bucket it's in.
+    readlock(p->loadlatch, 0);
+    // Now, we know that populateTLS won't evict the page, since it gets a writelock before doing so.
+  } while(p->id != pageid); // On the off chance that the page got evicted, we'll need to try again.
   return p;
 }
 static Page * chLoadPageImpl(stasis_buffer_manager_t *bm, stasis_buffer_manager_handle_t *h, int xid, const pageid_t pageid, pagetype_t type) {
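The do/while added above is a pin-and-validate loop: the loadlatch is acquired only after the bucket latch is dropped, so the frame can be evicted and recycled in the window between the two, and re-checking p->id after latching detects that. A condensed sketch of the idiom, with hypothetical cache_t/lookup_or_install/unpin names standing in for the machinery in the diff:

    #include <pthread.h>
    #include <stddef.h>

    typedef struct { long id; pthread_rwlock_t loadlatch; } page_t;
    typedef struct cache_t cache_t;

    page_t * lookup_or_install(cache_t * c, long pageid);  /* hypothetical: takes and
                                                              drops the bucket latch */
    void unpin(cache_t * c, page_t * p);                    /* hypothetical */

    /* Pin-and-validate: the read latch is taken with no bucket latch held, so
     * the frame may be recycled in between; re-check the id and retry. */
    page_t * pin(cache_t * c, long pageid) {
      page_t * p = NULL;
      do {
        if(p) { unpin(c, p); }                  /* last pass latched a recycled frame */
        p = lookup_or_install(c, pageid);
        pthread_rwlock_rdlock(&p->loadlatch);   /* may block; that's now safe */
      } while(p->id != pageid);                 /* id changed means evicted; retry */
      return p;
    }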