handle null lsmIterator_ gracefully

This commit is contained in:
Sears Russell 2008-11-24 23:41:11 +00:00
parent 09641257ad
commit a321ba6e4e
3 changed files with 65 additions and 69 deletions

View file

@ -636,67 +636,35 @@ void TlsmFree(int xid, recordid tree, lsm_page_deallocator_t dealloc,
Tdealloc(xid, *(recordid*)allocator_state); Tdealloc(xid, *(recordid*)allocator_state);
} }
static const recordid lsmLookup(int xid, Page *node, int depth, static const recordid lsmLookup(int xid, Page *node, int depth, const byte *key,
const byte *key, size_t keySize, lsm_comparator_t cmp) { size_t keySize, lsm_comparator_t cmp) {
if(*recordcount_ptr(node) == FIRST_SLOT) { if(*recordcount_ptr(node) == FIRST_SLOT) {
return NULLRID; return NULLRID;
} }
assert(*recordcount_ptr(node) > FIRST_SLOT); assert(*recordcount_ptr(node) > FIRST_SLOT);
int match = FIRST_SLOT;
const lsmTreeNodeRecord *prev = readNodeRecord(xid,node,FIRST_SLOT,keySize); // don't need to compare w/ first item in tree.
slotid_t prev_slot = FIRST_SLOT; const lsmTreeNodeRecord *rec = readNodeRecord(xid,node,FIRST_SLOT,keySize);
int prev_cmp_key = cmp(prev+1,key);
// @todo binary search within each page
for(int i = FIRST_SLOT+1; i < *recordcount_ptr(node); i++) { for(int i = FIRST_SLOT+1; i < *recordcount_ptr(node); i++) {
const lsmTreeNodeRecord *rec = readNodeRecord(xid,node,i,keySize); rec = readNodeRecord(xid,node,i,keySize);
int cmpval = cmp(rec+1,key);
int rec_cmp_key = cmp(rec+1,key); if(cmpval > 0) {
break;
if(depth) {
if(prev_cmp_key <= 0 && rec_cmp_key > 0) {
pageid_t child_id = prev->ptr;
Page *child_page = loadPage(xid, child_id);
readlock(child_page->rwlatch,0);
recordid ret = lsmLookup(xid,child_page,depth-1,key,keySize,cmp);
unlock(child_page->rwlatch);
releasePage(child_page);
return ret;
}
} else {
// XXX Doesn't handle runs of duplicates.
if(prev_cmp_key <= 0 && rec_cmp_key > 0) {
recordid ret = {node->id, prev_slot, keySize};
return ret;
}
} }
prev = rec; match = i;
prev_slot = i;
prev_cmp_key = rec_cmp_key;
if(rec_cmp_key > 0) { break; }
} }
if(depth) { if(depth) {
// this handles the rhs of the tree. pageid_t child_id = readNodeRecord(xid,node,match,keySize)->ptr;
if(prev_cmp_key <= 0) { Page* child_page = loadPage(xid, child_id);
pageid_t child_id = prev->ptr; readlock(child_page->rwlatch,0);
Page *child_page = loadPage(xid, child_id); recordid ret = lsmLookup(xid,child_page,depth-1,key,keySize,cmp);
readlock(child_page->rwlatch,0); unlock(child_page->rwlatch);
recordid ret = lsmLookup(xid,child_page,depth-1,key,keySize,cmp); releasePage(child_page);
unlock(child_page->rwlatch); return ret;
releasePage(child_page);
return ret;
}
} else { } else {
if(prev_cmp_key <= 0) { recordid ret = {node->id, match, keySize};
recordid ret = {node->id, prev_slot, keySize}; return ret;
return ret;
}
} }
return NULLRID;
} }
static pageid_t lsmLookupLeafPageFromRid(int xid, recordid rid, size_t keySize) { static pageid_t lsmLookupLeafPageFromRid(int xid, recordid rid, size_t keySize) {
@ -852,6 +820,11 @@ lladdIterator_t* lsmTreeIterator_openAt(int xid, recordid root, const byte* key)
recordid lsm_entry_rid = lsmLookup(xid,p,depth,key,getKeySize(xid,p),comparators[cmp_nr->ptr]); recordid lsm_entry_rid = lsmLookup(xid,p,depth,key,getKeySize(xid,p),comparators[cmp_nr->ptr]);
if(lsm_entry_rid.page == NULLRID.page && lsm_entry_rid.slot == NULLRID.slot) {
return 0;
}
assert(lsm_entry_rid.size != INVALID_SLOT);
if(root.page != lsm_entry_rid.page) { if(root.page != lsm_entry_rid.page) {
unlock(p->rwlatch); unlock(p->rwlatch);
releasePage(p); releasePage(p);

View file

@ -269,9 +269,14 @@ class treeIterator {
slot_(0) slot_(0)
{ {
init_helper(); init_helper();
treeIterator * end = this->end(); if(lsmIterator_) {
for(;*this != *end && **this < key; ++(*this)) { } treeIterator * end = this->end();
delete end; for(;*this != *end && **this < key; ++(*this)) { }
delete end;
} else {
this->slot_ = 0;
this->pageid_ = 0;
}
} }
explicit treeIterator(recordid tree, ROW &scratch, int keylen) : explicit treeIterator(recordid tree, ROW &scratch, int keylen) :
tree_(tree), tree_(tree),
@ -320,6 +325,7 @@ class treeIterator {
} }
} }
ROW & operator*() { ROW & operator*() {
assert(this->lsmIterator_);
ROW* readTuple = currentPage_->recordRead(-1,slot_, &scratch_); ROW* readTuple = currentPage_->recordRead(-1,slot_, &scratch_);
if(!readTuple) { if(!readTuple) {
@ -366,6 +372,11 @@ class treeIterator {
} }
inline treeIterator* end() { inline treeIterator* end() {
treeIterator* t = new treeIterator(tree_,scratch_,keylen_); treeIterator* t = new treeIterator(tree_,scratch_,keylen_);
if(!lsmIterator_) {
t->slot_ = 0;
t->pageid_ = 0;
return t;
}
if(t->p_) { if(t->p_) {
releasePage(t->p_); releasePage(t->p_);
t->p_=0; t->p_=0;

View file

@ -107,14 +107,15 @@ namespace rose {
// this is just a guessed value... it seems about right based on // this is just a guessed value... it seems about right based on
// experiments, but 450 bytes overhead per tuple is insane! // experiments, but 450 bytes overhead per tuple is insane!
static const int RB_TREE_OVERHEAD = 400; // = 450; static const int RB_TREE_OVERHEAD = 400; // = 450;
static const pageid_t MEM_SIZE = 1000 * 1000 * 1000; static pageid_t C0_MEM_SIZE = 1000 * 1000 * 1000;
// static const pageid_t MEM_SIZE = 100 * 1000; // static const pageid_t C0_MEM_SIZE = 100 * 1000;
// How many pages should we try to fill with the first C1 merge? // How many pages should we try to fill with the first C1 merge?
static int R = 10; // XXX set this as low as possible (for dynamic setting. = sqrt(C2 size / C0 size)) static int R = 10; // XXX set this as low as possible (for dynamic setting. = sqrt(C2 size / C0 size))
#ifdef THROTTLED #ifdef THROTTLED
static const pageid_t START_SIZE = 100; //10 * 1000; /*10 **/ //1000; // XXX 4 is fudge related to RB overhead. static const pageid_t START_SIZE = 100; //10 * 1000; /*10 **/ //1000; // XXX 4 is fudge related to RB overhead.
#else #else
static const pageid_t START_SIZE = MEM_SIZE * R /( PAGE_SIZE * 4); //10 * 1000; /*10 **/ //1000; // XXX 4 is fudge related to RB overhead. Do not run this code
static const pageid_t START_SIZE = C0_MEM_SIZE * R /( PAGE_SIZE * 4); //10 * 1000; /*10 **/ //1000; // XXX 4 is fudge related to RB overhead.
#endif #endif
// Lower total work by performing one merge at higher level // Lower total work by performing one merge at higher level
// for every FUDGE^2 merges at the immediately lower level. // for every FUDGE^2 merges at the immediately lower level.
@ -147,8 +148,9 @@ namespace rose {
// loop around here to produce multiple batches for merge. // loop around here to produce multiple batches for merge.
gettimeofday(&start_push_tv,0); gettimeofday(&start_push_tv,0);
gettimeofday(&start_tv,0); gettimeofday(&start_tv,0);
pthread_mutex_lock(a->block_ready_mut);
while(1) { while(1) {
pthread_mutex_lock(a->block_ready_mut);
int done = 0; int done = 0;
@ -164,7 +166,6 @@ namespace rose {
*a->in_block_needed = false; *a->in_block_needed = false;
if(done) { if(done) {
pthread_cond_signal(a->out_block_ready_cond); pthread_cond_signal(a->out_block_ready_cond);
pthread_mutex_unlock(a->block_ready_mut);
break; break;
} }
@ -181,7 +182,7 @@ namespace rose {
ITERB *tbEnd = tbBegin->end(); ITERB *tbEnd = tbBegin->end();
{ // this { protects us from recalcitrant iterators below (tree iterators hold stasis page latches...) { // this { protects us from recalcitrant iterators below (tree iterators hold stasis page latches...)
pthread_mutex_unlock(a->block_ready_mut); ///XXX pthread_mutex_unlock(a->block_ready_mut);
Tcommit(xid); Tcommit(xid);
xid = Tbegin(); xid = Tbegin();
@ -263,7 +264,7 @@ namespace rose {
gettimeofday(&start_push_tv,0); gettimeofday(&start_push_tv,0);
pthread_mutex_lock(a->block_ready_mut); //XXX pthread_mutex_lock(a->block_ready_mut);
// keep actual handle around so that it can be freed below. // keep actual handle around so that it can be freed below.
typename ITERB::handle old_in_tree = **a->in_tree; typename ITERB::handle old_in_tree = **a->in_tree;
@ -298,10 +299,10 @@ namespace rose {
if(a->out_tree) { if(a->out_tree) {
double frac_wasted = ((double)RB_TREE_OVERHEAD)/(double)(RB_TREE_OVERHEAD + PAGELAYOUT::FMT::TUP::sizeofBytes()); double frac_wasted = ((double)RB_TREE_OVERHEAD)/(double)(RB_TREE_OVERHEAD + PAGELAYOUT::FMT::TUP::sizeofBytes());
target_R = sqrt(((double)(*a->out_tree_size+*a->my_tree_size)) / ((MEM_SIZE*(1-frac_wasted))/(4096*ratio))); target_R = sqrt(((double)(*a->out_tree_size+*a->my_tree_size)) / ((C0_MEM_SIZE*(1-frac_wasted))/(4096*ratio)));
printf("R_C2-C1 = %6.1f R_C1-C0 = %6.1f target = %6.1f\n", printf("R_C2-C1 = %6.1f R_C1-C0 = %6.1f target = %6.1f\n",
((double)(*a->out_tree_size/*+*a->my_tree_size*/)) / ((double)*a->my_tree_size), ((double)(*a->out_tree_size/*+*a->my_tree_size*/)) / ((double)*a->my_tree_size),
((double)*a->my_tree_size) / ((double)(MEM_SIZE*(1-frac_wasted))/(4096*ratio)),target_R); ((double)*a->my_tree_size) / ((double)(C0_MEM_SIZE*(1-frac_wasted))/(4096*ratio)),target_R);
} }
#else #else
if(a->out_tree_size) { if(a->out_tree_size) {
@ -369,11 +370,11 @@ namespace rose {
assert(a->my_tree->r_.page != tree->r_.page); assert(a->my_tree->r_.page != tree->r_.page);
*a->my_tree = *tree; *a->my_tree = *tree;
pthread_mutex_unlock(a->block_ready_mut);
gettimeofday(&start_tv,0); gettimeofday(&start_tv,0);
} }
pthread_mutex_unlock(a->block_ready_mut);
Tcommit(xid); Tcommit(xid);
return 0; return 0;
@ -581,7 +582,7 @@ namespace rose {
ret->still_open, ret->still_open,
block0_size, block0_size,
block1_size, block1_size,
(R * MEM_SIZE) / (PAGE_SIZE * 4), // XXX 4 = estimated compression ratio (R * C0_MEM_SIZE) / (PAGE_SIZE * 4), // XXX 4 = estimated compression ratio
R, R,
//new typename LSM_ITER::treeIteratorHandle(NULLRID), //new typename LSM_ITER::treeIteratorHandle(NULLRID),
block0_scratch, block0_scratch,
@ -665,25 +666,30 @@ namespace rose {
assert(*((char*)t.get(i)) || *((char*)t.get(i))+1); assert(*((char*)t.get(i)) || *((char*)t.get(i))+1);
} */ } */
pthread_mutex_lock(h->mut); //XXX
h->scratch_tree->insert(t); h->scratch_tree->insert(t);
uint64_t handleBytes = h->scratch_tree->size() * (RB_TREE_OVERHEAD + PAGELAYOUT::FMT::TUP::sizeofBytes()); uint64_t handleBytes = h->scratch_tree->size() * (RB_TREE_OVERHEAD + PAGELAYOUT::FMT::TUP::sizeofBytes());
//XXX 4 = estimated compression ratio. //XXX 4 = estimated compression ratio.
uint64_t inputSizeThresh = (4 * PAGE_SIZE * *h->input_size); // / (PAGELAYOUT::FMT::TUP::sizeofBytes()); uint64_t inputSizeThresh = (4 * PAGE_SIZE * *h->input_size); // / (PAGELAYOUT::FMT::TUP::sizeofBytes());
uint64_t memSizeThresh = MEM_SIZE; uint64_t memSizeThresh = C0_MEM_SIZE;
#ifdef INFINITE_RESOURCES #ifdef INFINITE_RESOURCES
static const int LATCH_INTERVAL = 10000; static const int LATCH_INTERVAL = 10000;
static int count = LATCH_INTERVAL; /// XXX HACK static int count = LATCH_INTERVAL; /// XXX HACK
bool go = false; bool go = false;
if(!count) { if(!count) {
pthread_mutex_lock(h->mut); ///XXX pthread_mutex_lock(h->mut);
go = *h->input_needed; go = *h->input_needed;
pthread_mutex_unlock(h->mut); ///XXX pthread_mutex_unlock(h->mut);
count = LATCH_INTERVAL; count = LATCH_INTERVAL;
} }
count --; count --;
#endif #endif
pthread_mutex_unlock(h->mut);
if( (handleBytes > memSizeThresh / 2) && ( if( (handleBytes > memSizeThresh / 2) && (
#ifdef INFINITE_RESOURCES #ifdef INFINITE_RESOURCES
go || go ||
@ -841,6 +847,7 @@ namespace rose {
void** void**
TlsmTableFindGTE(int xid, lsmTableHandle<PAGELAYOUT> *h, TlsmTableFindGTE(int xid, lsmTableHandle<PAGELAYOUT> *h,
typename PAGELAYOUT::FMT::TUP &val) { typename PAGELAYOUT::FMT::TUP &val) {
pthread_mutex_lock(h->mut);
// typedef stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP, // typedef stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP,
typedef stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP, typedef stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP,
@ -880,6 +887,11 @@ namespace rose {
return ret; return ret;
} }
template<class PAGELAYOUT>
void
TlsmTableFindGTEDone(lsmTableHandle<PAGELAYOUT> *h) {
pthread_mutex_unlock(h->mut);
}
template<class PAGELAYOUT> template<class PAGELAYOUT>
const typename PAGELAYOUT::FMT::TUP * const typename PAGELAYOUT::FMT::TUP *
TlsmTableFind(int xid, lsmTableHandle<PAGELAYOUT> *h, TlsmTableFind(int xid, lsmTableHandle<PAGELAYOUT> *h,