LSM table is now able to spawn threads, then exit.

This commit is contained in:
Sears Russell 2007-11-02 15:00:47 +00:00
parent 7e2c37534e
commit f101919244
2 changed files with 181 additions and 124 deletions

View file

@ -66,6 +66,12 @@ class treeIterator {
currentPage_ = (PAGELAYOUT*)p_->impl;
}
}
typedef recordid handle;
explicit treeIterator(recordid tree) :
tree_(tree),
scratch_(),
keylen_(ROW::sizeofBytes())
{ }
explicit treeIterator(treeIterator& t) :
tree_(t.tree_),
scratch_(t.scratch_),
@ -152,7 +158,7 @@ class treeIterator {
void operator=(treeIterator & t) { abort(); }
int operator-(treeIterator & t) { abort(); }
recordid tree_;
ROW & scratch_;
ROW scratch_;
int keylen_;
lladdIterator_t * lsmIterator_;
slot_index_t slot_;
@ -381,9 +387,15 @@ class versioningIterator {
information. The rows should be sorted based on value, then sorted by
version, with the newest value first.
*/
template<class STLITER,class ROW> class stlSetIterator {
template<class SET,class ROW> class stlSetIterator {
private:
typedef typename SET::iterator STLITER;
public:
typedef SET handle;
stlSetIterator( SET& s ) : it_(s.begin()), itend_(s.end()) {}
stlSetIterator( STLITER& it, STLITER& itend ) : it_(it), itend_(itend) {}
explicit stlSetIterator(stlSetIterator &i) : it_(i.it_), itend_(i.itend_){}
const ROW& operator* () { return *it_; }
@ -416,11 +428,11 @@ class versioningIterator {
STLITER it_;
STLITER itend_;
friend const byte*
toByteArray<STLITER,ROW>(stlSetIterator<STLITER,ROW> * const t);
toByteArray<SET,ROW>(stlSetIterator<SET,ROW> * const t);
};
template <class STLITER,class ROW>
inline const byte * toByteArray(stlSetIterator<STLITER,ROW> * const t) {
template <class SET,class ROW>
inline const byte * toByteArray(stlSetIterator<SET,ROW> * const t) {
return (*(t->it_)).toByteArray();
}
/** Produce a byte array from the value stored at t's current

View file

@ -20,23 +20,18 @@ namespace rose {
*/
template<class PAGELAYOUT>
struct new_insert_args {
int comparator_idx;
int rowsize; //typedef int32_t val_t;
// ITER *begin;
// ITER *end;
template<class PAGELAYOUT, class ITERA, class ITERB>
struct merge_args {
pageid_t(*pageAlloc)(int,void*);
void *pageAllocState;
pthread_mutex_t * block_ready_mut;
pthread_cond_t * block_needed_cond;
pthread_cond_t * block_ready_cond;
int max_waiters;
int wait_count;
recordid * wait_queue;
typename PAGELAYOUT::FMT::TUP *scratchA;
typename PAGELAYOUT::FMT::TUP *scratchB;
pageid_t mergedPages;
pthread_cond_t * in_block_needed_cond;
pthread_cond_t * out_block_needed_cond;
pthread_cond_t * in_block_ready_cond;
pthread_cond_t * out_block_ready_cond;
bool * still_open;
typename ITERA::handle ** out_tree;
typename ITERB::handle ** in_tree;
};
template <class PAGELAYOUT, class ITER>
@ -101,39 +96,59 @@ namespace rose {
ITERA is an iterator over the data structure that mergeThread creates (a lsm tree iterator).
ITERB is an iterator over the data structures that mergeThread takes as input (lsm tree, or rb tree..)
*/
template<class PAGELAYOUT, class ITERA, class ITERB> //class PAGELAYOUTX, class ENGINE, class ITERA, class ITERB,
// class ROW, class TYPE>
template<class PAGELAYOUT, class ITERA, class ITERB>
void* mergeThread(void* arg) {
// The ITER argument of a is unused (we don't look at it's begin or end fields...)
//insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>* a =
// (insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>*)arg;
new_insert_args<PAGELAYOUT> * a = (new_insert_args<PAGELAYOUT>*)arg;
merge_args<PAGELAYOUT, ITERA, ITERB> * a = (merge_args<PAGELAYOUT, ITERA, ITERB>*)arg;
struct timeval start_tv, wait_tv, stop_tv;
int merge_count = 0;
int xid = Tbegin();
// Initialize tree with an empty tree.
// XXX hardcodes ITERA's type:
recordid oldtree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
Tcommit(xid);
// loop around here to produce multiple batches for merge.
while(1) {
gettimeofday(&start_tv,0);
pthread_mutex_lock(a->block_ready_mut);
while(a->wait_count <2) {
pthread_cond_wait(a->block_ready_cond,a->block_ready_mut);
if(!*(a->still_open)) {
pthread_mutex_unlock(a->block_ready_mut);
break;
}
while(!*(a->in_tree)) {
pthread_cond_signal(a->in_block_needed_cond);
pthread_cond_wait(a->in_block_ready_cond,a->block_ready_mut);
}
gettimeofday(&wait_tv,0);
recordid * oldTreeA = &a->wait_queue[0];
recordid * oldTreeB = &a->wait_queue[1];
xid = Tbegin();
recordid tree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
ITERA taBegin(oldtree);
ITERB tbBegin(**a->in_tree);
// XXX keep in_tree handle around so that it can be freed below.
free(*a->in_tree); // free's copy of handle; not tree
*a->in_tree = 0; // free slot for producer
pthread_cond_signal(a->in_block_needed_cond);
pthread_mutex_unlock(a->block_ready_mut);
recordid tree = TlsmCreate(-1, a->comparator_idx,a->pageAlloc,a->pageAllocState,a->rowsize);
ITERA taBegin(*oldTreeA,*(a->scratchA),a->rowsize);
ITERB tbBegin(*oldTreeB,*(a->scratchB),a->rowsize);
ITERA *taEnd = taBegin.end();
ITERB *tbEnd = tbBegin.end();
@ -143,28 +158,43 @@ namespace rose {
mergeIterator<ITERA, ITERB, typename PAGELAYOUT::FMT::TUP>
mEnd(taBegin, tbBegin, *taEnd, *tbEnd);
mEnd.seekEnd();
uint64_t insertedTuples;
pageid_t mergedPages = compressData<PAGELAYOUT,mergeIterator<ITERA,ITERB,typename PAGELAYOUT::FMT::TUP> >
(&mBegin, &mEnd,tree,a->pageAlloc,a->pageAllocState,&insertedTuples);
delete taEnd;
delete tbEnd;
gettimeofday(&stop_tv,0);
pthread_mutex_lock(a->block_ready_mut);
a->mergedPages = mergedPages;
// TlsmFree(wait_queue[0]) /// XXX Need to implement (de)allocation!
// TlsmFree(wait_queue[1])
memcpy(&a->wait_queue[0],&tree,sizeof(tree));
for(int i = 1; i + 1 < a->wait_count; i++) {
memcpy(&a->wait_queue[i],&a->wait_queue[i+1],sizeof(tree));
pthread_mutex_lock(a->block_ready_mut);
static int threshold_calc = 1000; // XXX REALLY NEED TO FIX THIS!
if(a->out_tree && // is there a upstream merger (note the lack of the * on a->out_tree)?
mergedPages > threshold_calc // do we have enough data to bother it?
) {
while(*a->out_tree) { // we probably don't need the "while..."
pthread_cond_wait(a->out_block_needed_cond, a->block_ready_mut);
}
// XXX C++? Objects? Constructors? Who needs them?
*a->out_tree = (recordid*)malloc(sizeof(tree));
**a->out_tree = tree;
pthread_cond_signal(a->out_block_ready_cond);
// This is a bit wasteful; allocate a new empty tree to merge against.
// We don't want to ever look at the one we just handed upstream...
// We could wait for an in tree to be ready, and then pass it directly
// to compress data (to avoid all those merging comparisons...)
tree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
}
a->wait_count--;
pthread_mutex_unlock(a->block_ready_mut);
merge_count++;
@ -172,83 +202,20 @@ namespace rose {
double wait_elapsed = tv_to_double(wait_tv) - tv_to_double(start_tv);
double work_elapsed = tv_to_double(stop_tv) - tv_to_double(wait_tv);
double total_elapsed = wait_elapsed + work_elapsed;
double ratio = ((double)(insertedTuples * (uint64_t)a->rowsize))
double ratio = ((double)(insertedTuples * (uint64_t)PAGELAYOUT::FMT::TUP::sizeofBytes()))
/ (double)(PAGE_SIZE * mergedPages);
double throughput = ((double)(insertedTuples * (uint64_t)a->rowsize))
double throughput = ((double)(insertedTuples * (uint64_t)PAGELAYOUT::FMT::TUP::sizeofBytes()))
/ (1024.0 * 1024.0 * total_elapsed);
printf("merge # %-6d: comp ratio: %-9.3f waited %6.1f sec "
"worked %6.1f sec inserts %-12ld (%9.3f mb/s)\n", merge_count, ratio,
wait_elapsed, work_elapsed, (unsigned long)insertedTuples, throughput);
pthread_cond_signal(a->block_needed_cond);
Tcommit(xid);
}
return 0;
}
/*
template<class PAGELAYOUT, class ITER>
void* insertThread(void* arg) {
new_insert_args<PAGELAYOUT> * a = (new_insert_args<PAGELAYOUT>*)arg;
struct timeval start_tv, start_wait_tv, stop_tv;
int insert_count = 0;
pageid_t lastTreeBlocks = 0;
uint64_t lastTreeInserts = 0;
pageid_t desiredInserts = 0;
// this is a hand-tuned value; it should be set dynamically, not staticly
double K = 0.18;
// loop around here to produce multiple batches for merge.
while(1) {
gettimeofday(&start_tv,0);
// XXX this needs to be an iterator over an in-memory tree.
ITER i(*(a->begin));
ITER j(desiredInserts ? *(a->begin) : *(a->end));
if(desiredInserts) {
j += desiredInserts;
}
recordid tree = TlsmCreate(-1, a->comparator_idx,a->rowsize);
lastTreeBlocks =
compressData<PAGELAYOUT,PAGELAYOUT::init_page,ITER>
(&i, &j,1,tree,a->pageAlloc,a->pageAllocState, &lastTreeInserts);
gettimeofday(&start_wait_tv,0);
pthread_mutex_lock(a->block_ready_mut);
while(a->wait_count >= a->max_waiters) {
pthread_cond_wait(a->block_needed_cond,a->block_ready_mut);
}
memcpy(&a->wait_queue[a->wait_count],&tree,sizeof(recordid));
a->wait_count++;
pthread_cond_signal(a->block_ready_cond);
gettimeofday(&stop_tv,0);
double work_elapsed = tv_to_double(start_wait_tv) - tv_to_double(start_tv);
double wait_elapsed = tv_to_double(stop_tv) - tv_to_double(start_wait_tv);
double elapsed = tv_to_double(stop_tv) - tv_to_double(start_tv);
printf("insert# %-6d waited %6.1f sec "
"worked %6.1f sec inserts %-12ld (%9.3f mb/s)\n",
++insert_count,
wait_elapsed,
work_elapsed,
(long int)lastTreeInserts,
(lastTreeInserts*(uint64_t)a->rowsize / (1024.0*1024.0)) / elapsed);
if(a->mergedPages != -1) {
desiredInserts = (pageid_t)(((double)a->mergedPages / K)
* ((double)lastTreeInserts
/ (double)lastTreeBlocks));
}
pthread_mutex_unlock(a->block_ready_mut);
}
return 0;
}
*/
typedef struct {
recordid bigTree;
recordid bigTreeAllocState; // this is probably the head of an arraylist of regions used by the tree...
@ -258,6 +225,7 @@ namespace rose {
epoch_t end;
} lsmTableHeader_t;
template<class PAGELAYOUT>
inline recordid TlsmTableAlloc(int xid) {
@ -281,30 +249,107 @@ namespace rose {
Tset(xid, ret, &h);
return ret;
}
/// XXX start should return a struct that contains these!
pthread_t merge1_thread;
pthread_t merge2_thread;
bool * still_open;
template<class PAGELAYOUT>
void TlsmTableStart(recordid tree) {
/// XXX xid for daemon processes?
void * (*merger)(void*) = mergeThread
<PAGELAYOUT,
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT>,
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT> >;
/*mergeThread
<PAGELAYOUT,
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT>,
stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP,
typename PAGELAYOUT::FMT::TUP::stl_cmp>::iterator,
typename PAGELAYOUT::FMT::TUP> >
(0); */
lsmTableHeader_t h;
Tread(-1, tree, &h);
typedef treeIterator<typename PAGELAYOUT::FMT::TUP,
typename PAGELAYOUT::FMT> LSM_ITER;
typedef stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP,
typename PAGELAYOUT::FMT::TUP::stl_cmp>,
typename PAGELAYOUT::FMT::TUP> RB_ITER;
pthread_mutex_t * block_ready_mut =
(pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
pthread_cond_t * block0_needed_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_cond_t * block1_needed_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_cond_t * block2_needed_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_cond_t * block0_ready_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_cond_t * block1_ready_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_cond_t * block2_ready_cond =
(pthread_cond_t*)malloc(sizeof(pthread_cond_t));
pthread_mutex_init(block_ready_mut,0);
pthread_cond_init(block0_needed_cond,0);
pthread_cond_init(block1_needed_cond,0);
pthread_cond_init(block2_needed_cond,0);
pthread_cond_init(block0_ready_cond,0);
pthread_cond_init(block1_ready_cond,0);
pthread_cond_init(block2_ready_cond,0);
typename LSM_ITER::handle * block1_scratch =
(typename LSM_ITER::handle*) malloc(sizeof(typename LSM_ITER::handle));
still_open = (bool*)malloc(sizeof(bool));
*still_open = 1;
recordid * ridp = (recordid*)malloc(sizeof(recordid));
*ridp = h.bigTreeAllocState;
recordid ** block1_scratch_p = (recordid**)malloc(sizeof(block1_scratch));
*block1_scratch_p = block1_scratch;
merge_args<PAGELAYOUT, LSM_ITER, LSM_ITER> * args1 = (merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>));
merge_args<PAGELAYOUT, LSM_ITER, LSM_ITER> tmpargs1 =
{
TlsmRegionAllocRid,
ridp,
block_ready_mut,
block1_needed_cond,
block2_needed_cond,
block1_ready_cond,
block2_ready_cond,
still_open,
0,
block1_scratch_p
};
*args1 = tmpargs1;
void * (*merger1)(void*) = mergeThread
<PAGELAYOUT, LSM_ITER, LSM_ITER>;
ridp = (recordid*)malloc(sizeof(recordid));
*ridp = h.mediumTreeAllocState;
merge_args<PAGELAYOUT, LSM_ITER, RB_ITER> * args2 = (merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>));
merge_args<PAGELAYOUT, LSM_ITER, RB_ITER> tmpargs2 =
{
TlsmRegionAllocRid,
ridp,
block_ready_mut,
block0_needed_cond,
block1_needed_cond,
block0_ready_cond,
block1_ready_cond,
still_open,
block1_scratch_p,
0 // XXX how does this thing get fed new trees of tuples?
};
*args2 = tmpargs2;
void * (*merger2)(void*) = mergeThread
<PAGELAYOUT, LSM_ITER, RB_ITER>;
pthread_create(&merge1_thread, 0, merger1, args1);
pthread_create(&merge2_thread, 0, merger2, args2);
}
template<class PAGELAYOUT>
void TlsmTableStop(recordid tree) {
*still_open = 0;
pthread_join(merge1_thread,0);
pthread_join(merge2_thread,0);
}
}