No more leaks; "fixed" RB tree memory usage estimates.
This commit is contained in:
parent
5a6ba6ed94
commit
e46dcce461
6 changed files with 143 additions and 39 deletions
|
@ -38,8 +38,11 @@ namespace rose {
|
|||
typename PAGELAYOUT::FMT::TUP t;
|
||||
|
||||
|
||||
static const long INSERTS = 1000000;
|
||||
long int count = INSERTS / 20;
|
||||
static const long INSERTS = 10000000;
|
||||
// static const long INSERTS = 10000000;
|
||||
// static const long INSERTS = 100000;
|
||||
static const long COUNT = INSERTS / 100;
|
||||
long int count = COUNT;
|
||||
|
||||
struct timeval start_tv, now_tv;
|
||||
double start, now, last_start;
|
||||
|
@ -48,12 +51,17 @@ namespace rose {
|
|||
start = rose::tv_to_double(start_tv);
|
||||
last_start = start;
|
||||
|
||||
printf("tuple 'size'%d\n", PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
|
||||
for(long int i = 0; i < INSERTS; i++) {
|
||||
t.set0(&i);
|
||||
t.set1(&i);
|
||||
t.set2(&i);
|
||||
t.set3(&i);
|
||||
TlsmTableInsert(h,t);
|
||||
count --;
|
||||
if(!count) {
|
||||
count = INSERTS / 20;
|
||||
count = COUNT;
|
||||
gettimeofday(&now_tv,0);
|
||||
now = tv_to_double(now_tv);
|
||||
printf("%3d%% complete "
|
||||
|
|
|
@ -38,7 +38,6 @@ template <class STLITER, class ROW> class stlSetIterator;
|
|||
template <class STLITER, class ROW>
|
||||
inline const byte * toByteArray(stlSetIterator<STLITER,ROW> * const t);
|
||||
|
||||
|
||||
/**
|
||||
Scans through an LSM tree's leaf pages, each tuple in the tree, in
|
||||
order. This iterator is designed for maximum forward scan
|
||||
|
@ -70,7 +69,23 @@ class treeIterator {
|
|||
{
|
||||
init_helper();
|
||||
}
|
||||
typedef recordid handle;
|
||||
// typedef recordid handle;
|
||||
class treeIteratorHandle {
|
||||
public:
|
||||
treeIteratorHandle() : r_(NULLRID) {}
|
||||
treeIteratorHandle(const recordid r) : r_(r) {}
|
||||
/* const treeIteratorHandle & operator=(const recordid *r) {
|
||||
r_ = *r;
|
||||
return this;
|
||||
} */
|
||||
treeIteratorHandle * operator=(const recordid &r) {
|
||||
r_ = r;
|
||||
return this;
|
||||
}
|
||||
|
||||
recordid r_;
|
||||
};
|
||||
typedef treeIteratorHandle* handle;
|
||||
explicit treeIterator(recordid tree) :
|
||||
tree_(tree),
|
||||
scratch_(),
|
||||
|
@ -80,6 +95,15 @@ class treeIterator {
|
|||
{
|
||||
init_helper();
|
||||
}
|
||||
explicit treeIterator(treeIteratorHandle* tree) :
|
||||
tree_(tree->r_),
|
||||
scratch_(),
|
||||
keylen_(ROW::sizeofBytes()),
|
||||
lsmIterator_(lsmTreeIterator_open(-1,tree->r_)),
|
||||
slot_(0)
|
||||
{
|
||||
init_helper();
|
||||
}
|
||||
explicit treeIterator(treeIterator& t) :
|
||||
tree_(t.tree_),
|
||||
scratch_(t.scratch_),
|
||||
|
|
|
@ -19,9 +19,14 @@ namespace rose {
|
|||
dispatched), interface to the underlying primititves
|
||||
*/
|
||||
|
||||
// Lower total work by perfomrming one merge at higher level
|
||||
// for every FUDGE^2 merges at the immediately lower level.
|
||||
// (Constrast to R, which controls the ratio of sizes of the trees.)
|
||||
static const int FUDGE = 1;
|
||||
|
||||
template<class PAGELAYOUT, class ITERA, class ITERB>
|
||||
struct merge_args {
|
||||
int worker_id;
|
||||
pageid_t(*pageAlloc)(int,void*);
|
||||
void *pageAllocState;
|
||||
pthread_mutex_t * block_ready_mut;
|
||||
|
@ -30,6 +35,10 @@ namespace rose {
|
|||
pthread_cond_t * in_block_ready_cond;
|
||||
pthread_cond_t * out_block_ready_cond;
|
||||
bool * still_open;
|
||||
pageid_t * my_tree_size;
|
||||
pageid_t * out_tree_size;
|
||||
pageid_t max_size;
|
||||
pageid_t r_i;
|
||||
typename ITERA::handle in_process_tree;
|
||||
typename ITERB::handle ** in_tree;
|
||||
typename ITERA::handle ** out_tree;
|
||||
|
@ -110,8 +119,10 @@ namespace rose {
|
|||
|
||||
// Initialize tree with an empty tree.
|
||||
// XXX hardcodes ITERA's type:
|
||||
typename ITERA::handle oldtree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
|
||||
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
typename ITERA::handle oldtree
|
||||
= new typename ITERA::treeIteratorHandle(
|
||||
TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
|
||||
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes()) );
|
||||
|
||||
Tcommit(xid);
|
||||
// loop around here to produce multiple batches for merge.
|
||||
|
@ -142,20 +153,12 @@ namespace rose {
|
|||
|
||||
gettimeofday(&wait_tv,0);
|
||||
|
||||
// XXX keep in_tree handle around so that it can be freed below.
|
||||
|
||||
typename ITERB::handle old_in_tree = **a->in_tree;
|
||||
|
||||
ITERA taBegin(oldtree);
|
||||
ITERB tbBegin(**a->in_tree);
|
||||
|
||||
ITERA *taEnd = taBegin.end();
|
||||
ITERB *tbEnd = tbBegin.end();
|
||||
|
||||
free(*a->in_tree); // free's copy of handle; not tree
|
||||
*a->in_tree = 0; // free slot for producer
|
||||
|
||||
pthread_cond_signal(a->in_block_needed_cond);
|
||||
|
||||
pthread_mutex_unlock(a->block_ready_mut);
|
||||
|
||||
|
@ -180,6 +183,7 @@ namespace rose {
|
|||
delete taEnd;
|
||||
delete tbEnd;
|
||||
|
||||
|
||||
gettimeofday(&stop_tv,0);
|
||||
|
||||
// TlsmFree(wait_queue[0]) /// XXX Need to implement (de)allocation!
|
||||
|
@ -195,26 +199,47 @@ namespace rose {
|
|||
double throughput = ((double)(insertedTuples * (uint64_t)PAGELAYOUT::FMT::TUP::sizeofBytes()))
|
||||
/ (1024.0 * 1024.0 * total_elapsed);
|
||||
|
||||
printf("merge # %-6d: comp ratio: %-9.3f waited %6.1f sec "
|
||||
"worked %6.1f sec inserts %-12ld (%9.3f mb/s) %6d pages\n", merge_count, ratio,
|
||||
wait_elapsed, work_elapsed, (unsigned long)insertedTuples, throughput, mergedPages);
|
||||
printf("worker %d merge # %-6d: comp ratio: %-9.3f waited %6.1f sec "
|
||||
"worked %6.1f sec inserts %-12ld (%9.3f mb/s) %6d pages (need %6d)\n", a->worker_id, merge_count, ratio,
|
||||
wait_elapsed, work_elapsed, (unsigned long)insertedTuples, throughput, mergedPages, !a->out_tree_size ? -1 : (FUDGE * *a->out_tree_size / a->r_i));
|
||||
|
||||
|
||||
gettimeofday(&start_tv,0);
|
||||
|
||||
pthread_mutex_lock(a->block_ready_mut);
|
||||
|
||||
static int threshold_calc = 1000; // XXX REALLY NEED TO FIX THIS!
|
||||
if(a->out_tree && // is there a upstream merger (note the lack of the * on a->out_tree)?
|
||||
mergedPages > threshold_calc // do we have enough data to bother it?
|
||||
// keep actual handle around so that it can be freed below.
|
||||
typename ITERB::handle old_in_tree = **a->in_tree;
|
||||
delete old_in_tree;
|
||||
free(*a->in_tree); // free pointer to handle
|
||||
|
||||
// XXX should we delay this to this point?
|
||||
// otherwise, the contents of in_tree become temporarily unavailable to observers.
|
||||
*a->in_tree = 0; // tell producer that the slot is now open
|
||||
|
||||
pthread_cond_signal(a->in_block_needed_cond);
|
||||
|
||||
|
||||
if(a->out_tree_size) {
|
||||
*a->my_tree_size = *a->out_tree_size / (a->r_i * FUDGE);
|
||||
} else {
|
||||
if(*a->my_tree_size < mergedPages) {
|
||||
*a->my_tree_size = mergedPages;
|
||||
}
|
||||
}
|
||||
|
||||
if(a->out_tree && // is there a upstream merger? (note the lack of the * on a->out_tree)
|
||||
((a->max_size && mergedPages > a->max_size )
|
||||
||
|
||||
mergedPages > (FUDGE * *a->out_tree_size / a->r_i)) // do we have enough data to bother it?
|
||||
) {
|
||||
while(*a->out_tree) { // we probably don't need the "while..."
|
||||
pthread_cond_wait(a->out_block_needed_cond, a->block_ready_mut);
|
||||
}
|
||||
|
||||
// XXX C++? Objects? Constructors? Who needs them?
|
||||
*a->out_tree = (recordid*)malloc(sizeof(tree));
|
||||
**a->out_tree = tree;
|
||||
*a->out_tree = (typeof(*a->out_tree))malloc(sizeof(**a->out_tree));
|
||||
**a->out_tree = new typename ITERA::treeIteratorHandle(tree);
|
||||
pthread_cond_signal(a->out_block_ready_cond);
|
||||
|
||||
// This is a bit wasteful; allocate a new empty tree to merge against.
|
||||
|
@ -228,7 +253,7 @@ namespace rose {
|
|||
|
||||
// XXX TlsmFree(xid,oldtree);
|
||||
|
||||
oldtree = tree;
|
||||
*oldtree = tree;
|
||||
|
||||
pthread_mutex_unlock(a->block_ready_mut);
|
||||
|
||||
|
@ -287,6 +312,7 @@ namespace rose {
|
|||
pthread_mutex_t * mut;
|
||||
pthread_cond_t * input_ready_cond;
|
||||
pthread_cond_t * input_needed_cond;
|
||||
pageid_t * input_size;
|
||||
merge_args<PAGELAYOUT, treeIterator<typename PAGELAYOUT::FMT::TUP,
|
||||
typename PAGELAYOUT::FMT>, treeIterator<typename PAGELAYOUT::FMT::TUP,
|
||||
typename PAGELAYOUT::FMT> > * args1;
|
||||
|
@ -296,6 +322,15 @@ namespace rose {
|
|||
typename PAGELAYOUT::FMT::TUP> > * args2;
|
||||
};
|
||||
|
||||
// How many bytes of tuples can we afford to keep in RAM?
|
||||
// this is just a guessed value... it seems about right based on
|
||||
// experiments, but 450 bytes overhead per tuple is insane!
|
||||
static const int RB_TREE_OVERHEAD = 450;
|
||||
static const pageid_t MEM_SIZE = 800 * 1000 * 1000;
|
||||
// How many pages should we try to fill with the first C1 merge?
|
||||
static const pageid_t START_SIZE = 10 * 1000;
|
||||
static const int R = 40;
|
||||
|
||||
template<class PAGELAYOUT>
|
||||
lsmTableHandle <PAGELAYOUT> * TlsmTableStart(recordid& tree) {
|
||||
/// XXX xid for daemon processes?
|
||||
|
@ -335,6 +370,13 @@ namespace rose {
|
|||
pthread_cond_init(block1_ready_cond,0);
|
||||
pthread_cond_init(block2_ready_cond,0);
|
||||
|
||||
pageid_t * block0_size = (pageid_t*)malloc(sizeof(pageid_t));
|
||||
// don't merge until we have enough data to be worthwhile...
|
||||
*block0_size = START_SIZE;
|
||||
pageid_t * block1_size = (pageid_t*)malloc(sizeof(pageid_t));
|
||||
// similarly, wait to merge the next block until we have merged block FUDGE times.
|
||||
*block1_size = FUDGE * R * *block0_size;
|
||||
|
||||
LSM_HANDLE ** block1_scratch = (LSM_HANDLE**) malloc(sizeof(LSM_HANDLE*));
|
||||
*block1_scratch = 0;
|
||||
|
||||
|
@ -357,7 +399,7 @@ namespace rose {
|
|||
|
||||
ret->input_ready_cond = block0_ready_cond;
|
||||
ret->input_needed_cond = block0_needed_cond;
|
||||
|
||||
ret->input_size = block0_size;
|
||||
|
||||
recordid * ridp = (recordid*)malloc(sizeof(recordid));
|
||||
*ridp = h.bigTreeAllocState;
|
||||
|
@ -365,6 +407,7 @@ namespace rose {
|
|||
ret->args1 = (merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>));
|
||||
merge_args<PAGELAYOUT, LSM_ITER, LSM_ITER> tmpargs1 =
|
||||
{
|
||||
1,
|
||||
TlsmRegionAllocRid,
|
||||
ridp,
|
||||
block_ready_mut,
|
||||
|
@ -373,7 +416,11 @@ namespace rose {
|
|||
block1_ready_cond,
|
||||
block2_ready_cond,
|
||||
ret->still_open,
|
||||
NULLRID,
|
||||
block1_size,
|
||||
0, // biggest component computes its size directly.
|
||||
0, // No max size for biggest component
|
||||
R,
|
||||
new typename LSM_ITER::treeIteratorHandle(NULLRID),
|
||||
block1_scratch,
|
||||
0
|
||||
};
|
||||
|
@ -386,6 +433,7 @@ namespace rose {
|
|||
ret->args2 = (merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>));
|
||||
merge_args<PAGELAYOUT, LSM_ITER, RB_ITER> tmpargs2 =
|
||||
{
|
||||
2,
|
||||
TlsmRegionAllocRid,
|
||||
ridp,
|
||||
block_ready_mut,
|
||||
|
@ -394,7 +442,11 @@ namespace rose {
|
|||
block0_ready_cond,
|
||||
block1_ready_cond,
|
||||
ret->still_open,
|
||||
NULLRID,
|
||||
block0_size,
|
||||
block1_size,
|
||||
(R * MEM_SIZE) / (PAGE_SIZE * 4), // 4 = estimated compression ratio
|
||||
R,
|
||||
new typename LSM_ITER::treeIteratorHandle(NULLRID),
|
||||
block0_scratch,
|
||||
block1_scratch
|
||||
};
|
||||
|
@ -407,16 +459,7 @@ namespace rose {
|
|||
return ret;
|
||||
}
|
||||
template<class PAGELAYOUT>
|
||||
void TlsmTableStop( lsmTableHandle<PAGELAYOUT> * h) {
|
||||
*(h->still_open) = 0;
|
||||
pthread_join(h->merge1_thread,0);
|
||||
pthread_join(h->merge2_thread,0);
|
||||
}
|
||||
template<class PAGELAYOUT>
|
||||
void TlsmTableInsert( lsmTableHandle<PAGELAYOUT> *h,
|
||||
typename PAGELAYOUT::FMT::TUP &t) {
|
||||
h->scratch_handle->insert(t);
|
||||
if(h->scratch_handle->size() > 100000) { // XXX set threshold sanely!!!
|
||||
void TlsmTableFlush(lsmTableHandle<PAGELAYOUT> *h) {
|
||||
pthread_mutex_lock(h->mut);
|
||||
|
||||
while(*h->input_handle) {
|
||||
|
@ -429,10 +472,32 @@ namespace rose {
|
|||
*(h->input_handle) = tmp_ptr;
|
||||
|
||||
pthread_cond_signal(h->input_ready_cond);
|
||||
|
||||
h->scratch_handle = new typeof(*h->scratch_handle);
|
||||
|
||||
pthread_mutex_unlock(h->mut);
|
||||
|
||||
}
|
||||
template<class PAGELAYOUT>
|
||||
void TlsmTableStop( lsmTableHandle<PAGELAYOUT> * h) {
|
||||
TlsmTableFlush(h);
|
||||
*(h->still_open) = 0;
|
||||
pthread_join(h->merge1_thread,0);
|
||||
pthread_join(h->merge2_thread,0);
|
||||
}
|
||||
template<class PAGELAYOUT>
|
||||
void TlsmTableInsert( lsmTableHandle<PAGELAYOUT> *h,
|
||||
typename PAGELAYOUT::FMT::TUP &t) {
|
||||
h->scratch_handle->insert(t);
|
||||
//XXX 4 = estimated compression ratio.
|
||||
|
||||
uint64_t handleBytes = h->scratch_handle->size() * (RB_TREE_OVERHEAD + PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
uint64_t inputSizeThresh = (4 * PAGE_SIZE * *h->input_size); // / (PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
uint64_t memSizeThresh = MEM_SIZE;
|
||||
|
||||
if(handleBytes > inputSizeThresh || handleBytes > memSizeThresh) { // XXX ok?
|
||||
printf("Handle mbytes %ld Input size: %ld input size thresh: %ld mbytes mem size thresh: %ld\n",
|
||||
handleBytes / (1024*1024), *h->input_size, inputSizeThresh / (1024*1024), memSizeThresh / (1024*1024));
|
||||
TlsmTableFlush<PAGELAYOUT>(h);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -253,6 +253,8 @@ recordid TlsmCreate(int xid, int comparator,
|
|||
writeNodeRecord(xid, p, DEPTH, dummy, keySize, 0);
|
||||
writeNodeRecord(xid, p, COMPARATOR, dummy, keySize, comparator);
|
||||
|
||||
free(dummy);
|
||||
|
||||
unlock(p->rwlatch);
|
||||
releasePage(p);
|
||||
return ret;
|
||||
|
|
|
@ -28,7 +28,7 @@ namespace rose {
|
|||
|
||||
plugin_id_t pluginid = plugin_id<FORMAT, COMPRESSOR, typename COMPRESSOR::TYP>();
|
||||
|
||||
plugin_id_t * plugins = new plugin_id_t[column_count];
|
||||
plugin_id_t * plugins = (plugin_id_t*)malloc(column_count * sizeof(plugin_id_t));
|
||||
for(column_number_t c = 0; c < column_count; c++) {
|
||||
plugins[c] = pluginid;
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ namespace rose {
|
|||
typename COMPRESSOR::TYP val = *(typename COMPRESSOR::TYP*)(t->get(c));
|
||||
com->offset(val);
|
||||
}
|
||||
free(plugins);
|
||||
return f;
|
||||
}
|
||||
static inline int cmp_id() {
|
||||
|
|
|
@ -44,7 +44,11 @@ void dirtyPages_remove(Page * p) {
|
|||
//assert(pblHtLookup(dirtyPages, &(p->id), sizeof(int)));
|
||||
// printf("With lsn = %d\n", (lsn_t)pblHtCurrent(dirtyPages));
|
||||
p->dirty = 0;
|
||||
lsn_t * old = pblHtLookup(dirtyPages, &(p->id),sizeof(int));
|
||||
pblHtRemove(dirtyPages, &(p->id), sizeof(int));
|
||||
if(old) {
|
||||
free(old);
|
||||
}
|
||||
//assert(!ret); <--- Due to a bug in the PBL compatibility mode,
|
||||
//there is no way to tell whether the value didn't exist, or if it
|
||||
//was null.
|
||||
|
|
Loading…
Reference in a new issue