2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
#include <math.h>
|
|
|
|
#include "merger.h"
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-03-17 21:51:26 +00:00
|
|
|
|
|
|
|
#include <stasis/transactional.h>
|
|
|
|
#undef try
|
|
|
|
#undef end
|
|
|
|
|
|
|
|
/**
 * Register a log table with this scheduler.
 *
 * Allocates a logtable_mergedata record for the table, clears the table's
 * mergeable C0 tree pointer, and appends the (table, record) pair to
 * mergedata.
 *
 * @param ltable the table to register.
 * @return the index of the new entry in mergedata; startlogtable() uses this
 *         index to look the table up again.
 */
int merge_scheduler::addlogtable(logtable<datatuple> *ltable)
{
    // Per-table record; it carries the merge thread handles that
    // startlogtable() fills in and shutdown() joins on.
    logtable_mergedata *merge_state = new logtable_mergedata;

    // A freshly registered table has nothing queued for merging.
    ltable->set_tree_c0_mergeable(NULL);

    mergedata.push_back(std::make_pair(ltable, merge_state));

    const size_t slot = mergedata.size() - 1;
    return slot;
}
|
|
|
|
|
|
|
|
/**
 * Destroy the scheduler.
 *
 * Releases the logtable_mergedata records that addlogtable() allocated with
 * new (previously they were leaked by a bare clear()), then empties
 * mergedata. The registered logtable objects themselves are not deleted
 * here.
 *
 * NOTE(review): this does not stop or join the merge threads; presumably
 * callers run shutdown() first -- confirm.
 */
merge_scheduler::~merge_scheduler()
{
    for(size_t i = 0; i < mergedata.size(); i++)
    {
        delete mergedata[i].second;
    }
    mergedata.clear();
}
|
|
|
|
|
|
|
|
void merge_scheduler::shutdown()
|
|
|
|
{
|
|
|
|
//signal shutdown
|
2010-01-27 23:34:33 +00:00
|
|
|
for(size_t i=0; i<mergedata.size(); i++)
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-03-17 21:51:26 +00:00
|
|
|
logtable<datatuple> *ltable = mergedata[i].first;
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-02-17 23:38:31 +00:00
|
|
|
ltable->stop();
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2010-01-27 23:34:33 +00:00
|
|
|
for(size_t i=0; i<mergedata.size(); i++)
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
|
|
|
logtable_mergedata *mdata = mergedata[i].second;
|
|
|
|
|
|
|
|
pthread_join(mdata->memmerge_thread,0);
|
|
|
|
pthread_join(mdata->diskmerge_thread,0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-10 21:49:50 +00:00
|
|
|
void merge_scheduler::startlogtable(int index, int64_t MAX_C0_SIZE)
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-02-10 21:49:50 +00:00
|
|
|
|
2010-03-17 21:51:26 +00:00
|
|
|
logtable<datatuple> * ltable = mergedata[index].first;
|
2010-01-23 02:13:59 +00:00
|
|
|
struct logtable_mergedata *mdata = mergedata[index].second;
|
|
|
|
|
|
|
|
//initialize rb-tree
|
2010-03-09 01:42:23 +00:00
|
|
|
ltable->set_tree_c0(new memTreeComponent<datatuple>::rbtree_t);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
//disk merger args
|
2010-08-05 17:43:46 +00:00
|
|
|
#ifdef NO_SNOWSHOVEL
|
2010-05-12 22:16:41 +00:00
|
|
|
ltable->set_max_c0_size(MAX_C0_SIZE);
|
2010-08-05 17:43:46 +00:00
|
|
|
#else
|
|
|
|
ltable->set_max_c0_size(MAX_C0_SIZE*2); // XXX blatant hack.
|
|
|
|
#endif
|
2010-02-19 00:59:14 +00:00
|
|
|
diskTreeComponent ** block1_scratch = new diskTreeComponent*;
|
2010-01-23 02:13:59 +00:00
|
|
|
*block1_scratch=0;
|
|
|
|
|
2010-02-17 23:38:31 +00:00
|
|
|
DEBUG("Tree C1 is %lld\n", (long long)ltable->get_tree_c1()->get_root_rec().page);
|
|
|
|
DEBUG("Tree C2 is %lld\n", (long long)ltable->get_tree_c2()->get_root_rec().page);
|
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
void * (*diskmerger)(void*) = diskMergeThread;
|
|
|
|
void * (*memmerger)(void*) = memMergeThread;
|
|
|
|
|
2010-06-21 20:03:05 +00:00
|
|
|
pthread_create(&mdata->diskmerge_thread, 0, diskmerger, ltable);
|
|
|
|
pthread_create(&mdata->memmerge_thread, 0, memmerger, ltable);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
template <class ITA, class ITB>
|
2010-05-19 23:42:06 +00:00
|
|
|
void merge_iterators(int xid, diskTreeComponent * forceMe,
|
2010-03-05 19:07:47 +00:00
|
|
|
ITA *itrA,
|
|
|
|
ITB *itrB,
|
2010-03-17 21:51:26 +00:00
|
|
|
logtable<datatuple> *ltable,
|
2010-03-13 00:05:06 +00:00
|
|
|
diskTreeComponent *scratch_tree,
|
2010-06-05 00:41:52 +00:00
|
|
|
mergeStats * stats,
|
2010-03-05 19:07:47 +00:00
|
|
|
bool dropDeletes);
|
|
|
|
|
|
|
|
|
2010-02-18 23:31:57 +00:00
|
|
|
/**
|
2010-02-25 01:29:32 +00:00
|
|
|
* Merge algorithm: Outsider's view
|
2010-02-18 23:31:57 +00:00
|
|
|
*<pre>
|
|
|
|
1: while(1)
|
|
|
|
2: wait for c0_mergable
|
|
|
|
3: begin
|
2010-02-25 01:29:32 +00:00
|
|
|
4: merge c0_mergable and c1 into c1' # Blocks; tree must be consistent at this point
|
2010-04-28 21:29:15 +00:00
|
|
|
5: force c1' # Blocks
|
2010-02-25 01:29:32 +00:00
|
|
|
6: if c1' is too big # Blocks; tree must be consistent at this point.
|
|
|
|
7: c1_mergable = c1'
|
2010-02-18 23:31:57 +00:00
|
|
|
8: c1 = new_empty
|
2010-02-25 01:29:32 +00:00
|
|
|
8.5: delete old c1_mergeable # Happens in other thread (not here)
|
|
|
|
9: else
|
|
|
|
10: c1 = c1'
|
|
|
|
11: c0_mergeable = NULL
|
|
|
|
11.5: delete old c0_mergeable
|
|
|
|
12: delete old c1
|
|
|
|
13: commit
|
2010-02-18 23:31:57 +00:00
|
|
|
</pre>
|
2010-02-25 01:29:32 +00:00
|
|
|
Merge algorithm: actual order: 1 2 3 4 5 6 12 11.5 11 [7 8 (9) 10] 13
|
2010-02-18 23:31:57 +00:00
|
|
|
*/
|
2010-01-23 02:13:59 +00:00
|
|
|
void* memMergeThread(void*arg)
|
|
|
|
{
|
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
int xid;
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-06-21 20:03:05 +00:00
|
|
|
logtable<datatuple> * ltable = (logtable<datatuple>*)arg;
|
2010-02-18 23:31:57 +00:00
|
|
|
assert(ltable->get_tree_c1());
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
int merge_count =0;
|
2010-06-21 20:03:05 +00:00
|
|
|
mergeStats * stats = ltable->merge_mgr->get_merge_stats(1);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-02-25 01:29:32 +00:00
|
|
|
while(true) // 1
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_writelock(ltable->header_mut);
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->new_merge(1);
|
2010-01-23 02:13:59 +00:00
|
|
|
int done = 0;
|
2010-02-25 01:29:32 +00:00
|
|
|
// 2: wait for c0_mergable
|
2010-08-05 17:43:46 +00:00
|
|
|
#ifdef NO_SNOWSHOVEL
|
2010-02-18 23:31:57 +00:00
|
|
|
while(!ltable->get_tree_c0_mergeable())
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-05-19 23:42:06 +00:00
|
|
|
pthread_cond_signal(<able->c0_needed);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-02-17 23:38:31 +00:00
|
|
|
if(!ltable->is_still_running()){
|
2010-01-23 02:13:59 +00:00
|
|
|
done = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("mmt:\twaiting for block ready cond\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_cond_wait(<able->c0_ready, ltable->header_mut);
|
2010-05-19 23:42:06 +00:00
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("mmt:\tblock ready\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-08-05 17:43:46 +00:00
|
|
|
}
|
|
|
|
#else
|
2010-08-21 03:09:18 +00:00
|
|
|
// the merge iterator will wait until c0 is big enough for us to proceed.
|
2010-08-05 17:43:46 +00:00
|
|
|
if(!ltable->is_still_running()) {
|
|
|
|
done = 1;
|
|
|
|
}
|
|
|
|
#endif
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
if(done==1)
|
|
|
|
{
|
2010-05-19 23:42:06 +00:00
|
|
|
pthread_cond_signal(<able->c1_ready); // no block is ready. this allows the other thread to wake up, and see that we're shutting down.
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-01-23 02:13:59 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-04-29 23:13:04 +00:00
|
|
|
stats->starting_merge();
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-02-18 23:31:57 +00:00
|
|
|
// 3: Begin transaction
|
|
|
|
xid = Tbegin();
|
|
|
|
|
|
|
|
// 4: Merge
|
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//create the iterators
|
2010-03-17 21:51:26 +00:00
|
|
|
diskTreeComponent::iterator *itrA = ltable->get_tree_c1()->open_iterator();
|
2010-08-05 17:43:46 +00:00
|
|
|
#ifdef NO_SNOWSHOVEL
|
2010-03-09 19:02:54 +00:00
|
|
|
memTreeComponent<datatuple>::iterator *itrB =
|
|
|
|
new memTreeComponent<datatuple>::iterator(ltable->get_tree_c0_mergeable());
|
2010-08-05 17:43:46 +00:00
|
|
|
#else
|
2010-08-18 17:29:25 +00:00
|
|
|
// memTreeComponent<datatuple>::revalidatingIterator *itrB =
|
|
|
|
// new memTreeComponent<datatuple>::revalidatingIterator(ltable->get_tree_c0(), <able->rb_mut);
|
2010-08-21 03:09:18 +00:00
|
|
|
// memTreeComponent<datatuple>::batchedRevalidatingIterator *itrB =
|
|
|
|
// new memTreeComponent<datatuple>::batchedRevalidatingIterator(ltable->get_tree_c0(), <able->tree_bytes, ltable->max_c0_size, <able->flushing, 100, <able->rb_mut);
|
2010-08-05 17:43:46 +00:00
|
|
|
#endif
|
2010-10-03 23:05:45 +00:00
|
|
|
const int64_t min_bloom_target = 1000000000;
|
2010-01-23 02:13:59 +00:00
|
|
|
//create a new tree
|
2010-10-05 16:55:29 +00:00
|
|
|
diskTreeComponent * c1_prime = new diskTreeComponent(xid, ltable->internal_region_size, ltable->datapage_region_size, ltable->datapage_size, stats, (stats->target_size < min_bloom_target ? min_bloom_target : stats->target_size) / 100);
|
2010-02-15 23:02:01 +00:00
|
|
|
|
2010-08-05 17:43:46 +00:00
|
|
|
ltable->set_tree_c1_prime(c1_prime);
|
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-08-21 03:09:18 +00:00
|
|
|
#ifndef NO_SNOWSHOVEL
|
|
|
|
// needs to be past the rwlc_unlock...
|
|
|
|
memTreeComponent<datatuple>::batchedRevalidatingIterator *itrB =
|
|
|
|
new memTreeComponent<datatuple>::batchedRevalidatingIterator(ltable->get_tree_c0(), <able->tree_bytes, ltable->max_c0_size, <able->flushing, 100, <able->rb_mut);
|
|
|
|
#endif
|
2010-02-15 23:02:01 +00:00
|
|
|
//: do the merge
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("mmt:\tMerging:\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
merge_iterators<typeof(*itrA),typeof(*itrB)>(xid, c1_prime, itrA, itrB, ltable, c1_prime, stats, false);
|
2010-02-15 23:02:01 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
delete itrA;
|
|
|
|
delete itrB;
|
2010-02-15 23:02:01 +00:00
|
|
|
|
2010-02-18 23:31:57 +00:00
|
|
|
// 5: force c1'
|
|
|
|
|
2010-05-28 01:29:10 +00:00
|
|
|
rwlc_writelock(ltable->header_mut);
|
|
|
|
|
2010-03-13 00:05:06 +00:00
|
|
|
//force write the new tree to disk
|
|
|
|
c1_prime->force(xid);
|
2010-02-18 23:31:57 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
merge_count++;
|
2010-04-29 01:03:56 +00:00
|
|
|
DEBUG("mmt:\tmerge_count %lld #bytes written %lld\n", stats.merge_count, stats.output_size());
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
// Immediately clean out c0 mergeable so that writers may continue.
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
// first, we need to move the c1' into c1.
|
|
|
|
|
|
|
|
// 12: delete old c1
|
|
|
|
ltable->get_tree_c1()->dealloc(xid);
|
|
|
|
delete ltable->get_tree_c1();
|
|
|
|
|
|
|
|
// 10: c1 = c1'
|
|
|
|
ltable->set_tree_c1(c1_prime);
|
2010-08-05 17:43:46 +00:00
|
|
|
ltable->set_tree_c1_prime(0);
|
2010-05-19 23:42:06 +00:00
|
|
|
|
2010-08-05 17:43:46 +00:00
|
|
|
#ifdef NO_SNOWSHOVEL
|
2010-05-19 23:42:06 +00:00
|
|
|
// 11.5: delete old c0_mergeable
|
|
|
|
memTreeComponent<datatuple>::tearDownTree(ltable->get_tree_c0_mergeable());
|
|
|
|
// 11: c0_mergeable = NULL
|
|
|
|
ltable->set_tree_c0_mergeable(NULL);
|
2010-08-05 17:43:46 +00:00
|
|
|
#endif
|
|
|
|
ltable->set_c0_is_merging(false);
|
2010-05-21 23:43:17 +00:00
|
|
|
double new_c1_size = stats->output_size();
|
2010-05-19 23:42:06 +00:00
|
|
|
pthread_cond_signal(<able->c0_needed);
|
|
|
|
|
2010-06-21 20:03:05 +00:00
|
|
|
ltable->update_persistent_header(xid, 1);
|
2010-05-19 23:42:06 +00:00
|
|
|
Tcommit(xid);
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//TODO: this is simplistic for now
|
2010-02-25 01:29:32 +00:00
|
|
|
//6: if c1' is too big, signal the other merger
|
2010-08-21 03:09:18 +00:00
|
|
|
|
|
|
|
// update c0 effective size.
|
|
|
|
double frac = 1.0/(double)merge_count;
|
|
|
|
ltable->num_c0_mergers = merge_count;
|
|
|
|
ltable->mean_c0_effective_size =
|
|
|
|
(int64_t) (
|
|
|
|
((double)ltable->mean_c0_effective_size)*(1-frac) +
|
|
|
|
((double)stats->bytes_in_small*frac));
|
|
|
|
ltable->merge_mgr->get_merge_stats(0)->target_size = ltable->mean_c0_effective_size;
|
2010-05-21 23:43:17 +00:00
|
|
|
double target_R = *ltable->R();
|
2010-08-21 03:09:18 +00:00
|
|
|
|
|
|
|
printf("Merge done. R = %f MemSize = %lld Mean = %lld, This = %lld, Count = %d factor %3.3fcur%3.3favg\n", target_R, (long long)ltable->max_c0_size, (long long int)ltable->mean_c0_effective_size, stats->bytes_in_small, merge_count, ((double)stats->bytes_in_small) / (double)ltable->max_c0_size, ((double)ltable->mean_c0_effective_size) / (double)ltable->max_c0_size);
|
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
assert(target_R >= MIN_R);
|
2010-08-21 03:09:18 +00:00
|
|
|
bool signal_c2 = (new_c1_size / ltable->mean_c0_effective_size > target_R);
|
2010-05-21 23:43:17 +00:00
|
|
|
DEBUG("\nc1 size %f R %f\n", new_c1_size, target_R);
|
2010-02-25 01:29:32 +00:00
|
|
|
if( signal_c2 )
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-04-28 21:29:15 +00:00
|
|
|
DEBUG("mmt:\tsignaling C2 for merge\n");
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("mmt:\tnew_c1_size %.2f\tMAX_C0_SIZE %lld\ta->max_size %lld\t targetr %.2f \n", new_c1_size,
|
2010-02-10 21:49:50 +00:00
|
|
|
ltable->max_c0_size, a->max_size, target_R);
|
2010-02-18 23:31:57 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
// XXX need to report backpressure here!
|
2010-02-18 23:31:57 +00:00
|
|
|
while(ltable->get_tree_c1_mergeable()) {
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_cond_wait(<able->c1_needed, ltable->header_mut);
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
xid = Tbegin();
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
// we just set c1 = c1'. Want to move c1 -> c1 mergeable, clean out c1.
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-04-28 21:29:15 +00:00
|
|
|
// 7: and perhaps c1_mergeable
|
2010-08-05 17:43:46 +00:00
|
|
|
ltable->set_tree_c1_mergeable(ltable->get_tree_c1()); // c1_prime == c1.
|
2010-05-26 00:58:17 +00:00
|
|
|
stats->handed_off_tree();
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-04-28 21:29:15 +00:00
|
|
|
// 8: c1 = new empty.
|
2010-05-19 23:42:06 +00:00
|
|
|
ltable->set_tree_c1(new diskTreeComponent(xid, ltable->internal_region_size, ltable->datapage_region_size, ltable->datapage_size, stats));
|
|
|
|
|
|
|
|
pthread_cond_signal(<able->c1_ready);
|
2010-06-21 20:03:05 +00:00
|
|
|
pageid_t old_bytes_out = stats->bytes_out;
|
|
|
|
stats->bytes_out = 0; // XXX HACK
|
|
|
|
ltable->update_persistent_header(xid, 1);
|
|
|
|
stats->bytes_out = old_bytes_out;
|
2010-05-19 23:42:06 +00:00
|
|
|
Tcommit(xid);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
// DEBUG("mmt:\tUpdated C1's position on disk to %lld\n",ltable->get_tree_c1()->get_root_rec().page);
|
2010-02-25 01:29:32 +00:00
|
|
|
// 13
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-08-24 00:40:48 +00:00
|
|
|
|
|
|
|
ltable->merge_mgr->finished_merge(1);
|
2010-05-19 23:42:06 +00:00
|
|
|
// stats->pretty_print(stdout);
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//TODO: get the freeing outside of the lock
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2010-01-28 02:20:49 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
void *diskMergeThread(void*arg)
|
|
|
|
{
|
2010-04-28 21:29:15 +00:00
|
|
|
int xid;
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-06-21 20:03:05 +00:00
|
|
|
logtable<datatuple> * ltable = (logtable<datatuple>*)arg;
|
2010-02-18 23:31:57 +00:00
|
|
|
assert(ltable->get_tree_c2());
|
2010-03-05 19:07:47 +00:00
|
|
|
|
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
int merge_count =0;
|
2010-06-21 20:03:05 +00:00
|
|
|
mergeStats * stats = ltable->merge_mgr->get_merge_stats(2);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
while(true)
|
|
|
|
{
|
2010-04-28 19:21:25 +00:00
|
|
|
|
2010-03-24 20:30:35 +00:00
|
|
|
// 2: wait for input
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_writelock(ltable->header_mut);
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->new_merge(2);
|
2010-01-23 02:13:59 +00:00
|
|
|
int done = 0;
|
|
|
|
// get a new input for merge
|
2010-02-18 23:31:57 +00:00
|
|
|
while(!ltable->get_tree_c1_mergeable())
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-05-19 23:42:06 +00:00
|
|
|
pthread_cond_signal(<able->c1_needed);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-02-17 23:38:31 +00:00
|
|
|
if(!ltable->is_still_running()){
|
2010-01-23 02:13:59 +00:00
|
|
|
done = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("dmt:\twaiting for block ready cond\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_cond_wait(<able->c1_ready, ltable->header_mut);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("dmt:\tblock ready\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
|
|
|
if(done==1)
|
|
|
|
{
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-01-23 02:13:59 +00:00
|
|
|
break;
|
|
|
|
}
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-04-29 23:13:04 +00:00
|
|
|
stats->starting_merge();
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-04-28 21:29:15 +00:00
|
|
|
// 3: begin
|
2010-02-25 01:29:32 +00:00
|
|
|
xid = Tbegin();
|
|
|
|
|
|
|
|
// 4: do the merge.
|
2010-01-23 02:13:59 +00:00
|
|
|
//create the iterators
|
2010-04-28 19:21:25 +00:00
|
|
|
diskTreeComponent::iterator *itrA = ltable->get_tree_c2()->open_iterator();
|
2010-08-30 22:22:25 +00:00
|
|
|
#ifdef NO_SNOWSHOVEL
|
2010-03-17 21:51:26 +00:00
|
|
|
diskTreeComponent::iterator *itrB = ltable->get_tree_c1_mergeable()->open_iterator();
|
2010-08-30 22:22:25 +00:00
|
|
|
#else
|
|
|
|
diskTreeComponent::iterator *itrB = ltable->get_tree_c1_mergeable()->open_iterator(<able->merge_mgr->cur_c1_c2_progress_delta, 0.05, 0 /*XXX*/);
|
|
|
|
#endif
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//create a new tree
|
2010-05-19 23:42:06 +00:00
|
|
|
diskTreeComponent * c2_prime = new diskTreeComponent(xid, ltable->internal_region_size, ltable->datapage_region_size, ltable->datapage_size, stats);
|
2010-02-17 23:38:31 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-04-28 21:29:15 +00:00
|
|
|
|
|
|
|
//do the merge
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("dmt:\tMerging:\n");
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-21 23:43:17 +00:00
|
|
|
merge_iterators<typeof(*itrA),typeof(*itrB)>(xid, c2_prime, itrA, itrB, ltable, c2_prime, stats, true);
|
2010-04-28 21:29:15 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
delete itrA;
|
2010-04-28 21:29:15 +00:00
|
|
|
delete itrB;
|
2010-02-18 23:31:57 +00:00
|
|
|
|
2010-02-25 01:29:32 +00:00
|
|
|
//5: force write the new region to disk
|
2010-03-13 00:05:06 +00:00
|
|
|
c2_prime->force(xid);
|
2010-02-18 23:31:57 +00:00
|
|
|
|
2010-02-25 01:29:32 +00:00
|
|
|
// (skip 6, 7, 8, 8.5, 9))
|
2010-02-18 23:31:57 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_writelock(ltable->header_mut);
|
2010-02-25 01:29:32 +00:00
|
|
|
//12
|
2010-03-13 00:05:06 +00:00
|
|
|
ltable->get_tree_c2()->dealloc(xid);
|
2010-02-18 23:31:57 +00:00
|
|
|
delete ltable->get_tree_c2();
|
2010-02-25 01:29:32 +00:00
|
|
|
//11.5
|
2010-03-13 00:05:06 +00:00
|
|
|
ltable->get_tree_c1_mergeable()->dealloc(xid);
|
2010-02-25 01:29:32 +00:00
|
|
|
//11
|
|
|
|
delete ltable->get_tree_c1_mergeable();
|
|
|
|
ltable->set_tree_c1_mergeable(0);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
|
|
|
//writes complete
|
2010-02-17 22:11:22 +00:00
|
|
|
//now atomically replace the old c2 with new c2
|
2010-01-23 02:13:59 +00:00
|
|
|
//pthread_mutex_lock(a->block_ready_mut);
|
2010-02-25 01:29:32 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
merge_count++;
|
|
|
|
//update the current optimal R value
|
2010-08-21 03:09:18 +00:00
|
|
|
*(ltable->R()) = std::max(MIN_R, sqrt( ((double)stats->output_size()) / ((double)ltable->mean_c0_effective_size) ) );
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-05-21 23:43:17 +00:00
|
|
|
DEBUG("\nR = %f\n", *(ltable->R()));
|
|
|
|
|
2010-04-29 01:03:56 +00:00
|
|
|
DEBUG("dmt:\tmerge_count %lld\t#written bytes: %lld\n optimal r %.2f", stats.merge_count, stats.output_size(), *(a->r_i));
|
2010-08-05 17:43:46 +00:00
|
|
|
// 10: C2 is never too big
|
2010-02-18 23:31:57 +00:00
|
|
|
ltable->set_tree_c2(c2_prime);
|
2010-05-21 23:43:17 +00:00
|
|
|
stats->handed_off_tree();
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
DEBUG("dmt:\tUpdated C2's position on disk to %lld\n",(long long)-1);
|
2010-02-25 01:29:32 +00:00
|
|
|
// 13
|
2010-06-21 20:03:05 +00:00
|
|
|
ltable->update_persistent_header(xid, 2);
|
2010-01-23 02:13:59 +00:00
|
|
|
Tcommit(xid);
|
2010-05-19 23:42:06 +00:00
|
|
|
|
2010-05-27 01:49:27 +00:00
|
|
|
rwlc_unlock(ltable->header_mut);
|
2010-05-19 23:42:06 +00:00
|
|
|
// stats->pretty_print(stdout);
|
2010-08-24 00:40:48 +00:00
|
|
|
ltable->merge_mgr->finished_merge(2);
|
|
|
|
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-05-27 23:15:24 +00:00
|
|
|
static void periodically_force(int xid, int *i, diskTreeComponent * forceMe, stasis_log_t * log) {
|
2010-06-18 23:00:23 +00:00
|
|
|
if(*i > mergeManager::FORCE_INTERVAL) {
|
2010-05-27 23:15:24 +00:00
|
|
|
if(forceMe) forceMe->force(xid);
|
|
|
|
log->force_tail(log, LOG_FORCE_WAL);
|
|
|
|
*i = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-18 17:29:25 +00:00
|
|
|
/**
 * Flush a batch of already-merged tuples out of C0.
 *
 * When the batch is full (next_garbage == garbage_len) or force is set, this
 * takes ltable->rb_mut and, for each of the first next_garbage entries of
 * garbage, looks the tuple up in C0. If C0 holds an entry for it whose data
 * bytes are identical, that entry is erased from C0, ltable->tree_bytes is
 * reduced by the tuple's byte length, and the C0 copy is freed. The batch
 * entry itself is always freed.
 *
 * @param ltable       table whose C0 tree is cleaned.
 * @param garbage      batch of tuples; consumed entries are freed.
 * @param garbage_len  capacity of the batch.
 * @param next_garbage number of valid entries currently in the batch.
 * @param force        process the batch even if it is not full.
 * @return 0 if the batch was processed (the caller restarts at slot 0),
 *         otherwise next_garbage unchanged.
 */
static int garbage_collect(logtable<datatuple> * ltable, datatuple ** garbage, int garbage_len, int next_garbage, bool force = false) {
  if(next_garbage != garbage_len && !force) {
    return next_garbage;  // batch not full yet; nothing to do
  }

  pthread_mutex_lock(&ltable->rb_mut);
  for(int i = 0; i < next_garbage; i++) {
    datatuple * t2tmp = NULL;
    {
      memTreeComponent<datatuple>::rbtree_t::iterator rbitr = ltable->get_tree_c0()->find(garbage[i]);
      if(rbitr != ltable->get_tree_c0()->end()) {
        t2tmp = *rbitr;
        // Only an entry with identical data may be dropped; an entry whose
        // data differs is left in C0.
        const bool same_data =
            (t2tmp->datalen() == garbage[i]->datalen()) &&
            !memcmp(t2tmp->data(), garbage[i]->data(), garbage[i]->datalen());
        if(!same_data) {
          t2tmp = NULL;
        }
      }
    } // close rbitr before touching the tree.
    if(t2tmp) {
      ltable->get_tree_c0()->erase(garbage[i]);
      ltable->tree_bytes -= garbage[i]->byte_length();
      datatuple::freetuple(t2tmp);
    }
    datatuple::freetuple(garbage[i]);
  }
  pthread_mutex_unlock(&ltable->rb_mut);
  return 0;
}
|
|
|
|
|
2010-01-28 02:20:49 +00:00
|
|
|
template <class ITA, class ITB>
|
2010-03-05 19:07:47 +00:00
|
|
|
void merge_iterators(int xid,
|
2010-05-19 23:42:06 +00:00
|
|
|
diskTreeComponent * forceMe,
|
2010-01-28 02:20:49 +00:00
|
|
|
ITA *itrA, //iterator on c1 or c2
|
|
|
|
ITB *itrB, //iterator on c0 or c1, respectively
|
2010-03-17 21:51:26 +00:00
|
|
|
logtable<datatuple> *ltable,
|
2010-06-05 00:41:52 +00:00
|
|
|
diskTreeComponent *scratch_tree, mergeStats * stats,
|
2010-01-28 02:20:49 +00:00
|
|
|
bool dropDeletes // should be true iff this is biggest component
|
|
|
|
)
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
2010-05-26 00:58:17 +00:00
|
|
|
stasis_log_t * log = (stasis_log_t*)stasis_log();
|
|
|
|
|
2010-05-28 01:29:10 +00:00
|
|
|
datatuple *t1 = itrA->next_callerFrees();
|
2010-06-05 00:41:52 +00:00
|
|
|
ltable->merge_mgr->read_tuple_from_large_component(stats->merge_level, t1);
|
2010-01-23 02:13:59 +00:00
|
|
|
datatuple *t2 = 0;
|
2010-04-28 21:29:15 +00:00
|
|
|
|
2010-08-18 17:29:25 +00:00
|
|
|
int garbage_len = 100;
|
|
|
|
int next_garbage = 0;
|
|
|
|
datatuple ** garbage = (datatuple**)malloc(sizeof(garbage[0]) * garbage_len);
|
|
|
|
|
2010-05-19 23:42:06 +00:00
|
|
|
int i = 0;
|
|
|
|
|
2010-03-09 19:02:54 +00:00
|
|
|
while( (t2=itrB->next_callerFrees()) != 0)
|
2010-04-28 21:29:15 +00:00
|
|
|
{
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->read_tuple_from_small_component(stats->merge_level, t2);
|
2010-03-05 19:07:47 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
DEBUG("tuple\t%lld: keylen %d datalen %d\n",
|
2010-04-28 21:29:15 +00:00
|
|
|
ntuples, *(t2->keylen),*(t2->datalen) );
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-02-23 17:05:47 +00:00
|
|
|
while(t1 != 0 && datatuple::compare(t1->key(), t1->keylen(), t2->key(), t2->keylen()) < 0) // t1 is less than t2
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
|
|
|
//insert t1
|
2010-04-29 00:57:48 +00:00
|
|
|
scratch_tree->insertTuple(xid, t1);
|
2010-05-27 23:15:24 +00:00
|
|
|
i+=t1->byte_length();
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->wrote_tuple(stats->merge_level, t1);
|
2010-02-10 21:49:50 +00:00
|
|
|
datatuple::freetuple(t1);
|
2010-08-23 23:28:29 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//advance itrA
|
2010-03-09 19:02:54 +00:00
|
|
|
t1 = itrA->next_callerFrees();
|
2010-08-23 23:28:29 +00:00
|
|
|
ltable->merge_mgr->read_tuple_from_large_component(stats->merge_level, t1);
|
|
|
|
|
2010-05-27 23:15:24 +00:00
|
|
|
periodically_force(xid, &i, forceMe, log);
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
|
|
|
|
2010-02-23 17:05:47 +00:00
|
|
|
if(t1 != 0 && datatuple::compare(t1->key(), t1->keylen(), t2->key(), t2->keylen()) == 0)
|
2010-01-23 02:13:59 +00:00
|
|
|
{
|
|
|
|
datatuple *mtuple = ltable->gettuplemerger()->merge(t1,t2);
|
2010-05-12 22:16:41 +00:00
|
|
|
stats->merged_tuples(mtuple, t2, t1); // this looks backwards, but is right.
|
2010-04-28 21:29:15 +00:00
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
//insert merged tuple, drop deletes
|
2010-03-13 00:05:06 +00:00
|
|
|
if(dropDeletes && !mtuple->isDelete()) {
|
2010-04-29 00:57:48 +00:00
|
|
|
scratch_tree->insertTuple(xid, mtuple);
|
2010-05-27 23:15:24 +00:00
|
|
|
i+=mtuple->byte_length();
|
2010-03-13 00:05:06 +00:00
|
|
|
}
|
2010-02-10 21:49:50 +00:00
|
|
|
datatuple::freetuple(t1);
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->wrote_tuple(stats->merge_level, mtuple);
|
2010-03-09 19:02:54 +00:00
|
|
|
t1 = itrA->next_callerFrees(); //advance itrA
|
2010-08-23 23:28:29 +00:00
|
|
|
ltable->merge_mgr->read_tuple_from_large_component(stats->merge_level, t1);
|
2010-02-10 21:49:50 +00:00
|
|
|
datatuple::freetuple(mtuple);
|
2010-05-28 01:29:10 +00:00
|
|
|
periodically_force(xid, &i, forceMe, log);
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
|
|
|
else
|
2010-04-28 21:29:15 +00:00
|
|
|
{
|
2010-01-23 02:13:59 +00:00
|
|
|
//insert t2
|
2010-04-29 00:57:48 +00:00
|
|
|
scratch_tree->insertTuple(xid, t2);
|
2010-05-27 23:15:24 +00:00
|
|
|
i+=t2->byte_length();
|
2010-05-26 00:58:17 +00:00
|
|
|
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->wrote_tuple(stats->merge_level, t2);
|
2010-05-28 01:29:10 +00:00
|
|
|
periodically_force(xid, &i, forceMe, log);
|
2010-01-28 02:20:49 +00:00
|
|
|
// cannot free any tuples here; they may still be read through a lookup
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
2010-08-05 17:43:46 +00:00
|
|
|
#ifndef NO_SNOWSHOVEL
|
2010-08-18 17:29:25 +00:00
|
|
|
if(stats->merge_level == 1) {
|
|
|
|
next_garbage = garbage_collect(ltable, garbage, garbage_len, next_garbage);
|
|
|
|
garbage[next_garbage] = t2;
|
|
|
|
next_garbage++;
|
|
|
|
}
|
|
|
|
#if 0
|
2010-08-05 17:43:46 +00:00
|
|
|
pthread_mutex_lock(<able->rb_mut);
|
|
|
|
if(stats->merge_level == 1) {
|
|
|
|
datatuple * t2tmp = NULL;
|
|
|
|
{
|
|
|
|
memTreeComponent<datatuple>::rbtree_t::iterator rbitr = ltable->get_tree_c0()->find(t2);
|
|
|
|
if(rbitr != ltable->get_tree_c0()->end()) {
|
|
|
|
t2tmp = *rbitr;
|
|
|
|
if((t2tmp->datalen() == t2->datalen()) &&
|
|
|
|
!memcmp(t2tmp->data(), t2->data(), t2->datalen())) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(t2tmp) {
|
|
|
|
ltable->get_tree_c0()->erase(t2);
|
|
|
|
ltable->tree_bytes -= t2->byte_length();
|
|
|
|
datatuple::freetuple(t2tmp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(<able->rb_mut);
|
|
|
|
#endif
|
2010-08-18 17:29:25 +00:00
|
|
|
if(stats->merge_level != 1) {
|
|
|
|
datatuple::freetuple(t2);
|
|
|
|
}
|
|
|
|
#else
|
2010-02-10 21:49:50 +00:00
|
|
|
datatuple::freetuple(t2);
|
2010-08-18 17:29:25 +00:00
|
|
|
#endif
|
|
|
|
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|
|
|
|
|
2010-03-13 00:05:06 +00:00
|
|
|
while(t1 != 0) {// t1 is less than t2
|
2010-04-29 00:57:48 +00:00
|
|
|
scratch_tree->insertTuple(xid, t1);
|
2010-06-02 21:47:58 +00:00
|
|
|
ltable->merge_mgr->wrote_tuple(stats->merge_level, t1);
|
2010-05-27 23:15:24 +00:00
|
|
|
i += t1->byte_length();
|
2010-04-28 21:29:15 +00:00
|
|
|
datatuple::freetuple(t1);
|
2010-01-23 02:13:59 +00:00
|
|
|
|
2010-04-28 21:29:15 +00:00
|
|
|
//advance itrA
|
|
|
|
t1 = itrA->next_callerFrees();
|
2010-06-05 00:41:52 +00:00
|
|
|
ltable->merge_mgr->read_tuple_from_large_component(stats->merge_level, t1);
|
2010-05-27 23:15:24 +00:00
|
|
|
periodically_force(xid, &i, forceMe, log);
|
2010-03-13 00:05:06 +00:00
|
|
|
}
|
2010-01-23 02:13:59 +00:00
|
|
|
DEBUG("dpages: %d\tnpages: %d\tntuples: %d\n", dpages, npages, ntuples);
|
|
|
|
|
2010-08-18 17:29:25 +00:00
|
|
|
next_garbage = garbage_collect(ltable, garbage, garbage_len, next_garbage, true);
|
|
|
|
free(garbage);
|
|
|
|
|
2010-03-13 00:05:06 +00:00
|
|
|
scratch_tree->writes_done();
|
2010-01-23 02:13:59 +00:00
|
|
|
}
|