Refactoring of logtable. Cleaned a bunch of includes, and logtable is now a template. This is in preparation for the new merge policy
git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@698 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
This commit is contained in:
parent
6cd1ccb5ff
commit
e1c937a602
21 changed files with 433 additions and 420 deletions
|
@ -1,5 +1,7 @@
|
||||||
#include "logstore.h"
|
#include "logstore.h"
|
||||||
#include "datapage.h"
|
#include "datapage.h"
|
||||||
|
#include "regionAllocator.h"
|
||||||
|
|
||||||
#include <stasis/page.h>
|
#include <stasis/page.h>
|
||||||
|
|
||||||
static const int DATA_PAGE = USER_DEFINED_PAGE(1);
|
static const int DATA_PAGE = USER_DEFINED_PAGE(1);
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
#include <stasis/page.h>
|
#include <stasis/page.h>
|
||||||
#include <stasis/constants.h>
|
#include <stasis/constants.h>
|
||||||
#include "regionAllocator.h"
|
|
||||||
|
struct RegionAllocator;
|
||||||
|
|
||||||
//#define CHECK_FOR_SCRIBBLING
|
//#define CHECK_FOR_SCRIBBLING
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include "merger.h"
|
#include "merger.h"
|
||||||
#include "diskTreeComponent.h"
|
#include "diskTreeComponent.h"
|
||||||
|
#include "regionAllocator.h"
|
||||||
|
|
||||||
#include <stasis/transactional.h>
|
#include <stasis/transactional.h>
|
||||||
#include <stasis/page.h>
|
#include <stasis/page.h>
|
||||||
|
@ -46,6 +47,27 @@ void diskTreeComponent::internalNodes::init_stasis() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
recordid diskTreeComponent::get_root_rid() { return ltree->get_root_rec(); }
|
||||||
|
recordid diskTreeComponent::get_datapage_allocator_rid() { return ltree->get_datapage_alloc()->header_rid(); }
|
||||||
|
recordid diskTreeComponent::get_internal_node_allocator_rid() { return ltree->get_internal_node_alloc()->header_rid(); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void diskTreeComponent::force(int xid) {
|
||||||
|
ltree->get_datapage_alloc()->force_regions(xid);
|
||||||
|
ltree->get_internal_node_alloc()->force_regions(xid);
|
||||||
|
}
|
||||||
|
void diskTreeComponent::dealloc(int xid) {
|
||||||
|
ltree->get_datapage_alloc()->dealloc_regions(xid);
|
||||||
|
ltree->get_internal_node_alloc()->dealloc_regions(xid);
|
||||||
|
}
|
||||||
|
void diskTreeComponent::list_regions(int xid, pageid_t *internal_node_region_length, pageid_t *internal_node_region_count, pageid_t **internal_node_regions,
|
||||||
|
pageid_t *datapage_region_length, pageid_t *datapage_region_count, pageid_t **datapage_regions) {
|
||||||
|
*internal_node_regions = ltree->get_internal_node_alloc()->list_regions(xid, internal_node_region_length, internal_node_region_count);
|
||||||
|
*datapage_regions = ltree->get_datapage_alloc() ->list_regions(xid, datapage_region_length, datapage_region_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void diskTreeComponent::writes_done() {
|
void diskTreeComponent::writes_done() {
|
||||||
if(dp) {
|
if(dp) {
|
||||||
dp->writes_done();
|
dp->writes_done();
|
||||||
|
@ -335,6 +357,20 @@ recordid diskTreeComponent::internalNodes::appendPage(int xid,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diskTreeComponent::internalNodes::internalNodes(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size)
|
||||||
|
: lastLeaf(-1),
|
||||||
|
internal_node_alloc(new RegionAllocator(xid, internal_region_size)),
|
||||||
|
datapage_alloc(new RegionAllocator(xid, datapage_region_size))
|
||||||
|
{ create(xid); }
|
||||||
|
|
||||||
|
diskTreeComponent::internalNodes::internalNodes(int xid, recordid root, recordid internal_node_state, recordid datapage_state)
|
||||||
|
: lastLeaf(-1),
|
||||||
|
root_rec(root),
|
||||||
|
internal_node_alloc(new RegionAllocator(xid, internal_node_state)),
|
||||||
|
datapage_alloc(new RegionAllocator(xid, datapage_state))
|
||||||
|
{ }
|
||||||
|
|
||||||
|
|
||||||
/* adding pages:
|
/* adding pages:
|
||||||
|
|
||||||
1) Try to append value to lsmTreeState->lastLeaf
|
1) Try to append value to lsmTreeState->lastLeaf
|
||||||
|
@ -842,7 +878,7 @@ void diskTreeComponent::internalNodes::iterator::close() {
|
||||||
// tree iterator implementation
|
// tree iterator implementation
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void diskTreeComponent::diskTreeIterator::init_iterators(datatuple * key1, datatuple * key2) {
|
void diskTreeComponent::iterator::init_iterators(datatuple * key1, datatuple * key2) {
|
||||||
assert(!key2); // unimplemented
|
assert(!key2); // unimplemented
|
||||||
if(tree_.size == INVALID_SIZE) {
|
if(tree_.size == INVALID_SIZE) {
|
||||||
lsmIterator_ = NULL;
|
lsmIterator_ = NULL;
|
||||||
|
@ -855,14 +891,14 @@ void diskTreeComponent::diskTreeIterator::init_iterators(datatuple * key1, datat
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
diskTreeComponent::diskTreeIterator::diskTreeIterator(diskTreeComponent::internalNodes *tree) :
|
diskTreeComponent::iterator::iterator(diskTreeComponent::internalNodes *tree) :
|
||||||
tree_(tree ? tree->get_root_rec() : NULLRID)
|
tree_(tree ? tree->get_root_rec() : NULLRID)
|
||||||
{
|
{
|
||||||
init_iterators(NULL, NULL);
|
init_iterators(NULL, NULL);
|
||||||
init_helper(NULL);
|
init_helper(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
diskTreeComponent::diskTreeIterator::diskTreeIterator(diskTreeComponent::internalNodes *tree, datatuple* key) :
|
diskTreeComponent::iterator::iterator(diskTreeComponent::internalNodes *tree, datatuple* key) :
|
||||||
tree_(tree ? tree->get_root_rec() : NULLRID)
|
tree_(tree ? tree->get_root_rec() : NULLRID)
|
||||||
{
|
{
|
||||||
init_iterators(key,NULL);
|
init_iterators(key,NULL);
|
||||||
|
@ -870,7 +906,7 @@ diskTreeComponent::diskTreeIterator::diskTreeIterator(diskTreeComponent::interna
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
diskTreeComponent::diskTreeIterator::~diskTreeIterator()
|
diskTreeComponent::iterator::~iterator()
|
||||||
{
|
{
|
||||||
if(lsmIterator_) {
|
if(lsmIterator_) {
|
||||||
lsmIterator_->close();
|
lsmIterator_->close();
|
||||||
|
@ -886,7 +922,7 @@ diskTreeComponent::diskTreeIterator::~diskTreeIterator()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void diskTreeComponent::diskTreeIterator::init_helper(datatuple* key1)
|
void diskTreeComponent::iterator::init_helper(datatuple* key1)
|
||||||
{
|
{
|
||||||
if(!lsmIterator_)
|
if(!lsmIterator_)
|
||||||
{
|
{
|
||||||
|
@ -918,7 +954,7 @@ void diskTreeComponent::diskTreeIterator::init_helper(datatuple* key1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
datatuple * diskTreeComponent::diskTreeIterator::next_callerFrees()
|
datatuple * diskTreeComponent::iterator::next_callerFrees()
|
||||||
{
|
{
|
||||||
if(!this->lsmIterator_) { return NULL; }
|
if(!this->lsmIterator_) { return NULL; }
|
||||||
|
|
||||||
|
|
|
@ -8,25 +8,13 @@
|
||||||
#ifndef DISKTREECOMPONENT_H_
|
#ifndef DISKTREECOMPONENT_H_
|
||||||
#define DISKTREECOMPONENT_H_
|
#define DISKTREECOMPONENT_H_
|
||||||
|
|
||||||
#include <stasis/transactional.h>
|
|
||||||
|
|
||||||
#include <stasis/operations.h>
|
|
||||||
#include <stasis/bufferManager.h>
|
|
||||||
#include <stasis/allocationPolicy.h>
|
|
||||||
#include <stasis/blobManager.h>
|
|
||||||
#include <stasis/page.h>
|
|
||||||
#include <stasis/truncation.h>
|
|
||||||
|
|
||||||
#include "merger.h"
|
|
||||||
#include "regionAllocator.h"
|
|
||||||
#include "datapage.h"
|
#include "datapage.h"
|
||||||
#include "tuplemerger.h"
|
|
||||||
#include "datatuple.h"
|
#include "datatuple.h"
|
||||||
|
|
||||||
class diskTreeComponent {
|
class diskTreeComponent {
|
||||||
public:
|
public:
|
||||||
class internalNodes;
|
class internalNodes;
|
||||||
class diskTreeIterator;
|
class iterator;
|
||||||
|
|
||||||
diskTreeComponent(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size) :
|
diskTreeComponent(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size) :
|
||||||
ltree(new diskTreeComponent::internalNodes(xid, internal_region_size, datapage_region_size, datapage_size)),
|
ltree(new diskTreeComponent::internalNodes(xid, internal_region_size, datapage_region_size, datapage_size)),
|
||||||
|
@ -43,35 +31,26 @@ class diskTreeComponent {
|
||||||
delete ltree;
|
delete ltree;
|
||||||
}
|
}
|
||||||
|
|
||||||
recordid get_root_rid() { return ltree->get_root_rec(); }
|
recordid get_root_rid();
|
||||||
recordid get_datapage_allocator_rid() { return ltree->get_datapage_alloc()->header_rid(); }
|
recordid get_datapage_allocator_rid();
|
||||||
recordid get_internal_node_allocator_rid() { return ltree->get_internal_node_alloc()->header_rid(); }
|
recordid get_internal_node_allocator_rid();
|
||||||
internalNodes * get_internal_nodes() { return ltree; }
|
internalNodes * get_internal_nodes() { return ltree; }
|
||||||
datatuple* findTuple(int xid, datatuple::key_t key, size_t keySize);
|
datatuple* findTuple(int xid, datatuple::key_t key, size_t keySize);
|
||||||
int insertTuple(int xid, /*DataPage<datatuple> *dp,*/ datatuple *t, merge_stats_t *stats);
|
int insertTuple(int xid, /*DataPage<datatuple> *dp,*/ datatuple *t, merge_stats_t *stats);
|
||||||
void writes_done();
|
void writes_done();
|
||||||
|
|
||||||
|
|
||||||
diskTreeIterator * iterator() {
|
iterator * open_iterator() {
|
||||||
return new diskTreeIterator(ltree);
|
return new iterator(ltree);
|
||||||
}
|
}
|
||||||
diskTreeIterator * iterator(datatuple * key) {
|
iterator * open_iterator(datatuple * key) {
|
||||||
return new diskTreeIterator(ltree, key);
|
return new iterator(ltree, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
void force(int xid) {
|
void force(int xid);
|
||||||
ltree->get_datapage_alloc()->force_regions(xid);
|
void dealloc(int xid);
|
||||||
ltree->get_internal_node_alloc()->force_regions(xid);
|
|
||||||
}
|
|
||||||
void dealloc(int xid) {
|
|
||||||
ltree->get_datapage_alloc()->dealloc_regions(xid);
|
|
||||||
ltree->get_internal_node_alloc()->dealloc_regions(xid);
|
|
||||||
}
|
|
||||||
void list_regions(int xid, pageid_t *internal_node_region_length, pageid_t *internal_node_region_count, pageid_t **internal_node_regions,
|
void list_regions(int xid, pageid_t *internal_node_region_length, pageid_t *internal_node_region_count, pageid_t **internal_node_regions,
|
||||||
pageid_t *datapage_region_length, pageid_t *datapage_region_count, pageid_t **datapage_regions) {
|
pageid_t *datapage_region_length, pageid_t *datapage_region_count, pageid_t **datapage_regions);
|
||||||
*internal_node_regions = ltree->get_internal_node_alloc()->list_regions(xid, internal_node_region_length, internal_node_region_count);
|
|
||||||
*datapage_regions = ltree->get_datapage_alloc() ->list_regions(xid, datapage_region_length, datapage_region_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_tree(int xid) {
|
void print_tree(int xid) {
|
||||||
ltree->print_tree(xid);
|
ltree->print_tree(xid);
|
||||||
|
@ -93,18 +72,8 @@ class diskTreeComponent {
|
||||||
static void init_stasis();
|
static void init_stasis();
|
||||||
static void deinit_stasis();
|
static void deinit_stasis();
|
||||||
|
|
||||||
internalNodes(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size)
|
internalNodes(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size);
|
||||||
: lastLeaf(-1),
|
internalNodes(int xid, recordid root, recordid internal_node_state, recordid datapage_state);
|
||||||
internal_node_alloc(new RegionAllocator(xid, internal_region_size)),
|
|
||||||
datapage_alloc(new RegionAllocator(xid, datapage_region_size))
|
|
||||||
{ create(xid); }
|
|
||||||
|
|
||||||
internalNodes(int xid, recordid root, recordid internal_node_state, recordid datapage_state)
|
|
||||||
: lastLeaf(-1),
|
|
||||||
root_rec(root),
|
|
||||||
internal_node_alloc(new RegionAllocator(xid, internal_node_state)),
|
|
||||||
datapage_alloc(new RegionAllocator(xid, datapage_state))
|
|
||||||
{ }
|
|
||||||
|
|
||||||
void print_tree(int xid);
|
void print_tree(int xid);
|
||||||
|
|
||||||
|
@ -199,15 +168,15 @@ class diskTreeComponent {
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
class diskTreeIterator
|
class iterator
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit diskTreeIterator(diskTreeComponent::internalNodes *tree);
|
explicit iterator(diskTreeComponent::internalNodes *tree);
|
||||||
|
|
||||||
explicit diskTreeIterator(diskTreeComponent::internalNodes *tree,datatuple *key);
|
explicit iterator(diskTreeComponent::internalNodes *tree,datatuple *key);
|
||||||
|
|
||||||
~diskTreeIterator();
|
~iterator();
|
||||||
|
|
||||||
datatuple * next_callerFrees();
|
datatuple * next_callerFrees();
|
||||||
|
|
||||||
|
@ -215,9 +184,9 @@ class diskTreeComponent {
|
||||||
void init_iterators(datatuple * key1, datatuple * key2);
|
void init_iterators(datatuple * key1, datatuple * key2);
|
||||||
inline void init_helper(datatuple * key1);
|
inline void init_helper(datatuple * key1);
|
||||||
|
|
||||||
explicit diskTreeIterator() { abort(); }
|
explicit iterator() { abort(); }
|
||||||
void operator=(diskTreeIterator & t) { abort(); }
|
void operator=(iterator & t) { abort(); }
|
||||||
int operator-(diskTreeIterator & t) { abort(); }
|
int operator-(iterator & t) { abort(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
recordid tree_; //root of the tree
|
recordid tree_; //root of the tree
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
#include "logserver.h"
|
#include "logserver.h"
|
||||||
#include "datatuple.h"
|
#include "datatuple.h"
|
||||||
#include "merger.h"
|
#include "merger.h"
|
||||||
|
@ -27,7 +29,7 @@
|
||||||
|
|
||||||
void *serverLoop(void *args);
|
void *serverLoop(void *args);
|
||||||
|
|
||||||
void logserver::startserver(logtable *ltable)
|
void logserver::startserver(logtable<datatuple> *ltable)
|
||||||
{
|
{
|
||||||
sys_alive = true;
|
sys_alive = true;
|
||||||
this->ltable = ltable;
|
this->ltable = ltable;
|
||||||
|
@ -479,7 +481,7 @@ int op_scan(pthread_data *data, datatuple * tuple, datatuple * tuple2, size_t li
|
||||||
int err = writeoptosocket(*(data->workitem), LOGSTORE_RESPONSE_SENDING_TUPLES);
|
int err = writeoptosocket(*(data->workitem), LOGSTORE_RESPONSE_SENDING_TUPLES);
|
||||||
|
|
||||||
if(!err) {
|
if(!err) {
|
||||||
logtableIterator<datatuple> * itr = new logtableIterator<datatuple>(data->ltable, tuple);
|
logtable<datatuple>::iterator * itr = new logtable<datatuple>::iterator(data->ltable, tuple);
|
||||||
datatuple * t;
|
datatuple * t;
|
||||||
while(!err && (t = itr->getnext())) {
|
while(!err && (t = itr->getnext())) {
|
||||||
if(tuple2) { // are we at the end of range?
|
if(tuple2) { // are we at the end of range?
|
||||||
|
@ -767,7 +769,7 @@ int op_dbg_blockmap(pthread_data* data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int op_dbg_drop_database(pthread_data * data) {
|
int op_dbg_drop_database(pthread_data * data) {
|
||||||
logtableIterator<datatuple> * itr = new logtableIterator<datatuple>(data->ltable);
|
logtable<datatuple>::iterator * itr = new logtable<datatuple>::iterator(data->ltable);
|
||||||
datatuple * del;
|
datatuple * del;
|
||||||
fprintf(stderr, "DROPPING DATABASE...\n");
|
fprintf(stderr, "DROPPING DATABASE...\n");
|
||||||
long long n = 0;
|
long long n = 0;
|
||||||
|
|
10
logserver.h
10
logserver.h
|
@ -9,6 +9,8 @@
|
||||||
#include <stasis/transactional.h>
|
#include <stasis/transactional.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#include "logstore.h"
|
||||||
|
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef try
|
#undef try
|
||||||
#undef end
|
#undef end
|
||||||
|
@ -21,8 +23,6 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class logtable;
|
|
||||||
|
|
||||||
struct pthread_item;
|
struct pthread_item;
|
||||||
|
|
||||||
struct pthread_data {
|
struct pthread_data {
|
||||||
|
@ -39,7 +39,7 @@ struct pthread_data {
|
||||||
|
|
||||||
int *workitem; //id of the socket to work
|
int *workitem; //id of the socket to work
|
||||||
|
|
||||||
logtable *ltable;
|
logtable<datatuple> *ltable;
|
||||||
bool *sys_alive;
|
bool *sys_alive;
|
||||||
|
|
||||||
#ifdef STATS_ENABLED
|
#ifdef STATS_ENABLED
|
||||||
|
@ -97,7 +97,7 @@ public:
|
||||||
delete qlock;
|
delete qlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
void startserver(logtable *ltable);
|
void startserver(logtable<datatuple> *ltable);
|
||||||
|
|
||||||
void stopserver();
|
void stopserver();
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ private:
|
||||||
int * self_pipe; // write a byte to self_pipe[1] to wake up select().
|
int * self_pipe; // write a byte to self_pipe[1] to wake up select().
|
||||||
std::vector<pthread_item *> th_list; // list of threads
|
std::vector<pthread_item *> th_list; // list of threads
|
||||||
|
|
||||||
logtable *ltable;
|
logtable<datatuple> *ltable;
|
||||||
|
|
||||||
#ifdef STATS_ENABLED
|
#ifdef STATS_ENABLED
|
||||||
int num_reqs;
|
int num_reqs;
|
||||||
|
|
63
logstore.cpp
63
logstore.cpp
|
@ -1,17 +1,9 @@
|
||||||
#include <string.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "merger.h"
|
|
||||||
#include "logstore.h"
|
#include "logstore.h"
|
||||||
#include "datapage.h"
|
#include "merger.h"
|
||||||
|
|
||||||
#include <stasis/transactional.h>
|
#include <stasis/transactional.h>
|
||||||
#include <stasis/page.h>
|
#undef try
|
||||||
#include <stasis/page/slotted.h>
|
#undef end
|
||||||
#include <stasis/bufferManager.h>
|
|
||||||
#include <stasis/bufferManager/bufferHash.h>
|
|
||||||
|
|
||||||
static inline double tv_to_double(struct timeval tv)
|
static inline double tv_to_double(struct timeval tv)
|
||||||
{
|
{
|
||||||
|
@ -23,7 +15,8 @@ static inline double tv_to_double(struct timeval tv)
|
||||||
// LOG TABLE IMPLEMENTATION
|
// LOG TABLE IMPLEMENTATION
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
logtable::logtable(pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size)
|
template<class TUPLE>
|
||||||
|
logtable<TUPLE>::logtable(pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size)
|
||||||
{
|
{
|
||||||
|
|
||||||
tree_c0 = NULL;
|
tree_c0 = NULL;
|
||||||
|
@ -48,7 +41,8 @@ logtable::logtable(pageid_t internal_region_size, pageid_t datapage_region_size,
|
||||||
this->datapage_size = datapage_size;
|
this->datapage_size = datapage_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
logtable::~logtable()
|
template<class TUPLE>
|
||||||
|
logtable<TUPLE>::~logtable()
|
||||||
{
|
{
|
||||||
if(tree_c1 != NULL)
|
if(tree_c1 != NULL)
|
||||||
delete tree_c1;
|
delete tree_c1;
|
||||||
|
@ -64,7 +58,8 @@ logtable::~logtable()
|
||||||
delete tmerger;
|
delete tmerger;
|
||||||
}
|
}
|
||||||
|
|
||||||
recordid logtable::allocTable(int xid)
|
template<class TUPLE>
|
||||||
|
recordid logtable<TUPLE>::allocTable(int xid)
|
||||||
{
|
{
|
||||||
|
|
||||||
table_rec = Talloc(xid, sizeof(tbl_header));
|
table_rec = Talloc(xid, sizeof(tbl_header));
|
||||||
|
@ -79,13 +74,15 @@ recordid logtable::allocTable(int xid)
|
||||||
|
|
||||||
return table_rec;
|
return table_rec;
|
||||||
}
|
}
|
||||||
void logtable::openTable(int xid, recordid rid) {
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::openTable(int xid, recordid rid) {
|
||||||
table_rec = rid;
|
table_rec = rid;
|
||||||
Tread(xid, table_rec, &tbl_header);
|
Tread(xid, table_rec, &tbl_header);
|
||||||
tree_c2 = new diskTreeComponent(xid, tbl_header.c2_root, tbl_header.c2_state, tbl_header.c2_dp_state);
|
tree_c2 = new diskTreeComponent(xid, tbl_header.c2_root, tbl_header.c2_state, tbl_header.c2_dp_state);
|
||||||
tree_c1 = new diskTreeComponent(xid, tbl_header.c1_root, tbl_header.c1_state, tbl_header.c1_dp_state);
|
tree_c1 = new diskTreeComponent(xid, tbl_header.c1_root, tbl_header.c1_state, tbl_header.c1_dp_state);
|
||||||
}
|
}
|
||||||
void logtable::update_persistent_header(int xid) {
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::update_persistent_header(int xid) {
|
||||||
|
|
||||||
tbl_header.c2_root = tree_c2->get_root_rid();
|
tbl_header.c2_root = tree_c2->get_root_rid();
|
||||||
tbl_header.c2_dp_state = tree_c2->get_datapage_allocator_rid();
|
tbl_header.c2_dp_state = tree_c2->get_datapage_allocator_rid();
|
||||||
|
@ -97,7 +94,18 @@ void logtable::update_persistent_header(int xid) {
|
||||||
Tset(xid, table_rec, &tbl_header);
|
Tset(xid, table_rec, &tbl_header);
|
||||||
}
|
}
|
||||||
|
|
||||||
void logtable::flushTable()
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::setMergeData(logtable_mergedata * mdata){
|
||||||
|
this->mergedata = mdata;
|
||||||
|
mdata->internal_region_size = internal_region_size;
|
||||||
|
mdata->datapage_region_size = datapage_region_size;
|
||||||
|
mdata->datapage_size = datapage_size;
|
||||||
|
|
||||||
|
bump_epoch();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::flushTable()
|
||||||
{
|
{
|
||||||
struct timeval start_tv, stop_tv;
|
struct timeval start_tv, stop_tv;
|
||||||
double start, stop;
|
double start, stop;
|
||||||
|
@ -173,7 +181,8 @@ void logtable::flushTable()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
datatuple * logtable::findTuple(int xid, const datatuple::key_t key, size_t keySize)
|
template<class TUPLE>
|
||||||
|
datatuple * logtable<TUPLE>::findTuple(int xid, const datatuple::key_t key, size_t keySize)
|
||||||
{
|
{
|
||||||
//prepare a search tuple
|
//prepare a search tuple
|
||||||
datatuple *search_tuple = datatuple::create(key, keySize);
|
datatuple *search_tuple = datatuple::create(key, keySize);
|
||||||
|
@ -319,7 +328,8 @@ datatuple * logtable::findTuple(int xid, const datatuple::key_t key, size_t keyS
|
||||||
* returns the first record found with the matching key
|
* returns the first record found with the matching key
|
||||||
* (not to be used together with diffs)
|
* (not to be used together with diffs)
|
||||||
**/
|
**/
|
||||||
datatuple * logtable::findTuple_first(int xid, datatuple::key_t key, size_t keySize)
|
template<class TUPLE>
|
||||||
|
datatuple * logtable<TUPLE>::findTuple_first(int xid, datatuple::key_t key, size_t keySize)
|
||||||
{
|
{
|
||||||
//prepare a search tuple
|
//prepare a search tuple
|
||||||
datatuple * search_tuple = datatuple::create(key, keySize);
|
datatuple * search_tuple = datatuple::create(key, keySize);
|
||||||
|
@ -387,7 +397,8 @@ datatuple * logtable::findTuple_first(int xid, datatuple::key_t key, size_t keyS
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void logtable::insertTuple(datatuple *tuple)
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::insertTuple(datatuple *tuple)
|
||||||
{
|
{
|
||||||
//lock the red-black tree
|
//lock the red-black tree
|
||||||
readlock(header_lock,0);
|
readlock(header_lock,0);
|
||||||
|
@ -440,10 +451,12 @@ void logtable::insertTuple(datatuple *tuple)
|
||||||
DEBUG("tree size %d tuples %lld bytes.\n", tsize, tree_bytes);
|
DEBUG("tree size %d tuples %lld bytes.\n", tsize, tree_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void logtable::registerIterator(logtableIterator<datatuple> * it) {
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::registerIterator(iterator * it) {
|
||||||
its.push_back(it);
|
its.push_back(it);
|
||||||
}
|
}
|
||||||
void logtable::forgetIterator(logtableIterator<datatuple> * it) {
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::forgetIterator(iterator * it) {
|
||||||
for(unsigned int i = 0; i < its.size(); i++) {
|
for(unsigned int i = 0; i < its.size(); i++) {
|
||||||
if(its[i] == it) {
|
if(its[i] == it) {
|
||||||
its.erase(its.begin()+i);
|
its.erase(its.begin()+i);
|
||||||
|
@ -451,12 +464,12 @@ void logtable::forgetIterator(logtableIterator<datatuple> * it) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void logtable::bump_epoch() {
|
template<class TUPLE>
|
||||||
|
void logtable<TUPLE>::bump_epoch() {
|
||||||
assert(!trywritelock(header_lock,0));
|
assert(!trywritelock(header_lock,0));
|
||||||
epoch++;
|
epoch++;
|
||||||
for(unsigned int i = 0; i < its.size(); i++) {
|
for(unsigned int i = 0; i < its.size(); i++) {
|
||||||
its[i]->invalidate();
|
its[i]->invalidate();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
template class logtable<datatuple>;
|
||||||
template class logtableIterator<datatuple>;
|
|
||||||
|
|
558
logstore.h
558
logstore.h
|
@ -1,49 +1,41 @@
|
||||||
#ifndef _LOGSTORE_H_
|
#ifndef _LOGSTORE_H_
|
||||||
#define _LOGSTORE_H_
|
#define _LOGSTORE_H_
|
||||||
|
|
||||||
|
#include <stasis/common.h>
|
||||||
|
#undef try
|
||||||
#undef end
|
#undef end
|
||||||
#undef begin
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
//#include <set>
|
|
||||||
#include <sstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <queue>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "logserver.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
typedef struct merge_stats_t {
|
||||||
#include <stdlib.h>
|
int merge_level; // 1 => C0->C1, 2 => C1->C2
|
||||||
#include <errno.h>
|
pageid_t merge_count; // This is the merge_count'th merge
|
||||||
|
struct timeval sleep; // When did we go to sleep waiting for input?
|
||||||
#include <pthread.h>
|
struct timeval start; // When did we wake up and start merging? (at steady state with max throughput, this should be equal to sleep)
|
||||||
|
struct timeval done; // When did we finish merging?
|
||||||
|
pageid_t bytes_out; // How many bytes did we write (including internal tree nodes)?
|
||||||
|
pageid_t num_tuples_out; // How many tuples did we write?
|
||||||
#include <stasis/transactional.h>
|
pageid_t num_datapages_out; // How many datapages?
|
||||||
|
pageid_t bytes_in_small; // How many bytes from the small input tree (for C0, we ignore tree overheads)?
|
||||||
#include <stasis/operations.h>
|
pageid_t num_tuples_in_small; // Tuples from the small input?
|
||||||
#include <stasis/bufferManager.h>
|
pageid_t bytes_in_large; // Bytes from the large input?
|
||||||
#include <stasis/allocationPolicy.h>
|
pageid_t num_tuples_in_large; // Tuples from large input?
|
||||||
#include <stasis/blobManager.h>
|
} merge_stats_t;
|
||||||
#include <stasis/page.h>
|
|
||||||
#include <stasis/truncation.h>
|
|
||||||
|
|
||||||
#include "diskTreeComponent.h"
|
#include "diskTreeComponent.h"
|
||||||
#include "memTreeComponent.h"
|
#include "memTreeComponent.h"
|
||||||
#include "datapage.h"
|
|
||||||
#include "tuplemerger.h"
|
|
||||||
#include "datatuple.h"
|
|
||||||
|
|
||||||
#include "merger.h"
|
#include "tuplemerger.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct logtable_mergedata;
|
||||||
|
|
||||||
template<class TUPLE>
|
template<class TUPLE>
|
||||||
class logtableIterator ;
|
class logtable {
|
||||||
|
|
||||||
class logtable
|
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
class iterator;
|
||||||
logtable(pageid_t internal_region_size = 1000, pageid_t datapage_region_size = 10000, pageid_t datapage_size = 40); // scans 160KB / 2 per lookup on average. at 100MB/s, this is 0.7 ms. XXX pick datapage_size in principled way.
|
logtable(pageid_t internal_region_size = 1000, pageid_t datapage_region_size = 10000, pageid_t datapage_size = 40); // scans 160KB / 2 per lookup on average. at 100MB/s, this is 0.7 ms. XXX pick datapage_size in principled way.
|
||||||
~logtable();
|
~logtable();
|
||||||
|
|
||||||
|
@ -59,15 +51,12 @@ public:
|
||||||
void openTable(int xid, recordid rid);
|
void openTable(int xid, recordid rid);
|
||||||
void flushTable();
|
void flushTable();
|
||||||
|
|
||||||
// DataPage<datatuple>* insertTuple(int xid, datatuple *tuple,diskTreeComponent::internalNodes *ltree);
|
|
||||||
// datatuple * findTuple(int xid, const datatuple::key_t key, size_t keySize, diskTreeComponent::internalNodes *ltree);
|
|
||||||
|
|
||||||
inline recordid & get_table_rec(){return table_rec;} // TODO This is called by merger.cpp for no good reason. (remove the calls)
|
inline recordid & get_table_rec(){return table_rec;} // TODO This is called by merger.cpp for no good reason. (remove the calls)
|
||||||
|
|
||||||
inline uint64_t get_epoch() { return epoch; }
|
inline uint64_t get_epoch() { return epoch; }
|
||||||
|
|
||||||
void registerIterator(logtableIterator<datatuple> * it);
|
void registerIterator(iterator * it);
|
||||||
void forgetIterator(logtableIterator<datatuple> * it);
|
void forgetIterator(iterator * it);
|
||||||
void bump_epoch() ;
|
void bump_epoch() ;
|
||||||
|
|
||||||
inline diskTreeComponent * get_tree_c2(){return tree_c2;}
|
inline diskTreeComponent * get_tree_c2(){return tree_c2;}
|
||||||
|
@ -85,14 +74,7 @@ public:
|
||||||
|
|
||||||
void update_persistent_header(int xid);
|
void update_persistent_header(int xid);
|
||||||
|
|
||||||
void setMergeData(logtable_mergedata * mdata) {
|
void setMergeData(logtable_mergedata * mdata);
|
||||||
this->mergedata = mdata;
|
|
||||||
|
|
||||||
mdata->internal_region_size = internal_region_size;
|
|
||||||
mdata->datapage_region_size = datapage_region_size;
|
|
||||||
mdata->datapage_size = datapage_size;
|
|
||||||
|
|
||||||
bump_epoch(); }
|
|
||||||
logtable_mergedata* getMergeData(){return mergedata;}
|
logtable_mergedata* getMergeData(){return mergedata;}
|
||||||
|
|
||||||
inline tuplemerger * gettuplemerger(){return tmerger;}
|
inline tuplemerger * gettuplemerger(){return tmerger;}
|
||||||
|
@ -139,256 +121,256 @@ private:
|
||||||
|
|
||||||
tuplemerger *tmerger;
|
tuplemerger *tmerger;
|
||||||
|
|
||||||
std::vector<logtableIterator<datatuple> *> its;
|
std::vector<iterator *> its;
|
||||||
|
|
||||||
bool still_running_;
|
bool still_running_;
|
||||||
};
|
|
||||||
|
|
||||||
template<class ITRA, class ITRN, class TUPLE>
|
|
||||||
class mergeManyIterator {
|
|
||||||
public:
|
public:
|
||||||
explicit mergeManyIterator(ITRA* a, ITRN** iters, int num_iters, TUPLE*(*merge)(const TUPLE*,const TUPLE*), int (*cmp)(const TUPLE*,const TUPLE*)) :
|
|
||||||
num_iters_(num_iters+1),
|
|
||||||
first_iter_(a),
|
|
||||||
iters_((ITRN**)malloc(sizeof(*iters_) * num_iters)), // exactly the number passed in
|
|
||||||
current_((TUPLE**)malloc(sizeof(*current_) * (num_iters_))), // one more than was passed in
|
|
||||||
last_iter_(-1),
|
|
||||||
cmp_(cmp),
|
|
||||||
merge_(merge),
|
|
||||||
dups((int*)malloc(sizeof(*dups)*num_iters_))
|
|
||||||
{
|
|
||||||
current_[0] = first_iter_->getnext();
|
|
||||||
for(int i = 1; i < num_iters_; i++) {
|
|
||||||
iters_[i-1] = iters[i-1];
|
|
||||||
current_[i] = iters_[i-1]->next_callerFrees();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
~mergeManyIterator() {
|
|
||||||
delete(first_iter_);
|
|
||||||
for(int i = 0; i < num_iters_; i++) {
|
|
||||||
if(i != last_iter_) {
|
|
||||||
if(current_[i]) TUPLE::freetuple(current_[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(int i = 1; i < num_iters_; i++) {
|
|
||||||
delete iters_[i-1];
|
|
||||||
}
|
|
||||||
free(current_);
|
|
||||||
free(iters_);
|
|
||||||
free(dups);
|
|
||||||
}
|
|
||||||
TUPLE * peek() {
|
|
||||||
TUPLE * ret = getnext();
|
|
||||||
last_iter_ = -1; // don't advance iterator on next peek() or getnext() call.
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
TUPLE * getnext() {
|
|
||||||
int num_dups = 0;
|
|
||||||
if(last_iter_ != -1) {
|
|
||||||
// get the value after the one we just returned to the user
|
|
||||||
//TUPLE::freetuple(current_[last_iter_]); // should never be null
|
|
||||||
if(last_iter_ == 0) {
|
|
||||||
current_[last_iter_] = first_iter_->getnext();
|
|
||||||
} else if(last_iter_ != -1){
|
|
||||||
current_[last_iter_] = iters_[last_iter_-1]->next_callerFrees();
|
|
||||||
} else {
|
|
||||||
// last call was 'peek'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// find the first non-empty iterator. (Don't need to special-case ITRA since we're looking at current.)
|
|
||||||
int min = 0;
|
|
||||||
while(min < num_iters_ && !current_[min]) {
|
|
||||||
min++;
|
|
||||||
}
|
|
||||||
if(min == num_iters_) { return NULL; }
|
|
||||||
// examine current to decide which tuple to return.
|
|
||||||
for(int i = min+1; i < num_iters_; i++) {
|
|
||||||
if(current_[i]) {
|
|
||||||
int res = cmp_(current_[min], current_[i]);
|
|
||||||
if(res > 0) { // min > i
|
|
||||||
min = i;
|
|
||||||
num_dups = 0;
|
|
||||||
} else if(res == 0) { // min == i
|
|
||||||
dups[num_dups] = i;
|
|
||||||
num_dups++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TUPLE * ret;
|
|
||||||
if(!merge_) {
|
|
||||||
ret = current_[min];
|
|
||||||
} else {
|
|
||||||
// XXX use merge function to build a new ret.
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
// advance the iterators that match the tuple we're returning.
|
|
||||||
for(int i = 0; i < num_dups; i++) {
|
|
||||||
TUPLE::freetuple(current_[dups[i]]); // should never be null
|
|
||||||
current_[dups[i]] = iters_[dups[i]-1]->next_callerFrees();
|
|
||||||
}
|
|
||||||
last_iter_ = min; // mark the min iter to be advance at the next invocation of next(). This saves us a copy in the non-merging case.
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
}
|
template<class ITRA, class ITRN>
|
||||||
private:
|
class mergeManyIterator {
|
||||||
int num_iters_;
|
public:
|
||||||
ITRA * first_iter_;
|
explicit mergeManyIterator(ITRA* a, ITRN** iters, int num_iters, TUPLE*(*merge)(const TUPLE*,const TUPLE*), int (*cmp)(const TUPLE*,const TUPLE*)) :
|
||||||
ITRN ** iters_;
|
num_iters_(num_iters+1),
|
||||||
TUPLE ** current_;
|
first_iter_(a),
|
||||||
int last_iter_;
|
iters_((ITRN**)malloc(sizeof(*iters_) * num_iters)), // exactly the number passed in
|
||||||
|
current_((TUPLE**)malloc(sizeof(*current_) * (num_iters_))), // one more than was passed in
|
||||||
|
last_iter_(-1),
|
||||||
int (*cmp_)(const TUPLE*,const TUPLE*);
|
cmp_(cmp),
|
||||||
TUPLE*(*merge_)(const TUPLE*,const TUPLE*);
|
merge_(merge),
|
||||||
|
dups((int*)malloc(sizeof(*dups)*num_iters_))
|
||||||
// temporary variables initiaized once for effiency
|
{
|
||||||
int * dups;
|
current_[0] = first_iter_->getnext();
|
||||||
};
|
for(int i = 1; i < num_iters_; i++) {
|
||||||
|
iters_[i-1] = iters[i-1];
|
||||||
template<class TUPLE>
|
current_[i] = iters_[i-1]->next_callerFrees();
|
||||||
class logtableIterator {
|
|
||||||
public:
|
|
||||||
explicit logtableIterator(logtable* ltable)
|
|
||||||
: ltable(ltable),
|
|
||||||
epoch(ltable->get_epoch()),
|
|
||||||
merge_it_(NULL),
|
|
||||||
last_returned(NULL),
|
|
||||||
key(NULL),
|
|
||||||
valid(false) {
|
|
||||||
writelock(ltable->header_lock, 0);
|
|
||||||
ltable->registerIterator(this);
|
|
||||||
validate();
|
|
||||||
unlock(ltable->header_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
explicit logtableIterator(logtable* ltable,TUPLE *key)
|
|
||||||
: ltable(ltable),
|
|
||||||
epoch(ltable->get_epoch()),
|
|
||||||
merge_it_(NULL),
|
|
||||||
last_returned(NULL),
|
|
||||||
key(key),
|
|
||||||
valid(false)
|
|
||||||
{
|
|
||||||
writelock(ltable->header_lock, 0);
|
|
||||||
ltable->registerIterator(this);
|
|
||||||
validate();
|
|
||||||
unlock(ltable->header_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
~logtableIterator() {
|
|
||||||
ltable->forgetIterator(this);
|
|
||||||
invalidate();
|
|
||||||
if(last_returned) TUPLE::freetuple(last_returned);
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
TUPLE * getnextHelper() {
|
|
||||||
TUPLE * tmp = merge_it_->getnext();
|
|
||||||
if(last_returned && tmp) {
|
|
||||||
assert(TUPLE::compare(last_returned->key(), last_returned->keylen(), tmp->key(), tmp->keylen()) < 0);
|
|
||||||
TUPLE::freetuple(last_returned);
|
|
||||||
}
|
|
||||||
last_returned = tmp;
|
|
||||||
return last_returned;
|
|
||||||
}
|
|
||||||
public:
|
|
||||||
TUPLE * getnextIncludingTombstones() {
|
|
||||||
readlock(ltable->header_lock, 0);
|
|
||||||
revalidate();
|
|
||||||
TUPLE * ret = getnextHelper();
|
|
||||||
unlock(ltable->header_lock);
|
|
||||||
return ret ? ret->create_copy() : NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
TUPLE * getnext() {
|
|
||||||
readlock(ltable->header_lock, 0);
|
|
||||||
revalidate();
|
|
||||||
TUPLE * ret;
|
|
||||||
while((ret = getnextHelper()) && ret->isDelete()) { } // getNextHelper handles its own memory.
|
|
||||||
unlock(ltable->header_lock);
|
|
||||||
return ret ? ret->create_copy() : NULL; // XXX hate making copy! Caller should not manage our memory.
|
|
||||||
}
|
|
||||||
|
|
||||||
void invalidate() {
|
|
||||||
if(valid) {
|
|
||||||
delete merge_it_;
|
|
||||||
merge_it_ = NULL;
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
inline void init_helper();
|
|
||||||
|
|
||||||
explicit logtableIterator() { abort(); }
|
|
||||||
void operator=(logtableIterator<TUPLE> & t) { abort(); }
|
|
||||||
int operator-(logtableIterator<TUPLE> & t) { abort(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
static const int C1 = 0;
|
|
||||||
static const int C1_MERGEABLE = 1;
|
|
||||||
static const int C2 = 2;
|
|
||||||
logtable * ltable;
|
|
||||||
uint64_t epoch;
|
|
||||||
typedef mergeManyIterator<
|
|
||||||
typename memTreeComponent<TUPLE>::revalidatingIterator,
|
|
||||||
typename memTreeComponent<TUPLE>::iterator,
|
|
||||||
TUPLE> inner_merge_it_t;
|
|
||||||
typedef mergeManyIterator<
|
|
||||||
inner_merge_it_t,
|
|
||||||
diskTreeComponent::diskTreeIterator,
|
|
||||||
TUPLE> merge_it_t;
|
|
||||||
|
|
||||||
merge_it_t* merge_it_;
|
|
||||||
|
|
||||||
TUPLE * last_returned;
|
|
||||||
TUPLE * key;
|
|
||||||
bool valid;
|
|
||||||
void revalidate() {
|
|
||||||
if(!valid) {
|
|
||||||
validate();
|
|
||||||
} else {
|
|
||||||
assert(epoch == ltable->get_epoch());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void validate() {
|
|
||||||
typename memTreeComponent<TUPLE>::revalidatingIterator * c0_it;
|
|
||||||
typename memTreeComponent<TUPLE>::iterator *c0_mergeable_it[1];
|
|
||||||
diskTreeComponent::diskTreeIterator * disk_it[3];
|
|
||||||
epoch = ltable->get_epoch();
|
|
||||||
if(last_returned) {
|
|
||||||
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut, last_returned);
|
|
||||||
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable(), last_returned);
|
|
||||||
disk_it[0] = ltable->get_tree_c1()->iterator(last_returned);
|
|
||||||
disk_it[1] = ltable->get_tree_c1_mergeable()->iterator(last_returned);
|
|
||||||
disk_it[2] = ltable->get_tree_c2()->iterator(last_returned);
|
|
||||||
} else if(key) {
|
|
||||||
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut, key);
|
|
||||||
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable(), key);
|
|
||||||
disk_it[0] = ltable->get_tree_c1()->iterator(key);
|
|
||||||
disk_it[1] = ltable->get_tree_c1_mergeable()->iterator(key);
|
|
||||||
disk_it[2] = ltable->get_tree_c2()->iterator(key);
|
|
||||||
} else {
|
|
||||||
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut );
|
|
||||||
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable() );
|
|
||||||
disk_it[0] = ltable->get_tree_c1()->iterator();
|
|
||||||
disk_it[1] = ltable->get_tree_c1_mergeable()->iterator();
|
|
||||||
disk_it[2] = ltable->get_tree_c2()->iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
inner_merge_it_t * inner_merge_it =
|
|
||||||
new inner_merge_it_t(c0_it, c0_mergeable_it, 1, NULL, TUPLE::compare_obj);
|
|
||||||
merge_it_ = new merge_it_t(inner_merge_it, disk_it, 3, NULL, TUPLE::compare_obj); // XXX Hardcodes comparator, and does not handle merges
|
|
||||||
if(last_returned) {
|
|
||||||
TUPLE * junk = merge_it_->peek();
|
|
||||||
if(junk && !TUPLE::compare(junk->key(), junk->keylen(), last_returned->key(), last_returned->keylen())) {
|
|
||||||
// we already returned junk
|
|
||||||
TUPLE::freetuple(merge_it_->getnext());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
valid = true;
|
~mergeManyIterator() {
|
||||||
}
|
delete(first_iter_);
|
||||||
|
for(int i = 0; i < num_iters_; i++) {
|
||||||
|
if(i != last_iter_) {
|
||||||
|
if(current_[i]) TUPLE::freetuple(current_[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int i = 1; i < num_iters_; i++) {
|
||||||
|
delete iters_[i-1];
|
||||||
|
}
|
||||||
|
free(current_);
|
||||||
|
free(iters_);
|
||||||
|
free(dups);
|
||||||
|
}
|
||||||
|
TUPLE * peek() {
|
||||||
|
TUPLE * ret = getnext();
|
||||||
|
last_iter_ = -1; // don't advance iterator on next peek() or getnext() call.
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
TUPLE * getnext() {
|
||||||
|
int num_dups = 0;
|
||||||
|
if(last_iter_ != -1) {
|
||||||
|
// get the value after the one we just returned to the user
|
||||||
|
//TUPLE::freetuple(current_[last_iter_]); // should never be null
|
||||||
|
if(last_iter_ == 0) {
|
||||||
|
current_[last_iter_] = first_iter_->getnext();
|
||||||
|
} else if(last_iter_ != -1){
|
||||||
|
current_[last_iter_] = iters_[last_iter_-1]->next_callerFrees();
|
||||||
|
} else {
|
||||||
|
// last call was 'peek'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// find the first non-empty iterator. (Don't need to special-case ITRA since we're looking at current.)
|
||||||
|
int min = 0;
|
||||||
|
while(min < num_iters_ && !current_[min]) {
|
||||||
|
min++;
|
||||||
|
}
|
||||||
|
if(min == num_iters_) { return NULL; }
|
||||||
|
// examine current to decide which tuple to return.
|
||||||
|
for(int i = min+1; i < num_iters_; i++) {
|
||||||
|
if(current_[i]) {
|
||||||
|
int res = cmp_(current_[min], current_[i]);
|
||||||
|
if(res > 0) { // min > i
|
||||||
|
min = i;
|
||||||
|
num_dups = 0;
|
||||||
|
} else if(res == 0) { // min == i
|
||||||
|
dups[num_dups] = i;
|
||||||
|
num_dups++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TUPLE * ret;
|
||||||
|
if(!merge_) {
|
||||||
|
ret = current_[min];
|
||||||
|
} else {
|
||||||
|
// XXX use merge function to build a new ret.
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
// advance the iterators that match the tuple we're returning.
|
||||||
|
for(int i = 0; i < num_dups; i++) {
|
||||||
|
TUPLE::freetuple(current_[dups[i]]); // should never be null
|
||||||
|
current_[dups[i]] = iters_[dups[i]-1]->next_callerFrees();
|
||||||
|
}
|
||||||
|
last_iter_ = min; // mark the min iter to be advance at the next invocation of next(). This saves us a copy in the non-merging case.
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
int num_iters_;
|
||||||
|
ITRA * first_iter_;
|
||||||
|
ITRN ** iters_;
|
||||||
|
TUPLE ** current_;
|
||||||
|
int last_iter_;
|
||||||
|
|
||||||
|
|
||||||
|
int (*cmp_)(const TUPLE*,const TUPLE*);
|
||||||
|
TUPLE*(*merge_)(const TUPLE*,const TUPLE*);
|
||||||
|
|
||||||
|
// temporary variables initiaized once for effiency
|
||||||
|
int * dups;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class iterator {
|
||||||
|
public:
|
||||||
|
explicit iterator(logtable* ltable)
|
||||||
|
: ltable(ltable),
|
||||||
|
epoch(ltable->get_epoch()),
|
||||||
|
merge_it_(NULL),
|
||||||
|
last_returned(NULL),
|
||||||
|
key(NULL),
|
||||||
|
valid(false) {
|
||||||
|
writelock(ltable->header_lock, 0);
|
||||||
|
ltable->registerIterator(this);
|
||||||
|
validate();
|
||||||
|
unlock(ltable->header_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit iterator(logtable* ltable,TUPLE *key)
|
||||||
|
: ltable(ltable),
|
||||||
|
epoch(ltable->get_epoch()),
|
||||||
|
merge_it_(NULL),
|
||||||
|
last_returned(NULL),
|
||||||
|
key(key),
|
||||||
|
valid(false)
|
||||||
|
{
|
||||||
|
writelock(ltable->header_lock, 0);
|
||||||
|
ltable->registerIterator(this);
|
||||||
|
validate();
|
||||||
|
unlock(ltable->header_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
~iterator() {
|
||||||
|
ltable->forgetIterator(this);
|
||||||
|
invalidate();
|
||||||
|
if(last_returned) TUPLE::freetuple(last_returned);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
TUPLE * getnextHelper() {
|
||||||
|
TUPLE * tmp = merge_it_->getnext();
|
||||||
|
if(last_returned && tmp) {
|
||||||
|
assert(TUPLE::compare(last_returned->key(), last_returned->keylen(), tmp->key(), tmp->keylen()) < 0);
|
||||||
|
TUPLE::freetuple(last_returned);
|
||||||
|
}
|
||||||
|
last_returned = tmp;
|
||||||
|
return last_returned;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
TUPLE * getnextIncludingTombstones() {
|
||||||
|
readlock(ltable->header_lock, 0);
|
||||||
|
revalidate();
|
||||||
|
TUPLE * ret = getnextHelper();
|
||||||
|
unlock(ltable->header_lock);
|
||||||
|
return ret ? ret->create_copy() : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
TUPLE * getnext() {
|
||||||
|
readlock(ltable->header_lock, 0);
|
||||||
|
revalidate();
|
||||||
|
TUPLE * ret;
|
||||||
|
while((ret = getnextHelper()) && ret->isDelete()) { } // getNextHelper handles its own memory.
|
||||||
|
unlock(ltable->header_lock);
|
||||||
|
return ret ? ret->create_copy() : NULL; // XXX hate making copy! Caller should not manage our memory.
|
||||||
|
}
|
||||||
|
|
||||||
|
void invalidate() {
|
||||||
|
if(valid) {
|
||||||
|
delete merge_it_;
|
||||||
|
merge_it_ = NULL;
|
||||||
|
valid = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
inline void init_helper();
|
||||||
|
|
||||||
|
explicit iterator() { abort(); }
|
||||||
|
void operator=(iterator & t) { abort(); }
|
||||||
|
int operator-(iterator & t) { abort(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const int C1 = 0;
|
||||||
|
static const int C1_MERGEABLE = 1;
|
||||||
|
static const int C2 = 2;
|
||||||
|
logtable * ltable;
|
||||||
|
uint64_t epoch;
|
||||||
|
typedef mergeManyIterator<
|
||||||
|
typename memTreeComponent<TUPLE>::revalidatingIterator,
|
||||||
|
typename memTreeComponent<TUPLE>::iterator> inner_merge_it_t;
|
||||||
|
typedef mergeManyIterator<
|
||||||
|
inner_merge_it_t,
|
||||||
|
diskTreeComponent::iterator> merge_it_t;
|
||||||
|
|
||||||
|
merge_it_t* merge_it_;
|
||||||
|
|
||||||
|
TUPLE * last_returned;
|
||||||
|
TUPLE * key;
|
||||||
|
bool valid;
|
||||||
|
void revalidate() {
|
||||||
|
if(!valid) {
|
||||||
|
validate();
|
||||||
|
} else {
|
||||||
|
assert(epoch == ltable->get_epoch());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void validate() {
|
||||||
|
typename memTreeComponent<TUPLE>::revalidatingIterator * c0_it;
|
||||||
|
typename memTreeComponent<TUPLE>::iterator *c0_mergeable_it[1];
|
||||||
|
diskTreeComponent::iterator * disk_it[3];
|
||||||
|
epoch = ltable->get_epoch();
|
||||||
|
if(last_returned) {
|
||||||
|
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut, last_returned);
|
||||||
|
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable(), last_returned);
|
||||||
|
disk_it[0] = ltable->get_tree_c1()->open_iterator(last_returned);
|
||||||
|
disk_it[1] = ltable->get_tree_c1_mergeable()->open_iterator(last_returned);
|
||||||
|
disk_it[2] = ltable->get_tree_c2()->open_iterator(last_returned);
|
||||||
|
} else if(key) {
|
||||||
|
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut, key);
|
||||||
|
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable(), key);
|
||||||
|
disk_it[0] = ltable->get_tree_c1()->open_iterator(key);
|
||||||
|
disk_it[1] = ltable->get_tree_c1_mergeable()->open_iterator(key);
|
||||||
|
disk_it[2] = ltable->get_tree_c2()->open_iterator(key);
|
||||||
|
} else {
|
||||||
|
c0_it = new typename memTreeComponent<TUPLE>::revalidatingIterator(ltable->get_tree_c0(), ltable->getMergeData()->rbtree_mut );
|
||||||
|
c0_mergeable_it[0] = new typename memTreeComponent<TUPLE>::iterator (ltable->get_tree_c0_mergeable() );
|
||||||
|
disk_it[0] = ltable->get_tree_c1()->open_iterator();
|
||||||
|
disk_it[1] = ltable->get_tree_c1_mergeable()->open_iterator();
|
||||||
|
disk_it[2] = ltable->get_tree_c2()->open_iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
inner_merge_it_t * inner_merge_it =
|
||||||
|
new inner_merge_it_t(c0_it, c0_mergeable_it, 1, NULL, TUPLE::compare_obj);
|
||||||
|
merge_it_ = new merge_it_t(inner_merge_it, disk_it, 3, NULL, TUPLE::compare_obj); // XXX Hardcodes comparator, and does not handle merges
|
||||||
|
if(last_returned) {
|
||||||
|
TUPLE * junk = merge_it_->peek();
|
||||||
|
if(junk && !TUPLE::compare(junk->key(), junk->keylen(), last_returned->key(), last_returned->keylen())) {
|
||||||
|
// we already returned junk
|
||||||
|
TUPLE::freetuple(merge_it_->getnext());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
valid = true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#include "memTreeComponent.h"
|
#include "memTreeComponent.h"
|
||||||
|
#include "datatuple.h"
|
||||||
|
|
||||||
template<class TUPLE>
|
template<class TUPLE>
|
||||||
void memTreeComponent<TUPLE>::tearDownTree(rbtree_ptr_t tree) {
|
void memTreeComponent<TUPLE>::tearDownTree(rbtree_ptr_t tree) {
|
||||||
TUPLE * t = 0;
|
TUPLE * t = 0;
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#ifndef _MEMTREECOMPONENT_H_
|
#ifndef _MEMTREECOMPONENT_H_
|
||||||
#define _MEMTREECOMPONENT_H_
|
#define _MEMTREECOMPONENT_H_
|
||||||
#include <set>
|
#include <set>
|
||||||
#include "datatuple.h"
|
|
||||||
template<class TUPLE>
|
template<class TUPLE>
|
||||||
class memTreeComponent {
|
class memTreeComponent {
|
||||||
public:
|
public:
|
||||||
|
@ -115,7 +115,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
~revalidatingIterator() {
|
~revalidatingIterator() {
|
||||||
if(next_ret_) datatuple::freetuple(next_ret_);
|
if(next_ret_) TUPLE::freetuple(next_ret_);
|
||||||
}
|
}
|
||||||
|
|
||||||
TUPLE* getnext() {
|
TUPLE* getnext() {
|
||||||
|
|
27
merger.cpp
27
merger.cpp
|
@ -2,6 +2,11 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "merger.h"
|
#include "merger.h"
|
||||||
|
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
|
#undef try
|
||||||
|
#undef end
|
||||||
|
|
||||||
void merge_stats_pp(FILE* fd, merge_stats_t &stats) {
|
void merge_stats_pp(FILE* fd, merge_stats_t &stats) {
|
||||||
long long sleep_time = stats.start.tv_sec - stats.sleep.tv_sec;
|
long long sleep_time = stats.start.tv_sec - stats.sleep.tv_sec;
|
||||||
long long work_time = stats.done.tv_sec - stats.start.tv_sec;
|
long long work_time = stats.done.tv_sec - stats.start.tv_sec;
|
||||||
|
@ -44,7 +49,7 @@ void merge_stats_pp(FILE* fd, merge_stats_t &stats) {
|
||||||
|
|
||||||
double merge_stats_nsec_to_merge_in_bytes(merge_stats_t); // how many nsec did we burn on each byte from the small tree (want this to be equal for the two mergers)
|
double merge_stats_nsec_to_merge_in_bytes(merge_stats_t); // how many nsec did we burn on each byte from the small tree (want this to be equal for the two mergers)
|
||||||
|
|
||||||
int merge_scheduler::addlogtable(logtable *ltable)
|
int merge_scheduler::addlogtable(logtable<datatuple> *ltable)
|
||||||
{
|
{
|
||||||
|
|
||||||
struct logtable_mergedata * mdata = new logtable_mergedata;
|
struct logtable_mergedata * mdata = new logtable_mergedata;
|
||||||
|
@ -76,7 +81,7 @@ merge_scheduler::~merge_scheduler()
|
||||||
{
|
{
|
||||||
for(size_t i=0; i<mergedata.size(); i++)
|
for(size_t i=0; i<mergedata.size(); i++)
|
||||||
{
|
{
|
||||||
logtable *ltable = mergedata[i].first;
|
logtable<datatuple> *ltable = mergedata[i].first;
|
||||||
logtable_mergedata *mdata = mergedata[i].second;
|
logtable_mergedata *mdata = mergedata[i].second;
|
||||||
|
|
||||||
//delete the mergedata fields
|
//delete the mergedata fields
|
||||||
|
@ -112,7 +117,7 @@ void merge_scheduler::shutdown()
|
||||||
//signal shutdown
|
//signal shutdown
|
||||||
for(size_t i=0; i<mergedata.size(); i++)
|
for(size_t i=0; i<mergedata.size(); i++)
|
||||||
{
|
{
|
||||||
logtable *ltable = mergedata[i].first;
|
logtable<datatuple> *ltable = mergedata[i].first;
|
||||||
logtable_mergedata *mdata = mergedata[i].second;
|
logtable_mergedata *mdata = mergedata[i].second;
|
||||||
|
|
||||||
//flush the in memory table to write any tuples still in memory
|
//flush the in memory table to write any tuples still in memory
|
||||||
|
@ -142,7 +147,7 @@ void merge_scheduler::shutdown()
|
||||||
void merge_scheduler::startlogtable(int index, int64_t MAX_C0_SIZE)
|
void merge_scheduler::startlogtable(int index, int64_t MAX_C0_SIZE)
|
||||||
{
|
{
|
||||||
|
|
||||||
logtable * ltable = mergedata[index].first;
|
logtable<datatuple> * ltable = mergedata[index].first;
|
||||||
struct logtable_mergedata *mdata = mergedata[index].second;
|
struct logtable_mergedata *mdata = mergedata[index].second;
|
||||||
|
|
||||||
pthread_cond_t * block1_needed_cond = new pthread_cond_t;
|
pthread_cond_t * block1_needed_cond = new pthread_cond_t;
|
||||||
|
@ -228,7 +233,7 @@ template <class ITA, class ITB>
|
||||||
void merge_iterators(int xid,
|
void merge_iterators(int xid,
|
||||||
ITA *itrA,
|
ITA *itrA,
|
||||||
ITB *itrB,
|
ITB *itrB,
|
||||||
logtable *ltable,
|
logtable<datatuple> *ltable,
|
||||||
diskTreeComponent *scratch_tree,
|
diskTreeComponent *scratch_tree,
|
||||||
merge_stats_t *stats,
|
merge_stats_t *stats,
|
||||||
bool dropDeletes);
|
bool dropDeletes);
|
||||||
|
@ -262,7 +267,7 @@ void* memMergeThread(void*arg)
|
||||||
|
|
||||||
merger_args * a = (merger_args*)(arg);
|
merger_args * a = (merger_args*)(arg);
|
||||||
|
|
||||||
logtable * ltable = a->ltable;
|
logtable<datatuple> * ltable = a->ltable;
|
||||||
assert(ltable->get_tree_c1());
|
assert(ltable->get_tree_c1());
|
||||||
|
|
||||||
int merge_count =0;
|
int merge_count =0;
|
||||||
|
@ -318,7 +323,7 @@ void* memMergeThread(void*arg)
|
||||||
// 4: Merge
|
// 4: Merge
|
||||||
|
|
||||||
//create the iterators
|
//create the iterators
|
||||||
diskTreeComponent::diskTreeIterator *itrA = ltable->get_tree_c1()->iterator();
|
diskTreeComponent::iterator *itrA = ltable->get_tree_c1()->open_iterator();
|
||||||
memTreeComponent<datatuple>::iterator *itrB =
|
memTreeComponent<datatuple>::iterator *itrB =
|
||||||
new memTreeComponent<datatuple>::iterator(ltable->get_tree_c0_mergeable());
|
new memTreeComponent<datatuple>::iterator(ltable->get_tree_c0_mergeable());
|
||||||
|
|
||||||
|
@ -420,7 +425,7 @@ void *diskMergeThread(void*arg)
|
||||||
|
|
||||||
merger_args * a = (merger_args*)(arg);
|
merger_args * a = (merger_args*)(arg);
|
||||||
|
|
||||||
logtable * ltable = a->ltable;
|
logtable<datatuple> * ltable = a->ltable;
|
||||||
assert(ltable->get_tree_c2());
|
assert(ltable->get_tree_c2());
|
||||||
|
|
||||||
|
|
||||||
|
@ -472,8 +477,8 @@ void *diskMergeThread(void*arg)
|
||||||
|
|
||||||
// 4: do the merge.
|
// 4: do the merge.
|
||||||
//create the iterators
|
//create the iterators
|
||||||
diskTreeComponent::diskTreeIterator *itrA = ltable->get_tree_c2()->iterator(); //new diskTreeIterator<datatuple>(ltable->get_tree_c2()->get_root_rec());
|
diskTreeComponent::iterator *itrA = ltable->get_tree_c2()->open_iterator(); //new iterator<datatuple>(ltable->get_tree_c2()->get_root_rec());
|
||||||
diskTreeComponent::diskTreeIterator *itrB = ltable->get_tree_c1_mergeable()->iterator();
|
diskTreeComponent::iterator *itrB = ltable->get_tree_c1_mergeable()->open_iterator();
|
||||||
|
|
||||||
//create a new tree
|
//create a new tree
|
||||||
diskTreeComponent * c2_prime = new diskTreeComponent(xid, a->internal_region_size, a->datapage_region_size, a->datapage_size);
|
diskTreeComponent * c2_prime = new diskTreeComponent(xid, a->internal_region_size, a->datapage_region_size, a->datapage_size);
|
||||||
|
@ -535,7 +540,7 @@ template <class ITA, class ITB>
|
||||||
void merge_iterators(int xid,
|
void merge_iterators(int xid,
|
||||||
ITA *itrA, //iterator on c1 or c2
|
ITA *itrA, //iterator on c1 or c2
|
||||||
ITB *itrB, //iterator on c0 or c1, respectively
|
ITB *itrB, //iterator on c0 or c1, respectively
|
||||||
logtable *ltable,
|
logtable<datatuple> *ltable,
|
||||||
diskTreeComponent *scratch_tree, merge_stats_t *stats,
|
diskTreeComponent *scratch_tree, merge_stats_t *stats,
|
||||||
bool dropDeletes // should be true iff this is biggest component
|
bool dropDeletes // should be true iff this is biggest component
|
||||||
)
|
)
|
||||||
|
|
26
merger.h
26
merger.h
|
@ -1,8 +1,8 @@
|
||||||
#ifndef _MERGER_H_
|
#ifndef _MERGER_H_
|
||||||
#define _MERGER_H_
|
#define _MERGER_H_
|
||||||
|
|
||||||
#include <vector>
|
#include "logstore.h"
|
||||||
#include <utility>
|
#include "datatuple.h"
|
||||||
|
|
||||||
#include <stasis/common.h>
|
#include <stasis/common.h>
|
||||||
#undef try
|
#undef try
|
||||||
|
@ -11,26 +11,10 @@
|
||||||
//TODO: 400 bytes overhead per tuple, this is nuts, check if this is true...
|
//TODO: 400 bytes overhead per tuple, this is nuts, check if this is true...
|
||||||
static const int RB_TREE_OVERHEAD = 400;
|
static const int RB_TREE_OVERHEAD = 400;
|
||||||
static const double MIN_R = 3.0;
|
static const double MIN_R = 3.0;
|
||||||
class logtable;
|
|
||||||
|
|
||||||
typedef struct merge_stats_t {
|
|
||||||
int merge_level; // 1 => C0->C1, 2 => C1->C2
|
|
||||||
pageid_t merge_count; // This is the merge_count'th merge
|
|
||||||
struct timeval sleep; // When did we go to sleep waiting for input?
|
|
||||||
struct timeval start; // When did we wake up and start merging? (at steady state with max throughput, this should be equal to sleep)
|
|
||||||
struct timeval done; // When did we finish merging?
|
|
||||||
pageid_t bytes_out; // How many bytes did we write (including internal tree nodes)?
|
|
||||||
pageid_t num_tuples_out; // How many tuples did we write?
|
|
||||||
pageid_t num_datapages_out; // How many datapages?
|
|
||||||
pageid_t bytes_in_small; // How many bytes from the small input tree (for C0, we ignore tree overheads)?
|
|
||||||
pageid_t num_tuples_in_small; // Tuples from the small input?
|
|
||||||
pageid_t bytes_in_large; // Bytes from the large input?
|
|
||||||
pageid_t num_tuples_in_large; // Tuples from large input?
|
|
||||||
} merge_stats_t;
|
|
||||||
|
|
||||||
struct merger_args
|
struct merger_args
|
||||||
{
|
{
|
||||||
logtable * ltable;
|
logtable<datatuple> * ltable;
|
||||||
int worker_id;
|
int worker_id;
|
||||||
|
|
||||||
pthread_mutex_t * block_ready_mut;
|
pthread_mutex_t * block_ready_mut;
|
||||||
|
@ -82,12 +66,12 @@ struct logtable_mergedata
|
||||||
|
|
||||||
class merge_scheduler
|
class merge_scheduler
|
||||||
{
|
{
|
||||||
std::vector<std::pair<logtable *, logtable_mergedata*> > mergedata;
|
std::vector<std::pair<logtable<datatuple> *, logtable_mergedata*> > mergedata;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
~merge_scheduler();
|
~merge_scheduler();
|
||||||
|
|
||||||
int addlogtable(logtable * ltable);
|
int addlogtable(logtable<datatuple> * ltable);
|
||||||
void startlogtable(int index, int64_t MAX_C0_SIZE = 100*1024*1024);
|
void startlogtable(int index, int64_t MAX_C0_SIZE = 100*1024*1024);
|
||||||
|
|
||||||
struct logtable_mergedata *getMergeData(int index){return mergedata[index].second;}
|
struct logtable_mergedata *getMergeData(int index){return mergedata[index].second;}
|
||||||
|
|
|
@ -8,6 +8,11 @@
|
||||||
#ifndef REGIONALLOCATOR_H_
|
#ifndef REGIONALLOCATOR_H_
|
||||||
#define REGIONALLOCATOR_H_
|
#define REGIONALLOCATOR_H_
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
|
#undef try
|
||||||
|
#undef end
|
||||||
|
|
||||||
|
|
||||||
class RegionAllocator
|
class RegionAllocator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include "logstore.h"
|
#include "logstore.h"
|
||||||
|
#include "logserver.h"
|
||||||
#include "datapage.h"
|
#include "datapage.h"
|
||||||
#include "merger.h"
|
#include "merger.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
@ -55,7 +56,7 @@ void initialize_server()
|
||||||
|
|
||||||
mscheduler = new merge_scheduler;
|
mscheduler = new merge_scheduler;
|
||||||
|
|
||||||
logtable ltable;
|
logtable<datatuple> ltable;
|
||||||
|
|
||||||
recordid table_root = ROOT_RECORD;
|
recordid table_root = ROOT_RECORD;
|
||||||
if(TrecordType(xid, ROOT_RECORD) == INVALID_SLOT) {
|
if(TrecordType(xid, ROOT_RECORD) == INVALID_SLOT) {
|
||||||
|
|
|
@ -13,7 +13,9 @@
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include "check_util.h"
|
#include "check_util.h"
|
||||||
|
#include "regionAllocator.h"
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,8 @@
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
|
#undef begin
|
||||||
|
#undef end
|
||||||
|
|
||||||
|
|
||||||
#include "logstore.h"
|
#include "logstore.h"
|
||||||
|
|
||||||
|
@ -13,7 +17,7 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
|
|
||||||
logtable ltable(1000, 10000, 5);
|
logtable<datatuple> ltable(1000, 10000, 5);
|
||||||
|
|
||||||
recordid table_root = ltable.allocTable(xid);
|
recordid table_root = ltable.allocTable(xid);
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
@ -29,7 +30,7 @@ void insertProbeIter(size_t NUM_ENTRIES)
|
||||||
|
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
|
|
||||||
logtable ltable(1000, 10000, 5);
|
logtable<datatuple> ltable(1000, 10000, 5);
|
||||||
recordid table_root = ltable.allocTable(xid);
|
recordid table_root = ltable.allocTable(xid);
|
||||||
|
|
||||||
Tcommit(xid);
|
Tcommit(xid);
|
||||||
|
@ -84,7 +85,7 @@ void insertProbeIter(size_t NUM_ENTRIES)
|
||||||
printf("Stage 2: Sequentially reading %d tuples\n", NUM_ENTRIES);
|
printf("Stage 2: Sequentially reading %d tuples\n", NUM_ENTRIES);
|
||||||
|
|
||||||
size_t tuplenum = 0;
|
size_t tuplenum = 0;
|
||||||
diskTreeComponent::diskTreeIterator * tree_itr = ltable_c1->iterator();
|
diskTreeComponent::iterator * tree_itr = ltable_c1->open_iterator();
|
||||||
|
|
||||||
|
|
||||||
datatuple *dt=0;
|
datatuple *dt=0;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
#define OFFSET (NUM_ENTRIES * 10)
|
#define OFFSET (NUM_ENTRIES * 10)
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
@ -46,7 +47,7 @@ void insertProbeIter(size_t NUM_ENTRIES)
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
|
|
||||||
merge_scheduler mscheduler;
|
merge_scheduler mscheduler;
|
||||||
logtable ltable(1000, 10000, 5);
|
logtable<datatuple> ltable(1000, 10000, 5);
|
||||||
|
|
||||||
recordid table_root = ltable.allocTable(xid);
|
recordid table_root = ltable.allocTable(xid);
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
@ -45,7 +46,7 @@ void insertProbeIter(size_t NUM_ENTRIES)
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
|
|
||||||
merge_scheduler mscheduler;
|
merge_scheduler mscheduler;
|
||||||
logtable ltable(1000, 10000, 100);
|
logtable<datatuple> ltable(1000, 10000, 100);
|
||||||
|
|
||||||
recordid table_root = ltable.allocTable(xid);
|
recordid table_root = ltable.allocTable(xid);
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
#undef begin
|
#undef begin
|
||||||
#undef end
|
#undef end
|
||||||
|
|
||||||
|
@ -101,7 +102,7 @@ void insertProbeIter(size_t NUM_ENTRIES)
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
|
|
||||||
merge_scheduler mscheduler;
|
merge_scheduler mscheduler;
|
||||||
logtable ltable(1000, 1000, 40);
|
logtable<datatuple> ltable(1000, 1000, 40);
|
||||||
|
|
||||||
recordid table_root = ltable.allocTable(xid);
|
recordid table_root = ltable.allocTable(xid);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue