Partial implementation of a collection oriented interface for ROSE. (Checking in to create version to diff against.)
This commit is contained in:
parent
4daffabdf3
commit
7e2c37534e
18 changed files with 841 additions and 136 deletions
|
@ -3,11 +3,14 @@ LDADD=$(top_builddir)/src/stasis/libstasis.la \
|
||||||
|
|
||||||
|
|
||||||
rose_SOURCES=rose.cpp
|
rose_SOURCES=rose.cpp
|
||||||
|
roseTable_SOURCES=roseTable.cpp
|
||||||
|
|
||||||
if BUILD_BENCHMARKS
|
if BUILD_BENCHMARKS
|
||||||
noinst_PROGRAMS=lhtableThreaded naiveHash logicalHash readLogicalHash naiveMultiThreaded logicalMultThreaded rawSet \
|
noinst_PROGRAMS=lhtableThreaded naiveHash logicalHash readLogicalHash \
|
||||||
arrayListSet logicalMultiReaders linearHashNTA linkedListNTA pageOrientedListNTA \
|
naiveMultiThreaded logicalMultThreaded rawSet arrayListSet \
|
||||||
linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests transitiveClosure zeroCopy sequentialThroughput rose
|
logicalMultiReaders linearHashNTA linkedListNTA pageOrientedListNTA \
|
||||||
|
linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests \
|
||||||
|
transitiveClosure zeroCopy sequentialThroughput rose roseTable
|
||||||
endif
|
endif
|
||||||
AM_CFLAGS=${GLOBAL_CFLAGS}
|
AM_CFLAGS=${GLOBAL_CFLAGS}
|
||||||
AM_CXXFLAGS=${GLOBAL_CXXFLAGS} -I ${top_builddir}/src
|
AM_CXXFLAGS=${GLOBAL_CXXFLAGS} -I ${top_builddir}/src
|
||||||
|
|
|
@ -47,6 +47,8 @@ using rose::mergeThread;
|
||||||
using rose::compressData;
|
using rose::compressData;
|
||||||
using rose::tv_to_double; // XXX
|
using rose::tv_to_double; // XXX
|
||||||
|
|
||||||
|
using rose::initPage;
|
||||||
|
|
||||||
static const int32_t GB = 1024 * 1024 * 1024;
|
static const int32_t GB = 1024 * 1024 * 1024;
|
||||||
|
|
||||||
static int lsm_sim; // XXX this global variable shouldn't be global!
|
static int lsm_sim; // XXX this global variable shouldn't be global!
|
||||||
|
@ -54,6 +56,7 @@ static int lsm_sim; // XXX this global variable shouldn't be global!
|
||||||
|
|
||||||
#define FIRST_PAGE 1
|
#define FIRST_PAGE 1
|
||||||
|
|
||||||
|
#define FAST_ALLOC 6
|
||||||
/**
|
/**
|
||||||
Bypass stasis' allocation mechanisms. Stasis' page allocation
|
Bypass stasis' allocation mechanisms. Stasis' page allocation
|
||||||
costs should be minimal, but this program doesn't support
|
costs should be minimal, but this program doesn't support
|
||||||
|
@ -350,8 +353,6 @@ void run_test(unsigned int inserts, column_number_t column_count,
|
||||||
|
|
||||||
int num_pages = 0;
|
int num_pages = 0;
|
||||||
|
|
||||||
TlsmSetPageAllocator(roseFastAlloc, &num_pages);
|
|
||||||
|
|
||||||
Tinit();
|
Tinit();
|
||||||
|
|
||||||
recordid tree = NULLRID;
|
recordid tree = NULLRID;
|
||||||
|
@ -361,7 +362,7 @@ void run_test(unsigned int inserts, column_number_t column_count,
|
||||||
pthread_cond_t block_ready_cond = PTHREAD_COND_INITIALIZER;
|
pthread_cond_t block_ready_cond = PTHREAD_COND_INITIALIZER;
|
||||||
int max_waiters = 3; // merged, old, new.
|
int max_waiters = 3; // merged, old, new.
|
||||||
if(lsm_sim) {
|
if(lsm_sim) {
|
||||||
struct insert_args<PAGELAYOUT,ENGINE,ITER,ROW,TYPE> args = {
|
struct insert_args<PAGELAYOUT,ENGINE,ITER,ROW> args = {
|
||||||
comparator_idx,
|
comparator_idx,
|
||||||
rowsize,
|
rowsize,
|
||||||
&begin,
|
&begin,
|
||||||
|
@ -385,7 +386,13 @@ void run_test(unsigned int inserts, column_number_t column_count,
|
||||||
&args);
|
&args);
|
||||||
pthread_t merger;
|
pthread_t merger;
|
||||||
pthread_create(&merger, 0,
|
pthread_create(&merger, 0,
|
||||||
(void*(*)(void*))mergeThread<PAGELAYOUT,ENGINE,ITER,ROW,TYPE>,
|
(void*(*)(void*))mergeThread
|
||||||
|
<PAGELAYOUT,
|
||||||
|
ENGINE,
|
||||||
|
treeIterator<ROW,PAGELAYOUT>,
|
||||||
|
treeIterator<ROW,PAGELAYOUT>,
|
||||||
|
ROW,
|
||||||
|
TYPE>,
|
||||||
&args);
|
&args);
|
||||||
|
|
||||||
pthread_join(inserter,0);
|
pthread_join(inserter,0);
|
||||||
|
@ -398,11 +405,11 @@ void run_test(unsigned int inserts, column_number_t column_count,
|
||||||
gettimeofday(&start_tv, 0);
|
gettimeofday(&start_tv, 0);
|
||||||
|
|
||||||
if(buildTree) {
|
if(buildTree) {
|
||||||
tree = TlsmCreate(-1, comparator_idx,rowsize);
|
tree = TlsmCreate(-1, comparator_idx, roseFastAlloc, &num_pages, rowsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t insertedByCompress;
|
uint64_t insertedByCompress;
|
||||||
compressData<PAGELAYOUT,ENGINE,TYPE,ITER,ROW>
|
compressData<PAGELAYOUT,ENGINE,TYPE,ROW,ITER>
|
||||||
(&begin, &end,buildTree,tree,roseFastAlloc,(void*)&num_pages,
|
(&begin, &end,buildTree,tree,roseFastAlloc,(void*)&num_pages,
|
||||||
&insertedByCompress);
|
&insertedByCompress);
|
||||||
|
|
||||||
|
|
53
benchmarks/roseTable.cpp
Normal file
53
benchmarks/roseTable.cpp
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include "stasis/operations/lsmTable.h"
|
||||||
|
|
||||||
|
#include "stasis/transactional.h"
|
||||||
|
|
||||||
|
#include "stasis/page/compression/multicolumn-impl.h"
|
||||||
|
#include "stasis/page/compression/for-impl.h"
|
||||||
|
#include "stasis/page/compression/rle-impl.h"
|
||||||
|
#include "stasis/page/compression/staticTuple.h"
|
||||||
|
#include "stasis/page/compression/pageLayout.h"
|
||||||
|
|
||||||
|
typedef int32_t val_t; // XXX want multiple types!
|
||||||
|
|
||||||
|
namespace rose {
|
||||||
|
template<class PAGELAYOUT>
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
static int cmp_num = 1;
|
||||||
|
static int init_num = 1;
|
||||||
|
|
||||||
|
unlink("storefile.txt");
|
||||||
|
unlink("logfile.txt");
|
||||||
|
|
||||||
|
sync();
|
||||||
|
|
||||||
|
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
|
||||||
|
|
||||||
|
Tinit();
|
||||||
|
|
||||||
|
int xid = Tbegin();
|
||||||
|
|
||||||
|
recordid lsmTable = TlsmTableAlloc<PAGELAYOUT>(xid);
|
||||||
|
|
||||||
|
Tcommit(xid);
|
||||||
|
|
||||||
|
TlsmTableStart<PAGELAYOUT>(lsmTable);
|
||||||
|
|
||||||
|
TlsmTableStop<PAGELAYOUT>(lsmTable);
|
||||||
|
|
||||||
|
Tdeinit();
|
||||||
|
|
||||||
|
printf("test\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
typedef rose::StaticTuple<4,int64_t,int32_t,int16_t,int8_t> tup;
|
||||||
|
// XXX multicolumn is deprecated; want static dispatch!
|
||||||
|
return rose::main
|
||||||
|
<rose::SingleColumnTypePageLayout
|
||||||
|
<rose::Multicolumn<tup>,rose::Rle<val_t> > >
|
||||||
|
(argc,argv);
|
||||||
|
}
|
|
@ -22,10 +22,21 @@
|
||||||
|
|
||||||
namespace rose {
|
namespace rose {
|
||||||
|
|
||||||
template <class ITER, class ROW> class mergeIterator;
|
template <class ITERA, class ITERB, class ROW> class mergeIterator;
|
||||||
|
|
||||||
|
template <class ITERA, class ITERB, class ROW>
|
||||||
|
inline const byte * toByteArray(mergeIterator<ITERA,ITERB,ROW> * const t);
|
||||||
|
|
||||||
|
|
||||||
|
template <class ITER, class ROW> class versioningIterator;
|
||||||
|
|
||||||
template <class ITER, class ROW>
|
template <class ITER, class ROW>
|
||||||
inline const byte * toByteArray(mergeIterator<ITER,ROW> * const t);
|
inline const byte * toByteArray(versioningIterator<ITER,ROW> * const t);
|
||||||
|
|
||||||
|
|
||||||
|
template <class STLITER, class ROW> class stlSetIterator;
|
||||||
|
template <class STLITER, class ROW>
|
||||||
|
inline const byte * toByteArray(stlSetIterator<STLITER,ROW> * const t);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -154,12 +165,9 @@ class treeIterator {
|
||||||
This iterator takes two otehr iterators as arguments, and merges
|
This iterator takes two otehr iterators as arguments, and merges
|
||||||
their output, dropping duplicate entries.
|
their output, dropping duplicate entries.
|
||||||
|
|
||||||
@todo LSM tree is not be very useful without support for deletion
|
It does not understand versioning or tombstones.
|
||||||
(and, therefore, versioning). Such support will require
|
|
||||||
modifications to mergeIterator (or perhaps, a new iterator
|
|
||||||
class).
|
|
||||||
*/
|
*/
|
||||||
template<class ITER, class ROW>
|
template<class ITERA, class ITERB, class ROW>
|
||||||
class mergeIterator {
|
class mergeIterator {
|
||||||
private:
|
private:
|
||||||
static const int A = 0;
|
static const int A = 0;
|
||||||
|
@ -192,7 +200,7 @@ class mergeIterator {
|
||||||
return cur;
|
return cur;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
mergeIterator(ITER & a, ITER & b, ITER & aend, ITER & bend) :
|
mergeIterator(ITERA & a, ITERB & b, ITERA & aend, ITERB & bend) :
|
||||||
off_(0),
|
off_(0),
|
||||||
a_(a),
|
a_(a),
|
||||||
b_(b),
|
b_(b),
|
||||||
|
@ -211,7 +219,7 @@ class mergeIterator {
|
||||||
before_eof_(i.before_eof_)
|
before_eof_(i.before_eof_)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
ROW& operator* () {
|
const ROW& operator* () {
|
||||||
if(curr_ == A || curr_ == BOTH) { return *a_; }
|
if(curr_ == A || curr_ == BOTH) { return *a_; }
|
||||||
if(curr_ == B) { return *b_; }
|
if(curr_ == B) { return *b_; }
|
||||||
abort();
|
abort();
|
||||||
|
@ -272,19 +280,153 @@ class mergeIterator {
|
||||||
inline unsigned int offset() { return off_; }
|
inline unsigned int offset() { return off_; }
|
||||||
private:
|
private:
|
||||||
unsigned int off_;
|
unsigned int off_;
|
||||||
ITER a_;
|
ITERA a_;
|
||||||
ITER b_;
|
ITERB b_;
|
||||||
ITER aend_;
|
ITERA aend_;
|
||||||
ITER bend_;
|
ITERB bend_;
|
||||||
int curr_;
|
int curr_;
|
||||||
int before_eof_;
|
int before_eof_;
|
||||||
friend const byte* toByteArray<ITER,ROW>(mergeIterator<ITER,ROW> * const t);
|
friend const byte*
|
||||||
|
toByteArray<ITERA,ITERB,ROW>(mergeIterator<ITERA,ITERB,ROW> * const t);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
This iterator takes an iterator that produces rows with versioning
|
||||||
|
information. The rows should be sorted based on value, then sorted by
|
||||||
|
version, with the newest value first.
|
||||||
|
*/
|
||||||
|
template<class ITER, class ROW>
|
||||||
|
class versioningIterator {
|
||||||
|
public:
|
||||||
|
versioningIterator(ITER & a, ITER & aend,
|
||||||
|
int beginning_of_time) :
|
||||||
|
a_(a),
|
||||||
|
aend_(aend),
|
||||||
|
check_tombstone_(0),
|
||||||
|
tombstone_(0),
|
||||||
|
off_(0)
|
||||||
|
{}
|
||||||
|
explicit versioningIterator(versioningIterator &i) :
|
||||||
|
a_(i.a_),
|
||||||
|
aend_(i.aend_),
|
||||||
|
check_tombstone_(i.check_tombstone_),
|
||||||
|
tombstone_(i.tombstone_),
|
||||||
|
off_(i.off_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
ROW& operator* () {
|
||||||
|
return *a_;
|
||||||
|
}
|
||||||
|
void seekEnd() {
|
||||||
|
a_ = aend_; // XXX good idea?
|
||||||
|
}
|
||||||
|
inline bool operator==(const versioningIterator &o) const {
|
||||||
|
return a_ == o.a_;
|
||||||
|
}
|
||||||
|
inline bool operator!=(const versioningIterator &o) const {
|
||||||
|
return !(*this == o);
|
||||||
|
}
|
||||||
|
inline void operator++() {
|
||||||
|
if(check_tombstone_) {
|
||||||
|
do {
|
||||||
|
++a_;
|
||||||
|
} while(a_ != aend_ && *a_ == tombstone_);
|
||||||
|
} else {
|
||||||
|
++a_;
|
||||||
|
}
|
||||||
|
if((*a_).tombstone()) {
|
||||||
|
tombstone_.copyFrom(*a_);
|
||||||
|
check_tombstone_ = 1;
|
||||||
|
} else {
|
||||||
|
check_tombstone_ = 0;
|
||||||
|
}
|
||||||
|
off_++;
|
||||||
|
}
|
||||||
|
inline void operator--() {
|
||||||
|
--a_;
|
||||||
|
// need to remember that we backed up so that ++ can work...
|
||||||
|
// the cursor is always positioned on a live value, and -- can
|
||||||
|
// only be followed by ++, so this should do the right thing.
|
||||||
|
check_tombstone_ = 0;
|
||||||
|
off_--;
|
||||||
|
}
|
||||||
|
inline int operator-(versioningIterator&i) {
|
||||||
|
return off_ - i.off_;
|
||||||
|
}
|
||||||
|
inline void operator=(versioningIterator const &i) {
|
||||||
|
a_ = i.a_;
|
||||||
|
aend_ = i.aend_;
|
||||||
|
check_tombstone_ = i.check_tombstone_;
|
||||||
|
tombstone_ = i.tombstone_;
|
||||||
|
// scratch_ = *a_;
|
||||||
|
off_ = i.off_;
|
||||||
|
}
|
||||||
|
inline unsigned int offset() { return off_; }
|
||||||
|
private:
|
||||||
|
// unsigned int off_;
|
||||||
|
ITER a_;
|
||||||
|
ITER aend_;
|
||||||
|
int check_tombstone_;
|
||||||
|
ROW tombstone_;
|
||||||
|
// ROW &scratch_;
|
||||||
|
off_t off_;
|
||||||
|
// int before_eof_;
|
||||||
|
// typeof(ROW::TIMESTAMP) beginning_of_time_;
|
||||||
|
friend const byte*
|
||||||
|
toByteArray<ITER,ROW>(versioningIterator<ITER,ROW> * const t);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
This iterator takes an iterator that produces rows with versioning
|
||||||
|
information. The rows should be sorted based on value, then sorted by
|
||||||
|
version, with the newest value first.
|
||||||
|
*/
|
||||||
|
template<class STLITER,class ROW> class stlSetIterator {
|
||||||
|
public:
|
||||||
|
stlSetIterator( STLITER& it, STLITER& itend ) : it_(it), itend_(itend) {}
|
||||||
|
explicit stlSetIterator(stlSetIterator &i) : it_(i.it_), itend_(i.itend_){}
|
||||||
|
const ROW& operator* () { return *it_; }
|
||||||
|
|
||||||
|
void seekEnd() {
|
||||||
|
it_ = itend_; // XXX good idea?
|
||||||
|
}
|
||||||
|
stlSetIterator * end() { return new stlSetIterator(itend_,itend_); }
|
||||||
|
inline bool operator==(const stlSetIterator &o) const {
|
||||||
|
return it_ == o.it_;
|
||||||
|
}
|
||||||
|
inline bool operator!=(const stlSetIterator &o) const {
|
||||||
|
return !(*this == o);
|
||||||
|
}
|
||||||
|
inline void operator++() {
|
||||||
|
++it_;
|
||||||
|
}
|
||||||
|
inline void operator--() {
|
||||||
|
--it_;
|
||||||
|
}
|
||||||
|
inline int operator-(stlSetIterator&i) {
|
||||||
|
return it_ - i.it_;
|
||||||
|
}
|
||||||
|
inline void operator=(stlSetIterator const &i) {
|
||||||
|
it_ = i.it_;
|
||||||
|
itend_ = i.itend_;
|
||||||
|
}
|
||||||
|
// inline unsigned int offset() { return off_; }
|
||||||
|
private:
|
||||||
|
// unsigned int off_;
|
||||||
|
STLITER it_;
|
||||||
|
STLITER itend_;
|
||||||
|
friend const byte*
|
||||||
|
toByteArray<STLITER,ROW>(stlSetIterator<STLITER,ROW> * const t);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class STLITER,class ROW>
|
||||||
|
inline const byte * toByteArray(stlSetIterator<STLITER,ROW> * const t) {
|
||||||
|
return (*(t->it_)).toByteArray();
|
||||||
|
}
|
||||||
/** Produce a byte array from the value stored at t's current
|
/** Produce a byte array from the value stored at t's current
|
||||||
position */
|
position */
|
||||||
template <class ITER, class ROW>
|
template <class ITERA, class ITERB, class ROW>
|
||||||
inline const byte * toByteArray(mergeIterator<ITER,ROW> * const t) {
|
inline const byte * toByteArray(mergeIterator<ITERA,ITERB,ROW> * const t) {
|
||||||
if(t->curr_ == t->A || t->curr_ == t->BOTH) {
|
if(t->curr_ == t->A || t->curr_ == t->BOTH) {
|
||||||
return toByteArray(&t->a_);
|
return toByteArray(&t->a_);
|
||||||
} else if(t->curr_ == t->B) {
|
} else if(t->curr_ == t->B) {
|
||||||
|
@ -293,12 +435,19 @@ inline const byte * toByteArray(mergeIterator<ITER,ROW> * const t) {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Produce a byte array from the value stored at t's current
|
||||||
|
position */
|
||||||
|
template <class ITER, class ROW>
|
||||||
|
inline const byte * toByteArray(versioningIterator<ITER,ROW> * const t) {
|
||||||
|
return toByteArray(&t->a_);
|
||||||
|
}
|
||||||
|
|
||||||
template <class PAGELAYOUT>
|
template <class PAGELAYOUT>
|
||||||
inline const byte* toByteArray(treeIterator<int,PAGELAYOUT> *const t) {
|
inline const byte* toByteArray(treeIterator<int,PAGELAYOUT> *const t) {
|
||||||
return (const byte*)&(**t);
|
return (const byte*)&(**t);
|
||||||
}
|
}
|
||||||
template <class PAGELAYOUT,class TYPE>
|
template <class PAGELAYOUT,class ROW>
|
||||||
inline const byte* toByteArray(treeIterator<Tuple<TYPE>,PAGELAYOUT> *const t) {
|
inline const byte* toByteArray(treeIterator<ROW,PAGELAYOUT> *const t) {
|
||||||
return (**t).toByteArray();
|
return (**t).toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
311
src/stasis/operations/lsmTable.h
Normal file
311
src/stasis/operations/lsmTable.h
Normal file
|
@ -0,0 +1,311 @@
|
||||||
|
#ifndef _ROSE_COMPRESSION_LSMTABLE_H__
|
||||||
|
#define _ROSE_COMPRESSION_LSMTABLE_H__
|
||||||
|
|
||||||
|
#undef end
|
||||||
|
#undef begin
|
||||||
|
|
||||||
|
#include <set>
|
||||||
|
#include "lsmIterators.h"
|
||||||
|
|
||||||
|
namespace rose {
|
||||||
|
/**
|
||||||
|
@file
|
||||||
|
|
||||||
|
This file contains worker threads and the end user interface for Rose's
|
||||||
|
LSM-tree based table implementation. The page format is set at compile
|
||||||
|
time with a template instantiation.
|
||||||
|
|
||||||
|
@see lsmWorkers.h provides a more general (and dynamically
|
||||||
|
dispatched), interface to the underlying primititves
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
template<class PAGELAYOUT>
|
||||||
|
struct new_insert_args {
|
||||||
|
int comparator_idx;
|
||||||
|
int rowsize; //typedef int32_t val_t;
|
||||||
|
// ITER *begin;
|
||||||
|
// ITER *end;
|
||||||
|
pageid_t(*pageAlloc)(int,void*);
|
||||||
|
void *pageAllocState;
|
||||||
|
pthread_mutex_t * block_ready_mut;
|
||||||
|
pthread_cond_t * block_needed_cond;
|
||||||
|
pthread_cond_t * block_ready_cond;
|
||||||
|
int max_waiters;
|
||||||
|
int wait_count;
|
||||||
|
recordid * wait_queue;
|
||||||
|
typename PAGELAYOUT::FMT::TUP *scratchA;
|
||||||
|
typename PAGELAYOUT::FMT::TUP *scratchB;
|
||||||
|
pageid_t mergedPages;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class PAGELAYOUT, class ITER>
|
||||||
|
pageid_t compressData(ITER * begin, ITER * end, recordid tree,
|
||||||
|
pageid_t (*pageAlloc)(int,void*),
|
||||||
|
void *pageAllocState, uint64_t *inserted) {
|
||||||
|
*inserted = 0;
|
||||||
|
|
||||||
|
if(*begin == *end) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
pageid_t next_page = pageAlloc(-1,pageAllocState);
|
||||||
|
Page *p = loadPage(-1, next_page);
|
||||||
|
pageid_t pageCount = 0;
|
||||||
|
|
||||||
|
if(*begin != *end) {
|
||||||
|
TlsmAppendPage(-1,tree,toByteArray(begin),pageAlloc,pageAllocState,p->id);
|
||||||
|
}
|
||||||
|
pageCount++;
|
||||||
|
|
||||||
|
typename PAGELAYOUT::FMT * mc = PAGELAYOUT::initPage(p, &**begin);
|
||||||
|
|
||||||
|
int lastEmpty = 0;
|
||||||
|
|
||||||
|
for(ITER i(*begin); i != *end; ++i) {
|
||||||
|
rose::slot_index_t ret = mc->append(-1, *i);
|
||||||
|
|
||||||
|
(*inserted)++;
|
||||||
|
|
||||||
|
if(ret == rose::NOSPACE) {
|
||||||
|
p->dirty = 1;
|
||||||
|
mc->pack();
|
||||||
|
releasePage(p);
|
||||||
|
|
||||||
|
--(*end);
|
||||||
|
if(i != *end) {
|
||||||
|
next_page = pageAlloc(-1,pageAllocState);
|
||||||
|
p = loadPage(-1, next_page);
|
||||||
|
|
||||||
|
mc = PAGELAYOUT::initPage(p, &*i);
|
||||||
|
|
||||||
|
TlsmAppendPage(-1,tree,toByteArray(&i),pageAlloc,pageAllocState,p->id);
|
||||||
|
|
||||||
|
pageCount++;
|
||||||
|
lastEmpty = 0;
|
||||||
|
} else {
|
||||||
|
lastEmpty = 1;
|
||||||
|
}
|
||||||
|
++(*end);
|
||||||
|
--i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p->dirty = 1;
|
||||||
|
mc->pack();
|
||||||
|
releasePage(p);
|
||||||
|
return pageCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
ITERA is an iterator over the data structure that mergeThread creates (a lsm tree iterator).
|
||||||
|
ITERB is an iterator over the data structures that mergeThread takes as input (lsm tree, or rb tree..)
|
||||||
|
*/
|
||||||
|
template<class PAGELAYOUT, class ITERA, class ITERB> //class PAGELAYOUTX, class ENGINE, class ITERA, class ITERB,
|
||||||
|
// class ROW, class TYPE>
|
||||||
|
void* mergeThread(void* arg) {
|
||||||
|
// The ITER argument of a is unused (we don't look at it's begin or end fields...)
|
||||||
|
//insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>* a =
|
||||||
|
// (insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>*)arg;
|
||||||
|
new_insert_args<PAGELAYOUT> * a = (new_insert_args<PAGELAYOUT>*)arg;
|
||||||
|
|
||||||
|
struct timeval start_tv, wait_tv, stop_tv;
|
||||||
|
|
||||||
|
int merge_count = 0;
|
||||||
|
// loop around here to produce multiple batches for merge.
|
||||||
|
while(1) {
|
||||||
|
|
||||||
|
gettimeofday(&start_tv,0);
|
||||||
|
|
||||||
|
pthread_mutex_lock(a->block_ready_mut);
|
||||||
|
while(a->wait_count <2) {
|
||||||
|
pthread_cond_wait(a->block_ready_cond,a->block_ready_mut);
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday(&wait_tv,0);
|
||||||
|
|
||||||
|
recordid * oldTreeA = &a->wait_queue[0];
|
||||||
|
recordid * oldTreeB = &a->wait_queue[1];
|
||||||
|
|
||||||
|
pthread_mutex_unlock(a->block_ready_mut);
|
||||||
|
|
||||||
|
recordid tree = TlsmCreate(-1, a->comparator_idx,a->pageAlloc,a->pageAllocState,a->rowsize);
|
||||||
|
|
||||||
|
ITERA taBegin(*oldTreeA,*(a->scratchA),a->rowsize);
|
||||||
|
ITERB tbBegin(*oldTreeB,*(a->scratchB),a->rowsize);
|
||||||
|
|
||||||
|
ITERA *taEnd = taBegin.end();
|
||||||
|
ITERB *tbEnd = tbBegin.end();
|
||||||
|
|
||||||
|
mergeIterator<ITERA, ITERB, typename PAGELAYOUT::FMT::TUP>
|
||||||
|
mBegin(taBegin, tbBegin, *taEnd, *tbEnd);
|
||||||
|
|
||||||
|
mergeIterator<ITERA, ITERB, typename PAGELAYOUT::FMT::TUP>
|
||||||
|
mEnd(taBegin, tbBegin, *taEnd, *tbEnd);
|
||||||
|
|
||||||
|
|
||||||
|
mEnd.seekEnd();
|
||||||
|
uint64_t insertedTuples;
|
||||||
|
pageid_t mergedPages = compressData<PAGELAYOUT,mergeIterator<ITERA,ITERB,typename PAGELAYOUT::FMT::TUP> >
|
||||||
|
(&mBegin, &mEnd,tree,a->pageAlloc,a->pageAllocState,&insertedTuples);
|
||||||
|
delete taEnd;
|
||||||
|
delete tbEnd;
|
||||||
|
|
||||||
|
gettimeofday(&stop_tv,0);
|
||||||
|
|
||||||
|
pthread_mutex_lock(a->block_ready_mut);
|
||||||
|
|
||||||
|
a->mergedPages = mergedPages;
|
||||||
|
|
||||||
|
// TlsmFree(wait_queue[0]) /// XXX Need to implement (de)allocation!
|
||||||
|
// TlsmFree(wait_queue[1])
|
||||||
|
|
||||||
|
memcpy(&a->wait_queue[0],&tree,sizeof(tree));
|
||||||
|
for(int i = 1; i + 1 < a->wait_count; i++) {
|
||||||
|
memcpy(&a->wait_queue[i],&a->wait_queue[i+1],sizeof(tree));
|
||||||
|
}
|
||||||
|
a->wait_count--;
|
||||||
|
pthread_mutex_unlock(a->block_ready_mut);
|
||||||
|
|
||||||
|
merge_count++;
|
||||||
|
|
||||||
|
double wait_elapsed = tv_to_double(wait_tv) - tv_to_double(start_tv);
|
||||||
|
double work_elapsed = tv_to_double(stop_tv) - tv_to_double(wait_tv);
|
||||||
|
double total_elapsed = wait_elapsed + work_elapsed;
|
||||||
|
double ratio = ((double)(insertedTuples * (uint64_t)a->rowsize))
|
||||||
|
/ (double)(PAGE_SIZE * mergedPages);
|
||||||
|
double throughput = ((double)(insertedTuples * (uint64_t)a->rowsize))
|
||||||
|
/ (1024.0 * 1024.0 * total_elapsed);
|
||||||
|
|
||||||
|
printf("merge # %-6d: comp ratio: %-9.3f waited %6.1f sec "
|
||||||
|
"worked %6.1f sec inserts %-12ld (%9.3f mb/s)\n", merge_count, ratio,
|
||||||
|
wait_elapsed, work_elapsed, (unsigned long)insertedTuples, throughput);
|
||||||
|
|
||||||
|
pthread_cond_signal(a->block_needed_cond);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
|
||||||
|
template<class PAGELAYOUT, class ITER>
|
||||||
|
void* insertThread(void* arg) {
|
||||||
|
|
||||||
|
new_insert_args<PAGELAYOUT> * a = (new_insert_args<PAGELAYOUT>*)arg;
|
||||||
|
struct timeval start_tv, start_wait_tv, stop_tv;
|
||||||
|
|
||||||
|
int insert_count = 0;
|
||||||
|
|
||||||
|
pageid_t lastTreeBlocks = 0;
|
||||||
|
uint64_t lastTreeInserts = 0;
|
||||||
|
pageid_t desiredInserts = 0;
|
||||||
|
|
||||||
|
// this is a hand-tuned value; it should be set dynamically, not staticly
|
||||||
|
double K = 0.18;
|
||||||
|
|
||||||
|
// loop around here to produce multiple batches for merge.
|
||||||
|
while(1) {
|
||||||
|
gettimeofday(&start_tv,0);
|
||||||
|
// XXX this needs to be an iterator over an in-memory tree.
|
||||||
|
ITER i(*(a->begin));
|
||||||
|
ITER j(desiredInserts ? *(a->begin) : *(a->end));
|
||||||
|
if(desiredInserts) {
|
||||||
|
j += desiredInserts;
|
||||||
|
}
|
||||||
|
recordid tree = TlsmCreate(-1, a->comparator_idx,a->rowsize);
|
||||||
|
lastTreeBlocks =
|
||||||
|
compressData<PAGELAYOUT,PAGELAYOUT::init_page,ITER>
|
||||||
|
(&i, &j,1,tree,a->pageAlloc,a->pageAllocState, &lastTreeInserts);
|
||||||
|
|
||||||
|
gettimeofday(&start_wait_tv,0);
|
||||||
|
pthread_mutex_lock(a->block_ready_mut);
|
||||||
|
while(a->wait_count >= a->max_waiters) {
|
||||||
|
pthread_cond_wait(a->block_needed_cond,a->block_ready_mut);
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(&a->wait_queue[a->wait_count],&tree,sizeof(recordid));
|
||||||
|
a->wait_count++;
|
||||||
|
|
||||||
|
pthread_cond_signal(a->block_ready_cond);
|
||||||
|
gettimeofday(&stop_tv,0);
|
||||||
|
double work_elapsed = tv_to_double(start_wait_tv) - tv_to_double(start_tv);
|
||||||
|
double wait_elapsed = tv_to_double(stop_tv) - tv_to_double(start_wait_tv);
|
||||||
|
double elapsed = tv_to_double(stop_tv) - tv_to_double(start_tv);
|
||||||
|
printf("insert# %-6d waited %6.1f sec "
|
||||||
|
"worked %6.1f sec inserts %-12ld (%9.3f mb/s)\n",
|
||||||
|
++insert_count,
|
||||||
|
wait_elapsed,
|
||||||
|
work_elapsed,
|
||||||
|
(long int)lastTreeInserts,
|
||||||
|
(lastTreeInserts*(uint64_t)a->rowsize / (1024.0*1024.0)) / elapsed);
|
||||||
|
|
||||||
|
if(a->mergedPages != -1) {
|
||||||
|
desiredInserts = (pageid_t)(((double)a->mergedPages / K)
|
||||||
|
* ((double)lastTreeInserts
|
||||||
|
/ (double)lastTreeBlocks));
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(a->block_ready_mut);
|
||||||
|
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
recordid bigTree;
|
||||||
|
recordid bigTreeAllocState; // this is probably the head of an arraylist of regions used by the tree...
|
||||||
|
recordid mediumTree;
|
||||||
|
recordid mediumTreeAllocState;
|
||||||
|
epoch_t beginning;
|
||||||
|
epoch_t end;
|
||||||
|
} lsmTableHeader_t;
|
||||||
|
|
||||||
|
template<class PAGELAYOUT>
|
||||||
|
inline recordid TlsmTableAlloc(int xid) {
|
||||||
|
|
||||||
|
// XXX use a (slow) page allocator in alloc, then create a new
|
||||||
|
// (fast) region allocator in start.
|
||||||
|
|
||||||
|
recordid ret = Talloc(xid, sizeof(lsmTableHeader_t));
|
||||||
|
lsmTableHeader_t h;
|
||||||
|
h.bigTreeAllocState = Talloc(xid,sizeof(TlsmRegionAllocConf_t));
|
||||||
|
Tset(xid,h.bigTreeAllocState,&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||||
|
h.bigTree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),
|
||||||
|
TlsmRegionAllocRid,&h.bigTreeAllocState,
|
||||||
|
PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||||
|
h.mediumTreeAllocState = Talloc(xid,sizeof(TlsmRegionAllocConf_t));
|
||||||
|
Tset(xid,h.mediumTreeAllocState,&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||||
|
h.mediumTree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),
|
||||||
|
TlsmRegionAllocRid,&h.mediumTreeAllocState,
|
||||||
|
PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||||
|
epoch_t beginning = 0;
|
||||||
|
epoch_t end = 0;
|
||||||
|
Tset(xid, ret, &h);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class PAGELAYOUT>
|
||||||
|
void TlsmTableStart(recordid tree) {
|
||||||
|
/// XXX xid for daemon processes?
|
||||||
|
|
||||||
|
void * (*merger)(void*) = mergeThread
|
||||||
|
<PAGELAYOUT,
|
||||||
|
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT>,
|
||||||
|
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT> >;
|
||||||
|
|
||||||
|
/*mergeThread
|
||||||
|
<PAGELAYOUT,
|
||||||
|
treeIterator<typename PAGELAYOUT::FMT::TUP, typename PAGELAYOUT::FMT>,
|
||||||
|
stlSetIterator<typename std::set<typename PAGELAYOUT::FMT::TUP,
|
||||||
|
typename PAGELAYOUT::FMT::TUP::stl_cmp>::iterator,
|
||||||
|
typename PAGELAYOUT::FMT::TUP> >
|
||||||
|
(0); */
|
||||||
|
|
||||||
|
lsmTableHeader_t h;
|
||||||
|
Tread(-1, tree, &h);
|
||||||
|
|
||||||
|
}
|
||||||
|
template<class PAGELAYOUT>
|
||||||
|
void TlsmTableStop(recordid tree) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _ROSE_COMPRESSION_LSMTABLE_H__
|
|
@ -8,12 +8,40 @@
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
static lsm_comparator_t comparators[MAX_LSM_COMPARATORS];
|
static lsm_comparator_t comparators[MAX_LSM_COMPARATORS];
|
||||||
|
static lsm_page_initializer_t initializers[MAX_LSM_PAGE_INITIALIZERS];
|
||||||
|
|
||||||
|
TlsmRegionAllocConf_t LSM_REGION_ALLOC_STATIC_INITIALIZER =
|
||||||
|
{ -1, -1, 1000 };
|
||||||
|
|
||||||
|
pageid_t TlsmRegionAlloc(int xid, void *conf) {
|
||||||
|
TlsmRegionAllocConf_t * a = (TlsmRegionAllocConf_t*)conf;
|
||||||
|
if(a->nextPage == a->endOfRegion) {
|
||||||
|
a->nextPage = TregionAlloc(xid, a->regionSize,0);
|
||||||
|
a->endOfRegion = a->nextPage + a->regionSize;
|
||||||
|
}
|
||||||
|
pageid_t ret = a->nextPage;
|
||||||
|
(a->nextPage)++;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
pageid_t TlsmRegionAllocRid(int xid, void * ridp) {
|
||||||
|
recordid rid = *(recordid*)ridp;
|
||||||
|
TlsmRegionAllocConf_t conf;
|
||||||
|
Tread(xid,rid,&conf);
|
||||||
|
pageid_t ret = TlsmRegionAlloc(xid,&conf);
|
||||||
|
// XXX get rid of Tset by storing next page in memory, and losing it
|
||||||
|
// on crash.
|
||||||
|
Tset(xid,rid,&conf);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
void lsmTreeRegisterComparator(int id, lsm_comparator_t i) {
|
void lsmTreeRegisterComparator(int id, lsm_comparator_t i) {
|
||||||
// XXX need to de-init this somewhere... assert(!comparators[id]);
|
// XXX need to de-init this somewhere... assert(!comparators[id]);
|
||||||
comparators[id] = i;
|
comparators[id] = i;
|
||||||
}
|
}
|
||||||
|
void lsmTreeRegisterPageInitializer(int id, lsm_page_initializer_t i) {
|
||||||
|
initializers[id] = i;
|
||||||
|
}
|
||||||
|
|
||||||
#define HEADER_SIZE (2 * sizeof(lsmTreeNodeRecord))
|
#define HEADER_SIZE (2 * sizeof(lsmTreeNodeRecord))
|
||||||
|
|
||||||
|
@ -76,20 +104,6 @@ void lsmTreeRegisterComparator(int id, lsm_comparator_t i) {
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static pageid_t defaultAllocator(int xid, void *ignored) {
|
|
||||||
return TpageAlloc(xid);
|
|
||||||
}
|
|
||||||
|
|
||||||
static pageid_t (*pageAllocator)(int xid, void *ignored) = defaultAllocator;
|
|
||||||
static void *pageAllocatorConfig;
|
|
||||||
void TlsmSetPageAllocator(pageid_t (*allocer)(int xid, void * ignored),
|
|
||||||
void * config) {
|
|
||||||
pageAllocator = allocer;
|
|
||||||
pageAllocatorConfig = config;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct lsmTreeState {
|
typedef struct lsmTreeState {
|
||||||
pageid_t lastLeaf;
|
pageid_t lastLeaf;
|
||||||
} lsmTreeState;
|
} lsmTreeState;
|
||||||
|
@ -197,13 +211,15 @@ void writeNodeRecordVirtualMethods(int xid, Page *p, int slot,
|
||||||
stasis_page_lsn_write(xid, p, 0); // XXX need real LSN?
|
stasis_page_lsn_write(xid, p, 0); // XXX need real LSN?
|
||||||
}
|
}
|
||||||
|
|
||||||
recordid TlsmCreate(int xid, int comparator, int keySize) {
|
recordid TlsmCreate(int xid, int comparator,
|
||||||
|
lsm_page_allocator_t allocator, void *allocator_state,
|
||||||
|
int keySize) {
|
||||||
|
|
||||||
// can the pages hold at least two keys?
|
// can the pages hold at least two keys?
|
||||||
assert(HEADER_SIZE + 2 * (sizeof(lsmTreeNodeRecord) +keySize) <
|
assert(HEADER_SIZE + 2 * (sizeof(lsmTreeNodeRecord) +keySize) <
|
||||||
USABLE_SIZE_OF_PAGE - 2 * sizeof(short));
|
USABLE_SIZE_OF_PAGE - 2 * sizeof(short));
|
||||||
|
|
||||||
pageid_t root = pageAllocator(xid, pageAllocatorConfig);
|
pageid_t root = allocator(xid, allocator_state); //pageAllocatorConfig);
|
||||||
DEBUG("Root = %lld\n", root);
|
DEBUG("Root = %lld\n", root);
|
||||||
recordid ret = { root, 0, 0 };
|
recordid ret = { root, 0, 0 };
|
||||||
|
|
||||||
|
@ -244,13 +260,15 @@ recordid TlsmCreate(int xid, int comparator, int keySize) {
|
||||||
|
|
||||||
static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
|
static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
|
||||||
int depth, const byte *key, size_t key_len,
|
int depth, const byte *key, size_t key_len,
|
||||||
pageid_t val_page, pageid_t lastLeaf) {
|
pageid_t val_page, pageid_t lastLeaf,
|
||||||
|
lsm_page_allocator_t allocator,
|
||||||
|
void *allocator_state) {
|
||||||
// root is the recordid on the root page that should point to the
|
// root is the recordid on the root page that should point to the
|
||||||
// new subtree.
|
// new subtree.
|
||||||
assert(depth);
|
assert(depth);
|
||||||
DEBUG("buildPathToLeaf(depth=%d) (lastleaf=%lld) called\n",depth, lastLeaf);
|
DEBUG("buildPathToLeaf(depth=%d) (lastleaf=%lld) called\n",depth, lastLeaf);
|
||||||
|
|
||||||
pageid_t child = pageAllocator(xid, pageAllocatorConfig); // XXX Use some other function...
|
pageid_t child = allocator(xid,allocator_state);
|
||||||
DEBUG("new child = %lld internal? %d\n", child, depth-1);
|
DEBUG("new child = %lld internal? %d\n", child, depth-1);
|
||||||
|
|
||||||
Page *child_p = loadPage(xid, child);
|
Page *child_p = loadPage(xid, child);
|
||||||
|
@ -266,7 +284,7 @@ static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
|
||||||
stasis_record_alloc_done(xid, child_p, child_rec);
|
stasis_record_alloc_done(xid, child_p, child_rec);
|
||||||
|
|
||||||
ret = buildPathToLeaf(xid, child_rec, child_p, depth-1, key, key_len,
|
ret = buildPathToLeaf(xid, child_rec, child_p, depth-1, key, key_len,
|
||||||
val_page,lastLeaf);
|
val_page,lastLeaf, allocator, allocator_state);
|
||||||
|
|
||||||
unlock(child_p->rwlatch);
|
unlock(child_p->rwlatch);
|
||||||
releasePage(child_p);
|
releasePage(child_p);
|
||||||
|
@ -328,7 +346,9 @@ static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
|
||||||
static recordid appendInternalNode(int xid, Page *p,
|
static recordid appendInternalNode(int xid, Page *p,
|
||||||
int depth,
|
int depth,
|
||||||
const byte *key, size_t key_len,
|
const byte *key, size_t key_len,
|
||||||
pageid_t val_page, pageid_t lastLeaf) {
|
pageid_t val_page, pageid_t lastLeaf,
|
||||||
|
lsm_page_allocator_t allocator,
|
||||||
|
void *allocator_state) {
|
||||||
assert(*stasis_page_type_ptr(p) == LSM_ROOT_PAGE ||
|
assert(*stasis_page_type_ptr(p) == LSM_ROOT_PAGE ||
|
||||||
*stasis_page_type_ptr(p) == FIXED_PAGE);
|
*stasis_page_type_ptr(p) == FIXED_PAGE);
|
||||||
if(!depth) {
|
if(!depth) {
|
||||||
|
@ -350,7 +370,7 @@ static recordid appendInternalNode(int xid, Page *p,
|
||||||
Page *child_page = loadPage(xid, child_id);
|
Page *child_page = loadPage(xid, child_id);
|
||||||
writelock(child_page->rwlatch,0);
|
writelock(child_page->rwlatch,0);
|
||||||
ret = appendInternalNode(xid, child_page, depth-1, key, key_len,
|
ret = appendInternalNode(xid, child_page, depth-1, key, key_len,
|
||||||
val_page, lastLeaf);
|
val_page, lastLeaf, allocator, allocator_state);
|
||||||
|
|
||||||
unlock(child_page->rwlatch);
|
unlock(child_page->rwlatch);
|
||||||
releasePage(child_page);
|
releasePage(child_page);
|
||||||
|
@ -360,7 +380,7 @@ static recordid appendInternalNode(int xid, Page *p,
|
||||||
if(ret.size != INVALID_SLOT) {
|
if(ret.size != INVALID_SLOT) {
|
||||||
stasis_record_alloc_done(xid, p, ret);
|
stasis_record_alloc_done(xid, p, ret);
|
||||||
ret = buildPathToLeaf(xid, ret, p, depth, key, key_len, val_page,
|
ret = buildPathToLeaf(xid, ret, p, depth, key, key_len, val_page,
|
||||||
lastLeaf);
|
lastLeaf, allocator, allocator_state);
|
||||||
|
|
||||||
DEBUG("split tree rooted at %lld, wrote value to {%d %d %lld}\n",
|
DEBUG("split tree rooted at %lld, wrote value to {%d %d %lld}\n",
|
||||||
p->id, ret.page, ret.slot, ret.size);
|
p->id, ret.page, ret.slot, ret.size);
|
||||||
|
@ -418,6 +438,7 @@ static pageid_t findFirstLeaf(int xid, Page *root, int depth) {
|
||||||
}
|
}
|
||||||
recordid TlsmAppendPage(int xid, recordid tree,
|
recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
const byte *key,
|
const byte *key,
|
||||||
|
lsm_page_allocator_t allocator, void *allocator_state,
|
||||||
long val_page) {
|
long val_page) {
|
||||||
Page *p = loadPage(xid, tree.page);
|
Page *p = loadPage(xid, tree.page);
|
||||||
writelock(p->rwlatch, 0);
|
writelock(p->rwlatch, 0);
|
||||||
|
@ -461,12 +482,13 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
|
|
||||||
assert(tree.page == p->id);
|
assert(tree.page == p->id);
|
||||||
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
|
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
|
||||||
s->lastLeaf == tree.page ? -1 : s->lastLeaf);
|
s->lastLeaf == tree.page ? -1 : s->lastLeaf,
|
||||||
|
allocator, allocator_state);
|
||||||
|
|
||||||
if(ret.size == INVALID_SLOT) {
|
if(ret.size == INVALID_SLOT) {
|
||||||
DEBUG("Need to split root; depth = %d\n", depth);
|
DEBUG("Need to split root; depth = %d\n", depth);
|
||||||
|
|
||||||
pageid_t child = pageAllocator(xid, pageAllocatorConfig);
|
pageid_t child = allocator(xid, allocator_state);
|
||||||
Page *lc = loadPage(xid, child);
|
Page *lc = loadPage(xid, child);
|
||||||
writelock(lc->rwlatch,0);
|
writelock(lc->rwlatch,0);
|
||||||
|
|
||||||
|
@ -519,7 +541,8 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
|
|
||||||
assert(tree.page == p->id);
|
assert(tree.page == p->id);
|
||||||
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
|
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
|
||||||
s->lastLeaf == tree.page ? -1 : s->lastLeaf);
|
s->lastLeaf == tree.page ? -1 : s->lastLeaf,
|
||||||
|
allocator, allocator_state);
|
||||||
|
|
||||||
assert(ret.size != INVALID_SLOT);
|
assert(ret.size != INVALID_SLOT);
|
||||||
|
|
||||||
|
@ -625,8 +648,8 @@ pageid_t TlsmFindPage(int xid, recordid tree, const byte *key) {
|
||||||
|
|
||||||
size_t keySize = getKeySize(xid,p);
|
size_t keySize = getKeySize(xid,p);
|
||||||
|
|
||||||
const lsmTreeNodeRecord *depth_nr = readNodeRecord(xid, p , 0, keySize);
|
const lsmTreeNodeRecord *depth_nr = readNodeRecord(xid, p , DEPTH, keySize);
|
||||||
const lsmTreeNodeRecord *cmp_nr = readNodeRecord(xid, p , 1, keySize);
|
const lsmTreeNodeRecord *cmp_nr = readNodeRecord(xid, p , COMPARATOR, keySize);
|
||||||
|
|
||||||
int depth = depth_nr->ptr;
|
int depth = depth_nr->ptr;
|
||||||
|
|
||||||
|
|
|
@ -22,12 +22,7 @@ inline const byte * toByteArray(Tuple<val_t>::iterator * const t) {
|
||||||
return (**t).toByteArray();
|
return (**t).toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
double tv_to_double(struct timeval tv) {
|
template<class PAGELAYOUT,class ENGINE,class ITER,class ROW>
|
||||||
return static_cast<double>(tv.tv_sec) +
|
|
||||||
(static_cast<double>(tv.tv_usec) / 1000000.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class PAGELAYOUT,class ENGINE,class ITER,class ROW,class TYPE>
|
|
||||||
struct insert_args {
|
struct insert_args {
|
||||||
int comparator_idx;
|
int comparator_idx;
|
||||||
int rowsize;typedef int32_t val_t;
|
int rowsize;typedef int32_t val_t;
|
||||||
|
@ -51,34 +46,34 @@ struct insert_args {
|
||||||
passed into them, and allocate a new PAGELAYOUT object of the
|
passed into them, and allocate a new PAGELAYOUT object of the
|
||||||
appropriate type.
|
appropriate type.
|
||||||
*/
|
*/
|
||||||
template <class COMPRESSOR, class TYPE>
|
template <class COMPRESSOR, class TYPE, class ROW>
|
||||||
static Pstar<COMPRESSOR, TYPE> * initPage(Pstar<COMPRESSOR,TYPE> **pstar,
|
inline Pstar<COMPRESSOR, TYPE> * initPage(Pstar<COMPRESSOR,TYPE> **pstar,
|
||||||
Page *p, TYPE current) {
|
Page *p, const TYPE current) {
|
||||||
*pstar = new Pstar<COMPRESSOR, TYPE>(-1, p);
|
*pstar = new Pstar<COMPRESSOR, TYPE>(-1, p);
|
||||||
(*pstar)->compressor()->offset(current);
|
(*pstar)->compressor()->offset(current);
|
||||||
return *pstar;
|
return *pstar;
|
||||||
}
|
}
|
||||||
template <class COMPRESSOR, class TYPE>
|
template <class COMPRESSOR, class TYPE, class ROW>
|
||||||
static Pstar<COMPRESSOR, TYPE> * initPage(Pstar<COMPRESSOR,TYPE> **pstar,
|
inline Pstar<COMPRESSOR, TYPE> * initPage(Pstar<COMPRESSOR,TYPE> **pstar,
|
||||||
Page *p, Tuple<TYPE> & current) {
|
Page *p, const ROW & current) {
|
||||||
*pstar = new Pstar<COMPRESSOR, TYPE>(-1, p);
|
*pstar = new Pstar<COMPRESSOR, TYPE>(-1, p);
|
||||||
(*pstar)->compressor()->offset(current);
|
(*pstar)->compressor()->offset(current);
|
||||||
return *pstar;
|
return *pstar;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class COMPRESSOR, class TYPE >
|
template <class COMPRESSOR, class TYPE, class ROW >
|
||||||
static Multicolumn<Tuple<TYPE> > * initPage(Multicolumn<Tuple<TYPE> > ** mc,
|
inline Multicolumn<ROW> * initPage(Multicolumn<ROW> ** mc,
|
||||||
Page *p, Tuple<TYPE> & t) {
|
Page *p, const ROW & t) {
|
||||||
column_number_t column_count = t.column_count();
|
column_number_t column_count = t.column_count();
|
||||||
plugin_id_t plugin_id =
|
plugin_id_t plugin_id =
|
||||||
rose::plugin_id<Multicolumn<Tuple<TYPE> >,COMPRESSOR,TYPE>();
|
rose::plugin_id<Multicolumn<ROW>,COMPRESSOR,TYPE>();
|
||||||
|
|
||||||
plugin_id_t * plugins = new plugin_id_t[column_count];
|
plugin_id_t * plugins = new plugin_id_t[column_count];
|
||||||
for(column_number_t c = 0; c < column_count; c++) {
|
for(column_number_t c = 0; c < column_count; c++) {
|
||||||
plugins[c] = plugin_id;
|
plugins[c] = plugin_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
*mc = new Multicolumn<Tuple<TYPE> >(-1,p,column_count,plugins);
|
*mc = new Multicolumn<ROW>(-1,p,column_count,plugins);
|
||||||
for(column_number_t c = 0; c < column_count; c++) {
|
for(column_number_t c = 0; c < column_count; c++) {
|
||||||
((COMPRESSOR*)(*mc)->compressor(c))->offset(*t.get(c));
|
((COMPRESSOR*)(*mc)->compressor(c))->offset(*t.get(c));
|
||||||
}
|
}
|
||||||
|
@ -86,16 +81,16 @@ static Multicolumn<Tuple<TYPE> > * initPage(Multicolumn<Tuple<TYPE> > ** mc,
|
||||||
delete [] plugins;
|
delete [] plugins;
|
||||||
return *mc;
|
return *mc;
|
||||||
}
|
}
|
||||||
template <class COMPRESSOR, class TYPE >
|
template <class COMPRESSOR, class TYPE, class ROW >
|
||||||
static Multicolumn<Tuple<TYPE> > * initPage(Multicolumn<Tuple<TYPE> > ** mc,
|
inline Multicolumn<ROW> * initPage(Multicolumn<ROW> ** mc,
|
||||||
Page *p, TYPE t) {
|
Page *p, const TYPE t) {
|
||||||
plugin_id_t plugin_id =
|
plugin_id_t plugin_id =
|
||||||
rose::plugin_id<Multicolumn<Tuple<TYPE> >,COMPRESSOR,TYPE>();
|
rose::plugin_id<Multicolumn<ROW>,COMPRESSOR,TYPE>();
|
||||||
|
|
||||||
plugin_id_t * plugins = new plugin_id_t[1];
|
plugin_id_t * plugins = new plugin_id_t[1];
|
||||||
plugins[0] = plugin_id;
|
plugins[0] = plugin_id;
|
||||||
|
|
||||||
*mc = new Multicolumn<Tuple<TYPE> >(-1,p,1,plugins);
|
*mc = new Multicolumn<ROW>(-1,p,1,plugins);
|
||||||
((COMPRESSOR*)(*mc)->compressor(0))->offset(t);
|
((COMPRESSOR*)(*mc)->compressor(0))->offset(t);
|
||||||
|
|
||||||
delete [] plugins;
|
delete [] plugins;
|
||||||
|
@ -112,7 +107,7 @@ static Multicolumn<Tuple<TYPE> > * initPage(Multicolumn<Tuple<TYPE> > ** mc,
|
||||||
@return the number of pages that were needed to store the
|
@return the number of pages that were needed to store the
|
||||||
compressed data.
|
compressed data.
|
||||||
*/
|
*/
|
||||||
template <class PAGELAYOUT, class COMPRESSOR, class TYPE, class ITER, class ROW>
|
template <class PAGELAYOUT, class COMPRESSOR, class TYPE, class ROW, class ITER>
|
||||||
pageid_t compressData(ITER * const begin, ITER * const end,
|
pageid_t compressData(ITER * const begin, ITER * const end,
|
||||||
int buildTree, recordid tree, pageid_t (*pageAlloc)(int,void*),
|
int buildTree, recordid tree, pageid_t (*pageAlloc)(int,void*),
|
||||||
void *pageAllocState, uint64_t * inserted) {
|
void *pageAllocState, uint64_t * inserted) {
|
||||||
|
@ -129,13 +124,12 @@ pageid_t compressData(ITER * const begin, ITER * const end,
|
||||||
|
|
||||||
|
|
||||||
if(*begin != *end && buildTree) {
|
if(*begin != *end && buildTree) {
|
||||||
TlsmAppendPage(-1,tree,toByteArray(begin),p->id);
|
TlsmAppendPage(-1,tree,toByteArray(begin),pageAlloc,pageAllocState,p->id);
|
||||||
}
|
}
|
||||||
pageCount++;
|
pageCount++;
|
||||||
|
|
||||||
PAGELAYOUT * mc;
|
PAGELAYOUT * mc;
|
||||||
|
initPage<COMPRESSOR,TYPE,ROW>(&mc, p, **begin);
|
||||||
initPage<COMPRESSOR,TYPE>(&mc, p, **begin);
|
|
||||||
|
|
||||||
int lastEmpty = 0;
|
int lastEmpty = 0;
|
||||||
|
|
||||||
|
@ -154,9 +148,10 @@ pageid_t compressData(ITER * const begin, ITER * const end,
|
||||||
next_page = pageAlloc(-1,pageAllocState);
|
next_page = pageAlloc(-1,pageAllocState);
|
||||||
p = loadPage(-1, next_page);
|
p = loadPage(-1, next_page);
|
||||||
|
|
||||||
mc = initPage<COMPRESSOR, TYPE>(&mc, p, *i);
|
mc = initPage<COMPRESSOR, TYPE, ROW>(&mc, p, *i);
|
||||||
|
|
||||||
if(buildTree) {
|
if(buildTree) {
|
||||||
TlsmAppendPage(-1,tree,toByteArray(&i),p->id);
|
TlsmAppendPage(-1,tree,toByteArray(&i),pageAlloc,pageAllocState,p->id);
|
||||||
}
|
}
|
||||||
pageCount++;
|
pageCount++;
|
||||||
lastEmpty = 0;
|
lastEmpty = 0;
|
||||||
|
@ -176,8 +171,8 @@ pageid_t compressData(ITER * const begin, ITER * const end,
|
||||||
|
|
||||||
template<class PAGELAYOUT,class ENGINE,class ITER,class ROW,class TYPE>
|
template<class PAGELAYOUT,class ENGINE,class ITER,class ROW,class TYPE>
|
||||||
void* insertThread(void* arg) {
|
void* insertThread(void* arg) {
|
||||||
insert_args<PAGELAYOUT,ENGINE,ITER,ROW,TYPE>* a =
|
insert_args<PAGELAYOUT,ENGINE,ITER,ROW>* a =
|
||||||
(insert_args<PAGELAYOUT,ENGINE,ITER,ROW,TYPE>*)arg;
|
(insert_args<PAGELAYOUT,ENGINE,ITER,ROW>*)arg;
|
||||||
|
|
||||||
struct timeval start_tv, start_wait_tv, stop_tv;
|
struct timeval start_tv, start_wait_tv, stop_tv;
|
||||||
|
|
||||||
|
@ -198,8 +193,9 @@ void* insertThread(void* arg) {
|
||||||
if(desiredInserts) {
|
if(desiredInserts) {
|
||||||
j += desiredInserts;
|
j += desiredInserts;
|
||||||
}
|
}
|
||||||
recordid tree = TlsmCreate(-1, a->comparator_idx,a->rowsize);
|
recordid tree = TlsmCreate(-1, a->comparator_idx,a->pageAlloc, a->pageAllocState, a->rowsize);
|
||||||
lastTreeBlocks = compressData<PAGELAYOUT,ENGINE,TYPE,ITER,ROW>
|
lastTreeBlocks =
|
||||||
|
compressData<PAGELAYOUT,ENGINE,TYPE,ROW,ITER>
|
||||||
(&i, &j,1,tree,a->pageAlloc,a->pageAllocState, &lastTreeInserts);
|
(&i, &j,1,tree,a->pageAlloc,a->pageAllocState, &lastTreeInserts);
|
||||||
|
|
||||||
gettimeofday(&start_wait_tv,0);
|
gettimeofday(&start_wait_tv,0);
|
||||||
|
@ -235,10 +231,16 @@ void* insertThread(void* arg) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class PAGELAYOUT,class ENGINE,class ITER,class ROW,class TYPE>
|
/**
|
||||||
|
ITERA is an iterator over the data structure that mergeThread creates (a lsm tree iterator).
|
||||||
|
ITERB is an iterator over the data structures that mergeThread takes as input (lsm tree, or rb tree..)
|
||||||
|
*/
|
||||||
|
template<class PAGELAYOUT, class ENGINE, class ITERA, class ITERB,
|
||||||
|
class ROW, class TYPE>
|
||||||
void* mergeThread(void* arg) {
|
void* mergeThread(void* arg) {
|
||||||
insert_args<PAGELAYOUT,ENGINE,ITER,ROW,TYPE>* a =
|
// The ITER argument of a is unused (we don't look at it's begin or end fields...)
|
||||||
(insert_args<PAGELAYOUT,ENGINE,ITER,ROW,TYPE>*)arg;
|
insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>* a =
|
||||||
|
(insert_args<PAGELAYOUT,ENGINE,ITERA,ROW>*)arg;
|
||||||
|
|
||||||
struct timeval start_tv, wait_tv, stop_tv;
|
struct timeval start_tv, wait_tv, stop_tv;
|
||||||
|
|
||||||
|
@ -260,27 +262,26 @@ void* mergeThread(void* arg) {
|
||||||
|
|
||||||
pthread_mutex_unlock(a->block_ready_mut);
|
pthread_mutex_unlock(a->block_ready_mut);
|
||||||
|
|
||||||
recordid tree = TlsmCreate(-1, a->comparator_idx,a->rowsize);
|
recordid tree = TlsmCreate(-1, a->comparator_idx,a->pageAlloc,a->pageAllocState,a->rowsize);
|
||||||
|
|
||||||
treeIterator<ROW,PAGELAYOUT> taBegin(*oldTreeA,*(a->scratchA),a->rowsize);
|
ITERA taBegin(*oldTreeA,*(a->scratchA),a->rowsize);
|
||||||
treeIterator<ROW,PAGELAYOUT> tbBegin(*oldTreeB,*(a->scratchB),a->rowsize);
|
ITERB tbBegin(*oldTreeB,*(a->scratchB),a->rowsize);
|
||||||
|
|
||||||
treeIterator<ROW,PAGELAYOUT> *taEnd = taBegin.end();
|
ITERA *taEnd = taBegin.end();
|
||||||
treeIterator<ROW,PAGELAYOUT> *tbEnd = tbBegin.end();
|
ITERB *tbEnd = tbBegin.end();
|
||||||
|
|
||||||
mergeIterator<treeIterator<ROW,PAGELAYOUT>,ROW>
|
mergeIterator<ITERA, ITERB, ROW>
|
||||||
mBegin(taBegin, tbBegin, *taEnd, *tbEnd);
|
mBegin(taBegin, tbBegin, *taEnd, *tbEnd);
|
||||||
|
|
||||||
mergeIterator<treeIterator<ROW,PAGELAYOUT>,ROW>
|
mergeIterator<ITERA, ITERB, ROW>
|
||||||
mEnd(taBegin, tbBegin, *taEnd, *tbEnd);
|
mEnd(taBegin, tbBegin, *taEnd, *tbEnd);
|
||||||
|
|
||||||
|
|
||||||
mEnd.seekEnd();
|
mEnd.seekEnd();
|
||||||
uint64_t insertedTuples;
|
uint64_t insertedTuples;
|
||||||
pageid_t mergedPages = compressData<PAGELAYOUT,ENGINE,TYPE,
|
pageid_t mergedPages = compressData<PAGELAYOUT,ENGINE,TYPE,ROW,
|
||||||
mergeIterator<treeIterator<ROW,PAGELAYOUT>,ROW>,ROW>
|
mergeIterator<ITERA, ITERB, ROW> >
|
||||||
(&mBegin, &mEnd,1,tree,a->pageAlloc,a->pageAllocState,&insertedTuples);
|
(&mBegin, &mEnd,1,tree,a->pageAlloc,a->pageAllocState,&insertedTuples);
|
||||||
|
|
||||||
delete taEnd;
|
delete taEnd;
|
||||||
delete tbEnd;
|
delete tbEnd;
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ typedef uint32_t slot_index_t;
|
||||||
typedef uint8_t plugin_id_t;
|
typedef uint8_t plugin_id_t;
|
||||||
typedef uint8_t column_number_t;
|
typedef uint8_t column_number_t;
|
||||||
typedef uint16_t column_offset_t;
|
typedef uint16_t column_offset_t;
|
||||||
|
typedef uint16_t epoch_t;
|
||||||
|
|
||||||
static const record_size_t VARIABLE_SIZE = CHAR_MAX;
|
static const record_size_t VARIABLE_SIZE = CHAR_MAX;
|
||||||
static const slot_index_t NOSPACE = UINT_MAX;
|
static const slot_index_t NOSPACE = UINT_MAX;
|
||||||
|
@ -71,6 +72,12 @@ inline plugin_id_t plugin_id() {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double tv_to_double(struct timeval tv) {
|
||||||
|
return static_cast<double>(tv.tv_sec) +
|
||||||
|
(static_cast<double>(tv.tv_usec) / 1000000.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,8 @@ namespace rose {
|
||||||
template <class TYPE>
|
template <class TYPE>
|
||||||
class For {
|
class For {
|
||||||
public:
|
public:
|
||||||
|
typedef TYPE TYP;
|
||||||
|
|
||||||
static const int PLUGIN_ID = 0;
|
static const int PLUGIN_ID = 0;
|
||||||
/**
|
/**
|
||||||
Set the page offset. For frame of reference, this is used to
|
Set the page offset. For frame of reference, this is used to
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
#define _ROSE_COMPRESSION_MULTICOLUMN_IMPL_H__
|
#define _ROSE_COMPRESSION_MULTICOLUMN_IMPL_H__
|
||||||
|
|
||||||
#include "multicolumn.h"
|
#include "multicolumn.h"
|
||||||
|
#include "for-impl.h"
|
||||||
|
#include "rle-impl.h"
|
||||||
namespace rose {
|
namespace rose {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -68,6 +68,7 @@ template <class TUPLE> class Multicolumn {
|
||||||
public:
|
public:
|
||||||
static page_impl impl();
|
static page_impl impl();
|
||||||
static const plugin_id_t PAGE_FORMAT_ID = 1;
|
static const plugin_id_t PAGE_FORMAT_ID = 1;
|
||||||
|
typedef TUPLE TUP;
|
||||||
|
|
||||||
Multicolumn(int xid, Page *p, column_number_t column_count,
|
Multicolumn(int xid, Page *p, column_number_t column_count,
|
||||||
plugin_id_t * plugins);
|
plugin_id_t * plugins);
|
||||||
|
|
62
src/stasis/page/compression/pageLayout.h
Normal file
62
src/stasis/page/compression/pageLayout.h
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
#ifndef _ROSE_COMPRESSION_PAGELAYOUT_H__
|
||||||
|
#define _ROSE_COMPRESSION_PAGELAYOUT_H__
|
||||||
|
#include "compression.h" // for plugin_id
|
||||||
|
namespace rose {
|
||||||
|
// XXX need to be able to de-init this stuff.
|
||||||
|
static int cmp_num = 1;
|
||||||
|
static int init_num = 1;
|
||||||
|
template <class FORMAT, class COMPRESSOR>
|
||||||
|
class SingleColumnTypePageLayout {
|
||||||
|
public:
|
||||||
|
typedef FORMAT FMT;
|
||||||
|
static inline void initPageLayout() {
|
||||||
|
stasis_page_impl_register(FMT::impl());
|
||||||
|
|
||||||
|
// XXX these should register template instantiations of worker
|
||||||
|
// threads that are statically compiled to deal with the tree
|
||||||
|
// we're instantiating.
|
||||||
|
lsmTreeRegisterComparator(cmp_num, FMT::TUP::cmp);
|
||||||
|
lsmTreeRegisterPageInitializer
|
||||||
|
(init_num, (lsm_page_initializer_t)initPage);
|
||||||
|
my_cmp_num = cmp_num;
|
||||||
|
cmp_num++;
|
||||||
|
my_init_num = init_num;
|
||||||
|
init_num++;
|
||||||
|
}
|
||||||
|
static inline FORMAT * initPage(Page *p, const typename FORMAT::TUP * t) {
|
||||||
|
const column_number_t column_count = t->column_count();
|
||||||
|
|
||||||
|
plugin_id_t pluginid = plugin_id<FORMAT, COMPRESSOR, typename COMPRESSOR::TYP>();
|
||||||
|
|
||||||
|
plugin_id_t * plugins = new plugin_id_t[column_count];
|
||||||
|
for(column_number_t c = 0; c < column_count; c++) {
|
||||||
|
plugins[c] = pluginid;
|
||||||
|
}
|
||||||
|
FORMAT * f = new FORMAT(-1,p,column_count,plugins);
|
||||||
|
for(column_number_t c = 0; c < column_count; c++) {
|
||||||
|
COMPRESSOR* com = (COMPRESSOR*) f->compressor(c);
|
||||||
|
typename COMPRESSOR::TYP val = *(typename COMPRESSOR::TYP*)(t->get(c));
|
||||||
|
com->offset(val);
|
||||||
|
}
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
static inline int cmp_id() {
|
||||||
|
return my_cmp_num;
|
||||||
|
}
|
||||||
|
static inline int init_id() {
|
||||||
|
return my_init_num;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
static int my_cmp_num;
|
||||||
|
static int my_init_num;
|
||||||
|
};
|
||||||
|
template <class FORMAT, class COMPRESSOR>
|
||||||
|
int SingleColumnTypePageLayout<FORMAT,COMPRESSOR>::my_cmp_num = -1;
|
||||||
|
template <class FORMAT, class COMPRESSOR>
|
||||||
|
int SingleColumnTypePageLayout<FORMAT,COMPRESSOR>::my_init_num = -1;
|
||||||
|
|
||||||
|
template <class PAGELAYOUT>
|
||||||
|
recordid TlsmTableAlloc();
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif // _ROSE_COMPRESSION_PAGELAYOUT_H__
|
|
@ -16,6 +16,8 @@ class Rle {
|
||||||
public:
|
public:
|
||||||
typedef uint32_t block_index_t;
|
typedef uint32_t block_index_t;
|
||||||
typedef uint16_t copy_count_t;
|
typedef uint16_t copy_count_t;
|
||||||
|
typedef TYPE TYP;
|
||||||
|
|
||||||
static const copy_count_t MAX_COPY_COUNT = USHRT_MAX;
|
static const copy_count_t MAX_COPY_COUNT = USHRT_MAX;
|
||||||
|
|
||||||
struct triple_t {
|
struct triple_t {
|
||||||
|
|
|
@ -3,16 +3,23 @@
|
||||||
|
|
||||||
namespace rose {
|
namespace rose {
|
||||||
|
|
||||||
template<int N,
|
template<int N, class TYPE0,
|
||||||
typename TYPE0, class TYPE1, class TYPE2, class TYPE3, class TYPE4,
|
class TYPE1=bool, class TYPE2=bool, class TYPE3=bool, class TYPE4=bool,
|
||||||
class TYPE5, class TYPE6, class TYPE7, class TYPE8, class TYPE9>
|
class TYPE5=bool, class TYPE6=bool, class TYPE7=bool, class TYPE8=bool,
|
||||||
|
class TYPE9=bool>
|
||||||
class StaticTuple {
|
class StaticTuple {
|
||||||
public:
|
public:
|
||||||
|
static const char NORMAL = 0;
|
||||||
|
static const char TOMBSTONE = 1;
|
||||||
|
static const int TUPLE_ID = 1;
|
||||||
|
|
||||||
explicit inline StaticTuple() {
|
explicit inline StaticTuple() {
|
||||||
|
s.flag_ = NORMAL;
|
||||||
initializePointers();
|
initializePointers();
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit inline StaticTuple(StaticTuple& t) {
|
explicit inline StaticTuple(StaticTuple& t) {
|
||||||
|
s.flag_ = t.s.flag_;
|
||||||
|
s.epoch_ = t.s.epoch_;
|
||||||
if(0 < N) s.cols0_ = t.s.cols0_;
|
if(0 < N) s.cols0_ = t.s.cols0_;
|
||||||
if(1 < N) s.cols1_ = t.s.cols1_;
|
if(1 < N) s.cols1_ = t.s.cols1_;
|
||||||
if(2 < N) s.cols2_ = t.s.cols2_;
|
if(2 < N) s.cols2_ = t.s.cols2_;
|
||||||
|
@ -28,6 +35,21 @@ class StaticTuple {
|
||||||
|
|
||||||
inline ~StaticTuple() { }
|
inline ~StaticTuple() { }
|
||||||
|
|
||||||
|
static inline byte_off_t sizeofBytes() {
|
||||||
|
return sizeof(flag_t) + sizeof(epoch_t) +
|
||||||
|
(0 < N) ? sizeof(TYPE0) : 0 +
|
||||||
|
(1 < N) ? sizeof(TYPE1) : 0 +
|
||||||
|
(2 < N) ? sizeof(TYPE2) : 0 +
|
||||||
|
(3 < N) ? sizeof(TYPE3) : 0 +
|
||||||
|
(4 < N) ? sizeof(TYPE4) : 0 +
|
||||||
|
(5 < N) ? sizeof(TYPE5) : 0 +
|
||||||
|
(6 < N) ? sizeof(TYPE6) : 0 +
|
||||||
|
(7 < N) ? sizeof(TYPE7) : 0 +
|
||||||
|
(8 < N) ? sizeof(TYPE8) : 0 +
|
||||||
|
(9 < N) ? sizeof(TYPE9) : 0 ;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
inline void* set(column_number_t col, void* val) {
|
inline void* set(column_number_t col, void* val) {
|
||||||
memcpy(cols_[col],val,size_[col]);
|
memcpy(cols_[col],val,size_[col]);
|
||||||
return(cols_[col]);
|
return(cols_[col]);
|
||||||
|
@ -44,7 +66,7 @@ class StaticTuple {
|
||||||
inline TYPE8 * set8(TYPE8* val) { s.cols8_=*val; }
|
inline TYPE8 * set8(TYPE8* val) { s.cols8_=*val; }
|
||||||
inline TYPE9 * set9(TYPE9* val) { s.cols9_=*val; }
|
inline TYPE9 * set9(TYPE9* val) { s.cols9_=*val; }
|
||||||
|
|
||||||
inline const void* get(column_number_t col) const {
|
inline void* get(column_number_t col) const {
|
||||||
return cols_[col];
|
return cols_[col];
|
||||||
}
|
}
|
||||||
inline column_number_t column_count() const { return N; }
|
inline column_number_t column_count() const { return N; }
|
||||||
|
@ -52,13 +74,23 @@ class StaticTuple {
|
||||||
inline byte_off_t column_len(column_number_t col) const {
|
inline byte_off_t column_len(column_number_t col) const {
|
||||||
return size_[col];
|
return size_[col];
|
||||||
}
|
}
|
||||||
inline byte* toByteArray() {
|
inline byte* toByteArray() const {
|
||||||
return &s;
|
return (byte*)&s;
|
||||||
}
|
}
|
||||||
inline bool operator==(StaticTuple &t) {
|
inline bool operator==(const StaticTuple &t) const {
|
||||||
return s == t.s;
|
if(0 < N) if(s.cols0_ != t.s.cols0_) return 0;
|
||||||
|
if(1 < N) if(s.cols1_ != t.s.cols1_) return 0;
|
||||||
|
if(2 < N) if(s.cols2_ != t.s.cols2_) return 0;
|
||||||
|
if(3 < N) if(s.cols3_ != t.s.cols3_) return 0;
|
||||||
|
if(4 < N) if(s.cols4_ != t.s.cols4_) return 0;
|
||||||
|
if(5 < N) if(s.cols5_ != t.s.cols5_) return 0;
|
||||||
|
if(6 < N) if(s.cols6_ != t.s.cols6_) return 0;
|
||||||
|
if(7 < N) if(s.cols7_ != t.s.cols7_) return 0;
|
||||||
|
if(8 < N) if(s.cols8_ != t.s.cols8_) return 0;
|
||||||
|
if(9 < N) if(s.cols9_ != t.s.cols9_) return 0;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
inline bool operator<(StaticTuple &t) {
|
inline bool operator<(const StaticTuple &t) const {
|
||||||
if(0 < N) {
|
if(0 < N) {
|
||||||
if(s.cols0_ < t.s.cols0_) return 1;
|
if(s.cols0_ < t.s.cols0_) return 1;
|
||||||
}
|
}
|
||||||
|
@ -101,6 +133,30 @@ class StaticTuple {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int cmp(const void *ap, const void *bp) {
|
||||||
|
const StaticTuple * a = (const StaticTuple*)ap;
|
||||||
|
const StaticTuple * b = (const StaticTuple*)bp;
|
||||||
|
if(*a < *b) {
|
||||||
|
return -1;
|
||||||
|
} else if(*a == *b) {
|
||||||
|
// Sort *backwards* on epoch values.
|
||||||
|
if(a->s.epoch_ > b->s.epoch_) { return 1; }
|
||||||
|
else if(a->s.epoch_ < b->s.epoch_) { return -1; }
|
||||||
|
else return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
struct stl_cmp
|
||||||
|
{
|
||||||
|
bool operator()(const StaticTuple* s1, const StaticTuple* s2) const
|
||||||
|
{
|
||||||
|
return *s1 < *s2;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class iterator {
|
class iterator {
|
||||||
public:
|
public:
|
||||||
inline iterator(void const *const *const dataset, int offset)
|
inline iterator(void const *const *const dataset, int offset)
|
||||||
|
@ -152,6 +208,8 @@ class StaticTuple {
|
||||||
|
|
||||||
void * cols_[N];
|
void * cols_[N];
|
||||||
size_t size_[N];
|
size_t size_[N];
|
||||||
|
typedef char flag_t;
|
||||||
|
typedef unsigned int epoch_t;
|
||||||
struct {
|
struct {
|
||||||
TYPE0 cols0_;
|
TYPE0 cols0_;
|
||||||
TYPE1 cols1_;
|
TYPE1 cols1_;
|
||||||
|
@ -163,6 +221,8 @@ class StaticTuple {
|
||||||
TYPE7 cols7_;
|
TYPE7 cols7_;
|
||||||
TYPE8 cols8_;
|
TYPE8 cols8_;
|
||||||
TYPE9 cols9_;
|
TYPE9 cols9_;
|
||||||
|
flag_t flag_;
|
||||||
|
epoch_t epoch_;
|
||||||
} s;
|
} s;
|
||||||
|
|
||||||
inline void initializePointers() {
|
inline void initializePointers() {
|
||||||
|
|
|
@ -47,7 +47,9 @@ class Tuple {
|
||||||
}
|
}
|
||||||
} */
|
} */
|
||||||
inline ~Tuple() { delete[] cols_; delete[] byteArray_; }
|
inline ~Tuple() { delete[] cols_; delete[] byteArray_; }
|
||||||
|
inline bool tombstone() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
inline TYPE * set(column_number_t col,void* val) {
|
inline TYPE * set(column_number_t col,void* val) {
|
||||||
cols_[col] = *(TYPE*)val;
|
cols_[col] = *(TYPE*)val;
|
||||||
return (TYPE*)val;
|
return (TYPE*)val;
|
||||||
|
@ -55,6 +57,11 @@ class Tuple {
|
||||||
inline TYPE * get(column_number_t col) const {
|
inline TYPE * get(column_number_t col) const {
|
||||||
return &(cols_[col]);
|
return &(cols_[col]);
|
||||||
}
|
}
|
||||||
|
inline void copyFrom(Tuple<TYPE> t) {
|
||||||
|
for(int i = 0; i < count_; i++) {
|
||||||
|
set(i,t.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
inline column_number_t column_count() const {
|
inline column_number_t column_count() const {
|
||||||
return count_;
|
return count_;
|
||||||
}
|
}
|
||||||
|
@ -156,6 +163,7 @@ class Tuple {
|
||||||
int off_;
|
int off_;
|
||||||
Tuple<TYPE> scratch_;
|
Tuple<TYPE> scratch_;
|
||||||
};
|
};
|
||||||
|
static const uint32_t TIMESTAMP = 0;
|
||||||
private:
|
private:
|
||||||
Tuple() { abort(); }
|
Tuple() { abort(); }
|
||||||
explicit Tuple(const Tuple& t) { abort(); }
|
explicit Tuple(const Tuple& t) { abort(); }
|
||||||
|
|
|
@ -244,6 +244,7 @@ extern const short SLOT_TYPE_LENGTHS[];
|
||||||
#define FILE_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
|
#define FILE_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
|
||||||
#define LOG_MODE (O_CREAT | O_RDWR | O_SYNC)
|
#define LOG_MODE (O_CREAT | O_RDWR | O_SYNC)
|
||||||
|
|
||||||
#define MAX_LSM_COMPARATORS 256
|
#define MAX_LSM_COMPARATORS 16
|
||||||
|
#define MAX_LSM_PAGE_INITIALIZERS 256
|
||||||
|
#define MAX_LSM_PAGE_ALLOCATORS 16
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -23,8 +23,20 @@ typedef struct {
|
||||||
} lladd_lsm_iterator;
|
} lladd_lsm_iterator;
|
||||||
|
|
||||||
typedef int(*lsm_comparator_t)(const void* a, const void* b);
|
typedef int(*lsm_comparator_t)(const void* a, const void* b);
|
||||||
|
typedef void*(*lsm_page_initializer_t)(Page *, void *);
|
||||||
|
typedef pageid_t(*lsm_page_allocator_t)(int, void *);
|
||||||
|
|
||||||
void lsmTreeRegisterComparator(int id, lsm_comparator_t i);
|
void lsmTreeRegisterComparator(int id, lsm_comparator_t i);
|
||||||
|
void lsmTreeRegisterPageInitializer(int id, lsm_page_initializer_t i);
|
||||||
|
|
||||||
|
pageid_t TlsmRegionAlloc(int xid, void *conf);
|
||||||
|
pageid_t TlsmRegionAllocRid(int xid, void *conf);
|
||||||
|
typedef struct {
|
||||||
|
pageid_t nextPage;
|
||||||
|
pageid_t endOfRegion;
|
||||||
|
pageid_t regionSize;
|
||||||
|
} TlsmRegionAllocConf_t;
|
||||||
|
extern TlsmRegionAllocConf_t LSM_REGION_ALLOC_STATIC_INITIALIZER;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Initialize a new LSM tree.
|
Initialize a new LSM tree.
|
||||||
|
@ -37,24 +49,23 @@ void lsmTreeRegisterComparator(int id, lsm_comparator_t i);
|
||||||
|
|
||||||
@param keySize
|
@param keySize
|
||||||
*/
|
*/
|
||||||
recordid TlsmCreate(int xid, int comparator, int keySize);
|
recordid TlsmCreate(int xid, int comparator,
|
||||||
|
lsm_page_allocator_t allocator, void *allocator_state,
|
||||||
|
int keySize);
|
||||||
/**
|
/**
|
||||||
Free the space associated with an LSM tree.
|
Free the space associated with an LSM tree.
|
||||||
*/
|
*/
|
||||||
recordid TlsmDealloc(int xid, recordid tree);
|
recordid TlsmDealloc(int xid,
|
||||||
|
lsm_page_allocator_t allocator, void *allocator_state,
|
||||||
|
recordid tree);
|
||||||
/**
|
/**
|
||||||
Append a new leaf page to an LSM tree. Leaves must be appended in
|
Append a new leaf page to an LSM tree. Leaves must be appended in
|
||||||
ascending order; LSM trees do not support update in place.
|
ascending order; LSM trees do not support update in place.
|
||||||
*/
|
*/
|
||||||
recordid TlsmAppendPage(int xid, recordid tree,
|
recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
const byte *key,
|
const byte *key,
|
||||||
|
lsm_page_allocator_t allocator, void *allocator_state,
|
||||||
long pageid);
|
long pageid);
|
||||||
|
|
||||||
/**
|
|
||||||
Override the page allocation algorithm that LSM tree uses by default
|
|
||||||
*/
|
|
||||||
void TlsmSetPageAllocator(pageid_t (*allocer)(int xid, void * ignored),
|
|
||||||
void * config);
|
|
||||||
/**
|
/**
|
||||||
Lookup a leaf page.
|
Lookup a leaf page.
|
||||||
|
|
||||||
|
|
|
@ -32,15 +32,18 @@ int cmp(const void *ap, const void *bp) {
|
||||||
void insertProbeIter(lsmkey_t NUM_ENTRIES) {
|
void insertProbeIter(lsmkey_t NUM_ENTRIES) {
|
||||||
int intcmp = 0;
|
int intcmp = 0;
|
||||||
lsmTreeRegisterComparator(intcmp,cmp);
|
lsmTreeRegisterComparator(intcmp,cmp);
|
||||||
|
TlsmRegionAllocConf_t alloc_conf = LSM_REGION_ALLOC_STATIC_INITIALIZER;
|
||||||
|
|
||||||
Tinit();
|
Tinit();
|
||||||
int xid = Tbegin();
|
int xid = Tbegin();
|
||||||
recordid tree = TlsmCreate(xid, intcmp, sizeof(lsmkey_t));
|
recordid tree = TlsmCreate(xid, intcmp,
|
||||||
|
TlsmRegionAlloc, &alloc_conf,
|
||||||
|
sizeof(lsmkey_t));
|
||||||
for(lsmkey_t i = 0; i < NUM_ENTRIES; i++) {
|
for(lsmkey_t i = 0; i < NUM_ENTRIES; i++) {
|
||||||
long pagenum = TlsmFindPage(xid, tree, (byte*)&i);
|
long pagenum = TlsmFindPage(xid, tree, (byte*)&i);
|
||||||
assert(pagenum == -1);
|
assert(pagenum == -1);
|
||||||
DEBUG("TlsmAppendPage %d\n",i);
|
DEBUG("TlsmAppendPage %d\n",i);
|
||||||
TlsmAppendPage(xid, tree, (const byte*)&i, i + OFFSET);
|
TlsmAppendPage(xid, tree, (const byte*)&i, TlsmRegionAlloc, &alloc_conf, i + OFFSET);
|
||||||
pagenum = TlsmFindPage(xid, tree, (byte*)&i);
|
pagenum = TlsmFindPage(xid, tree, (byte*)&i);
|
||||||
assert(pagenum == i + OFFSET);
|
assert(pagenum == i + OFFSET);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue