stasis-aries-wal/src/stasis/operations/lsmIterators.h

306 lines
7.3 KiB
C++

#ifndef _LSMITERATORS_H__
#define _LSMITERATORS_H__
#include "stasis/page.h"
#include "stasis/bufferManager.h"
#include "stasis/page/compression/compression.h"
#include "stasis/page/compression/tuple.h"
/**
@file
This file contains a number of C++ STL-style iterators that are
used during the LSM tree merges.
*/
/**
@todo get rid of these undefs once compensation.h has been removed...
*/
#undef end
#undef begin
namespace rose {
template <class ITER, class ROW> class mergeIterator;
template <class ITER, class ROW>
inline const byte * toByteArray(mergeIterator<ITER,ROW> * const t);
/**
Scans through an LSM tree's leaf pages, each tuple in the tree, in
order. This iterator is designed for maximum forward scan
performance, and does not support all STL operations.
*/
template <class ROW, class PAGELAYOUT>
class treeIterator {
public:
explicit treeIterator(recordid tree, ROW &scratch, int keylen) :
tree_(tree),
scratch_(scratch),
keylen_(keylen),
lsmIterator_(lsmTreeIterator_open(-1,tree)),
slot_(0)
{
if(!lsmTreeIterator_next(-1, lsmIterator_)) {
currentPage_ = 0;
pageid_ = -1;
p_ = 0;
} else {
pageid_t * pid_tmp;
lsmTreeIterator_value(-1,lsmIterator_,(byte**)&pid_tmp);
pageid_ = *pid_tmp;
p_ = loadPage(-1,pageid_);
currentPage_ = (PAGELAYOUT*)p_->impl;
}
}
explicit treeIterator(treeIterator& t) :
tree_(t.tree_),
scratch_(t.scratch_),
keylen_(t.keylen_),
lsmIterator_(lsmTreeIterator_copy(-1,t.lsmIterator_)),
slot_(t.slot_),
pageid_(t.pageid_),
p_((Page*)((t.p_)?loadPage(-1,t.p_->id):0)),
currentPage_((PAGELAYOUT*)((p_)?p_->impl:0)) {
}
~treeIterator() {
lsmTreeIterator_close(-1, lsmIterator_);
if(p_) {
releasePage(p_);
p_ = 0;
}
}
ROW & operator*() {
ROW* readTuple = currentPage_->recordRead(-1,slot_, &scratch_);
if(!readTuple) {
releasePage(p_);
p_=0;
if(lsmTreeIterator_next(-1,lsmIterator_)) {
pageid_t *pid_tmp;
slot_ = 0;
lsmTreeIterator_value(-1,lsmIterator_,(byte**)&pid_tmp);
pageid_ = *pid_tmp;
p_ = loadPage(-1,pageid_);
currentPage_ = (PAGELAYOUT*)p_->impl;
readTuple = currentPage_->recordRead(-1,slot_, &scratch_);
assert(readTuple);
} else {
// past end of iterator! "end" should contain the pageid of the
// last leaf, and 1+ numslots on that page.
abort();
}
}
return scratch_;
}
inline bool operator==(const treeIterator &a) const {
return (slot_ == a.slot_ && pageid_ == a.pageid_);
}
inline bool operator!=(const treeIterator &a) const {
return !(*this==a);
}
inline void operator++() {
slot_++;
}
inline void operator--() {
// This iterator consumes its input, and only partially supports
// "==". "--" is just for book keeping, so we don't need to worry
// about setting the other state.
slot_--;
}
inline treeIterator* end() {
treeIterator* t = new treeIterator(tree_,scratch_,keylen_);
if(t->p_) {
releasePage(t->p_);
t->p_=0;
}
t->currentPage_ = 0;
pageid_t pid = TlsmLastPage(-1,tree_);
if(pid != -1) {
t->pageid_= pid;
Page * p = loadPage(-1, t->pageid_);
PAGELAYOUT * lastPage = (PAGELAYOUT*)p->impl;
t->slot_ = 0;
while(lastPage->recordRead(-1,t->slot_,&scratch_)) { t->slot_++; }
releasePage(p);
} else {
// begin == end already; we're done.
}
return t;
}
private:
explicit treeIterator() { abort(); }
void operator=(treeIterator & t) { abort(); }
int operator-(treeIterator & t) { abort(); }
recordid tree_;
ROW & scratch_;
int keylen_;
lladdIterator_t * lsmIterator_;
slot_index_t slot_;
pageid_t pageid_;
Page * p_;
PAGELAYOUT * currentPage_;
};
/**
This iterator takes two otehr iterators as arguments, and merges
their output, dropping duplicate entries.
@todo LSM tree is not be very useful without support for deletion
(and, therefore, versioning). Such support will require
modifications to mergeIterator (or perhaps, a new iterator
class).
*/
template<class ITER, class ROW>
class mergeIterator {
private:
static const int A = 0;
static const int B = 1;
static const int NONE = -1;
static const int BOTH = -2;
inline int calcCurr(int oldcur) {
int cur;
if(oldcur == NONE) { return NONE; }
if(a_ == aend_) {
if(b_ == bend_) {
cur = NONE;
} else {
cur = B;
}
} else {
if(b_ == bend_) {
cur = A;
} else {
if((*a_) < (*b_)) {
cur = A;
} else if((*a_) == (*b_)) {
cur = BOTH;
} else {
cur = B;
}
}
}
return cur;
}
public:
mergeIterator(ITER & a, ITER & b, ITER & aend, ITER & bend) :
off_(0),
a_(a),
b_(b),
aend_(aend),
bend_(bend),
curr_(calcCurr(A)),
before_eof_(0)
{}
explicit mergeIterator(mergeIterator &i) :
off_(i.off_),
a_(i.a_),
b_(i.b_),
aend_(i.aend_),
bend_(i.bend_),
curr_(i.curr_),
before_eof_(i.before_eof_)
{ }
ROW& operator* () {
if(curr_ == A || curr_ == BOTH) { return *a_; }
if(curr_ == B) { return *b_; }
abort();
}
void seekEnd() {
curr_ = NONE;
}
// XXX Only works if exactly one of the comparators is derived from end.
inline bool operator==(const mergeIterator &o) const {
if(curr_ == NONE && o.curr_ == NONE) {
return 1;
} else if(curr_ != NONE && o.curr_ != NONE) {
return (a_ == o.a_) && (b_ == o.b_);
}
return 0;
}
inline bool operator!=(const mergeIterator &o) const {
return !(*this == o);
}
inline void operator++() {
off_++;
if(curr_ == BOTH) {
++a_;
++b_;
} else {
if(curr_ == A) { ++a_; }
if(curr_ == B) { ++b_; }
}
curr_ = calcCurr(curr_);
}
inline void operator--() {
off_--;
if(curr_ == BOTH) {
--a_;
--b_;
} else {
if(curr_ == A) { --a_; }
if(curr_ == B) { --b_; }
}
if(curr_ == NONE) {
before_eof_ = 1;
} else {
before_eof_ = 0;
}
}
inline int operator-(mergeIterator&i) {
return off_ - i.off_;
}
inline void operator=(mergeIterator const &i) {
off_ = i.off_;
a_ = i.a_;
b_ = i.b_;
aend_ = i.aend_;
bend_ = i.bend_;
curr_ = i.curr_;
before_eof_ = i.before_eof;
}
inline unsigned int offset() { return off_; }
private:
unsigned int off_;
ITER a_;
ITER b_;
ITER aend_;
ITER bend_;
int curr_;
int before_eof_;
friend const byte* toByteArray<ITER,ROW>(mergeIterator<ITER,ROW> * const t);
};
/** Produce a byte array from the value stored at t's current
position */
template <class ITER, class ROW>
inline const byte * toByteArray(mergeIterator<ITER,ROW> * const t) {
if(t->curr_ == t->A || t->curr_ == t->BOTH) {
return toByteArray(&t->a_);
} else if(t->curr_ == t->B) {
return toByteArray(&t->b_);
}
abort();
}
template <class PAGELAYOUT>
inline const byte* toByteArray(treeIterator<int,PAGELAYOUT> *const t) {
return (const byte*)&(**t);
}
template <class PAGELAYOUT,class TYPE>
inline const byte* toByteArray(treeIterator<Tuple<TYPE>,PAGELAYOUT> *const t) {
return (**t).toByteArray();
}
}
#endif // _LSMITERATORS_H__