// stasis-aries-wal/src/stasis/page/compression/tuple.h
#ifndef _ROSE_COMPRESSION_TUPLE_H__
#define _ROSE_COMPRESSION_TUPLE_H__
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sears@google.com (Rusty Sears)
/**
   @file Implementation of tuples (Tuple) and dispatch routines for
   column-wide compression (PluginDispatcher).
*/
#include <limits.h>
#include <ctype.h>
#include "compression.h"
#include "pstar-impl.h"
#include "multicolumn.h"
namespace rose {
template <class TUPLE> class Multicolumn;
template<class TYPE> class Tuple;
/**
   PluginDispatcher essentially just wraps calls to compressors in
   switch statements.

   It has a number of deficiencies:

     1) Performance.  The switch statement is the main CPU bottleneck
        for both of the current compression schemes.

     2) PluginDispatcher has to "know" about all compression
        algorithms and all data types that it may encounter.

   This approach has one advantage: it doesn't preclude other
   (templatized) implementations that hardcode schema formats at
   compile time.

   Performance could be partially addressed by using a blocking append
   algorithm (a sketch follows the class definition below):

     A) Queue up multiple append requests (or precompute read requests)
        when appropriate.

     B) Before appending, calculate a lower (pessimistic) bound on the
        number of inserted tuples that can fit in the page:

           n = (free bytes) / (maximum space per tuple)

     C) Compress n tuples from each column at a time.  Only evaluate the
        switch statement once for each column.

     D) Repeat steps B and C until n is below some threshold, then
        revert to the current behavior.

   Batching read requests is simpler, and would be useful for
   sequential scans over the data.
*/
class PluginDispatcher{
public:
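/**
   dispatchSwitch() maps a column's plugin_id_t onto one of the sixteen
   compressor instantiations below: ids start at
   USER_DEFINED_PAGE(0) + 2 * 2 * 4 ("base"), with cases 0-7 covering
   For<> over the eight fixed-width integer types and cases 8-15 covering
   Rle<> over the same types.  The case* macros that follow each expand to
   one branch of that switch.
*/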
#define dispatchSwitch(col,cases,...) \
static const int base = USER_DEFINED_PAGE(0) + 2 * 2 * 4;\
switch(plugin_ids_[col]-base) { \
cases(0, For<uint8_t>, col,uint8_t, __VA_ARGS__); \
cases(1, For<uint16_t>,col,uint16_t,__VA_ARGS__); \
cases(2, For<uint32_t>,col,uint32_t,__VA_ARGS__); \
cases(3, For<uint64_t>,col,uint64_t,__VA_ARGS__); \
cases(4, For<int8_t>, col,int8_t, __VA_ARGS__); \
cases(5, For<int16_t>, col,int16_t, __VA_ARGS__); \
cases(6, For<int32_t>, col,int32_t, __VA_ARGS__); \
cases(7, For<int64_t>, col,int64_t, __VA_ARGS__); \
cases(8, Rle<uint8_t>, col,uint8_t, __VA_ARGS__); \
cases(9, Rle<uint16_t>,col,uint16_t,__VA_ARGS__); \
cases(10,Rle<uint32_t>,col,uint32_t,__VA_ARGS__); \
cases(11,Rle<uint64_t>,col,uint64_t,__VA_ARGS__); \
cases(12,Rle<int8_t>, col,int8_t, __VA_ARGS__); \
cases(13,Rle<int16_t>, col,int16_t, __VA_ARGS__); \
cases(14,Rle<int32_t>, col,int32_t, __VA_ARGS__); \
cases(15,Rle<int64_t>, col,int64_t, __VA_ARGS__); \
default: abort(); \
};
#define caseAppend(off,plug_type,col,type,fcn,ret,xid,dat,...) \
case off: { \
ret = ((plug_type*)plugins_[col])->fcn(xid,*(type*)dat,__VA_ARGS__); } break
#define caseSetPlugin(off,plug_type,col,type,m) \
case off: { plugins_[col] = new plug_type(m); } break
#define caseDelPlugin(off,plug_type,col,type,m) \
case off: { delete (plug_type*)plugins_[col]; } break
#define caseRead(off,plug_type,col,type,m,ret,fcn,xid,slot,except,scratch) \
case off: { ret = ((plug_type*)plugins_[col])->fcn(xid,slot,except,(type*)scratch); } break
#define caseNoArg(off,plug_type,col,type,m,ret,fcn) \
case off: { ret = ((plug_type*)plugins_[col])->fcn(); } break
#define caseInitMem(off,plug_type,col,type,m) \
case off: { ((plug_type*)plugins_[col])->init_mem(m); } break
#define caseMem(off,plug_type,col,type,m) \
case off: { ((plug_type*)plugins_[col])->mem(m); } break
#define caseCompressor(off,plug_type,col,type,nil) \
case off: { ret = (plug_type*)plugins_[col]; } break
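/**
   The wrappers below hide the dispatch machinery: each one evaluates
   dispatchSwitch() once and forwards the call to the per-column plugin
   installed by set_plugin().
*/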
inline slot_index_t recordAppend(int xid, column_number_t col,
const void *dat, byte_off_t* except,
byte *exceptions, int *free_bytes) {
slot_index_t ret;
dispatchSwitch(col,caseAppend,append,ret,xid,dat,except,exceptions,
free_bytes);
return ret;
}
inline void *recordRead(int xid, byte *mem, column_number_t col,
slot_index_t slot, byte* exceptions, void *scratch) {
void * ret;
dispatchSwitch(col,caseRead,mem,ret,recordRead,xid,slot,exceptions,scratch);
return ret;
}
inline byte_off_t bytes_used(column_number_t col) {
byte_off_t ret;
dispatchSwitch(col,caseNoArg,mem,ret,bytes_used);
return ret;
}
inline void init_mem(byte * mem, column_number_t col) {
dispatchSwitch(col,caseInitMem,mem);
}
inline void mem(byte * mem, column_number_t col) {
dispatchSwitch(col,caseMem,mem);
}
inline void * compressor(column_number_t col) {
void * ret;
dispatchSwitch(col,caseCompressor,0);
return ret;
}
PluginDispatcher(column_number_t column_count) :
column_count_(column_count), plugin_ids_(new plugin_id_t[column_count]), plugins_(new void*[column_count]) {
for(column_number_t i = 0; i < column_count; i++) {
plugin_ids_[i] = 0;
}
}
PluginDispatcher(int xid, byte *mem,column_number_t column_count, plugin_id_t * plugins) :
column_count_(column_count), plugin_ids_(new plugin_id_t[column_count]), plugins_(new void*[column_count]) {
for(column_number_t i = 0; i < column_count; i++) {
plugin_ids_[i] = 0;
set_plugin(mem,i,plugins[i]);
}
}
inline void set_plugin(byte *mem,column_number_t c, plugin_id_t p) {
if(plugin_ids_[c]) {
dispatchSwitch(c,caseDelPlugin,0);
}
plugin_ids_[c] = p;
dispatchSwitch(c,caseSetPlugin,mem);
}
~PluginDispatcher() {
for(column_number_t i = 0; i < column_count_; i++) {
dispatchSwitch(i,caseDelPlugin,0);
}
delete[] plugin_ids_;
delete[] plugins_;
}
#undef caseAppend
#undef caseSetPlugin
#undef caseDelPlugin
#undef caseRead
#undef caseNoArg
#undef caseInitMem
#undef caseMem
#undef caseCompressor
private:
column_number_t column_count_;
plugin_id_t * plugin_ids_;
void ** plugins_;
};
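/**
   A minimal sketch, for illustration only, of the blocking append described
   in the comment above PluginDispatcher; nothing else in this header uses it.
   The queue layout (a plain array of tuple pointers) and the max_tuple_bytes
   parameter are assumptions, not part of the rose API.  It shows steps A, B
   and D directly; realizing step C's savings would additionally require a
   per-plugin "append a run of n values" entry point, which the current
   plugins do not expose, so here recordAppend() is simply called once per
   value.
*/
template <class TUPLE>
inline void sketchBlockingAppend(int xid, PluginDispatcher &pd,
                                 TUPLE **queued, int tuple_count, // step A
                                 int max_tuple_bytes,
                                 byte_off_t *except, byte *exceptions,
                                 int *free_bytes) {
  int next = 0;
  while(next < tuple_count) {
    // Step B: pessimistic lower bound on how many queued tuples must fit.
    int n = *free_bytes / max_tuple_bytes;
    if(n < 2) { n = 1; }                       // Step D: fall back to one at a time.
    if(n > tuple_count - next) { n = tuple_count - next; }
    // Step C (approximated): walk column-by-column over the next n tuples so
    // that a batched per-plugin call could replace the inner loop.
    for(column_number_t c = 0; c < queued[next]->column_count(); c++) {
      for(int i = 0; i < n; i++) {
        pd.recordAppend(xid, c, queued[next + i]->get(c), except, exceptions,
                        free_bytes);
      }
    }
    next += n;
  }
}
/**
   A fixed-arity tuple of TYPE values.  Owns a small scratch buffer so that
   toByteArray() can serialize the tuple without allocating.
*/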
template<class TYPE>
class Tuple {
public:
explicit Tuple(column_number_t count) : count_(count),
cols_(new TYPE[count]),
byteArray_(new byte[sizeof(count_)+count_*sizeof(TYPE)]) {}
/*explicit Tuple(byte* b) : count_(*(column_number_t*)b),
cols_(new TYPE[count_]),
byteArray_(new byte[sizeof(count_)+count_*sizeof(TYPE)]) {
memcpy(cols_,b+sizeof(column_number_t), sizeof(TYPE)*count_);
} */
explicit Tuple(Tuple& t) : count_(t.count_), cols_(new TYPE[count_]),
byteArray_(new byte[sizeof(count_)+count_*sizeof(TYPE)]) {
for(column_number_t c = 0; c < count_; c++) {
cols_[c] = t.cols_[c];
}
}
Tuple(TYPE t) : count_(0), cols_(new TYPE[1]),
byteArray_(new byte[sizeof(count_)+sizeof(TYPE)]) {
cols_[0] = t;
}
/* Tuple(Tuple *t) : count_(t->count_),cols_(new TYPE[count_]) {
for(column_number_t c = 0; c < count_; c++) {
cols_[c] = t->cols_[c];
}
} */
inline ~Tuple() { delete[] cols_; delete[] byteArray_; }
inline TYPE * set(column_number_t col,void* val) {
cols_[col] = *(TYPE*)val;
return (TYPE*)val;
}
inline TYPE * get(column_number_t col) const {
return &(cols_[col]);
}
inline column_number_t column_count() const {
return count_;
}
inline byte_off_t column_len(column_number_t col) const {
return sizeof(TYPE);
}
/* inline void fromByteArray(byte * b) {
assert(count_ == *(column_number_t*)b);
// memcpy(cols_,b+sizeof(column_number_t),sizeof(TYPE)*count_);
TYPE *newCols = (int*)(b + sizeof(column_number_t));
for(column_number_t i = 0; i < count_; i++) {
cols_[i] = newCols[i];
}
} */
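/**
   Serializes the tuple as count_ followed by the raw column values into the
   preallocated byteArray_; the buffer is owned by this Tuple and is
   invalidated when the Tuple is destroyed.
*/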
inline byte* toByteArray() {
byte* ret = byteArray_;
memcpy(ret, &count_, sizeof(count_));
memcpy(ret+sizeof(count_), cols_, count_ * sizeof(TYPE));
return ret;
}
/* inline operator const byte * () {
return toByteArray();
} */
inline operator TYPE () {
return cols_[0]; //*get(0);
}
/* inline operator TYPE () {
assert(count_ == 0);
return cols_[0];
} */
static inline size_t sizeofBytes(column_number_t cols) {
return sizeof(column_number_t) + cols * sizeof(TYPE);
}
static const int TUPLE_ID = 0;
/* inline bool operator==(Tuple *t) {
return *this == *t;
} */
inline bool operator==(Tuple &t) {
//if(t.count_ != count_) return 0;
for(column_number_t i = 0; i < count_; i++) {
if(cols_[i] != t.cols_[i]) { return 0;}
}
return 1;
}
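// Note: this returns true if *any* column compares less, so for multi-column
// tuples it is not a lexicographic (strict weak) ordering.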
inline bool operator<(Tuple &t) {
//if(t.count_ != count_) return 0;
for(column_number_t i = 0; i < count_; i++) {
if(cols_[i] < t.cols_[i]) { return 1;}
}
return 0;
}
/* inline bool operator==(TYPE val) {
assert(count_ == 1);
return cols_[0] == val;
}*/
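/**
   Iterates over a row range of a set of parallel column arrays ("dataset").
   Dereferencing copies the current row's values into an internal scratch
   Tuple, so the reference returned by operator*() is only valid until the
   iterator is dereferenced again.
*/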
class iterator {
public:
inline iterator(column_number_t c, TYPE const *const *const dataset, int offset) :
c_(c),
dat_(dataset),
off_(offset),
scratch_(c_) {}
inline explicit iterator(const iterator &i) : c_(i.c_), dat_(i.dat_), off_(i.off_),
scratch_(c_) {}
inline Tuple<TYPE>& operator*() {
for(column_number_t i = 0; i < c_; i++) {
scratch_.set(i,(void*)&dat_[i][off_]);
}
return scratch_;
}
inline bool operator==(const iterator &a) const {
//assert(dat_==a.dat_ && c_==a.c_);
return (off_==a.off_);
}
inline bool operator!=(const iterator &a) const {
//assert(dat_==a.dat_ && c_==a.c_);
return (off_!=a.off_);
}
inline void operator++() { off_++; }
inline void operator--() { off_--; }
inline void operator+=(int i) { abort(); }
inline int operator-(iterator&i) {
return off_ - i.off_;
}
inline void operator=(iterator &i) {
assert(c_==i.c_);
assert(dat_==i.dat_);
off_=i.off_;
}
inline void offset(int off) {
off_=off;
}
private:
column_number_t c_;
TYPE const * const * dat_;
int off_;
Tuple<TYPE> scratch_;
};
private:
Tuple() { abort(); }
explicit Tuple(const Tuple& t) { abort(); }
column_number_t count_;
TYPE * const cols_;
byte * byteArray_;
};
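/**
   Illustrative only; not used elsewhere in this header.  A minimal sketch of
   driving Tuple<TYPE>::iterator over parallel column arrays, the way a
   sequential scan over decompressed column data might.  The function name and
   the idea of summing the first column are arbitrary examples, not part of
   the rose API.
*/
template <class TYPE>
inline TYPE sumFirstColumn(TYPE const *const *cols, column_number_t column_count,
                           int row_count) {
  TYPE sum = 0;
  typename Tuple<TYPE>::iterator cur(column_count, cols, 0);
  typename Tuple<TYPE>::iterator end(column_count, cols, row_count);
  while(cur != end) {
    Tuple<TYPE> &t = *cur;  // copies the current row into the iterator's scratch tuple
    sum += *t.get(0);
    ++cur;
  }
  return sum;
}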
}
#endif // _ROSE_COMPRESSION_TUPLE_H__