stasis-aries-wal/stasis/experimental/compression/multicolumn.h
2012-12-04 18:17:23 -08:00

176 lines
6 KiB
C++

#ifndef _ROSE_COMPRESSION_MULTICOLUMN_H__
#define _ROSE_COMPRESSION_MULTICOLUMN_H__
#include <limits.h>
#include <stasis/page.h>
#include <stasis/constants.h>
#include "compression.h"
//#include "pstar.h" // for typedefs + consts (XXX add new header?)
#include "tuple.h" // XXX rename tuple.hx
#include "pluginDispatcher.h"
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sears@google.com (Rusty Sears)
/**
@file
Page implementation for multi-column, compressed data
STRUCTURE OF A MULTICOLUMN PAGE
<pre>
+----------------------------------------------------------------------+
| col #0 compressed data (opaque) | col #1 compressed data (opaque) |
+-----+---------------------------+-----+------------------------------|
| ... | col \#N compressed data (opaque) | |
+-----+----------------------------------+ |
| Free space |
| |
| |
| +----------------------------------------+
| | Exceptions: |
+-----------------------------+ Includes data from multiple cols |
| |
| Exception data is managed (bytes are copied in and out of this |
| region) by the column implementations. Multicolumn mediates between |
| the columns, by recording the length and offset of this region. |
| |
| +---------------+---------------+
| ... | exception # 1 | exception # 0 |
+----------------------+---------------------+----+--------------------+
| first header byte -> | col # N off, plugin | .. | col #1 off, plugin |
+--------------------+-+--------------+------+----+----+-----------+---+
| col #0 off, plugin | exceptions len | exceptions off | # of cols | ? |
+--------------------+----------------+----------------+-----------+---+
</pre>
Notes:
The 'exceptions' portion of the page grows down from
first_header_byte, while the column data portion grows up from byte
zero... This was an arbitrary decision, and complicated the
implementation somewhat...
Functions whose names end in "_ptr" return pointers to bytes in the
page. That memory is persistent, and will eventually be written
back to the page file.
*/
namespace rose {
/**
 * "pageLoaded()" callback handed to Stasis' buffer manager for pages that
 * use the multicolumn format.
 *
 * @tparam TUPLE The tuple type stored on the page.
 * @param p The page the callback is invoked for.
 */
template <typename TUPLE>
void multicolumnLoaded(Page *p);
/**
 * Page format holding several columns of compressed data on a single page.
 * The on-page layout is described by the diagram at the top of this file.
 *
 * @tparam TUPLE The tuple type that is appended to and read from the page.
 */
template <class TUPLE>
class Multicolumn {
 public:
  // NOTE(review): defined outside this header; presumably the page_impl
  // that describes this page format to Stasis -- confirm.
  static page_impl impl();
  static const plugin_id_t PAGE_FORMAT_ID = 1;
  typedef TUPLE TUP;

  // NOTE(review): the private constructor below loads an existing page, so
  // this overload presumably formats a new one; 'plugins' presumably holds
  // one plugin id per column -- confirm against the implementation.
  Multicolumn(int xid, Page *p, column_number_t column_count,
              plugin_id_t * plugins);
  ~Multicolumn();

  /**
     Look up the compressor that handles one column.  How table regions
     map onto compressor instances is implementation defined, but a page
     never has more than one compressor for a given column.

     @param col The column whose compressor is wanted.
     @return A pointer to that compressor.  The pointer is only guaranteed
             to remain valid until the next call made on this Multicolumn
             object; after that it must not be used.
  */
  void* compressor(column_number_t col) {
    void * const column_compressor = dispatcher_.compressor(col);
    return column_compressor;
  }

  inline slot_index_t append(int xid, const TUPLE & dat);
  inline TUPLE * recordRead(int xid, slot_index_t slot, TUPLE * buf);
  inline TUPLE * recordFind(int xid, TUPLE& val, TUPLE& scratch);
  inline slot_index_t recordCount(int xid);
  inline void pack();

 private:
  /** Per-column header entry: byte offset of the column data, and the id
      of the plugin that handles the column. */
  struct column_header {
    byte_off_t off;
    plugin_id_t plugin_id;
  };

  /**
     Load an existing multicolumn Page
  */
  Multicolumn(Page * p);

  /**
     Pointer-arithmetic helpers.  This code is performance critical: each
     helper is a short inline function that mostly evaluates a simple
     expression over constants, which g++'s optimizer seems to combine and
     simplify.  Each helper is defined relative to the previous one,
     walking backwards from the end of the usable page; see the page
     layout diagram at the top of this file for where the fields live.
  */

  /** Address of the column count: the last column_number_t-sized slot
      before (p_->memAddr + USABLE_SIZE_OF_PAGE). */
  inline column_number_t * column_count_ptr() {
    column_number_t * const usable_end =
        reinterpret_cast<column_number_t*>(p_->memAddr + USABLE_SIZE_OF_PAGE);
    return usable_end - 1;
  }

  /** Address of the exceptions offset: the byte_off_t immediately
      preceding the column count. */
  inline byte_off_t * exceptions_offset_ptr() {
    byte_off_t * const past_offset_field =
        reinterpret_cast<byte_off_t*>(column_count_ptr());
    return past_offset_field - 1;
  }

  /** Address of the exceptions length: the byte_off_t immediately
      preceding the exceptions offset. */
  inline byte_off_t * exceptions_len_ptr() {
    byte_off_t * const offset_field = exceptions_offset_ptr();
    return offset_field - 1;
  }

  /** Address of the header for the given column.  Headers are laid out
      backwards from the exceptions length field: column 0's header is
      directly before it, and each higher column sits one header lower. */
  inline column_header * column_header_ptr(column_number_t column_number) {
    column_header * const past_headers =
        reinterpret_cast<column_header*>(exceptions_len_ptr());
    return past_headers - (1+column_number);
  }

  /** Address of the 'off' field inside the given column's header. */
  inline byte_off_t * column_offset_ptr(column_number_t column_number) {
    column_header * const header = column_header_ptr(column_number);
    return &(header->off);
  }

  /**
     Address of the 'plugin_id' field inside the given column's header.
     @see rose::plugin_id()
  */
  inline plugin_id_t * column_plugin_id_ptr(column_number_t column_number) {
    column_header * const header = column_header_ptr(column_number);
    return &(header->plugin_id);
  }

  /**
     The first byte that contains data for this column, computed as the
     page's memAddr plus the offset recorded in the column's header.

     The length of the column data can be determined by calling
     COMPRESSOR's bytes_used() member function. (PluginDispatcher
     can handle this).
  */
  inline byte * column_base_ptr(column_number_t column_number) {
    return p_->memAddr + *column_offset_ptr(column_number);
  }

  /** Address of the header belonging to the last column (index
      column count - 1), i.e. the lowest-addressed header on the page. */
  inline byte * first_header_byte_ptr() {
    column_header * const lowest_header =
        column_header_ptr((*column_count_ptr())-1);
    return reinterpret_cast<byte*>(lowest_header);
  }

  static inline plugin_id_t plugin_id();

  // The page whose memory (p_->memAddr) the helpers above index into.
  Page * p_;
  byte ** columns_;
  byte_off_t first_exception_byte_;

 public:
  PluginDispatcher dispatcher_;

 private:
  int bytes_left_;
  int unpacked_;

  // The buffer-manager callback is a friend, giving it access to the
  // private members of this class.
  friend void multicolumnLoaded<TUPLE>(Page *p);
};
} // namespace rose
#endif // _ROSE_COMPRESSION_MULTICOLUMN_H__