2010-01-23 02:13:59 +00:00
# include "logstore.h"
2010-03-17 21:51:26 +00:00
# include "merger.h"
2010-01-23 02:13:59 +00:00
2010-02-15 23:02:01 +00:00
# include <stasis/transactional.h>
2010-04-28 19:21:25 +00:00
# include <stasis/bufferManager.h>
# include <stasis/bufferManager/bufferHash.h>
2011-04-20 20:17:26 +00:00
# include <stasis/logger/logger2.h>
# include <stasis/logger/logHandle.h>
2011-04-20 21:51:04 +00:00
# include <stasis/logger/filePool.h>
2010-05-19 23:42:06 +00:00
# include "mergeStats.h"
2010-04-28 19:21:25 +00:00
2010-03-17 21:51:26 +00:00
# undef try
# undef end
2010-02-15 23:02:01 +00:00
/**
 * Converts a struct timeval into fractional seconds.
 *
 * @param tv  time value made of whole seconds (tv_sec) and microseconds (tv_usec)
 * @return    tv_sec plus tv_usec scaled down by one million, as a double
 */
static inline double tv_to_double(struct timeval tv)
{
  const double whole_seconds = static_cast<double>(tv.tv_sec);
  const double microseconds  = static_cast<double>(tv.tv_usec);
  return whole_seconds + (microseconds / 1000000.0);
}
2010-01-23 02:13:59 +00:00
/////////////////////////////////////////////////////////////////
// LOG TABLE IMPLEMENTATION
/////////////////////////////////////////////////////////////////
2010-03-17 21:51:26 +00:00
template < class TUPLE >
2011-04-20 21:51:04 +00:00
logtable < TUPLE > : : logtable ( int log_mode , pageid_t max_c0_size , pageid_t internal_region_size , pageid_t datapage_region_size , pageid_t datapage_size )
2010-01-23 02:13:59 +00:00
{
2011-04-20 20:17:26 +00:00
recovering = true ;
2010-12-14 00:06:32 +00:00
this - > max_c0_size = max_c0_size ;
2010-12-14 01:49:23 +00:00
this - > mean_c0_run_length = max_c0_size ;
2010-12-14 00:06:32 +00:00
this - > num_c0_mergers = 0 ;
2010-01-23 02:13:59 +00:00
2010-12-11 00:51:19 +00:00
r_val = 3.0 ; // MIN_R
2010-01-23 02:13:59 +00:00
tree_c0 = NULL ;
2010-02-18 23:31:57 +00:00
tree_c0_mergeable = NULL ;
2010-08-05 17:43:46 +00:00
c0_is_merging = false ;
tree_c1_prime = NULL ;
2010-01-23 02:13:59 +00:00
tree_c1 = NULL ;
2010-02-18 23:31:57 +00:00
tree_c1_mergeable = NULL ;
2010-01-23 02:13:59 +00:00
tree_c2 = NULL ;
2010-08-17 21:23:39 +00:00
// This bool is purely for external code.
this - > accepting_new_requests = true ;
2010-12-08 19:49:13 +00:00
this - > shutting_down_ = false ;
2011-03-25 20:05:49 +00:00
c0_flushing = false ;
c1_flushing = false ;
2010-12-14 00:06:32 +00:00
this - > merge_mgr = 0 ;
2010-01-23 02:13:59 +00:00
tmerger = new tuplemerger ( & replace_merger ) ;
2010-05-27 01:49:27 +00:00
header_mut = rwlc_initlock ( ) ;
pthread_mutex_init ( & rb_mut , 0 ) ;
2010-05-19 23:42:06 +00:00
pthread_cond_init ( & c0_needed , 0 ) ;
pthread_cond_init ( & c0_ready , 0 ) ;
pthread_cond_init ( & c1_needed , 0 ) ;
pthread_cond_init ( & c1_ready , 0 ) ;
2010-02-20 01:18:39 +00:00
epoch = 0 ;
2010-03-13 00:05:06 +00:00
this - > internal_region_size = internal_region_size ;
this - > datapage_region_size = datapage_region_size ;
this - > datapage_size = datapage_size ;
2011-04-20 20:17:26 +00:00
2011-04-20 21:51:04 +00:00
this - > log_mode = log_mode ;
2011-04-21 00:28:05 +00:00
this - > batch_size = 0 ;
log_file = stasis_log_file_pool_open ( " lsm_log " ,
stasis_log_file_mode ,
stasis_log_file_permissions ) ;
2010-01-23 02:13:59 +00:00
}
2010-02-10 21:49:50 +00:00
2010-03-17 21:51:26 +00:00
/**
 * Releases the resources owned by the table: the merge manager, the c1 and c2
 * disk components, the in-memory c0 tree, the update log, the locks and
 * condition variables created by the constructor, and the tuple merger.
 *
 * NOTE(review): tree_c0_mergeable, tree_c1_mergeable and tree_c1_prime are not
 * released here; presumably the merge threads own them -- confirm.
 */
template<class TUPLE>
logtable<TUPLE>::~logtable()
{
  delete merge_mgr;  // shuts down pretty print thread.

  // delete on a null pointer is a no-op, so no explicit NULL tests are needed.
  delete tree_c1;
  delete tree_c2;

  if (tree_c0 != NULL) {
    memTreeComponent<datatuple>::tearDownTree(tree_c0);
  }

  log_file->close(log_file);

  pthread_mutex_destroy(&rb_mut);
  rwlc_deletelock(header_mut);

  pthread_cond_destroy(&c0_needed);
  pthread_cond_destroy(&c0_ready);
  pthread_cond_destroy(&c1_needed);
  pthread_cond_destroy(&c1_ready);

  delete tmerger;
}
2010-04-28 19:21:25 +00:00
/**
 * Process-wide Stasis start-up for logtable users.
 *
 * Registers the DataPage page implementation, sets the buffer manager's
 * "writes are sequential" hint, and then calls Tinit().  The registration and
 * the hint are both set before Tinit() runs.
 */
template<class TUPLE>
void logtable<TUPLE>::init_stasis()
{
  DataPage<datatuple>::register_stasis_page_impl();

  stasis_buffer_manager_hint_writes_are_sequential = 1;

  Tinit();
}
/**
 * Counterpart of init_stasis(): shuts Stasis down by calling Tdeinit().
 */
template<class TUPLE>
void logtable<TUPLE>::deinit_stasis()
{
  Tdeinit();
}
2010-03-17 21:51:26 +00:00
/**
 * Creates a brand new table inside transaction xid.
 *
 * Allocates the persistent header record, builds empty c2 and c1 disk
 * components and an empty in-memory c0 tree, creates the merge manager, and
 * writes the initial header (with a log truncation point of 0).
 *
 * @param xid  transaction in which all allocations are performed
 * @return     record id of the table header; pass it to openTable() later
 */
template<class TUPLE>
recordid logtable<TUPLE>::allocTable(int xid)
{
  table_rec = Talloc(xid, sizeof(tbl_header));

  // The disk components are created without merge statistics attached.
  mergeStats * const no_stats = 0;

  // The big tree (c2) is created before the small tree (c1).
  tree_c2 = new diskTreeComponent(xid, internal_region_size, datapage_region_size, datapage_size, no_stats);
  tree_c1 = new diskTreeComponent(xid, internal_region_size, datapage_region_size, datapage_size, no_stats);

  merge_mgr = new mergeManager(this);
  merge_mgr->set_c0_size(max_c0_size);
  merge_mgr->new_merge(0);

  tree_c0 = new memTreeComponent<datatuple>::rbtree_t;

  // Fill in the header fields that update_persistent_header() does not derive itself.
  tbl_header.merge_manager = merge_mgr->talloc(xid);
  tbl_header.log_trunc = 0;
  update_persistent_header(xid);

  return table_rec;
}
2010-03-17 21:51:26 +00:00
/**
 * Attaches this object to an existing table.
 *
 * Reads the persistent header stored at rid, re-opens the c2 and c1 disk
 * components from the roots/allocator states recorded there, creates a fresh
 * (empty) in-memory c0 tree, and restores the merge manager from the header.
 *
 * @param xid  transaction used for the header read and component set-up
 * @param rid  record id previously returned by allocTable()
 */
template<class TUPLE>
void logtable<TUPLE>::openTable(int xid, recordid rid)
{
  table_rec = rid;
  Tread(xid, table_rec, &tbl_header);

  tree_c2 = new diskTreeComponent(xid, tbl_header.c2_root, tbl_header.c2_state, tbl_header.c2_dp_state, 0);
  tree_c1 = new diskTreeComponent(xid, tbl_header.c1_root, tbl_header.c1_state, tbl_header.c1_dp_state, 0);
  tree_c0 = new memTreeComponent<datatuple>::rbtree_t;

  merge_mgr = new mergeManager(this, xid, tbl_header.merge_manager);
  merge_mgr->set_c0_size(max_c0_size);
  merge_mgr->new_merge(0);
}
2011-04-20 20:17:26 +00:00
/**
 * Appends one tuple to the update log as an update entry.
 *
 * The entry is written with transaction id 0, INVALID_PAGE, no Page pointer
 * and operation 0; the serialized tuple is the entry's argument payload.
 * The log is not forced here -- callers decide when to force the tail.
 *
 * NOTE(review): if datatuple::to_bytes() returns a freshly allocated buffer,
 * it is never released here -- confirm ownership against datatuple.
 *
 * @param tup  tuple to serialize into the log
 */
template<class TUPLE>
void logtable<TUPLE>::logUpdate(datatuple *tup)
{
  LogEntry *entry = stasis_log_write_update(log_file,
                                            0,
                                            INVALID_PAGE,
                                            0 /*Page**/,
                                            0 /*op*/,
                                            tup->to_bytes(),
                                            tup->byte_length());
  log_file->write_entry_done(log_file, entry);
}
/**
 * Replays the update log into the table and ends recovery.
 *
 * Reading starts at the truncation point stored in the table header, or at the
 * beginning of the log when no truncation point has been recorded (0).  Every
 * UPDATELOG entry is deserialized and re-inserted through insertTuple();
 * because `recovering` is still true while this runs, the re-inserted tuples
 * are not logged a second time.  INTERNALLOG entries are skipped; any other
 * entry type is treated as log corruption and aborts the process.
 *
 * When the loop finishes the log handle is released and `recovering` is
 * cleared, which re-enables logging and log truncation.
 */
template<class TUPLE>
void logtable<TUPLE>::replayLog()
{
  const lsn_t start = tbl_header.log_trunc;
  LogHandle *lh = start ? getLSNHandle(log_file, start) : getLogHandle(log_file);

  const LogEntry *e;
  while ((e = nextInLog(lh))) {
    switch (e->type) {
      case UPDATELOG: {
        datatuple *tup = datatuple::from_bytes((byte *)stasis_log_entry_update_args_cptr(e));
        insertTuple(tup);
        // insertTuple stores its own copy/merge result, so the decoded tuple is ours to free.
        datatuple::freetuple(tup);
      } break;
      case INTERNALLOG: {
        // Nothing to replay for internal entries.
      } break;
      default:
        // Unexpected entry type: fail loudly even when assertions are compiled out.
        assert(e->type == UPDATELOG);
        abort();
    }
  }

  // Release the handle obtained above; it was previously leaked on every replay.
  freeLogHandle(lh);

  recovering = false;
  printf(" \n Log replay complete. \n ");
}
/**
 * Reports the LSN at which the next log entry would be written.
 *
 * @return INVALID_LSN while the table is still recovering or when logging is
 *         disabled (log_mode == 0); otherwise the log's next available LSN
 */
template<class TUPLE>
lsn_t logtable<TUPLE>::get_log_offset()
{
  const bool logging_active = log_mode && !recovering;
  if (!logging_active) {
    return INVALID_LSN;
  }
  return log_file->next_available_lsn(log_file);
}
/**
 * Truncates the update log up to the truncation point stored in the header.
 *
 * Nothing is truncated while recovery is still in progress (a message is
 * printed instead), nor when no truncation point has been recorded yet
 * (tbl_header.log_trunc == 0).
 */
template<class TUPLE>
void logtable<TUPLE>::truncate_log()
{
  if (recovering) {
    printf(" Not truncating log until recovery is complete. \n ");
    return;
  }

  if (!tbl_header.log_trunc) {
    return;  // no truncation point recorded
  }

  printf(" truncating log to %lld \n ", tbl_header.log_trunc);
  log_file->truncate(log_file, tbl_header.log_trunc);
}
2010-03-17 21:51:26 +00:00
template < class TUPLE >
2011-04-20 20:17:26 +00:00
void logtable < TUPLE > : : update_persistent_header ( int xid , lsn_t trunc_lsn ) {
2010-02-27 00:35:13 +00:00
2010-05-19 23:42:06 +00:00
tbl_header . c2_root = tree_c2 - > get_root_rid ( ) ;
2010-03-13 00:05:06 +00:00
tbl_header . c2_dp_state = tree_c2 - > get_datapage_allocator_rid ( ) ;
tbl_header . c2_state = tree_c2 - > get_internal_node_allocator_rid ( ) ;
tbl_header . c1_root = tree_c1 - > get_root_rid ( ) ;
tbl_header . c1_dp_state = tree_c1 - > get_datapage_allocator_rid ( ) ;
tbl_header . c1_state = tree_c1 - > get_internal_node_allocator_rid ( ) ;
2010-01-23 02:13:59 +00:00
2010-12-14 00:06:32 +00:00
merge_mgr - > marshal ( xid , tbl_header . merge_manager ) ;
2010-06-21 20:03:05 +00:00
2011-04-20 20:17:26 +00:00
if ( trunc_lsn ! = INVALID_LSN ) {
printf ( " \n setting log truncation point to %lld \n " , trunc_lsn ) ;
tbl_header . log_trunc = trunc_lsn ;
}
2010-01-23 02:13:59 +00:00
Tset ( xid , table_rec , & tbl_header ) ;
}
2010-03-17 21:51:26 +00:00
/**
 * Hands the current c0 tree over to the c0-c1 merge thread.
 *
 * Waits (on c0_needed, releasing header_mut while waiting) until no c0 merge
 * is in progress, marks c0 as merging, starts a new level-0 merge in the
 * merge manager and signals c0_ready.  If another flush completed while this
 * call was waiting (merge_count changed), the call returns without doing
 * anything further.
 *
 * When the caller was blocked for more than one second, the blocked time is
 * printed, together with the time writes were serviced since the previous
 * report on every report after the first.
 *
 * NOTE(review): the wait uses rwlc_cond_wait on header_mut, so the caller
 * presumably holds header_mut -- confirm.
 * NOTE(review): the early return inside the wait loop does not reset
 * c0_flushing, which this call set to true -- confirm this is intended.
 */
template<class TUPLE>
void logtable<TUPLE>::flushTable()
{
  // Function-local statics: shared by all callers of this instantiation.
  static double last_start;
  static bool first = 1;
  static int merge_count = 0;

  struct timeval start_tv, stop_tv;

  gettimeofday(&start_tv, 0);
  const double start = tv_to_double(start_tv);

  c0_flushing = true;

  bool blocked = false;
  const int expected_merge_count = merge_count;

  // Wait for the previous merge of the mem-tree; hopefully this is rare.
  while (get_c0_is_merging()) {
    rwlc_cond_wait(&c0_needed, header_mut);

    blocked = true;
    if (expected_merge_count != merge_count) {
      // Somebody else flushed while we were waiting; nothing left to do.
      return;
    }
  }

  set_c0_is_merging(true);

  merge_mgr->get_merge_stats(0)->handed_off_tree();
  merge_mgr->new_merge(0);

  gettimeofday(&stop_tv, 0);
  const double stop = tv_to_double(stop_tv);

  pthread_cond_signal(&c0_ready);
  DEBUG(" Signaled c0-c1 merge thread \n ");

  merge_count++;
  merge_mgr->get_merge_stats(0)->starting_merge();

  const double blocked_for = stop - start;
  if (blocked && blocked_for > 1.0) {
    if (first) {
      printf(" \n Blocked writes for %f sec \n ", blocked_for);
      first = 0;
    } else {
      printf(" \n Blocked writes for %f sec (serviced writes for %f sec) \n ",
             blocked_for, start - last_start);
    }
    last_start = stop;
  } else {
    DEBUG(" signaled c0-c1 merge \n ");
  }

  c0_flushing = false;
}
2010-03-17 21:51:26 +00:00
template < class TUPLE >
datatuple * logtable < TUPLE > : : findTuple ( int xid , const datatuple : : key_t key , size_t keySize )
2010-01-23 02:13:59 +00:00
{
//prepare a search tuple
2010-05-19 23:42:06 +00:00
datatuple * search_tuple = datatuple : : create ( key , keySize ) ;
2010-01-23 02:13:59 +00:00
2010-05-27 01:49:27 +00:00
pthread_mutex_lock ( & rb_mut ) ;
2010-01-23 02:13:59 +00:00
datatuple * ret_tuple = 0 ;
//step 1: look in tree_c0
2010-03-09 01:42:23 +00:00
memTreeComponent < datatuple > : : rbtree_t : : iterator rbitr = get_tree_c0 ( ) - > find ( search_tuple ) ;
2010-02-18 23:31:57 +00:00
if ( rbitr ! = get_tree_c0 ( ) - > end ( ) )
2010-01-23 02:13:59 +00:00
{
2010-02-18 23:31:57 +00:00
DEBUG ( " tree_c0 size %d \n " , get_tree_c0 ( ) - > size ( ) ) ;
2010-02-10 21:49:50 +00:00
ret_tuple = ( * rbitr ) - > create_copy ( ) ;
2010-01-23 02:13:59 +00:00
}
2010-05-27 01:49:27 +00:00
pthread_mutex_unlock ( & rb_mut ) ;
2010-06-21 20:03:05 +00:00
rwlc_readlock ( header_mut ) ; // XXX: FIXME with optimisitic concurrency control. Has to be before rb_mut, or we could merge the tuple with itself due to an intervening merge
2010-05-27 01:49:27 +00:00
2010-01-23 02:13:59 +00:00
bool done = false ;
//step: 2 look into first in tree if exists (a first level merge going on)
2010-02-18 23:31:57 +00:00
if ( get_tree_c0_mergeable ( ) ! = 0 )
2010-01-23 02:13:59 +00:00
{
DEBUG ( " old mem tree not null %d \n " , ( * ( mergedata - > old_c0 ) ) - > size ( ) ) ;
2010-02-18 23:31:57 +00:00
rbitr = get_tree_c0_mergeable ( ) - > find ( search_tuple ) ;
if ( rbitr ! = get_tree_c0_mergeable ( ) - > end ( ) )
2010-01-23 02:13:59 +00:00
{
2010-02-10 21:49:50 +00:00
datatuple * tuple = * rbitr ;
2010-01-23 02:13:59 +00:00
2010-02-10 21:49:50 +00:00
if ( tuple - > isDelete ( ) ) //tuple deleted
2010-01-23 02:13:59 +00:00
done = true ; //return ret_tuple
else if ( ret_tuple ! = 0 ) //merge the two
{
2010-02-10 21:49:50 +00:00
datatuple * mtuple = tmerger - > merge ( tuple , ret_tuple ) ; //merge the two
datatuple : : freetuple ( ret_tuple ) ; //free tuple from current tree
2010-01-23 02:13:59 +00:00
ret_tuple = mtuple ; //set return tuple to merge result
}
else //key first found in old mem tree
{
2010-05-19 23:42:06 +00:00
ret_tuple = tuple - > create_copy ( ) ;
2010-01-23 02:13:59 +00:00
}
//we cannot free tuple from old-tree 'cos it is not a copy
}
}
2010-08-05 17:43:46 +00:00
//step 2.5: check new c1 if exists
if ( ! done & & get_tree_c1_prime ( ) ! = 0 )
{
DEBUG ( " old c1 tree not null \n " ) ;
datatuple * tuple_oc1 = get_tree_c1_prime ( ) - > findTuple ( xid , key , keySize ) ;
if ( tuple_oc1 ! = NULL )
{
bool use_copy = false ;
if ( tuple_oc1 - > isDelete ( ) )
done = true ;
else if ( ret_tuple ! = 0 ) //merge the two
{
datatuple * mtuple = tmerger - > merge ( tuple_oc1 , ret_tuple ) ; //merge the two
datatuple : : freetuple ( ret_tuple ) ; //free tuple from before
ret_tuple = mtuple ; //set return tuple to merge result
}
else //found for the first time
{
use_copy = true ;
ret_tuple = tuple_oc1 ;
}
if ( ! use_copy )
{
datatuple : : freetuple ( tuple_oc1 ) ; //free tuple from tree old c1
}
}
}
//step 3: check c1
2010-01-23 02:13:59 +00:00
if ( ! done )
{
2010-03-13 00:05:06 +00:00
datatuple * tuple_c1 = get_tree_c1 ( ) - > findTuple ( xid , key , keySize ) ;
2010-01-23 02:13:59 +00:00
if ( tuple_c1 ! = NULL )
{
bool use_copy = false ;
if ( tuple_c1 - > isDelete ( ) ) //tuple deleted
2010-08-05 17:43:46 +00:00
done = true ;
2010-01-23 02:13:59 +00:00
else if ( ret_tuple ! = 0 ) //merge the two
{
datatuple * mtuple = tmerger - > merge ( tuple_c1 , ret_tuple ) ; //merge the two
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( ret_tuple ) ; //free tuple from before
2010-08-05 17:43:46 +00:00
ret_tuple = mtuple ; //set return tuple to merge result
}
2010-01-23 02:13:59 +00:00
else //found for the first time
{
use_copy = true ;
ret_tuple = tuple_c1 ;
}
if ( ! use_copy )
{
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( tuple_c1 ) ; //free tuple from tree c1
2010-01-23 02:13:59 +00:00
}
}
}
//step 4: check old c1 if exists
2010-02-18 23:31:57 +00:00
if ( ! done & & get_tree_c1_mergeable ( ) ! = 0 )
2010-01-23 02:13:59 +00:00
{
DEBUG ( " old c1 tree not null \n " ) ;
2010-03-13 00:05:06 +00:00
datatuple * tuple_oc1 = get_tree_c1_mergeable ( ) - > findTuple ( xid , key , keySize ) ;
2010-01-23 02:13:59 +00:00
if ( tuple_oc1 ! = NULL )
{
bool use_copy = false ;
if ( tuple_oc1 - > isDelete ( ) )
done = true ;
else if ( ret_tuple ! = 0 ) //merge the two
{
datatuple * mtuple = tmerger - > merge ( tuple_oc1 , ret_tuple ) ; //merge the two
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( ret_tuple ) ; //free tuple from before
2010-01-23 02:13:59 +00:00
ret_tuple = mtuple ; //set return tuple to merge result
}
else //found for the first time
{
use_copy = true ;
ret_tuple = tuple_oc1 ;
}
if ( ! use_copy )
{
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( tuple_oc1 ) ; //free tuple from tree old c1
2010-01-23 02:13:59 +00:00
}
}
}
//step 5: check c2
if ( ! done )
{
DEBUG ( " Not in old first disk tree \n " ) ;
2010-03-13 00:05:06 +00:00
datatuple * tuple_c2 = get_tree_c2 ( ) - > findTuple ( xid , key , keySize ) ;
2010-01-23 02:13:59 +00:00
if ( tuple_c2 ! = NULL )
{
bool use_copy = false ;
if ( tuple_c2 - > isDelete ( ) )
done = true ;
else if ( ret_tuple ! = 0 )
{
datatuple * mtuple = tmerger - > merge ( tuple_c2 , ret_tuple ) ; //merge the two
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( ret_tuple ) ; //free tuple from before
2010-01-23 02:13:59 +00:00
ret_tuple = mtuple ; //set return tuple to merge result
}
else //found for the first time
{
use_copy = true ;
ret_tuple = tuple_c2 ;
}
if ( ! use_copy )
{
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( tuple_c2 ) ; //free tuple from tree c2
2010-01-23 02:13:59 +00:00
}
}
}
2010-05-27 01:49:27 +00:00
rwlc_unlock ( header_mut ) ;
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( search_tuple ) ;
2010-01-23 02:13:59 +00:00
return ret_tuple ;
}
/*
* returns the first record found with the matching key
* ( not to be used together with diffs )
* */
2010-03-17 21:51:26 +00:00
template < class TUPLE >
datatuple * logtable < TUPLE > : : findTuple_first ( int xid , datatuple : : key_t key , size_t keySize )
2010-01-23 02:13:59 +00:00
{
//prepare a search tuple
2010-02-10 21:49:50 +00:00
datatuple * search_tuple = datatuple : : create ( key , keySize ) ;
2010-01-23 02:13:59 +00:00
datatuple * ret_tuple = 0 ;
//step 1: look in tree_c0
2010-05-27 01:49:27 +00:00
pthread_mutex_lock ( & rb_mut ) ;
2010-03-09 01:42:23 +00:00
memTreeComponent < datatuple > : : rbtree_t : : iterator rbitr = get_tree_c0 ( ) - > find ( search_tuple ) ;
2010-02-18 23:31:57 +00:00
if ( rbitr ! = get_tree_c0 ( ) - > end ( ) )
2010-01-23 02:13:59 +00:00
{
DEBUG ( " tree_c0 size %d \n " , tree_c0 - > size ( ) ) ;
2010-02-10 21:49:50 +00:00
ret_tuple = ( * rbitr ) - > create_copy ( ) ;
2010-05-27 01:49:27 +00:00
pthread_mutex_unlock ( & rb_mut ) ;
2010-01-23 02:13:59 +00:00
}
else
{
DEBUG ( " Not in mem tree %d \n " , tree_c0 - > size ( ) ) ;
2010-05-27 01:49:27 +00:00
pthread_mutex_unlock ( & rb_mut ) ;
2010-06-21 20:03:05 +00:00
rwlc_readlock ( header_mut ) ; // XXX FIXME WITH OCC!!
2010-05-27 01:49:27 +00:00
2010-01-23 02:13:59 +00:00
//step: 2 look into first in tree if exists (a first level merge going on)
2010-02-18 23:31:57 +00:00
if ( get_tree_c0_mergeable ( ) ! = NULL )
2010-01-23 02:13:59 +00:00
{
DEBUG ( " old mem tree not null %d \n " , ( * ( mergedata - > old_c0 ) ) - > size ( ) ) ;
2010-02-18 23:31:57 +00:00
rbitr = get_tree_c0_mergeable ( ) - > find ( search_tuple ) ;
if ( rbitr ! = get_tree_c0_mergeable ( ) - > end ( ) )
2010-01-23 02:13:59 +00:00
{
2010-02-10 21:49:50 +00:00
ret_tuple = ( * rbitr ) - > create_copy ( ) ;
2010-01-23 02:13:59 +00:00
}
}
2010-08-05 17:43:46 +00:00
if ( ret_tuple = = 0 )
{
DEBUG ( " Not in first disk tree \n " ) ;
//step 4: check in progress c1 if exists
if ( get_tree_c1_prime ( ) ! = 0 )
{
DEBUG ( " old c1 tree not null \n " ) ;
ret_tuple = get_tree_c1_prime ( ) - > findTuple ( xid , key , keySize ) ;
}
}
2010-01-23 02:13:59 +00:00
if ( ret_tuple = = 0 )
{
DEBUG ( " Not in old mem tree \n " ) ;
//step 3: check c1
2010-03-13 00:05:06 +00:00
ret_tuple = get_tree_c1 ( ) - > findTuple ( xid , key , keySize ) ;
2010-01-23 02:13:59 +00:00
}
if ( ret_tuple = = 0 )
{
DEBUG ( " Not in first disk tree \n " ) ;
//step 4: check old c1 if exists
2010-02-18 23:31:57 +00:00
if ( get_tree_c1_mergeable ( ) ! = 0 )
2010-01-23 02:13:59 +00:00
{
2010-03-13 00:05:06 +00:00
DEBUG ( " old c1 tree not null \n " ) ;
ret_tuple = get_tree_c1_mergeable ( ) - > findTuple ( xid , key , keySize ) ;
2010-01-23 02:13:59 +00:00
}
}
if ( ret_tuple = = 0 )
{
DEBUG ( " Not in old first disk tree \n " ) ;
//step 5: check c2
2010-03-13 00:05:06 +00:00
ret_tuple = get_tree_c2 ( ) - > findTuple ( xid , key , keySize ) ;
}
2010-06-18 00:06:46 +00:00
rwlc_unlock ( header_mut ) ;
2010-01-23 02:13:59 +00:00
}
2010-02-10 21:49:50 +00:00
datatuple : : freetuple ( search_tuple ) ;
2010-01-23 02:13:59 +00:00
return ret_tuple ;
}
2010-08-30 22:22:25 +00:00
2010-03-17 21:51:26 +00:00
template < class TUPLE >
2010-08-30 22:22:25 +00:00
datatuple * logtable < TUPLE > : : insertTupleHelper ( datatuple * tuple )
2010-01-23 02:13:59 +00:00
{
2010-08-30 22:22:25 +00:00
//find the previous tuple with same key in the memtree if exists
memTreeComponent < datatuple > : : rbtree_t : : iterator rbitr = tree_c0 - > find ( tuple ) ;
datatuple * t = 0 ;
datatuple * pre_t = 0 ;
if ( rbitr ! = tree_c0 - > end ( ) )
{
pre_t = * rbitr ;
//do the merging
datatuple * new_t = tmerger - > merge ( pre_t , tuple ) ;
2010-12-14 00:06:32 +00:00
merge_mgr - > get_merge_stats ( 0 ) - > merged_tuples ( new_t , tuple , pre_t ) ;
2010-08-30 22:22:25 +00:00
t = new_t ;
tree_c0 - > erase ( pre_t ) ; //remove the previous tuple
tree_c0 - > insert ( new_t ) ; //insert the new tuple
}
else //no tuple with same key exists in mem-tree
{
2010-01-28 02:20:49 +00:00
2010-08-30 22:22:25 +00:00
t = tuple - > create_copy ( ) ;
2010-01-28 02:20:49 +00:00
2010-08-30 22:22:25 +00:00
//insert tuple into the rbtree
tree_c0 - > insert ( t ) ;
}
2010-01-23 02:13:59 +00:00
2010-08-30 22:22:25 +00:00
return pre_t ;
}
/**
 * Inserts a batch of tuples into c0.
 *
 * Every tuple is first reported to the merge manager, then (when logging is
 * enabled and the table is not recovering) written to the update log, and
 * finally inserted into c0 under a single acquisition of rb_mut.  The whole
 * batch counts as one logged operation towards the log-force threshold
 * (log_mode).  Tuples displaced from c0 are freed here, and their count and
 * total byte length are reported to the merge manager after rb_mut has been
 * released.
 *
 * @param tuples       array of tuples to insert; the caller keeps ownership
 * @param tuple_count  number of entries in tuples
 */
template<class TUPLE>
void logtable<TUPLE>::insertManyTuples(datatuple **tuples, int tuple_count)
{
  for (int idx = 0; idx < tuple_count; ++idx) {
    merge_mgr->read_tuple_from_small_component(0, tuples[idx]);
  }

  if (log_mode && !recovering) {
    for (int idx = 0; idx < tuple_count; ++idx) {
      logUpdate(tuples[idx]);
    }

    // Force the log tail once every log_mode logged operations.
    batch_size++;
    if (batch_size >= log_mode) {
      log_file->force_tail(log_file, LOG_FORCE_COMMIT);
      batch_size = 0;
    }
  }

  int num_old_tups = 0;
  pageid_t sum_old_tup_lens = 0;

  pthread_mutex_lock(&rb_mut);
  for (int idx = 0; idx < tuple_count; ++idx) {
    datatuple *displaced = insertTupleHelper(tuples[idx]);
    if (!displaced) {
      continue;
    }
    num_old_tups++;
    sum_old_tup_lens += displaced->byte_length();
    datatuple::freetuple(displaced);
  }
  pthread_mutex_unlock(&rb_mut);

  merge_mgr->read_tuple_from_large_component(0, num_old_tups, sum_old_tup_lens);
}
template < class TUPLE >
2011-04-20 21:51:04 +00:00
void logtable < TUPLE > : : insertTuple ( datatuple * tuple )
2010-08-30 22:22:25 +00:00
{
2011-04-21 00:28:05 +00:00
if ( log_mode & & ! recovering ) {
2011-04-20 20:17:26 +00:00
logUpdate ( tuple ) ;
2011-04-20 21:51:04 +00:00
batch_size + + ;
if ( batch_size > = log_mode ) {
log_file - > force_tail ( log_file , LOG_FORCE_COMMIT ) ;
batch_size = 0 ;
}
2011-04-20 20:17:26 +00:00
}
2010-08-30 22:22:25 +00:00
//lock the red-black tree
merge_mgr - > read_tuple_from_small_component ( 0 , tuple ) ; // has to be before rb_mut, since it calls tick with block = true, and that releases header_mut.
datatuple * pre_t = 0 ; // this is a pointer to any data tuples that we'll be deleting below. We need to update the merge_mgr statistics with it, but have to do so outside of the rb_mut region.
pthread_mutex_lock ( & rb_mut ) ;
pre_t = insertTupleHelper ( tuple ) ;
2010-06-18 00:06:46 +00:00
pthread_mutex_unlock ( & rb_mut ) ;
2010-08-05 17:43:46 +00:00
if ( pre_t ) {
// needs to be here; calls update_progress, which sometimes grabs mutexes..
merge_mgr - > read_tuple_from_large_component ( 0 , pre_t ) ; // was interspersed with the erase, insert above...
datatuple : : freetuple ( pre_t ) ; //free the previous tuple
}
2010-01-23 02:13:59 +00:00
DEBUG ( " tree size %d tuples %lld bytes. \n " , tsize , tree_bytes ) ;
}
2010-09-16 22:36:48 +00:00
/**
 * Conditionally inserts `tuple`, depending on the value currently stored.
 *
 * The current value is looked up with findTuple_first() using tuple2's key
 * when tuple2 is given, and tuple's key otherwise.
 *
 *  - tuple2 is NULL or a delete marker: the insert succeeds only if the key is
 *    currently absent (not found, or found as a delete marker).
 *  - otherwise: the insert succeeds only if the key is present and its data is
 *    byte-for-byte equal to tuple2's data.
 *
 * The check and the insert are serialized against other testAndSetTuple()
 * calls by a function-local static mutex; that mutex does not exclude plain
 * insertTuple() callers.
 *
 * @param tuple   tuple to insert on success; the caller keeps ownership
 * @param tuple2  expected current value, or NULL / a delete marker to require
 *                that the key does not exist
 * @return        true if `tuple` was inserted, false otherwise
 */
template<class TUPLE>
bool logtable<TUPLE>::testAndSetTuple(datatuple *tuple, datatuple *tuple2)
{
  bool succ = false;
  static pthread_mutex_t test_and_set_mut = PTHREAD_MUTEX_INITIALIZER;
  pthread_mutex_lock(&test_and_set_mut);

  datatuple *exists = findTuple_first(-1,
                                      tuple2 ? tuple2->key() : tuple->key(),
                                      tuple2 ? tuple2->keylen() : tuple->keylen());

  if (!tuple2 || tuple2->isDelete()) {
    // Caller expects the key to be absent.
    succ = (!exists || exists->isDelete());
  } else {
    // Caller expects a specific value.  A missing key can never match it; the
    // original code dereferenced the NULL lookup result in that case.
    succ = exists
        && tuple2->datalen() == exists->datalen()
        && !memcmp(tuple2->data(), exists->data(), tuple2->datalen());
  }

  if (exists) datatuple::freetuple(exists);
  if (succ) insertTuple(tuple);

  pthread_mutex_unlock(&test_and_set_mut);
  return succ;
}
2010-03-17 21:51:26 +00:00
/**
 * Adds an iterator to the list that bump_epoch() invalidates.
 *
 * No locking is done here.
 *
 * @param it  iterator to track; remove it again with forgetIterator()
 */
template<class TUPLE>
void logtable<TUPLE>::registerIterator(iterator *it)
{
  its.push_back(it);
}
2010-03-17 21:51:26 +00:00
/**
 * Removes an iterator from the list of tracked iterators.
 *
 * Only the first matching entry is removed; an iterator that was never
 * registered is silently ignored.
 *
 * @param it  iterator previously passed to registerIterator()
 */
template<class TUPLE>
void logtable<TUPLE>::forgetIterator(iterator *it)
{
  // Locate the first slot holding `it` ...
  unsigned int pos = 0;
  while (pos < its.size() && its[pos] != it) {
    ++pos;
  }

  // ... and drop it if there was one.
  if (pos < its.size()) {
    its.erase(its.begin() + pos);
  }
}
2010-03-17 21:51:26 +00:00
/**
 * Advances the table's epoch counter and invalidates every registered
 * iterator, in registration order.
 */
template<class TUPLE>
void logtable<TUPLE>::bump_epoch()
{
  epoch++;

  const unsigned int tracked = its.size();
  for (unsigned int idx = 0; idx != tracked; ++idx) {
    its[idx]->invalidate();
  }
}
2010-03-17 21:51:26 +00:00
// Explicit instantiation: the member definitions above live in this source
// file, so the logtable<datatuple> specialization is instantiated here.
template class logtable < datatuple > ;