initial import; removed cruft from mert's tarball, tweaked make's clean targets

git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@520 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
This commit is contained in:
sears 2010-01-23 02:13:59 +00:00
commit d016498f8d
38 changed files with 10478 additions and 0 deletions

165
FwCode.h Normal file

@ -0,0 +1,165 @@
/* Copyright (C) 2008 Yahoo! Inc. All Rights Reserved. */
#ifndef __FW_CODE__H
#define __FW_CODE__H
#include <string>
/**
* Global framework response codes.
*/
class FwCode {
public:
typedef int ResponseCode;
static const std::string unknownCodeStr;
/**
* The convention here is to keep related codes grouped together, so
* that it is easier to find all existing codes for a particular
* module. Each section is given a range of 50 codes, so that adding
* a new code to an existing section won't invalidate all of the codes
* following it in the enum (causing binary incompatibility).
*/
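// Illustrative only (not part of the original header): a hypothetical new
// section would claim the next unused multiple of 50 instead of renumbering
// existing codes, e.g.:
//   //----------- Foo section (hypothetical) -------------
//   // static const ResponseCode FooOpenFailed = 500; //!< first code of the new range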
//----------- Generic section -------------
static const ResponseCode FwOk = 0; //!< All successes
static const ResponseCode FwError = 1; //!< General error code
static const ResponseCode FwCrit = 2; //!< General critical error; may originate from a low-level library to indicate that a serious error has occurred.
static const ResponseCode MdbmOpenFailed = 3; //!< Any kind of mdbm open failure
static const ResponseCode MdbmOperationFailed = 4; //!< Any store/fetch/lock from mdbm failed
static const ResponseCode NoMem = 5; //!< Out Of Memory
static const ResponseCode InvalidParam = 6; //!< Invalid parameter
static const ResponseCode NotFound = 7; //!< Failed to find the specified info; usually returned by access methods
static const ResponseCode InvalidState = 8; //!< Invalid state
static const ResponseCode ConnReset = 9; //!< connection reset
static const ResponseCode Timeout = 10; //!< operation timed out
static const ResponseCode InvalidData = 11; //!< buffer data is invalid
static const ResponseCode BufTooSmall = 12; //!< Buffer size is smaller than required
static const ResponseCode MalformedRequest = 13; //!< Request data (like the URI) is malformed
static const ResponseCode RequestTooLarge = 14; //!< Request data (like the body) is too big
static const ResponseCode ConvertToDhtDataFailed = 15; //!< Failed to convert JSON string to DHT::Data
static const ResponseCode ConvertFromDhtDataFailed = 16; //!< Failed to convert DHT::Data to JSON string
static const ResponseCode BadHexString = 17; //!< Failed to parse a hex string
static const ResponseCode ShmemCorrupted = 18; //!< A shared mem corruption has been detected.
static const ResponseCode ParseError = 19; //!< Generic parsing problem
/// If mdbm unlock fails, most of the time we want to shut down the
/// system automatically, without letting the caller know that we did
/// so. In specific instances where the caller is the FaultHandler, or
/// the Oversight fault counter (there may be other examples), we don't want
/// to do this because we want to avoid a cross-dependency.
static const ResponseCode MdbmUnlockFailed = 20;
//----------- Config section -------------
// Config
static const ResponseCode ConfigFailure = 50; //!< Failure to find or parse a config entry
//----------- UChar section -------------
// UCharUtils
static const ResponseCode UcnvOpenFailed = 100; //!< Failed to open ucnv converter for utf-8
static const ResponseCode DataNotUtf8 = 101; //!< Data is not in utf-8 format
static const ResponseCode ConvertToUCharFailed = 102; //!< Failed to convert utf-8 string to UChar string
static const ResponseCode CompileRegExFailed = 103; //!< Failed to compile the regular expression
//----------- Yca section -------------
// YcaClient
static const ResponseCode YcaOpenFailed = 150; //!< Failed to open the yca database
static const ResponseCode YcaCertInvalid = 151; //!< Validation of presented cert failed
static const ResponseCode YcaCertNotFound = 152; //!< certificate for the requested appID was not found
//----------- Broker section -------------
static const ResponseCode BrokerClientOpenFailed = 200; //!< Failed to connect to broker
static const ResponseCode UncertainPublish = 201; //!< Publish was uncertain - unknown if it happened
static const ResponseCode PublishFailed = 202; //!< Publish failed (for certain :))
static const ResponseCode SubscribeFailed = 203; //!< Failed to subscribe to a topic
static const ResponseCode NoSubscriptionFound = 204; //!< Operation on a sub failed because we (locally)
// don't know about it
static const ResponseCode RegisterFailed = 205; //!< Failed to register handler for subscription
static const ResponseCode UnsubscribeFailed = 206; //!< Failed to unsubscribe from sub
static const ResponseCode ListTopicsFailed = 207; //!< Failed to list subscribed topics
static const ResponseCode ConsumeFailed = 208; //!< Failed to consume messages for a topic
static const ResponseCode TopicInvalid = 209; //!< Topic is invalid (was usurped or ymb 'lost' it)
static const ResponseCode NoMessageDelivered = 210; //!< Call to deliver() found no messages ready
static const ResponseCode ConsumeFailedBadTopic = 211; //!< The topic is bad - our handle is bad,
// or it got usurped
static const ResponseCode ConsumeFailedBadHandle = 212; //!< Our ymb handle is bad - not usable anymore
static const ResponseCode ConsumeFailedConnectionError = 213; //!< a recoverable connection error
static const ResponseCode ConsumeFailedServerBusy = 214; //!< ymb server is having a temporary issue,
// not a failure per se
// second argument to messageProcessed()
static const ResponseCode ConsumeMessage = 215; //!< consume this message
static const ResponseCode ConsumeAndUnsubscribe = 216; //!< end this channel
// Internal to ymb implementation
static const ResponseCode YmbSubscribeTempFailure = 217; //!< A failure that might be resolved on a retry
static const ResponseCode YmbSubscribeTimedout = 218; //!< A timeout failure
static const ResponseCode YmbSubscriptionExists = 219; //!< Attempt to create a sub that already exists
static const ResponseCode NoSuchSubscription = 220; //!< Attempt to attach to a sub that does not exist
static const ResponseCode AttachNoSuchSubscription = 221; //!< Specific to attach, no subscription to attach to (not necessarily an error)
static const ResponseCode BrokerInitFailed = 222; //!< Config or allocation failed
static const ResponseCode BrokerConnectionLost = 223; //!< Lost connection to broker
static const ResponseCode BrokerFatalError = 224; //!< Generally shared mem corruption
//----------- Daemon section -------------
// Daemon
static const ResponseCode NoImpl = 250; //!< No op
static const ResponseCode Restart = 251; //!< Exit the daemon so that it is restarted right away.
// request that the daemon do a soft restart
static const ResponseCode Exit = 252; //!< Exit the daemon so that it is NOT restarted right away. A monitoring process may restart the entire system later.
static const ResponseCode StopDelivery = 253; //!< Stop delivery on the topic, returned by Broker handlers only.
static const ResponseCode RetryDelivery = 254; //!< Stop delivery on the topic but retry after sometime, returned by Broker handlers only.
//----------- Lock section -------------
// LockManager
//All of these lock errors are handled in SuFaultHandler.cc.
//Any addition to these error codes requires an update to the SuFaultHandler.
static const ResponseCode LockSyserr = 301; //!< System error during lock/unlock op
static const ResponseCode LockInconsis = 302; //!< Inconsistency detected in LockManager.
static const ResponseCode LockNested = 303; //!< Nested locking of same key not allowed.
static const ResponseCode LockNosuchpid = 304; //!< This pid does not hold the lock.
static const ResponseCode LockUnavail = 305; //!< Out of locks (lock unavailable)
static const ResponseCode LockInitfail = 306; //!< Initialization failure of the lock subsystem
static const ResponseCode LockInvalidarg = 307; //!< Invalid arguments to lock subsystem.
//----------- Message section -------------
//Message and Message serialization
static const ResponseCode SerializeFailed = 350; //!< Message Serialization Failed
static const ResponseCode DeserializeFailed = 351; //!< Message Deserialization failed
static const ResponseCode NoResponseCodeInMessage = 352;
//----------- Transport Errors -------------
static const ResponseCode TransportSendError = 400; //!< Curl error in communicating with other server
static const ResponseCode TransportSetHeaderFailed = 401; //!< Error in setting header in curl request
static const ResponseCode TransportCurlInitError = 402; //!< Error initializing curl handle -- should be curl-specific
static const ResponseCode TransportUncertain = 403; //!< Send came back uncertain (timeout, usually)
static const ResponseCode TransportInvalidResponseBody = 404; //!< Send came back with an unparsable body
//----------- Apache/Web section -------------
static const ResponseCode EndOfBody = 450; //!< Normal end of incoming request body
static const ResponseCode BodyReadFailed = 451; //!< Failed reading incoming request body
static const ResponseCode BodyWriteFailed = 452; //!< Failed writing outgoing request body
static const ResponseCode EncryptionFailed = 453; //!< Failed to encrypt body or header
static const ResponseCode DecryptionFailed = 454; //!< Failed to decrypt body or header
/**
* Give back a basic, generic string description of the response code.
*
* @param rc The response code to convert.
* @return The string describing it.
*/
static std::string toString(ResponseCode rc);
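// Illustrative usage (hypothetical caller, not part of this header): codes are
// propagated as-is and only rendered with toString() at logging/reporting time.
//
//   FwCode::ResponseCode rc = someOperation();   // someOperation() is assumed
//   if (rc != FwCode::FwOk) {
//       std::cerr << "operation failed: " << FwCode::toString(rc) << std::endl;
//   }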
};
/* For customized vim control
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4:ts=4:et
* vim<600: sw=4:ts=4:et
*/
#endif

77
LogUtils.cc Normal file

@ -0,0 +1,77 @@
/*! \file LogUtils.cc
* \brief Helper functions for log4cpp.
*
* Copyright (c) 2008 Yahoo, Inc.
* All rights reserved.
*/
#include <iostream>
#include <log4cpp/PropertyConfigurator.hh>
#include "LogUtils.h"
using namespace log4cpp;
using namespace std;
// hacked link to actioncontext
std::string s_trackPathLog;
LogMethod::
LogMethod(log4cpp::Category& log, log4cpp::Priority::Value priority,
const char *function) :
log_(log), priority_(priority), function_(function)
{
if(log_.isPriorityEnabled(priority_)) {
log_.getStream(priority_) << "Entering: " << function_;
}
}
LogMethod::
~LogMethod()
{
if(log_.isPriorityEnabled(priority_)) {
log_.getStream(priority_) << "Exiting: " << function_;
}
}
// Protects against multiple calls (won't try to re-init) and gives
// back the same answer the original call got.
static int log4cppInitResult = -1;
bool
initLog4cpp(const string &confFile)
{
if (log4cppInitResult != -1) {
return (log4cppInitResult == 0 ? true : false);
}
log4cppInitResult = 0; // Assume success.
try {
PropertyConfigurator::configure(confFile);
} catch (log4cpp::ConfigureFailure &e) {
cerr << "log4cpp configuration failure while loading '" <<
confFile << "' : " << e.what() << endl;
log4cppInitResult = 1;
} catch (std::exception &e) {
cerr << "exception caught while configuring log4cpp via '" <<
confFile << "': " << e.what() << endl;
log4cppInitResult = 1;
} catch (...) {
cerr << "unknown exception while configuring log4cpp via '" <<
confFile << "'." << endl;
log4cppInitResult = 1;
}
return (log4cppInitResult == 0 ? true : false);
}
/*
* For customized vim control
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4:ts=4:et
* vim<600: sw=4:ts=4:et
*/

130
LogUtils.h Normal file

@ -0,0 +1,130 @@
/* Copyright (C) 2007 Yahoo! Inc. All Rights Reserved. */
#ifndef LOG_UTIL_H
#define LOG_UTIL_H
#include <log4cpp/Category.hh>
#include <stdexcept> // std::runtime_error is used by BAD_CODE_ABORT() below
#include "StringUtils.h"
/**
* Quick and dirty link between LogUtils and ActionContext without having to
* resolve cross-inclusion issues, or force all components to start including
* ActionContext if they don't already.
*/
extern std::string s_trackPathLog;
// These macros cannot be protected by braces because of the trailing stream
// arguments that get appended. Care must be taken not to use them inside if/else
// blocks that do not use curly braces.
// I.e., the following will give unexpected results:
// if(foo)
// DHT_DEBUG_STREAM() << "heyheyhey";
// else
// blah();
// The 'else' will end up applying to the 'if' within the debug macro.
// Regardless of this, our standards say to always use curly brackets
// on every block anyway, no matter what.
#define DHT_DEBUG_STREAM() if(log.isDebugEnabled()) log.debugStream() << __FUNCTION__ << "():" << __LINE__ << ":"
#define DHT_INFO_STREAM() if(log.isInfoEnabled()) log.infoStream() << __FUNCTION__ << "():" << __LINE__ << ":"
#define DHT_INFO_WITH_STACK_STREAM() if(log.isInfoEnabled()) log.infoStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_WARN_STREAM() if(log.isWarnEnabled()) log.warnStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_ERROR_STREAM() if(log.isErrorEnabled()) log.errorStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_CRIT_STREAM() if(log.isCritEnabled()) log.critStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_TRACE_PRIORITY log4cpp::Priority::DEBUG + 50
#define DHT_TRACE_STREAM() if (log.isPriorityEnabled(DHT_TRACE_PRIORITY)) log.getStream(DHT_TRACE_PRIORITY) << __FUNCTION__ << "():" << __LINE__ << ":"
// Sadly, sometimes 'log' is reserved by someone else so the code needs to
// use a different name for log. In that case, it can be passed in to these.
#define DHT_DEBUG_STREAML(x_log_hdl_x) if((x_log_hdl_x).isDebugEnabled()) (x_log_hdl_x).debugStream() << __FUNCTION__ << "():" << __LINE__ << ":"
#define DHT_INFO_STREAML(x_log_hdl_x) if((x_log_hdl_x).isInfoEnabled()) (x_log_hdl_x).infoStream() << __FUNCTION__ << "():" << __LINE__ << ":"
#define DHT_INFO_WITH_STACK_STREAML(x_log_hdl_x) if((x_log_hdl_x).isInfoEnabled()) (x_log_hdl_x).infoStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_WARN_STREAML(x_log_hdl_x) if((x_log_hdl_x).isWarnEnabled()) (x_log_hdl_x).warnStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_ERROR_STREAML(x_log_hdl_x) if((x_log_hdl_x).isErrorEnabled()) (x_log_hdl_x).errorStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_CRIT_STREAML(x_log_hdl_x) if((x_log_hdl_x).isCritEnabled()) (x_log_hdl_x).critStream() << __FUNCTION__ << "():" << __LINE__ << ":" << s_trackPathLog
#define DHT_TRACE_STREAML(x_log_hdl_x) if ((x_log_hdl_x).isPriorityEnabled(DHT_TRACE_PRIORITY)) (x_log_hdl_x).getStream(DHT_TRACE_PRIORITY) << __FUNCTION__ << "():" << __LINE__ << ":"
//Macros to use when a function returns on error without writing any log message
// or error translation
#define RETURN_IF_NOT_OK(x_call_x) \
{ \
FwCode::ResponseCode rcx___ = (x_call_x); \
if(rcx___ != FwCode::FwOk) { \
return rcx___; \
} \
}
#define RETURN_THIS_IF_NOT_OK(x_othercode_x, x_call_x) \
{ \
FwCode::ResponseCode rcx___ = (x_call_x); \
if(rcx___ != FwCode::FwOk) { \
return (x_othercode_x); \
} \
}
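// Illustrative usage of the two macros above (stepOne()/stepTwo() are
// hypothetical helpers returning FwCode::ResponseCode):
//
//   FwCode::ResponseCode doBoth()
//   {
//       RETURN_IF_NOT_OK(stepOne());                        // propagate stepOne's code
//       RETURN_THIS_IF_NOT_OK(FwCode::FwError, stepTwo());  // collapse any failure to FwError
//       return FwCode::FwOk;
//   }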
/// Caution! Only use in checks for 'impossible' code conditions. Regular errors
/// should be handled normally.
#define BAD_CODE_ABORT() \
{ \
std::string x_msg_x("Bad code at " __FILE__ ":"); \
x_msg_x.append(StringUtils::toString(__LINE__)); \
throw std::runtime_error(x_msg_x); \
}
#define BAD_CODE_IF_NOT_OK(x_call_x) \
do {\
if((x_call_x) != FwCode::FwOk) { \
BAD_CODE_ABORT(); \
} \
} while(0)
/*
* Above macros are meant to be used by all components.
*/
/**
* Class that allows for method entry/exit logging with a single declaration.
* Always uses debug.
*/
class LogMethod
{
public:
LogMethod(log4cpp::Category& log, log4cpp::Priority::Value priority,
const char *function);
virtual ~LogMethod();
private:
log4cpp::Category& log_;
log4cpp::Priority::Value priority_;
const char *function_;
};
// convenience macros to use the above class
#define LOG_METHOD() LogMethod log_method_entry_exit(log, log4cpp::Priority::DEBUG, __FUNCTION__)
#define TRACE_METHOD() LogMethod log_method_entry_exit(log, DHT_TRACE_PRIORITY, __FUNCTION__)
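// Illustrative usage (hypothetical method; assumes a log4cpp::Category named
// 'log' is in scope, as the macro expansion requires):
//
//   void Widget::refresh()
//   {
//       LOG_METHOD();   // logs "Entering: refresh" here and "Exiting: refresh" on scope exit
//       // ... body ...
//   }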
/** Initialize log4cpp config file.
* This function needs to be called once for each executable. Multiple
* initializations will return the result of the first initialization (IOW,
* an executable can be initialized with exactly one config file). Errors
* encountered by this function are printed onto cerr. See log4cpp
* documentation for what happens when PropertyConfigurator::configure()
* fails.
* \param confFile is the path name of the log4cpp config file.
* Depending on the machine that the executable is running on, the path
* will be different.
* \return true if the initialization succeeds, false if it fails.
*/
bool initLog4cpp(const std::string & confFile);
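// Illustrative call (the config path is hypothetical; errors are already
// printed to cerr by initLog4cpp itself, as described above):
//
//   if (!initLog4cpp("conf/log4cpp.properties")) {
//       exit(1);
//   }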
#endif
/*
* For customized vim control
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4:ts=4:et
* vim<600: sw=4:ts=4:et
*/

74
Makefile Normal file

@ -0,0 +1,74 @@
STASIS_DIR=../stasis
LIB=$(STASIS_DIR)/build/src/stasis \
-L/home/y/lib
INCLUDE=-I$(STASIS_DIR)/src/ -I$(STASIS_DIR) -I./ \
-I/home/y/include
LIBLIST=-lpthread \
-lstasis \
-lm
# -licui18n \
# -licuuc \
# -licudata \
# -licuio \
# -llog4cpp_y \
# -lthoth
FLAGS=-pg -g -O1
#FLAGS=-O3
HFILES=logserver.h logstore.h logiterators.h datapage.h merger.h tuplemerger.h datatuple.h
CFILES=logserver.cpp logstore.cpp logiterators.cpp datapage.cpp merger.cpp tuplemerger.cpp
# STASIS_DIR=../stasis
# LD_LIBRARY_PATH=$STASIS_DIR/build/src/stasis
# LD_LIBRARY_PATH=$STASIS_DIR/build/src/stasis ./hello
logstore: check_gen.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
test: dp_check lt_check ltable_check merger_check rb_check \
lmerger_check tmerger_check server_check tcpclient_check
lt_check: check_logtree.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
dp_check: check_datapage.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
ltable_check: check_logtable.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
merger_check: check_merge.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
lmerger_check: check_mergelarge.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
tmerger_check: check_mergetuple.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
rb_check: check_rbtree.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
server_check: check_server.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
tcpclient_check: check_tcpclient.cpp $(HFILES) $(CFILES)
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
hello : hello.cpp UCharUtils.cc LogUtils.cc
g++ -o $@ $^ -L$(LIB) $(INCLUDE) $(LIBLIST) $(FLAGS)
clean:
rm -f logstore server_check hello lt_check merger_check lmerger_check rb_check \
dp_check ltable_check tmerger_check rose tcpclient_check
veryclean: clean
rm -f *~ gmon.out prof.res
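# Illustrative (assumed layout, mirroring the commented LD_LIBRARY_PATH lines above):
# build one of the checks, then run it against the locally built Stasis library.
#   make dp_check
#   LD_LIBRARY_PATH=../stasis/build/src/stasis ./dp_check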

152
NOTES Normal file

@ -0,0 +1,152 @@
######################################################################################
constants.h
######################################################################################
#define PAGE_SIZE 4096
#define BLOB_THRESHOLD_SIZE (PAGE_SIZE-30)
SLOT TYPES
#define INVALID_SLOT (-1)
/** This constant is used as a placeholder to mark slot locations that contain blobs.
@see slotted.c, indirect.c, blobManager.c */
#define BLOB_SLOT (-2)
#define NORMAL_SLOT (-3)
#define SLOT_TYPE_END (-4)
######################################################################################
allocationPolicy.h
######################################################################################
struct allocationPolicy {
struct LH_ENTRY(table) * xidAlloced;
struct LH_ENTRY(table) * xidDealloced;
struct RB_ENTRY(tree) * availablePages;
struct LH_ENTRY(table) * pageOwners;
struct LH_ENTRY(table) * allPages;
};
typedef struct allocationPolicy stasis_allocation_policy_t;
typedef struct availablePage {
int freespace;
pageid_t pageid;
int lockCount; // Number of active transactions that have alloced or dealloced from this page.
} availablePage;
availablePage * stasis_allocation_policy_pick_suitable_page(stasis_allocation_policy_t * ap, int xid, int freespace);
////////////////////////////////////////////////////////////////////////////////////
==15277== Thread 4:
==15277== Invalid free() / delete / delete[]
==15277== at 0x401BEFA: free (vg_replace_malloc.c:235)
==15277== by 0x4FD60FB: free_mem (in /lib/tls/libc-2.3.4.so)
==15277== by 0x4FD5B21: __libc_freeres (in /lib/tls/libc-2.3.4.so)
==15277== by 0x4017336: _vgw_freeres (vg_preloaded.c:62)
==15277== by 0x4030B25: pthread_cond_wait@@GLIBC_2.3.2 (in /lib/tls/libpthread-2.3.4.so)
==15277== by 0x402E370: start_thread (in /lib/tls/libpthread-2.3.4.so)
==15277== by 0x4F96FFD: clone (in /lib/tls/libc-2.3.4.so)
==15277== Address 0x4EC66B8 is not stack'd, malloc'd or (recently) free'd
==15277==
==15277== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 40 from 1)
==15277== malloc/free: in use at exit: 8,540,389 bytes in 912 blocks.
==15277== malloc/free: 1,815,016 allocs, 1,814,105 frees, 1,121,769,405 bytes allocated.
==15277== For counts of detected errors, rerun with: -v
==15277== searching for pointers to 912 not-freed blocks.
==15277== checked 43,383,184 bytes.
==15277==
==15277== Thread 1:
==15277==
==15277== 4,883,561 (32 direct, 4,883,529 indirect) bytes in 1 blocks are definitely lost in loss record 16 of 46
==15277== at 0x401B790: operator new(unsigned) (vg_replace_malloc.c:164)
==15277== by 0x8052C01: __gnu_cxx::new_allocator<std::_Rb_tree_node<datatuple> >::allocate(unsigned, void const*) (new_allocator.h:81)
==15277== by 0x8052B79: std::_Rb_tree<datatuple, datatuple, std::_Identity<datatuple>, datatuple, std::allocator<datatuple> >::_M_get_node() (stl_tree.h:356)
==15277== by 0x8052ACC: std::_Rb_tree<datatuple, datatuple, std::_Identity<datatuple>, datatuple, std::allocator<datatuple> >::_M_create_node(datatuple const&) (stl_tree.h:365)
==15277== by 0x8052978: std::_Rb_tree<datatuple, datatuple, std::_Identity<datatuple>, datatuple, std::allocator<datatuple> >::_M_insert(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, datatuple const&) (stl_tree.h:783)
==15277== by 0x805270C: std::_Rb_tree<datatuple, datatuple, std::_Identity<datatuple>, datatuple, std::allocator<datatuple> >::insert_unique(datatuple const&) (stl_tree.h:881)
==15277== by 0x8052332: std::set<datatuple, datatuple, std::allocator<datatuple> >::insert(datatuple const&) (stl_set.h:314)
==15277== by 0x8050077: logtable::insertTuple(datatuple&) (logstore.cpp:1030)
==15277== by 0x804A641: insertProbeIter(int) (check_merge.cpp:160)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 336 (28 direct, 308 indirect) bytes in 1 blocks are definitely lost in loss record 17 of 46
==15277== at 0x401B405: malloc (vg_replace_malloc.c:149)
==15277== by 0x404D906: stasis_dirty_page_table_init (dirtyPageTable.c:133)
==15277== by 0x404BFA5: Tinit (transactional2.c:66)
==15277== by 0x804A2AE: insertProbeIter(int) (check_merge.cpp:97)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 40 bytes in 1 blocks are definitely lost in loss record 20 of 46
==15277== at 0x401B790: operator new(unsigned) (vg_replace_malloc.c:164)
==15277== by 0x8053025: merge_scheduler::addlogtable(logtable*) (merger.cpp:20)
==15277== by 0x804A33E: insertProbeIter(int) (check_merge.cpp:113)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 80 bytes in 10 blocks are definitely lost in loss record 32 of 46
==15277== at 0x401B405: malloc (vg_replace_malloc.c:149)
==15277== by 0x804D75E: logtree::create(int) (logstore.cpp:169)
==15277== by 0x8053BD5: memMergeThread(void*) (merger.cpp:236)
==15277== by 0x402E370: start_thread (in /lib/tls/libpthread-2.3.4.so)
==15277== by 0x4F96FFD: clone (in /lib/tls/libc-2.3.4.so)
==15277==
==15277==
==15277== 4,792 (432 direct, 4,360 indirect) bytes in 18 blocks are definitely lost in loss record 40 of 46
==15277== at 0x401B790: operator new(unsigned) (vg_replace_malloc.c:164)
==15277== by 0x80501C5: logtable::insertTuple(int, datatuple&, recordid&, logtree*) (logstore.cpp:1064)
==15277== by 0x8054FA7: insertTuple(int, DataPage<datatuple>*, datatuple&, logtable*, logtree*, recordid&, int&, int&) (merger.cpp:643)
==15277== by 0x8054AFF: merge_iterators(int, treeIterator<datatuple>*, memTreeIterator<std::set<datatuple, datatuple, std::allocator<datatuple> >, datatuple>*, logtable*, logtree*, int&) (merger.cpp:534)
==15277== by 0x8053C8F: memMergeThread(void*) (merger.cpp:251)
==15277== by 0x402E370: start_thread (in /lib/tls/libpthread-2.3.4.so)
==15277== by 0x4F96FFD: clone (in /lib/tls/libc-2.3.4.so)
==15277==
==15277==
==15277== 576 bytes in 4 blocks are possibly lost in loss record 41 of 46
==15277== at 0x401C6BF: calloc (vg_replace_malloc.c:279)
==15277== by 0x400E71A: _dl_allocate_tls (in /lib/ld-2.3.4.so)
==15277== by 0x402E91E: pthread_create@@GLIBC_2.1 (in /lib/tls/libpthread-2.3.4.so)
==15277== by 0x80538FF: merge_scheduler::startlogtable(int) (merger.cpp:184)
==15277== by 0x804A37E: insertProbeIter(int) (check_merge.cpp:116)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 3,175 bytes in 1 blocks are possibly lost in loss record 42 of 46
==15277== at 0x401B405: malloc (vg_replace_malloc.c:149)
==15277== by 0x8051BC7: DataPage<datatuple>::readbytes(int, int, int, unsigned char**) (datapage.cpp:235)
==15277== by 0x8051F7F: DataPage<datatuple>::RecordIterator::getnext(int) (datapage.cpp:442)
==15277== by 0x80512E0: DataPage<datatuple>::recordRead(int, unsigned char*, unsigned, datatuple**) (datapage.cpp:206)
==15277== by 0x8050449: logtable::findTuple(int, unsigned char*, unsigned, logtree*) (logstore.cpp:1104)
==15277== by 0x804FF48: logtable::findTuple(int, unsigned char*, unsigned) (logstore.cpp:979)
==15277== by 0x804A8D3: insertProbeIter(int) (check_merge.cpp:198)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 173,599 bytes in 2 blocks are possibly lost in loss record 43 of 46
==15277== at 0x401B405: malloc (vg_replace_malloc.c:149)
==15277== by 0x804FFD0: logtable::insertTuple(datatuple&) (logstore.cpp:1014)
==15277== by 0x804A641: insertProbeIter(int) (check_merge.cpp:160)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277==
==15277== 2,281,057 bytes in 681 blocks are definitely lost in loss record 45 of 46
==15277== at 0x401B405: malloc (vg_replace_malloc.c:149)
==15277== by 0x8051BC7: DataPage<datatuple>::readbytes(int, int, int, unsigned char**) (datapage.cpp:235)
==15277== by 0x8051F7F: DataPage<datatuple>::RecordIterator::getnext(int) (datapage.cpp:442)
==15277== by 0x80512E0: DataPage<datatuple>::recordRead(int, unsigned char*, unsigned, datatuple**) (datapage.cpp:206)
==15277== by 0x8050449: logtable::findTuple(int, unsigned char*, unsigned, logtree*) (logstore.cpp:1104)
==15277== by 0x804FF81: logtable::findTuple(int, unsigned char*, unsigned) (logstore.cpp:990)
==15277== by 0x804A8D3: insertProbeIter(int) (check_merge.cpp:198)
==15277== by 0x804AB9B: main (check_merge.cpp:235)
==15277==
==15277== LEAK SUMMARY:
==15277== definitely lost: 2,281,669 bytes in 712 blocks.
==15277== indirectly lost: 4,888,197 bytes in 150 blocks.
==15277== possibly lost: 177,350 bytes in 7 blocks.
==15277== still reachable: 1,193,173 bytes in 43 blocks.
==15277== suppressed: 0 bytes in 0 blocks.
==15277== Reachable blocks (those to which a pointer was found) are not shown.
==15277== To see them, rerun with: --show-reachable=yes
Killed

345
StringUtils.h Normal file

@ -0,0 +1,345 @@
/* $Id: StringUtils.h,v 1.17 2009/03/25 20:32:51 dlomax Exp $ */
/* Copyright (C) 2008 Yahoo! Inc. All Rights Reserved. */
#ifndef __STRING_UTIL_H
#define __STRING_UTIL_H
#include <iostream>
#include <iomanip>
#include <sstream>
#include "FwCode.h"
/**
* Container for static string manipulation utilities.
*/
class StringUtils
{
public:
/**
* Our replacement for yax_getroot(). Allows our code to have a different
* root than components we use or link with. This is handy for unit testing.
* @return Copy of the value in a std::string
*/
static std::string getDhtRoot();
/**
* Parse a tablet name into left and right limits.
* @return true if parsing successful, false if incorrect format
*/
static bool parseTabletName(const std::string& tablet, std::string& leftLimit,
std::string& rightLimit);
/**
* Construct a tablet name from left and right limits.
*/
static void buildTabletName(const std::string& leftLimit,
const std::string& rightLimit,
std::string& tablet);
/**
* General purpose method to assemble a full path name, using
* getDhtRoot() so that
* the root will be configurable. DO NOT supply "/home/y" in path1.
*/
static std::string makePath(const std::string& path1 = "",
const std::string& path2 = "",
const std::string& path3 = "",
const std::string& path4 = "",
const std::string& path5 = "",
const std::string& path6 = "");
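// Illustrative call (path components are hypothetical):
//   std::string p = StringUtils::makePath("conf", "dht", "storage.xml");
//   // yields a path rooted at getDhtRoot(); the exact joining rules are
//   // defined by the implementation, which is not part of this header.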
/**
* Append additional paths to an existing one - does not prepend ROOT.
*/
static void appendPath(std::string& base_path, const std::string& path2 = "",
const std::string& path3 = "",
const std::string& path4 = "");
/**
* Construct a topic name from a table/tablet.
*
* @return the topic name
*/
static std::string buildTopicName(const std::string& table,
const std::string& tablet);
/**
* Construct a topic name from a table/tablet.
* @param topic Is filled with the topic name.
*/
static void buildTopicName(const std::string& table,
const std::string& tablet,
std::string &topic);
/**
* Parses <code>topic</code> into table and tablet portions.
*
* @param table Filled with the table name.
* @param tablet Filled with the tablet name.
* @return true if the parsing succeeded, false if not.
*/
static bool parseTopicName(const std::string& topic,
std::string& table,
std::string &tablet);
/**
* Only for use in log statements - this is slow. Produce a printable
* string where binary (<32) characters are hex encoded, but all others
* are left alone.
*
* @param str string to encode
* @param len length of string
* @return encoded string.
*/
static std::string toPrintable(const char *str, size_t len);
/**
* Convert a formatted hex string back into its original
* 64-bit value
*
* @param value the hex-encoded string
* @param out the value
* @return FwCode::FwOk on success, FwCode::BadHexString on parse failure
*/
static FwCode::ResponseCode
convertHexStringToUI64(const std::string& value, uint64_t& out);
/**
* Convert a formatted hex string back into its original
* 32-bit value
*
* @param value the hex-encoded string
* @param out the value
* @return FwCode::FwOk on success, FwCode::BadHexString on parse failure
*/
static FwCode::ResponseCode
convertHexStringToUI32(const std::string& value, uint32_t& out);
/**
* Standard means for formatting a 0x prefixed hex string from a
* 64-bit unsigned value. Will produce upper-case letters. Will
* pad with zeros at the beginning to fill out 16 hex chars.
*
* @param val the value to format
* @return the formatted value, like "0xDEADBEEF00000000"
*/
static std::string convertUI64ToHexString( uint64_t val );
/**
* Standard means for formatting a 0x prefixed hex string from a
* 32-bit unsigned value. Will produce upper-case letters. Will
* pad with zeros at the beginning to fill out 8 hex chars.
*
* @param val the value to format
* @return the formatted value, like "0xDEADBEEF"
*/
static std::string convertUI32ToHexString( unsigned int val );
/**
* Standard means for formatting a small hex string from a
* 32-bit unsigned value. The "0x" will NOT be included.
* Will produce upper-case letters. Will NOT pad with zeros
* at the beginning.
*
* @param val the value to format
* @return the formatted value, like "DEADBEEF"
*/
static std::string convertUI32ToMinimalHexString( unsigned int val );
/**
* Assemble the fields of ENCRYPTED_BODY_HEADER and encrypt it for
* sending to the remote side.
* @param result is the out parameter having the resulting string.
* @param encKeyName is the name of the key in keydb whose value will be
* used as the encryption key
* @param bodyEncVersion is the version of the encryption scheme used to
* encrypt the body (not the encryption scheme of this header itself).
* @param expireTime is the time (in usecs) after which the request
* should not be processed by the receiver of this header.
*/
static FwCode::ResponseCode makeEncryptedBodyHdr(std::string & result,
const char *encKeyName, uint32_t bodyEncVersion, uint64_t expireTime);
/**
* Parse the incoming ENCRYPTED_BODY_HEADER, decrypting it, and
* separating the fields in it.
* @param inval is the incoming encrypted string.
* @param encKeyName is the name of the key in keydb whose value will be
* used as the decryption key
* @param bodyEncVersion is the version of the encryption scheme to be
* used to decrypt the body (not for the decryption of this header
* itself).
* @param expireTime is the time (in usecs) after which the response
* should not be processed by the receiver of this header.
*/
static FwCode::ResponseCode parseEncryptedBodyHdr(const std::string & inval,
const char *encKeyName, uint32_t & bodyEncVersion, uint64_t & expireTime);
/**
* Get the hash for an un-normalized record name.
*
* @param unnormalizedRecordName a raw record name from user input
* @param (output) hashResult the hex string of the hash value.
* @return FwCode::FwOk on success, else an error relating to normalization
*/
static FwCode::ResponseCode normalizeAndHashRecordName
( const std::string& unnormalizedRecordName,
std::string & hashResult /* out */ );
/**
* Get the hash for a normalized record name.
*
* @param recordName the record name. MUST be previously normalized.
* @return hashResult the uint32_t of the hash value.
*/
static uint32_t hashRecordName(const std::string& recordName);
/**
* Get the hash for a normalized record name.
*
* @param recordName the record name. MUST be previously normalized.
* @param (output) hashResult the hex string of the hash value.
*/
static void hashRecordName( const std::string& recordName,
std::string & hashResult /* out */ );
/**
* Get the hash for a normalized record name in string and int form
*
* @param recordName the record name. MUST be previously normalized.
* @param (output) hashResult the hex string of the hash value.
* @param (output) hexNum numerical value of hash
*/
static void hashRecordName( const std::string& recordName,
std::string & hashResult /* out */,
uint32_t& hexNum);
/**
* Method to hash a string using crc32.
*
* @param buf data to hash
* @param len length of buf
* @return hash value
*/
static uint32_t crcHash(const char * buf, uint32_t len);
/**
* util function to convert any type to a string
*/
template<typename T> static inline std::string toString(T item);
/**
* convert string to any type of value
* @param strValue string value to parse
* @param value(out) value to read from strValue
* @return FwCode::FwOk on success
* FwCode::FwError on failure (error is *not* logged)
*/
template<typename T> static inline
FwCode::ResponseCode fromString(const std::string& strValue,
T& value);
/**
* convert a hexadecimal number to string representation
* of fixed width ( 2 * sizeof(T) )
* @param value number to convert to string
* @return string representation of value
*/
template<typename T> static inline
std::string numberToHexString(T value);
/**
* convert a hexadecimal number to minimal string representation
* @param value number to convert to string
* @return string representation of value
*/
template<typename T> static inline
std::string numberToMinimalHexString(T value);
/**
* convert a hexadecimal string to a number
* @param strvalue input string to read from
* @param value(out) output number
* @return FwCode::FwOk on successful conversion
* FwCode::FwError on failure to convert strvalue
* to number
*/
template<typename T> static inline
FwCode::ResponseCode hexStringToNumber(const std::string& strvalue,
T& value);
static const std::string EMPTY_STRING;
};
template<typename T>
std::string StringUtils::
toString(T item)
{
std::ostringstream buf;
buf << item;
return buf.str();
}
template<typename T>
FwCode::ResponseCode StringUtils::
fromString(const std::string& strValue,
T& value)
{
std::istringstream buf(strValue);
buf >> value;
if(buf.fail()||
(strValue.length() != buf.tellg() ))
{
return FwCode::FwError;
}
return FwCode::FwOk;
}
template<typename T>
std::string StringUtils::
numberToHexString(T value)
{
std::ostringstream buf;
buf << "0x" << std::hex
<< std::setw(sizeof(T) * 2) << std::setfill('0')
<< std::uppercase << value;
return buf.str();
}
template<typename T>
std::string StringUtils::
numberToMinimalHexString(T value)
{
std::ostringstream buf;
buf << std::hex << std::uppercase << value;
return buf.str();
}
template<typename T>
FwCode::ResponseCode StringUtils::
hexStringToNumber(const std::string& strvalue,
T& value)
{
std::istringstream buf(strvalue);
buf >> std::hex >> value;
if(buf.fail() ||
(strvalue.length() != buf.tellg() ))
{
return FwCode::FwError;
}
return FwCode::FwOk;
}
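// Illustrative usage of the template helpers above (values are examples only):
//
//   std::string s = StringUtils::toString(42);                       // "42"
//   uint32_t v = 0;
//   FwCode::ResponseCode rc = StringUtils::fromString("123", v);     // rc == FwCode::FwOk, v == 123
//   std::string h = StringUtils::numberToHexString<uint32_t>(0xCAFE);        // "0x0000CAFE" (padded to 8 digits)
//   std::string m = StringUtils::numberToMinimalHexString<uint32_t>(0xCAFE); // "CAFE"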
/*
* For customized vim control
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4:ts=4:et
* vim<600: sw=4:ts=4:et
*/
#endif

326
UCharUtils.cc Normal file

@ -0,0 +1,326 @@
/* $Id: UCharUtils.cc,v 1.16 2009/03/03 20:19:18 dlomax Exp $ */
/* Copyright (C) 2008 Yahoo! Inc. All Rights Reserved. */
//#include <dht/UCharUtils.h>
#include "UCharUtils.h"
#include <log4cpp/Category.hh>
#include "LogUtils.h"
//#include "ActionContext.h"
#include <unicode/ucnv.h>
#include <unicode/unorm.h>
#include <thoth/validate.h> // To make sure we have UTF-8
static log4cpp::Category &log =
log4cpp::Category::getInstance("dht.framework." __FILE__);
UCharUtilsImpl *UCharUtils::instance_ = NULL;
UCharUtilsImpl::
UCharUtilsImpl() : uconv_(NULL) {
LOG_METHOD();
ucBuffLen = 0;
ucBuff = NULL;
ucNormBuffLen = 0;
ucNormBuff = NULL;
charBuffLen = 0;
charBuff = NULL;
}
FwCode::ResponseCode UCharUtilsImpl::
init()
{
UErrorCode erc = U_ZERO_ERROR;
uconv_ = ucnv_open("utf-8", &erc);
if (uconv_ == NULL) {
DHT_ERROR_STREAM() << "EC:UNICODE:Problem getting utf-8 converter, erc:" << erc
<< ", " << u_errorName(erc);
return FwCode::UcnvOpenFailed;
}
return FwCode::FwOk;
}
UCharUtilsImpl::
~UCharUtilsImpl() {
reset();
if (uconv_ != NULL) {
ucnv_close(uconv_);
uconv_ = NULL;
}
}
void UCharUtilsImpl::
reset() {
LOG_METHOD();
if (ucBuff != NULL) {
delete[] ucBuff;
ucBuffLen = 0;
ucBuff = NULL;
}
if (ucNormBuff != NULL) {
delete[] ucNormBuff;
ucNormBuffLen = 0;
ucNormBuff = NULL;
}
if (charBuff != NULL) {
delete[] charBuff;
charBuffLen = 0;
charBuff = NULL;
}
}
/**
* Small wrapper to hide multi-line thoth api inside single-line call.
*/
bool UCharUtils::
isUTF8(const std::string& value)
{
size_t pos = 0;
thoth_result result = thoth_validate_utf8(value.c_str(), value.length(),
&pos);
if(result != UTF8_VALID) {
std::cerr
//RESPONSE_DEBUG_STREAM(FwCode::DataNotUtf8)
<< "value (" << value << ") is not UTF-8. thoth_result:" << result
<< ", position=" << pos;
return false;
}
return true;
}
/**
* Small wrapper to hide multi-line thoth api inside single-line call.
*/
bool UCharUtils::
isUTF8(const char * value, size_t value_len)
{
size_t pos = 0;
thoth_result result = thoth_validate_utf8(value, value_len, &pos);
if(result != UTF8_VALID) {
//RESPONSE_DEBUG_STREAM(FwCode::DataNotUtf8)
std::cerr
<< "value (" << std::string(value, value_len)
<< ") is not UTF-8. thoth_result:" << result
<< ", position=" << pos;
return false;
}
return true;
}
// Convert an input string (expected to be UTF-8) into unicode UChars
// The result of the conversion will be sitting in our ucBuff area.
FwCode::ResponseCode UCharUtilsImpl::
convert(const std::string &input, int32_t &len)
{
LOG_METHOD();
//UTF-8 validation
if(!UCharUtils::isUTF8(input)) {
return FwCode::DataNotUtf8;
}
int size = input.length() * 2;
// Check if we already have a big enough buffer
if (ucBuffLen < size) {
// Nope, first check if we need to release what we've been using
if (ucBuff) {
delete[] ucBuff;
}
ucBuffLen = size;
ucBuff = new UChar[ucBuffLen];
}
UErrorCode erc = U_ZERO_ERROR;
len = ucnv_toUChars(uconv_,
ucBuff,
ucBuffLen,
input.data(),
input.length(), &erc);
if (U_FAILURE(erc)) {
//RESPONSE_ERROR_STREAM(FwCode::ConvertToUCharFailed)
std::cerr
<< "EC:UNICODE:error:" << erc
<< ", " << u_errorName(erc)
<< " from converting input:'" << input << "'";
len = 0;
return FwCode::ConvertToUCharFailed;
}
return FwCode::FwOk;
}
// Normalize an input string. Note that all three internal buffers will
// be used by this operation, but by the time we finish, we'll be done
// with them.
FwCode::ResponseCode UCharUtilsImpl::
normalize(const std::string &input, std::string &result /* out */)
{
LOG_METHOD();
// convert our UTF-8 into UChar
int32_t inLen = 0;
FwCode::ResponseCode rc = convert(input, inLen);
if (rc != FwCode::FwOk) {
result.erase();
return rc;
}
// Do a quick check if the input is already normalized so that
// we can duck out early
UErrorCode status = U_ZERO_ERROR;
if (unorm_quickCheck(ucBuff, inLen,
UNORM_NFC, &status) == UNORM_YES) {
DHT_DEBUG_STREAM() << "already normalized input:" << input;
result = input;
return FwCode::FwOk;
}
// Check if we have enough space for the normalized result.
// We'll make the output space twice as big as the input (although
// it's more likely that the normalized result will be shorter,
// since NFC combines characters, e.g. 'A' + 'combining accent' -> one character).
int32_t newSize = inLen * 2;
if (newSize > ucNormBuffLen) {
DHT_DEBUG_STREAM() << "newSize:" << newSize
<< " ucNormBuffLen:" << ucNormBuffLen;
if (ucNormBuff) {
delete[] ucNormBuff;
}
ucNormBuffLen = newSize;
ucNormBuff = new UChar[ucNormBuffLen];
}
// Do the actual normalization
status = U_ZERO_ERROR;
int32_t normLen = unorm_normalize(ucBuff, inLen,
UNORM_NFC, 0,
ucNormBuff,
ucNormBuffLen,
&status);
if(U_FAILURE(status)) {
//RESPONSE_ERROR_STREAM(FwCode::FwError)
std::cerr
<< "EC:UNICODE:error:" << status << ", " << u_errorName(status)
<<" in unorm_normalize, inLen:" << inLen
<< " ucNormBuffLen:" << ucNormBuffLen;
return FwCode::FwError;
}
// Make sure we have some space to convert back to UTF-8
int32_t resultLen = normLen * 4;
if (resultLen > charBuffLen) {
DHT_DEBUG_STREAM() << "resultLen:" << resultLen
<< " charBuffLen:" << charBuffLen;
if (charBuff) {
delete[] charBuff;
charBuff= NULL;
}
charBuffLen = resultLen;
charBuff = new char[charBuffLen];
}
DHT_DEBUG_STREAM() <<"calling ucnv_fromUChars, normLen:" << normLen;
// Go from UChar array to UTF-8
int32_t actualLen = ucnv_fromUChars(uconv_,
charBuff, charBuffLen,
ucNormBuff, normLen,
&status);
if(U_FAILURE(status)) {
//RESPONSE_ERROR_STREAM(FwCode::FwError)
std::cerr
<< "EC:UNICODE:error:" << status << ", " << u_errorName(status)
<< " in ucnv_fromUChars charBuffLen:" << charBuffLen
<< " normLen:" << normLen;
return FwCode::FwError;
}
// Smack our UTF-8 characters into the result string
result.assign(charBuff, actualLen);
DHT_DEBUG_STREAM() << "leaving actualLen:" << actualLen
<< " result:" << result;
return FwCode::FwOk;
}
FwCode::ResponseCode UCharUtils::
init()
{
if (instance_ == NULL) {
instance_ = new UCharUtilsImpl();
return instance_->init();
}
return FwCode::FwOk; // already initialized
}
void UCharUtils::
close()
{
if(instance_ != NULL) {
delete instance_;
instance_ = NULL;
}
}
// Given an input string, return a unicode UChar array. Note that the
// return value is a pointer to our internal buffer.
UChar * UCharUtils::
getUChar(const std::string &input, int32_t& len) {
LOG_METHOD();
// do the conversion; convert() sizes its internal UChar buffer at 2x the input byte length
if(instance_->convert(input, len) != FwCode::FwOk) {
len = 0;
return NULL;
}
return instance_->ucBuff;
}
FwCode::ResponseCode UCharUtils::
normalize(const std::string &input, std::string &result) {
LOG_METHOD();
return(instance_->normalize(input, result));
}
FwCode::ResponseCode UCharUtils::
parseRegExpPattern(const std::string &pattern,
URegularExpression * & result /* out */)
{
UParseError perr;
UErrorCode erc = U_ZERO_ERROR;
int32_t ureglen = 0;
// Do not delete uregexp, it's a static reusable buffer inside UCharUtils
UChar *uregexp = UCharUtils::getUChar(pattern, ureglen);
if (uregexp == NULL) {
//RESPONSE_ERROR_STREAM(FwCode::ConvertToUCharFailed)
std::cerr
<< "EC:UNICODE|IMPOSSIBLE:Unable to convert pattern to unicode: " << pattern;
return FwCode::ConvertToUCharFailed;
}
URegularExpression *regexp= uregex_open(uregexp, ureglen, 0,
&perr,
&erc);
if(erc != U_ZERO_ERROR) {
//RESPONSE_DEBUG_STREAM(FwCode::CompileRegExFailed)
std::cerr
<< "Compiling regex failed at: " << perr.offset
<< "; re=" << pattern;
return FwCode::CompileRegExFailed;
}
result = regexp;
return FwCode::FwOk;
}

139
UCharUtils.h Normal file

@ -0,0 +1,139 @@
/* Copyright (C) 2008 Yahoo! Inc. All Rights Reserved. */
#ifndef UCHAR_UTILS_H
#define UCHAR_UTILS_H
#include <unicode/ucnv.h>
#include <string>
#include "FwCode.h"
#include <unicode/uregex.h>
// Forward declaration
class UCharUtilsImpl;
/**
* Some handy utilities for working with unicode characters. Yes, these
* could have just been some regular routines instead of static methods
* in a class, but doing it this way gives us some containment of the
* other static tidbits that might be necessary (like reusable buffer space),
* which are all hidden within the UCharUtilsImpl class.
*
* This is a singleton - do not use in a threaded program.
*/
class UCharUtils {
private:
/**
* Our pointer to all sorts of goodness.
*/
static UCharUtilsImpl *instance_;
public:
/**
* Initialize the utilities. Primarily opens the utf-8 converter.
* Calling this is required prior to using the converter.
*
* @return FwCode::FwOk on success, FwCode::UcnvOpenFailed on
* failure.
*/
static FwCode::ResponseCode init();
/**
* Release all resources. <code>init()</code> must be called again
* in order to use again.
*/
static void close();
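// Illustrative lifecycle (hypothetical program; not part of this header):
//   if (UCharUtils::init() != FwCode::FwOk) { /* handle failure */ }
//   // ... calls to getUChar() / normalize() / parseRegExpPattern() ...
//   UCharUtils::close();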
/**
* Small wrapper to hide multi-line thoth api inside single-line call.
*
* @param value string to be tested for utf-8-ness
* @return true if it is utf-8, false if not
*/
static bool isUTF8(const std::string& value);
/**
* Small wrapper to hide multi-line thoth api inside single-line call.
*
* @param value char string to be tested for utf-8-ness
* @param value_len length of <code>value</code>
* @return true if it is utf-8, false if not
*/
static bool isUTF8(const char * value, size_t value_len);
/**
* Convert utf-8 strings into UChar strings. Note that the
* result is an internal reusable buffer so the caller should
* *not* release it.
* @param input utf-8 string to convert
* @param len set to length of output string
* @return NULL if anything bad happens, otherwise a pointer to an internal
* UChar buffer; the caller must *NEVER* free this pointer.
*/
static UChar * getUChar(const std::string &input, int32_t& len);
/**
* Do a NFC normalization so that different yet equivalent strings
* will have a single representation. See
* http://www.unicode.org/unicode/reports/tr15/
* for more information.
* @param input A UTF-8 string that we want to normalize
* @param result (output) the normalized UTF-8 string
* @return FwCode::FwOk on success,
* FwCode::FwError on conversion failure,
* FwCode::DataNotUtf8 if input was not utf-8
*/
static FwCode::ResponseCode normalize(const std::string &input,
std::string &result);
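// Illustrative call (strings are hypothetical; init() must have succeeded first):
//   std::string normalized;
//   if (UCharUtils::normalize(raw_utf8, normalized) == FwCode::FwOk) {
//       // 'normalized' now holds the NFC form of 'raw_utf8'
//   }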
/**
* Compile a regular expression in a unicode-friendly way.
*
* @param pattern the regexp pattern to compile. Assumed to
* be utf-8.
* @param result (output) Set to point to the compiled regexp.
* Must be released by the caller via uregex_close() when
* finished with it.
* @return FwCode::FwOk if compilation succeeded,
* FwCode::CompileRegExFailed or FwCode::ConvertToUCharFailed
* on failure.
*/
static FwCode::ResponseCode parseRegExpPattern
(const std::string &pattern,
URegularExpression * & result /* out */);
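// Illustrative call (pattern is hypothetical; the caller owns the result):
//   URegularExpression *re = NULL;
//   if (UCharUtils::parseRegExpPattern("^foo.*", re) == FwCode::FwOk) {
//       // ... use re via the uregex_* API ...
//       uregex_close(re);
//   }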
};
/**
* Bug 2574599 - Impl exposed for use by multiple threads; singleton not
* appropriate for multi-threaded program.
*/
class UCharUtilsImpl
{
private:
UConverter *uconv_;
public:
UCharUtilsImpl();
~UCharUtilsImpl();
FwCode::ResponseCode init();
void reset();
FwCode::ResponseCode convert(const std::string &input, int32_t &len);
FwCode::ResponseCode normalize(const std::string &input, std::string &result);
// Buffer used to convert from UTF-8 into UChar
int32_t ucBuffLen;
UChar *ucBuff;
// Buffer used for UChar normalization output
int32_t ucNormBuffLen;
UChar *ucNormBuff;
// Buffer used to convert UChars back to UTF-8
int32_t charBuffLen;
char *charBuff;
};
#endif // UCHAR_UTILS_H

130
adriana-lima.awk Executable file

@ -0,0 +1,130 @@
#! /usr/bin/awk -f
BEGIN{
READ_SLA = 500;
WRITE_SLA = 750;
readcnt = 0;
writecnt = 0;
wlat_tot = 0;
wlat_max = 0;
wlat_sqtot = 0;
wlat_slafail = 0;
DIST_BUCKET_LENGTH = 100;
DIST_BUCKET_COUNT = 20;
for(i=1; i<=DIST_BUCKET_COUNT; i++)
{
rlat_dist[i] = 0;
wlat_dist[i] = 0;
}
rlat_tot = 0;
rlat_max = 0;
rlat_sqtot = 0;
rlat_slafail = 0;
printf("READ SLA:\t%d\n", READ_SLA);
printf("WRITE SLA:\t%d\n", WRITE_SLA);
printf("\n");
}
/INFO - doRead()/ { readcnt = readcnt + 1;
split(substr($0, match($0, "latency:")+ length("latency:")+1), tmp_arr, " ");
#printf("%d\n", strtonum(tmp_arr[1]));
lat_val = strtonum(tmp_arr[1]);
dist_index = int(lat_val / DIST_BUCKET_LENGTH) + 1;
if(dist_index > DIST_BUCKET_COUNT)
dist_index = DIST_BUCKET_COUNT;
rlat_dist[dist_index]++;
rlat_tot = rlat_tot + lat_val;
rlat_sqtot = rlat_sqtot + lat_val*lat_val;
if(lat_val > rlat_max)
rlat_max = lat_val;
if(lat_val > READ_SLA)
rlat_slafail = rlat_slafail + 1;
}
/INFO - doInsert()/ { writecnt = writecnt + 1;
split(substr($0, match($0, "latency:")+ length("latency:")+1), tmp_arr, " ");
lat_val = tmp_arr[1];
if(index(tmp_arr[1], ",")!= 0)
lat_val = substr(tmp_arr[1],1,index(tmp_arr[1],",")-1);
#printf("%d\n", strtonum(lat_val));
lat_val = strtonum(lat_val);
dist_index = int(lat_val / DIST_BUCKET_LENGTH) + 1;
if(dist_index > DIST_BUCKET_COUNT)
dist_index = DIST_BUCKET_COUNT;
wlat_dist[dist_index]++;
wlat_tot = wlat_tot + lat_val;
wlat_sqtot = wlat_sqtot + lat_val*lat_val;
if(lat_val > wlat_max)
wlat_max = lat_val;
if(lat_val > WRITE_SLA)
wlat_slafail = wlat_slafail + 1;
}
END{
printf("R/W ratio:\t%.2f\n", strtonum(readcnt) / strtonum(writecnt));
printf("\n");
printf("#reads:\t%d\n",readcnt);
if(strtonum(readcnt) != 0)
{
printf("avg read latency:\t%.2f\n", (rlat_tot / readcnt));
printf("var read latency:\t%.2f\n", (rlat_sqtot/readcnt) - (rlat_tot/readcnt)*(rlat_tot/readcnt));
printf("max read latency:\t%.2f\n", rlat_max);
printf("read SLA fail:\t%d\n", rlat_slafail);
printf("\nREAD LATENCY DISTRIBUTION\n");
for(i=1; i<DIST_BUCKET_COUNT; i++)
printf("\t%d - %d:\t%d\n", (i-1)*DIST_BUCKET_LENGTH, i*DIST_BUCKET_LENGTH-1, rlat_dist[i]);
printf("\t%d - Inf:\t%d\n", (i-1)*DIST_BUCKET_LENGTH, rlat_dist[i]);
}
printf("\n");
printf("#writes:\t%d\n",writecnt);
if(strtonum(writecnt) != 0)
{
printf("avg write latency:\t%.2f\n", (wlat_tot / writecnt));
printf("var write latency:\t%.2f\n", (wlat_sqtot/writecnt) - (wlat_tot/writecnt)*(wlat_tot/writecnt));
printf("max write latency:\t%.2f\n", wlat_max);
printf("write SLA fail:\t%d\n", wlat_slafail);
printf("\nWRITE LATENCY DISTRIBUTION\n");
for(i=1; i<DIST_BUCKET_COUNT; i++)
printf("\t%d - %d:\t%d\n", (i-1)*DIST_BUCKET_LENGTH, i*DIST_BUCKET_LENGTH-1, wlat_dist[i]);
printf("\t%d - Inf:\t%d\n", (i-1)*DIST_BUCKET_LENGTH, wlat_dist[i]);
}
}

321
check_datapage.cpp Normal file

@ -0,0 +1,321 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
template class DataPage<datatuple>;
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> &arr)
{
for(int i=arr.size()-1; i>0; i--)
{
if(! (mycmp(arr[i], arr[i-1]) || mycmp(arr[i-1], arr[i])))
arr.erase(arr.begin()+i);
}
}
void preprandstr(int count, std::vector<std::string> &arr, int avg_len=50, bool duplicates_allowed=false)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
//make sure there is no duplicate key
if(!duplicates_allowed)
{
bool dup = false;
for(int i=0; i<j; i++)
if(! (mycmp(arr[i], str) || mycmp(str, arr[i])))
{
dup=true;
break;
}
if(dup)
{
j--;
continue;
}
}
//printf("keylen-%d\t%d\t%s\n", str_len, str.length(),rc);
free(rc);
arr.push_back(str);
}
}
/**
* REGION ALLOCATION
**/
pageid_t alloc_region(int xid, void *conf)
{
RegionAllocConf_t* a = (RegionAllocConf_t*)conf;
if(a->nextPage == a->endOfRegion) {
if(a->regionList.size == -1) {
//DEBUG("nextPage: %lld\n", a->nextPage);
a->regionList = TarrayListAlloc(xid, 1, 4, sizeof(pageid_t));
DEBUG("regionList.page: %lld\n", a->regionList.page);
DEBUG("regionList.slot: %d\n", a->regionList.slot);
DEBUG("regionList.size: %lld\n", a->regionList.size);
a->regionCount = 0;
}
DEBUG("{%lld <- alloc region arraylist}\n", a->regionList.page);
TarrayListExtend(xid,a->regionList,1);
a->regionList.slot = a->regionCount;
DEBUG("region lst slot %d\n",a->regionList.slot);
a->regionCount++;
DEBUG("region count %lld\n",a->regionCount);
a->nextPage = TregionAlloc(xid, a->regionSize,12);
DEBUG("next page %lld\n",a->nextPage);
a->endOfRegion = a->nextPage + a->regionSize;
Tset(xid,a->regionList,&a->nextPage);
DEBUG("next page %lld\n",a->nextPage);
}
DEBUG("%lld ?= %lld\n", a->nextPage,a->endOfRegion);
pageid_t ret = a->nextPage;
// Ensure the page is in buffer cache without accessing disk (this
// sets it to clean and all zeros if the page is not in cache).
// Hopefully, future reads will get a cache hit, and avoid going to
// disk.
Page * p = loadUninitializedPage(xid, ret);
releasePage(p);
DEBUG("ret %lld\n",ret);
(a->nextPage)++;
return ret;
}
pageid_t alloc_region_rid(int xid, void * ridp) {
recordid rid = *(recordid*)ridp;
RegionAllocConf_t conf;
Tread(xid,rid,&conf);
pageid_t ret = alloc_region(xid,&conf);
DEBUG("{%lld <- alloc region extend}\n", conf.regionList.page);
Tset(xid,rid,&conf);
return ret;
}
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
std::vector<std::string> data_arr;
std::vector<std::string> key_arr;
preprandstr(NUM_ENTRIES, data_arr, 5*4096, true);
preprandstr(NUM_ENTRIES+200, key_arr, 50, true);//well i can handle up to 200
std::sort(key_arr.begin(), key_arr.end(), &mycmp);
removeduplicates(key_arr);
if(key_arr.size() > NUM_ENTRIES)
key_arr.erase(key_arr.begin()+NUM_ENTRIES, key_arr.end());
NUM_ENTRIES=key_arr.size();
if(data_arr.size() > NUM_ENTRIES)
data_arr.erase(data_arr.begin()+NUM_ENTRIES, data_arr.end());
//for(int i = 0; i < NUM_ENTRIES; i++)
//{
// printf("%s\t", arr[i].c_str());
// int keylen = arr[i].length()+1;
// printf("%d\n", keylen);
//}
recordid alloc_state = Talloc(xid,sizeof(RegionAllocConf_t));
Tset(xid,alloc_state, &logtree::REGION_ALLOC_STATIC_INITIALIZER);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
int pcount = 10;
int dpages = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = key_arr[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
newtuple.key[j] = key_arr[i][j];
newtuple.key[keylen-1]='\0';
//prepare the data
uint32_t datalen = data_arr[i].length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
for(int j=0; j<datalen-1; j++)
newtuple.data[j] = data_arr[i][j];
newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
if(dp==NULL || !dp->append(xid, newtuple))
{
dpages++;
if(dp)
delete dp;
dp = new DataPage<datatuple>(xid, pcount, &DataPage<datatuple>::dp_alloc_region_rid, &alloc_state );
if(!dp->append(xid, newtuple))
{
delete dp;
dp = new DataPage<datatuple>(xid, pcount, &DataPage<datatuple>::dp_alloc_region_rid, &alloc_state );
assert(dp->append(xid, newtuple));
}
dsp.push_back(dp->get_start_pid());
}
}
printf("Total data set length: %d\n", datasize);
printf("Storage utilization: %.2f\n", (datasize+.0) / (PAGE_SIZE * pcount * dpages));
printf("Number of datapages: %d\n", dpages);
printf("Writes complete.\n");
Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Reading %d tuples\n", NUM_ENTRIES);
int tuplenum = 0;
for(int i = 0; i < dpages ; i++)
{
DataPage<datatuple> dp(xid, dsp[i]);
DataPage<datatuple>::RecordIterator itr = dp.begin();
datatuple *dt=0;
while( (dt=itr.getnext(xid)) != NULL)
{
assert(*(dt->keylen) == key_arr[tuplenum].length()+1);
assert(*(dt->datalen) == data_arr[tuplenum].length()+1);
tuplenum++;
free(dt->keylen);
free(dt);
dt = 0;
}
}
printf("Reads completed.\n");
/*
int64_t count = 0;
lladdIterator_t * it = logtreeIterator::open(xid, tree);
while(logtreeIterator::next(xid, it)) {
byte * key;
byte **key_ptr = &key;
int keysize = logtreeIterator::key(xid, it, (byte**)key_ptr);
pageid_t *value;
pageid_t **value_ptr = &value;
int valsize = lsmTreeIterator_value(xid, it, (byte**)value_ptr);
//printf("keylen %d key %s\n", keysize, (char*)(key)) ;
assert(valsize == sizeof(pageid_t));
assert(!mycmp(std::string((char*)key), arr[count]) && !mycmp(arr[count],std::string((char*)key)));
assert(keysize == arr[count].length()+1);
count++;
}
assert(count == NUM_ENTRIES);
logtreeIterator::close(xid, it);
*/
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter(10000);
return 0;
}

39
check_gen.cpp Normal file
View file

@ -0,0 +1,39 @@
#include "logstore.h"
int main(int argc, char **argv)
{
unlink("storefile.txt");
unlink("logfile.txt");
sync();
// PAGELAYOUT::initPageLayout();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
logtable ltable;
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
//ltable.startTable();
// lsmTableHandle<PAGELAYOUT>* h = TlsmTableStart<PAGELAYOUT>(lsmTable, INVALID_COL);
xid = Tbegin();
logtreeIterator::open(xid,ltable.get_tree_c2()->get_root_rec() );
Tcommit(xid);
Tdeinit();
}

276
check_logtable.cpp Normal file
View file

@ -0,0 +1,276 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
//template class DataPage<datatuple>;
template class treeIterator<datatuple>;
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> &arr)
{
for(int i=arr.size()-1; i>0; i--)
{
if(! (mycmp(arr[i], arr[i-1]) || mycmp(arr[i-1], arr[i])))
arr.erase(arr.begin()+i);
}
}
void preprandstr(int count, std::vector<std::string> &arr, int avg_len=50, bool duplicates_allowed=false)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
//make sure there is no duplicate key
if(!duplicates_allowed)
{
bool dup = false;
for(int i=0; i<j; i++)
if(! (mycmp(arr[i], str) || mycmp(str, arr[i])))
{
dup=true;
break;
}
if(dup)
{
j--;
continue;
}
}
//printf("keylen-%d\t%d\t%s\n", str_len, str.length(),rc);
free(rc);
arr.push_back(str);
}
}
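// Test: Stage 1 inserts tuples through logtable::insertTuple into tree c1,
// Stage 2 reads them back sequentially with treeIterator, and Stage 3 looks a
// random third of the keys up again via logtable::findTuple.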
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
logtable ltable;
int pcount = 5;
ltable.set_fixed_page_count(pcount);
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
xid = Tbegin();
logtree *lt = ltable.get_tree_c1();
recordid tree_root = lt->get_root_rec();
std::vector<std::string> data_arr;
std::vector<std::string> key_arr;
preprandstr(NUM_ENTRIES, data_arr, 5*4096, true);
preprandstr(NUM_ENTRIES+200, key_arr, 50, true); //generate ~200 extra keys so that removing duplicates still leaves NUM_ENTRIES of them
std::sort(key_arr.begin(), key_arr.end(), &mycmp);
removeduplicates(key_arr);
if(key_arr.size() > NUM_ENTRIES)
key_arr.erase(key_arr.begin()+NUM_ENTRIES, key_arr.end());
NUM_ENTRIES=key_arr.size();
if(data_arr.size() > NUM_ENTRIES)
data_arr.erase(data_arr.begin()+NUM_ENTRIES, data_arr.end());
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
int dpages = 0;
int npages = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = key_arr[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
newtuple.key[j] = key_arr[i][j];
newtuple.key[keylen-1]='\0';
//prepare the data
uint32_t datalen = data_arr[i].length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
for(int j=0; j<datalen-1; j++)
newtuple.data[j] = data_arr[i][j];
newtuple.data[datalen-1]='\0';
// printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
// *newtuple.keylen,
//newtuple.data,
// *newtuple.datalen);
datasize += newtuple.byte_length();
if(dp == NULL)
{
dp = ltable.insertTuple(xid, newtuple, ltable.get_dpstate1(), lt);
dpages++;
dsp.push_back(dp->get_start_pid());
}
else
{
if(!dp->append(xid, newtuple))
{
npages += dp->get_page_count();
delete dp;
dp = ltable.insertTuple(xid, newtuple, ltable.get_dpstate1(), lt);
dpages++;
dsp.push_back(dp->get_start_pid());
}
}
free(newtuple.key);
free(newtuple.data);
}
printf("\nTREE STRUCTURE\n");
lt->print_tree(xid);
printf("Total data set length: %lld\n", (long long)datasize);
printf("Storage utilization: %.2f\n", (datasize+.0) / (PAGE_SIZE * npages));
printf("Number of datapages: %d\n", dpages);
printf("Writes complete.\n");
Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Sequentially reading %d tuples\n", NUM_ENTRIES);
int tuplenum = 0;
treeIterator<datatuple> tree_itr(tree_root);
datatuple *dt=0;
while( (dt=tree_itr.getnext()) != NULL)
{
assert(*(dt->keylen) == key_arr[tuplenum].length()+1);
assert(*(dt->datalen) == data_arr[tuplenum].length()+1);
tuplenum++;
free(dt->keylen);
free(dt);
dt = 0;
}
assert(tuplenum == key_arr.size());
printf("Sequential Reads completed.\n");
int rrsize=key_arr.size() / 3;
printf("Stage 3: Randomly reading %d tuples by key\n", rrsize);
for(int i=0; i<rrsize; i++)
{
//randomly pick a key
int ri = rand()%key_arr.size();
//get the key
uint32_t keylen = key_arr[ri].length()+1;
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
rkey[j] = key_arr[ri][j];
rkey[keylen-1]='\0';
//find the key with the given tuple
datatuple *dt = ltable.findTuple(xid, rkey, keylen, lt);
assert(dt!=0);
assert(*(dt->keylen) == key_arr[ri].length()+1);
assert(*(dt->datalen) == data_arr[ri].length()+1);
free(dt->keylen);
free(dt);
dt = 0;
}
printf("Random Reads completed.\n");
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter(15000);
return 0;
}

331
check_logtree.cpp Normal file
View file

@ -0,0 +1,331 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#define LOG_NAME "check_logTree.log"
#define NUM_ENTRIES_A 10000
#define NUM_ENTRIES_B 10
#define NUM_ENTRIES_C 0
#define OFFSET (NUM_ENTRIES * 10)
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
void preprandstr(int count, std::vector<std::string> &arr)
{
for ( int j=0; j<count; j++)
{
int str_len = rand()%100 + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
//make sure there is no duplicate key
bool dup = false;
for(int i=0; i<j; i++)
if(! (mycmp(arr[i], str) || mycmp(str, arr[i])))
{
dup=true;
break;
}
if(dup)
{
j--;
continue;
}
//printf("keylen-%d\t%d\t%s\n", str_len, str.length(),rc);
free(rc);
arr.push_back(str);
}
}
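// Test: appends each sorted key to the logtree with page number i + OFFSET
// (Stage 1), verifies that findPage returns that page for every key
// (Stage 2), and walks the tree with logtreeIterator, comparing each key
// against the sorted array (Stage 3).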
void insertProbeIter_str(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
logtable ltable;
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
xid = Tbegin();
logtree *lt = ltable.get_tree_c1();
recordid tree = lt->get_root_rec();
long oldpagenum = -1;
std::vector<std::string> arr;
preprandstr(NUM_ENTRIES, arr);
std::sort(arr.begin(), arr.end(), &mycmp);
//for(int i = 0; i < NUM_ENTRIES; i++)
//{
// printf("%s\t", arr[i].c_str());
// int keylen = arr[i].length()+1;
// printf("%d\n", keylen);
//}
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
for(int i = 0; i < NUM_ENTRIES; i++)
{
int keylen = arr[i].length()+1;
byte *currkey = (byte*)malloc(keylen);
for(int j=0; j<keylen-1; j++)
currkey[j] = arr[i][j];
currkey[keylen-1]='\0';
//printf("\n#########\ni=%d\nkey:\t%s\nkeylen:%d\n",i,((char*)currkey),keylen);
long pagenum = logtree::findPage(xid, tree, currkey, keylen);
//printf("pagenum:%d\n", pagenum);
assert(pagenum == -1 || pagenum == oldpagenum || oldpagenum == -1);
//printf("TlsmAppendPage %d\n",i);
recordid rid = lt->get_tree_state();
RegionAllocConf_t alloc_conf;
Tread(xid,rid,&alloc_conf);
logtree::appendPage(xid, tree, lt->lastLeaf, currkey, keylen, lt->alloc_region, &alloc_conf, i + OFFSET);
//DEBUG("{%lld <- alloc region extend}\n", conf.regionList.page);
// XXX get rid of Tset by storing next page in memory, and losing it
// on crash.
Tset(xid,rid,&alloc_conf);
pagenum = logtree::findPage(xid, tree, currkey,keylen);
oldpagenum = pagenum;
//printf("pagenum:%d\n", pagenum);
assert(pagenum == i + OFFSET);
free(currkey);
}
printf("Writes complete.\n");
tree = lt->get_root_rec();
Tcommit(xid);
xid = Tbegin();
printf("\nTREE STRUCTURE\n");
lt->print_tree(xid);
printf("Stage 2: Looking up %d keys\n", NUM_ENTRIES);
for(int i = 0; i < NUM_ENTRIES; i++) {
int keylen = arr[i].length()+1;
byte *currkey = (byte*)malloc(keylen);
for(int j=0; j<keylen-1; j++)
currkey[j] = arr[i][j];
currkey[keylen-1]='\0';
//printf("\n#########\ni=%d\nkey:\t%s\nkeylen:%d\n",i,((char*)currkey),keylen);
long pagenum = logtree::findPage(xid, tree, currkey, keylen);
//printf("pagenum:%d\n", pagenum);
assert(pagenum == i + OFFSET);
free(currkey);
}
printf("Stage 3: Iterating over %d keys\n", NUM_ENTRIES);
int64_t count = 0;
lladdIterator_t * it = logtreeIterator::open(xid, tree);
while(logtreeIterator::next(xid, it)) {
byte * key;
byte **key_ptr = &key;
int keysize = logtreeIterator::key(xid, it, (byte**)key_ptr);
pageid_t *value;
pageid_t **value_ptr = &value;
int valsize = lsmTreeIterator_value(xid, it, (byte**)value_ptr);
//printf("keylen %d key %s\n", keysize, (char*)(key)) ;
assert(valsize == sizeof(pageid_t));
assert(!mycmp(std::string((char*)key), arr[count]) && !mycmp(arr[count],std::string((char*)key)));
assert(keysize == arr[count].length()+1);
count++;
}
assert(count == NUM_ENTRIES);
logtreeIterator::close(xid, it);
Tcommit(xid);
Tdeinit();
}
void insertProbeIter_int(int NUM_ENTRIES)
{
unlink("storefile.txt");
unlink("logfile.txt");
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
logtable ltable;
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
xid = Tbegin();
logtree *lt = ltable.get_tree_c1();
recordid tree = lt->get_root_rec();
long oldpagenum = -1;
for(int32_t i = 0; i < NUM_ENTRIES; i++) {
int keylen = sizeof(int32_t);
byte *currkey = (byte*)malloc(keylen);
memcpy(currkey, (byte*)(&i), keylen);
//currkey[]='\0';
printf("\n#########\ni=%d\nkey:\t%d\nkeylen:%d\n",i,*((int32_t*)currkey),keylen);
long pagenum = logtree::findPage(xid, tree, currkey, keylen);
printf("pagenum:%ld\n", pagenum);
assert(pagenum == -1 || pagenum == oldpagenum || oldpagenum == -1);
printf("TlsmAppendPage %d\n",i);
recordid rid = lt->get_tree_state();
RegionAllocConf_t alloc_conf;
Tread(xid,rid,&alloc_conf);
logtree::appendPage(xid, tree, lt->lastLeaf, currkey, keylen, lt->alloc_region, &alloc_conf, i + OFFSET);
//DEBUG("{%lld <- alloc region extend}\n", conf.regionList.page);
// XXX get rid of Tset by storing next page in memory, and losing it
// on crash.
Tset(xid,rid,&alloc_conf);
pagenum = logtree::findPage(xid, tree, currkey,keylen);
oldpagenum = pagenum;
printf("pagenum:%ld\n", pagenum);
assert(pagenum == i + OFFSET);
free(currkey);
}
printf("Writes complete.\n");
tree = lt->get_root_rec();
Tcommit(xid);
xid = Tbegin();
printf("\nTREE STRUCTURE\n");
lt->print_tree(xid);
for(int32_t i = 1; i < NUM_ENTRIES; i++) {
int keylen = sizeof(int32_t);
byte *currkey = (byte*)malloc(keylen);
memcpy(currkey, (byte*)(&i), keylen);
printf("\n#########\ni=%d\nkey:\t%d\nkeylen:%d\n",i,*((int32_t*)currkey),keylen);
long pagenum = logtree::findPage(xid, tree, currkey, keylen);
printf("pagenum:%ld\n", pagenum);
assert(pagenum == i + OFFSET);
free(currkey);
}
/*
int64_t count = 0;
lladdIterator_t * it = lsmTreeIterator_open(xid, tree);
while(lsmTreeIterator_next(xid, it)) {
lsmkey_t * key;
lsmkey_t **key_ptr = &key;
int size = lsmTreeIterator_key(xid, it, (byte**)key_ptr);
assert(size == sizeof(lsmkey_t));
long *value;
long **value_ptr = &value;
size = lsmTreeIterator_value(xid, it, (byte**)value_ptr);
assert(size == sizeof(pageid_t));
assert(*key + OFFSET == *value);
assert(*key == count);
count++;
}
assert(count == NUM_ENTRIES);
lsmTreeIterator_close(xid, it);
*/
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter_str(NUM_ENTRIES_A);
//insertProbeIter_int(NUM_ENTRIES_A);
return 0;
}

246
check_merge.cpp Normal file
View file

@ -0,0 +1,246 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include "merger.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> *arr)
{
for(int i=arr->size()-1; i>0; i--)
{
if(! (mycmp((*arr)[i], (*arr)[i-1]) || mycmp((*arr)[i-1], (*arr)[i])))
arr->erase(arr->begin()+i);
}
}
void preprandstr(int count, std::vector<std::string> *arr, int avg_len=50)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
free(rc);
arr->push_back(str);
}
}
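// Test: drives the full merge path; tuples are inserted through a logtable
// registered with a merge_scheduler, then every key is looked up again with
// logtable::findTuple before the merge threads are shut down.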
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
//data generation
std::vector<std::string> * data_arr = new std::vector<std::string>;
std::vector<std::string> * key_arr = new std::vector<std::string>;
preprandstr(NUM_ENTRIES, data_arr, 10*8192);
preprandstr(NUM_ENTRIES+200, key_arr, 100);
std::sort(key_arr->begin(), key_arr->end(), &mycmp);
removeduplicates(key_arr);
if(key_arr->size() > NUM_ENTRIES)
key_arr->erase(key_arr->begin()+NUM_ENTRIES, key_arr->end());
NUM_ENTRIES=key_arr->size();
if(data_arr->size() > NUM_ENTRIES)
data_arr->erase(data_arr->begin()+NUM_ENTRIES, data_arr->end());
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
merge_scheduler mscheduler;
logtable ltable;
int pcount = 5;
ltable.set_fixed_page_count(pcount);
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
xid = Tbegin();
int lindex = mscheduler.addlogtable(&ltable);
ltable.setMergeData(mscheduler.getMergeData(lindex));
mscheduler.startlogtable(lindex);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
struct timeval start_tv, stop_tv, ti_st, ti_end;
double insert_time = 0;
int dpages = 0;
int npages = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
gettimeofday(&start_tv,0);
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = (*key_arr)[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)newtuple.key, (*key_arr)[i].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
// newtuple.key[j] = (*key_arr)[i][j];
//newtuple.key[keylen-1]='\0';
//prepare the data
uint32_t datalen = (*data_arr)[i].length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
memcpy((byte*)newtuple.data, (*data_arr)[i].c_str(), datalen);
// for(int j=0; j<datalen-1; j++)
// newtuple.data[j] = (*data_arr)[i][j];
// newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
gettimeofday(&ti_st,0);
ltable.insertTuple(newtuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(newtuple.key);
free(newtuple.data);
}
gettimeofday(&stop_tv,0);
printf("insert time: %6.1f\n", insert_time);
printf("insert time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
printf("\nTREE STRUCTURE\n");
//ltable.get_tree_c1()->print_tree(xid);
printf("datasize: %lld\n", (long long)datasize);
//sleep(20);
Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
int found_tuples=0;
for(int i=NUM_ENTRIES-1; i>=0; i--)
{
int ri = i;
//printf("key index%d\n", i);
fflush(stdout);
//get the key
uint32_t keylen = (*key_arr)[ri].length()+1;
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
memcpy((byte*)rkey, (*key_arr)[ri].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
//rkey[j] = (*key_arr)[ri][j];
//rkey[keylen-1]='\0';
//find the key with the given tuple
datatuple *dt = ltable.findTuple(xid, rkey, keylen);
assert(dt!=0);
//if(dt!=0)
{
found_tuples++;
assert(*(dt->keylen) == (*key_arr)[ri].length()+1);
assert(*(dt->datalen) == (*data_arr)[ri].length()+1);
free(dt->keylen);
free(dt);
}
dt = 0;
free(rkey);
}
printf("found %d\n", found_tuples);
key_arr->clear();
data_arr->clear();
delete key_arr;
delete data_arr;
mscheduler.shutdown();
printf("merge threads finished.\n");
gettimeofday(&stop_tv,0);
printf("run time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter(5000);
return 0;
}

264
check_mergelarge.cpp Normal file
View file

@ -0,0 +1,264 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include "merger.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> *arr)
{
for(int i=arr->size()-1; i>0; i--)
{
if(! (mycmp((*arr)[i], (*arr)[i-1]) || mycmp((*arr)[i-1], (*arr)[i])))
arr->erase(arr->begin()+i);
}
}
void getnextdata(std::string &data, int avg_len)
{
int str_len = (rand()%(avg_len*2)) + 3;
data = std::string(str_len, rand()%10+48);
/*
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
data = std::string(rc);
free(rc);
*/
}
void preprandstr(int count, std::vector<std::string> *arr, int avg_len=50)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
free(rc);
arr->push_back(str);
}
}
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
//data generation
// std::vector<std::string> * data_arr = new std::vector<std::string>;
std::vector<std::string> * key_arr = new std::vector<std::string>;
// preprandstr(NUM_ENTRIES, data_arr, 10*8192);
preprandstr(NUM_ENTRIES+200, key_arr, 100);
std::sort(key_arr->begin(), key_arr->end(), &mycmp);
removeduplicates(key_arr);
if(key_arr->size() > NUM_ENTRIES)
key_arr->erase(key_arr->begin()+NUM_ENTRIES, key_arr->end());
NUM_ENTRIES=key_arr->size();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
merge_scheduler mscheduler;
logtable ltable;
int pcount = 100;
ltable.set_fixed_page_count(pcount);
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
//xid = Tbegin();
int lindex = mscheduler.addlogtable(&ltable);
ltable.setMergeData(mscheduler.getMergeData(lindex));
mscheduler.startlogtable(lindex);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
struct timeval start_tv, stop_tv, ti_st, ti_end;
double insert_time = 0;
int dpages = 0;
int npages = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
gettimeofday(&start_tv,0);
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = (*key_arr)[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)newtuple.key, (*key_arr)[i].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
// newtuple.key[j] = (*key_arr)[i][j];
//newtuple.key[keylen-1]='\0';
//prepare the data
std::string ditem;
getnextdata(ditem, 10*8192);
uint32_t datalen = ditem.length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
memcpy((byte*)newtuple.data, ditem.c_str(), datalen);
// for(int j=0; j<datalen-1; j++)
// newtuple.data[j] = (*data_arr)[i][j];
// newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
gettimeofday(&ti_st,0);
ltable.insertTuple(newtuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(newtuple.key);
free(newtuple.data);
}
gettimeofday(&stop_tv,0);
printf("insert time: %6.1f\n", insert_time);
printf("insert time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
printf("\nTREE STRUCTURE\n");
//ltable.get_tree_c1()->print_tree(xid);
printf("datasize: %lld\n", datasize);
//sleep(20);
/*
//Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
int found_tuples=0;
for(int i=NUM_ENTRIES-1; i>=0; i--)
{
int ri = i;
//printf("key index%d\n", i);
fflush(stdout);
//get the key
uint32_t keylen = (*key_arr)[ri].length()+1;
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
memcpy((byte*)rkey, (*key_arr)[ri].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
//rkey[j] = (*key_arr)[ri][j];
//rkey[keylen-1]='\0';
//find the key with the given tuple
datatuple *dt = ltable.findTuple(xid, rkey, keylen);
assert(dt!=0);
//if(dt!=0)
{
found_tuples++;
assert(*(dt->keylen) == (*key_arr)[ri].length()+1);
//assert(*(dt->datalen) == (*data_arr)[ri].length()+1);
free(dt->keylen);
free(dt);
}
dt = 0;
free(rkey);
}
printf("found %d\n", found_tuples);
key_arr->clear();
//data_arr->clear();
delete key_arr;
//delete data_arr;
*/
mscheduler.shutdown();
printf("merge threads finished.\n");
gettimeofday(&stop_tv,0);
printf("run time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
//Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter(25000);
return 0;
}

409
check_mergetuple.cpp Normal file
View file

@ -0,0 +1,409 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include "merger.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> *arr)
{
for(int i=arr->size()-1; i>0; i--)
{
if(! (mycmp((*arr)[i], (*arr)[i-1]) || mycmp((*arr)[i-1], (*arr)[i])))
arr->erase(arr->begin()+i);
}
}
void getnextdata(std::string &data, int avg_len)
{
int str_len = (rand()%(avg_len*2)) + 3;
data = std::string(str_len, rand()%10+48);
/*
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
data = std::string(rc);
free(rc);
*/
}
void preprandstr(int count, std::vector<std::string> *arr, int avg_len=50)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
free(rc);
arr->push_back(str);
}
}
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
//unlink("storefile.txt");
//unlink("logfile.txt");
sync();
double delete_freq = .05;
double update_freq = .15;
//data generation
typedef std::vector<std::string> key_v_t;
const static int max_partition_size = 100000;
int KEY_LEN = 100;
std::vector<key_v_t*> *key_v_list = new std::vector<key_v_t*>;
int list_size = NUM_ENTRIES / max_partition_size + 1;
for(int i =0; i<list_size; i++)
{
key_v_t * key_arr = new key_v_t;
if(NUM_ENTRIES < max_partition_size*(i+1))
preprandstr(NUM_ENTRIES-max_partition_size*i, key_arr, KEY_LEN);
else
preprandstr(max_partition_size, key_arr, KEY_LEN);
std::sort(key_arr->begin(), key_arr->end(), &mycmp);
key_v_list->push_back(key_arr);
printf("size partition %d is %d\n", i+1, (int)key_arr->size());
}
key_v_t * key_arr = new key_v_t;
std::vector<key_v_t::iterator*> iters;
for(int i=0; i<list_size; i++)
{
iters.push_back(new key_v_t::iterator((*key_v_list)[i]->begin()));
}
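// k-way merge of the sorted partitions: repeatedly take the smallest head
// element among the partition iterators, appending it only if it differs from
// the last key taken, to build one globally sorted, duplicate-free key list.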
int lc = 0;
while(true)
{
int list_index = -1;
for(int i=0; i<list_size; i++)
{
if(*iters[i] == (*key_v_list)[i]->end())
continue;
if(list_index == -1 || mycmp(**iters[i], **iters[list_index]))
list_index = i;
}
if(list_index == -1)
break;
if(key_arr->size() == 0 || mycmp(key_arr->back(), **iters[list_index]))
key_arr->push_back(**iters[list_index]);
(*iters[list_index])++;
lc++;
if(lc % max_partition_size == 0)
printf("%d/%d completed.\n", lc, NUM_ENTRIES);
}
for(int i=0; i<list_size; i++)
{
(*key_v_list)[i]->clear();
delete (*key_v_list)[i];
delete iters[i];
}
key_v_list->clear();
delete key_v_list;
// preprandstr(NUM_ENTRIES, data_arr, 10*8192);
printf("key arr size: %d\n", (int)key_arr->size());
//removeduplicates(key_arr);
if(key_arr->size() > NUM_ENTRIES)
key_arr->erase(key_arr->begin()+NUM_ENTRIES, key_arr->end());
NUM_ENTRIES=key_arr->size();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
merge_scheduler mscheduler;
logtable ltable;
int pcount = 40;
ltable.set_fixed_page_count(pcount);
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
xid = Tbegin();
int lindex = mscheduler.addlogtable(&ltable);
ltable.setMergeData(mscheduler.getMergeData(lindex));
mscheduler.startlogtable(lindex);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
struct timeval start_tv, stop_tv, ti_st, ti_end;
double insert_time = 0;
int dpages = 0;
int npages = 0;
int delcount = 0, upcount = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
std::vector<int> del_list;
gettimeofday(&start_tv,0);
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = (*key_arr)[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)newtuple.key, (*key_arr)[i].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
// newtuple.key[j] = (*key_arr)[i][j];
//newtuple.key[keylen-1]='\0';
//prepare the data
std::string ditem;
getnextdata(ditem, 8192);
uint32_t datalen = ditem.length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
memcpy((byte*)newtuple.data, ditem.c_str(), datalen);
// for(int j=0; j<datalen-1; j++)
// newtuple.data[j] = (*data_arr)[i][j];
// newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
gettimeofday(&ti_st,0);
ltable.insertTuple(newtuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(newtuple.key);
free(newtuple.data);
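//with probability delete_freq, delete one of the 50 most recently inserted
//keys; with probability update_freq, overwrite one of them with fresh data.
//Deleted indexes are tracked in del_list so the lookup phase below knows
//what to expect.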
double rval = ((rand() % 100)+.0)/100;
if( rval < delete_freq) //delete a key
{
int del_index = i - (rand()%50); //delete one of the last inserted 50 elements
if(del_index >= 0 && std::find(del_list.begin(), del_list.end(), del_index) == del_list.end())
{
delcount++;
datatuple deltuple;
keylen = (*key_arr)[del_index].length()+1;
deltuple.keylen = &keylen;
deltuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)deltuple.key, (*key_arr)[del_index].c_str(), keylen);
deltuple.datalen = &datalen;
deltuple.setDelete();
gettimeofday(&ti_st,0);
ltable.insertTuple(deltuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(deltuple.key);
del_list.push_back(del_index);
}
}
else if(rval < delete_freq + update_freq) //update a record
{
int up_index = i - (rand()%50); //update one of the last inserted 50 elements
if(up_index >= 0 && std::find(del_list.begin(), del_list.end(), up_index) == del_list.end())
{//only update non-deleted elements
upcount++;
datatuple uptuple;
keylen = (*key_arr)[up_index].length()+1;
uptuple.keylen = &keylen;
uptuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)uptuple.key, (*key_arr)[up_index].c_str(), keylen);
getnextdata(ditem, 512);
datalen = ditem.length()+1;
uptuple.datalen = &datalen;
uptuple.data = (datatuple::data_t) malloc(datalen);
memcpy((byte*)uptuple.data, ditem.c_str(), datalen);
gettimeofday(&ti_st,0);
ltable.insertTuple(uptuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(uptuple.key);
free(uptuple.data);
}
}
}
gettimeofday(&stop_tv,0);
printf("insert time: %6.1f\n", insert_time);
printf("insert time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
printf("#deletions: %d\n#updates: %d\n", delcount, upcount);
printf("\nTREE STRUCTURE\n");
//ltable.get_tree_c1()->print_tree(xid);
printf("datasize: %lld\n", datasize);
//sleep(20);
Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
int found_tuples=0;
for(int i=NUM_ENTRIES-1; i>=0; i--)
{
int ri = i;
//printf("key index%d\n", i);
fflush(stdout);
//get the key
uint32_t keylen = (*key_arr)[ri].length()+1;
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
memcpy((byte*)rkey, (*key_arr)[ri].c_str(), keylen);
//for(int j=0; j<keylen-1; j++)
//rkey[j] = (*key_arr)[ri][j];
//rkey[keylen-1]='\0';
//find the key with the given tuple
datatuple *dt = ltable.findTuple(xid, rkey, keylen);
if(std::find(del_list.begin(), del_list.end(), i) == del_list.end())
{
assert(dt!=0);
assert(!dt->isDelete());
found_tuples++;
assert(*(dt->keylen) == (*key_arr)[ri].length()+1);
//assert(*(dt->datalen) == (*data_arr)[ri].length()+1);
free(dt->keylen);
free(dt);
}
else
{
if(dt!=0)
{
assert(*(dt->keylen) == (*key_arr)[ri].length()+1);
assert(dt->isDelete());
free(dt->keylen);
free(dt);
}
}
dt = 0;
free(rkey);
}
printf("found %d\n", found_tuples);
key_arr->clear();
//data_arr->clear();
delete key_arr;
//delete data_arr;
mscheduler.shutdown();
printf("merge threads finished.\n");
gettimeofday(&stop_tv,0);
printf("run time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
//insertProbeIter(25000);
insertProbeIter(400000);
/*
insertProbeIter(5000);
insertProbeIter(2500);
insertProbeIter(1000);
insertProbeIter(500);
insertProbeIter(1000);
insertProbeIter(100);
insertProbeIter(10);
*/
return 0;
}

214
check_rbtree.cpp Normal file
View file

@ -0,0 +1,214 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include "merger.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> &arr)
{
for(int i=arr.size()-1; i>0; i--)
{
if(! (mycmp(arr[i], arr[i-1]) || mycmp(arr[i-1], arr[i])))
arr.erase(arr.begin()+i);
}
}
void preprandstr(int count, std::vector<std::string> &arr, int avg_len=50, bool duplicates_allowed=false)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
//make sure there is no duplicate key
if(!duplicates_allowed)
{
bool dup = false;
for(int i=0; i<j; i++)
if(! (mycmp(arr[i], str) || mycmp(str, arr[i])))
{
dup=true;
break;
}
if(dup)
{
j--;
continue;
}
}
//printf("keylen-%d\t%d\t%s\n", str_len, str.length(),rc);
free(rc);
arr.push_back(str);
}
}
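// In-memory test: inserts tuples into a std::set ordered by datatuple's
// comparator (presumably mirroring the in-memory c0 component, per the
// "look in tree_c0" comment below), then finds each key again with a search
// tuple and round-trips the result through to_bytes()/from_bytes().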
void insertProbeIter(int NUM_ENTRIES)
{
//data generation
std::vector<std::string> data_arr;
std::vector<std::string> key_arr;
preprandstr(NUM_ENTRIES, data_arr, 10*8192, true);
preprandstr(NUM_ENTRIES+200, key_arr, 100, true);
std::sort(key_arr.begin(), key_arr.end(), &mycmp);
removeduplicates(key_arr);
if(key_arr.size() > NUM_ENTRIES)
key_arr.erase(key_arr.begin()+NUM_ENTRIES, key_arr.end());
NUM_ENTRIES=key_arr.size();
if(data_arr.size() > NUM_ENTRIES)
data_arr.erase(data_arr.begin()+NUM_ENTRIES, data_arr.end());
std::set<datatuple, datatuple> rbtree;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = key_arr[i].length()+1;
newtuple.keylen = (uint32_t*)malloc(sizeof(uint32_t));
*newtuple.keylen = keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
newtuple.key[j] = key_arr[i][j];
newtuple.key[keylen-1]='\0';
//prepare the data
uint32_t datalen = data_arr[i].length()+1;
newtuple.datalen = (uint32_t*)malloc(sizeof(uint32_t));
*newtuple.datalen = datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
for(int j=0; j<datalen-1; j++)
newtuple.data[j] = data_arr[i][j];
newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
rbtree.insert(newtuple);
}
printf("\nTREE STRUCTURE\n");
//ltable.get_tree_c1()->print_tree(xid);
printf("datasize: %lld\n", (long long)datasize);
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
int found_tuples=0;
for(int i=NUM_ENTRIES-1; i>=0; i--)
{
int ri = i;
//get the key
uint32_t keylen = key_arr[ri].length()+1;
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
rkey[j] = key_arr[ri][j];
rkey[keylen-1]='\0';
//find the key with the given tuple
//prepare a search tuple
datatuple search_tuple;
search_tuple.keylen = (uint32_t*)malloc(sizeof(uint32_t));
*(search_tuple.keylen) = keylen;
search_tuple.key = rkey;
datatuple *ret_tuple=0;
//step 1: look in tree_c0
std::set<datatuple, datatuple>::iterator rbitr = rbtree.find(search_tuple);
if(rbitr != rbtree.end())
{
datatuple tuple = *rbitr;
byte *barr = tuple.to_bytes();
ret_tuple = datatuple::from_bytes(barr);
found_tuples++;
assert(*(ret_tuple->keylen) == key_arr[ri].length()+1);
assert(*(ret_tuple->datalen) == data_arr[ri].length()+1);
free(barr);
free(ret_tuple);
}
else
{
printf("Not in scratch_tree\n");
}
free(search_tuple.keylen);
free(rkey);
}
printf("found %d\n", found_tuples);
}
/** @test
*/
int main()
{
insertProbeIter(250);
return 0;
}

107
check_server.cpp Normal file
View file

@ -0,0 +1,107 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include "datapage.cpp"
#include "logiterators.cpp"
#include "merger.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#include <csignal>
#undef begin
#undef end
logserver *lserver=0;
merge_scheduler *mscheduler=0;
void terminate (int param)
{
printf ("Stopping server...\n");
lserver->stopserver();
delete lserver;
printf("Stopping merge threads...\n");
mscheduler->shutdown();
delete mscheduler;
printf("Deinitializing stasis...\n");
fflush(stdout);
Tdeinit();
exit(0);
}
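// Sets up a logtable plus merge threads and serves it over TCP on port 32432
// until SIGINT, at which point terminate() above stops the server, shuts down
// the merge threads and deinitializes stasis.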
void insertProbeIter(int NUM_ENTRIES)
{
//signal handling
void (*prev_fn)(int);
prev_fn = signal (SIGINT,terminate);
//if (prev_fn==SIG_IGN)
//signal (SIGTERM,SIG_IGN);
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
mscheduler = new merge_scheduler;
logtable ltable;
int pcount = 40;
ltable.set_fixed_page_count(pcount);
recordid table_root = ltable.allocTable(xid);
Tcommit(xid);
int lindex = mscheduler->addlogtable(&ltable);
ltable.setMergeData(mscheduler->getMergeData(lindex));
mscheduler->startlogtable(lindex);
lserver = new logserver(10, 32432);
lserver->startserver(&ltable);
// Tdeinit();
}
/** @test
*/
int main()
{
//insertProbeIter(25000);
insertProbeIter(10000);
/*
insertProbeIter(5000);
insertProbeIter(2500);
insertProbeIter(1000);
insertProbeIter(500);
insertProbeIter(1000);
insertProbeIter(100);
insertProbeIter(10);
*/
return 0;
}

415
check_tcpclient.cpp Normal file
View file

@ -0,0 +1,415 @@
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include "logstore.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#undef begin
#undef end
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> *arr)
{
for(int i=arr->size()-1; i>0; i--)
{
if(! (mycmp((*arr)[i], (*arr)[i-1]) || mycmp((*arr)[i-1], (*arr)[i])))
arr->erase(arr->begin()+i);
}
}
void getnextdata(std::string &data, int avg_len)
{
int str_len = (rand()%(avg_len*2)) + 3;
data = std::string(str_len, rand()%10+48);
/*
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
data = std::string(rc);
free(rc);
*/
}
void preprandstr(int count, std::vector<std::string> *arr, int avg_len=50)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
free(rc);
arr->push_back(str);
}
}
inline void readfromsocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
int r = read( sockd, buf + n, count - n);
if(r <= 0) //error or connection closed; bail out instead of spinning
break;
n += r;
}
}
inline void writetosocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
int r = write( sockd, buf + n, count - n);
if(r <= 0) //error; bail out instead of spinning
break;
n += r;
}
}
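// Wire protocol used against logserver (as exercised here): the client sends
// a 1-byte opcode, then [keylen:uint32][datalen:uint32][key bytes][data bytes,
// unless the tuple is a delete]; the server replies with a 1-byte response
// code, optionally followed by a tuple in the same layout when the code is
// OP_SENDING_TUPLE.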
datatuple * sendTuple(std::string & servername, int serverport, uint8_t opcode, datatuple &tuple)
{
struct sockaddr_in serveraddr;
struct hostent *server;
int sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
printf("ERROR opening socket.\n");
return 0;
}
server = gethostbyname(servername.c_str());
if (server == NULL) {
fprintf(stderr,"ERROR, no such host as %s\n", servername.c_str());
exit(1);
}
/* build the server's Internet address */
bzero((char *) &serveraddr, sizeof(serveraddr));
serveraddr.sin_family = AF_INET;
bcopy((char *)server->h_addr,
(char *)&serveraddr.sin_addr.s_addr, server->h_length);
serveraddr.sin_port = htons(serverport);
/* connect: create a connection with the server */
if (connect(sockfd, (sockaddr*) &serveraddr, sizeof(serveraddr)) < 0)
{
printf("ERROR connecting\n");
return 0;
}
//send the opcode
int n = write(sockfd, (byte*) &opcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
//send the tuple
n = write(sockfd, (byte*) tuple.keylen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
n = write(sockfd, (byte*) tuple.datalen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
writetosocket(sockfd, (byte*) tuple.key, *tuple.keylen);
if(!tuple.isDelete() && *tuple.datalen != 0)
writetosocket(sockfd, (byte*) tuple.data, *tuple.datalen);
//read the reply code
uint8_t rcode;
n = read(sockfd, (byte*) &rcode, sizeof(uint8_t));
if(rcode == logserver::OP_SENDING_TUPLE)
{
datatuple *rcvdtuple = (datatuple*)malloc(sizeof(datatuple));
//read the keylen
rcvdtuple->keylen = (uint32_t*) malloc(sizeof(uint32_t));
n = read(sockfd, (byte*) rcvdtuple->keylen, sizeof(uint32_t));
assert(n == sizeof(uint32_t));
//read the datalen
rcvdtuple->datalen = (uint32_t*) malloc(sizeof(uint32_t));
n = read(sockfd, (byte*) rcvdtuple->datalen, sizeof(uint32_t));
assert(n == sizeof(uint32_t));
//read key
rcvdtuple->key = (byte*) malloc(*rcvdtuple->keylen);
readfromsocket(sockfd, (byte*) rcvdtuple->key, *rcvdtuple->keylen);
if(!rcvdtuple->isDelete())
{
//read key
rcvdtuple->data = (byte*) malloc(*rcvdtuple->datalen);
readfromsocket(sockfd, (byte*) rcvdtuple->data, *rcvdtuple->datalen);
}
close(sockfd);
return rcvdtuple;
}
else
assert(rcode == logserver::OP_SUCCESS);
close(sockfd);
return 0;
}
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
std::string servername = "sherpa4";
int serverport = 32432;
double delete_freq = .05;
double update_freq = .15;
//data generation
typedef std::vector<std::string> key_v_t;
const static int max_partition_size = 100000;
int KEY_LEN = 100;
std::vector<key_v_t*> *key_v_list = new std::vector<key_v_t*>;
int list_size = NUM_ENTRIES / max_partition_size + 1;
for(int i =0; i<list_size; i++)
{
key_v_t * key_arr = new key_v_t;
if(NUM_ENTRIES < max_partition_size*(i+1))
preprandstr(NUM_ENTRIES-max_partition_size*i, key_arr, KEY_LEN);
else
preprandstr(max_partition_size, key_arr, KEY_LEN);
std::sort(key_arr->begin(), key_arr->end(), &mycmp);
key_v_list->push_back(key_arr);
printf("size partition %d is %d\n", i+1, (int)key_arr->size());
}
key_v_t * key_arr = new key_v_t;
std::vector<key_v_t::iterator*> iters;
for(int i=0; i<list_size; i++)
{
iters.push_back(new key_v_t::iterator((*key_v_list)[i]->begin()));
}
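// k-way merge of the sorted partitions: repeatedly take the smallest head
// element among the partition iterators, appending it only if it differs from
// the last key taken, to build one globally sorted, duplicate-free key list.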
int lc = 0;
while(true)
{
int list_index = -1;
for(int i=0; i<list_size; i++)
{
if(*iters[i] == (*key_v_list)[i]->end())
continue;
if(list_index == -1 || mycmp(**iters[i], **iters[list_index]))
list_index = i;
}
if(list_index == -1)
break;
if(key_arr->size() == 0 || mycmp(key_arr->back(), **iters[list_index]))
key_arr->push_back(**iters[list_index]);
(*iters[list_index])++;
lc++;
if(lc % max_partition_size == 0)
printf("%d/%d completed.\n", lc, NUM_ENTRIES);
}
for(int i=0; i<list_size; i++)
{
(*key_v_list)[i]->clear();
delete (*key_v_list)[i];
delete iters[i];
}
key_v_list->clear();
delete key_v_list;
// preprandstr(NUM_ENTRIES, data_arr, 10*8192);
printf("key arr size: %d\n", (int)key_arr->size());
//removeduplicates(key_arr);
if(key_arr->size() > NUM_ENTRIES)
key_arr->erase(key_arr->begin()+NUM_ENTRIES, key_arr->end());
NUM_ENTRIES=key_arr->size();
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
struct timeval start_tv, stop_tv, ti_st, ti_end;
double insert_time = 0;
int dpages = 0;
int npages = 0;
int delcount = 0, upcount = 0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
std::vector<int> del_list;
gettimeofday(&start_tv,0);
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = (*key_arr)[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
memcpy((byte*)newtuple.key, (*key_arr)[i].c_str(), keylen);
//prepare the data
std::string ditem;
getnextdata(ditem, 8192);
uint32_t datalen = ditem.length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
memcpy((byte*)newtuple.data, ditem.c_str(), datalen);
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
gettimeofday(&ti_st,0);
//send the data
sendTuple(servername, serverport, logserver::OP_INSERT, newtuple);
gettimeofday(&ti_end,0);
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
free(newtuple.key);
free(newtuple.data);
if(i % 10000 == 0 && i > 0)
printf("%d / %d inserted.\n", i, NUM_ENTRIES);
}
gettimeofday(&stop_tv,0);
printf("insert time: %6.1f\n", insert_time);
printf("insert time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
printf("#deletions: %d\n#updates: %d\n", delcount, upcount);
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
int found_tuples=0;
for(int i=NUM_ENTRIES-1; i>=0; i--)
{
int ri = i;
//printf("key index%d\n", i);
fflush(stdout);
//get the key
uint32_t keylen = (*key_arr)[ri].length()+1;
datatuple searchtuple;
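//pack keylen, datalen and the key into one allocation so a single
//free(searchtuple.keylen) below releases the whole search tuple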
searchtuple.keylen = (uint32_t*)malloc(2*sizeof(uint32_t) + keylen);
*searchtuple.keylen = keylen;
searchtuple.datalen = searchtuple.keylen + 1;
*searchtuple.datalen = 0;
searchtuple.key = (datatuple::key_t)(searchtuple.keylen + 2);
memcpy((byte*)searchtuple.key, (*key_arr)[ri].c_str(), keylen);
//find the key with the given tuple
datatuple *dt = sendTuple(servername, serverport, logserver::OP_FIND,
searchtuple);
assert(dt!=0);
assert(!dt->isDelete());
found_tuples++;
assert(*(dt->keylen) == (*key_arr)[ri].length()+1);
//free dt
free(dt->keylen);
free(dt->datalen);
free(dt->key);
free(dt->data);
free(dt);
dt = 0;
free(searchtuple.keylen);
}
printf("found %d\n", found_tuples);
key_arr->clear();
//data_arr->clear();
delete key_arr;
//delete data_arr;
gettimeofday(&stop_tv,0);
printf("run time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
}
/** @test
*/
int main()
{
//insertProbeIter(25000);
insertProbeIter(100000);
/*
insertProbeIter(5000);
insertProbeIter(2500);
insertProbeIter(1000);
insertProbeIter(500);
insertProbeIter(1000);
insertProbeIter(100);
insertProbeIter(10);
*/
return 0;
}

9
cmds.txt Normal file
View file

@ -0,0 +1,9 @@
dd if=/dev/zero of=storefile.txt bs=1M count=20000
/dhtRecOpsGenerator -d clientType=LogStoreClient host=sherpa4 numOps=10ls existingStartKey=100 existingEndKey=1000 insertRatio=1.0
dhtRecOpsGeneratorWrapper startClientID=1 endClientID=4 -d clientType=LogStoreClient host=sherpa4.corp.re1.yahoo.com numOps=5000000 existingStartKey=100 existingEndKey=10000000 insertRatio=1.0 readRatio=0 numClients=3

507
datapage.cpp Normal file
View file

@ -0,0 +1,507 @@
#include "logstore.h"
#include "datapage.h"
template <class TUPLE>
const int32_t DataPage<TUPLE>::HEADER_SIZE = sizeof(int32_t);
template <class TUPLE>
DataPage<TUPLE>::DataPage(int xid, pageid_t pid):
alloc_region(0),
alloc_state(0),
fix_pcount(-1)
{
assert(pid!=0);
pcount = readPageCount(xid, pid);
pidarr = (pageid_t *) malloc(sizeof(pageid_t) * pcount);
for(int i=0; i<pcount; i++)
pidarr[i] = i + pid;
byte_offset = HEADER_SIZE; //step over the header info
}
template <class TUPLE>
DataPage<TUPLE>::DataPage(int xid, int fix_pcount, pageid_t (*alloc_region)(int, void*), void * alloc_state)
{
assert(fix_pcount >= 1);
byte_offset = -1;
this->fix_pcount = fix_pcount;
if(alloc_region != 0)
this->alloc_region = alloc_region;
if(alloc_state != 0)
this->alloc_state = alloc_state;
initialize(xid);
}
template<class TUPLE>
DataPage<TUPLE>::~DataPage()
{
if(pidarr)
free(pidarr);
}
template<class TUPLE>
void DataPage<TUPLE>::initialize(int xid)
{
//initializes to an empty datapage
//alloc a new page
pageid_t pid = alloc_region(xid, alloc_state);
//load the first page
//Page *p = loadPage(xid, pid);
Page *p = loadPageOfType(xid, pid, SEGMENT_PAGE);
writelock(p->rwlatch,0);
//initialize header
//set number of pages to 1
int32_t * numpages_ptr = (int32_t*)stasis_page_byte_ptr_from_start(p, 0);
*numpages_ptr = 1;
//write 0 to first data size
int32_t * size_ptr = (int32_t*)stasis_page_byte_ptr_from_start(p, HEADER_SIZE);
*size_ptr = 0;
//set the page dirty
stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), p);
//release the page
unlock(p->rwlatch);
releasePage(p);
//set the class variables
byte_offset = HEADER_SIZE;
pcount = 1;
pidarr = (pageid_t *) malloc(fix_pcount * sizeof(pageid_t));
pidarr[0] = pid;
}
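// Record layout inside a datapage: each record is stored as a 4-byte length
// followed by the serialized tuple bytes; a zero length (written whenever
// there is room for one) marks the current end of the record stream.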
template <class TUPLE>
inline bool DataPage<TUPLE>::append(int xid, TUPLE const & dat)
{
assert(byte_offset >= HEADER_SIZE);
assert(fix_pcount >= 1);
//check if there is enough space (for the data length + data)
int32_t blen = dat.byte_length() + sizeof(int32_t);
if(PAGE_SIZE * fix_pcount - byte_offset < blen)
{
//check if the record is too large
// and if so do we wanna accomodate here by going over the fix_pcount
if(PAGE_SIZE * fix_pcount - HEADER_SIZE < blen && //record is larger than datapage
PAGE_SIZE * fix_pcount - HEADER_SIZE > 2 * byte_offset)//accept if i am less than half full
{
//nothing
}
else
{
//printf("page has %d bytes left, we needed %d. (byte_offset %d)\n",
//PAGE_SIZE * fix_pcount - byte_offset, blen, byte_offset);
return false; //not enough mana, return
}
}
//write the length of the data
int32_t dsize = blen - sizeof(int32_t);
if(!writebytes(xid, sizeof(int32_t), (byte*)(&dsize)))
return false;
byte_offset += sizeof(int32_t);
//write the data
byte * barr = dat.to_bytes();
if(!writebytes(xid, dsize, barr)) //if write fails, undo the previous write
{
byte_offset -= sizeof(int32_t);
free(barr);
//write 0 for the next tuple size, if there is enough space in this page
if(PAGE_SIZE - (byte_offset % PAGE_SIZE) >= sizeof(int32_t))
{
dsize = 0;
writebytes(xid, sizeof(int32_t), (byte*)(&dsize));//this will succeed, since there is enough space on the page
}
return false;
}
free(barr);
byte_offset += dsize;
//write 0 for the next tuple size, if there is enough space in this page
if(PAGE_SIZE - (byte_offset % PAGE_SIZE) >= sizeof(int32_t))
{
dsize = 0;
writebytes(xid, sizeof(int32_t), (byte*)(&dsize));//this will succeed, since there is enough space on the page
}
return true;
}
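// Copies count bytes at the current byte_offset, spanning page boundaries.
// Additional pages are allocated on demand; they must be physically
// contiguous with the previous page, and pidarr is grown in fix_pcount-sized
// increments as needed.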
template <class TUPLE>
bool DataPage<TUPLE>::writebytes(int xid, int count, byte *data)
{
int32_t bytes_copied = 0;
while(bytes_copied < count)
{
//load the page to copy into
int pindex = (byte_offset + bytes_copied) / PAGE_SIZE;
if(pindex == pcount) //then this page must be allocated
{
pageid_t newid = alloc_region(xid, alloc_state);
//check continuity
if(pidarr[pindex-1] != newid - 1)//so we started a new region and that is not right after the prev region in the file
{
return false;//we cant store this
}
//check whether we need to extend the pidarr, add fix_pcount many pageid_t slots
if(pindex >= fix_pcount && (pindex % fix_pcount==0))
{
pidarr = (pageid_t*)realloc(pidarr, (pindex + fix_pcount)*sizeof(pageid_t));
}
pidarr[pindex] = newid;
pcount++;
incrementPageCount(xid, pidarr[0]);
}
//Page *p = loadPage(xid, pidarr[pindex]);
Page *p = loadPageOfType(xid, pidarr[pindex], SEGMENT_PAGE);
writelock(p->rwlatch,0);
//copy the portion of bytes we can copy in this page
int32_t page_offset = (byte_offset+bytes_copied) % PAGE_SIZE;
int32_t copy_len = ( (PAGE_SIZE - page_offset < count - bytes_copied ) ? PAGE_SIZE - page_offset: count - bytes_copied);
byte * pb_ptr = stasis_page_byte_ptr_from_start(p, page_offset);
memcpy(pb_ptr, data+bytes_copied ,copy_len);
//release the page
stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), p);
unlock(p->rwlatch);
releasePage(p);
//update the copied bytes_count
bytes_copied += copy_len;
}
assert(bytes_copied == count);
return true;
}
template <class TUPLE>
bool DataPage<TUPLE>::recordRead(int xid, typename TUPLE::key_t key, size_t keySize, TUPLE ** buf)
{
RecordIterator itr(this);
int match = -1;
while((*buf=itr.getnext(xid)) != 0)
{
match = TUPLE::compare((*buf)->get_key(), key);
if(match<0) //keep searching
{
free((*buf)->keylen);
free(*buf);
*buf=0;
}
else if(match==0) //found
{
return true;
}
else // match > 0, then does not exist
{
free((*buf)->keylen);
free(*buf);
*buf = 0;
break;
}
}
return false;
}
template <class TUPLE>
void DataPage<TUPLE>::readbytes(int xid, int32_t offset, int count, byte **data)
{
if(*data==NULL)
*data = (byte*)malloc(count);
int32_t bytes_copied = 0;
while(bytes_copied < count)
{
//load the page to copy from
int pindex = (offset + bytes_copied) / PAGE_SIZE;
//Page *p = loadPage(xid, pidarr[pindex]);
Page *p = loadPageOfType(xid, pidarr[pindex], SEGMENT_PAGE);
readlock(p->rwlatch,0);
//copy the portion of bytes we can copy from this page
int32_t page_offset = (offset+bytes_copied) % PAGE_SIZE;
int32_t copy_len = ( (PAGE_SIZE - page_offset < count - bytes_copied ) ? PAGE_SIZE - page_offset : count - bytes_copied);
byte * pb_ptr = stasis_page_byte_ptr_from_start(p, page_offset);
memcpy((*data)+bytes_copied, pb_ptr, copy_len);
//release the page
unlock(p->rwlatch);
releasePage(p);
//update the copied bytes_count
bytes_copied += copy_len;
}
assert(bytes_copied == count);
}
template <class TUPLE>
inline int DataPage<TUPLE>::readPageCount(int xid, pageid_t pid)
{
//Page *p = loadPage(xid, pid);
Page *p = loadPageOfType(xid, pid, SEGMENT_PAGE);
readlock(p->rwlatch,0);
int32_t numpages = *((int32_t*)stasis_page_byte_ptr_from_start(p, 0));
unlock(p->rwlatch);
releasePage(p);
return numpages;
}
template <class TUPLE>
inline void DataPage<TUPLE>::incrementPageCount(int xid, pageid_t pid, int add)
{
//Page *p = loadPage(xid, pid);
Page *p = loadPageOfType(xid, pid, SEGMENT_PAGE);
writelock(p->rwlatch,0);
int32_t *numpages_ptr = (int32_t*)stasis_page_byte_ptr_from_start(p, 0);
*numpages_ptr = *numpages_ptr + add;
stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), p);
unlock(p->rwlatch);
releasePage(p);
}
template <class TUPLE>
inline uint16_t DataPage<TUPLE>::recordCount(int xid)
{
return 0;
}
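// Region-based page allocator: regionList is an ArrayList that records the
// first page of each region; when the current region is exhausted a fresh
// regionSize-page region is allocated, its start page is appended to the
// list, and pages are then handed out sequentially from it.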
template <class TUPLE>
pageid_t DataPage<TUPLE>::dp_alloc_region(int xid, void *conf)
{
RegionAllocConf_t* a = (RegionAllocConf_t*)conf;
if(a->nextPage == a->endOfRegion) {
if(a->regionList.size == -1) {
//DEBUG("nextPage: %lld\n", a->nextPage);
a->regionList = TarrayListAlloc(xid, 1, 4, sizeof(pageid_t));
DEBUG("regionList.page: %lld\n", a->regionList.page);
DEBUG("regionList.slot: %d\n", a->regionList.slot);
DEBUG("regionList.size: %lld\n", a->regionList.size);
a->regionCount = 0;
}
DEBUG("{%lld <- alloc region arraylist}\n", a->regionList.page);
TarrayListExtend(xid,a->regionList,1);
a->regionList.slot = a->regionCount;
DEBUG("region lst slot %d\n",a->regionList.slot);
a->regionCount++;
DEBUG("region count %lld\n",a->regionCount);
a->nextPage = TregionAlloc(xid, a->regionSize,12);
DEBUG("next page %lld\n",a->nextPage);
a->endOfRegion = a->nextPage + a->regionSize;
Tset(xid,a->regionList,&a->nextPage);
DEBUG("next page %lld\n",a->nextPage);
}
DEBUG("%lld ?= %lld\n", a->nextPage,a->endOfRegion);
pageid_t ret = a->nextPage;
// Ensure the page is in buffer cache without accessing disk (this
// sets it to clean and all zeros if the page is not in cache).
// Hopefully, future reads will get a cache hit, and avoid going to
// disk.
Page * p = loadUninitializedPage(xid, ret);
//writelock(p->rwlatch,0);
p->pageType = SEGMENT_PAGE;
//unlock(p->rwlatch);
releasePage(p);
DEBUG("ret %lld\n",ret);
(a->nextPage)++;
return ret;
}
template <class TUPLE>
pageid_t DataPage<TUPLE>::dp_alloc_region_rid(int xid, void * ridp) {
recordid rid = *(recordid*)ridp;
RegionAllocConf_t conf;
Tread(xid,rid,&conf);
pageid_t ret = dp_alloc_region(xid,&conf);
//DEBUG("{%lld <- alloc region extend}\n", conf.regionList.page);
// XXX get rid of Tset by storing next page in memory, and losing it
// on crash.
Tset(xid,rid,&conf);
return ret;
}
template <class TUPLE>
void DataPage<TUPLE>::dealloc_region_rid(int xid, void *conf)
{
RegionAllocConf_t a = *((RegionAllocConf_t*)conf);
DEBUG("{%lld <- dealloc region arraylist}\n", a.regionList.page);
for(int i = 0; i < a.regionCount; i++) {
a.regionList.slot = i;
pageid_t pid;
Tread(xid,a.regionList,&pid);
TregionDealloc(xid,pid);
}
}
template <class TUPLE>
void DataPage<TUPLE>::force_region_rid(int xid, void *conf)
{
recordid rid = *(recordid*)conf;
RegionAllocConf_t a;
Tread(xid,rid,&a);
for(int i = 0; i < a.regionCount; i++)
{
a.regionList.slot = i;
pageid_t pid;
Tread(xid,a.regionList,&pid);
stasis_dirty_page_table_flush_range((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), pid, pid+a.regionSize);
forcePageRange(pid, pid+a.regionSize);
}
}
///////////////////////////////////////////////////////////////
//RECORD ITERATOR
///////////////////////////////////////////////////////////////
template <class TUPLE>
TUPLE* DataPage<TUPLE>::RecordIterator::getnext(int xid)
{
int pindex = offset / PAGE_SIZE;
if(pindex == dp->pcount)//past end
return 0;
if(pindex == dp->pcount - 1 && (PAGE_SIZE - (offset % PAGE_SIZE) < sizeof(int32_t)))
return 0;
//Page *p = loadPage(xid, dp->pidarr[pindex]);
Page *p = loadPageOfType(xid, dp->pidarr[pindex], SEGMENT_PAGE);
readlock(p->rwlatch,0);
int32_t *dsize_ptr;
if(PAGE_SIZE - (offset % PAGE_SIZE) < sizeof(int32_t)) //the length int straddles a page boundary
{
dsize_ptr = 0;
dp->readbytes(xid, offset, sizeof(int32_t), (byte**)(&dsize_ptr));
}
else //int in a single page
dsize_ptr = (int32_t*)stasis_page_byte_ptr_from_start(p, offset % PAGE_SIZE);
offset += sizeof(int32_t);
if(*dsize_ptr == 0) //no more keys
{
unlock(p->rwlatch);
releasePage(p);
return 0;
}
byte* tb=0;
dp->readbytes(xid, offset, *dsize_ptr, &tb);
TUPLE *tup = TUPLE::from_bytes(tb);
offset += *dsize_ptr;
unlock(p->rwlatch);
releasePage(p);
return tup;
}
template <class TUPLE>
void DataPage<TUPLE>::RecordIterator::advance(int xid, int count)
{
int pindex = -1;
Page *p = 0;
for(int i=0; i<count; i++)
{
if(pindex != offset / PAGE_SIZE) //advance to new page if necessary
{
if(p!=NULL)
{
unlock(p->rwlatch);
releasePage(p);
}
pindex = offset / PAGE_SIZE;
if(pindex == dp->pcount)//past end
return;
//p = loadPage(xid, dp->pidarr[pindex]);
p = loadPageOfType(xid, dp->pidarr[pindex], SEGMENT_PAGE);
readlock(p->rwlatch,0);
}
if(pindex == dp->pcount - 1 && (PAGE_SIZE - (offset % PAGE_SIZE) < sizeof(int32_t)))
return;
int32_t *dsize_ptr=0;
if(PAGE_SIZE - (offset % PAGE_SIZE) < sizeof(int32_t)) //the length int straddles a page boundary
dp->readbytes(xid, offset, sizeof(int32_t), (byte**)(&dsize_ptr));
else //int in a single page
dsize_ptr = (int32_t*)stasis_page_byte_ptr_from_start(p, offset % PAGE_SIZE);
offset += sizeof(int32_t);
if(*dsize_ptr == 0) //no more keys
{
unlock(p->rwlatch);
releasePage(p);
return;
}
offset += *dsize_ptr;
}
}
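A minimal usage sketch of the iterator implemented above: scanning every tuple in an existing DataPage, assuming a Stasis transaction handle xid and the page group's first pageid (for example, taken from a logtree leaf) are already in hand. getnext() returns 0 once the page group is exhausted.
#include <cstdio>
#include "datapage.h"
#include "datatuple.h"
// Illustrative only: walk a DataPage<datatuple> with its RecordIterator.
void scan_data_page(int xid, pageid_t first_pid)
{
    DataPage<datatuple> dp(xid, first_pid);               // open an existing data page
    DataPage<datatuple>::RecordIterator itr = dp.begin();
    datatuple *t;
    while((t = itr.getnext(xid)) != 0)                     // 0 means no more tuples
    {
        printf("key: %s\n", datatuple::key_to_str(t->get_key()).c_str());
        datatuple::release(t);                             // frees the tuple struct
    }
}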

110
datapage.h Normal file
View file

@ -0,0 +1,110 @@
#ifndef _SIMPLE_DATA_PAGE_H_
#define _SIMPLE_DATA_PAGE_H_
#include <limits.h>
#include <stasis/page.h>
#include <stasis/constants.h>
template<class TUPLE>
class DataPage
{
public:
class RecordIterator
{
public:
RecordIterator(DataPage *dp)
{
offset = HEADER_SIZE;
this->dp = dp;
}
RecordIterator(const RecordIterator &rhs)
{
this->offset = rhs.offset;
this->dp = rhs.dp;
}
void operator=(const RecordIterator &rhs)
{
this->offset = rhs.offset;
this->dp = rhs.dp;
}
//returns the next tuple and also advances the iterator
TUPLE *getnext(int xid);
//advance the iterator by count tuples, i.e. skip over count tuples
void advance(int xid, int count=1);
int32_t offset ;
DataPage *dp;
};
public:
//to be used when reading an existing data page from disk
DataPage( int xid, pageid_t pid );
//to be used to create new data pages
DataPage( int xid, int fix_pcount, pageid_t (*alloc_region)(int, void*), void * alloc_state);
~DataPage();
inline bool append(int xid, TUPLE const & dat);
bool recordRead(int xid, typename TUPLE::key_t key, size_t keySize, TUPLE ** buf);
inline uint16_t recordCount(int xid);
RecordIterator begin(){return RecordIterator(this);}
pageid_t get_start_pid(){return pidarr[0];}
int get_page_count(){return pcount;}
static pageid_t dp_alloc_region(int xid, void *conf);
static pageid_t dp_alloc_region_rid(int xid, void * ridp);
static void dealloc_region_rid(int xid, void* conf);
static void force_region_rid(int xid, void *conf);
public:
private:
void initialize(int xid);
//reads the page count information from the first page
int readPageCount(int xid, pageid_t pid);
void incrementPageCount(int xid, pageid_t pid, int add=1);
bool writebytes(int xid, int count, byte *data);
inline void readbytes(int xid, int32_t offset, int count, byte **data=0);
private:
int fix_pcount; //number of pages in a standard data page
int pcount;
pageid_t *pidarr;
int32_t byte_offset;//points to the next free byte
//page alloc function
pageid_t (*alloc_region)(int, void*);
void *alloc_state;
static const int32_t HEADER_SIZE;
};
#endif
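For the write path, a sketch under the same assumptions: a fresh DataPage is built with one of the static region allocators declared above and filled with append() calls. The RegionAllocConf_t allocator state (defined in logstore.h) is assumed to have been initialized elsewhere.
#include "datapage.h"
#include "datatuple.h"
// Illustrative only: fill a new DataPage and return its first pageid.
pageid_t fill_data_page(int xid, int fix_pcount, void *alloc_state,
                        datatuple *tuples, int ntuples)
{
    DataPage<datatuple> dp(xid, fix_pcount,
                           DataPage<datatuple>::dp_alloc_region, alloc_state);
    for(int i = 0; i < ntuples; i++)
        if(!dp.append(xid, tuples[i]))  // assumption: append() returns false once the page is full
            break;
    return dp.get_start_pid();          // first page id, to be stored in the index
}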

147
datatuple.h Normal file
View file

@ -0,0 +1,147 @@
#ifndef _DATATUPLE_H_
#define _DATATUPLE_H_
typedef unsigned char uchar;
#include <string>
//#define byte unsigned char
typedef unsigned char byte;
#include <cstring>
#include <climits> // UINT_MAX is used as the delete marker below
#include <cstdlib> // malloc/free used by to_bytes()/from_bytes()/release()
#include <stdint.h> // uint32_t/int32_t
//#include <stdio.h>
//#include <stdlib.h>
//#include <errno.h>
typedef struct datatuple
{
typedef uchar* key_t;
typedef uchar* data_t;
uint32_t *keylen; //key length should be size of string + 1 for the terminating \0
uint32_t *datalen;
key_t key;
data_t data;
//this is used by the stl set
bool operator() (const datatuple& lhs, const datatuple& rhs) const
{
//std::basic_string<uchar> s1(lhs.key);
//std::basic_string<uchar> s2(rhs.key);
return strcmp((char*)lhs.key,(char*)rhs.key) < 0;
//return (*((int32_t*)lhs.key)) <= (*((int32_t*)rhs.key));
}
/**
* return -1 if k1 < k2
* 0 if k1 == k2
* 1 if k1 > k2
**/
static int compare(const key_t k1,const key_t k2)
{
//for char* ending with \0
return strcmp((char*)k1,(char*)k2);
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
void setDelete()
{
*datalen = UINT_MAX;
}
inline bool isDelete() const
{
return *datalen == UINT_MAX;
}
static std::string key_to_str(const byte* k)
{
//for strings
return std::string((char*)k);
//for int
/*
std::ostringstream ostr;
ostr << *((int32_t*)k);
return ostr.str();
*/
}
//returns the length of the byte array representation
int32_t byte_length() const{
static const size_t isize = sizeof(uint32_t);
if(isDelete())
return isize + *keylen + isize;
else
return isize + *keylen + isize + (*datalen);
}
//format: key length _ data length _ key _ data
byte * to_bytes() const {
static const size_t isize = sizeof(uint32_t);
byte * ret;
if(!isDelete())
ret = (byte*) malloc(isize + *keylen + isize + *datalen);
else
ret = (byte*) malloc(isize + *keylen + isize);
memcpy(ret, (byte*)(keylen), isize);
memcpy(ret+isize, (byte*)(datalen), isize);
memcpy(ret+isize+isize, key, *keylen);
if(!isDelete())
memcpy(ret+isize+isize+*keylen, data, *datalen);
return ret;
}
//does not copy the data again
//just sets the pointers in the datatuple to
//right positions in the given arr
static datatuple* from_bytes(const byte * arr)
{
static const size_t isize = sizeof(uint32_t);
datatuple *dt = (datatuple*) malloc(sizeof(datatuple));
dt->keylen = (uint32_t*) arr;
dt->datalen = (uint32_t*) (arr+isize);
dt->key = (key_t) (arr+isize+isize);
if(!dt->isDelete())
dt->data = (data_t) (arr+isize+isize+ *(dt->keylen));
else
dt->data = 0;
return dt;
}
/*
static datatuple form_tuple(const byte * arr)
{
static const size_t isize = sizeof(uint32_t);
datatuple dt;
dt.keylen = (uint32_t*) arr;
dt.datalen = (uint32_t*) (arr+isize);
dt.key = (key_t) (arr+isize+isize);
if(!dt.isDelete())
dt.data = (data_t) (arr+isize+isize+ *(dt.keylen));
else
dt.data = 0;
return dt;
}
*/
byte * get_key() { return (byte*) key; }
byte * get_data() { return (byte*) data; }
//releases only the tuple
static void release(datatuple *dt)
{
free(dt);
}
} datatuple;
#endif
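A small round-trip sketch of the byte layout documented above (key length, data length, key, data). The key length counts the trailing '\0', which is what keeps the strcmp-based compare() working on keys recovered by from_bytes().
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "datatuple.h"
// Illustrative only: serialize a tuple and read it back without copying.
int main()
{
    uint32_t keylen  = strlen("fruit") + 1;   // + 1 for the terminating '\0'
    uint32_t datalen = strlen("apple") + 1;
    datatuple t;
    t.keylen  = &keylen;
    t.datalen = &datalen;
    t.key     = (datatuple::key_t) "fruit";
    t.data    = (datatuple::data_t) "apple";
    byte *flat = t.to_bytes();                      // key length _ data length _ key _ data
    datatuple *copy = datatuple::from_bytes(flat);  // pointers into flat, no second copy
    printf("%s -> %s (%d bytes)\n", copy->get_key(), copy->get_data(), t.byte_length());
    datatuple::release(copy);   // frees only the struct
    free(flat);                 // the flattened buffer stays owned by the caller
    return 0;
}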

48
hello.cpp Normal file
View file

@ -0,0 +1,48 @@
#include <string>
#include <string.h>
#include <iostream>
#include<stasis/transactional.h>
typedef unsigned char uchar;
typedef struct datatuple
{
typedef byte* key_t;
typedef byte* data_t;
uint32_t keylen;
uint32_t datalen;
key_t key;
data_t data;
} datatuple;
int main(int argc, char** argv) {
bool * m1 = new bool(false);
std::cout << *m1 << std::endl;
datatuple t;
std::cout << "size of datatuple:\t" << sizeof(datatuple) << std::endl;
t.key = (datatuple::key_t) malloc(10);
const char * str = "12345678";
strcpy((char*)t.key, (str));
t.keylen = strlen((char*)t.key);
t.data = (datatuple::data_t) malloc(10);
const char * str2 = "1234567";
strcpy((char*)t.data, (str2));
t.datalen = strlen((char*)t.data);
std::cout << "size of datatuple:\t" << sizeof(datatuple) << std::endl;
std::cout << "keylen:\t" << t.keylen <<
"\tdatalen:\t" << t.datalen <<
"\t" << t.key <<
"\t" << t.data <<
std::endl;
}

200
logiterators.cpp Normal file
View file

@ -0,0 +1,200 @@
#include "logstore.h"
//#include "datapage.cpp"
#include "logiterators.h"
//template <class MEMTREE, class TUPLE>
/*
template <>
const byte* toByteArray<std::set<datatuple,datatuple>, datatuple>(
memTreeIterator<std::set<datatuple,datatuple>, datatuple> * const t)
{
return (*(t->it_)).to_bytes();
}
*/
/////////////////////////////////////////////////////////////////////
// tree iterator implementation
/////////////////////////////////////////////////////////////////////
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(recordid tree) :
tree_(tree),
lsmIterator_(logtreeIterator::open(-1,tree)),
curr_tuple(0)
{
init_helper();
}
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(recordid tree, TUPLE& key) :
tree_(tree),
//scratch_(),
lsmIterator_(logtreeIterator::openAt(-1,tree,key.get_key()))//toByteArray())),
//slot_(0)
{
init_helper();
/*
treeIterator * end = this->end();
for(;*this != *end && **this < key; ++(*this))
{
DEBUG("treeIterator was not at the given TUPLE");
}
delete end;
*/
}
template <class TUPLE>
treeIterator<TUPLE>::~treeIterator()
{
if(lsmIterator_)
logtreeIterator::close(-1, lsmIterator_);
if(curr_tuple != NULL)
free(curr_tuple);
if(curr_page!=NULL)
{
delete curr_page;
curr_page = 0;
}
}
template <class TUPLE>
void treeIterator<TUPLE>::init_helper()
{
if(!lsmIterator_)
{
printf("treeIterator:\t__error__ init_helper():\tnull lsmIterator_");
curr_page = 0;
dp_itr = 0;
}
else
{
if(logtreeIterator::next(-1, lsmIterator_) == 0)
{
//printf("treeIterator:\t__error__ init_helper():\tlogtreeIteratr::next returned 0." );
curr_page = 0;
dp_itr = 0;
}
else
{
pageid_t * pid_tmp;
pageid_t ** hack = &pid_tmp;
logtreeIterator::value(-1,lsmIterator_,(byte**)hack);
curr_pageid = *pid_tmp;
curr_page = new DataPage<TUPLE>(-1, curr_pageid);
dp_itr = new DPITR_T(curr_page->begin());
}
}
}
template <class TUPLE>
TUPLE * treeIterator<TUPLE>::getnext()
{
assert(this->lsmIterator_);
if(dp_itr == 0)
return 0;
TUPLE* readTuple = dp_itr->getnext(-1);
if(!readTuple)
{
delete dp_itr;
dp_itr = 0;
delete curr_page;
curr_page = 0;
if(logtreeIterator::next(-1,lsmIterator_))
{
pageid_t *pid_tmp;
pageid_t **hack = &pid_tmp;
logtreeIterator::value(-1,lsmIterator_,(byte**)hack);
curr_pageid = *pid_tmp;
curr_page = new DataPage<TUPLE>(-1, curr_pageid);
dp_itr = new DPITR_T(curr_page->begin());
readTuple = dp_itr->getnext(-1);
assert(readTuple);
}
else
{
// TODO: what is this?
//past end of iterator! "end" should contain the pageid of the
// last leaf, and 1+ numslots on that page.
//abort();
}
}
return curr_tuple=readTuple;
}
/*
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(treeIteratorHandle* tree, TUPLE& key) :
tree_(tree?tree->r_:NULLRID),
scratch_(),
lsmIterator_(logtreeIterator::openAt(-1,tree?tree->r_:NULLRID,key.get_key())),//toByteArray())),
slot_(0)
{
init_helper();
if(lsmIterator_) {
treeIterator * end = this->end();
for(;*this != *end && **this < key; ++(*this)) { }
delete end;
} else {
this->slot_ = 0;
this->pageid_ = 0;
}
}
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(recordid tree, TUPLE &scratch) :
tree_(tree),
scratch_(scratch),
lsmIterator_(logtreeIterator::open(-1,tree)),
slot_(0)
{
init_helper();
}
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(treeIteratorHandle* tree) :
tree_(tree?tree->r_:NULLRID),
scratch_(),
lsmIterator_(logtreeIterator::open(-1,tree?tree->r_:NULLRID)),
slot_(0)
{
init_helper();
}
template <class TUPLE>
treeIterator<TUPLE>::treeIterator(treeIterator& t) :
tree_(t.tree_),
scratch_(t.scratch_),
lsmIterator_(t.lsmIterator_?logtreeIterator::copy(-1,t.lsmIterator_):0),
slot_(t.slot_),
pageid_(t.pageid_),
p_((Page*)((t.p_)?loadPage(-1,t.p_->id):0))
//currentPage_((PAGELAYOUT*)((p_)?p_->impl:0))
{
if(p_)
readlock(p_->rwlatch,0);
}
*/

173
logiterators.h Normal file
View file

@ -0,0 +1,173 @@
#ifndef _LOG_ITERATORS_H_
#define _LOG_ITERATORS_H_
#include <assert.h>
#include <stasis/iterator.h>
#undef begin
#undef end
template <class MEMTREE, class TUPLE> class memTreeIterator;
template <class MEMTREE, class TUPLE>
const byte* toByteArray(memTreeIterator<MEMTREE,TUPLE> * const t);
template <class TUPLE>
class DataPage;
//////////////////////////////////////////////////////////////
// memTreeIterator
/////////////////////////////////////////////////////////////
template<class MEMTREE, class TUPLE>
class memTreeIterator{
private:
typedef typename MEMTREE::const_iterator MTITER;
public:
memTreeIterator( MEMTREE *s )
{
it_ = s->begin();
itend_ = s->end();
}
memTreeIterator( MTITER& it, MTITER& itend )
{
it_ = it;
itend_ = itend;
}
explicit memTreeIterator(memTreeIterator &i)
{
it_ = i.it_;
itend_ = i.itend_;
}
const TUPLE& operator* ()
{
return *it_;
}
void seekEnd()
{
it_ = itend_;
}
memTreeIterator * end()
{
return new memTreeIterator<MEMTREE,TUPLE>(itend_,itend_);
}
inline bool operator==(const memTreeIterator &o) const {
return it_ == o.it_;
}
inline bool operator!=(const memTreeIterator &o) const {
return !(*this == o);
}
inline void operator++() {
++it_;
}
inline void operator--() {
--it_;
}
inline int operator-(memTreeIterator &i) {
return it_ - i.it_;
}
inline void operator=(memTreeIterator const &i)
{
it_ = i.it_;
itend_ = i.itend_;
}
public:
typedef MEMTREE* handle;
private:
MTITER it_;
MTITER itend_;
friend const byte* toByteArray<MEMTREE,TUPLE>(memTreeIterator<MEMTREE,TUPLE> * const t);
};
template <class MEMTREE, class TUPLE>
const byte* toByteArray(memTreeIterator<MEMTREE,TUPLE> * const t)
{
return (*(t->it_)).to_bytes();//toByteArray();
}
/////////////////////////////////////////////////////////////////
/**
Scans through an LSM tree's leaf pages, each tuple in the tree, in
order. This iterator is designed for maximum forward scan
performance, and does not support all STL operations.
**/
template <class TUPLE>
class treeIterator
{
public:
// typedef recordid handle;
class treeIteratorHandle
{
public:
treeIteratorHandle() : r_(NULLRID) {}
treeIteratorHandle(const recordid r) : r_(r) {}
treeIteratorHandle * operator=(const recordid &r) {
r_ = r;
return this;
}
recordid r_;
};
typedef treeIteratorHandle* handle;
explicit treeIterator(recordid tree);
explicit treeIterator(recordid tree,TUPLE &key);
//explicit treeIterator(treeIteratorHandle* tree, TUPLE& key);
//explicit treeIterator(treeIteratorHandle* tree);
//explicit treeIterator(treeIterator& t);
~treeIterator();
TUPLE * getnext();
//void advance(int count=1);
private:
inline void init_helper();
explicit treeIterator() { abort(); }
void operator=(treeIterator & t) { abort(); }
int operator-(treeIterator & t) { abort(); }
private:
recordid tree_; //root of the tree
lladdIterator_t * lsmIterator_; //logtree iterator
pageid_t curr_pageid; //current page id
DataPage<TUPLE> *curr_page; //current page
typedef typename DataPage<TUPLE>::RecordIterator DPITR_T;
DPITR_T *dp_itr;
TUPLE *curr_tuple; //current tuple
};
#endif
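A forward-scan sketch tying the pieces together: treeIterator walks the logtree's leaves, loads each referenced DataPage, and yields its tuples in order. It assumes the recordid of the tree root (as produced by logtree::create) is available; getnext() returns 0 after the last data page.
#include <cstdio>
#include "logstore.h"
#include "logiterators.h"
// Illustrative only: print every key reachable from the given tree root.
void scan_tree(recordid tree_root)
{
    treeIterator<datatuple> itr(tree_root);
    datatuple *t;
    while((t = itr.getnext()) != 0)
    {
        printf("%s\n", datatuple::key_to_str(t->get_key()).c_str());
        // tuple lifetime is left to the caller/iterator here; the iterator
        // itself only frees the last tuple it returned
    }
}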

649
logserver.cpp Normal file
View file

@ -0,0 +1,649 @@
#include "logserver.h"
#include "datatuple.h"
#include "logstore.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <sys/select.h>
#include <errno.h>
#undef begin
#undef end
#undef try
//server codes
uint8_t logserver::OP_SUCCESS = 1;
uint8_t logserver::OP_FAIL = 2;
uint8_t logserver::OP_SENDING_TUPLE = 3;
//client codes
uint8_t logserver::OP_FIND = 4;
uint8_t logserver::OP_INSERT = 5;
uint8_t logserver::OP_DONE = 6;
uint8_t logserver::OP_INVALID = 32;
void *serverLoop(void *args);
void logserver::startserver(logtable *ltable)
{
sys_alive = true;
this->ltable = ltable;
selcond = new pthread_cond_t;
pthread_cond_init(selcond, 0);
//initialize threads
for(int i=0; i<nthreads; i++)
{
struct pthread_item *worker_th = new pthread_item;
th_list.push_back(worker_th);
worker_th->th_handle = new pthread_t;
struct pthread_data *worker_data = new pthread_data;
worker_th->data = worker_data;
worker_data->idleth_queue = &idleth_queue;
worker_data->ready_queue = &ready_queue;
worker_data->work_queue = &work_queue;
worker_data->qlock = qlock;
worker_data->selcond = selcond;
worker_data->th_cond = new pthread_cond_t;
pthread_cond_init(worker_data->th_cond,0);
worker_data->th_mut = new pthread_mutex_t;
pthread_mutex_init(worker_data->th_mut,0);
worker_data->workitem = new int;
*(worker_data->workitem) = -1;
//worker_data->table_lock = lsmlock;
worker_data->ltable = ltable;
worker_data->sys_alive = &sys_alive;
pthread_create(worker_th->th_handle, 0, thread_work_fn, worker_th);
idleth_queue.push(*worker_th);
}
//start server socket
sdata = new serverth_data;
sdata->server_socket = &serversocket;
sdata->server_port = server_port;
sdata->idleth_queue = &idleth_queue;
sdata->ready_queue = &ready_queue;
sdata->selcond = selcond;
sdata->qlock = qlock;
pthread_create(&server_thread, 0, serverLoop, sdata);
//start monitoring loop
eventLoop();
}
void logserver::stopserver()
{
//close the server socket
//stops receiving data on the server socket
shutdown(serversocket, 0);
//wait for all threads to be idle
while(idleth_queue.size() != nthreads)
sleep(1);
#ifdef STATS_ENABLED
printf("\n\nSTATISTICS\n");
std::map<std::string, int> num_reqsc;
std::map<std::string, double> work_timec;
#endif
//set the system running flag to false
sys_alive = false;
for(int i=0; i<nthreads; i++)
{
pthread_item *idle_th = th_list[i];
//wake up the thread
pthread_mutex_lock(idle_th->data->th_mut);
pthread_cond_signal(idle_th->data->th_cond);
pthread_mutex_unlock(idle_th->data->th_mut);
//wait for it to join
pthread_join(*(idle_th->th_handle), 0);
//free the thread variables
pthread_cond_destroy(idle_th->data->th_cond);
#ifdef STATS_ENABLED
if(i == 0)
{
tot_threadwork_time = 0;
num_reqs = 0;
}
tot_threadwork_time += idle_th->data->work_time;
num_reqs += idle_th->data->num_reqs;
printf("thread %d: work_time %.3f\t#calls %d\tavg req process time:\t%.3f\n",
i,
idle_th->data->work_time,
idle_th->data->num_reqs,
(( idle_th->data->num_reqs == 0 ) ? 0 : idle_th->data->work_time / idle_th->data->num_reqs)
);
for(std::map<std::string, int>::const_iterator itr = idle_th->data->num_reqsc.begin();
itr != idle_th->data->num_reqsc.end(); itr++)
{
std::string ckey = (*itr).first;
printf("\t%s\t%d\t%.3f\t%.3f\n", ckey.c_str(), (*itr).second, idle_th->data->work_timec[ckey],
idle_th->data->work_timec[ckey] / (*itr).second);
if(num_reqsc.find(ckey) == num_reqsc.end()){
num_reqsc[ckey] = 0;
work_timec[ckey] = 0;
}
num_reqsc[ckey] += (*itr).second;
work_timec[ckey] += idle_th->data->work_timec[ckey];
}
#endif
delete idle_th->data->th_cond;
delete idle_th->data->th_mut;
delete idle_th->data->workitem;
delete idle_th->data;
delete idle_th->th_handle;
}
th_list.clear();
#ifdef STATS_ENABLED
printf("\n\nAggregated Stats:\n");
for(std::map<std::string, int>::const_iterator itr = num_reqsc.begin();
itr != num_reqsc.end(); itr++)
{
std::string ckey = (*itr).first;
printf("\t%s\t%d\t%.3f\t%.3f\n", ckey.c_str(), (*itr).second, work_timec[ckey],
work_timec[ckey] / (*itr).second);
}
tot_time = (stop_tv.tv_sec - start_tv.tv_sec) * 1000 +
(stop_tv.tv_usec / 1000 - start_tv.tv_usec / 1000);
printf("\ntot time:\t%f\n",tot_time);
printf("tot work time:\t%f\n", tot_threadwork_time);
printf("load avg:\t%f\n", tot_threadwork_time / tot_time);
printf("tot num reqs\t%d\n", num_reqs);
if(num_reqs!= 0)
{
printf("tot work time / num reqs:\t%.3f\n", tot_threadwork_time / num_reqs);
printf("tot time / num reqs:\t%.3f\n", tot_time / num_reqs );
}
#endif
//close(serversocket);
return;
}
void logserver::eventLoop()
{
fd_set readfs;
std::vector<int> sel_list;
int maxfd;
struct timeval Timeout;
struct timespec ts;
while(true)
{
//clear readset
FD_ZERO(&readfs);
maxfd = -1;
ts.tv_nsec = 250000; //nanosec
ts.tv_sec = 0;
//Timeout.tv_usec = 250; /* microseconds */
//Timeout.tv_sec = 0; /* seconds */
//update select set
pthread_mutex_lock(qlock);
//while(ready_queue.size() == 0)
if(sel_list.size() == 0)
{
while(ready_queue.size() == 0)
pthread_cond_wait(selcond, qlock);
//pthread_cond_timedwait(selcond, qlock, &ts);
//printf("awoke\n");
}
//new connections + processed conns are in ready_queue
//add them to select list
while(ready_queue.size() > 0)
{
sel_list.push_back(ready_queue.front());
ready_queue.pop();
}
pthread_mutex_unlock(qlock);
//ready select set
for(std::vector<int>::const_iterator itr=sel_list.begin();
itr != sel_list.end(); itr++)
{
if(maxfd < *itr)
maxfd = *itr;
FD_SET(*itr, &readfs);
}
//select events
int sel_res = select(maxfd+1, &readfs, NULL, NULL, NULL);// &Timeout);
//printf("sel_res %d %d\n", sel_res, errno);
//fflush(stdout);
//job assignment to threads
//printf("sel_list size:\t%d ready_cnt\t%d\n", sel_list.size(), sel_res);
#ifdef STATS_ENABLED
if(num_selcalls == 0)
gettimeofday(&start_tv, 0);
num_selevents += sel_res;
num_selcalls++;
#endif
pthread_mutex_lock(qlock);
for(int i=0; i<sel_list.size(); i++ )
{
int currsock = sel_list[i];
if (FD_ISSET(currsock, &readfs))
{
//printf("sock %d ready\n", currsock);
// pthread_mutex_lock(qlock);
if(idleth_queue.size() > 0) //assign the job to an idle thread
{
pthread_item idle_th = idleth_queue.front();
idleth_queue.pop();
//wake up the thread to do work
pthread_mutex_lock(idle_th.data->th_mut);
//set the job of the idle thread
*(idle_th.data->workitem) = currsock;
pthread_cond_signal(idle_th.data->th_cond);
pthread_mutex_unlock(idle_th.data->th_mut);
//printf("%d:\tconn %d assigned.\n", i, currsock);
}
else
{
//insert the given element to the work queue
work_queue.push(currsock);
//printf("work queue size:\t%d\n", work_queue.size());
}
// pthread_mutex_unlock(qlock);
//remove from the sel_list
sel_list.erase(sel_list.begin()+i);
i--;
}
}
pthread_mutex_unlock(qlock);
#ifdef STATS_ENABLED
gettimeofday(&stop_tv, 0);
#endif
}
}
void *serverLoop(void *args)
{
serverth_data *sdata = (serverth_data*)args;
int sockfd; //socket descriptor
struct sockaddr_in serv_addr;
struct sockaddr_in cli_addr;
int newsockfd; //newly created
socklen_t clilen = sizeof(cli_addr);
//open a socket
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
printf("ERROR opening socket\n");
return 0;
}
bzero((char *) &serv_addr, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
serv_addr.sin_port = htons(sdata->server_port);
if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0)
{
printf("ERROR on binding.\n");
return 0;
}
//start listening on the server socket
//second arg is the max number of connections waiting in queue
if(listen(sockfd,SOMAXCONN)==-1)
{
printf("ERROR on listen.\n");
return 0;
}
printf("LSM Server listenning...\n");
*(sdata->server_socket) = sockfd;
int flag, result;
while(true)
{
newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
if (newsockfd < 0)
{
printf("ERROR on accept.\n");
return 0; // we probably want to continue instead of return here (when not debugging)
}
flag = 1;
result = setsockopt(newsockfd, /* socket affected */
IPPROTO_TCP, /* set option at TCP level */
TCP_NODELAY, /* name of option */
(char *) &flag, /* the cast is historical
cruft */
sizeof(int)); /* length of option value */
if (result < 0)
{
printf("ERROR on setting socket option TCP_NODELAY.\n");
return 0;
}
char clientip[20];
inet_ntop(AF_INET, (void*) &(cli_addr.sin_addr), clientip, 20);
printf("Connection from:\t%s\n", clientip);
//printf("Number of idle threads %d\n", idleth_queue.size());
pthread_mutex_lock(sdata->qlock);
//insert the given element to the ready queue
sdata->ready_queue->push(newsockfd);
if(sdata->ready_queue->size() == 1) //signal the event loop
pthread_cond_signal(sdata->selcond);
pthread_mutex_unlock(sdata->qlock);
}
}
inline void readfromsocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += read( sockd, buf + n, count - n);
}
}
inline void writetosocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += write( sockd, buf + n, count - n);
}
}
void * thread_work_fn( void * args)
{
pthread_item * item = (pthread_item *) args;
pthread_mutex_lock(item->data->th_mut);
while(true)
{
while(*(item->data->workitem) == -1)
{
if(!*(item->data->sys_alive))
break;
pthread_cond_wait(item->data->th_cond, item->data->th_mut); //wait for job
}
#ifdef STATS_ENABLED
gettimeofday(& (item->data->start_tv), 0);
std::ostringstream ostr;
ostr << *(item->data->workitem) << "_";
#endif
if(!*(item->data->sys_alive))
{
//printf("thread quitted.\n");
break;
}
//step 1: read the opcode
uint8_t opcode;
ssize_t n = read(*(item->data->workitem), &opcode, sizeof(uint8_t));
assert( n == sizeof(uint8_t));
assert( opcode < logserver::OP_INVALID );
if( opcode == logserver::OP_DONE ) //client is done; close the connection
{
pthread_mutex_lock(item->data->qlock);
printf("client done. conn closed. (%d, %d, %d, %d)\n",
n, errno, *(item->data->workitem), item->data->work_queue->size());
close(*(item->data->workitem));
if(item->data->work_queue->size() > 0)
{
int new_work = item->data->work_queue->front();
item->data->work_queue->pop();
//printf("work queue size:\t%d\n", item->data->work_queue->size());
*(item->data->workitem) = new_work;
}
else
{
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
pthread_mutex_unlock(item->data->qlock);
continue;
}
//step 2: read the tuple from client
datatuple tuple;
tuple.keylen = (uint32_t*)malloc(sizeof(uint32_t));
tuple.datalen = (uint32_t*)malloc(sizeof(uint32_t));
//read the key length
n = read(*(item->data->workitem), tuple.keylen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the data length
n = read(*(item->data->workitem), tuple.datalen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the key
tuple.key = (byte*) malloc(*tuple.keylen);
readfromsocket(*(item->data->workitem), (byte*) tuple.key, *tuple.keylen);
//read the data
if(!tuple.isDelete() && opcode != logserver::OP_FIND)
{
tuple.data = (byte*) malloc(*tuple.datalen);
readfromsocket(*(item->data->workitem), (byte*) tuple.data, *tuple.datalen);
}
else
tuple.data = 0;
//step 3: process the tuple
//pthread_mutex_lock(item->data->table_lock);
//readlock(item->data->table_lock,0);
if(opcode == logserver::OP_INSERT)
{
//insert/update/delete
item->data->ltable->insertTuple(tuple);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
//step 4: send response
uint8_t rcode = logserver::OP_SUCCESS;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
}
else if(opcode == logserver::OP_FIND)
{
//find the tuple
datatuple *dt = item->data->ltable->findTuple(-1, tuple.key, *tuple.keylen);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
#ifdef STATS_ENABLED
if(dt == 0)
printf("key not found:\t%s\n", datatuple::key_to_str(tuple.key).c_str());
else if( *dt->datalen != 1024)
printf("data len for\t%s:\t%d\n", datatuple::key_to_str(tuple.key).c_str(),
*dt->datalen);
if(datatuple::compare(tuple.key, dt->key) != 0)
printf("key not equal:\t%s\t%s\n", datatuple::key_to_str(tuple.key).c_str(),
datatuple::key_to_str(dt->key).c_str());
#endif
if(dt == 0) //tuple deleted
{
dt = (datatuple*) malloc(sizeof(datatuple));
dt->keylen = (uint32_t*) malloc(2*sizeof(uint32_t) + *tuple.keylen);
*dt->keylen = *tuple.keylen;
dt->datalen = dt->keylen + 1;
dt->key = (datatuple::key_t) (dt->datalen+1);
memcpy((byte*) dt->key, (byte*) tuple.key, *tuple.keylen);
dt->setDelete();
}
//send the reply code
uint8_t rcode = logserver::OP_SENDING_TUPLE;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
//send the tuple
writetosocket(*(item->data->workitem), (byte*) dt->keylen, dt->byte_length());
//free datatuple
free(dt->keylen);
free(dt);
}
//close the socket
//close(*(item->data->workitem));
//free the tuple
free(tuple.keylen);
free(tuple.datalen);
free(tuple.key);
free(tuple.data);
//printf("socket %d: work completed.", *(item->data->workitem));
pthread_mutex_lock(item->data->qlock);
//add conn desc to ready queue
item->data->ready_queue->push(*(item->data->workitem));
//printf("ready queue size: %d sock(%d)\n", item->data->ready_queue->size(), *(item->data->workitem));
if(item->data->ready_queue->size() == 1) //signal the event loop
pthread_cond_signal(item->data->selcond);
//printf("work complete, added to ready queue %d (size %d)\n", *(item->data->workitem),
// item->data->ready_queue->size());
if(item->data->work_queue->size() > 0)
{
int new_work = item->data->work_queue->front();
item->data->work_queue->pop();
//printf("work queue size:\t%d\n", item->data->work_queue->size());
*(item->data->workitem) = new_work;
}
else
{
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
pthread_mutex_unlock(item->data->qlock);
#ifdef STATS_ENABLED
if( item->data->num_reqs == 0 )
item->data->work_time = 0;
gettimeofday(& (item->data->stop_tv), 0);
(item->data->num_reqs)++;
//item->data->work_time += tv_to_double(item->data->stop_tv) - tv_to_double(item->data->start_tv);
item->data->work_time += (item->data->stop_tv.tv_sec - item->data->start_tv.tv_sec) * 1000 +
(item->data->stop_tv.tv_usec / 1000 - item->data->start_tv.tv_usec / 1000);
int iopcode = opcode;
ostr << iopcode;
std::string clientkey = ostr.str();
if(item->data->num_reqsc.find(clientkey) == item->data->num_reqsc.end())
{
item->data->num_reqsc[clientkey]=0;
item->data->work_timec[clientkey]=0;
}
item->data->num_reqsc[clientkey]++;
item->data->work_timec[clientkey] += (item->data->stop_tv.tv_sec - item->data->start_tv.tv_sec) * 1000 +
(item->data->stop_tv.tv_usec / 1000 - item->data->start_tv.tv_usec / 1000);
#endif
}
pthread_mutex_unlock(item->data->th_mut);
return 0; // thread exit value; the function is declared void*
}

197
logserver.h Normal file
View file

@ -0,0 +1,197 @@
#ifndef _LOGSERVER_H_
#define _LOGSERVER_H_
#include <queue>
#include <vector>
//#include "logstore.h"
#include "datatuple.h"
#include <stasis/transactional.h>
#include <pthread.h>
#undef begin
#undef try
#undef end
#define STATS_ENABLED 1
#ifdef STATS_ENABLED
#include <sys/time.h>
#include <time.h>
#include <map>
#endif
class logtable;
struct pthread_item;
struct pthread_data {
std::queue<pthread_item> *idleth_queue;
std::queue<int> *ready_queue;
std::queue<int> *work_queue;
pthread_mutex_t * qlock;
pthread_cond_t *selcond;
pthread_cond_t * th_cond;
pthread_mutex_t * th_mut;
int *workitem; //id of the socket to work
//pthread_mutex_t * table_lock;
//rwl *table_lock;
logtable *ltable;
bool *sys_alive;
#ifdef STATS_ENABLED
int num_reqs;
struct timeval start_tv, stop_tv;
double work_time;
std::map<std::string, int> num_reqsc;
std::map<std::string, double> work_timec;
#endif
};
struct pthread_item{
pthread_t * th_handle;
pthread_data *data;
};
//struct work_item
//{
// int sockd; //socket id
// datatuple in_tuple; //request
// datatuple out_tuple; //response
//};
struct serverth_data
{
int *server_socket;
int server_port;
std::queue<pthread_item> *idleth_queue;
std::queue<int> *ready_queue;
pthread_cond_t *selcond;
pthread_mutex_t *qlock;
};
void * thread_work_fn( void *);
class logserver
{
public:
//server codes
static uint8_t OP_SUCCESS;
static uint8_t OP_FAIL;
static uint8_t OP_SENDING_TUPLE;
//client codes
static uint8_t OP_FIND;
static uint8_t OP_INSERT;
static uint8_t OP_DONE;
static uint8_t OP_INVALID;
public:
logserver(int nthreads, int server_port){
this->nthreads = nthreads;
this->server_port = server_port;
//lsmlock = new pthread_mutex_t;
//pthread_mutex_init(lsmlock,0);
//lsmlock = initlock();
qlock = new pthread_mutex_t;
pthread_mutex_init(qlock,0);
ltable = 0;
#ifdef STATS_ENABLED
num_selevents = 0;
num_selcalls = 0;
#endif
}
~logserver()
{
//delete lsmlock;
//deletelock(lsmlock);
delete qlock;
}
void startserver(logtable *ltable);
void stopserver();
public:
private:
//main loop of server
//accept connections, assign jobs to threads
//void dispatchLoop();
void eventLoop();
private:
int server_port;
int nthreads;
bool sys_alive;
int serversocket; //server socket file descriptor
//ccqueue<int> conn_queue; //list of active connections (socket list)
//ccqueue<pthread_item> idleth_queue; //list of idle threads
std::queue<int> ready_queue; //connections to go inside select
std::queue<int> work_queue; //connections to be processed by worker threads
std::queue<pthread_item> idleth_queue;
pthread_mutex_t *qlock;
pthread_t server_thread;
serverth_data *sdata;
pthread_cond_t *selcond; //server loop cond
std::vector<pthread_item *> th_list; // list of threads
//rwl *lsmlock; //lock for using lsm table
logtable *ltable;
#ifdef STATS_ENABLED
int num_reqs;
int num_selevents;
int num_selcalls;
struct timeval start_tv, stop_tv;
double tot_threadwork_time;
double tot_time;
#endif
};
#endif
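The worker loop in logserver.cpp implies a simple client wire format: a one-byte opcode, two uint32_t length fields, the key, and (for inserts only) the data; the server replies with a one-byte response code, followed by the tuple bytes for OP_FIND. A blocking client-side sketch under those assumptions, with short read/write handling and error checks omitted:
#include <unistd.h>
#include "logserver.h"
// Illustrative only: issue an OP_FIND using the framing thread_work_fn() expects.
bool send_find(int sock, const byte *key, uint32_t keylen)
{
    uint8_t  op      = logserver::OP_FIND;
    uint32_t datalen = 0;                        // lookups carry no data payload
    write(sock, &op, sizeof(op));
    write(sock, &keylen, sizeof(keylen));
    write(sock, &datalen, sizeof(datalen));
    write(sock, key, keylen);
    uint8_t rcode = 0;
    read(sock, &rcode, sizeof(rcode));
    return rcode == logserver::OP_SENDING_TUPLE; // tuple bytes follow on the socket
}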

519
logserver_pers.cpp Normal file
View file

@ -0,0 +1,519 @@
#include "logserver.h"
#include "datatuple.h"
#include "logstore.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <sys/select.h>
#include <errno.h>
#undef begin
#undef end
#undef try
//server codes
uint8_t logserver::OP_SUCCESS = 1;
uint8_t logserver::OP_FAIL = 2;
uint8_t logserver::OP_SENDING_TUPLE = 3;
//client codes
uint8_t logserver::OP_FIND = 4;
uint8_t logserver::OP_INSERT = 5;
uint8_t logserver::OP_DONE = 6;
uint8_t logserver::OP_INVALID = 32;
void *serverLoop(void *args);
void logserver::startserver(logtable *ltable)
{
sys_alive = true;
this->ltable = ltable;
selcond = new pthread_cond_t;
pthread_cond_init(selcond, 0);
//initialize threads
for(int i=0; i<nthreads; i++)
{
struct pthread_item *worker_th = new pthread_item;
th_list.push_back(worker_th);
worker_th->th_handle = new pthread_t;
struct pthread_data *worker_data = new pthread_data;
worker_th->data = worker_data;
worker_data->idleth_queue = &idleth_queue;
worker_data->ready_queue = &ready_queue;
worker_data->work_queue = &work_queue;
worker_data->qlock = qlock;
worker_data->selcond = selcond;
worker_data->th_cond = new pthread_cond_t;
pthread_cond_init(worker_data->th_cond,0);
worker_data->th_mut = new pthread_mutex_t;
pthread_mutex_init(worker_data->th_mut,0);
worker_data->workitem = new int;
*(worker_data->workitem) = -1;
worker_data->table_lock = lsmlock;
worker_data->ltable = ltable;
worker_data->sys_alive = &sys_alive;
pthread_create(worker_th->th_handle, 0, thread_work_fn, worker_th);
idleth_queue.push(*worker_th);
}
//start server socket
sdata = new serverth_data;
sdata->server_socket = &serversocket;
sdata->server_port = server_port;
sdata->idleth_queue = &idleth_queue;
sdata->ready_queue = &ready_queue;
sdata->selcond = selcond;
sdata->qlock = qlock;
pthread_create(&server_thread, 0, serverLoop, sdata);
//start monitoring loop
eventLoop();
}
void logserver::stopserver()
{
//close the server socket
//stops receiving data on the server socket
shutdown(serversocket, 0);
//wait for all threads to be idle
while(idleth_queue.size() != nthreads)
sleep(1);
//set the system running flag to false
sys_alive = false;
for(int i=0; i<nthreads; i++)
{
pthread_item *idle_th = th_list[i];
//wake up the thread
pthread_mutex_lock(idle_th->data->th_mut);
pthread_cond_signal(idle_th->data->th_cond);
pthread_mutex_unlock(idle_th->data->th_mut);
//wait for it to join
pthread_join(*(idle_th->th_handle), 0);
//free the thread variables
pthread_cond_destroy(idle_th->data->th_cond);
delete idle_th->data->th_cond;
delete idle_th->data->th_mut;
delete idle_th->data->workitem;
delete idle_th->data;
delete idle_th->th_handle;
}
th_list.clear();
//close(serversocket);
return;
}
void logserver::eventLoop()
{
fd_set readfs;
std::vector<int> sel_list;
int maxfd;
struct timeval Timeout;
struct timespec ts;
while(true)
{
//clear readset
FD_ZERO(&readfs);
maxfd = -1;
ts.tv_nsec = 250000; //nanosec
ts.tv_sec = 0;
//Timeout.tv_usec = 250; /* microseconds */
//Timeout.tv_sec = 0; /* seconds */
//update select set
pthread_mutex_lock(qlock);
while(ready_queue.size() == 0)
{
pthread_cond_wait(selcond, qlock);
//pthread_cond_timedwait(selcond, qlock, &ts);
//printf("awoke\n");
}
//new connections + processed conns are in ready_queue
//add them to select list
while(ready_queue.size() > 0)
{
sel_list.push_back(ready_queue.front());
ready_queue.pop();
}
pthread_mutex_unlock(qlock);
//ready select set
for(std::vector<int>::const_iterator itr=sel_list.begin();
itr != sel_list.end(); itr++)
{
if(maxfd < *itr)
maxfd = *itr;
FD_SET(*itr, &readfs);
}
//select events
int sel_res = select(maxfd+1, &readfs, NULL, NULL, NULL);// &Timeout);
//printf("sel_res %d %d\n", sel_res, errno);
//fflush(stdout);
//job assignment to threads
for(int i=0; i<sel_list.size(); i++ )
{
int currsock = sel_list[i];
if (FD_ISSET(currsock, &readfs))
{
//printf("sock %d ready\n", currsock);
pthread_mutex_lock(qlock);
if(idleth_queue.size() > 0) //assign the job to an idle thread
{
pthread_item idle_th = idleth_queue.front();
idleth_queue.pop();
//wake up the thread to do work
pthread_mutex_lock(idle_th.data->th_mut);
//set the job of the idle thread
*(idle_th.data->workitem) = currsock;
pthread_cond_signal(idle_th.data->th_cond);
pthread_mutex_unlock(idle_th.data->th_mut);
}
else
{
//insert the given element to the work queue
work_queue.push(currsock);
printf("work queue size:\t%d\n", work_queue.size());
}
//remove from the sel_list
sel_list.erase(sel_list.begin()+i);
i--;
pthread_mutex_unlock(qlock);
}
}
}
}
void *serverLoop(void *args)
{
serverth_data *sdata = (serverth_data*)args;
int sockfd; //socket descriptor
struct sockaddr_in serv_addr;
struct sockaddr_in cli_addr;
int newsockfd; //newly created
socklen_t clilen = sizeof(cli_addr);
//open a socket
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
printf("ERROR opening socket\n");
return 0;
}
bzero((char *) &serv_addr, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
serv_addr.sin_port = htons(sdata->server_port);
if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0)
{
printf("ERROR on binding.\n");
return 0;
}
//start listening on the server socket
//second arg is the max number of connections waiting in queue
if(listen(sockfd,SOMAXCONN)==-1)
{
printf("ERROR on listen.\n");
return 0;
}
printf("LSM Server listenning...\n");
*(sdata->server_socket) = sockfd;
int flag, result;
while(true)
{
newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
if (newsockfd < 0)
{
printf("ERROR on accept.\n");
return 0; // we probably want to continue instead of return here (when not debugging)
}
flag = 1;
result = setsockopt(newsockfd, /* socket affected */
IPPROTO_TCP, /* set option at TCP level */
TCP_NODELAY, /* name of option */
(char *) &flag, /* the cast is historical
cruft */
sizeof(int)); /* length of option value */
if (result < 0)
{
printf("ERROR on setting socket option TCP_NODELAY.\n");
return 0;
}
char clientip[20];
inet_ntop(AF_INET, (void*) &(cli_addr.sin_addr), clientip, 20);
printf("Connection from:\t%s\n", clientip);
//printf("Number of idle threads %d\n", idleth_queue.size());
pthread_mutex_lock(sdata->qlock);
//insert the given element to the ready queue
sdata->ready_queue->push(newsockfd);
if(sdata->ready_queue->size() == 1) //signal the event loop
pthread_cond_signal(sdata->selcond);
pthread_mutex_unlock(sdata->qlock);
}
}
inline void readfromsocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += read( sockd, buf + n, count - n);
}
}
inline void writetosocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += write( sockd, buf + n, count - n);
}
}
void * thread_work_fn( void * args)
{
pthread_item * item = (pthread_item *) args;
pthread_mutex_lock(item->data->th_mut);
while(true)
{
while(*(item->data->workitem) == -1)
{
if(!*(item->data->sys_alive))
break;
pthread_cond_wait(item->data->th_cond, item->data->th_mut); //wait for job
}
if(!*(item->data->sys_alive))
{
//printf("thread quitted.\n");
break;
}
//step 1: read the opcode
uint8_t opcode;
ssize_t n = read(*(item->data->workitem), &opcode, sizeof(uint8_t));
assert( n == sizeof(uint8_t));
assert( opcode < logserver::OP_INVALID );
if( opcode == logserver::OP_DONE ) //client is done; close the connection
{
pthread_mutex_lock(item->data->qlock);
printf("client done. conn closed. (%d, %d, %d, %d)\n",
n, errno, *(item->data->workitem), item->data->work_queue->size());
close(*(item->data->workitem));
if(item->data->work_queue->size() > 0)
{
int new_work = item->data->work_queue->front();
item->data->work_queue->pop();
printf("work queue size:\t%d\n", item->data->work_queue->size());
*(item->data->workitem) = new_work;
}
else
{
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
pthread_cond_signal(item->data->selcond);
pthread_mutex_unlock(item->data->qlock);
continue;
}
//step 2: read the tuple from client
datatuple tuple;
tuple.keylen = (uint32_t*)malloc(sizeof(uint32_t));
tuple.datalen = (uint32_t*)malloc(sizeof(uint32_t));
//read the key length
n = read(*(item->data->workitem), tuple.keylen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the data length
n = read(*(item->data->workitem), tuple.datalen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the key
tuple.key = (byte*) malloc(*tuple.keylen);
readfromsocket(*(item->data->workitem), (byte*) tuple.key, *tuple.keylen);
//read the data
if(!tuple.isDelete() && opcode != logserver::OP_FIND)
{
tuple.data = (byte*) malloc(*tuple.datalen);
readfromsocket(*(item->data->workitem), (byte*) tuple.data, *tuple.datalen);
}
else
tuple.data = 0;
//step 3: process the tuple
//pthread_mutex_lock(item->data->table_lock);
//readlock(item->data->table_lock,0);
if(opcode == logserver::OP_INSERT)
{
//insert/update/delete
item->data->ltable->insertTuple(tuple);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
//step 4: send response
uint8_t rcode = logserver::OP_SUCCESS;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
}
else if(opcode == logserver::OP_FIND)
{
//find the tuple
datatuple *dt = item->data->ltable->findTuple(-1, tuple.key, *tuple.keylen);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
if(dt == 0) //tuple deleted
{
dt = (datatuple*) malloc(sizeof(datatuple));
dt->keylen = (uint32_t*) malloc(2*sizeof(uint32_t) + *tuple.keylen);
*dt->keylen = *tuple.keylen;
dt->datalen = dt->keylen + 1;
dt->key = (datatuple::key_t) (dt->datalen+1);
memcpy((byte*) dt->key, (byte*) tuple.key, *tuple.keylen);
dt->setDelete();
}
//send the reply code
uint8_t rcode = logserver::OP_SENDING_TUPLE;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
//send the tuple
writetosocket(*(item->data->workitem), (byte*) dt->keylen, dt->byte_length());
//free datatuple
free(dt->keylen);
free(dt);
}
//close the socket
//close(*(item->data->workitem));
//free the tuple
free(tuple.keylen);
free(tuple.datalen);
free(tuple.key);
free(tuple.data);
//printf("socket %d: work completed.\n", *(item->data->workitem));
pthread_mutex_lock(item->data->qlock);
//add conn desc to ready queue
item->data->ready_queue->push(*(item->data->workitem));
//printf("ready queue size: %d sock(%d)\n", item->data->ready_queue->size(), *(item->data->workitem));
if(item->data->ready_queue->size() == 1) //signal the event loop
pthread_cond_signal(item->data->selcond);
if(item->data->work_queue->size() > 0)
{
int new_work = item->data->work_queue->front();
item->data->work_queue->pop();
printf("work queue size:\t%d\n", item->data->work_queue->size());
*(item->data->workitem) = new_work;
}
else
{
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
pthread_mutex_unlock(item->data->qlock);
}
pthread_mutex_unlock(item->data->th_mut);
return 0; // thread exit value; the function is declared void*
}

163
logserver_pers.h Normal file
View file

@ -0,0 +1,163 @@
#ifndef _LOGSERVER_H_
#define _LOGSERVER_H_
#include <queue>
#include <vector>
//#include "logstore.h"
#include "datatuple.h"
#include <stasis/transactional.h>
#include <pthread.h>
#undef begin
#undef try
#undef end
class logtable;
struct pthread_item;
struct pthread_data {
std::queue<pthread_item> *idleth_queue;
std::queue<int> *ready_queue;
std::queue<int> *work_queue;
pthread_mutex_t * qlock;
pthread_cond_t *selcond;
pthread_cond_t * th_cond;
pthread_mutex_t * th_mut;
int *workitem; //id of the socket to work
//pthread_mutex_t * table_lock;
rwl *table_lock;
logtable *ltable;
bool *sys_alive;
};
struct pthread_item{
pthread_t * th_handle;
pthread_data *data;
};
//struct work_item
//{
// int sockd; //socket id
// datatuple in_tuple; //request
// datatuple out_tuple; //response
//};
struct serverth_data
{
int *server_socket;
int server_port;
std::queue<pthread_item> *idleth_queue;
std::queue<int> *ready_queue;
pthread_cond_t *selcond;
pthread_mutex_t *qlock;
};
void * thread_work_fn( void *);
class logserver
{
public:
//server codes
static uint8_t OP_SUCCESS;
static uint8_t OP_FAIL;
static uint8_t OP_SENDING_TUPLE;
//client codes
static uint8_t OP_FIND;
static uint8_t OP_INSERT;
static uint8_t OP_DONE;
static uint8_t OP_INVALID;
public:
logserver(int nthreads, int server_port){
this->nthreads = nthreads;
this->server_port = server_port;
//lsmlock = new pthread_mutex_t;
//pthread_mutex_init(lsmlock,0);
lsmlock = initlock();
qlock = new pthread_mutex_t;
pthread_mutex_init(qlock,0);
ltable = 0;
}
~logserver()
{
//delete lsmlock;
deletelock(lsmlock);
delete qlock;
}
void startserver(logtable *ltable);
void stopserver();
public:
private:
//main loop of server
//accept connections, assign jobs to threads
//void dispatchLoop();
void eventLoop();
private:
int server_port;
int nthreads;
bool sys_alive;
int serversocket; //server socket file descriptor
//ccqueue<int> conn_queue; //list of active connections (socket list)
//ccqueue<pthread_item> idleth_queue; //list of idle threads
std::queue<int> ready_queue; //connections to go inside select
std::queue<int> work_queue; //connections to be processed by worker threads
std::queue<pthread_item> idleth_queue;
pthread_mutex_t *qlock;
pthread_t server_thread;
serverth_data *sdata;
pthread_cond_t *selcond; //server loop cond
std::vector<pthread_item *> th_list; // list of threads
rwl *lsmlock; //lock for using lsm table
logtable *ltable;
};
#endif

409
logserver_simple.cpp Normal file
View file

@ -0,0 +1,409 @@
#include "logserver.h"
#include "datatuple.h"
#include "logstore.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#undef begin
#undef end
#undef try
//server codes
uint8_t logserver::OP_SUCCESS = 1;
uint8_t logserver::OP_FAIL = 2;
uint8_t logserver::OP_SENDING_TUPLE = 3;
//client codes
uint8_t logserver::OP_FIND = 4;
uint8_t logserver::OP_INSERT = 5;
uint8_t logserver::OP_INVALID = 32;
void logserver::startserver(logtable *ltable)
{
sys_alive = true;
this->ltable = ltable;
//initialize threads
for(int i=0; i<nthreads; i++)
{
struct pthread_item *worker_th = new pthread_item;
th_list.push_back(worker_th);
worker_th->th_handle = new pthread_t;
struct pthread_data *worker_data = new pthread_data;
worker_th->data = worker_data;
worker_data->idleth_queue = &idleth_queue;
worker_data->conn_queue = &conn_queue;
worker_data->qlock = qlock;
worker_data->th_cond = new pthread_cond_t;
pthread_cond_init(worker_data->th_cond,0);
worker_data->th_mut = new pthread_mutex_t;
pthread_mutex_init(worker_data->th_mut,0);
worker_data->workitem = new int;
*(worker_data->workitem) = -1;
worker_data->table_lock = lsmlock;
worker_data->ltable = ltable;
worker_data->sys_alive = &sys_alive;
pthread_create(worker_th->th_handle, 0, thread_work_fn, worker_th);
idleth_queue.push(*worker_th);
}
dispatchLoop();
}
void logserver::stopserver()
{
//close the server socket
//stops receiving data on the server socket
shutdown(serversocket, 0);
//wait for all threads to be idle
while(idleth_queue.size() != nthreads)
sleep(1);
//set the system running flag to false
sys_alive = false;
for(int i=0; i<nthreads; i++)
{
pthread_item *idle_th = th_list[i];
//wake up the thread
pthread_mutex_lock(idle_th->data->th_mut);
pthread_cond_signal(idle_th->data->th_cond);
pthread_mutex_unlock(idle_th->data->th_mut);
//wait for it to join
pthread_join(*(idle_th->th_handle), 0);
//free the thread variables
pthread_cond_destroy(idle_th->data->th_cond);
delete idle_th->data->th_cond;
delete idle_th->data->th_mut;
delete idle_th->data->workitem;
delete idle_th->data;
delete idle_th->th_handle;
}
th_list.clear();
return;
}
void logserver::dispatchLoop()
{
int sockfd; //socket descriptor
struct sockaddr_in serv_addr;
struct sockaddr_in cli_addr;
int newsockfd; //newly created
socklen_t clilen = sizeof(cli_addr);
//open a socket
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
printf("ERROR opening socket\n");
return;
}
bzero((char *) &serv_addr, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
serv_addr.sin_port = htons(server_port);
if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0)
{
printf("ERROR on binding.\n");
return;
}
//start listening on the server socket
//second arg is the max number of connections waiting in queue
if(listen(sockfd,SOMAXCONN)==-1)
{
printf("ERROR on listen.\n");
return;
}
printf("LSM Server listenning...\n");
serversocket = sockfd;
int flag, result;
while(true)
{
newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
if (newsockfd < 0)
{
printf("ERROR on accept.\n");
return; // we probably want to continue instead of return here (when not debugging)
}
flag = 1;
result = setsockopt(newsockfd, /* socket affected */
IPPROTO_TCP, /* set option at TCP level */
TCP_NODELAY, /* name of option */
(char *) &flag, /* the cast is historical
cruft */
sizeof(int)); /* length of option value */
if (result < 0)
{
printf("ERROR on setting socket option TCP_NODELAY.\n");
return;
}
char clientip[20];
inet_ntop(AF_INET, (void*) &(cli_addr.sin_addr), clientip, 20);
//printf("Connection from:\t%s\n", clientip);
//printf("Number of idle threads %d\n", idleth_queue.size());
pthread_mutex_lock(qlock);
if(idleth_queue.size() > 0)
{
pthread_item idle_th = idleth_queue.front();
idleth_queue.pop();
//wake up the thread to do work
pthread_mutex_lock(idle_th.data->th_mut);
//set the job of the idle thread
*(idle_th.data->workitem) = newsockfd;
pthread_cond_signal(idle_th.data->th_cond);
pthread_mutex_unlock(idle_th.data->th_mut);
}
else
{
//insert the given element to the queue
conn_queue.push(newsockfd);
//printf("Number of queued connections:\t%d\n", conn_queue.size());
}
pthread_mutex_unlock(qlock);
/*
try
{
pthread_item idle_th = idleth_queue.pop();
//wake up the thread to do work
pthread_mutex_lock(idle_th.data->th_mut);
//set the job of the idle thread
*(idle_th.data->workitem) = newsockfd;
pthread_cond_signal(idle_th.data->th_cond);
pthread_mutex_unlock(idle_th.data->th_mut);
}
catch(int empty_exception)
{
//insert the given element to the queue
conn_queue.push(newsockfd);
//printf("Number of queued connections:\t%d\n", conn_queue.size());
}
*/
}
}
inline void readfromsocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += read( sockd, buf + n, count - n);
}
}
inline void writetosocket(int sockd, byte *buf, int count)
{
int n = 0;
while( n < count )
{
n += write( sockd, buf + n, count - n);
}
}
void * thread_work_fn( void * args)
{
pthread_item * item = (pthread_item *) args;
pthread_mutex_lock(item->data->th_mut);
while(true)
{
while(*(item->data->workitem) == -1)
{
if(!*(item->data->sys_alive))
break;
pthread_cond_wait(item->data->th_cond, item->data->th_mut); //wait for job
}
if(!*(item->data->sys_alive))
{
//printf("thread quitted.\n");
break;
}
//step 1: read the opcode
uint8_t opcode;
ssize_t n = read(*(item->data->workitem), &opcode, sizeof(uint8_t));
assert( n == sizeof(uint8_t));
assert( opcode < logserver::OP_INVALID );
//step 2: read the tuple from client
datatuple tuple;
tuple.keylen = (uint32_t*)malloc(sizeof(uint32_t));
tuple.datalen = (uint32_t*)malloc(sizeof(uint32_t));
//read the key length
n = read(*(item->data->workitem), tuple.keylen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the data length
n = read(*(item->data->workitem), tuple.datalen, sizeof(uint32_t));
assert( n == sizeof(uint32_t));
//read the key
tuple.key = (byte*) malloc(*tuple.keylen);
readfromsocket(*(item->data->workitem), (byte*) tuple.key, *tuple.keylen);
//read the data
if(!tuple.isDelete() && opcode != logserver::OP_FIND)
{
tuple.data = (byte*) malloc(*tuple.datalen);
readfromsocket(*(item->data->workitem), (byte*) tuple.data, *tuple.datalen);
}
else
tuple.data = 0;
//step 3: process the tuple
//pthread_mutex_lock(item->data->table_lock);
//readlock(item->data->table_lock,0);
if(opcode == logserver::OP_INSERT)
{
//insert/update/delete
item->data->ltable->insertTuple(tuple);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
//step 4: send response
uint8_t rcode = logserver::OP_SUCCESS;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
}
else if(opcode == logserver::OP_FIND)
{
//find the tuple
datatuple *dt = item->data->ltable->findTuple(-1, tuple.key, *tuple.keylen);
//unlock the lsmlock
//pthread_mutex_unlock(item->data->table_lock);
//unlock(item->data->table_lock);
if(dt == 0) //tuple deleted
{
dt = (datatuple*) malloc(sizeof(datatuple));
dt->keylen = (uint32_t*) malloc(2*sizeof(uint32_t) + *tuple.keylen);
*dt->keylen = *tuple.keylen;
dt->datalen = dt->keylen + 1;
dt->key = (datatuple::key_t) (dt->datalen+1);
memcpy((byte*) dt->key, (byte*) tuple.key, *tuple.keylen);
dt->setDelete();
}
//send the reply code
uint8_t rcode = logserver::OP_SENDING_TUPLE;
n = write(*(item->data->workitem), &rcode, sizeof(uint8_t));
assert(n == sizeof(uint8_t));
//send the tuple
writetosocket(*(item->data->workitem), (byte*) dt->keylen, dt->byte_length());
//free datatuple
free(dt->keylen);
free(dt);
}
//close the socket
close(*(item->data->workitem));
//free the tuple
free(tuple.keylen);
free(tuple.datalen);
free(tuple.key);
free(tuple.data);
//printf("socket %d: work completed.\n", *(item->data->workitem));
pthread_mutex_lock(item->data->qlock);
if(item->data->conn_queue->size() > 0)
{
int new_work = item->data->conn_queue->front();
item->data->conn_queue->pop();
*(item->data->workitem) = new_work;
}
else
{
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
pthread_mutex_unlock(item->data->qlock);
/*
//check if there is new work this thread can do
try
{
int new_work = item->data->conn_queue->pop();
*(item->data->workitem) = new_work; //set new work
//printf("socket %d: new work found.\n", *(item->data->workitem));
}
catch(int empty_exception)
{
//printf("socket %d: no new work found.\n", *(item->data->workitem));
//set work to -1
*(item->data->workitem) = -1;
//add self to idle queue
item->data->idleth_queue->push(*item);
}
*/
}
pthread_mutex_unlock(item->data->th_mut);
return 0; // thread exit value; the function is declared void*
}

198
logserver_simple.h Normal file
View file

@ -0,0 +1,198 @@
#ifndef _LOGSERVER_H_
#define _LOGSERVER_H_
#include <queue>
#include <vector>
//#include "logstore.h"
#include "datatuple.h"
#include <stasis/transactional.h>
#include <pthread.h>
#undef begin
#undef try
#undef end
class logtable;
template<class T>
class ccqueue
{
public:
ccqueue()
{
qmut = new pthread_mutex_t;
pthread_mutex_init(qmut,0);
}
int size()
{
pthread_mutex_lock(qmut);
int qsize = m_queue.size();
pthread_mutex_unlock(qmut);
return qsize;
}
//inserts a copy of the given element to the queue
void push(const T &item)
{
pthread_mutex_lock(qmut);
m_queue.push(item);
pthread_mutex_unlock(qmut);
return;
}
//returns a copy of the next element
//deletes the copy in the queue
//throws an exception with -1 on empty queue
T pop() throw (int)
{
pthread_mutex_lock(qmut);
if(m_queue.size() > 0)
{
T item = m_queue.front();
m_queue.pop();
pthread_mutex_unlock(qmut);
return item;
}
pthread_mutex_unlock(qmut);
throw(-1);
}
~ccqueue()
{
delete qmut;
}
private:
std::queue<T> m_queue;
pthread_mutex_t *qmut;
};
struct pthread_item;
struct pthread_data {
std::queue<pthread_item> *idleth_queue;
std::queue<int> *conn_queue;
pthread_mutex_t * qlock;
pthread_cond_t * th_cond;
pthread_mutex_t * th_mut;
int *workitem; //id of the socket to work
//pthread_mutex_t * table_lock;
rwl *table_lock;
logtable *ltable;
bool *sys_alive;
};
struct pthread_item{
pthread_t * th_handle;
pthread_data *data;
};
struct work_item
{
int sockd; //socket id
datatuple in_tuple; //request
datatuple out_tuple; //response
};
void * thread_work_fn( void *);
class logserver
{
public:
//server codes
static uint8_t OP_SUCCESS;
static uint8_t OP_FAIL;
static uint8_t OP_SENDING_TUPLE;
//client codes
static uint8_t OP_FIND;
static uint8_t OP_INSERT;
static uint8_t OP_INVALID;
public:
logserver(int nthreads, int server_port){
this->nthreads = nthreads;
this->server_port = server_port;
//lsmlock = new pthread_mutex_t;
//pthread_mutex_init(lsmlock,0);
lsmlock = initlock();
qlock = new pthread_mutex_t;
pthread_mutex_init(qlock,0);
ltable = 0;
}
~logserver()
{
//delete lsmlock;
deletelock(lsmlock);
pthread_mutex_destroy(qlock);
delete qlock;
}
void startserver(logtable *ltable);
void stopserver();
public:
private:
//main loop of server
//accept connections, assign jobs to threads
void dispatchLoop();
private:
int server_port;
int nthreads;
bool sys_alive;
int serversocket; //server socket file descriptor
//ccqueue<int> conn_queue; //list of active connections (socket list)
//ccqueue<pthread_item> idleth_queue; //list of idle threads
std::queue<int> conn_queue;
std::queue<pthread_item> idleth_queue;
pthread_mutex_t *qlock;
std::vector<pthread_item *> th_list; // list of threads
rwl *lsmlock; //lock for using lsm table
logtable *ltable;
};
#endif
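Note (not part of this commit): ccqueue is a thin mutex-guarded wrapper around std::queue whose pop() throws the int -1 on an empty queue; logserver itself currently guards plain std::queue members with qlock instead. A minimal usage sketch, mirroring the commented-out try/catch block in the worker loop above (the function names are illustrative):

//assumes logserver_simple.h is included

//dispatcher side: hand a freshly accepted socket to the workers
void enqueue_connection(ccqueue<int> &conn_queue, int newsock)
{
    conn_queue.push(newsock); //copies the fd into the locked queue
}

//worker side: fetch the next socket, or -1 if there is nothing to do
int next_connection(ccqueue<int> &conn_queue)
{
    try
    {
        return conn_queue.pop(); //returns and removes the front element
    }
    catch(int empty_exception)   //pop() throws -1 when the queue is empty
    {
        return -1;               //go idle instead
    }
}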

1606
logstore.cpp Normal file

File diff suppressed because it is too large

302
logstore.h Normal file
View file

@ -0,0 +1,302 @@
#ifndef _LOGSTORE_H_
#define _LOGSTORE_H_
#undef end
#undef begin
#include <string>
#include <set>
#include <sstream>
#include <iostream>
#include <queue>
#include <vector>
#include "logserver.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include <stasis/transactional.h>
#include <stasis/operations.h>
#include <stasis/bufferManager.h>
#include <stasis/allocationPolicy.h>
#include <stasis/blobManager.h>
#include <stasis/page.h>
#include <stasis/truncation.h>
#include "datapage.h"
#include "tuplemerger.h"
#include "datatuple.h"
double tv_to_double(struct timeval tv);
struct logtable_mergedata;
typedef struct RegionAllocConf_t
{
recordid regionList;
pageid_t regionCount;
pageid_t nextPage;
pageid_t endOfRegion;
pageid_t regionSize;
} RegionAllocConf_t;
//struct logtree_state {
// pageid_t lastLeaf;
//};
struct indexnode_rec {
pageid_t ptr;
};
typedef pageid_t(*logtree_page_allocator_t)(int, void *);
typedef void(*logtree_page_deallocator_t)(int, void *);
class logtree{
public:
logtree();
recordid create(int xid);
void print_tree(int xid);
static pageid_t alloc_region(int xid, void *conf);
static pageid_t alloc_region_rid(int xid, void * ridp);
static void force_region_rid(int xid, void *conf);
static void dealloc_region_rid(int xid, void *conf);
static void free_region_rid(int xid, recordid tree,
logtree_page_deallocator_t dealloc,
void *allocator_state);
static void writeNodeRecord(int xid, Page *p, recordid &rid,
const byte *key, size_t keylen, pageid_t ptr);
static void writeRecord(int xid, Page *p, recordid &rid,
const byte *data, size_t datalen);
static void writeRecord(int xid, Page *p, slotid_t slot,
const byte *data, size_t datalen);
static const byte* readRecord(int xid, Page * p, recordid &rid);
static const byte* readRecord(int xid, Page * p, slotid_t slot, int64_t size);
static int32_t readRecordLength(int xid, Page *p, slotid_t slot);
//returns the left-most leaf; these are index pages, not data pages, although referred to as leaves
static pageid_t findFirstLeaf(int xid, Page *root, int64_t depth);
//return the right-most leaf
static pageid_t findLastLeaf(int xid, Page *root, int64_t depth) ;
//reads the given record and returns the page id stored in it
static pageid_t lookupLeafPageFromRid(int xid, recordid rid);
//returns the record that stores the pageid of the node the given key should be in, if it exists
static recordid lookup(int xid, Page *node, int64_t depth, const byte *key,
size_t keySize);
//returns the id of the data page that could contain the given key
static pageid_t findPage(int xid, recordid tree, const byte *key, size_t keySize);
//appends a leaf page, val_page is the id of the leaf page
//rmLeafID --> rightmost leaf id
static recordid appendPage(int xid, recordid tree, pageid_t & rmLeafID,
const byte *key,size_t keySize,
logtree_page_allocator_t allocator, void *allocator_state,
long val_page);
static recordid appendInternalNode(int xid, Page *p,
int64_t depth,
const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
logtree_page_allocator_t allocator,
void *allocator_state);
static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
int64_t depth, const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
logtree_page_allocator_t allocator,
void *allocator_state);
/**
Initialize a page for use as an internal node of the tree.
*/
inline static void initializeNodePage(int xid, Page *p);
recordid &get_tree_state(){return tree_state;}
recordid &get_root_rec(){return root_rec;}
public:
const static RegionAllocConf_t REGION_ALLOC_STATIC_INITIALIZER;
const static int64_t DEPTH;
const static int64_t COMPARATOR;
const static int64_t FIRST_SLOT;
const static size_t root_rec_size;
const static int64_t PREV_LEAF;
const static int64_t NEXT_LEAF;
pageid_t lastLeaf;
private:
void print_tree(int xid, pageid_t pid, int64_t depth);
private:
recordid tree_state;
recordid root_rec;
};
class logtable
{
public:
logtable();
~logtable();
//user access functions
datatuple * findTuple(int xid, datatuple::key_t key, size_t keySize);
datatuple * findTuple_first(int xid, datatuple::key_t key, size_t keySize);
void insertTuple(struct datatuple &tuple);
//other class functions
recordid allocTable(int xid);
void flushTable();
DataPage<datatuple>* insertTuple(int xid, struct datatuple &tuple, recordid &dpstate,logtree *ltree);
datatuple * findTuple(int xid, datatuple::key_t key, size_t keySize, logtree *ltree);
inline recordid & get_table_rec(){return table_rec;}
inline logtree * get_tree_c2(){return tree_c2;}
inline logtree * get_tree_c1(){return tree_c1;}
inline void set_tree_c1(logtree *t){tree_c1=t;}
inline void set_tree_c2(logtree *t){tree_c2=t;}
typedef std::set<datatuple, datatuple> rbtree_t;
typedef rbtree_t* rbtree_ptr_t;
inline rbtree_ptr_t get_tree_c0(){return tree_c0;}
void set_tree_c0(rbtree_ptr_t newtree){tree_c0 = newtree;}
inline recordid & get_dpstate1(){return tbl_header.c1_dp_state;}
inline recordid & get_dpstate2(){return tbl_header.c2_dp_state;}
int get_fixed_page_count(){return fixed_page_count;}
void set_fixed_page_count(int count){fixed_page_count = count;}
void setMergeData(logtable_mergedata * mdata) { this->mergedata = mdata;}
logtable_mergedata* getMergeData(){return mergedata;}
inline tuplemerger * gettuplemerger(){return tmerger;}
public:
struct table_header {
recordid c2_root; //tree root record --> points to the root of the b-tree
recordid c2_state; //tree state --> describes the regions used by the index tree
recordid c2_dp_state; //data pages state --> regions used by the data pages
recordid c1_root;
recordid c1_state;
recordid c1_dp_state;
//epoch_t beginning;
//epoch_t end;
};
const static RegionAllocConf_t DATAPAGE_REGION_ALLOC_STATIC_INITIALIZER;
logtable_mergedata * mergedata;
private:
private:
recordid table_rec;
struct table_header tbl_header;
logtree *tree_c2; //big tree
logtree *tree_c1; //small tree
rbtree_ptr_t tree_c0; // in-mem red black tree
int tsize; //number of tuples
int64_t tree_bytes; //number of bytes
//DATA PAGE SETTINGS
int fixed_page_count;//number of pages in a datapage
// logtable_mergedata * mergedata;
tuplemerger *tmerger;
};
typedef struct logtreeIterator_s {
Page * p;
recordid current;
indexnode_rec *t;
int justOnePage;
} logtreeIterator_s;
class logtreeIterator
{
public:
static lladdIterator_t* open(int xid, recordid root);
static lladdIterator_t* openAt(int xid, recordid root, const byte* key);
static int next(int xid, lladdIterator_t *it);
//static lladdIterator_t *copy(int xid, lladdIterator_t* i);
static void close(int xid, lladdIterator_t *it);
static inline int key (int xid, lladdIterator_t *it, byte **key)
{
logtreeIterator_s * impl = (logtreeIterator_s*)it->impl;
*key = (byte*)(impl->t+1);
return (int) impl->current.size - sizeof(indexnode_rec);
}
static inline int value(int xid, lladdIterator_t *it, byte **value)
{
logtreeIterator_s * impl = (logtreeIterator_s*)it->impl;
*value = (byte*)&(impl->t->ptr);
return sizeof(impl->t->ptr);
}
static inline void tupleDone(int xid, void *it) { }
static inline void releaseLock(int xid, void *it) { }
};
#endif
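Note (not part of this commit): a hedged end-to-end sketch of how the classes above appear intended to fit together: create a logtable inside a Stasis transaction, register it with a merge_scheduler, then insert and look up a tuple. Whether allocTable() fully initializes the C1/C2 trees, and the exact Stasis lifecycle calls (Tinit/Tdeinit), are assumptions not confirmed by this diff; the xid of -1 for findTuple mirrors the call in logserver.cpp.

#include "logstore.h"
#include "merger.h"

void example_usage(datatuple &t)
{
    Tinit();                                     //Stasis startup (assumed)
    int xid = Tbegin();

    logtable ltable;
    recordid table_rid = ltable.allocTable(xid); //assumed to set up C1/C2 state
    (void)table_rid;
    Tcommit(xid);

    merge_scheduler mscheduler;
    int idx = mscheduler.addlogtable(&ltable);
    mscheduler.startlogtable(idx);               //spawns mem/disk merge threads

    ltable.insertTuple(t);                       //goes into the C0 red-black tree

    datatuple *found = ltable.findTuple(-1, t.key, *t.keylen);
    if(found) { /* ... */ }

    mscheduler.shutdown();                       //flushes C0 and joins the mergers
    Tdeinit();                                   //Stasis shutdown (assumed)
}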

836
merger.cpp Normal file
View file

@ -0,0 +1,836 @@
#include <math.h>
#include "merger.h"
#include "logiterators.cpp"
#include "datapage.cpp"
//pageid_t merge_scheduler::C0_MEM_SIZE = 1000 * 1000 * 1000;
//template <> struct merger_args<rbtree_t>;
//template <> struct merger_args<logtree>;
inline DataPage<datatuple>*
insertTuple(int xid, DataPage<datatuple> *dp, datatuple &t,
logtable *ltable,
logtree * ltree,
recordid & dpstate,
int64_t &dpages, int64_t &npages);
int merge_scheduler::addlogtable(logtable *ltable)
{
struct logtable_mergedata * mdata = new logtable_mergedata;
// initialize merge data
mdata->header_lock = initlock();
mdata->rbtree_mut = new pthread_mutex_t;
pthread_mutex_init(mdata->rbtree_mut,0);
mdata->old_c0 = new rbtree_ptr_t;
*mdata->old_c0 = 0;
mdata->input_needed = new bool(false);
mdata->input_ready_cond = new pthread_cond_t;
pthread_cond_init(mdata->input_ready_cond,0);
mdata->input_needed_cond = new pthread_cond_t;
pthread_cond_init(mdata->input_needed_cond,0);
mdata->input_size = new int64_t(100);
mdata->diskmerge_args = new merger_args<logtree>;
mdata->memmerge_args = new merger_args<rbtree_t>;
mergedata.push_back(std::make_pair(ltable, mdata));
return mergedata.size()-1;
}
merge_scheduler::~merge_scheduler()
{
for(int i=0; i<mergedata.size(); i++)
{
logtable *ltable = mergedata[i].first;
logtable_mergedata *mdata = mergedata[i].second;
//delete the mergedata fields
deletelock(mdata->header_lock);
delete mdata->rbtree_mut;
delete mdata->old_c0;
delete mdata->input_needed;
delete mdata->input_ready_cond;
delete mdata->input_needed_cond;
delete mdata->input_size;
//delete the merge thread structure variables
delete (recordid*) mdata->memmerge_args->pageAllocState;
delete (recordid*) mdata->memmerge_args->oldAllocState;
delete mdata->memmerge_args->still_open;
delete (recordid*) mdata->diskmerge_args->pageAllocState;
delete (recordid*) mdata->diskmerge_args->oldAllocState;
pthread_cond_destroy(mdata->diskmerge_args->in_block_needed_cond);
delete mdata->diskmerge_args->in_block_needed_cond;
delete mdata->diskmerge_args->in_block_needed;
pthread_cond_destroy(mdata->diskmerge_args->out_block_needed_cond);
delete mdata->diskmerge_args->out_block_needed_cond;
delete mdata->diskmerge_args->out_block_needed;
pthread_cond_destroy(mdata->diskmerge_args->in_block_ready_cond);
delete mdata->diskmerge_args->in_block_ready_cond;
pthread_cond_destroy(mdata->diskmerge_args->out_block_ready_cond);
delete mdata->diskmerge_args->out_block_ready_cond;
delete mdata->diskmerge_args->my_tree_size;
delete mdata->diskmerge_args;
delete mdata->memmerge_args;
}
mergedata.clear();
}
void merge_scheduler::shutdown()
{
//signal shutdown
for(int i=0; i<mergedata.size(); i++)
{
logtable *ltable = mergedata[i].first;
logtable_mergedata *mdata = mergedata[i].second;
//flush the in memory table to write any tuples still in memory
ltable->flushTable();
pthread_mutex_lock(mdata->rbtree_mut);
*(mdata->memmerge_args->still_open)=false;
pthread_cond_signal(mdata->input_ready_cond);
//*(mdata->diskmerge_args->still_open)=false;//same pointer so no need
pthread_mutex_unlock(mdata->rbtree_mut);
}
for(int i=0; i<mergedata.size(); i++)
{
logtable_mergedata *mdata = mergedata[i].second;
pthread_join(mdata->memmerge_thread,0);
pthread_join(mdata->diskmerge_thread,0);
}
}
void merge_scheduler::startlogtable(int index)
{
logtable * ltable = mergedata[index].first;
struct logtable_mergedata *mdata = mergedata[index].second;
pthread_cond_t * block1_needed_cond = new pthread_cond_t;
pthread_cond_init(block1_needed_cond,0);
pthread_cond_t * block2_needed_cond = new pthread_cond_t;
pthread_cond_init(block2_needed_cond,0);
pthread_cond_t * block1_ready_cond = new pthread_cond_t;
pthread_cond_init(block1_ready_cond,0);
pthread_cond_t * block2_ready_cond = new pthread_cond_t;
pthread_cond_init(block2_ready_cond,0);
bool *block1_needed = new bool(false);
bool *block2_needed = new bool(false);
bool *system_running = new bool(true);
//wait to merge the next block until we have merged FUDGE blocks.
static const int FUDGE = 1;
static double R = MIN_R;
int64_t * block1_size = new int64_t;
*block1_size = FUDGE * ((int)R) * (*(mdata->input_size));
//initialize rb-tree
ltable->set_tree_c0(new rbtree_t);
//disk merger args
recordid * ridp = new recordid;
*ridp = ltable->get_tree_c2()->get_tree_state(); //h.bigTreeAllocState;
recordid * oldridp = new recordid;
*oldridp = NULLRID;
logtree ** block1_scratch = new logtree*;
*block1_scratch=0;
//recordid * allocer_scratch = new recordid;
RegionAllocConf_t *allocer_scratch = new RegionAllocConf_t;
struct merger_args<logtree> diskmerge_args= {
ltable,
1, //worker id
logtree::alloc_region_rid, //pageAlloc
ridp, // pageAllocState
oldridp, // oldAllocState
mdata->rbtree_mut, //block_ready_mutex
block1_needed_cond, //in_block_needed_cond
block1_needed, //in_block_needed
block2_needed_cond, //out_block_needed_cond
block2_needed, //out_block_needed
block1_ready_cond, //in_block_ready_cond
block2_ready_cond, //out_block_ready_cond
system_running, //still_open i.e. system running
block1_size, //mytree_size ?
0, //out_tree_size, biggest component computes its size directly.
0, //max_tree_size No max size for biggest component
&R, //r_i
block1_scratch, //in-tree
allocer_scratch, //in_tree_allocer
0, //out_tree
0, //out_tree_allocer
new treeIterator<datatuple>::treeIteratorHandle(ltable->get_tree_c2()->get_root_rec()), // my_tree
ltable->get_table_rec() //tree
};
*mdata->diskmerge_args = diskmerge_args;
DEBUG("Tree C2 is %lld\n", (long long)ltable->get_tree_c2()->get_root_rec().page);
//memory merger args
ridp = new recordid;
*ridp = ltable->get_tree_c1()->get_tree_state();
oldridp = new recordid;
*oldridp = NULLRID;
DEBUG("Tree C1 is %lld\n", (long long)ltable->get_tree_c1()->get_root_rec().page);
struct merger_args<rbtree_t> memmerge_args =
{
ltable,
2,
logtree::alloc_region_rid, //pageAlloc
ridp, // pageAllocState
oldridp, // oldAllocState
mdata->rbtree_mut, //block_ready_mutex
mdata->input_needed_cond,
mdata->input_needed,
block1_needed_cond,
block1_needed,
mdata->input_ready_cond,
block1_ready_cond,
system_running,
mdata->input_size,
block1_size,
(int64_t)(R * R * MAX_C0_SIZE),
&R,
mdata->old_c0,
0,
block1_scratch,
allocer_scratch,
new treeIterator<datatuple>::treeIteratorHandle(ltable->get_tree_c1()->get_root_rec()),
ltable->get_table_rec() //tree
};
*mdata->memmerge_args = memmerge_args;
void * (*diskmerger)(void*) = diskMergeThread;
void * (*memmerger)(void*) = memMergeThread;
pthread_create(&mdata->diskmerge_thread, 0, diskmerger, mdata->diskmerge_args);
pthread_create(&mdata->memmerge_thread, 0, memmerger, mdata->memmerge_args);
}
//TODO: flush the data pages
// deallocate/free their region
// create new data region for new data pages
void* memMergeThread(void*arg)
{
int xid;// = Tbegin();
merger_args<rbtree_t> * a = (merger_args<rbtree_t>*)(arg);
assert(a->my_tree->r_.size != -1);
logtable * ltable = a->ltable;
int merge_count =0;
// pthread_mutex_lock(a->block_ready_mut);
while(true)
{
writelock(ltable->mergedata->header_lock,0);
int done = 0;
// get a new input for merge
while(!*(a->in_tree))
{
pthread_mutex_lock(a->block_ready_mut);
*a->in_block_needed = true;
//pthread_cond_signal(a->in_block_needed_cond);
pthread_cond_broadcast(a->in_block_needed_cond);
if(!*(a->still_open)){
done = 1;
pthread_mutex_unlock(a->block_ready_mut);
break;
}
printf("mmt:\twaiting for block ready cond\n");
unlock(ltable->mergedata->header_lock);
pthread_cond_wait(a->in_block_ready_cond, a->block_ready_mut);
pthread_mutex_unlock(a->block_ready_mut);
writelock(ltable->mergedata->header_lock,0);
printf("mmt:\tblock ready\n");
}
*a->in_block_needed = false;
if(done==1)
{
pthread_mutex_lock(a->block_ready_mut);
pthread_cond_signal(a->out_block_ready_cond);
pthread_mutex_unlock(a->block_ready_mut);
unlock(ltable->mergedata->header_lock);
break;
}
if((*a->in_tree)->size()==0) //input empty, this can only happen during shutdown
{
delete *a->in_tree;
*a->in_tree = 0;
unlock(ltable->mergedata->header_lock);
continue;
}
uint64_t insertedTuples=0;
int64_t mergedPages=0;
assert(a->my_tree->r_.size != -1);
//create the iterators
treeIterator<datatuple> *itrA = new treeIterator<datatuple>(a->my_tree->r_);
memTreeIterator<rbtree_t, datatuple> *itrB =
new memTreeIterator<rbtree_t, datatuple>(*a->in_tree);
memTreeIterator<rbtree_t, datatuple> *itrBend = itrB->end();
//Tcommit(xid);
xid = Tbegin();
//create a new tree
logtree * scratch_tree = new logtree;
recordid scratch_root = scratch_tree->create(xid);
//save the old dp state values
RegionAllocConf_t olddp_state;
Tread(xid, ltable->get_dpstate1(), &olddp_state);
//reinitialize the dp state
Tset(xid, ltable->get_dpstate1(), &logtable::DATAPAGE_REGION_ALLOC_STATIC_INITIALIZER);
//pthread_mutex_unlock(a->block_ready_mut);
unlock(ltable->mergedata->header_lock);
//do the merge
printf("mmt:\tMerging:\n");
int64_t npages = 0;
mergedPages = merge_iterators(xid, itrA, itrB, ltable, scratch_tree, npages);
delete itrA;
delete itrB;
delete itrBend;
//force write the new region to disk
recordid scratch_alloc_state = scratch_tree->get_tree_state();
//TlsmForce(xid,scratch_root,logtree::force_region_rid, &scratch_alloc_state);
logtree::force_region_rid(xid, &scratch_alloc_state);
//force write the new datapages
DataPage<datatuple>::force_region_rid(xid, &ltable->get_dpstate1());
//writes complete
//now atomically replace the old c1 with the new c1
//pthread_mutex_lock(a->block_ready_mut);
writelock(ltable->mergedata->header_lock,0);
merge_count++;
*a->my_tree_size = mergedPages;
printf("mmt:\tmerge_count %d #pages written %lld\n", merge_count, npages);
delete ltable->get_tree_c1();
ltable->set_tree_c1(scratch_tree);
logtable::table_header h;
void * oldAllocState = a->pageAllocState;
Tread(xid, a->tree, &h);
h.c1_root = scratch_root;
h.c1_state = scratch_alloc_state;
//note we already updated the dpstate before the merge
printf("mmt:\tUpdated C1's position on disk to %lld\n",scratch_root.page);
Tset(xid, a->tree, &h);
//Tcommit(xid);
//xid = Tbegin();
// free old my_tree here
//TODO: check
logtree::free_region_rid(xid, a->my_tree->r_, logtree::dealloc_region_rid, oldAllocState);
//TlsmFree(xid,a->my_tree->r_,logtree::dealloc_region_rid,oldAllocState);
//TODO: check
//free the old data pages
DataPage<datatuple>::dealloc_region_rid(xid, &olddp_state);
Tcommit(xid);
//xid = Tbegin();
//TODO: this is simplistic for now
//signal the other merger if necessary
double target_R = *(a->r_i);
double new_c1_size = npages * PAGE_SIZE;
assert(target_R >= MIN_R);
if( (new_c1_size / MAX_C0_SIZE > target_R) ||
(a->max_size && new_c1_size > a->max_size ) )
{
printf("mmt:\tsignaling C2 for merge\n");
printf("mmt:\tnew_c1_size %.2f\tMAX_C0_SIZE %lld\ta->max_size %lld\t targetr %.2f \n", new_c1_size,
MAX_C0_SIZE, a->max_size, target_R);
// XXX need to report backpressure here!
while(*a->out_tree) {
pthread_mutex_lock(a->block_ready_mut);
unlock(ltable->mergedata->header_lock);
pthread_cond_wait(a->out_block_needed_cond, a->block_ready_mut);
pthread_mutex_unlock(a->block_ready_mut);
writelock(ltable->mergedata->header_lock,0);
}
*a->out_tree = scratch_tree;
xid = Tbegin();
Tread(xid, ltable->get_dpstate1(), a->out_tree_allocer);
pthread_cond_signal(a->out_block_ready_cond);
logtree *empty_tree = new logtree;
empty_tree->create(xid);
*(recordid*)(a->pageAllocState) = empty_tree->get_tree_state();
a->my_tree->r_ = empty_tree->get_root_rec();
ltable->set_tree_c1(empty_tree);
logtable::table_header h;
Tread(xid, a->tree, &h);
h.c1_root = empty_tree->get_root_rec(); //update root
h.c1_state = empty_tree->get_tree_state(); //update index alloc state
printf("mmt:\tUpdated C1's position on disk to %lld\n",empty_tree->get_root_rec().page);
Tset(xid, a->tree, &h);
//update datapage alloc state
Tset(xid, ltable->get_dpstate1(), &logtable::DATAPAGE_REGION_ALLOC_STATIC_INITIALIZER);
Tcommit(xid);
//xid = Tbegin();
}
else //not signaling the C2 for merge yet
{
printf("mmt:\tnot signaling C2 for merge\n");
*(recordid*)a->pageAllocState = scratch_alloc_state;
a->my_tree->r_ = scratch_root;
}
rbtree_ptr_t deltree = *a->in_tree;
*a->in_tree = 0;
//Tcommit(xid);
unlock(ltable->mergedata->header_lock);
//TODO: get the freeing outside of the lock
//// ----------- Free in_tree
for(rbtree_t::iterator delitr=deltree->begin();
delitr != deltree->end(); delitr++)
free((*delitr).keylen);
delete deltree;
//deltree = 0;
/*
for(rbtree_t::iterator delitr=(*a->in_tree)->begin();
delitr != (*a->in_tree)->end(); delitr++)
free((*delitr).keylen);
delete *a->in_tree;
*a->in_tree = 0;
*/
}
//pthread_mutex_unlock(a->block_ready_mut);
return 0;
}
void *diskMergeThread(void*arg)
{
int xid;// = Tbegin();
merger_args<logtree> * a = (merger_args<logtree>*)(arg);
assert(a->my_tree->r_.size != -1);
logtable * ltable = a->ltable;
int merge_count =0;
//pthread_mutex_lock(a->block_ready_mut);
while(true)
{
writelock(ltable->mergedata->header_lock,0);
int done = 0;
// get a new input for merge
while(!*(a->in_tree))
{
pthread_mutex_lock(a->block_ready_mut);
*a->in_block_needed = true;
pthread_cond_signal(a->in_block_needed_cond);
if(!*(a->still_open)){
done = 1;
pthread_mutex_unlock(a->block_ready_mut);
break;
}
printf("dmt:\twaiting for block ready cond\n");
unlock(ltable->mergedata->header_lock);
pthread_cond_wait(a->in_block_ready_cond, a->block_ready_mut);
pthread_mutex_unlock(a->block_ready_mut);
printf("dmt:\tblock ready\n");
writelock(ltable->mergedata->header_lock,0);
}
*a->in_block_needed = false;
if(done==1)
{
pthread_cond_signal(a->out_block_ready_cond);
unlock(ltable->mergedata->header_lock);
break;
}
uint64_t insertedTuples=0;
int64_t mergedPages=0;
assert(a->my_tree->r_.size != -1);
//create the iterators
treeIterator<datatuple> *itrA = new treeIterator<datatuple>(a->my_tree->r_);
treeIterator<datatuple> *itrB =
new treeIterator<datatuple>((*a->in_tree)->get_root_rec());
//Tcommit(xid);
xid = Tbegin();
//create a new tree
logtree * scratch_tree = new logtree;
recordid scratch_root = scratch_tree->create(xid);
//save the old dp state values
RegionAllocConf_t olddp_state;
Tread(xid, ltable->get_dpstate2(), &olddp_state);
//reinitialize the dp state
//TODO: maybe you want larger regions for the second tree?
Tset(xid, ltable->get_dpstate2(), &logtable::DATAPAGE_REGION_ALLOC_STATIC_INITIALIZER);
//pthread_mutex_unlock(a->block_ready_mut);
unlock(ltable->mergedata->header_lock);
//do the merge
printf("dmt:\tMerging:\n");
int64_t npages = 0;
mergedPages = merge_iterators(xid, itrA, itrB, ltable, scratch_tree, npages);
delete itrA;
delete itrB;
//force write the new region to disk
recordid scratch_alloc_state = scratch_tree->get_tree_state();
//TODO:
//TlsmForce(xid,scratch_root,logtree::force_region_rid, &scratch_alloc_state);
logtree::force_region_rid(xid, &scratch_alloc_state);
//force write the new datapages
DataPage<datatuple>::force_region_rid(xid, &ltable->get_dpstate2());
//writes complete
//now atomically replace the old c2 with the new c2
//pthread_mutex_lock(a->block_ready_mut);
writelock(ltable->mergedata->header_lock,0);
merge_count++;
*a->my_tree_size = mergedPages;
//update the current optimal R value
*(a->r_i) = std::max(MIN_R, sqrt( (npages * 1.0) / (MAX_C0_SIZE/PAGE_SIZE) ) );
printf("dmt:\tmerge_count %d\t#written pages: %lld\n optimal r %.2f", merge_count, npages, *(a->r_i));
delete ltable->get_tree_c2();
ltable->set_tree_c2(scratch_tree);
logtable::table_header h;
void * oldAllocState = a->pageAllocState;
Tread(xid, a->tree, &h);
h.c2_root = scratch_root;
h.c2_state = scratch_alloc_state;
//note we already updated the dpstate before the merge
printf("dmt:\tUpdated C2's position on disk to %lld\n",scratch_root.page);
Tset(xid, a->tree, &h);
// free old my_tree here
//TODO: check
logtree::free_region_rid(xid, a->my_tree->r_, logtree::dealloc_region_rid, oldAllocState);
//TlsmFree(xid,a->my_tree->r_,logtree::dealloc_region_rid,oldAllocState);
//TODO: check
//free the old data pages
DataPage<datatuple>::dealloc_region_rid(xid, &olddp_state);
*(recordid*)a->pageAllocState = scratch_alloc_state;
a->my_tree->r_ = scratch_root;
//// ----------- Free in_tree
//TODO: check
logtree::free_region_rid(xid, (*a->in_tree)->get_root_rec(),
logtree::dealloc_region_rid,
&((*a->in_tree)->get_tree_state()));
//TlsmFree(xid,a->my_tree->r_,logtree::dealloc_region_rid,oldAllocState);
//TODO: check
//free the old data pages
DataPage<datatuple>::dealloc_region_rid(xid, a->in_tree_allocer);//TODO:
Tcommit(xid);
//xid = Tbegin();
//Tcommit(xid);
delete *a->in_tree;
*a->in_tree = 0;
unlock(ltable->mergedata->header_lock);
}
//pthread_mutex_unlock(a->block_ready_mut);
return 0;
}
int64_t merge_iterators(int xid,
treeIterator<datatuple> *itrA,
memTreeIterator<rbtree_t, datatuple> * itrB,
logtable *ltable,
logtree *scratch_tree,
int64_t &npages )
{
int64_t dpages = 0;
//int npages = 0;
int64_t ntuples = 0;
DataPage<datatuple> *dp = 0;
memTreeIterator<rbtree_t, datatuple> *itrBend = itrB->end();
datatuple *t1 = itrA->getnext();
while(*itrB != *itrBend)
{
datatuple t2 = **itrB;
DEBUG("tuple\t%lld: keylen %d datalen %d\n", ntuples, *t2.keylen,*t2.datalen );
while(t1 != 0 && datatuple::compare(t1->key, t2.key) < 0) // t1 is less than t2
{
//insert t1
dp = insertTuple(xid, dp, *t1, ltable, scratch_tree, ltable->get_dpstate1(),
dpages, npages);
free(t1->keylen);
free(t1);
ntuples++;
//advance itrA
t1 = itrA->getnext();
}
if(t1 != 0 && datatuple::compare(t1->key, t2.key) == 0)
{
datatuple *mtuple = ltable->gettuplemerger()->merge(t1,&t2);
//insert merged tuple
dp = insertTuple(xid, dp, *mtuple, ltable, scratch_tree, ltable->get_dpstate1(),
dpages, npages);
free(t1->keylen);
free(t1);
t1 = itrA->getnext(); //advance itrA
free(mtuple->keylen);
free(mtuple);
}
else
{
//insert t2
dp = insertTuple(xid, dp, t2, ltable, scratch_tree, ltable->get_dpstate1(),
dpages, npages);
//free(t2.keylen); //cannot free here it may still be read through a lookup
}
ntuples++;
++(*itrB);
}
while(t1 != 0) //itrB is exhausted; drain the remaining tuples from itrA
{
dp = insertTuple(xid, dp, *t1, ltable, scratch_tree, ltable->get_dpstate1(),
dpages, npages);
free(t1->keylen);
free(t1);
ntuples++;
//advance itrA
t1 = itrA->getnext();
}
delete itrBend;
if(dp!=NULL)
delete dp;
DEBUG("dpages: %d\tnpages: %d\tntuples: %d\n", dpages, npages, ntuples);
fflush(stdout);
return dpages;
}
int64_t merge_iterators(int xid,
treeIterator<datatuple> *itrA, //iterator on c2
treeIterator<datatuple> *itrB, //iterator on c1
logtable *ltable,
logtree *scratch_tree,
int64_t &npages)
{
int64_t dpages = 0;
//int npages = 0;
int64_t ntuples = 0;
DataPage<datatuple> *dp = 0;
datatuple *t1 = itrA->getnext();
datatuple *t2 = 0;
while( (t2=itrB->getnext()) != 0)
{
DEBUG("tuple\t%lld: keylen %d datalen %d\n",
ntuples, *(t2->keylen),*(t2->datalen) );
while(t1 != 0 && datatuple::compare(t1->key, t2->key) < 0) // t1 is less than t2
{
//insert t1
dp = insertTuple(xid, dp, *t1, ltable, scratch_tree,
ltable->get_dpstate2(),
dpages, npages);
free(t1->keylen);
free(t1);
ntuples++;
//advance itrA
t1 = itrA->getnext();
}
if(t1 != 0 && datatuple::compare(t1->key, t2->key) == 0)
{
datatuple *mtuple = ltable->gettuplemerger()->merge(t1,t2);
//insert merged tuple, drop deletes
if(!mtuple->isDelete())
dp = insertTuple(xid, dp, *mtuple, ltable, scratch_tree, ltable->get_dpstate2(),
dpages, npages);
free(t1->keylen);
free(t1);
t1 = itrA->getnext(); //advance itrA
free(mtuple->keylen);
free(mtuple);
}
else
{
//insert t2
dp = insertTuple(xid, dp, *t2, ltable, scratch_tree, ltable->get_dpstate2(),
dpages, npages);
}
free(t2->keylen);
free(t2);
ntuples++;
}
while(t1 != 0)
{
dp = insertTuple(xid, dp, *t1, ltable, scratch_tree, ltable->get_dpstate2(),
dpages, npages);
free(t1->keylen);
free(t1);
ntuples++;
//advance itrA
t1 = itrA->getnext();
}
if(dp!=NULL)
delete dp;
DEBUG("dpages: %d\tnpages: %d\tntuples: %d\n", dpages, npages, ntuples);
fflush(stdout);
return dpages;
}
inline DataPage<datatuple>*
insertTuple(int xid, DataPage<datatuple> *dp, datatuple &t,
logtable *ltable,
logtree * ltree,
recordid & dpstate,
int64_t &dpages, int64_t &npages)
{
if(dp==0)
{
dp = ltable->insertTuple(xid, t, dpstate, ltree);
dpages++;
}
else if(!dp->append(xid, t))
{
npages += dp->get_page_count();
delete dp;
dp = ltable->insertTuple(xid, t, dpstate, ltree);
dpages++;
}
return dp;
}
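Note (not part of this commit): a small numeric illustration of the sizing logic in memMergeThread and diskMergeThread above, using MAX_C0_SIZE and MIN_R from merger.h. PAGE_SIZE comes from Stasis; 4096 bytes is assumed here purely for the arithmetic.

#include <math.h>
#include <algorithm>

void sizing_example()
{
    const double page_size   = 4096.0;                 //assumed PAGE_SIZE
    const double max_c0_size = 800.0 * 1024 * 1024;    //MAX_C0_SIZE from merger.h
    const double min_r       = 3.0;                    //MIN_R from merger.h

    //diskMergeThread: after writing npages to C2, recompute the target ratio R
    double c2_pages = 2.0 * 1000 * 1000;               //~8GB of C2 pages, for example
    double R = std::max(min_r, sqrt(c2_pages / (max_c0_size / page_size)));
    //R = max(3, sqrt(2e6 / 204800)) = max(3, ~3.12) ~= 3.12

    //memMergeThread: signal a C2 merge once C1 outgrows R * MAX_C0_SIZE
    double c1_pages = 700.0 * 1000;                    //~2.9GB of C1 pages
    bool signal_c2 = (c1_pages * page_size / max_c0_size) > R; //~3.42 > 3.12 -> true
    (void)signal_c2;
}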

127
merger.h Normal file
View file

@ -0,0 +1,127 @@
#ifndef _MERGER_H_
#define _MERGER_H_
#include <vector>
#include <utility>
#include "logstore.h"
#include "logiterators.h"
typedef std::set<datatuple, datatuple> rbtree_t;
typedef rbtree_t* rbtree_ptr_t;
//TODO: 400 bytes overhead per tuple, this is nuts, check if this is true...
static const int RB_TREE_OVERHEAD = 400;
static const int64_t MAX_C0_SIZE = 800 *1024*1024; //max size of c0
static const double MIN_R = 3.0;
//T is either logtree or red-black tree
template <class T>
struct merger_args
{
logtable * ltable;
int worker_id;
//page allocation information
pageid_t(*pageAlloc)(int,void*);
void *pageAllocState;
void *oldAllocState;
pthread_mutex_t * block_ready_mut;
pthread_cond_t * in_block_needed_cond;
bool * in_block_needed;
pthread_cond_t * out_block_needed_cond;
bool * out_block_needed;
pthread_cond_t * in_block_ready_cond;
pthread_cond_t * out_block_ready_cond;
bool * still_open;
int64_t * my_tree_size;
int64_t * out_tree_size;
int64_t max_size; //pageid_t
double * r_i;
T ** in_tree;
void * in_tree_allocer;
logtree ** out_tree;
void * out_tree_allocer;
treeIterator<datatuple>::treeIteratorHandle *my_tree;
recordid tree;
};
struct logtable_mergedata
{
//merge threads
pthread_t diskmerge_thread;
pthread_t memmerge_thread;
rwl *header_lock;
pthread_mutex_t * rbtree_mut;
rbtree_ptr_t *old_c0; //in-mem red black tree being merged / to be merged
bool *input_needed; // memmerge-input needed
pthread_cond_t * input_ready_cond;
pthread_cond_t * input_needed_cond;
int64_t * input_size;
//merge args 1
struct merger_args<logtree> *diskmerge_args;
//merge args 2
struct merger_args<rbtree_t> *memmerge_args;
};
class merge_scheduler
{
std::vector<std::pair<logtable *, logtable_mergedata*> > mergedata;
public:
//static pageid_t C0_MEM_SIZE;
~merge_scheduler();
int addlogtable(logtable * ltable);
void startlogtable(int index);
struct logtable_mergedata *getMergeData(int index){return mergedata[index].second;}
void shutdown();
};
void* memMergeThread(void* arg);
//merges and returns the number of data pages used
int64_t merge_iterators(int xid,
treeIterator<datatuple> *itrA,
memTreeIterator<rbtree_t, datatuple> * itrB,
logtable *ltable,
logtree *scratch_tree,
int64_t &npages);
int64_t merge_iterators(int xid,
treeIterator<datatuple> *itrA,
treeIterator<datatuple> *itrB,
logtable *ltable,
logtree *scratch_tree,
int64_t &npages);
void* diskMergeThread(void* arg);
#endif
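Note (not part of this commit): a distilled sketch of the handshake merger_args encodes, as used by memMergeThread and diskMergeThread in merger.cpp: the consumer (a merge thread) sets *in_block_needed and waits on in_block_ready_cond; the producer publishes a new *in_tree and signals. Field names match merger_args above; the juggling of the logtable header lock around the wait is omitted for brevity, so this is a simplification, not the actual thread body.

#include <pthread.h>
//assumes merger.h is included

template <class T>
T* wait_for_input(merger_args<T> *a)
{
    pthread_mutex_lock(a->block_ready_mut);
    while(!*(a->in_tree) && *(a->still_open))
    {
        *a->in_block_needed = true;                    //ask the producer for work
        pthread_cond_broadcast(a->in_block_needed_cond);
        pthread_cond_wait(a->in_block_ready_cond, a->block_ready_mut);
    }
    *a->in_block_needed = false;
    T *in = *(a->in_tree);                             //0 means we are shutting down
    pthread_mutex_unlock(a->block_ready_mut);
    return in;
}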

84
tuplemerger.cpp Normal file
View file

@ -0,0 +1,84 @@
#include "tuplemerger.h"
#include "logstore.h"
datatuple* tuplemerger::merge(datatuple *t1, datatuple *t2)
{
assert(!t1->isDelete() || !t2->isDelete()); //both cannot be delete
datatuple *t;
if(t1->isDelete()) //delete - t2
{
t = datatuple::from_bytes(t2->to_bytes());
}
else if(t2->isDelete())
{
t = datatuple::from_bytes(t2->to_bytes());
}
else //neither is a delete
{
t = (*merge_fp)(t1,t2);
}
return t;
}
/**
* appends the data in t2 to data from t1
*
* deletes are handled by the tuplemerger::merge function
* so here neither t1 nor t2 is a delete datatuple
**/
datatuple* append_merger(datatuple *t1, datatuple *t2)
{
static const size_t isize = sizeof(uint32_t);
struct datatuple *t = (datatuple*) malloc(sizeof(datatuple));
byte *arr = (byte*)malloc(t1->byte_length() + *t2->datalen);
t->keylen = (uint32_t*) arr;
*(t->keylen) = *(t1->keylen);
t->datalen = (uint32_t*) (arr+isize);
*(t->datalen) = *(t1->datalen) + *(t2->datalen);
t->key = (datatuple::key_t) (arr+isize+isize);
memcpy((byte*)t->key, (byte*)t1->key, *(t1->keylen));
t->data = (datatuple::data_t) (arr+isize+isize+ *(t1->keylen));
memcpy((byte*)t->data, (byte*)t1->data, *(t1->datalen));
memcpy(((byte*)t->data) + *(t1->datalen), (byte*)t2->data, *(t2->datalen));
return t;
}
/**
* replaces the data with data from t2
*
* deletes are handled by the tuplemerger::merge function
* so here neither t1 nor t2 is a delete datatuple
**/
datatuple* replace_merger(datatuple *t1, datatuple *t2)
{
static const size_t isize = sizeof(uint32_t);
struct datatuple *t = (datatuple*) malloc(sizeof(datatuple));
byte *arr = (byte*)malloc(t2->byte_length());
t->keylen = (uint32_t*) arr;
*(t->keylen) = *(t2->keylen);
t->datalen = (uint32_t*) (arr+isize);
*(t->datalen) = *(t2->datalen);
t->key = (datatuple::key_t) (arr+isize+isize);
memcpy((byte*)t->key, (byte*)t2->key, *(t2->keylen));
t->data = (datatuple::data_t) (arr+isize+isize+ *(t2->keylen));
memcpy((byte*)t->data, (byte*)t2->data, *(t2->datalen));
return t;
}
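Note (not part of this commit): a small usage sketch contrasting the two merge policies above. Given an existing tuple ("k","v1") and an update ("k","v2"), append_merger yields ("k","v1v2") while replace_merger yields ("k","v2"). The tuples t_old/t_new are assumed to be already-built datatuples with the same key; freeing follows the pattern merge_iterators() uses in merger.cpp.

#include <stdlib.h>
#include "datatuple.h"
#include "tuplemerger.h"

void merger_example(datatuple *t_old, datatuple *t_new)
{
    tuplemerger appender(append_merger);
    tuplemerger replacer(replace_merger);

    //both calls allocate a fresh tuple; the caller frees ->keylen (the backing
    //buffer) and then the struct, as merge_iterators() does
    datatuple *appended = appender.merge(t_old, t_new); //data = old data + new data
    datatuple *replaced = replacer.merge(t_old, t_new); //data = new data only

    free(appended->keylen); free(appended);
    free(replaced->keylen); free(replaced);
}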

34
tuplemerger.h Normal file
View file

@ -0,0 +1,34 @@
#ifndef _TUPLE_MERGER_H_
#define _TUPLE_MERGER_H_
struct datatuple;
typedef datatuple* (*merge_fn_t) (datatuple*, datatuple *);
datatuple* append_merger(datatuple *t1, datatuple *t2);
datatuple* replace_merger(datatuple *t1, datatuple *t2);
class tuplemerger
{
public:
tuplemerger(merge_fn_t merge_fp)
{
this->merge_fp = merge_fp;
}
datatuple* merge(datatuple *t1, datatuple *t2);
private:
merge_fn_t merge_fp;
};
#endif