Preliminary force_range implementation; lsmTable leaks significantly less space now.
This commit is contained in:
parent
95d7d3cd5f
commit
87a70f29bc
18 changed files with 285 additions and 23 deletions
|
@ -209,7 +209,7 @@ AC_FUNC_MEMCMP
|
|||
AC_FUNC_REALLOC
|
||||
AC_FUNC_STAT
|
||||
AC_FUNC_VPRINTF
|
||||
AC_CHECK_FUNCS([bzero fdatasync ftruncate getcwd gettimeofday inet_ntoa localtime_r memmove memset mkdir posix_memalign pow powl socket sqrt strchr strdup strerror strrchr strstr strtol strtoul tcase_set_timeout])
|
||||
AC_CHECK_FUNCS([bzero fdatasync floor ftruncate getcwd gettimeofday inet_ntoa localtime_r memmove memset mkdir posix_memalign pow powl socket sqrt strchr strdup strerror strrchr strstr strtol strtoul sync_file_range tcase_set_timeout])
|
||||
|
||||
#AC_CONFIG_LIBMYSQLD
|
||||
|
||||
|
|
|
@ -120,6 +120,7 @@ static int bufManBufInit() {
|
|||
loadPageImpl = bufManLoadPage;
|
||||
writeBackPage = pageWrite;
|
||||
forcePages = forcePageFile;
|
||||
forcePageRange = forceRangePageFile;
|
||||
bufDeinit = bufManBufDeinit;
|
||||
simulateBufferManagerCrash = bufManSimulateBufferManagerCrash;
|
||||
|
||||
|
@ -497,6 +498,7 @@ Page * (*loadPageImpl)(int xid, int pageid) = 0;
|
|||
void (*releasePageImpl)(Page * p) = 0;
|
||||
void (*writeBackPage)(Page * p) = 0;
|
||||
void (*forcePages)() = 0;
|
||||
void (*forcePageRange)() = 0;
|
||||
void (*bufDeinit)() = 0;
|
||||
void (*simulateBufferManagerCrash)() = 0;
|
||||
|
||||
|
|
|
@ -267,6 +267,9 @@ static void bhWriteBackPage(Page * p) {
|
|||
/**
   Buffer-hash implementation of forcePages(): hands the request straight
   to the page file layer through the forcePageFile hook.
*/
static void bhForcePages() {
  forcePageFile();
}
|
||||
static void bhForcePageRange() {
|
||||
forceRangePageFile();
|
||||
}
|
||||
static void bhBufDeinit() {
|
||||
running = 0;
|
||||
|
||||
|
@ -325,6 +328,7 @@ void bhBufInit() {
|
|||
releasePageImpl = bhReleasePage;
|
||||
writeBackPage = bhWriteBackPage;
|
||||
forcePages = bhForcePages;
|
||||
forcePageRange = bhForcePageRange;
|
||||
bufDeinit = bhBufDeinit;
|
||||
simulateBufferManagerCrash = bhSimulateBufferManagerCrash;
|
||||
|
||||
|
|
|
@ -152,6 +152,13 @@ static int debug_force(stasis_handle_t *h) {
|
|||
printf("tid=%9ld retn force(%lx) = %d\n", pthread_self(), (unsigned long)hh, ret); fflush(stdout);
|
||||
return ret;
|
||||
}
|
||||
/**
   Tracing wrapper around force_range(): prints the call and its arguments,
   forwards it to the wrapped handle, then prints the value that came back.

   The trace lines are labelled "force_range" so they can be told apart from
   the output of debug_force().

   @param h     debug handle; h->impl holds the handle being wrapped.
   @param start passed through unchanged to the wrapped handle.
   @param stop  passed through unchanged to the wrapped handle.
   @return the wrapped handle's force_range() return value.
*/
static int debug_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  stasis_handle_t * hh = ((debug_impl*)h->impl)->h;
  printf("tid=%9ld call force_range(%lx,%lld,%lld)\n", pthread_self(), (unsigned long)hh, start, stop); fflush(stdout);
  int ret = hh->force_range(hh, start, stop);
  printf("tid=%9ld retn force_range(%lx) = %d\n", pthread_self(), (unsigned long)hh, ret); fflush(stdout);
  return ret;
}
|
||||
static int debug_truncate_start(stasis_handle_t * h, lsn_t new_start) {
|
||||
stasis_handle_t * hh = ((debug_impl*)h->impl)->h;
|
||||
printf("tid=%9ld call truncate_start(%lx, %lld)\n", pthread_self(), (unsigned long)hh, new_start); fflush(stdout);
|
||||
|
@ -175,6 +182,7 @@ struct stasis_handle_t debug_func = {
|
|||
.read_buffer = debug_read_buffer,
|
||||
.release_read_buffer = debug_release_read_buffer,
|
||||
.force = debug_force,
|
||||
.force_range = debug_force_range,
|
||||
.truncate_start = debug_truncate_start,
|
||||
.error = 0
|
||||
};
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#include <config.h>
|
||||
#ifdef HAVE_SYNC_FILE_RANGE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#include <stasis/common.h>
|
||||
#include <stasis/io/handle.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -11,7 +15,6 @@
|
|||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/** @file */
|
||||
|
||||
|
@ -39,6 +42,7 @@ static int updateEOF(stasis_handle_t * h) {
|
|||
/** num_copies hook for the file handle; always returns 0. */
static int file_num_copies(stasis_handle_t * h) {
  return 0;
}
|
||||
/** num_copies_buffer hook for the file handle; always returns 0. */
static int file_num_copies_buffer(stasis_handle_t * h) {
  return 0;
}
|
||||
static int file_force(stasis_handle_t *h);
|
||||
static int file_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop);
|
||||
|
||||
static int file_close(stasis_handle_t * h) {
|
||||
file_force(h);
|
||||
|
@ -415,7 +419,17 @@ static int file_force(stasis_handle_t * h) {
|
|||
pthread_mutex_lock(&impl->mut); // must latch because of truncate... :(
|
||||
int fd = impl->fd;
|
||||
pthread_mutex_unlock(&impl->mut);
|
||||
|
||||
{
|
||||
static int warned = 0;
|
||||
if(!warned) {
|
||||
printf("Warning: There is a race condition between force() and "
|
||||
" truncate() in file.c (This shouldn't matter in practice, "
|
||||
"as the logger hasn't moved over to use file.c yet.\n");
|
||||
warned = 1;
|
||||
}
|
||||
}
|
||||
// XXX there is a race here; the file handle could have been invalidated
|
||||
// by truncate.
|
||||
#ifdef HAVE_FDATASYNC
|
||||
DEBUG("file_force() is calling fdatasync()\n");
|
||||
fdatasync(fd);
|
||||
|
@ -428,7 +442,52 @@ static int file_force(stasis_handle_t * h) {
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
   Force a range of the file to stable storage.

   If the file was opened with O_SYNC nothing is done. Otherwise the range
   is synced with sync_file_range() when it is available, and the whole file
   is synced with fdatasync() / fsync() when it is not.

   @param h     file handle; h->impl is a file_impl.
   @param start start of the range, in the same offset space as
                impl->start_pos (start_pos is subtracted to get the offset
                handed to sync_file_range()).
   @param stop  end of the range; (stop - start) bytes are synced.
   @return 0 on success.  If sync_file_range() fails, h->error is set to
           EBADF and the errno value is returned.  Failures of the
           fdatasync() / fsync() fallback are not reported.
*/
static int file_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  file_impl * impl = h->impl;
  int ret = 0;
  // The parentheses matter: '!' binds tighter than '&', so the
  // unparenthesized test evaluated (!file_flags) & O_SYNC and the sync
  // below was skipped.
  if(!(impl->file_flags & O_SYNC)) {
    // not opened synchronously; we need to explicitly sync.
    pthread_mutex_lock(&impl->mut);
    int fd = impl->fd;
    lsn_t off = impl->start_pos;
    pthread_mutex_unlock(&impl->mut);
    {
      static int warned = 0;
      if(!warned) {
        printf("Warning: There is a race condition between force_range() and "
               " truncate() in file.c (This shouldn't matter in practice, "
               "as the logger hasn't moved over to use file.c yet.\n");
        warned = 1;
      }
    }
#ifdef HAVE_SYNC_FILE_RANGE
    printf("Calling sync_file_range\n");
    ret = sync_file_range(fd, start-off, (stop-start),
                          SYNC_FILE_RANGE_WAIT_BEFORE |
                          SYNC_FILE_RANGE_WRITE |
                          SYNC_FILE_RANGE_WAIT_AFTER);
    if(ret) {
      // Capture errno before anything else can overwrite it.
      int error = errno;
      assert(ret == -1);
      // With the possible exceptions of ENOMEM and ENOSPC, all of the sync
      // errors are unrecoverable.
      h->error = EBADF;
      ret = error;
    }
#else
    // No ranged sync available; fall back to syncing the whole file.
    (void)off;
#ifdef HAVE_FDATASYNC
    printf("file_force_range() is calling fdatasync()\n");
    fdatasync(fd);
#else
    printf("file_force_range() is calling fsync()\n");
    fsync(fd);
#endif
    ret = 0;
#endif
  }
  return ret;
}
|
||||
static int file_truncate_start(stasis_handle_t * h, lsn_t new_start) {
|
||||
file_impl * impl = h->impl;
|
||||
pthread_mutex_lock(&impl->mut);
|
||||
|
@ -508,6 +567,7 @@ struct stasis_handle_t file_func = {
|
|||
.read_buffer = file_read_buffer,
|
||||
.release_read_buffer = file_release_read_buffer,
|
||||
.force = file_force,
|
||||
.force_range = file_force_range,
|
||||
.truncate_start = file_truncate_start,
|
||||
.error = 0
|
||||
};
|
||||
|
|
|
@ -215,6 +215,9 @@ static int mem_read(stasis_handle_t * h,
|
|||
/** force() for the in-memory handle: performs no work and returns 0. */
static int mem_force(stasis_handle_t *h) {
  return 0;
}
|
||||
/**
   force_range() for the in-memory handle: performs no work, ignores the
   range and returns 0.
*/
static int mem_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  return 0;
}
|
||||
static int mem_truncate_start(stasis_handle_t * h, lsn_t new_start) {
|
||||
mem_impl* impl = (mem_impl*) h->impl;
|
||||
pthread_mutex_lock(&(impl->mut));
|
||||
|
@ -255,6 +258,7 @@ struct stasis_handle_t mem_func = {
|
|||
.read_buffer = mem_read_buffer,
|
||||
.release_read_buffer = mem_release_read_buffer,
|
||||
.force = mem_force,
|
||||
.force_range = mem_force_range,
|
||||
.truncate_start = mem_truncate_start,
|
||||
.error = 0
|
||||
};
|
||||
|
|
|
@ -524,12 +524,16 @@ static int nbw_read(stasis_handle_t * h,
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
static int nbw_force(stasis_handle_t * h) {
|
||||
static int nbw_force_range_impl(stasis_handle_t * h, lsn_t start, lsn_t stop) {
|
||||
nbw_impl * impl = h->impl;
|
||||
pthread_mutex_lock(&impl->mut);
|
||||
const tree_node * n = RB_ENTRY(min)(impl->fast_handles);
|
||||
// pthread_mutex_lock(&impl->mut);
|
||||
tree_node scratch;
|
||||
scratch.start_pos = start;
|
||||
scratch.end_pos = start+1;
|
||||
const tree_node * n = RB_ENTRY(lookup)(RB_LUGTEQ,&scratch,impl->fast_handles); // min)(impl->fast_handles);
|
||||
int blocked = 0;
|
||||
while(n) {
|
||||
if(n->start_pos >= stop) { break; }
|
||||
if(n->dirty) {
|
||||
// cast strips const
|
||||
((tree_node*)n)->dirty = NEEDS_FORCE;
|
||||
|
@ -553,15 +557,31 @@ static int nbw_force(stasis_handle_t * h) {
|
|||
if(impl->slow_force_once) {
|
||||
if(impl->all_slow_handle_count) {
|
||||
stasis_handle_t * h = impl->all_slow_handles[0];
|
||||
ret = h->force(h);
|
||||
ret = h->force_range(h, start, stop);
|
||||
}
|
||||
} else {
|
||||
for(int i = 0; i < impl->all_slow_handle_count; i++) {
|
||||
stasis_handle_t * h = impl->all_slow_handles[i];
|
||||
int tmpret = h->force(h);
|
||||
int tmpret = h->force_range(h, start, stop);
|
||||
if(tmpret) { ret = tmpret; }
|
||||
}
|
||||
}
|
||||
// pthread_mutex_unlock(&impl->mut);
|
||||
return ret;
|
||||
}
|
||||
/**
   Force everything the handle currently covers.

   Holds impl->mut while delegating to nbw_force_range_impl() with the
   handle's start_pos and end_pos as the range.

   @return the value returned by nbw_force_range_impl().
*/
static int nbw_force(stasis_handle_t * h) {
  nbw_impl * const state = h->impl;
  int rc;

  pthread_mutex_lock(&state->mut);
  rc = nbw_force_range_impl(h, state->start_pos, state->end_pos);
  pthread_mutex_unlock(&state->mut);

  return rc;
}
|
||||
static int nbw_force_range(stasis_handle_t * h,
|
||||
off_t start,
|
||||
off_t stop) {
|
||||
nbw_impl * impl = h->impl;
|
||||
pthread_mutex_lock(&impl->mut);
|
||||
int ret = nbw_force_range_impl(h, start, stop);
|
||||
pthread_mutex_unlock(&impl->mut);
|
||||
return ret;
|
||||
}
|
||||
|
@ -596,6 +616,7 @@ struct stasis_handle_t nbw_func = {
|
|||
.read_buffer = nbw_read_buffer,
|
||||
.release_read_buffer = nbw_release_read_buffer,
|
||||
.force = nbw_force,
|
||||
.force_range = nbw_force_range,
|
||||
.truncate_start = nbw_truncate_start,
|
||||
.error = 0
|
||||
};
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
#include <config.h>
|
||||
#define _XOPEN_SOURCE 500
|
||||
#ifdef HAVE_SYNC_FILE_RANGE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
|
@ -378,6 +382,34 @@ static int pfile_force(stasis_handle_t *h) {
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
/**
   Force a range of the file to stable storage.

   Uses sync_file_range() when it is available; otherwise the whole file is
   synced with fdatasync() or fsync().

   @param h     pfile handle; h->impl is a pfile_impl.
   @param start start of the range, in the same offset space as
                impl->start_pos (start_pos is subtracted to get the offset
                handed to sync_file_range()).
   @param stop  end of the range; (stop - start) bytes are synced.
   @return 0 on success.  If sync_file_range() fails, h->error is set to
           EBADF and the errno value is returned.  Failures of the
           fdatasync() / fsync() fallback are not reported.
*/
static int pfile_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  pfile_impl * impl = h->impl;
  int ret = 0;
#ifdef HAVE_SYNC_FILE_RANGE
  printf("Calling sync_file_range\n");
  ret = sync_file_range(impl->fd, start-impl->start_pos, (stop-start),
                        SYNC_FILE_RANGE_WAIT_BEFORE |
                        SYNC_FILE_RANGE_WRITE |
                        SYNC_FILE_RANGE_WAIT_AFTER);
  if(ret) {
    // Capture errno before anything else can overwrite it.
    int error = errno;
    assert(ret == -1);
    // With the possible exceptions of ENOMEM and ENOSPC, all of the sync
    // errors are unrecoverable.
    h->error = EBADF;
    ret = error;
  }
#else
  // No ranged sync available; sync the whole file.  The descriptor lives in
  // impl->fd; there is no local named fd in this function.
#ifdef HAVE_FDATASYNC
  printf("pfile_force_range() is calling fdatasync()\n");
  fdatasync(impl->fd);
#else
  printf("pfile_force_range() is calling fsync()\n");
  fsync(impl->fd);
#endif
#endif
  return ret;
}
|
||||
static int pfile_truncate_start(stasis_handle_t *h, lsn_t new_start) {
|
||||
static int truncate_warned = 0;
|
||||
if (!truncate_warned) {
|
||||
|
@ -403,6 +435,7 @@ struct stasis_handle_t pfile_func = {
|
|||
.read_buffer = pfile_read_buffer,
|
||||
.release_read_buffer = pfile_release_read_buffer,
|
||||
.force = pfile_force,
|
||||
.force_range = pfile_force_range,
|
||||
.truncate_start = pfile_truncate_start,
|
||||
.error = 0
|
||||
};
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <set>
|
||||
#include "lsmIterators.h"
|
||||
#include <stasis/truncation.h>
|
||||
|
||||
namespace rose {
|
||||
/**
|
||||
|
@ -24,6 +25,7 @@ namespace rose {
|
|||
int worker_id;
|
||||
pageid_t(*pageAlloc)(int,void*);
|
||||
void *pageAllocState;
|
||||
void *oldAllocState;
|
||||
pthread_mutex_t * block_ready_mut;
|
||||
pthread_cond_t * in_block_needed_cond;
|
||||
pthread_cond_t * out_block_needed_cond;
|
||||
|
@ -91,6 +93,7 @@ namespace rose {
|
|||
// experiments, but 450 bytes overhead per tuple is insane!
|
||||
static const int RB_TREE_OVERHEAD = 400; // = 450;
|
||||
static const pageid_t MEM_SIZE = 1000 * 1000 * 1000;
|
||||
// static const pageid_t MEM_SIZE = 100 * 1000;
|
||||
// How many pages should we try to fill with the first C1 merge?
|
||||
static const int R = 3; // XXX set this as low as possible (for dynamic setting. = sqrt(C2 size / C0 size))
|
||||
static const pageid_t START_SIZE = MEM_SIZE * R /( PAGE_SIZE * 4); //10 * 1000; /*10 **/ //1000; // XXX 4 is fudge related to RB overhead.
|
||||
|
@ -115,6 +118,8 @@ namespace rose {
|
|||
int xid = Tbegin();
|
||||
// Initialize tree with an empty tree.
|
||||
// XXX hardcodes ITERA's type:
|
||||
// We assume that the caller set pageAllocState for us; oldPageAllocState
|
||||
// shouldn't be set (it should be NULLRID)
|
||||
typename ITERA::handle tree
|
||||
= new typename ITERA::treeIteratorHandle(
|
||||
TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
|
||||
|
@ -155,7 +160,17 @@ namespace rose {
|
|||
pthread_mutex_unlock(a->block_ready_mut);
|
||||
|
||||
xid = Tbegin();
|
||||
|
||||
// XXX hardcodes allocator type.
|
||||
if(((recordid*)a->oldAllocState)->size != -1) {
|
||||
// free the tree that we merged against during the last round.
|
||||
TlsmFree(xid,tree->r_,TlsmRegionDeallocRid,a->oldAllocState);
|
||||
}
|
||||
// we're merging against old alloc state this round.
|
||||
*(recordid*)(a->oldAllocState) = *(recordid*)(a->pageAllocState);
|
||||
// we're merging into pagealloc state.
|
||||
*(recordid*)(a->pageAllocState) = Talloc(xid, sizeof(TlsmRegionAllocConf_t));
|
||||
Tset(xid, *(recordid*)(a->pageAllocState),
|
||||
&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||
tree->r_ = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
|
||||
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
|
||||
|
@ -189,12 +204,8 @@ namespace rose {
|
|||
delete taEnd;
|
||||
delete tbEnd;
|
||||
|
||||
|
||||
gettimeofday(&stop_tv,0);
|
||||
|
||||
// TlsmFree(wait_queue[0]) /// XXX Need to implement (de)allocation!
|
||||
// TlsmFree(wait_queue[1])
|
||||
|
||||
merge_count++;
|
||||
|
||||
double wait_elapsed = tv_to_double(wait_tv) - tv_to_double(start_tv);
|
||||
|
@ -252,6 +263,17 @@ namespace rose {
|
|||
// We don't want to ever look at the one we just handed upstream...
|
||||
// We could wait for an in tree to be ready, and then pass it directly
|
||||
// to compress data (to avoid all those merging comparisons...)
|
||||
|
||||
// old alloc state contains the tree that we used as input for this merge... we can still free it
|
||||
|
||||
// XXX storage leak; upstream is going to have to free this somehow...
|
||||
*(recordid*)(a->pageAllocState) = NULLRID;
|
||||
|
||||
// create a new allocator.
|
||||
*(recordid*)(a->pageAllocState) = Talloc(xid, sizeof(TlsmRegionAllocConf_t));
|
||||
Tset(xid, *(recordid*)(a->pageAllocState),
|
||||
&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||
|
||||
tree->r_ = TlsmCreate(xid, PAGELAYOUT::cmp_id(),a->pageAlloc,
|
||||
a->pageAllocState,PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
|
||||
|
@ -272,8 +294,10 @@ namespace rose {
|
|||
typedef struct {
|
||||
recordid bigTree;
|
||||
recordid bigTreeAllocState; // this is probably the head of an arraylist of regions used by the tree...
|
||||
recordid oldBigTreeAllocState; // this is probably the head of an arraylist of regions used by the tree...
|
||||
recordid mediumTree;
|
||||
recordid mediumTreeAllocState;
|
||||
recordid oldMediumTreeAllocState;
|
||||
epoch_t beginning;
|
||||
epoch_t end;
|
||||
} lsmTableHeader_t;
|
||||
|
@ -287,11 +311,13 @@ namespace rose {
|
|||
|
||||
recordid ret = Talloc(xid, sizeof(lsmTableHeader_t));
|
||||
lsmTableHeader_t h;
|
||||
h.oldBigTreeAllocState = NULLRID;
|
||||
h.bigTreeAllocState = Talloc(xid,sizeof(TlsmRegionAllocConf_t));
|
||||
Tset(xid,h.bigTreeAllocState,&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||
h.bigTree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),
|
||||
TlsmRegionAllocRid,&h.bigTreeAllocState,
|
||||
PAGELAYOUT::FMT::TUP::sizeofBytes());
|
||||
h.oldMediumTreeAllocState = NULLRID;
|
||||
h.mediumTreeAllocState = Talloc(xid,sizeof(TlsmRegionAllocConf_t));
|
||||
Tset(xid,h.mediumTreeAllocState,&LSM_REGION_ALLOC_STATIC_INITIALIZER);
|
||||
h.mediumTree = TlsmCreate(xid, PAGELAYOUT::cmp_id(),
|
||||
|
@ -401,6 +427,8 @@ namespace rose {
|
|||
|
||||
recordid * ridp = (recordid*)malloc(sizeof(recordid));
|
||||
*ridp = h.bigTreeAllocState;
|
||||
recordid * oldridp = (recordid*)malloc(sizeof(recordid));
|
||||
*oldridp = NULLRID;
|
||||
|
||||
ret->args1 = (merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,LSM_ITER>));
|
||||
merge_args<PAGELAYOUT, LSM_ITER, LSM_ITER> tmpargs1 =
|
||||
|
@ -408,6 +436,7 @@ namespace rose {
|
|||
1,
|
||||
TlsmRegionAllocRid,
|
||||
ridp,
|
||||
oldridp,
|
||||
block_ready_mut,
|
||||
block1_needed_cond,
|
||||
block2_needed_cond,
|
||||
|
@ -427,6 +456,8 @@ namespace rose {
|
|||
|
||||
ridp = (recordid*)malloc(sizeof(recordid));
|
||||
*ridp = h.mediumTreeAllocState;
|
||||
oldridp = (recordid*)malloc(sizeof(recordid));
|
||||
*oldridp = NULLRID;
|
||||
|
||||
ret->args2 = (merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>*)malloc(sizeof(merge_args<PAGELAYOUT,LSM_ITER,RB_ITER>));
|
||||
merge_args<PAGELAYOUT, LSM_ITER, RB_ITER> tmpargs2 =
|
||||
|
@ -434,6 +465,7 @@ namespace rose {
|
|||
2,
|
||||
TlsmRegionAllocRid,
|
||||
ridp,
|
||||
oldridp,
|
||||
block_ready_mut,
|
||||
block0_needed_cond,
|
||||
block1_needed_cond,
|
||||
|
@ -557,16 +589,17 @@ namespace rose {
|
|||
byte * arry = val.toByteArray();
|
||||
|
||||
typename PAGELAYOUT::FMT::TUP * r = 0;
|
||||
if(h->args2->my_tree) {
|
||||
r = getRecordHelper<PAGELAYOUT>(xid, h->args2->my_tree->r_, val, scratch, arry);
|
||||
if(r) { pthread_mutex_unlock(h->mut); return r; }
|
||||
|
||||
}
|
||||
DEBUG("Not in first my_tree {%lld}\n", h->args2->my_tree->r_.size);
|
||||
|
||||
if(*h->args1->in_tree) {
|
||||
r = getRecordHelper<PAGELAYOUT>(xid, (**h->args1->in_tree)->r_, val, scratch, arry);
|
||||
if(r) { pthread_mutex_unlock(h->mut); return r; }
|
||||
} else {
|
||||
DEBUG("no tree");
|
||||
DEBUG("no second in_tree");
|
||||
}
|
||||
|
||||
DEBUG("Not in second in_tree\n");
|
||||
|
|
|
@ -11,19 +11,46 @@ static lsm_comparator_t comparators[MAX_LSM_COMPARATORS];
|
|||
static lsm_page_initializer_t initializers[MAX_LSM_PAGE_INITIALIZERS];
|
||||
|
||||
TlsmRegionAllocConf_t LSM_REGION_ALLOC_STATIC_INITIALIZER =
|
||||
{ -1, -1, 1000 };
|
||||
{ {0,0,-1}, 0, -1, -1, 1000 };
|
||||
|
||||
pageid_t TlsmRegionAlloc(int xid, void *conf) {
|
||||
TlsmRegionAllocConf_t* a = (TlsmRegionAllocConf_t*)conf;
|
||||
if(a->nextPage == a->endOfRegion) {
|
||||
a->nextPage = TregionAlloc(xid, a->regionSize,0);
|
||||
a->endOfRegion = a->nextPage + a->regionSize;
|
||||
if(a->regionList.size == -1) {
|
||||
a->regionList = TarrayListAlloc(xid, 1, 4, sizeof(pageid_t));
|
||||
a->regionCount = 0;
|
||||
}
|
||||
TarrayListExtend(xid,a->regionList,1);
|
||||
a->regionList.slot = a->regionCount;
|
||||
DEBUG("region lst slot %lld\n",a->regionList.slot);
|
||||
a->regionCount++;
|
||||
DEBUG("region count %lld\n",a->regionCount);
|
||||
a->nextPage = TregionAlloc(xid, a->regionSize,0);
|
||||
DEBUG("next page %lld\n",a->nextPage);
|
||||
a->endOfRegion = a->nextPage + a->regionSize;
|
||||
Tset(xid,a->regionList,&a->nextPage);
|
||||
DEBUG("next page %lld\n",a->nextPage);
|
||||
}
|
||||
DEBUG("%lld ?= %lld\n", a->nextPage,a->endOfRegion);
|
||||
pageid_t ret = a->nextPage;
|
||||
DEBUG("ret %lld\n",ret);
|
||||
(a->nextPage)++;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void TlsmRegionDeallocRid(int xid, void *conf) {
|
||||
recordid rid = *(recordid*)conf;
|
||||
TlsmRegionAllocConf_t a;
|
||||
Tread(xid,rid,&a);
|
||||
// TlsmRegionAllocConf_t* a = (TlsmRegionAllocConf_t*)conf;
|
||||
for(int i = 0; i < a.regionCount; i++) {
|
||||
a.regionList.slot = i;
|
||||
pageid_t pid;
|
||||
Tread(xid,a.regionList,&pid);
|
||||
TregionDealloc(xid,pid);
|
||||
}
|
||||
}
|
||||
|
||||
pageid_t TlsmRegionAllocRid(int xid, void * ridp) {
|
||||
recordid rid = *(recordid*)ridp;
|
||||
TlsmRegionAllocConf_t conf;
|
||||
|
@ -575,6 +602,13 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
|||
|
||||
return ret;
|
||||
}
|
||||
/**
   Free the storage behind an LSM tree.

   Invokes the supplied deallocator on the allocator state, then
   deallocates the record that allocator_state points at.  The tree record
   itself is not deallocated here.

   @param xid             transaction performing the deallocation.
   @param tree            the tree being freed (currently unused).
   @param dealloc         callback that releases the tree's pages.
   @param allocator_state points to the recordid of the allocator state;
                          passed to dealloc and then deallocated.
*/
void TlsmFree(int xid, recordid tree, lsm_page_deallocator_t dealloc,
              void *allocator_state) {
  recordid * const stateRid = (recordid*)allocator_state;

  dealloc(xid, allocator_state);
  // XXX fishy: arguably the caller should free the allocator state record.
  Tdealloc(xid, *stateRid);
}
|
||||
|
||||
static pageid_t lsmLookup(int xid, Page *node, int depth,
|
||||
const byte *key, size_t keySize, lsm_comparator_t cmp) {
|
||||
|
|
|
@ -124,7 +124,6 @@ For<TYPE>::recordFind(int xid, slot_index_t start, slot_index_t stop,
|
|||
}
|
||||
}
|
||||
}
|
||||
assert(ret); //XXX
|
||||
return ret;
|
||||
}
|
||||
} // namespace rose
|
||||
|
|
|
@ -28,6 +28,7 @@ static int stable = -1;
|
|||
static pthread_mutex_t stable_mutex;
|
||||
static void pfForcePageFile();
|
||||
static void pfClosePageFile();
|
||||
static void pfForceRangePageFile(lsn_t start, lsn_t stop) ;
|
||||
inline static pageid_t myLseekNoLock(int f, pageid_t offset, int whence);
|
||||
|
||||
static int oldOffset = -1;
|
||||
|
@ -134,6 +135,7 @@ void openPageFile() {
|
|||
pageRead = pfPageRead;
|
||||
pageWrite = pfPageWrite;
|
||||
forcePageFile = pfForcePageFile;
|
||||
forceRangePageFile = pfForceRangePageFile;
|
||||
closePageFile = pfClosePageFile;
|
||||
|
||||
DEBUG("Opening storefile.\n");
|
||||
|
@ -170,6 +172,23 @@ static void pfForcePageFile() {
|
|||
}
|
||||
}
|
||||
|
||||
/**
   Force a range of the page file to disk.  Does nothing unless
   pageFile_isDurable is set.

   Uses sync_file_range() on the page file descriptor (stable) when it is
   available, and otherwise syncs the whole file with fdatasync() / fsync().

   sync_file_range() takes an offset and a byte count, so the count is
   (stop - start) rather than stop.

   NOTE(review): the visible callers (dirtyPages_flushRange ->
   forcePageRange) pass page ids, while sync_file_range() works in bytes --
   confirm whether start/stop must be scaled by the page size.

   @param start start of the range to force.
   @param stop  end of the range to force.
*/
static void pfForceRangePageFile(lsn_t start, lsn_t stop) {
  if(pageFile_isDurable) {
#ifdef HAVE_SYNC_FILE_RANGE
    int ret = sync_file_range(stable, start, stop - start,
                              SYNC_FILE_RANGE_WAIT_BEFORE |
                              SYNC_FILE_RANGE_WRITE |
                              SYNC_FILE_RANGE_WAIT_AFTER);
    assert(!ret);
    (void)ret; // keeps NDEBUG builds free of unused-variable warnings
#else
    // The page file descriptor is the file-scope 'stable'; there is no
    // variable named fd in this function.
#ifdef HAVE_FDATASYNC
    fdatasync(stable);
#else
    fsync(stable);
#endif
#endif
  }
}
|
||||
static void pfClosePageFile() {
|
||||
assert(stable != -1);
|
||||
forcePageFile();
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
void (*pageWrite)(Page * dat);
|
||||
void (*pageRead)(Page * ret);
|
||||
void (*forcePageFile)();
|
||||
void (*forceRangePageFile)();
|
||||
void (*closePageFile)();
|
||||
|
||||
int printedForceWarning = 0;
|
||||
|
@ -57,6 +58,10 @@ static void phForce() {
|
|||
int err = h->force(h);
|
||||
assert(!err);
|
||||
}
|
||||
/**
   Page-handle implementation of forceRangePageFile(): asks the underlying
   stasis handle to force the range and asserts that it reported success.

   NOTE(review): start/stop are forwarded unchanged; confirm that callers
   supply them in the units the handle's force_range() expects.
*/
static void phForceRange(lsn_t start, lsn_t stop) {
  int err;

  err = h->force_range(h, start, stop);
  assert(!err);
}
|
||||
static void phClose() {
|
||||
int err = h->close(h);
|
||||
DEBUG("Closing pageHandle\n");
|
||||
|
@ -72,6 +77,7 @@ void pageHandleOpen(stasis_handle_t * handle) {
|
|||
pageWrite = phWrite;
|
||||
pageRead = phRead;
|
||||
forcePageFile = phForce;
|
||||
forceRangePageFile = phForceRange;
|
||||
closePageFile = phClose;
|
||||
h = handle;
|
||||
}
|
||||
|
|
|
@ -106,7 +106,31 @@ static void dirtyPages_flush() {
|
|||
}
|
||||
free(staleDirtyPages);
|
||||
}
|
||||
/**
   Write back every dirty page whose id lies in [start, stop), then ask the
   buffer manager to force that range.

   The matching page ids are snapshotted under dirtyPages_mutex; the pages
   are loaded and written back after the mutex is released.

   Fixes relative to the first version:
   - the filter was (num <= start && num < stop), which selected pages at or
     below start instead of pages inside the range;
   - the snapshot is tracked with a count instead of a -1 terminator, so a
     full buffer no longer writes one element past the end.

   Assumes no more than MAX_BUFFER_SIZE dirty pages fall inside the range.

   @param start first page id to flush (inclusive).
   @param stop  end of the range (exclusive).
*/
void dirtyPages_flushRange(pageid_t start, pageid_t stop) {
  int * staleDirtyPages = malloc(sizeof(int) * (MAX_BUFFER_SIZE));
  int count = 0;
  int i;
  Page * p = 0;
  void *tmp;

  pthread_mutex_lock(&dirtyPages_mutex);
  for(tmp = pblHtFirst(dirtyPages); tmp; tmp = pblHtNext(dirtyPages)) {
    int num = *((int*) pblHtCurrentKey(dirtyPages));
    if(num >= start && num < stop) {
      staleDirtyPages[count] = num;
      count++;
    }
  }
  pthread_mutex_unlock(&dirtyPages_mutex);

  for(i = 0; i < count; i++) {
    p = loadPage(-1, staleDirtyPages[i]);
    writeBackPage(p);
    releasePage(p);
  }
  free(staleDirtyPages);
  forcePageRange(start,stop); // XXX
}
|
||||
void dirtyPagesInit() {
|
||||
dirtyPages = pblHtCreate();
|
||||
}
|
||||
|
|
|
@ -149,7 +149,12 @@ extern void (*writeBackPage)(Page * p);
|
|||
a no-op.
|
||||
*/
|
||||
extern void (*forcePages)();
|
||||
/**
|
||||
Force written back pages that fall within a particular range to disk.
|
||||
|
||||
This does not force pages that have not been written to with pageWrite().
|
||||
*/
|
||||
extern void (*forcePageRange)(pageid_t start, pageid_t stop);
|
||||
extern void (*simulateBufferManagerCrash)();
|
||||
|
||||
int bufInit(int type);
|
||||
|
|
|
@ -199,6 +199,7 @@ typedef struct stasis_handle_t {
|
|||
returned) may or may not be forced to disk.
|
||||
*/
|
||||
int (*force)(struct stasis_handle_t * h);
|
||||
int (*force_range)(struct stasis_handle_t * h, lsn_t start, lsn_t stop);
|
||||
/**
|
||||
Truncate bytes from the beginning of the file. This is needed by
|
||||
the log manager.
|
||||
|
|
|
@ -25,6 +25,7 @@ typedef struct {
|
|||
typedef int(*lsm_comparator_t)(const void* a, const void* b);
|
||||
typedef void*(*lsm_page_initializer_t)(Page *, void *);
|
||||
typedef pageid_t(*lsm_page_allocator_t)(int, void *);
|
||||
typedef void(*lsm_page_deallocator_t)(int, void *);
|
||||
|
||||
void lsmTreeRegisterComparator(int id, lsm_comparator_t i);
|
||||
void lsmTreeRegisterPageInitializer(int id, lsm_page_initializer_t i);
|
||||
|
@ -32,10 +33,15 @@ void lsmTreeRegisterPageInitializer(int id, lsm_page_initializer_t i);
|
|||
pageid_t TlsmRegionAlloc(int xid, void *conf);
|
||||
pageid_t TlsmRegionAllocRid(int xid, void *conf);
|
||||
typedef struct {
|
||||
recordid regionList;
|
||||
pageid_t regionCount;
|
||||
pageid_t nextPage;
|
||||
pageid_t endOfRegion;
|
||||
pageid_t regionSize;
|
||||
} TlsmRegionAllocConf_t;
|
||||
|
||||
void TlsmRegionDeallocRid(int xid, void *conf);
|
||||
|
||||
extern TlsmRegionAllocConf_t LSM_REGION_ALLOC_STATIC_INITIALIZER;
|
||||
|
||||
/**
|
||||
|
@ -66,6 +72,8 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
|||
const byte *key,
|
||||
lsm_page_allocator_t allocator, void *allocator_state,
|
||||
long pageid);
|
||||
void TlsmFree(int xid, recordid tree, lsm_page_deallocator_t dealloc,
|
||||
void *allocator_state);
|
||||
/**
|
||||
Lookup a leaf page.
|
||||
|
||||
|
|
|
@ -47,6 +47,7 @@ extern void (*pageRead)(Page * ret);
|
|||
as well...)
|
||||
*/
|
||||
extern void (*forcePageFile)();
|
||||
extern void (*forceRangePageFile)();
|
||||
/**
|
||||
Force the page file to disk, then close it.
|
||||
*/
|
||||
|
|
Loading…
Reference in a new issue