Final commit from google.
This commit is contained in:
parent
5bd2138a8b
commit
5f954eb239
8 changed files with 399 additions and 166 deletions
78
benchmarks/sequentialThroughput.c
Normal file
78
benchmarks/sequentialThroughput.c
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <stasis/transactional.h>
|
||||||
|
#include <stasis/truncation.h>
|
||||||
|
|
||||||
|
/*static stasis_handle_t * memory_factory(lsn_t off, lsn_t len, void * ignored) {
|
||||||
|
stasis_handle_t * h = stasis_handle(open_memory)(off);
|
||||||
|
//h = stasis_handle(open_debug)(h);
|
||||||
|
stasis_write_buffer_t * w = h->append_buffer(h, len);
|
||||||
|
w->h->release_write_buffer(w);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
typedef struct sf_args {
|
||||||
|
char * filename;
|
||||||
|
int openMode;
|
||||||
|
int filePerm;
|
||||||
|
} sf_args;
|
||||||
|
static stasis_handle_t * traditional_file_factory(void * argsP) {
|
||||||
|
sf_args * args = (sf_args*) argsP;
|
||||||
|
stasis_handle_t * h = stasis_handle(open_file)(0, args->filename, args->openMode, args->filePerm);
|
||||||
|
//h = stasis_handle(open_debug)(h);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
static inline long mb_to_page(long mb) {
|
||||||
|
return (mb * 1024 * 1024) / PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char ** argv) {
|
||||||
|
int direct = 0;
|
||||||
|
int legacyBM = 0;
|
||||||
|
int legacyFH = 0;
|
||||||
|
|
||||||
|
long page_count = mb_to_page(100);
|
||||||
|
|
||||||
|
for(int i = 1; i < argc; i++) {
|
||||||
|
if(!strcmp(argv[i], "--direct")) {
|
||||||
|
direct = 1;
|
||||||
|
bufferManagerO_DIRECT = 1;
|
||||||
|
} else if(!strcmp(argv[i], "--deprecatedBM")) {
|
||||||
|
bufferManagerType = BUFFER_MANAGER_DEPRECATED_HASH;
|
||||||
|
legacyBM = 1;
|
||||||
|
} else if(!strcmp(argv[i], "--deprecatedFH")) {
|
||||||
|
bufferManagerFileHandleType = BUFFER_MANAGER_FILE_HANDLE_DEPRECATED;
|
||||||
|
legacyFH = 1;
|
||||||
|
} else if(!strcmp(argv[i], "--pfile")) {
|
||||||
|
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
|
||||||
|
} else if(!strcmp(argv[i], "--file")) {
|
||||||
|
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_FILE;
|
||||||
|
} else if(!strcmp(argv[i], "--mb")) {
|
||||||
|
i++;
|
||||||
|
page_count = mb_to_page(atoll(argv[i]));
|
||||||
|
} else {
|
||||||
|
printf("Unknown argument: %s\n", argv[i]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(legacyFH && direct) {
|
||||||
|
printf("--direct and --deprecatedFH are incompatible with each other\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Tinit();
|
||||||
|
|
||||||
|
for(long i =0; i < page_count; i++) {
|
||||||
|
Page * p = loadPage(-1, i);
|
||||||
|
dirtyPages_add(p);
|
||||||
|
releasePage(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Tdeinit();
|
||||||
|
return 0;
|
||||||
|
}
|
14
libdfa/rw.h
14
libdfa/rw.h
|
@ -22,6 +22,17 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#ifndef __LIBDFA_RW_H
|
#ifndef __LIBDFA_RW_H
|
||||||
#define __LIBDFA_RW_H
|
#define __LIBDFA_RW_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
# define BEGIN_C_DECLS extern "C" {
|
||||||
|
# define END_C_DECLS }
|
||||||
|
#else /* !__cplusplus */
|
||||||
|
# define BEGIN_C_DECLS
|
||||||
|
# define END_C_DECLS
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
pthread_mutex_t *mut;
|
pthread_mutex_t *mut;
|
||||||
int writers;
|
int writers;
|
||||||
|
@ -55,4 +66,7 @@ rwargs *newRWargs (rwl *l, int i, long d);
|
||||||
void *reader (void *args);
|
void *reader (void *args);
|
||||||
void *writer (void *args);
|
void *writer (void *args);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
END_C_DECLS
|
||||||
|
|
||||||
#endif /* rw.h */
|
#endif /* rw.h */
|
||||||
|
|
|
@ -108,7 +108,7 @@ inline static Page * writeBackOnePage() {
|
||||||
assert(old == victim);
|
assert(old == victim);
|
||||||
|
|
||||||
// printf("Write(%ld)\n", (long)victim->id);
|
// printf("Write(%ld)\n", (long)victim->id);
|
||||||
pageWrite(victim);
|
pageWrite(victim); /// XXX pageCleanup and pageFlushed might be heavyweight.
|
||||||
pageCleanup(victim);
|
pageCleanup(victim);
|
||||||
// Make sure that no one mistakenly thinks this is still a live copy.
|
// Make sure that no one mistakenly thinks this is still a live copy.
|
||||||
victim->id = -1;
|
victim->id = -1;
|
||||||
|
|
|
@ -74,8 +74,8 @@ static inline stasis_read_buffer_t * alloc_read_buffer_error(stasis_handle_t *h,
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
/** @return a read buffer indicating a write error has occured */
|
/** @return a read buffer indicating a write error has occured */
|
||||||
static inline stasis_write_buffer_t * alloc_write_buffer_error(stasis_handle_t *h,
|
static inline stasis_write_buffer_t* alloc_write_buffer_error
|
||||||
int error) {
|
(stasis_handle_t *h, int error) {
|
||||||
assert(error);
|
assert(error);
|
||||||
stasis_write_buffer_t * w = malloc(sizeof(stasis_write_buffer_t));
|
stasis_write_buffer_t * w = malloc(sizeof(stasis_write_buffer_t));
|
||||||
w->h = h;
|
w->h = h;
|
||||||
|
@ -267,15 +267,24 @@ static inline const tree_node * allocFastHandle(nbw_impl * impl, lsn_t off,
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EAGER_MERGE
|
#ifdef EAGER_MERGE
|
||||||
// check for a mergable range immediately after the point we're interested in.
|
// check for a mergable range immediately after the point we're
|
||||||
|
// interested in. (DEAD CODE)
|
||||||
tree_node dummy;
|
tree_node dummy;
|
||||||
dummy.start_pos = n->end_pos;
|
dummy.start_pos = n->end_pos;
|
||||||
dummy.end_pos = n->end_pos+1;
|
dummy.end_pos = n->end_pos+1;
|
||||||
|
|
||||||
while((np = (tree_node*)RB_ENTRY(find)(&dummy, impl->fast_handles)) && np->dirty && !np->pin_count && np->write_count + n->write_count < MAX_MERGE) {
|
while((np = (tree_node*)RB_ENTRY(find)(&dummy, impl->fast_handles))
|
||||||
DEBUG("Did post-merge of page %lld-%lld (%d) and %lld-%lld (%d) outstanding = %d\n", n->start_pos/PAGE_SIZE, -1+n->end_pos/PAGE_SIZE, n->write_count, np->start_pos/PAGE_SIZE, -1+np->end_pos/PAGE_SIZE, np->write_count, impl->fast_handle_count);
|
&& np->dirty
|
||||||
|
&& !np->pin_count
|
||||||
|
&& np->write_count + n->write_count < MAX_MERGE) {
|
||||||
|
DEBUG("Did post-merge of page %lld-%lld (%d) and %lld-%lld (%d) "
|
||||||
|
"outstanding = %d\n", n->start_pos/PAGE_SIZE,
|
||||||
|
-1+n->end_pos/PAGE_SIZE, n->write_count, np->start_pos/PAGE_SIZE,
|
||||||
|
-1+np->end_pos/PAGE_SIZE, np->write_count, impl->fast_handle_count);
|
||||||
lsn_t appendLen = np->end_pos - np->start_pos;
|
lsn_t appendLen = np->end_pos - np->start_pos;
|
||||||
stasis_read_buffer_t * r= np->h->read_buffer(np->h,np->start_pos, appendLen);
|
stasis_read_buffer_t * r= np->h->read_buffer(np->h,np->start_pos,
|
||||||
|
appendLen);
|
||||||
|
|
||||||
int ret = n->h->write(n->h,np->start_pos,r->buf, appendLen);
|
int ret = n->h->write(n->h,np->start_pos,r->buf, appendLen);
|
||||||
assert(!ret);
|
assert(!ret);
|
||||||
ret = r->h->release_read_buffer(r);
|
ret = r->h->release_read_buffer(r);
|
||||||
|
@ -293,7 +302,8 @@ static inline const tree_node * allocFastHandle(nbw_impl * impl, lsn_t off,
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
static inline const tree_node * findFastHandle(nbw_impl * impl, lsn_t off, lsn_t len) {
|
static inline const tree_node * findFastHandle(nbw_impl * impl, lsn_t off,
|
||||||
|
lsn_t len) {
|
||||||
tree_node * np = allocTreeNode(off, len);
|
tree_node * np = allocTreeNode(off, len);
|
||||||
|
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
|
@ -357,7 +367,8 @@ static int nbw_close(stasis_handle_t * h) {
|
||||||
free(impl->workers);
|
free(impl->workers);
|
||||||
|
|
||||||
DEBUG("nbw had %d slow handles\n", impl->slow_handle_count);
|
DEBUG("nbw had %d slow handles\n", impl->slow_handle_count);
|
||||||
DEBUG("fast handles = %d, used buffer = %lld\n", impl->fast_handle_count, impl->used_buffer_size);
|
DEBUG("fast handles = %d, used buffer = %lld\n", impl->fast_handle_count,
|
||||||
|
impl->used_buffer_size);
|
||||||
if(impl->requested_bytes_written < impl->total_bytes_written) {
|
if(impl->requested_bytes_written < impl->total_bytes_written) {
|
||||||
printf("nbw: Problem with write coalescing detected.\n"
|
printf("nbw: Problem with write coalescing detected.\n"
|
||||||
"Client wrote %lld bytes, handle wrote %lld.\n",
|
"Client wrote %lld bytes, handle wrote %lld.\n",
|
||||||
|
@ -480,7 +491,8 @@ static int nbw_release_read_buffer(stasis_read_buffer_t * r) {
|
||||||
const tree_node * n = r_impl->n;
|
const tree_node * n = r_impl->n;
|
||||||
stasis_handle_t * oldHandle = r_impl->r->h;
|
stasis_handle_t * oldHandle = r_impl->r->h;
|
||||||
r_impl->r->h->release_read_buffer(r_impl->r);
|
r_impl->r->h->release_read_buffer(r_impl->r);
|
||||||
// XXX shouldn't need to check for this here; getFastHandle does something similar...
|
// XXX shouldn't need to check for this here; getFastHandle does
|
||||||
|
// something similar...
|
||||||
if(n) {
|
if(n) {
|
||||||
releaseFastHandle(impl, n, 0);
|
releaseFastHandle(impl, n, 0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -633,7 +645,8 @@ static void * nbw_worker(void * handle) {
|
||||||
}
|
}
|
||||||
|
|
||||||
stasis_handle_t * fast2 = np->h;
|
stasis_handle_t * fast2 = np->h;
|
||||||
stasis_read_buffer_t * r2 = fast2->read_buffer(fast2,np->start_pos, np_len);
|
stasis_read_buffer_t * r2 = fast2->read_buffer(fast2,np->start_pos,
|
||||||
|
np_len);
|
||||||
memcpy(buf + buf_off, r2->buf, np_len);
|
memcpy(buf + buf_off, r2->buf, np_len);
|
||||||
buf_off += np_len;
|
buf_off += np_len;
|
||||||
r2->h->release_read_buffer(r2);
|
r2->h->release_read_buffer(r2);
|
||||||
|
@ -647,7 +660,8 @@ static void * nbw_worker(void * handle) {
|
||||||
pthread_mutex_unlock(&impl->mut);
|
pthread_mutex_unlock(&impl->mut);
|
||||||
|
|
||||||
if(len != PAGE_SIZE) {
|
if(len != PAGE_SIZE) {
|
||||||
DEBUG("merged %lld pages at %lld into single write\n", len/PAGE_SIZE, off/PAGE_SIZE);
|
DEBUG("merged %lld pages at %lld into single write\n",
|
||||||
|
len/PAGE_SIZE, off/PAGE_SIZE);
|
||||||
}
|
}
|
||||||
slow->write(slow, off, buf, len);
|
slow->write(slow, off, buf, len);
|
||||||
|
|
||||||
|
@ -660,7 +674,8 @@ static void * nbw_worker(void * handle) {
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
node->pin_count--;
|
node->pin_count--;
|
||||||
}
|
}
|
||||||
tree_node *new_node = (tree_node*)RB_ENTRY(lookup)(RB_LUGREAT, node, impl->fast_handles);
|
tree_node *new_node = (tree_node*)RB_ENTRY(lookup)(RB_LUGREAT, node,
|
||||||
|
impl->fast_handles);
|
||||||
if(!node->dirty && !node->pin_count) {
|
if(!node->dirty && !node->pin_count) {
|
||||||
impl->fast_handle_count -= node->write_count;
|
impl->fast_handle_count -= node->write_count;
|
||||||
impl->used_buffer_size -= (node->end_pos - node->start_pos);
|
impl->used_buffer_size -= (node->end_pos - node->start_pos);
|
||||||
|
@ -680,12 +695,11 @@ static void * nbw_worker(void * handle) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
stasis_handle_t * stasis_handle(open_non_blocking)(stasis_handle_t * (*slow_factory)(void * arg),
|
stasis_handle_t * stasis_handle(open_non_blocking)
|
||||||
void * slow_factory_arg,
|
(stasis_handle_t * (*slow_factory)(void * arg), void * slow_factory_arg,
|
||||||
stasis_handle_t * (*fast_factory)(lsn_t, lsn_t, void *),
|
stasis_handle_t * (*fast_factory)(lsn_t, lsn_t, void *),
|
||||||
void * fast_factory_arg,
|
void * fast_factory_arg, int worker_thread_count, lsn_t buffer_size,
|
||||||
int worker_thread_count,
|
int max_fast_handles) {
|
||||||
lsn_t buffer_size, int max_fast_handles) {
|
|
||||||
nbw_impl * impl = malloc(sizeof(nbw_impl));
|
nbw_impl * impl = malloc(sizeof(nbw_impl));
|
||||||
pthread_mutex_init(&impl->mut, 0);
|
pthread_mutex_init(&impl->mut, 0);
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,10 @@
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
#include <stasis/common.h>
|
#include <stasis/common.h>
|
||||||
#include <stasis/io/handle.h>
|
#include <stasis/io/handle.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
@ -22,20 +24,52 @@
|
||||||
@see handle.h
|
@see handle.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
Per-handle information for pfile
|
||||||
|
*/
|
||||||
typedef struct pfile_impl {
|
typedef struct pfile_impl {
|
||||||
|
/**
|
||||||
|
This should be held whenever end_pos is accessed.
|
||||||
|
*/
|
||||||
pthread_mutex_t mut;
|
pthread_mutex_t mut;
|
||||||
|
/**
|
||||||
|
The logical offset of the file. Once the file is open, this will
|
||||||
|
never change, as pfile doesn't support truncation.
|
||||||
|
*/
|
||||||
lsn_t start_pos;
|
lsn_t start_pos;
|
||||||
|
/**
|
||||||
|
The logical end of the file.
|
||||||
|
*/
|
||||||
lsn_t end_pos;
|
lsn_t end_pos;
|
||||||
|
/**
|
||||||
|
File descriptor
|
||||||
|
*/
|
||||||
int fd;
|
int fd;
|
||||||
|
/**
|
||||||
|
Flags passed into open
|
||||||
|
*/
|
||||||
int file_flags;
|
int file_flags;
|
||||||
|
/**
|
||||||
|
File creation mode
|
||||||
|
*/
|
||||||
int file_mode;
|
int file_mode;
|
||||||
|
/**
|
||||||
|
The name of the underlying file.
|
||||||
|
*/
|
||||||
char *filename;
|
char *filename;
|
||||||
} pfile_impl;
|
} pfile_impl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
We can pass the caller's buffer directly into pread()/pwrite()
|
||||||
|
without making any copies.
|
||||||
|
*/
|
||||||
static int pfile_num_copies(stasis_handle_t *h) { return 0; }
|
static int pfile_num_copies(stasis_handle_t *h) { return 0; }
|
||||||
|
/**
|
||||||
|
We have to call malloc(), but not memcpy(). Maybe this should return 1.
|
||||||
|
*/
|
||||||
static int pfile_num_copies_buffer(stasis_handle_t *h) { return 0; }
|
static int pfile_num_copies_buffer(stasis_handle_t *h) { return 0; }
|
||||||
static int pfile_close(stasis_handle_t *h) {
|
|
||||||
|
|
||||||
|
static int pfile_close(stasis_handle_t *h) {
|
||||||
pfile_impl *impl = (pfile_impl*)h->impl;
|
pfile_impl *impl = (pfile_impl*)h->impl;
|
||||||
DEBUG("Closing pfile: end = %lld\n", impl->end_pos);
|
DEBUG("Closing pfile: end = %lld\n", impl->end_pos);
|
||||||
int fd = impl->fd;
|
int fd = impl->fd;
|
||||||
|
@ -46,10 +80,12 @@ static int pfile_close(stasis_handle_t *h) {
|
||||||
if (!ret) return 0;
|
if (!ret) return 0;
|
||||||
else return errno;
|
else return errno;
|
||||||
}
|
}
|
||||||
|
|
||||||
static lsn_t pfile_start_position(stasis_handle_t *h) {
|
static lsn_t pfile_start_position(stasis_handle_t *h) {
|
||||||
pfile_impl *impl = (pfile_impl*)h->impl;
|
pfile_impl *impl = (pfile_impl*)h->impl;
|
||||||
return impl->start_pos;
|
return impl->start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
static lsn_t pfile_end_position(stasis_handle_t *h) {
|
static lsn_t pfile_end_position(stasis_handle_t *h) {
|
||||||
pfile_impl *impl = (pfile_impl*)h->impl;
|
pfile_impl *impl = (pfile_impl*)h->impl;
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
|
@ -57,11 +93,10 @@ static lsn_t pfile_end_position(stasis_handle_t *h) {
|
||||||
pthread_mutex_unlock(&impl->mut);
|
pthread_mutex_unlock(&impl->mut);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static int pfile_write_unlocked(int fd, lsn_t off, const byte *dat,
|
inline static int pfile_write_unlocked(int fd, lsn_t off, const byte *dat,
|
||||||
lsn_t len) {
|
lsn_t len) {
|
||||||
|
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
ssize_t bytes_written = 0;
|
ssize_t bytes_written = 0;
|
||||||
|
|
||||||
while (bytes_written < len) {
|
while (bytes_written < len) {
|
||||||
|
@ -92,13 +127,23 @@ inline static int pfile_write_unlocked(int fd, lsn_t off, const byte * dat,
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pfile_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) {
|
static int pfile_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) {
|
||||||
pfile_impl *impl = (pfile_impl*)(h->impl);
|
pfile_impl *impl = (pfile_impl*)(h->impl);
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
|
// reading impl->end_pos is probably atomic on most hardware, but
|
||||||
|
// this is safer.
|
||||||
|
pthread_mutex_lock(&impl->mut);
|
||||||
|
lsn_t end_pos = impl->end_pos;
|
||||||
|
pthread_mutex_unlock(&impl->mut);
|
||||||
|
|
||||||
|
// checking end_pos is adequate, (we assume this is the only handle
|
||||||
|
// touching the file)
|
||||||
|
|
||||||
if (off < impl->start_pos) {
|
if (off < impl->start_pos) {
|
||||||
error = EDOM;
|
error = EDOM;
|
||||||
} else if(off + len > impl->end_pos) {
|
} else if (off + len > end_pos) {
|
||||||
error = EDOM;
|
error = EDOM;
|
||||||
} else {
|
} else {
|
||||||
ssize_t bytes_read = 0;
|
ssize_t bytes_read = 0;
|
||||||
|
@ -115,6 +160,10 @@ static int pfile_read(stasis_handle_t * h, lsn_t off, byte * buf, lsn_t len) {
|
||||||
if (errno == EBADF) {
|
if (errno == EBADF) {
|
||||||
h->error = EBADF;
|
h->error = EBADF;
|
||||||
} else {
|
} else {
|
||||||
|
int err = errno;
|
||||||
|
// XXX Why is sys_errlist[] is unavailable here?
|
||||||
|
perror("pfile_read encountered an unknown error code.");
|
||||||
|
fprintf(stderr, "pread() returned -1; errno is %d\n",err);
|
||||||
abort(); // XXX other errors?
|
abort(); // XXX other errors?
|
||||||
}
|
}
|
||||||
error = errno;
|
error = errno;
|
||||||
|
@ -128,10 +177,11 @@ static int pfile_read(stasis_handle_t * h, lsn_t off, byte * buf, lsn_t len) {
|
||||||
}
|
}
|
||||||
assert(bytes_read == len);
|
assert(bytes_read == len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
static int pfile_write(stasis_handle_t *h, lsn_t off, const byte *dat, lsn_t len) {
|
|
||||||
|
static int pfile_write(stasis_handle_t *h, lsn_t off, const byte *dat,
|
||||||
|
lsn_t len) {
|
||||||
pfile_impl *impl = (pfile_impl*)(h->impl);
|
pfile_impl *impl = (pfile_impl*)(h->impl);
|
||||||
int error = 0;
|
int error = 0;
|
||||||
lsn_t phys_off;
|
lsn_t phys_off;
|
||||||
|
@ -140,6 +190,9 @@ static int pfile_write(stasis_handle_t *h, lsn_t off, const byte *dat, lsn_t len
|
||||||
} else {
|
} else {
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
if (impl->end_pos < off+len) {
|
if (impl->end_pos < off+len) {
|
||||||
|
// update end_pos now; the caller is not allowed to look at this
|
||||||
|
// part of the file until we return, so if they notice that the
|
||||||
|
// file hasn't been extended yet, it's a bug on their end.
|
||||||
impl->end_pos = off+len;
|
impl->end_pos = off+len;
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&impl->mut);
|
pthread_mutex_unlock(&impl->mut);
|
||||||
|
@ -148,19 +201,20 @@ static int pfile_write(stasis_handle_t *h, lsn_t off, const byte *dat, lsn_t len
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
static int pfile_append(stasis_handle_t *h, lsn_t *off, const byte *dat, lsn_t len) {
|
|
||||||
|
static int pfile_append(stasis_handle_t *h, lsn_t *off, const byte *dat,
|
||||||
|
lsn_t len) {
|
||||||
pfile_impl *impl = (pfile_impl*)(h->impl);
|
pfile_impl *impl = (pfile_impl*)(h->impl);
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
*off = impl->end_pos;
|
*off = impl->end_pos;
|
||||||
impl->end_pos += len;
|
impl->end_pos += len;
|
||||||
pthread_mutex_unlock(&impl->mut);
|
pthread_mutex_unlock(&impl->mut);
|
||||||
lsn_t phys_off = *off - impl->start_pos;
|
lsn_t phys_off = *off - impl->start_pos;
|
||||||
int error = pfile_write_unlocked(impl->fd, phys_off, dat,len);
|
return pfile_write_unlocked(impl->fd, phys_off, dat,len);
|
||||||
return error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static stasis_write_buffer_t * pfile_write_buffer(stasis_handle_t *h,
|
static stasis_write_buffer_t * pfile_write_buffer(stasis_handle_t *h,
|
||||||
lsn_t off, lsn_t len) {
|
lsn_t off, lsn_t len) {
|
||||||
|
|
||||||
stasis_write_buffer_t *ret = malloc(sizeof(stasis_write_buffer_t));
|
stasis_write_buffer_t *ret = malloc(sizeof(stasis_write_buffer_t));
|
||||||
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
|
@ -178,6 +232,7 @@ static stasis_write_buffer_t * pfile_write_buffer(stasis_handle_t * h,
|
||||||
|
|
||||||
pthread_mutex_lock(&impl->mut);
|
pthread_mutex_lock(&impl->mut);
|
||||||
|
|
||||||
|
// @todo Come up with a reasonable way to avoid sparse files.
|
||||||
if (off + len > impl->end_pos) {
|
if (off + len > impl->end_pos) {
|
||||||
impl->end_pos = off+len;
|
impl->end_pos = off+len;
|
||||||
}
|
}
|
||||||
|
@ -205,9 +260,9 @@ static stasis_write_buffer_t * pfile_write_buffer(stasis_handle_t * h,
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static stasis_write_buffer_t *pfile_append_buffer(stasis_handle_t *h,
|
static stasis_write_buffer_t *pfile_append_buffer(stasis_handle_t *h,
|
||||||
lsn_t len) {
|
lsn_t len) {
|
||||||
|
|
||||||
// Allocate the handle
|
// Allocate the handle
|
||||||
stasis_write_buffer_t *ret = malloc(sizeof(stasis_write_buffer_t));
|
stasis_write_buffer_t *ret = malloc(sizeof(stasis_write_buffer_t));
|
||||||
if (!ret) { return NULL; }
|
if (!ret) { return NULL; }
|
||||||
|
@ -244,10 +299,9 @@ static stasis_write_buffer_t *pfile_append_buffer(stasis_handle_t * h,
|
||||||
ret->error = 0;
|
ret->error = 0;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
static int pfile_release_write_buffer(stasis_write_buffer_t * w) {
|
|
||||||
|
|
||||||
|
static int pfile_release_write_buffer(stasis_write_buffer_t *w) {
|
||||||
pfile_impl *impl = (pfile_impl*)(w->h->impl);
|
pfile_impl *impl = (pfile_impl*)(w->h->impl);
|
||||||
|
|
||||||
pthread_mutex_lock(&(impl->mut));
|
pthread_mutex_lock(&(impl->mut));
|
||||||
|
@ -259,9 +313,12 @@ static int pfile_release_write_buffer(stasis_write_buffer_t * w) {
|
||||||
pthread_mutex_unlock(&(impl->mut));
|
pthread_mutex_unlock(&(impl->mut));
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
error = pfile_write_unlocked(impl->fd, w->off-impl->start_pos, w->buf, w->len);
|
error = pfile_write_unlocked(impl->fd, w->off-impl->start_pos, w->buf,
|
||||||
|
w->len);
|
||||||
}
|
}
|
||||||
|
if (w->buf) {
|
||||||
free(w->buf);
|
free(w->buf);
|
||||||
|
}
|
||||||
free(w);
|
free(w);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
@ -298,19 +355,25 @@ static stasis_read_buffer_t * pfile_read_buffer(stasis_handle_t * h,
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pfile_release_read_buffer(stasis_read_buffer_t *r) {
|
static int pfile_release_read_buffer(stasis_read_buffer_t *r) {
|
||||||
|
if (r->buf) {
|
||||||
free((void*)r->buf);
|
free((void*)r->buf);
|
||||||
|
}
|
||||||
free(r);
|
free(r);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pfile_truncate_start(stasis_handle_t *h, lsn_t new_start) {
|
static int pfile_truncate_start(stasis_handle_t *h, lsn_t new_start) {
|
||||||
static int truncate_warned = 0;
|
static int truncate_warned = 0;
|
||||||
if (!truncate_warned) {
|
if (!truncate_warned) {
|
||||||
printf("\nWarning: pfile doesn't support truncation; ignoring truncation request\n");
|
printf("\nWarning: pfile doesn't support truncation; "
|
||||||
|
"ignoring truncation request\n");
|
||||||
truncate_warned = 1;
|
truncate_warned = 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stasis_handle_t pfile_func = {
|
struct stasis_handle_t pfile_func = {
|
||||||
.num_copies = pfile_num_copies,
|
.num_copies = pfile_num_copies,
|
||||||
.num_copies_buffer = pfile_num_copies_buffer,
|
.num_copies_buffer = pfile_num_copies_buffer,
|
||||||
|
@ -328,12 +391,17 @@ struct stasis_handle_t pfile_func = {
|
||||||
.truncate_start = pfile_truncate_start,
|
.truncate_start = pfile_truncate_start,
|
||||||
.error = 0
|
.error = 0
|
||||||
};
|
};
|
||||||
stasis_handle_t * stasis_handle(open_pfile)(lsn_t start_offset, char * filename, int flags, int mode) {
|
|
||||||
|
stasis_handle_t *stasis_handle(open_pfile)(lsn_t start_offset,
|
||||||
|
char *filename,
|
||||||
|
int flags, int mode) {
|
||||||
stasis_handle_t *ret = malloc(sizeof(stasis_handle_t));
|
stasis_handle_t *ret = malloc(sizeof(stasis_handle_t));
|
||||||
if (!ret) { return NULL; }
|
if (!ret) { return NULL; }
|
||||||
*ret = pfile_func;
|
*ret = pfile_func;
|
||||||
|
|
||||||
pfile_impl *impl = malloc(sizeof(pfile_impl));
|
pfile_impl *impl = malloc(sizeof(pfile_impl));
|
||||||
|
if (!impl) { free(ret); return NULL; }
|
||||||
|
|
||||||
ret->impl = impl;
|
ret->impl = impl;
|
||||||
pthread_mutex_init(&(impl->mut), 0);
|
pthread_mutex_init(&(impl->mut), 0);
|
||||||
impl->fd = open(filename, flags, mode);
|
impl->fd = open(filename, flags, mode);
|
||||||
|
@ -349,7 +417,8 @@ stasis_handle_t * stasis_handle(open_pfile)(lsn_t start_offset, char * filename,
|
||||||
ret->error = errno;
|
ret->error = errno;
|
||||||
}
|
}
|
||||||
impl->end_pos = file_len + start_offset;
|
impl->end_pos = file_len + start_offset;
|
||||||
DEBUG("file len = %lld, start_off = %lld, end = %lld\n", file_len, start_offset, impl->end_pos);
|
DEBUG("file len = %lld, start_off = %lld, end = %lld\n",
|
||||||
|
file_len, start_offset, impl->end_pos);
|
||||||
|
|
||||||
impl->filename = strdup(filename);
|
impl->filename = strdup(filename);
|
||||||
impl->file_flags = flags;
|
impl->file_flags = flags;
|
||||||
|
|
|
@ -450,8 +450,10 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
|
|
||||||
if(ret.size == INVALID_SLOT) {
|
if(ret.size == INVALID_SLOT) {
|
||||||
if(lastLeaf->id != p->id) {
|
if(lastLeaf->id != p->id) {
|
||||||
|
assert(s->lastLeaf != tree.page);
|
||||||
unlock(lastLeaf->rwlatch);
|
unlock(lastLeaf->rwlatch);
|
||||||
releasePage(lastLeaf); // don't need that page anymore...
|
releasePage(lastLeaf); // don't need that page anymore...
|
||||||
|
lastLeaf = 0;
|
||||||
}
|
}
|
||||||
// traverse down the root of the tree.
|
// traverse down the root of the tree.
|
||||||
|
|
||||||
|
@ -537,6 +539,7 @@ recordid TlsmAppendPage(int xid, recordid tree,
|
||||||
writeNodeRecord(xid, lastLeaf, ret.slot, key, keySize, val_page);
|
writeNodeRecord(xid, lastLeaf, ret.slot, key, keySize, val_page);
|
||||||
|
|
||||||
if(lastLeaf->id != p->id) {
|
if(lastLeaf->id != p->id) {
|
||||||
|
assert(s->lastLeaf != tree.page);
|
||||||
unlock(lastLeaf->rwlatch);
|
unlock(lastLeaf->rwlatch);
|
||||||
releasePage(lastLeaf);
|
releasePage(lastLeaf);
|
||||||
}
|
}
|
||||||
|
@ -638,6 +641,37 @@ pageid_t TlsmFindPage(int xid, recordid tree, const byte *key) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pageid_t TlsmLastPage(int xid, recordid tree) {
|
||||||
|
Page * root = loadPage(xid, tree.page);
|
||||||
|
readlock(root->rwlatch,0);
|
||||||
|
lsmTreeState *state = root->impl;
|
||||||
|
int keySize = getKeySize(xid,root);
|
||||||
|
if(state->lastLeaf == -1) {
|
||||||
|
const lsmTreeNodeRecord *nr = readNodeRecord(xid,root,DEPTH,
|
||||||
|
keySize);
|
||||||
|
int depth = nr->ptr;
|
||||||
|
state->lastLeaf = findLastLeaf(xid,root,depth);
|
||||||
|
}
|
||||||
|
pageid_t ret = state->lastLeaf;
|
||||||
|
unlock(root->rwlatch);
|
||||||
|
|
||||||
|
// ret points to the last internal node at this point.
|
||||||
|
releasePage(root);
|
||||||
|
|
||||||
|
Page * p = loadPage(xid, ret);
|
||||||
|
readlock(p->rwlatch,0);
|
||||||
|
if(*recordcount_ptr(p) == 2) {
|
||||||
|
ret = -1;
|
||||||
|
} else {
|
||||||
|
const lsmTreeNodeRecord *nr = readNodeRecord(xid,p,(*recordcount_ptr(p))-1,keySize);
|
||||||
|
ret = nr->ptr;
|
||||||
|
}
|
||||||
|
unlock(p->rwlatch);
|
||||||
|
releasePage(p);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
The buffer manager calls this when the lsmTree's root page is
|
The buffer manager calls this when the lsmTree's root page is
|
||||||
loaded. This function allocates some storage for cached values
|
loaded. This function allocates some storage for cached values
|
||||||
|
@ -683,6 +717,9 @@ lladdIterator_t *lsmTreeIterator_open(int xid, recordid root) {
|
||||||
releasePage(p);
|
releasePage(p);
|
||||||
p = loadPage(xid,leafid);
|
p = loadPage(xid,leafid);
|
||||||
readlock(p->rwlatch,0);
|
readlock(p->rwlatch,0);
|
||||||
|
assert(depth != 0);
|
||||||
|
} else {
|
||||||
|
assert(depth == 0);
|
||||||
}
|
}
|
||||||
lsmIteratorImpl *impl = malloc(sizeof(lsmIteratorImpl));
|
lsmIteratorImpl *impl = malloc(sizeof(lsmIteratorImpl));
|
||||||
impl->p = p;
|
impl->p = p;
|
||||||
|
@ -708,6 +745,24 @@ lladdIterator_t *lsmTreeIterator_open(int xid, recordid root) {
|
||||||
} */
|
} */
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
lladdIterator_t *lsmTreeIterator_copy(int xid, lladdIterator_t* i) {
|
||||||
|
lsmIteratorImpl *it = i->impl;
|
||||||
|
lsmIteratorImpl *mine = malloc(sizeof(lsmIteratorImpl));
|
||||||
|
|
||||||
|
if(it->p) {
|
||||||
|
mine->p = loadPage(xid, it->p->id);
|
||||||
|
readlock(mine->p->rwlatch,0);
|
||||||
|
} else {
|
||||||
|
mine->p = 0;
|
||||||
|
}
|
||||||
|
memcpy(&mine->current, &it->current,sizeof(recordid));
|
||||||
|
mine->t = it->t;
|
||||||
|
mine->justOnePage = it->justOnePage;
|
||||||
|
lladdIterator_t * ret = malloc(sizeof(lladdIterator_t));
|
||||||
|
ret->type = -1; // XXX LSM_TREE_ITERATOR
|
||||||
|
ret->impl = mine;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
void lsmTreeIterator_close(int xid, lladdIterator_t *it) {
|
void lsmTreeIterator_close(int xid, lladdIterator_t *it) {
|
||||||
lsmIteratorImpl *impl = it->impl;
|
lsmIteratorImpl *impl = it->impl;
|
||||||
if(impl->p) {
|
if(impl->p) {
|
||||||
|
@ -738,7 +793,7 @@ int lsmTreeIterator_next(int xid, lladdIterator_t *it) {
|
||||||
impl->current.size = keySize;
|
impl->current.size = keySize;
|
||||||
} else {
|
} else {
|
||||||
impl->p = 0;
|
impl->p = 0;
|
||||||
impl->current.size = -1;
|
impl->current.size = INVALID_SLOT;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(impl->current.size == keySize + sizeof(lsmTreeNodeRecord));
|
assert(impl->current.size == keySize + sizeof(lsmTreeNodeRecord));
|
||||||
|
|
|
@ -82,10 +82,10 @@ terms specified in this license.
|
||||||
|
|
||||||
#ifndef MAX_BUFFER_SIZE
|
#ifndef MAX_BUFFER_SIZE
|
||||||
//#define MAX_BUFFER_SIZE 100003
|
//#define MAX_BUFFER_SIZE 100003
|
||||||
/*#define MAX_BUFFER_SIZE 20029 */
|
#define MAX_BUFFER_SIZE 20029
|
||||||
//#define MAX_BUFFER_SIZE 10007
|
//#define MAX_BUFFER_SIZE 10007
|
||||||
//#define MAX_BUFFER_SIZE 5003
|
//#define MAX_BUFFER_SIZE 5003
|
||||||
#define MAX_BUFFER_SIZE 2003
|
//#define MAX_BUFFER_SIZE 2003
|
||||||
//#define MAX_BUFFER_SIZE 4006
|
//#define MAX_BUFFER_SIZE 4006
|
||||||
/* #define MAX_BUFFER_SIZE 71 */
|
/* #define MAX_BUFFER_SIZE 71 */
|
||||||
/*#define MAX_BUFFER_SIZE 7 */
|
/*#define MAX_BUFFER_SIZE 7 */
|
||||||
|
|
|
@ -63,7 +63,10 @@ void TlsmSetPageAllocator(pageid_t (*allocer)(int xid, void * ignored),
|
||||||
*/
|
*/
|
||||||
pageid_t TlsmFindPage(int xid, recordid tree,
|
pageid_t TlsmFindPage(int xid, recordid tree,
|
||||||
const byte *key);
|
const byte *key);
|
||||||
|
/**
|
||||||
|
@todo TlsmFirstPage for symmetry?
|
||||||
|
*/
|
||||||
|
pageid_t TlsmLastPage(int xid, recordid tree);
|
||||||
/// --------------- Iterator implementation
|
/// --------------- Iterator implementation
|
||||||
|
|
||||||
typedef struct lsmTreeNodeRecord {
|
typedef struct lsmTreeNodeRecord {
|
||||||
|
@ -93,7 +96,7 @@ lladdIterator_t * lsmTreeIterator_open(int xid, recordid tree);
|
||||||
*/
|
*/
|
||||||
void lsmTreeIterator_close(int xid, lladdIterator_t * it);
|
void lsmTreeIterator_close(int xid, lladdIterator_t * it);
|
||||||
int lsmTreeIterator_next (int xid, lladdIterator_t * it);
|
int lsmTreeIterator_next (int xid, lladdIterator_t * it);
|
||||||
|
lladdIterator_t *lsmTreeIterator_copy(int xid, lladdIterator_t* i);
|
||||||
static inline int lsmTreeIterator_key (int xid, lladdIterator_t *it,
|
static inline int lsmTreeIterator_key (int xid, lladdIterator_t *it,
|
||||||
byte **key) {
|
byte **key) {
|
||||||
lsmIteratorImpl * impl = (lsmIteratorImpl*)it->impl;
|
lsmIteratorImpl * impl = (lsmIteratorImpl*)it->impl;
|
||||||
|
|
Loading…
Reference in a new issue