merge in changes from svn[r1572..r1601]

------------------------------------------------------------------------
r1601 | sears.russell@gmail.com | 2012-03-20 18:43:00 -0400 (Tue, 20
Mar 2012) | 1 line

commit bLSM bloom filter to stasis/util, which is where it really
belongs
------------------------------------------------------------------------
r1600 | sears.russell@gmail.com | 2012-03-04 01:58:38 -0500 (Sun, 04
Mar 2012) | 1 line

fix memory leak in skiplist unit test (now it is valgrind clean)
------------------------------------------------------------------------
r1599 | sears.russell@gmail.com | 2012-03-04 01:58:05 -0500 (Sun, 04
Mar 2012) | 1 line

fix typo in finalize type
------------------------------------------------------------------------
r1598 | sears.russell@gmail.com | 2012-03-04 00:59:59 -0500 (Sun, 04
Mar 2012) | 1 line

add comparator and finalizer parameters to skiplist constructor
------------------------------------------------------------------------
r1597 | sears.russell@gmail.com | 2012-03-03 18:23:16 -0500 (Sat, 03
Mar 2012) | 1 line

bugfixes for skiplist
------------------------------------------------------------------------
r1596 | sears.russell@gmail.com | 2012-03-02 15:05:07 -0500 (Fri, 02
Mar 2012) | 1 line

updated concurrentSkipList.  Seeing strange crashes
------------------------------------------------------------------------
r1595 | sears.russell@gmail.com | 2012-03-01 16:51:59 -0500 (Thu, 01
Mar 2012) | 1 line

add progress reports
------------------------------------------------------------------------
r1594 | sears.russell@gmail.com | 2012-02-28 13:17:05 -0500 (Tue, 28
Feb 2012) | 1 line

experimental support for automatic logfile preallocation
------------------------------------------------------------------------
r1593 | sears.russell@gmail.com | 2012-02-28 12:10:01 -0500 (Tue, 28
Feb 2012) | 1 line

add histogram reporting to rawIOPS benchmark
------------------------------------------------------------------------
r1592 | sears.russell@gmail.com | 2012-02-24 16:31:36 -0500 (Fri, 24
Feb 2012) | 1 line

userspace raid 0 implementation
------------------------------------------------------------------------
r1591 | sears.russell@gmail.com | 2012-02-12 01:47:25 -0500 (Sun, 12
Feb 2012) | 1 line

add skiplist unit test, fix compile warnings
------------------------------------------------------------------------
r1590 | sears.russell@gmail.com | 2012-02-12 00:52:52 -0500 (Sun, 12
Feb 2012) | 1 line

fix compile error
------------------------------------------------------------------------
r1589 | sears.russell@gmail.com | 2012-02-12 00:50:21 -0500 (Sun, 12
Feb 2012) | 1 line

fix some bugs in hazard.h surrounding thread list management and
overruns of R under high contention
------------------------------------------------------------------------
r1588 | sears.russell@gmail.com | 2012-02-11 14:23:10 -0500 (Sat, 11
Feb 2012) | 1 line

add hazard pointer for get_lock.  It was implicitly blowing away the
hazard pointer protecting y in the caller
------------------------------------------------------------------------
r1587 | sears.russell@gmail.com | 2012-02-10 18:51:25 -0500 (Fri, 10
Feb 2012) | 1 line

fix null pointer bug
------------------------------------------------------------------------
r1586 | sears.russell@gmail.com | 2012-02-10 18:03:39 -0500 (Fri, 10
Feb 2012) | 1 line

add simple refcounting scheme to concurrentSkipList.  This solves the
problem where a deleted node points to another deleted node, and we
only have a hazard pointer for the first node.
------------------------------------------------------------------------
r1585 | sears.russell@gmail.com | 2012-02-10 14:19:14 -0500 (Fri, 10
Feb 2012) | 1 line

add hazard pointers for update using the smallest free slot first.  The
old method left a race condition, since hazard_scan stops at the first
null pointer.
------------------------------------------------------------------------
r1584 | sears.russell@gmail.com | 2012-02-10 02:45:30 -0500 (Fri, 10
Feb 2012) | 1 line

add hazard pointers for update array
------------------------------------------------------------------------
r1583 | sears.russell@gmail.com | 2012-02-10 00:04:50 -0500 (Fri, 10
Feb 2012) | 1 line

skiplist update: concurrent, but broken
------------------------------------------------------------------------
r1582 | sears.russell@gmail.com | 2012-02-09 17:44:27 -0500 (Thu, 09
Feb 2012) | 1 line

skip list implementation.  Not concurrent yet.
------------------------------------------------------------------------
r1581 | sears.russell@gmail.com | 2012-02-08 13:33:29 -0500 (Wed, 08
Feb 2012) | 1 line

Commit of a bunch of new, unused code: KISS random number generator,
Hazard Pointers, SUX latches (untested) and bit twiddling for
concurrent b-tree
------------------------------------------------------------------------
r1580 | sears.russell@gmail.com | 2012-01-17 19:17:37 -0500 (Tue, 17
Jan 2012) | 1 line

fix typo
------------------------------------------------------------------------
r1579 | sears.russell@gmail.com | 2012-01-11 18:33:31 -0500 (Wed, 11
Jan 2012) | 1 line

static build fixes for linux.  hopefully these do not break macos...
------------------------------------------------------------------------
r1578 | sears.russell@gmail.com | 2012-01-09 19:13:34 -0500 (Mon, 09
Jan 2012) | 1 line

fix cmake under linux
------------------------------------------------------------------------
r1577 | sears.russell@gmail.com | 2012-01-09 18:37:15 -0500 (Mon, 09
Jan 2012) | 1 line

fix linux static binary compilation bugs
------------------------------------------------------------------------
r1576 | sears.russell | 2012-01-09 18:00:08 -0500 (Mon, 09 Jan 2012) |
1 line

port to macos x
------------------------------------------------------------------------
r1575 | sears.russell | 2012-01-09 17:39:43 -0500 (Mon, 09 Jan 2012) |
1 line

add missing _ from sync call name
------------------------------------------------------------------------
r1574 | sears.russell@gmail.com | 2012-01-09 14:26:31 -0500 (Mon, 09
Jan 2012) | 1 line

add -rt flag to static builds
------------------------------------------------------------------------
r1573 | sears.russell@gmail.com | 2011-12-20 23:38:29 -0500 (Tue, 20
Dec 2011) | 1 line

Simple makefile geared toward building libstasis.so and libstasis.a
(and nothing else)
------------------------------------------------------------------------
r1572 | sears.russell@gmail.com | 2011-12-20 22:37:54 -0500 (Tue, 20
Dec 2011) | 1 line

add some missing #include<config.h> lines
This commit is contained in:
Gregory Burd 2012-04-21 12:52:31 -04:00
parent 01f57a1be3
commit 3a559a3b3c
105 changed files with 2820 additions and 139 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
.svn

View file

@ -65,6 +65,8 @@ INCLUDE(CheckFunctionExists)
INCLUDE(CheckCSourceCompiles)
CHECK_FUNCTION_EXISTS(sync_file_range HAVE_SYNC_FILE_RANGE)
CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
CHECK_FUNCTION_EXISTS(posix_fadvise HAVE_POSIX_FADVISE)
CHECK_FUNCTION_EXISTS(fdatasync HAVE_FDATASYNC)
CHECK_FUNCTION_EXISTS(tdestroy HAVE_TDESTROY)
@ -83,15 +85,33 @@ endif(NOT DBUG_TEST)
SET(CMAKE_REQUIRED_FLAGS "-lm")
CHECK_FUNCTION_EXISTS(powl HAVE_POWL)
MACRO(CHECK_CONSTANT_EXISTS FLAG)
CHECK_C_SOURCE_COMPILES("#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <limits.h>
int main(int argc, char * argv[]) {
argc = O_DIRECT;
argc = ${FLAG};
}
" HAVE_O_DIRECT)
" HAVE_${FLAG})
ENDMACRO(CHECK_CONSTANT_EXISTS)
CHECK_CONSTANT_EXISTS(O_DIRECT)
CHECK_CONSTANT_EXISTS(O_DSYNC)
CHECK_CONSTANT_EXISTS(O_NOATIME)
CHECK_CONSTANT_EXISTS(PTHREAD_STACK_MIN)
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(ON_LINUX "LINUX")
ENDIF(CMAKE_SYSTEM_NAME)
IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
SET(ON_DARWIN "DARWIN")
ENDIF(CMAKE_SYSTEM_NAME)
#other options are "Windows" and "Solaris"
CHECK_C_SOURCE_COMPILES("#include <alloca.h>
int main(int argc, char * argv[]) { alloca(1); }" HAVE_ALLOCA_H)
@ -111,16 +131,6 @@ int main(int argc, char* argv[]) {
}
" HAVE_GCC_ATOMICS)
CHECK_C_SOURCE_COMPILES("
#include <pthread.h>
#include <limits.h>
int main(int argc, char* argv[]) {
pthread_attr_t a;
pthread_attr_setstacksize(&a, PTHREAD_STACK_MIN);
return 0;
}
" HAVE_PTHREAD_STACK_MIN)
MACRO(CREATE_CHECK NAME)
ADD_EXECUTABLE(${NAME} ${NAME}.c)
TARGET_LINK_LIBRARIES(${NAME} ${COMMON_LIBRARIES})

5
Makefile Normal file
View file

@ -0,0 +1,5 @@
LIBSRC = $(wildcard src/stasis/*.c) $(wildcard src/stasis/*/*.c) $(wildcard src/stasis/*/*/*.c) \
$(wildcard src/stasis/*.cpp) $(wildcard src/stasis/*/*.cpp) $(wildcard src/stasis/*/*/*.cpp)
LIBNAME = stasis
include config/Makefile.stasis

23
README
View file

@ -1,9 +1,5 @@
Stasis is a transactional storage library.
An increasing range of applications requires robust support for atomic, durable and concurrent transactions. Databases provide the default solution, but force applications to interact via SQL and to forfeit control over data layout and access mechanisms. In principle, a specialized database stack could be built for each application, but such approaches have proven to be impractical. We argue there is a gap between DBMSs and file systems that limits designers of data-oriented applications.
Stasis is a storage framework that incorporates ideas from traditional write-ahead logging algorithms and file systems. It provides applications with flexible control over data structures, data layout, robustness and performance. Stasis enables the development of unforeseen variants on transactional storage by generalizing write-ahead logging algorithms. Instead of implementing support for each new storage system from scratch, I have extended Stasis to provide specialized storage mechanisms to a wide variety of applications. It now provides cleaner semantics than similar application-specific approaches would, with significantly less source code than would be required by multiple separate storage implementations. In addition to the conventional write-ahead logging algorithms that Stasis was designed for, it now provides support for large objects, and for log-structured indexes. A number of other extensions, such as distributed recovery algorithms and snapshot-based recovery are under development.
Please see the COPYING file for licensing information, and INSTALL for
compilation instructions.
@ -11,22 +7,3 @@ More information, including papers, a tutorial and API documentation
are available at:
http://www.cs.berkeley.edu/~sears/stasis/
http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.html
http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.pdf
@phdthesis{Sears:EECS-2010-2,
Author = {Sears, Russell C},
Title = {Stasis: Flexible Transactional Storage},
School = {EECS Department, University of California, Berkeley},
Year = {2010},
Month = {Jan},
URL = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-2.html},
Number = {UCB/EECS-2010-2},
Abstract = {An increasing range of applications requires robust support for atomic, durable and concurrent transactions. Databases provide the default solution, but force applications to interact via SQL and to forfeit control over data layout and access mechanisms. In principle, a specialized database stack could be built for each application, but such approaches have proven to be impractical. We argue there is a gap between DBMSs and file systems that limits designers of data-oriented applications.
Stasis is a storage framework that incorporates ideas from traditional write-ahead logging algorithms and file systems. It provides applications with flexible control over data structures, data layout, robustness and performance. Stasis enables the development of unforeseen variants on transactional storage by generalizing write-ahead logging algorithms. Instead of implementing support for each new storage system from scratch, I have extended Stasis to provide specialized storage mechanisms to a wide variety of applications. It now provides cleaner semantics than similar application-specific approaches would, with significantly less source code than would be required by multiple separate storage implementations. In addition to the conventional write-ahead logging algorithms that Stasis was designed for, it now provides support for large objects, and for log-structured indexes. A number of other extensions, such as distributed recovery algorithms and snapshot-based recovery are under development.
This dissertation describes the range of data models and program architectures that have been commonly used in the past, and argues that Stasis is sufficiently general to support most storage applications. It then turns to a description of Stasis' high-level application interfaces and APIs that are designed to allow applications to add their own transactional data structures to Stasis. The performance of a number of such extensions is evaluated, showing that Stasis performs favorably relative to existing systems.
The dissertation then turns to a careful definition of Stasis' recovery algorithms, and provides a novel generalization of ARIES, the de facto standard approach to transactional storage. The generalization is particularly promising in the context of distributed systems. Finally, it presents Stasis' lower-level interfaces, providing systems developers and application designers with the ability to tailor high-level transactional primitives to new types of storage hardware and operating system primitives. To the greatest extent possible, the ideas presented within are composable, allowing Stasis' simple implementation to support an unusually wide range of storage architectures.}
}

View file

@ -153,6 +153,8 @@ int main(int argc, char * argv[]) {
target_ops = atoi(argv[i]);
} else if(!strcmp(argv[i], "--raid1")) {
stasis_handle_factory = stasis_handle_raid1_factory;
} else if(!strcmp(argv[i], "--raid0")) {
stasis_handle_factory = stasis_handle_raid0_factory;
} else {
fprintf(stderr, "unknown argument: %s\n", argv[i]);
abort();

View file

@ -20,11 +20,19 @@ int main(int argc, char * argv[]) {
int sector_size = atoi(argv[3]);
int fd = -1;
#ifdef HAVE_O_DSYNC
fd = open(filename, O_WRONLY|O_DSYNC); //|O_DIRECT);
#else
fd = open(filename, O_WRONLY|O_SYNC);
#endif
struct timeval start, stop;
void * buf;
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign(&buf, sector_size, sector_size);
#else
buf = malloc(2 * sector_size);
((intptr_t)buf) &= ~(sector_size-1);
#endif
memset(buf, 0, sector_size);

View file

@ -8,12 +8,15 @@
#include <stasis/common.h>
#include <stasis/util/random.h>
#include <stasis/util/time.h>
#include <stasis/util/histogram.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
DECLARE_HISTOGRAM_64(iop_hist)
static const long MB = (1024 * 1024);
typedef struct {
@ -25,19 +28,43 @@ typedef struct {
double elapsed;
} thread_arg;
uint64_t completed_ops;
uint64_t op_count;
void * status_worker(void * ignored) {
uint64_t last_ops = 0;
int iter = 0;
while(1) {
struct timespec ts = stasis_double_to_timespec(1.0);
nanosleep(&ts,0);
printf("current ops/sec %lld\n", completed_ops - last_ops);
last_ops = completed_ops;
iter ++;
if((! (iter % 10)) && (op_count == 0)) {
stasis_histograms_auto_dump();
stasis_histogram_64_clear(&iop_hist);
}
}
}
void * worker(void * argp) {
thread_arg * arg = argp;
void * buf = 0;
int err = posix_memalign(&buf, 512, arg->page_size);
int err;
#ifdef HAVE_POSIX_MEMALIGN
err = posix_memalign(&buf, 512, arg->page_size);
if(err) {
printf("Couldn't allocate memory with posix_memalign: %s\n", strerror(err));
fflush(stdout);
abort();
}
#else
buf = malloc(arg->page_size * 2);
buf = (void*)((intptr_t)buf & ~(arg->page_size-1));
#endif
struct timeval start, stop;
for(uint64_t i = 0; i < arg->opcount; i++) {
for(uint64_t i = 0; (!arg->opcount) || i < arg->opcount; i++) {
gettimeofday(&start, 0);
uint64_t offset
= arg->start_off + stasis_util_random64(arg->end_off
@ -45,14 +72,19 @@ void * worker(void * argp) {
offset &= ~(arg->page_size-1);
DEBUG("pread(%d %x %d %lld)\n", arg->fd,
(unsigned int)buf, (int)arg->page_size, (long long)offset);
stasis_histogram_tick(&iop_hist);
err = pread(arg->fd, buf, arg->page_size, offset);
stasis_histogram_tock(&iop_hist);
__sync_fetch_and_add(&completed_ops, 1);
if(err == -1) {
perror("Could not read from file"); fflush(stderr); fflush(stdout); abort();
}
gettimeofday(&stop, 0);
arg->elapsed += stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
}
#ifdef HAVE_POSIX_MEMALIGN
free(buf);
#endif
return 0;
}
@ -66,22 +98,29 @@ int main(int argc, char * argv[]) {
char * filename = argv[1];
int page_size = atoi(argv[2]);
int num_threads = atoi(argv[3]);
uint64_t op_count = atoll(argv[4]);
op_count = atoll(argv[4]);
uint64_t start_off = atoll(argv[5]);
uint64_t end_off = atoll(argv[6]) * MB;
int fd = open(filename, O_RDONLY|O_DIRECT);
completed_ops = 0;
#ifdef HAVE_O_DIRECT
int fd = open(filename, O_RDONLY|O_DIRECT);
#else
printf("Warning: not using O_DIRECT; file system cache will be used.\n");
int fd = open(filename, O_RDONLY);
#endif
if(fd == -1) {
perror("Couldn't open file");
abort();
}
struct timeval start, stop;
pthread_t status;
pthread_t * threads = malloc(sizeof(threads[0]) * num_threads);
thread_arg * arg = malloc(sizeof(arg[0]) * num_threads);
gettimeofday(&start,0);
pthread_create(&status, 0, status_worker, 0);
for(int i = 0; i < num_threads; i++) {
arg[i].fd = fd;
arg[i].page_size = page_size;
@ -109,6 +148,6 @@ int main(int argc, char * argv[]) {
printf("%d threads %lld mb %lld ops / %f seconds = %f IOPS.\n", num_threads, (long long)(end_off / MB), (long long)op_count, wallclock_elapsed, ((double)op_count) / wallclock_elapsed);
close(fd);
stasis_histograms_auto_dump();
return 0;
}

View file

@ -39,8 +39,12 @@ int main(int argc, char * argv[]) {
printf("Usage: %s filename steps iterations start_off length random_mode\n", argv[0]);
abort();
}
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign(&buf, 512, 512);
#else
buf = malloc(2 * 512);
buf = (void*)(((intptr_t)buf) & ~(512-1));
#endif
const char * filename = argv[1];
int fd = open(filename, O_RDONLY);//|O_DIRECT);
if(fd == -1) {

View file

@ -60,7 +60,12 @@ int main(int argc, char ** argv) {
for(int i = 1; i < argc; i++) {
if(!strcmp(argv[i], "--direct")) {
direct = 1;
#ifdef HAVE_O_DIRECT
stasis_buffer_manager_io_handle_flags |= O_DIRECT;
#else
printf("O_DIRECT not supported by this build.");
return -1;
#endif
} else if(!strcmp(argv[i], "--log_safe_writes")) {
stasis_log_type = LOG_TO_FILE;
log_mode = 1;

View file

@ -20,11 +20,19 @@ int main(int argc, char * argv[]) {
int write_size = atoi(argv[4]);
uint64_t stride = atoll(argv[5]);
int fd = -1;
#ifdef HAVE_O_DSYNC
fd = open(argv[1], O_WRONLY|O_DSYNC); //|O_DIRECT);
#else
fd = open(argv[1], O_WRONLY|O_SYNC); //|O_DIRECT);
#endif
struct timeval start, stop;
void * buf;
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign(&buf, 512, write_size);
#else
buf = malloc(2 * write_size);
buf = (void*)(((intptr_t)buf) & ~(write_size-1));
#endif
memset(buf, 0, write_size);
gettimeofday(&start, 0);

View file

@ -31,13 +31,22 @@ typedef struct worker {
void * func(void * argp) {
worker * arg = argp;
void * buf;
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign(&buf, 512, arg->write_size);
#else
buf = malloc(2*arg->write_size);
buf = (void*)(((intptr_t)buf) & ~((intptr_t)arg->write_size-1));
#endif
memset(buf, 0, arg->write_size);
// void * buf = calloc(arg->write_size, 1);
pthread_mutex_lock(&arg->mutex);
uint64_t offset = 0;
if(many_handles) {
arg->fd = open(arg->filename, O_WRONLY|O_DSYNC); //|O_DIRECT);
#ifdef HAVE_O_DSYNC
arg->fd = open(arg->filename, O_WRONLY|O_DSYNC);
#else
arg->fd = open(arg->filename, O_WRONLY|O_SYNC);
#endif
if(arg->fd == -1) {
perror("Couldn't open file");
abort();
@ -81,7 +90,11 @@ int main(int argc, char * argv[]) {
uint64_t stride = atoll(argv[5]);
int fd = -1;
if(!many_handles) {
#ifdef HAVE_O_DSYNC
fd = open(argv[1], O_WRONLY|O_DSYNC); //|O_DIRECT);
#else
fd = open(argv[1], O_WRONLY|O_SYNC); //|O_DIRECT);
#endif
}
for(int i = 0; i < NUM_WORKERS; i++) {

View file

@ -1,15 +1,18 @@
#define __USE_GNU
#define _GNU_SOURCE
#cmakedefine HAVE_POSIX_FALLOCATE
#cmakedefine HAVE_POSIX_MEMALIGN
#cmakedefine HAVE_POSIX_FADVISE
#cmakedefine HAVE_FDATASYNC
#cmakedefine HAVE_SYNC_FILE_RANGE
#cmakedefine HAVE_O_NOATIME
#cmakedefine HAVE_O_DIRECT
#cmakedefine HAVE_O_DSYNC
#cmakedefine HAVE_GCC_ATOMICS
#cmakedefine HAVE_PTHREAD_STACK_MIN
#cmakedefine HAVE_ALLOCA_H
#cmakedefine HAVE_TDESTROY
#cmakedefine HAVE_POWL
#cmakedefine DBUG
//#ifndef HAVE_PTHREAD_STACK_MIN
//#define PTHREAD_STACK_MIN 32768 // wild guess.
//#endif
#cmakedefine ON_LINUX
#cmakedefine ON_MACOS

78
config/Makefile.stasis Normal file
View file

@ -0,0 +1,78 @@
ifeq ($(shell uname), Linux)
INC += -I . -I config/linux
LIB += -lrt
else
INC += -I . -I config/macos
CC = gcc-4.2
CXX = g++-4.2
endif
#LIB += -lm -lpthread
CPPFLAGS += $(INC)
CXXFLAGS += -O3 -g -Wall -pedantic -Wno-long-long -Wno-variadic-macros -fPIC $(CPPFLAGS)
CFLAGS += -std=gnu99 $(CXXFLAGS) $(CPPFLAGS)
ifeq ($(shell uname), Linux)
SO=bin/lib$(LIBNAME).so
endif
all: ${SO} \
bin/lib$(LIBNAME).a \
$(addprefix bin/, $(addsuffix .static, $(basename $(MAINSRC)))) \
print_cxx_params print_cc_params
bin/%.o : %.c
@echo CC $<
@mkdir -p $(dir $@)
@ $(CC) -c $(CFLAGS) -o $@ $<
print_cc_params:
@echo CC: $(CC) -c $(CFLAGS) -o %.o %.c
bin/%.o : %.cpp
@echo C++ $<
@mkdir -p $(dir $@)
@ $(CXX) -c $(CXXFLAGS) -o $@ $<
print_cxx_params:
@echo C++: $(CXX) -c $(CXXFLAGS) -o %.o %.cpp
bin/%.d: %.c
@mkdir -p $(dir $@)
@$(CC) -M $(CFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@
bin/%.d: %.cpp
@mkdir -p $(dir $@)
@$(CC) -M $(CXXFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@
bin/%.d: %.cc
@mkdir -p $(dir $@)
@$(CC) -M $(CXXFLAGS) $< | perl -ne 's~^.+:~$@ $(basename $@).o :~;print;' > $@
bin/lib$(LIBNAME).so: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC)))
@echo LINK $@
@g++ -shared -Wl,-soname,$@ $(LIB) $(CXXFLAGS) -o $@ $^
bin/lib$(LIBNAME).dylib: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC)))
@echo LINK $@
@g++ -dynamiclib $(LIB) $(CXXFLAGS) -o $@ $^
bin/lib$(LIBNAME).a: $(patsubst %.cpp,bin/%.o,$(patsubst %.c,bin/%.o,$(LIBSRC)))
@echo AR $@
@ar rcs $@ $^
bin/%.static: %.cpp bin/lib$(LIBNAME).a
@echo STATIC_EXE $@
@mkdir -p $(dir $@)
@g++ $(LIB) $(CXXFLAGS) -o $@ $^ -static -pthread $(STATIC_LIBS)
bin/%.static: %.cc bin/lib$(LIBNAME).a
@echo STATIC_EXE $@
@mkdir -p $(dir $@)
@g++ $(LIB) $(CXXFLAGS) -o $@ $^ -static -pthread $(STATIC_LIBS)
.PHONY: all clean print_cc_params print_cxx_params
clean:
rm -rf bin/
-include $(patsubst %.cc,bin/%.d,$(patsubst %.cpp,bin/%.d,$(patsubst %.c,bin/%.d,$(LIBSRC))))

19
config/linux/config.h Normal file
View file

@ -0,0 +1,19 @@
#define __USE_GNU
#define _GNU_SOURCE
#define ON_LINUX
#define HAVE_POSIX_FALLOCATE
#define HAVE_POSIX_MEMALIGN
#define HAVE_POSIX_FADVISE
#define HAVE_FDATASYNC
#define HAVE_SYNC_FILE_RANGE
#define HAVE_O_NOATIME
#define HAVE_O_DIRECT
#define HAVE_O_DSYNC
#define HAVE_GCC_ATOMICS
#define HAVE_PTHREAD_STACK_MIN
#define HAVE_ALLOCA_H
#define HAVE_TDESTROY
#define HAVE_POWL
#define PBL_COMPAT 1
#define STLSEARCH 1
//#define DBUG

17
config/macos/config.h Normal file
View file

@ -0,0 +1,17 @@
#define __USE_GNU
#define _GNU_SOURCE
#define ON_MACOS
//#define HAVE_POSIX_FALLOCATE
//#define HAVE_FDATASYNC
//#define HAVE_SYNC_FILE_RANGE
//#define HAVE_O_NOATIME
//#define HAVE_POSIX_FADVISE
#define HAVE_O_DIRECT
#define HAVE_GCC_ATOMICS
#define HAVE_PTHREAD_STACK_MIN
#define HAVE_ALLOCA_H
#define HAVE_TDESTROY
#define HAVE_POWL
#define PBL_COMPAT 1
#define STLSEARCH 1
//#define DBUG

View file

@ -104,7 +104,7 @@
</div>
<div class="version">
<hr size="0">
<small><i>$Id$</i></small>
<small><i>$Id: index.html 96 2004-10-27 03:49:02Z sears $</i></small>
</div>
</body>

View file

@ -5,7 +5,7 @@
% \usepackage{usenix-2e}
% and put {\rm ....} around the author names.
%
% $Id$
% $Id: usenix.sty 83 2004-10-23 02:19:01Z sears $
%
% The following definitions are modifications of standard article.sty
% definitions, arranged to do a better job of matching the USENIX

View file

@ -1,7 +1,7 @@
% usenix.sty - to be used with latex2e for USENIX.
% To use this style file, look at the template usenix_template.tex
%
% $Id$
% $Id: usenix.sty 679 2006-09-04 22:51:40Z sears $
%
% The following definitions are modifications of standard article.sty
% definitions, arranged to do a better job of matching the USENIX

View file

@ -1,3 +1,4 @@
IF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
IF(JNI_FOUND)
ADD_CUSTOM_COMMAND(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/org/stasis/Stasis.class
@ -15,3 +16,4 @@ IF(JNI_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${JAVA_INCLUDE_PATH} ${INCLUDE_DIRECTORIES})
ADD_LIBRARY(stasisjni ${CMAKE_CURRENT_SOURCE_DIR}/org_stasis_Stasis.c)
ENDIF(JNI_FOUND)
ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")

View file

@ -1,4 +1,5 @@
ADD_LIBRARY(stasis util/crc32.c
util/bloomFilter.c
util/redblack.c
util/lhtable.c
util/concurrentHash.c
@ -53,8 +54,15 @@ ADD_LIBRARY(stasis util/crc32.c
operations/regions.c
operations/bTree.c
operations/blobs.c
io/rangeTracker.c io/memory.c io/file.c io/pfile.c io/raid1.c
io/non_blocking.c io/debug.c io/handle.c
io/rangeTracker.c
io/memory.c
io/file.c
io/pfile.c
io/raid1.c
io/raid0.c
io/non_blocking.c
io/debug.c
io/handle.c
bufferManager/pageArray.c
bufferManager/bufferHash.c
replacementPolicy/lru.c
@ -63,6 +71,7 @@ ADD_LIBRARY(stasis util/crc32.c
replacementPolicy/concurrentWrapper.c
replacementPolicy/clock.c
)
IF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
ADD_LIBRARY(stasis_experimental
experimental/consumer.c
experimental/fifo.c
@ -75,5 +84,6 @@ ADD_LIBRARY(stasis_experimental
experimental/group.c
experimental/lsmTree.c
)
ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
INSTALL(TARGETS stasis LIBRARY DESTINATION lib)

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*******************************
* $Id$
* $Id: bufferManager.c 1543 2011-08-25 21:29:51Z sears.russell@gmail.com $
*
* implementation of the page buffer
* *************************************************/

View file

@ -45,7 +45,7 @@ terms specified in this license.
*
* Implementation of in memory buffer pool
*
* $Id$
* $Id: bufferPool.c 1542 2011-08-23 18:25:26Z sears.russell@gmail.com $
*
*/
#include <stasis/common.h>

View file

@ -1,5 +1,4 @@
#define _GNU_SOURCE
#include <config.h>
#include <stasis/common.h>
#include <stasis/util/latches.h>

View file

@ -1,3 +1,4 @@
#include <config.h>
#include <pbl/pbl.h>
#include <stasis/lockManager.h>
#include <stasis/util/latches.h>

View file

@ -1,3 +1,4 @@
#include <config.h>
#include <stasis/experimental/multiplexer.h>
#include <stasis/util/crc32.h>
#include <stasis/operations/linearHashNTA.h>

View file

@ -30,7 +30,11 @@ int stasis_buffer_manager_io_handle_flags =
#ifdef STASIS_BUFFER_MANAGER_IO_HANDLE_FLAGS
STASIS_BUFFER_MANAGER_IO_HANDLE_FLAGS;
#else
#ifdef HAVE_O_NOATIME
O_NOATIME;
#else
0;
#endif
#endif
int stasis_buffer_manager_preallocate_mode =
@ -94,18 +98,27 @@ stasis_handle_t* (*stasis_handle_factory)() =
#endif
stasis_handle_t* (*stasis_handle_file_factory)(const char* filename, int open_mode, int creat_perms) =
#ifdef STASIS_FILE_HANDLE_FACTORY
STASIS_FILE_HANDLE_FACTORY
STASIS_FILE_HANDLE_FACTORY;
#else
stasis_handle_open_pfile;
#endif
stasis_handle_t* (*stasis_non_blocking_handle_file_factory)(const char* filename, int open_mode, int creat_perms) =
#ifdef STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY
STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY
STASIS_NON_BLOCKING_HANDLE_FILE_FACTORY;
#else
stasis_handle_open_pfile;
#endif
uint32_t stasis_handle_raid0_stripe_size =
#ifdef STASIS_HANDLE_RAID0_STRIPE_SIZE
STASIS_HANDLE_RAID0_STRIPE_SIZE;
#else
(256 * 1024);
#endif
char ** stasis_handle_raid0_filenames = 0;
#ifdef STASIS_BUFFER_MANAGER_HINT_WRITES_ARE_SEQUENTIAL
int stasis_buffer_manager_hint_writes_are_sequential = STASIS_BUFFER_MANAGER_HINT_WRITES_ARE_SEQUENTIAL;
#else

View file

@ -86,8 +86,10 @@ static stasis_handle_t * pfile_dup(stasis_handle_t *h) {
static void pfile_enable_sequential_optimizations(stasis_handle_t *h) {
pfile_impl *impl = h->impl;
impl->sequential = 1;
#ifdef HAVE_POSIX_FADVISE
int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL to kernel failed");
#endif
}
static lsn_t pfile_end_position(stasis_handle_t *h) {
pfile_impl *impl = (pfile_impl*)h->impl;
@ -300,8 +302,10 @@ static int pfile_force(stasis_handle_t *h) {
DEBUG("File was opened with O_SYNC. pfile_force() is a no-op\n");
}
if(impl->sequential) {
#ifdef HAVE_POSIX_FADVISE
int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED);
if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL to kernel failed");
#endif
}
TOCK(force_hist);
return 0;
@ -334,10 +338,12 @@ static int pfile_async_force(stasis_handle_t *h) {
#endif
int ret = 0;
#endif
#ifdef HAVE_POSIX_FADVISE
if(impl->sequential) {
int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED);
if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL (for a range of a file) to kernel failed");
}
#endif
TOCK(force_range_hist);
return ret;
}
@ -370,10 +376,12 @@ static int pfile_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
#endif
int ret = 0;
#endif
#ifdef HAVE_POSIX_FADVISE
if(impl->sequential) {
int err = posix_fadvise(impl->fd, start, stop-start, POSIX_FADV_DONTNEED);
if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL (for a range of a file) to kernel failed");
}
#endif
TOCK(force_range_hist);
return ret;
}

222
src/stasis/io/raid0.c Normal file
View file

@ -0,0 +1,222 @@
/*
* raid0.c
*
* Created on: Feb 24, 2012
* Author: sears
*/
#include <config.h>
#include <stasis/flags.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stasis/io/handle.h>
#include <stasis/util/histogram.h>
#include <assert.h>
#ifdef RAID0_LATENCY_PROF
DECLARE_HISTOGRAM(read_hist)
DECLARE_HISTOGRAM(write_hist)
DECLARE_HISTOGRAM(force_hist)
DECLARE_HISTOGRAM(force_range_hist)
#define TICK(hist) stasis_histogram_tick(&hist);
#define TOCK(hist) stasis_histogram_tock(&hist);
#else
#define TICK(hist)
#define TOCK(hist)
#endif
typedef struct raid0_impl {
stasis_handle_t ** h;
int handle_count;
int stripe_size;
} raid0_impl;
static int raid0_num_copies(stasis_handle_t *h) {
raid0_impl * i = h->impl;
return i->h[0]->num_copies(i->h[0]);
}
static int raid0_num_copies_buffer(stasis_handle_t *h) {
raid0_impl * i = h->impl;
return i->h[0]->num_copies_buffer(i->h[0]);
}
static int raid0_close(stasis_handle_t *h) {
raid0_impl * r = h->impl;
int ret = 0;
for(int i = 0; i < r->handle_count; i++) {
int this_ret = r->h[i]->close(r->h[i]);
if(this_ret && !ret) ret = this_ret;
}
free(r->h);
free(r);
free(h);
return ret;
}
static stasis_handle_t* raid0_dup(stasis_handle_t *h) {
raid0_impl * r = h->impl;
stasis_handle_t ** h_dup = malloc(sizeof(h_dup[0]) * r->handle_count);
for(int i = 0; i < r->handle_count; i++) {
h_dup[i] = r->h[i]->dup(r->h[i]);
}
stasis_handle_t * ret = stasis_handle_open_raid0(r->handle_count, h_dup, r->stripe_size);
free(h_dup);
return ret;
}
static void raid0_enable_sequential_optimizations(stasis_handle_t *h) {
raid0_impl * r = h->impl;
for(int i = 0; i < r->handle_count; i++) {
r->h[i]->enable_sequential_optimizations(r->h[i]);
}
}
static lsn_t raid0_end_position(stasis_handle_t *h) {
raid0_impl *r = h->impl;
lsn_t max_end = 0;
for(int i = 0; i < r->handle_count; i++) {
lsn_t this_end = r->h[i]->end_position(r->h[i]) + (i * r->stripe_size);
if(this_end > max_end) max_end = this_end;
}
return max_end;
}
/**
* Figure out which stripe this operation belongs on. We don't support
* inter-stripe operations, so returning a single stripe suffices.
*
* @param off The first byte to be accessed.
* @param len If the access will span stripes, this method will call abort().
*/
static int raid0_calc_stripe(raid0_impl * r, lsn_t off, lsn_t len) {
assert(len < r->stripe_size);
int start_stripe = (off % (r->handle_count * r->stripe_size)) / r->stripe_size;
int end_stripe = ((off+len-1) % (r->handle_count * r->stripe_size)) / r->stripe_size;
assert(start_stripe == end_stripe);
return start_stripe;
}
static lsn_t raid0_calc_block(raid0_impl * r, lsn_t off, lsn_t len) {
return off / (r->stripe_size * r->handle_count);
}
static lsn_t raid0_calc_off(raid0_impl * r, lsn_t off, lsn_t len) {
lsn_t block = raid0_calc_block(r, off, len);
return block * r->stripe_size + (off % r->stripe_size);
}
static int raid0_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) {
raid0_impl *r = h->impl;
int stripe = raid0_calc_stripe(r, off, len);
lsn_t stripe_off = raid0_calc_off(r, off, len);
return r->h[stripe]->read(r->h[stripe], stripe_off, buf, len);
}
static int raid0_write(stasis_handle_t *h, lsn_t off, const byte *dat, lsn_t len) {
raid0_impl *r = h->impl;
int stripe = raid0_calc_stripe(r, off, len);
lsn_t stripe_off = raid0_calc_off(r, off, len);
return r->h[stripe]->write(r->h[stripe], stripe_off, dat, len);
}
static stasis_write_buffer_t * raid0_write_buffer(stasis_handle_t *h, lsn_t off, lsn_t len) {
raid0_impl *r = h->impl;
int stripe = raid0_calc_stripe(r, off, len);
lsn_t stripe_off = raid0_calc_off(r, off, len);
return r->h[stripe]->write_buffer(r->h[stripe], stripe_off, len);
}
static int raid0_release_write_buffer(stasis_write_buffer_t *w) {
return w->h->release_write_buffer(w);
}
static stasis_read_buffer_t *raid0_read_buffer(stasis_handle_t *h,
lsn_t off, lsn_t len) {
raid0_impl *r = h->impl;
int stripe = raid0_calc_stripe(r, off, len);
lsn_t stripe_off = raid0_calc_off(r, off, len);
return r->h[stripe]->read_buffer(r->h[stripe], stripe_off, len);
}
static int raid0_release_read_buffer(stasis_read_buffer_t *r) {
return r->h->release_read_buffer(r);
}
static int raid0_force(stasis_handle_t *h) {
raid0_impl * r = h->impl;
int ret = 0;
for(int i = 0; i < r->handle_count; i++) {
int this_ret = r->h[i]->force(r->h[i]);
if(this_ret && !ret) ret = this_ret;
}
return ret;
}
/**
* TODO Implement raid0_force_range properly instead of forcing whole file.
*/
static int raid0_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
return raid0_force(h);
}
static int raid0_async_force(stasis_handle_t *h) {
raid0_impl * r = h->impl;
int ret = 0;
for(int i = 0; i < r->handle_count; i++) {
int this_ret = r->h[i]->async_force(r->h[i]);
if(this_ret && !ret) ret = this_ret;
}
return ret;
}
static int raid0_fallocate(stasis_handle_t *h, lsn_t off, lsn_t len) {
raid0_impl * r = h->impl;
int ret = 0;
lsn_t start_block = raid0_calc_block(r, off, 0);
lsn_t start_off = (start_block) * r->stripe_size;
lsn_t end_block = raid0_calc_block(r, off+len-1, 0);
lsn_t end_off = (end_block+1) * r->stripe_size;
for(int i = 0; i < r->handle_count; i++) {
int this_ret = r->h[i]->fallocate(r->h[i], start_off, end_off-start_off);
if(this_ret && !ret) ret = this_ret;
}
return ret;
}
struct stasis_handle_t raid0_func = {
.num_copies = raid0_num_copies,
.num_copies_buffer = raid0_num_copies_buffer,
.close = raid0_close,
.dup = raid0_dup,
.enable_sequential_optimizations = raid0_enable_sequential_optimizations,
.end_position = raid0_end_position,
.write = raid0_write,
.write_buffer = raid0_write_buffer,
.release_write_buffer = raid0_release_write_buffer,
.read = raid0_read,
.read_buffer = raid0_read_buffer,
.release_read_buffer = raid0_release_read_buffer,
.force = raid0_force,
.async_force = raid0_async_force,
.force_range = raid0_force_range,
.fallocate = raid0_fallocate,
.error = 0
};
stasis_handle_t * stasis_handle_open_raid0(int handle_count, stasis_handle_t** h, uint32_t stripe_size) {
stasis_handle_t * ret = malloc(sizeof(*ret));
*ret = raid0_func;
raid0_impl * r = malloc(sizeof(*r));
r->stripe_size = stripe_size;
r->handle_count = handle_count;
r->h = malloc(sizeof(r->h[0]) * handle_count);
for(int i = 0; i < handle_count; i++) {
r->h[i] = h[i];
}
ret->impl = r;
return ret;
}
stasis_handle_t * stasis_handle_raid0_factory() {
if(stasis_handle_raid0_filenames == NULL) {
stasis_handle_t * h[2];
h[0] = stasis_handle_file_factory(stasis_store_file_1_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM);
h[1] = stasis_handle_file_factory(stasis_store_file_2_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM);
return stasis_handle_open_raid0(2, h, stasis_handle_raid0_stripe_size);
} else {
int count = 0;
while(stasis_handle_raid0_filenames[count]) count++;
stasis_handle_t * h[count];
for(int i = 0; i < count; i++) {
h[i] = stasis_handle_file_factory(stasis_handle_raid0_filenames[i], O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM);
}
return stasis_handle_open_raid0(count, h, stasis_handle_raid0_stripe_size);
}
}

View file

@ -11,7 +11,7 @@
#include <assert.h>
#ifdef RAID_LATENCY_PROF
#ifdef RAID1_LATENCY_PROF
DECLARE_HISTOGRAM(read_hist)
DECLARE_HISTOGRAM(write_hist)
DECLARE_HISTOGRAM(force_hist)
@ -21,6 +21,7 @@ DECLARE_HISTOGRAM(force_range_hist)
#else
#define TICK(hist)
#define TOCK(hist)
#endif
typedef struct raid1_impl {
stasis_handle_t * a;
@ -109,12 +110,24 @@ static int raid1_force(stasis_handle_t *h) {
int retB = i->b->force(i->b);
return retA ? retA : retB;
}
static int raid1_async_force(stasis_handle_t *h) {
raid1_impl *i = h->impl;
int retA = i->a->async_force(i->a);
int retB = i->b->async_force(i->b);
return retA ? retA : retB;
}
static int raid1_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
raid1_impl *i = h->impl;
int retA = i->a->force_range(i->a, start, stop);
int retB = i->b->force_range(i->b, start, stop);
return retA ? retA : retB;
}
static int raid1_fallocate(stasis_handle_t *h, lsn_t off, lsn_t len) {
raid1_impl *i = h->impl;
int retA = i->a->fallocate(i->a, off, len);
int retB = i->b->fallocate(i->b, off, len);
return retA ? retA : retB;
}
struct stasis_handle_t raid1_func = {
.num_copies = raid1_num_copies,
.num_copies_buffer = raid1_num_copies_buffer,
@ -129,7 +142,9 @@ struct stasis_handle_t raid1_func = {
.read_buffer = raid1_read_buffer,
.release_read_buffer = raid1_release_read_buffer,
.force = raid1_force,
.async_force = raid1_async_force,
.force_range = raid1_force_range,
.fallocate = raid1_fallocate,
.error = 0
};
@ -147,4 +162,3 @@ stasis_handle_t * stasis_handle_raid1_factory() {
stasis_handle_t * b = stasis_handle_file_factory(stasis_store_file_2_name, O_CREAT | O_RDWR | stasis_buffer_manager_io_handle_flags, FILE_PERM);
return stasis_handle_open_raid1(a, b);
}
#endif

View file

@ -1,6 +1,7 @@
#include <config.h>
#include <stasis/common.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@ -14,7 +15,6 @@
#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
@ -60,6 +60,7 @@ typedef struct {
pthread_t write_thread;
pthread_t write_thread2;
pthread_t prealloc_thread;
stasis_ringbuffer_t * ring;
/** Need this because the min aggregate in the ringbuffer doesn't
* want to malloc keys, but needs to maintain some sort of state
@ -68,6 +69,19 @@ typedef struct {
pthread_key_t handle_key;
pthread_mutex_t mut;
/**
* If there are fewer than this many dead files in the pool, a background
* thread allocates some more.
*/
int dead_threshold;
/**
* Signal this mutex to wake up the prealloc thread.
*/
pthread_cond_t prealloc_log_cond;
int shutdown;
} stasis_log_file_pool_state;
enum file_type {
@ -111,7 +125,15 @@ enum file_type stasis_log_file_pool_file_type(const struct dirent* file, lsn_t *
/**
* No latching required. Does not touch shared state.
*/
#ifdef ON_LINUX
int stasis_log_file_pool_file_filter(const struct dirent* file) {
#else
#ifdef ON_MACOS
int stasis_log_file_pool_file_filter(struct dirent* file) {
#else
#error Not on linux or macos?
#endif
#endif
lsn_t junk;
if(UNKNOWN != stasis_log_file_pool_file_type(file, &junk)) {
return 1;
@ -184,17 +206,20 @@ char * build_path(const char * dir, const char * file) {
* not concurrently change.
*/
void stasis_log_file_pool_chunk_open(stasis_log_file_pool_state * fp, int chunk) {
char* full_name = malloc(strlen(fp->dirname) + 1 + strlen(fp->live_filenames[chunk]) + 1);
full_name[0] = 0;
strcat(full_name, fp->dirname);
strcat(full_name, "/");
strcat(full_name, fp->live_filenames[chunk]);
char* full_name = build_path(fp->dirname, fp->live_filenames[chunk]);
// char* full_name = malloc(strlen(fp->dirname) + 1 + strlen(fp->live_filenames[chunk]) + 1);
// full_name[0] = 0;
// strcat(full_name, fp->dirname);
// strcat(full_name, "/");
// strcat(full_name, fp->live_filenames[chunk]);
fp->ro_fd[chunk] = open(full_name, fp->filemode, fp->fileperm);
free(full_name);
}
/**
* Does no latching. Relies on stability of fp->live_offsets and fp->live_count.
*
* @return chunk id or -1 if the offset is past the end of the live chunks.
*/
static int get_chunk_from_offset(stasis_log_t * log, lsn_t lsn) {
stasis_log_file_pool_state * fp = log->impl;
@ -207,6 +232,82 @@ static int get_chunk_from_offset(stasis_log_t * log, lsn_t lsn) {
}
return chunk;
}
static void stasis_log_file_pool_prealloc_file(stasis_log_file_pool_state * fp) {
if(fp->dead_count < fp->dead_threshold) {
char * tmpfile = "preallocating~";
char * tmpfilepath = build_path(fp->dirname, tmpfile);
size_t bufsz = PAGE_SIZE;
pthread_mutex_unlock(&fp->mut);
#ifdef HAVE_O_DSYNC
int sync = O_DSYNC; // XXX cut and pasted from above...
#else
int sync = O_SYNC;
#endif
int fd = open(tmpfilepath, fp->filemode & (~sync), fp->fileperm);
#ifdef HAVE_POSIX_FALLOCATE
printf("posix_fallocate()'ing empty log file...\n");
posix_fallocate(fd, 0, fp->target_chunk_size + bufsz);
#endif
printf("Writing zeros to empty log file...\n");
byte * buffer = calloc(bufsz, sizeof(byte));
for(off_t i = 0; i <= fp->target_chunk_size; i += bufsz) {
int ret = pwrite(fd, buffer, bufsz, i);
if(ret != bufsz) {
perror("Couldn't write to empty log");
abort();
}
}
free(buffer);
fsync(fd);
close(fd);
printf("Log preallocation done.\n");
pthread_mutex_lock(&fp->mut);
char * filenametmp = stasis_log_file_pool_build_filename(fp, fp->dead_count);
char * filename = malloc(strlen(filenametmp) + 2);
strcpy(filename, filenametmp);
strcat(filename, "~");
char * newfilepath = build_path(fp->dirname, filename);
fp->dead_filenames = realloc(fp->dead_filenames, sizeof(char**) * fp->dead_count + 1);
fp->dead_filenames[fp->dead_count] = filename;
fp->dead_count ++;
int err = rename(tmpfilepath, newfilepath);
printf("Created new chunk: %s.", newfilepath);
// TODO Another rename holding mutex sneaks in...
if(err) {
perror("could not rename file");
assert(err == -1);
abort();
}
free(tmpfilepath);
free(filenametmp);
// don't free filename; we installed it into the dead_filenames array!
free(newfilepath);
}
}
static void * stasis_log_file_pool_prealloc_worker(void * fpp) {
stasis_log_file_pool_state * fp = fpp;
pthread_mutex_lock(&fp->mut);
while(1) {
while(fp->dead_count >= fp->dead_threshold) {
// printf("Dead count = %d, threshold = %d, log preallocation sleeps.\n", fp->dead_count, fp->dead_threshold);
if(fp->shutdown) { break; }
pthread_cond_wait(&fp->prealloc_log_cond, &fp->mut);
}
if(fp->shutdown) { break; }
printf("Available log chunk count = %d (want %d), preallocating new chunk.\n", fp->dead_count, fp->dead_threshold);
stasis_log_file_pool_prealloc_file(fp);
}
pthread_mutex_unlock(&fp->mut);
return 0;
}
/**
* Does no latching. Modifies all mutable fields of fp.
*/
@ -223,6 +324,11 @@ int stasis_log_file_pool_append_chunk(stasis_log_t * log, off_t new_offset) {
if(fp->dead_count) {
old_file = fp->dead_filenames[fp->dead_count-1];
fp->dead_count--;
if(fp->dead_count < fp->dead_threshold) {
pthread_cond_signal(&fp->prealloc_log_cond);
}
char * old_path = build_path(fp->dirname, old_file);
int err = rename(old_path, new_path);
if(err) {
@ -464,8 +570,8 @@ int stasis_log_file_pool_truncate(struct stasis_log_t* log, lsn_t lsn) {
// Rename should be fast, but we're placing a lot of faith in the filesystem.
int err = rename(old, new);
if(err) {
assert(err == -1);
perror("could not rename file");
assert(err == -1);
abort();
}
close(fp->ro_fd[i]);
@ -513,11 +619,14 @@ int stasis_log_file_pool_close(stasis_log_t * log) {
stasis_log_file_pool_state * fp = log->impl;
log->force_tail(log, 0); /// xxx use real constant for wal mode..
stasis_ringbuffer_shutdown(fp->ring);
fp->shutdown = 1;
pthread_cond_signal(&fp->prealloc_log_cond);
pthread_join(fp->write_thread, 0);
// pthread_join(fp->write_thread2, 0);
pthread_join(fp->prealloc_thread, 0);
stasis_ringbuffer_free(fp->ring);
@ -600,7 +709,18 @@ void key_destr(void * key) { free(key); }
/**
* Does no latching. No shared state.
*/
#ifdef ON_LINUX
int filesort(const struct dirent ** a, const struct dirent ** b) {
#else
#ifdef ON_MACOS
int filesort(const void * ap, const void * bp) {
const struct dirent *const *const a = ap;
const struct dirent *const *const b = bp;
#else
#error Not on linux or macos?
#endif
#endif
int ret = strcmp((*a)->d_name, (*b)->d_name);
DEBUG("%d = %s <=> %s\n", ret, (*a)->d_name, (*b)->d_name);
return ret;
@ -676,11 +796,16 @@ stasis_log_t* stasis_log_file_pool_open(const char* dirname, int filemode, int f
fp->live_count = 0;
fp->dead_count = 0;
fp->target_chunk_size = 512 * 1024 * 1024;
fp->target_chunk_size = 128* 1024 * 1024;
fp->filemode = filemode | O_DSYNC; /// XXX should not hard-code O_SYNC.
#ifdef HAVE_O_DSYNC
int SYNC = O_DSYNC;
#else
int SYNC = O_SYNC;
#endif
fp->filemode = filemode | SYNC; /// XXX should not hard-code O_SYNC.
fp->fileperm = fileperm;
fp->softcommit = !(filemode & O_DSYNC);
fp->softcommit = !(filemode & SYNC);
off_t current_target = 0;
for(int i = 0; i < n; i++) {
@ -751,6 +876,10 @@ stasis_log_t* stasis_log_file_pool_open(const char* dirname, int filemode, int f
fp->ring = stasis_ringbuffer_init(26, next_lsn); // 64mb buffer
pthread_key_create(&fp->handle_key, key_destr);
fp->dead_threshold = 1;
pthread_cond_init(&fp->prealloc_log_cond, 0);
fp->shutdown = 0;
pthread_create(&fp->prealloc_thread, 0, stasis_log_file_pool_prealloc_worker, fp);
pthread_create(&fp->write_thread, 0, stasis_log_file_pool_writeback_worker, ret);
// pthread_create(&fp->write_thread2, 0, stasis_log_file_pool_writeback_worker, ret);

View file

@ -69,7 +69,7 @@
@ingroup OPERATIONS
$Id$
$Id: alloc.c 1569 2011-11-07 22:53:55Z sears.russell@gmail.com $
*/
//}end

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: decrement.c 1310 2009-12-31 20:20:47Z sears.russell $
*
* Decrements the given reference by one
*********************************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: increment.c 1310 2009-12-31 20:20:47Z sears.russell $
*
* Increments the given reference by one
**********************************************/

View file

@ -1,3 +1,4 @@
#include <config.h>
#include <stasis/bufferManager.h>
#include <stasis/operations.h>
#include <stasis/util/hash.h>

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: noop.c 1210 2009-07-16 03:05:32Z sears.russell $
*
* sets the given reference to dat
**********************************************/

View file

@ -41,7 +41,7 @@ terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: prepare.c 1215 2009-07-26 18:51:45Z sears.russell $
*
* sets the given reference to dat
**********************************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: set.c 1526 2011-06-13 11:26:25Z sears.russell@gmail.com $
*
* sets the given reference to dat
**********************************************/

View file

@ -0,0 +1,159 @@
/*
* bloomFilter.c
*
* Copyright 2010-2012 Yahoo! Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: sears
*/
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stasis/util/bloomFilter.h>
/**
Variable names:
m: number of bloom filter bits
n: number of bloom filter entries
k: number of hash functions = ln(2) * (m/n)
c: m/n
f: false positive rate = (1/2)^k ~= 0.6185)^(m/n) ;
taking log_0.6185 of both sides: k log_0.6185(1/2) = m/n ;
applying change of base: k log(1/2) / log(6.128) = m / n
(but that's not useful; this is:)
f ~= 0.6185 ^ (m/n)
log_0.6185(f) = m/n
log(f) / log(0.6185) = m / n
m = n log f / log 0.6185
p: probability a given bit is 1 ~= e^(-kn/m)
*/
static uint64_t stasis_bloom_filter_calc_num_buckets(uint64_t num_expected_items,
double false_positive_rate) {
// m = n log f / log 0.6185
return ((uint64_t) ceil(((double)num_expected_items) *
log(false_positive_rate) / log(0.6185)));
// m = - n ln f / ln 2 ^ 2 = - n ln f / 0.4804 = - n log f / (0.4343 * 0.4804) = -n log f / 0.2086
}
static int stasis_bloom_filter_calc_num_functions(uint64_t num_expected_items,
uint64_t num_buckets) {
// k = ln(2) * (m/n)
int ret = floor((log(2) / log(exp(1.0)))
* ((double) num_buckets) / (double) num_expected_items);
if(ret == 0) {
return 1;
} else {
return ret;
}
}
static double stasis_bloom_filter_current_false_positive_rate(uint64_t actual_number_of_items,
uint64_t num_buckets) {
// 0.6185^(m/n)
return pow(0.6185, ((double)num_buckets)/(double)actual_number_of_items);
}
struct stasis_bloom_filter_t {
uint64_t (*func_a)(const char *, int);
uint64_t (*func_b)(const char *, int);
uint64_t num_expected_items;
double desired_false_positive_rate;
uint64_t num_buckets;
uint8_t * buckets;
uint64_t num_functions;
uint64_t*result_scratch_space;
uint64_t actual_number_of_items;
};
stasis_bloom_filter_t * stasis_bloom_filter_create(uint64_t(*func_a)(const char*,int),
uint64_t(*func_b)(const char*,int),
uint64_t num_expected_items,
double false_positive_rate) {
stasis_bloom_filter_t * ret = malloc(sizeof(*ret));
ret->func_a = func_a;
ret->func_b = func_b;
ret->num_expected_items = num_expected_items;
ret->desired_false_positive_rate = false_positive_rate;
ret->num_buckets = stasis_bloom_filter_calc_num_buckets(ret->num_expected_items, ret->desired_false_positive_rate);
ret->buckets = calloc((ret->num_buckets / 8) + ((ret->num_buckets % 8 == 0) ? 0 : 1), 1);
ret->num_functions = stasis_bloom_filter_calc_num_functions(ret->num_expected_items, ret->num_buckets);
ret->result_scratch_space = malloc(sizeof(*ret->result_scratch_space) * ret->num_functions);
ret->actual_number_of_items = 0;
return ret;
}
void stasis_bloom_filter_destroy(stasis_bloom_filter_t* bf) {
free(bf->buckets);
free(bf->result_scratch_space);
free(bf);
}
// TODO this uses %. It would be better if it used &, but that would potentially double the memory we use. #define a flag.
static void stasis_bloom_filter_calc_functions(stasis_bloom_filter_t * bf, uint64_t* results, const char * key, int keylen) {
uint64_t fa = bf->func_a(key, keylen);
uint64_t fb = bf->func_b(key, keylen);
results[0] = (fa + fb) % bf->num_buckets;
for(int i = 1; i < bf->num_functions; i++) {
results[i] = (results[i-1] + fb ) % bf->num_buckets;
}
}
static const uint8_t stasis_bloom_filter_bit_masks[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
static void stasis_bloom_filter_set_bit(stasis_bloom_filter_t *bf, uint64_t bit) {
uint64_t array_offset = bit >> 3;
uint8_t bit_number = bit & 7;
assert(bit < bf->num_buckets);
bf->buckets[array_offset] |= stasis_bloom_filter_bit_masks[bit_number];
}
/**
@return 0 if the bit is not set, true otherwise.
*/
static uint8_t stasis_bloom_filter_get_bit(stasis_bloom_filter_t *bf, uint64_t bit) {
uint64_t array_offset = bit >> 3;
uint8_t bit_number = bit & 7;
assert(bit < bf->num_buckets);
return bf->buckets[array_offset] & stasis_bloom_filter_bit_masks[bit_number];
}
void stasis_bloom_filter_insert(stasis_bloom_filter_t * bf, const char *key, int len) {
stasis_bloom_filter_calc_functions(bf, bf->result_scratch_space, key, len);
for(int i = 0; i < bf->num_functions; i++) {
stasis_bloom_filter_set_bit(bf, bf->result_scratch_space[i]);
}
bf->actual_number_of_items++;
}
int stasis_bloom_filter_lookup(stasis_bloom_filter_t * bf, const char * key, int len) {
int ret = 1;
uint64_t * scratch = malloc(sizeof(*scratch) * bf->num_functions);
stasis_bloom_filter_calc_functions(bf, scratch, key, len);
for(int i = 0; i < bf->num_functions; i++) {
ret = ret && stasis_bloom_filter_get_bit(bf, scratch[i]);
}
free(scratch);
return ret;
}
void stasis_bloom_filter_print_stats(stasis_bloom_filter_t * bf) {
printf("Design capacity %lld design false positive %f\n"
"Current item count %lld current false positive %f\n"
"Number of buckets %lld (%f MB), number of hash functions %lld\n",
(long long)bf->num_expected_items, bf->desired_false_positive_rate,
(long long)bf->actual_number_of_items,
stasis_bloom_filter_current_false_positive_rate(bf->actual_number_of_items,
bf->num_buckets),
(long long)bf->num_buckets,
((double)bf->num_buckets) / (8.0 * 1024.0 * 1024.0),
(long long)bf->num_functions);
}

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/******************************
* $Id$
* $Id: linkedlist.c 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $
*
* simple linked list
*****************************/

View file

@ -1,4 +1,4 @@
static char rcsid[]="$Id$";
static char rcsid[]="$Id: redblack.c 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $";
/*
Redblack balanced tree algorithm

View file

@ -294,7 +294,8 @@ stasis_ringbuffer_t * stasis_ringbuffer_init(intptr_t base, lsn_t initial_offset
if(err == -1) { perror("Couldn't ftruncate file"); }
ring->mem = mmap(0, size*2, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
// Note: MAP_ANON is deprecated, but MAP_ANONYMOUS is not supported on MacOS.
ring->mem = mmap(0, size*2, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
if(ring->mem == MAP_FAILED) { perror("Couldn't mmap anonymous region"); abort(); }

View file

@ -74,7 +74,7 @@ terms specified in this license.
error is passed up to the application.
@ingroup BUFFER_MANAGER
* $Id$
* $Id: bufferManager.h 1560 2011-10-08 22:01:04Z sears.russell@gmail.com $
*/
#ifndef __BUFFERMANAGER_H__
#define __BUFFERMANAGER_H__

View file

@ -47,7 +47,7 @@ terms specified in this license.
* @file
*
* Functions to manage the in-memory pool of free page buffers.
* $Id$
* $Id: bufferPool.h 1542 2011-08-23 18:25:26Z sears.russell@gmail.com $
*/
#include <stasis/common.h>

View file

@ -53,13 +53,16 @@ terms specified in this license.
* the right thing' and build, even if they do not \#include the
* config.h file that all of the Stasis stuff uses.
*
* $Id$
* $Id: common.h 1576 2012-01-09 23:00:08Z sears.russell $
*/
//#define NDEBUG 1
#ifndef __stasis_common_h
#define __stasis_common_h
#ifndef _DARWIN_C_SOURCE
#define _DARWIN_C_SOURCE // For dirent on mac os
#endif
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif
@ -67,9 +70,11 @@ terms specified in this license.
#define _BSD_SOURCE
#endif
#include <alloca.h>
#include <stdint.h> // uint32, et. al. (has to be before sys/types.h for mcpp atop some broken gcc headers)
#include <fcntl.h>
#include <sys/types.h> // for size_t
#include <dirent.h>
#include <fcntl.h>
#ifdef __cplusplus
# define BEGIN_C_DECLS extern "C" {

View file

@ -50,7 +50,7 @@ terms specified in this license.
*
* @ingroup LLADD_CORE
*
* $Id$
* $Id: constants.h 1543 2011-08-25 21:29:51Z sears.russell@gmail.com $
*/
#ifndef __CONSTANTS_H__

View file

@ -14,7 +14,7 @@
@todo Move this all to some reasonably named interface. :)
$Id$
$Id: logMemory.h 1523 2011-06-13 06:58:34Z sears.russell@gmail.com $
*/
lladdFifo_t * logMemoryFifo(size_t size, lsn_t initialOffset);

View file

@ -95,6 +95,13 @@ extern stasis_handle_t* (*stasis_handle_factory)();
Valid options: stasis_handle_open_file(), stasis_handle_open_pfile(), and stasis_handle_non_blocking_factory.
*/
extern stasis_handle_t* (*stasis_handle_file_factory)(const char* filename, int open_mode, int creat_perms);
/**
* The default stripe size for Stasis' user space raid0 implementation.
*
* This must be a multiple of PAGE_SIZE.
*/
extern uint32_t stasis_handle_raid0_stripe_size;
extern char ** stasis_handle_raid0_filenames;
/**
The factory that non_blocking handles will use for slow handles. (Only
used if stasis_buffer_manager_io_handle_default_factory is set to

View file

@ -2,6 +2,8 @@
#define IO_HANDLE_H
#include <stasis/common.h>
BEGIN_C_DECLS
/**
stasis_handle() is a macro that prepends a unique prefix to the its
argument's function name. It's used to cope with namespace
@ -333,11 +335,23 @@ stasis_handle_t * stasis_handle(open_verifying)(stasis_handle_t * h);
*/
stasis_handle_t * stasis_handle(open_debug)(stasis_handle_t * h);
stasis_handle_t * stasis_handle(open_raid1)(stasis_handle_t *a, stasis_handle_t *b);
/**
* Open a raid0 handle
*
* @param handle_count The number of underlying file handles.
* @param h An array of pointers to the handles. The caller manages the memory that backs the array.
* @param stripe_size The raid 0 stripe size. Must be a multiple of PAGE_SIZE.
*/
stasis_handle_t * stasis_handle(open_raid0)(int handle_count, stasis_handle_t **h, uint32_t stripe_size);
stasis_handle_t * stasis_handle_raid1_factory();
stasis_handle_t * stasis_handle_raid0_factory();
/**
* Open a Stasis file handle using default arguments.
*/
stasis_handle_t * stasis_handle_default_factory();
END_C_DECLS
#endif

View file

@ -52,7 +52,7 @@ terms specified in this license.
@ingroup LLADD_CORE
$Id$
$Id: logEntry.h 1332 2010-01-19 02:14:09Z sears.russell $
*/
#include <stasis/common.h>

View file

@ -47,7 +47,7 @@ terms specified in this license.
*
* @ingroup OPERATIONS
* @todo The functions in operations.h don't belong in the API, but it defines some constants and typedefs that should be there.
* $Id$
* $Id: operations.h 1526 2011-06-13 11:26:25Z sears.russell@gmail.com $
*/
/**
@defgroup COLLECTIONS Collections

View file

@ -5,7 +5,7 @@
@ingroup OPERATIONS
$Id$
$Id: alloc.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $
*/

View file

@ -67,7 +67,7 @@ terms specified in this license.
@ingroup COLLECTIONS
$Id$
$Id: arrayList.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $
*/

View file

@ -49,7 +49,7 @@ terms specified in this license.
*
* @see increment.h
*
* $Id$
* $Id: decrement.h 1240 2009-08-22 00:01:02Z sears.russell $
*/
#ifndef __DECREMENT_H__

View file

@ -54,7 +54,7 @@ terms specified in this license.
*
* @ingroup OPERATIONS
*
* $Id$
* $Id: increment.h 1240 2009-08-22 00:01:02Z sears.russell $
*/

View file

@ -4,7 +4,7 @@
* (actually based on jbhash.c, which was based on pblhash)
*
*
* $Id$
* $Id: lladdhash.h 147 2005-01-20 21:19:47Z sears $
*/
/**
@ -27,7 +27,7 @@
@ingroup OPERATIONS
$Id$
$Id: lladdhash.h 147 2005-01-20 21:19:47Z sears $
*/

View file

@ -7,7 +7,7 @@
@ingroup OPERATIONS
$Id$
$Id: naiveLinearHash.h 1090 2008-11-03 21:42:42Z sears.russell $
*/
#ifndef __NAIVE_LINEAR_HASH_H

View file

@ -47,7 +47,7 @@ terms specified in this license.
*
* @ingroup OPERATIONS
*
* $Id$
* $Id: noop.h 1157 2009-03-31 05:02:54Z sears.russell $
*
**********************************************/

View file

@ -53,7 +53,7 @@ terms specified in this license.
*
* @see page.h
*
* $Id$
* $Id: pageOperations.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $
*/
#ifndef __PAGE_OPERATIONS_H__
#define __PAGE_OPERATIONS_H__

View file

@ -67,7 +67,7 @@ terms specified in this license.
*
* @ingroup OPERATIONS
*
* $Id$
* $Id: prepare.h 1157 2009-03-31 05:02:54Z sears.russell $
*
*
*/

View file

@ -47,7 +47,7 @@ terms specified in this license.
*
* @ingroup OPERATIONS
*
* $Id$
* $Id: set.h 1519 2011-06-12 09:20:58Z sears.russell@gmail.com $
*
**********************************************/

View file

@ -52,7 +52,7 @@ Slotted page layout:
START
$Id$
$Id: slotted.h 1571 2011-11-09 21:37:38Z sears.russell@gmail.com $
@todo slotted.c Should know that specific record types (like blobs) exist,
(but should not hardcode information about these types) This

View file

@ -566,7 +566,7 @@ terms specified in this license.
* @todo error handling
*
* @ingroup LLADD_CORE
* $Id$
* $Id: transactional.h 1517 2011-06-12 08:13:34Z sears.russell@gmail.com $
*/
#ifndef __TRANSACTIONAL_H__

View file

@ -49,7 +49,7 @@ terms specified in this license.
* (instead of by absolute logfile size)
* @todo avoid copying the non-truncated tail of the log each time truncation occurs.
*
* $Id$
* $Id: truncation.h 1263 2009-10-14 21:22:50Z sears.russell $
*
*/
#ifndef STASIS_TRUNCATION_H

50
stasis/util/bloomFilter.h Normal file
View file

@ -0,0 +1,50 @@
/*
* bloomFilter.h
*
* Copyright 2010-2012 Yahoo! Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: sears
*/
#ifndef STASIS_BLOOM_FILTER_H
#define STASIS_BLOOM_FILTER_H
#include <stasis/common.h>
BEGIN_C_DECLS
typedef struct stasis_bloom_filter_t stasis_bloom_filter_t;
/**
@return 0 if there is not enough memory, or some other error occurred; a
pointer to the new bloom filter otherwise.
*/
stasis_bloom_filter_t * stasis_bloom_filter_create(uint64_t(*hash_func_a)(const char*,int),
uint64_t(*hash_func_b)(const char*,int),
uint64_t num_expected_items,
double false_positive_rate);
void stasis_bloom_filter_destroy(stasis_bloom_filter_t*);
void stasis_bloom_filter_insert(stasis_bloom_filter_t * bf, const char* key, int len);
/**
@return 1 if the value might be in the bloom filter, 0 otherwise
*/
int stasis_bloom_filter_lookup(stasis_bloom_filter_t * bf, const char* key, int len);
void stasis_bloom_filter_print_stats(stasis_bloom_filter_t * bf);
END_C_DECLS
#endif

View file

@ -0,0 +1,60 @@
#include <stasis/common.h>
#define PAGE void*
typedef uint64_t metadata_t;
// TODO: These need to be packed!
typedef struct stasis_btree_index_page_header {
metadata_t metadata;
PAGE parent;
pthread_rwlock_t sx_latch;
} stasis_btree_index_page;
typedef struct stasis_btree_data_page_header {
metadata_t metadata;
PAGE parent;
PAGE rightSibling;
PAGE leftSibling;
pthread_rwlock_t latch;
};
static inline byte metadata_is_leaf(metadata_t m) {
return m & 0x1;
}
static inline metadata_t metadata_set_leaf(metadata_t m) {
return m | 0x1;
}
static inline metadata_t metadata_clear_leaf(metadata_t m) {
return m & ~0x1;
}
static inline byte metadata_is_balanced(metadata_t m) {
return m & 0x2;
}
static inline byte metadata_set_balanced(metadata_t m) {
return m | 0x2;
}
static inline byte metadata_clear_balanced(metadata_t m) {
return m & ~0x2;
}
typedef enum {
TEMP = 0,
RED = 1,
GREEN = 2,
BLUE = 3
} color_t;
static inline color_t leaf_metadata_get_color(metadata_t m) {
return (m & (0x4 | 0x8)) >> 2;
}
static inline metadata_t leaf_metadata_set_color(metadata_t m, color_t c) {
return (m & ~(0x4 | 0x8)) | (c << 2);
}
static inline int index_metadata_get_level(metadata_t m) {
return (m & (0x4 | 0x8 | 0x10)) >> 2;
}
static inline metadata_t index_metadata_set_level(metadata_t m, int c) {
return (m & ~(0x4 | 0x8 | 0x10)) | (c << 2);
}

View file

@ -0,0 +1,393 @@
/*
* concurrentSkipList.h
*
* Created on: Feb 8, 2012
* Author: sears
*/
#ifndef CONCURRENTSKIPLIST_H_
#define CONCURRENTSKIPLIST_H_
#include <stdio.h>
#include <stasis/common.h>
#include <stasis/util/random.h>
BEGIN_C_DECLS
//#define stasis_util_skiplist_assert(x) assert(x)
#define stasis_util_skiplist_assert(x)
#define STASIS_SKIPLIST_HP_COUNT 3
#include <stasis/util/hazard.h>
typedef struct stasis_skiplist_node_t {
hazard_ptr key;
pthread_mutex_t level_mut;
char level;
int16_t refcount;
} stasis_skiplist_node_t;
typedef struct {
hazard_ptr header;
int levelCap;
int levelHint;
pthread_mutex_t levelHint_mut;
pthread_key_t k;
hazard_t * h;
hazard_t * ret_hazard;
int (*cmp)(const void *a, const void *b);
int (*finalize)(void *node, void *nul);
} stasis_skiplist_t;
static inline stasis_skiplist_node_t** stasis_util_skiplist_get_forward_raw(
stasis_skiplist_node_t * x, int n) {
return (stasis_skiplist_node_t**)(((intptr_t)(x + 1))
+(n-1)*(sizeof(stasis_skiplist_node_t*)+sizeof(pthread_mutex_t)));
}
static inline hazard_ptr* stasis_util_skiplist_get_forward(
stasis_skiplist_node_t * x, int n){
return (hazard_ptr*)stasis_util_skiplist_get_forward_raw(x,n);
}
static inline pthread_mutex_t * stasis_util_skiplist_get_forward_mutex(
stasis_skiplist_node_t * x, int n) {
return (pthread_mutex_t*)(stasis_util_skiplist_get_forward(x,n)+1);
}
int stasis_util_skiplist_node_finalize(void * pp, void * conf) {
stasis_skiplist_node_t * p = pp;
stasis_skiplist_t * list = conf;
if(p->refcount == 0) {
void * oldKey = (void*)p->key; // do this early to find races.
for(int i = 1; i <= p->level; i++) {
stasis_skiplist_node_t * n = *stasis_util_skiplist_get_forward_raw(p, i);
int oldval = __sync_sub_and_fetch(&n->refcount, 1);
(void)oldval;
stasis_util_skiplist_assert(oldval >= 0);
}
hazard_free(list->ret_hazard, oldKey);
pthread_mutex_destroy(&p->level_mut);
stasis_util_skiplist_assert(oldKey == (void*)p->key);
stasis_util_skiplist_assert(p->refcount == 0);
free(p);
return 1;
} else {
return 0;
}
}
int stasis_util_skiplist_default_key_finalize(void * p, void * ignored) {
free(p);
return 1;
}
static inline int stasis_util_skiplist_random_level(pthread_key_t k) {
kiss_table_t * kiss = pthread_getspecific(k);
if(kiss == 0) {
kiss = malloc(sizeof(*kiss));
stasis_util_random_kiss_settable(kiss,
random(), random(), random(), random(), random(), random());
pthread_setspecific(k, kiss);
}
// MWC is weaker but faster than KISS. The main drawback is that it has a
// period of 2^60. I can't imagine that mattering for our purposes.
// __builtin_ctz counts trailing zeros, so, this function hardcodes p = 0.5.
// MWC returns a 32-bit int; above 2^32 elements we start to violate the
// O(log n) bounds.
return 1+__builtin_ctz(stasis_util_random_kiss_MWC(kiss));
}
static inline hazard_ptr stasis_util_skiplist_make_node(int level, void * key) {
stasis_skiplist_node_t * x
= malloc(sizeof(*x)
+ (level) * (sizeof(hazard_ptr) + sizeof(pthread_mutex_t)));
x->key = (hazard_ptr)key;
x->level = level;
x->refcount = 0;
pthread_mutex_init(&x->level_mut,0);
for(int i = 1; i <= level; i++) {
pthread_mutex_init(stasis_util_skiplist_get_forward_mutex(x, i), 0);
*stasis_util_skiplist_get_forward(x, i) = 0;
}
return (hazard_ptr)x;
}
static inline void stasis_util_skiplist_cleanup_tls(void * p) {
free(p);
}
static inline int stasis_util_skiplist_cmp_helper(
stasis_skiplist_t* list, stasis_skiplist_node_t *a, void * bkey) {
if(a == NULL) { return 1; }
void *akey = hazard_ref(list->ret_hazard, 1, &(a->key));
int ret;
if(akey == NULL) { ret = (bkey == NULL ? 0 : -1); }
else if(bkey == NULL) { ret = 1; }
else {ret = list->cmp(akey, bkey); }
hazard_release(list->ret_hazard, 1);
return ret;
}
static inline int stasis_util_skiplist_cmp_helper2(
stasis_skiplist_t* list, stasis_skiplist_node_t *a, stasis_skiplist_node_t * b) {
if(b == NULL) { return a == NULL ? 0 : -1; }
void *bkey = hazard_ref(list->ret_hazard, 2, &(b->key));
int ret = stasis_util_skiplist_cmp_helper(list, a, bkey);
hazard_release(list->ret_hazard, 2);
return ret;
}
static inline stasis_skiplist_t * stasis_util_skiplist_init(
int (*cmp)(const void*, const void*),
int (*finalize)(void *, void * nul)) {
stasis_skiplist_t * list = malloc(sizeof(*list));
list->levelCap = 32;
list->h = hazard_init(STASIS_SKIPLIST_HP_COUNT+list->levelCap,
STASIS_SKIPLIST_HP_COUNT, 250, stasis_util_skiplist_node_finalize, list);
list->finalize
= finalize ? finalize : stasis_util_skiplist_default_key_finalize;
list->ret_hazard = hazard_init(3, 3, 250, list->finalize, NULL);
list->levelHint = 1;
pthread_mutex_init(&list->levelHint_mut, 0);
list->header = stasis_util_skiplist_make_node(list->levelCap, NULL);
pthread_key_create(&(list->k), stasis_util_skiplist_cleanup_tls);
list->cmp = cmp;
return list;
}
static inline void stasis_util_skiplist_deinit(stasis_skiplist_t * list) {
hazard_deinit(list->h);
hazard_deinit(list->ret_hazard);
pthread_mutex_destroy(&list->levelHint_mut);
free((void*)list->header);
kiss_table_t * kiss = pthread_getspecific(list->k);
if(kiss) {
stasis_util_skiplist_cleanup_tls(kiss);
pthread_setspecific(list->k, 0);
}
pthread_key_delete(list->k);
free(list);
}
static inline void * stasis_util_skiplist_search(stasis_skiplist_t * list, void * searchKey) {
// the = 0 here are to silence GCC -O3 warnings.
stasis_skiplist_node_t *x, *y = 0;
int cmp = 0;
x = hazard_set(list->h,0,(void*)list->header);
for(int i = list->levelHint; i > 0; i--) {
y = hazard_ref(list->h,1,stasis_util_skiplist_get_forward(x, i));
while((cmp = stasis_util_skiplist_cmp_helper(list, y, searchKey)) < 0) {
x = hazard_set(list->h,0,(void*)y);
y = hazard_ref(list->h,1,stasis_util_skiplist_get_forward(x, i));
}
}
void * ret;
if(cmp == 0) {
ret = hazard_ref(list->ret_hazard, 0, &(y->key));
} else {
ret = 0;
hazard_release(list->ret_hazard, 0);
}
hazard_release(list->h,0);
hazard_release(list->h,1);
return ret;
}
static inline stasis_skiplist_node_t * stasis_util_skiplist_get_lock(
stasis_skiplist_t * list, stasis_skiplist_node_t * x, void * searchKey, int i) {
stasis_skiplist_node_t * z
= hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i));
while(stasis_util_skiplist_cmp_helper(list, z, searchKey) < 0) {
x = hazard_set(list->h, 0, (void*)z);
z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i));
}
pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(x, i));
z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i));
while(stasis_util_skiplist_cmp_helper(list, z, searchKey) < 0) {
// Should lock of z be here?
pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i));
x = hazard_set(list->h, 0, (void*)z);
// Note: lock of z was here (and it was called x)
pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(x, i));
z = hazard_ref(list->h, 2, stasis_util_skiplist_get_forward(x, i));
}
stasis_util_skiplist_assert(stasis_util_skiplist_cmp_helper2(list, x, (stasis_skiplist_node_t*)*stasis_util_skiplist_get_forward(x, i)) < 0);
hazard_release(list->h, 2);
return x;
}
/**
* Insert a value into the skiplist. Any existing value will be replaced.
* @return the old value or null if there was no such value.
*/
static inline void * stasis_util_skiplist_insert(stasis_skiplist_t * list, void * searchKey) {
stasis_skiplist_node_t * update[list->levelCap+1];
stasis_skiplist_node_t *x, *y;
IN:
x = hazard_set(list->h, 0, (void*)list->header);
int L = list->levelHint;
// for i = L downto 1
int i;
for(i = L+1; i > 1;) {
i--;
y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i));
while(stasis_util_skiplist_cmp_helper(list, y, searchKey) < 0) {
x = hazard_set(list->h, 0, (void*)y);
y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i));
}
update[i] = hazard_set(list->h, STASIS_SKIPLIST_HP_COUNT+(L-i), x);
}
// update[L..1] is set.
// h [HP_COUNT+[0..L-1] is set.
// Note get_lock grabs the hazard pointer for x.
x = stasis_util_skiplist_get_lock(list, x, searchKey, 1);
y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, 1));
if(stasis_util_skiplist_cmp_helper(list, y, searchKey) == 0) {
pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, 1));
pthread_mutex_lock(&y->level_mut);
x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, 1));
int isGarbage = stasis_util_skiplist_cmp_helper(list, x, searchKey) < 0;
if(!isGarbage) {
void * oldKey;
do {
oldKey = hazard_ref(list->ret_hazard, 0, &(y->key));
} while(!__sync_bool_compare_and_swap(&(y->key), oldKey, searchKey));
pthread_mutex_unlock(&y->level_mut);
hazard_release(list->h, 0);
hazard_release(list->h, 1);
for(int i = L; i > 0; i--) {
hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT);
// h [HP_COUNT+[L-1..0] is cleared
}
hazard_free(list->ret_hazard, oldKey);
return oldKey;
} else {
pthread_mutex_unlock(&y->level_mut);
// printf("insert landed on garbage node. retrying.\n");
goto IN;
}
}
hazard_ptr newnode = stasis_util_skiplist_make_node(stasis_util_skiplist_random_level(list->k), searchKey);
y = hazard_set(list->h, 1, (void*)newnode);
pthread_mutex_lock(&y->level_mut);
for(int i = L+1; i <= y->level; i++) {
update[i] = (void*)list->header;
}
// update[L+1..y->level] is set
for(int i = 1; i <= y->level; i++) {
if(i != 1) {
x = stasis_util_skiplist_get_lock(list, update[i], searchKey, i);
}
*stasis_util_skiplist_get_forward(y, i) = *stasis_util_skiplist_get_forward(x, i);
*stasis_util_skiplist_get_forward(x, i) = (hazard_ptr)y;
pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i));
}
pthread_mutex_unlock(&y->level_mut);
int L2 = list->levelHint;
if(L2 < list->levelCap && *stasis_util_skiplist_get_forward((void*)list->header, L2+1) != 0) {
if(pthread_mutex_trylock(&list->levelHint_mut) == 0) {
while(list->levelHint < list->levelCap &&
*stasis_util_skiplist_get_forward((void*)list->header, list->levelHint+1) != 0) {
list->levelHint = list->levelHint+1; // XXX atomics?
}
pthread_mutex_unlock(&list->levelHint_mut);
}
}
hazard_release(list->h, 0);
hazard_release(list->h, 1);
for(int i = L; i > 0; i--) {
// h [HP_COUNT+[L-1..0] is cleared
hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT);
}
return NULL;
}
/**
* Delete a value from the list, returning it if it existed.
* @return The old value, or null.
*/
static inline void * stasis_util_skiplist_delete(stasis_skiplist_t * list, void * searchKey) {
stasis_skiplist_node_t * update[list->levelCap+1];
stasis_skiplist_node_t *x, *y;
x = hazard_set(list->h, 0, (void*)list->header);
int L = list->levelHint;
// for i = L downto 1
int i;
for(i = L+1; i > 1;) {
i--; // decrement after check, so that i is 1 at the end of the loop.
y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i));
while(stasis_util_skiplist_cmp_helper(list, y, searchKey) < 0) {
x = hazard_set(list->h, 0, (void*)y);
y = hazard_ref(list->h, 1, stasis_util_skiplist_get_forward(x, i));
}
update[i] = hazard_set(list->h, STASIS_SKIPLIST_HP_COUNT+(L-i), x);
}
// h[HP_COUNT+[0..L-1] is set
y = hazard_set(list->h, 1, (void*)x);
int isGarbage = 0;
int first = 1;
// do ... until equal and not garbage
do {
// Note: it is unsafe to copy y->i directly into y, since doing so releases
// the hazard pointer in race. Fortunately, we don't need x for anything
// until we overwrite it immediately below.
x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, i));
if(first) {
first = 0;
} else {
// This unlock was not in the pseudocode, but seems to be necessary...
pthread_mutex_unlock(&y->level_mut);
}
y = hazard_set(list->h, 1, x);
if(stasis_util_skiplist_cmp_helper(list, y, searchKey) > 0) {
hazard_release(list->ret_hazard, 0);
hazard_release(list->h, 0);
hazard_release(list->h, 1);
for(i = L+1; i > 1;) {
i--;
hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT);
// h[HP_COUNT+[L-1..0] is cleared
}
return NULL;
}
pthread_mutex_lock(&y->level_mut);
x = hazard_ref(list->h, 0, stasis_util_skiplist_get_forward(y, i));
// Note: this is a > in pseudocode, which lets equal nodes link back into themselves.
isGarbage = stasis_util_skiplist_cmp_helper2(list, y, x) > 0;
// pseudocode would unlock if garbage here. Moved unlock to top of loop.
} while(!(!isGarbage && stasis_util_skiplist_cmp_helper(list, y, searchKey) == 0));
for(int i = L+1; i <= y->level; i++) { update[i] = (void*)list->header; }
for(int i = y->level; i > 0; i--) {
x = stasis_util_skiplist_get_lock(list, update[i], searchKey, i);
pthread_mutex_lock(stasis_util_skiplist_get_forward_mutex(y, i));
stasis_util_skiplist_assert(*stasis_util_skiplist_get_forward(x, i) == (intptr_t)y);
__sync_fetch_and_add(&x->refcount, 1);
*stasis_util_skiplist_get_forward(x, i) = *stasis_util_skiplist_get_forward(y, i);
*stasis_util_skiplist_get_forward(y, i) = (hazard_ptr)x;
stasis_util_skiplist_assert(stasis_util_skiplist_cmp_helper2(list, y, x) > 0); // assert is garbage
pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(x, i));
pthread_mutex_unlock(stasis_util_skiplist_get_forward_mutex(y, i));
}
void * oldKey = hazard_ref(list->ret_hazard, 0, &(y->key));
pthread_mutex_unlock(&y->level_mut);
int L2 = list->levelHint;
if(L2 > 1 && *stasis_util_skiplist_get_forward((void*)list->header, L2) == 0) {
if(pthread_mutex_trylock(&list->levelHint_mut) == 0) {
while(list->levelHint > 1 && (stasis_skiplist_node_t*)*stasis_util_skiplist_get_forward((void*)list->header, list->levelHint) == 0) {
list->levelHint = list->levelHint - 1;
}
pthread_mutex_unlock(&list->levelHint_mut);
}
}
hazard_release(list->h, 0);
hazard_release(list->h, 1);
for(i = L+1; i > 1;) {
i--;
hazard_release(list->h, (i-1)+STASIS_SKIPLIST_HP_COUNT);
// h[HP_COUNT+[L-1..0] is cleared
}
// oldKey will be freed by y's finalizer
hazard_free(list->h, y);
return oldKey;
}
void stasis_skiplist_release(stasis_skiplist_t * list) {
hazard_release(list->ret_hazard, 0);
}
END_C_DECLS
#endif /* CONCURRENTSKIPLIST_H_ */

205
stasis/util/hazard.h Normal file
View file

@ -0,0 +1,205 @@
/*
* hazard.h
*
* Created on: Feb 7, 2012
* Author: sears
*/
#include <stasis/common.h>
#include <stasis/util/time.h>
#include <assert.h>
#ifndef HAZARD_H_
#define HAZARD_H_
typedef intptr_t hazard_ptr;
typedef struct hazard_t hazard_t;
typedef struct hazard_ptr_rec_t {
hazard_t * h;
hazard_ptr * hp;
void ** rlist;
int rlist_len;
struct hazard_ptr_rec_t * next;
} hazard_ptr_rec_t;
struct hazard_t {
pthread_key_t hp;
int num_slots;
int stack_start;
int num_r_slots;
int (*finalizer)(void *, void* conf);
void * conf;
hazard_ptr_rec_t * tls_list;
pthread_mutex_t tls_list_mut;
pthread_cond_t thread_shutdown;
};
static int intptr_cmp(const void * ap, const void *bp) {
intptr_t a = *(intptr_t*)ap;
intptr_t b = *(intptr_t*)bp;
return (a < b) ? -1 : ( (a > b) ? 1 : 0 );
}
static inline void hazard_scan(hazard_t * h, hazard_ptr_rec_t * rec) {
if(rec == NULL) {
rec = pthread_getspecific(h->hp);
}
if(rec == NULL) { return; }
qsort(rec->rlist, rec->rlist_len, sizeof(void*), intptr_cmp);
hazard_ptr * ptrs = 0;
int ptrs_len = 0;
pthread_mutex_lock(&h->tls_list_mut);
hazard_ptr_rec_t * list = h->tls_list;
while(list != NULL) {
ptrs = realloc(ptrs, sizeof(hazard_ptr) * (ptrs_len+h->num_slots));
// int would_stop = 0;
for(int i = 0; i < h->num_slots; i++) {
ptrs[ptrs_len] = list->hp[i];
if(!ptrs[ptrs_len]) {
// if(i >= h->stack_start) { would_stop = 1; }
if(i >= h->stack_start) { break; }
} else {
// assert(! would_stop);
ptrs_len++;
}
}
list = list->next;
}
pthread_mutex_unlock(&h->tls_list_mut);
qsort(ptrs, ptrs_len, sizeof(void*), intptr_cmp);
int i = 0, j = 0;
while(j < rec->rlist_len) {
while(i < ptrs_len && (hazard_ptr)rec->rlist[j] > ptrs[i]) { i++; }
if(i == ptrs_len || (hazard_ptr)rec->rlist[j] != ptrs[i]) {
if(h->finalizer((void*)rec->rlist[j], h->conf)) {
rec->rlist[j] = 0;
}
}
j++;
}
j = 0;
for(i = 0; i < rec->rlist_len; i++) {
if(rec->rlist[i] != 0) {
rec->rlist[j] = rec->rlist[i];
j++;
}
}
rec->rlist_len = j;
free(ptrs);
}
static void hazard_deinit_thread(void * p) {
hazard_ptr_rec_t * rec = p;
if(rec != NULL) {
while(rec->rlist_len != 0) {
hazard_scan(rec->h, rec);
if(rec->rlist_len != 0) {
pthread_mutex_lock(&rec->h->tls_list_mut);
struct timeval tv;
gettimeofday(&tv, 0);
struct timespec ts = stasis_double_to_timespec(stasis_timeval_to_double(tv) + 0.01);
pthread_cond_timedwait(&rec->h->thread_shutdown,
&rec->h->tls_list_mut, &ts);
pthread_mutex_unlock(&rec->h->tls_list_mut);
}
}
pthread_cond_broadcast(&rec->h->thread_shutdown);
pthread_mutex_lock(&rec->h->tls_list_mut);
hazard_ptr_rec_t ** last = &rec->h->tls_list;
hazard_ptr_rec_t * list = *last;
while(list != rec) {
last = &list->next;
list = *last;
}
*last = rec->next;
pthread_mutex_unlock(&rec->h->tls_list_mut);
free(rec->hp);
free(rec->rlist);
free(rec);
}
}
/**
* Init the state necessary for a set of hazard pointers. This module
* implements an optimization where the higher numbered pointers can be treated
* as a fixed length stack. Entries after the first NULL in that region will
* be ignored. This allows applications that need varying numbers of hazard
* pointers to be collected efficiently.
*
* @param hp_slots the total number of slots.
* @param stack_start the first hazard pointer in the "stack" region.
* @param r_slots the max number of uncollected values per thread.
*/
static inline hazard_t* hazard_init(int hp_slots, int stack_start, int r_slots,
int (*finalizer)(void*, void*), void * conf) {
hazard_t * ret = malloc(sizeof(hazard_t));
pthread_key_create(&ret->hp, hazard_deinit_thread);
ret->num_slots = hp_slots;
ret->stack_start = stack_start;
ret->num_r_slots = r_slots;
ret->tls_list = NULL;
ret->finalizer = finalizer;
ret->conf = conf;
pthread_mutex_init(&ret->tls_list_mut,0);
pthread_cond_init(&ret->thread_shutdown, 0);
return ret;
}
static inline hazard_ptr_rec_t * hazard_ensure_tls(hazard_t * h) {
hazard_ptr_rec_t * rec = pthread_getspecific(h->hp);
if(rec == NULL) {
rec = malloc(sizeof(hazard_ptr_rec_t));
rec->hp = calloc(h->num_slots, sizeof(hazard_ptr));
rec->rlist = calloc(h->num_r_slots, sizeof(hazard_ptr));
rec->rlist_len = 0;
rec->h = h;
pthread_setspecific(h->hp, rec);
pthread_mutex_lock(&h->tls_list_mut);
rec->next = h->tls_list;
h->tls_list = rec;
pthread_mutex_unlock(&h->tls_list_mut);
}
return rec;
}
static inline void hazard_deinit(hazard_t * h) {
hazard_ptr_rec_t * rec = pthread_getspecific(h->hp);
hazard_deinit_thread(rec);
pthread_key_delete(h->hp);
assert(h->tls_list == NULL);
pthread_mutex_destroy(&h->tls_list_mut);
pthread_cond_destroy(&h->thread_shutdown);
free(h);
}
static inline void * hazard_ref(hazard_t* h, int slot, hazard_ptr* ptr) {
hazard_ptr_rec_t * rec = hazard_ensure_tls(h);
do {
rec->hp[slot] = *ptr; // Read ptr from ram
__sync_synchronize(); // Push HP to ram
} while(rec->hp[slot] != *ptr); // Re-read ptr from ram
return (void*) rec->hp[slot];
}
/**
* Set a hazard pointer using a known-stable address. This is mostly useful
* when the value pointed to by one hazard pointer should be pointed to by
* another hazard pointer.
*/
static inline void* hazard_set(hazard_t* h, int slot, void* val) {
hazard_ptr_rec_t * rec = hazard_ensure_tls(h);
rec->hp[slot] = (hazard_ptr)val;
// val is stable (and on our stack!) so there's no reason to re-check it.
__sync_synchronize();
return val;
}
static inline void hazard_release(hazard_t* h, int slot) {
hazard_ptr_rec_t * rec = hazard_ensure_tls(h);
__sync_synchronize(); // prevent the = 0 from being moved before this line.
rec->hp[slot] = 0;
}
// Must be called *after* all references to ptr are removed.
static inline void hazard_free(hazard_t* h, void* ptr) {
hazard_ptr_rec_t * rec = hazard_ensure_tls(h);
rec->rlist[rec->rlist_len] = ptr;
(rec->rlist_len)++;
while(rec->rlist_len == h->num_r_slots) {
hazard_scan(h, rec);
if(rec->rlist_len == h->num_r_slots) {
struct timespec slp = stasis_double_to_timespec(0.001);
nanosleep(&slp,0);
}
}
}
#endif /* HAZARD_H_ */

View file

@ -115,7 +115,7 @@ void __profile_deletelock (rwl *lock);
#define FETCH_AND_ADD(_o,_i) __sync_fetch_and_add(_o,_i)
#if ULONG_MAX <= 4294967295 // are we on a 32 bit machine?
#define ATOMIC_READ_64(mutex,_o) FETCH_AND_ADD(_o,0)
#define ATOMIC_WRITE_64(mutex,_o,_n) _sync_lock_test_and_set(_o,_n)
#define ATOMIC_WRITE_64(mutex,_o,_n) __sync_lock_test_and_set(_o,_n)
#else // this is a 33 or greater bit machine. Assume it's 64 bit, and that 64 bit writes are atomic.
#define ATOMIC_READ_64(mutex,_a) *_a
#define ATOMIC_WRITE_64(mutex,_a,_n) do {*_a=_n; } while (0)

View file

@ -14,5 +14,76 @@ BEGIN_C_DECLS
uint64_t stasis_util_random64(uint64_t x);
/**
* The remainder of this file is a cleaned up and reentrant version of George
* Marsaglia's generators from stat.sci.math in 1999.
*
* I was concerned that replacing the macros with static inlines would hurt
* the optimizer, but for some reason, on my machine, and with -O3,
* check_kiss.c is about 2x faster than the version from the newsgroup posting.
*
* @see check_kiss.c for the original, unmodified code.
*/
typedef struct {
uint32_t x;
uint32_t y;
uint32_t bro;
uint8_t c;
uint32_t z;
uint32_t w;
uint32_t jsr;
uint32_t jcong;
uint32_t a;
uint32_t b;
uint32_t t[256];
} kiss_table_t;
static inline uint32_t stasis_util_random_kiss_znew(kiss_table_t* k) {
return k->z=36969*(k->z&65535)+(k->z>>16);
}
static inline uint32_t stasis_util_random_kiss_wnew(kiss_table_t* k) {
return k->w=18000*(k->w&65535)+(k->w>>16);
}
static inline uint32_t stasis_util_random_kiss_MWC(kiss_table_t* k) {
return (stasis_util_random_kiss_znew(k)<<16)+stasis_util_random_kiss_wnew(k);
}
static inline uint32_t stasis_util_random_kiss_SHR3(kiss_table_t* k) {
return (k->jsr^=(k->jsr<<17), k->jsr^=(k->jsr>>13), k->jsr^=(k->jsr<<5));
}
static inline uint32_t stasis_util_random_kiss_CONG(kiss_table_t* k) {
return k->jcong=69069*k->jcong+1234567;
}
static inline uint32_t stasis_util_random_kiss_FIB(kiss_table_t* k) {
return ((k->b=k->a+k->b),(k->a=k->b-k->a));
}
static inline uint32_t stasis_util_random_kiss_KISS(kiss_table_t* k) {
return (stasis_util_random_kiss_MWC(k)^stasis_util_random_kiss_CONG(k))
+ stasis_util_random_kiss_SHR3(k);
}
static inline uint32_t stasis_util_random_kiss_LFIB4(kiss_table_t* k) {
(k->c)++;
return
k->t[k->c]=k->t[k->c]
+k->t[(uint8_t)(k->c+58)]
+k->t[(uint8_t)(k->c+119)]
+k->t[(uint8_t)(k->c+178)];
}
static inline uint32_t stasis_util_random_kiss_SWB(kiss_table_t* k) {
return (k->c++,
k->bro=(k->x<k->y),
k->t[k->c]=(k->x=k->t[(uint8_t)(k->c+34)])-(k->y=k->t[(uint8_t)(k->c+19)]+k->bro));
}
static inline float stasis_util_random_kiss_UNI(kiss_table_t *k) {
return stasis_util_random_kiss_KISS(k) * 2.328306e-10;
}
static inline float stasis_util_random_kiss_VNI(kiss_table_t *k) {
return ((int32_t) stasis_util_random_kiss_KISS(k))*4.656613e-10;
}
static inline void stasis_util_random_kiss_settable(kiss_table_t *k,
uint32_t i1, uint32_t i2, uint32_t i3,
uint32_t i4, uint32_t i5, uint32_t i6) {
k->x=0;k->y=0;k->c=0;k->z=i1;k->w=i2,k->jsr=i3; k->jcong=i4; k->a=i5; k->b=i6;
for(int i = 0; i < 256; i++) { k->t[i] = stasis_util_random_kiss_KISS(k); }
}
END_C_DECLS
#endif /* RANDOM_H_ */

View file

@ -1,5 +1,5 @@
/*
* RCS $Id$
* RCS $Id: redblack.h 1525 2011-06-13 10:19:44Z sears.russell@gmail.com $
*/
/*

78
stasis/util/sux.h Normal file
View file

@ -0,0 +1,78 @@
/*
* sux.h
*
* Created on: Jul 19, 2011
* Author: sears
*/
/**
* @file SUX: Shared, Update, Exclusive latch implementation.
*
* Latch protocol used by some concurrent data structures.
*
* Lock compatibility table:
*
* S U X
* +------
* S | Y Y N
* U | Y N N
* X | N N N
*
* This file implements SUX locks using a pthread mutex and a pthread rwlock.
*
* Lock mode | Holds rwlock? | Holds mutex?
* Shared | Read | No
* Update | No | Yes
* eXclusive | Write | Yes
*
* The mutex is always acquired before the rwlock, which allows us to safely
* upgrade and downgrade the SUX latch between U and X.
*/
#ifndef SUX_H_
#define SUX_H_
#include <stasis/common.h>
BEGIN_C_DECLS
typedef struct {
pthread_rwlock_t sx_rwl;
pthread_mutex_t ux_mut;
} sux_latch;
void sux_latch_init(sux_latch *sux) {
pthread_rwlock_init(&sux->sx_rwl);
pthread_mutex_init(&sux->ux_mut);
}
void sux_latch_destroy(sux_latch *sux) {
pthread_rwlock_destroy(&sux->sx_rwl);
pthread_mutex_destroy(&sux->ux_mut);
}
void sux_latch_slock(sux_latch *sux) {
pthread_rwlock_rdlock(&sux->sx_rwl);
}
void sux_latch_sunlock(sux_latch *sux) {
pthread_rwlock_unlock(&sux->sx_rwl);
}
void sux_latch_ulock(sux_latch *sux) {
pthread_mutex_lock(&sux->ux_mut);
}
void sux_latch_uunlock(sux_latch *sux) {
pthread_mutex_unlock(&sux->ux_mut);
}
void sux_latch_upgrade(sux_latch *sux) {
pthread_rwlock_wrlock(&sux->sx_rwl);
}
void sux_latch_downgrade(sux_latch *sux) {
pthread_rwlock_unlock(&sux->sx_rwl);
}
void sux_latch_xlock(sux_latch *sux) {
sux_latch_ulock(sux);
sux_latch_upgrade(sux);
}
void sux_latch_xunlock(sux_latch *sux) {
sux_latch_downgrade(sux);
sux_latch_uunlock(sux);
}
END_C_DECLS
#endif /* SUX_H_ */

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: TAbort.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: TGet.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: TInsert.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: TInsertSequential.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: TUpdate.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: abort-speed.c 2 2004-06-24 21:10:31Z sears $
*
* how fast can we abort? does it depend on anything?
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: abort.c 2 2004-06-24 21:10:31Z sears $
*
* aborting and committing transactions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: commit-speed.c 2 2004-06-24 21:10:31Z sears $
*
* how fast can we abort? does it depend on anything?
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: inc_dec.c 2 2004-06-24 21:10:31Z sears $
*
* testing other user functions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: interleaved.c 2 2004-06-24 21:10:31Z sears $
*
* interleaved transactions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: logvals.c 2 2004-06-24 21:10:31Z sears $
*
* long values
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: loop.c 2 2004-06-24 21:10:31Z sears $
*
* test by looping
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: nontrans.c 2 2004-06-24 21:10:31Z sears $
*
* doesn't use transactions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: prepare_1.c 2 2004-06-24 21:10:31Z sears $
*
* testing other user functions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: prepare_2.c 2 2004-06-24 21:10:31Z sears $
*
* testing other user functions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: prepare_3.c 2 2004-06-24 21:10:31Z sears $
*
* testing other user functions
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: recover.c 2 2004-06-24 21:10:31Z sears $
*
* recover from a crash
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: strings.c 2 2004-06-24 21:10:31Z sears $
*
* testing with various strings
* *******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*******************************************
* $Id$
* $Id: test.c 2 2004-06-24 21:10:31Z sears $
*
* Testing driver
* ****************************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**********************************************
* $Id$
* $Id: test.h 2 2004-06-24 21:10:31Z sears $
*
* dummy header file for tests
* ********************************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/****************************
* $Id$
* $Id: test0.c 2 2004-06-24 21:10:31Z sears $
*
* glassbox testing of pages
* *************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/********************************
* $Id$
* $Id: test1.c 2 2004-06-24 21:10:31Z sears $
*
* simple reading, writing
* ******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/********************************
* $Id$
* $Id: test2.c 2 2004-06-24 21:10:31Z sears $
*
* simple reading, writing
* ******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/********************************
* $Id$
* $Id: test3.c 2 2004-06-24 21:10:31Z sears $
*
* ******************************/

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: twentyfour.c 2 2004-06-24 21:10:31Z sears $
*
* if you run this test 23 times, it seems to work fine. On the 24th time, you
* need to allocate a new page, and it crashes;

View file

@ -40,7 +40,7 @@ permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/*********************************
* $Id$
* $Id: twentyfour2.c 2 2004-06-24 21:10:31Z sears $
*
* if you run this test 23 times, its fine, but on the 24 time, shit happens
* *******************************/

View file

@ -1,5 +1,10 @@
SUBDIRS(fault_injection)
CREATE_CHECK(check_concurrentSkipList)
CREATE_CHECK(check_bloomFilter)
CREATE_CHECK(check_hazard)
CREATE_CHECK(check_kiss)
CREATE_CHECK(check_redblack)
CREATE_CHECK(check_concurrentBTree)
CREATE_CHECK(check_concurrentHash)
CREATE_CHECK(check_lhtable)
CREATE_CHECK(check_concurrentRingbuffer)
@ -12,23 +17,25 @@ CREATE_CHECK(check_operations)
CREATE_CHECK(check_transactional2)
CREATE_CHECK(check_recovery)
CREATE_CHECK(check_blobRecovery)
CREATE_CHECK(check_bufferManager)
CREATE_CHECK(check_pageOperations)
CREATE_CHECK(check_linearHash)
CREATE_CHECK(check_header)
CREATE_CHECK(check_linkedListNTA)
CREATE_CHECK(check_linearHashNTA)
CREATE_CHECK(check_pageOrientedList)
CREATE_EXPERIMENTAL_CHECK(check_ringbuffer)
CREATE_EXPERIMENTAL_CHECK(check_iterator)
CREATE_EXPERIMENTAL_CHECK(check_multiplexer)
CREATE_CHECK(check_bTree)
CREATE_CHECK(check_regions)
CREATE_CHECK(check_allocationPolicy)
CREATE_CHECK(check_dirtyPageTable)
CREATE_CHECK(check_io)
CREATE_CHECK(check_rangeTracker)
CREATE_CHECK(check_replacementPolicy)
if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
CREATE_CHECK(check_bufferManager)
CREATE_CHECK(check_allocationPolicy)
CREATE_EXPERIMENTAL_CHECK(check_ringbuffer)
CREATE_EXPERIMENTAL_CHECK(check_iterator)
CREATE_EXPERIMENTAL_CHECK(check_multiplexer)
CREATE_EXPERIMENTAL_CHECK(check_lsmTree)
CREATE_EXPERIMENTAL_CHECK(check_groupBy)
ENDIF(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
CREATE_CHECK(check_boundedLog)

View file

@ -0,0 +1,116 @@
/*
* check_bloomFilter.cpp
*
* Copyright 2010-2012 Yahoo! Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Created on: Oct 2, 2010
* Author: sears
*/
#include <stasis/util/hashFunctions.h>
#include <stasis/util/bloomFilter.h>
#include <assert.h>
#include <stdio.h>
#include <sys/time.h>
#include <stasis/util/crc32.h>
/*
* This file can test CRC and FNV-1 based hash functions. Based on early experiments:
*
* CRC32 insert/lookup: 11/13 seconds, 1.1% false positive
* FNV-1 insert/lookup: 8/9 seconds, 2.8% false positive
*
* Expected false positive rate is 1%.
*/
static uint64_t hash_a(const char* a, int len) {
return stasis_crc32(a,len,0xcafebabe);
}
static uint64_t hash_b(const char* a, int len) {
return stasis_crc32(a,len,0xdeadbeef);
}
static uint64_t hash_a_fnv(const char* a, int len) {
return stasis_util_hash_fnv_1_uint32_t((const byte*)a, len);
}
static uint64_t hash_b_fnv(const char* a, int len) {
return stasis_util_hash_fnv_1_uint64_t((const byte*)a, len);
}
static char * malloc_random_string(int group) {
char * str = 0;
int strlen = 0;
while(!strlen) strlen = 128 + (rand() & 127);
str = (char*)malloc(strlen + 1);
str[0] = group;
for(int i = 1; i < strlen; i++) {
str[i] = (rand() & 128) + 1;
}
str[strlen] = 0;
return str;
}
int main(int argc, char * argv[]) {
(void)hash_a; (void)hash_b;
(void)hash_a_fnv; (void)hash_b_fnv;
const int num_inserts = 1000000;
char ** strings = (char**)malloc(num_inserts * sizeof(char*));
uint64_t sum_strlen = 0;
struct timeval start, stop;
gettimeofday(&start, 0);
printf("seed: %lld\n", (long long)start.tv_sec);
srand(start.tv_sec);
for(int i = 0; i < num_inserts; i++) {
strings[i] = malloc_random_string(1);
sum_strlen += strlen(strings[i]);
}
gettimeofday(&stop,0);
printf("Generated strings in %d seconds. Mean string length: %f\n", (int)(stop.tv_sec - start.tv_sec), (double)(sum_strlen)/(double)num_inserts);
stasis_bloom_filter_t * bf = stasis_bloom_filter_create(hash_a, hash_b, num_inserts, 0.01);
stasis_bloom_filter_print_stats(bf);
gettimeofday(&start, 0);
for(int i = 0; i < num_inserts; i++) {
stasis_bloom_filter_insert(bf,strings[i], strlen(strings[i]));
}
gettimeofday(&stop, 0);
printf("Inserted strings in %d seconds.\n", (int)(stop.tv_sec - start.tv_sec));
gettimeofday(&start, 0);
for(int i = 0; i < num_inserts; i++) {
assert(stasis_bloom_filter_lookup(bf, strings[i], strlen(strings[i])));
}
gettimeofday(&stop, 0);
printf("Looked up strings in %d seconds.\n", (int)(stop.tv_sec - start.tv_sec));
stasis_bloom_filter_print_stats(bf);
uint64_t false_positives = 0;
gettimeofday(&start, 0);
for(int i = 0; i < num_inserts; i++) {
char * str = malloc_random_string(2);
if(stasis_bloom_filter_lookup(bf, str, strlen(str))) {
false_positives ++;
}
assert(stasis_bloom_filter_lookup(bf, strings[i], strlen(strings[i])));
free(str);
}
gettimeofday(&stop, 0);
printf("Generated and looked up non-existant strings in %d seconds\n"
"false positive rate was %f\n", (int)(stop.tv_sec - start.tv_sec),
((double)false_positives)/(double)num_inserts);
return 0;
}

View file

@ -0,0 +1,98 @@
/*
* check_concurrentBTree.c
*
* Created on: Dec 22, 2011
* Author: sears
*/
#define _GNU_SOURCE
#include "../check_includes.h"
#include <stasis/util/concurrentHash.h>
#include <stasis/util/random.h>
#include <stdio.h>
#include <time.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stasis/util/concurrentBTree.h>
#define LOG_NAME "check_concurrentBTree.log"
START_TEST(metadataBitTest) {
int NUM_ITERS = 1000000;
metadata_t m = 0;
byte isLeaf = 0;
byte balanced = 0;
byte color = 0;
byte level = 0;
for(int i = 0; i < NUM_ITERS; i++) {
switch(stasis_util_random64(3)) {
case 0: {// leaf
if(isLeaf) {
assert(metadata_is_leaf(m));
} else {
assert(!metadata_is_leaf(m));
}
isLeaf = stasis_util_random64(2);
if(isLeaf) {
m = metadata_set_leaf(m);
m = leaf_metadata_set_color(m, color);
} else {
m = metadata_clear_leaf(m);
m = index_metadata_set_level(m, level);
}
} break;
case 1: {// balanced
if(balanced) {
assert(metadata_is_balanced(m));
} else {
assert(!metadata_is_balanced(m));
}
balanced = stasis_util_random64(2);
if(balanced) {
m = metadata_set_balanced(m);
} else {
m = metadata_clear_balanced(m);
}
} break;
case 2: {
if(isLeaf) { // color
assert(color == leaf_metadata_get_color(m));
color = stasis_util_random64(3);
m = leaf_metadata_set_color(m, color);
} else { // level
assert(level == index_metadata_get_level(m));
level = stasis_util_random64(8);
m = index_metadata_set_level(m, level);
assert(level == index_metadata_get_level(m));
}
} break;
default: abort();
}
}
} END_TEST
Suite * check_suite(void) {
Suite *s = suite_create("lhtable");
/* Begin a new test */
TCase *tc = tcase_create("lhtable");
tcase_set_timeout(tc, 0); // disable timeouts
/* Sub tests are added, one per line, here */
tcase_add_test(tc, metadataBitTest);
/* --------------------------------------------- */
tcase_add_checked_fixture(tc, setup, teardown);
suite_add_tcase(s, tc);
return s;
}
#include "../check_setup.h"

Some files were not shown because too many files have changed in this diff Show more